isikz committed
Commit 79dffb2 · verified · 1 parent: eec74ee

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +3 -0
  2. added_tokens.json +102 -0
  3. config.json +8 -0
  4. finetuning_bc_prott5.py +149 -0
  5. pytorch_model.bin +3 -0
  6. ready_to_train.csv +3 -0
  7. special_tokens_map.json +125 -0
  8. spiece.model +3 -0
  9. t5-bc-out/checkpoint-47916/optimizer.pt +3 -0
  10. t5-bc-out/checkpoint-47916/pytorch_model.bin +3 -0
  11. t5-bc-out/checkpoint-47916/rng_state.pth +3 -0
  12. t5-bc-out/checkpoint-47916/scheduler.pt +3 -0
  13. t5-bc-out/checkpoint-47916/trainer_state.json +725 -0
  14. t5-bc-out/checkpoint-47916/training_args.bin +3 -0
  15. tokenizer_config.json +941 -0
  16. training_args.bin +3 -0
  17. wandb/debug-internal.log +21 -0
  18. wandb/debug.log +27 -0
  19. wandb/run-20250504_132610-pxg645u5/files/config.yaml +44 -0
  20. wandb/run-20250504_132610-pxg645u5/files/output.log +37 -0
  21. wandb/run-20250504_132610-pxg645u5/files/requirements.txt +541 -0
  22. wandb/run-20250504_132610-pxg645u5/files/wandb-metadata.json +77 -0
  23. wandb/run-20250504_132610-pxg645u5/files/wandb-summary.json +1 -0
  24. wandb/run-20250504_132610-pxg645u5/logs/debug-core.log +14 -0
  25. wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log +19 -0
  26. wandb/run-20250504_132610-pxg645u5/logs/debug.log +26 -0
  27. wandb/run-20250504_132610-pxg645u5/run-pxg645u5.wandb +0 -0
  28. wandb/run-20250504_132912-1agsw1y8/files/config.yaml +374 -0
  29. wandb/run-20250504_132912-1agsw1y8/files/output.log +87 -0
  30. wandb/run-20250504_132912-1agsw1y8/files/requirements.txt +541 -0
  31. wandb/run-20250504_132912-1agsw1y8/files/wandb-metadata.json +77 -0
  32. wandb/run-20250504_132912-1agsw1y8/files/wandb-summary.json +1 -0
  33. wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log +14 -0
  34. wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log +19 -0
  35. wandb/run-20250504_132912-1agsw1y8/logs/debug.log +27 -0
  36. wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb +3 -0
  37. wandb/run-20250504_160615-f65jh2lv/files/output.log +8 -0
  38. wandb/run-20250504_160615-f65jh2lv/files/requirements.txt +541 -0
  39. wandb/run-20250504_160615-f65jh2lv/files/wandb-metadata.json +77 -0
  40. wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log +7 -0
  41. wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log +8 -0
  42. wandb/run-20250504_160615-f65jh2lv/logs/debug.log +26 -0
  43. wandb/run-20250504_160615-f65jh2lv/run-f65jh2lv.wandb +0 -0
  44. wandb/run-20250504_160955-rqk2hbkf/files/config.yaml +44 -0
  45. wandb/run-20250504_160955-rqk2hbkf/files/output.log +24 -0
  46. wandb/run-20250504_160955-rqk2hbkf/files/requirements.txt +541 -0
  47. wandb/run-20250504_160955-rqk2hbkf/files/wandb-metadata.json +77 -0
  48. wandb/run-20250504_160955-rqk2hbkf/files/wandb-summary.json +1 -0
  49. wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log +14 -0
  50. wandb/run-20250504_160955-rqk2hbkf/logs/debug-internal.log +19 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ ready_to_train.csv filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20250504_172503-0ictlmwf/run-0ictlmwf.wandb filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,102 @@
1
+ {
2
+ "<extra_id_0>": 127,
3
+ "<extra_id_10>": 117,
4
+ "<extra_id_11>": 116,
5
+ "<extra_id_12>": 115,
6
+ "<extra_id_13>": 114,
7
+ "<extra_id_14>": 113,
8
+ "<extra_id_15>": 112,
9
+ "<extra_id_16>": 111,
10
+ "<extra_id_17>": 110,
11
+ "<extra_id_18>": 109,
12
+ "<extra_id_19>": 108,
13
+ "<extra_id_1>": 126,
14
+ "<extra_id_20>": 107,
15
+ "<extra_id_21>": 106,
16
+ "<extra_id_22>": 105,
17
+ "<extra_id_23>": 104,
18
+ "<extra_id_24>": 103,
19
+ "<extra_id_25>": 102,
20
+ "<extra_id_26>": 101,
21
+ "<extra_id_27>": 100,
22
+ "<extra_id_28>": 99,
23
+ "<extra_id_29>": 98,
24
+ "<extra_id_2>": 125,
25
+ "<extra_id_30>": 97,
26
+ "<extra_id_31>": 96,
27
+ "<extra_id_32>": 95,
28
+ "<extra_id_33>": 94,
29
+ "<extra_id_34>": 93,
30
+ "<extra_id_35>": 92,
31
+ "<extra_id_36>": 91,
32
+ "<extra_id_37>": 90,
33
+ "<extra_id_38>": 89,
34
+ "<extra_id_39>": 88,
35
+ "<extra_id_3>": 124,
36
+ "<extra_id_40>": 87,
37
+ "<extra_id_41>": 86,
38
+ "<extra_id_42>": 85,
39
+ "<extra_id_43>": 84,
40
+ "<extra_id_44>": 83,
41
+ "<extra_id_45>": 82,
42
+ "<extra_id_46>": 81,
43
+ "<extra_id_47>": 80,
44
+ "<extra_id_48>": 79,
45
+ "<extra_id_49>": 78,
46
+ "<extra_id_4>": 123,
47
+ "<extra_id_50>": 77,
48
+ "<extra_id_51>": 76,
49
+ "<extra_id_52>": 75,
50
+ "<extra_id_53>": 74,
51
+ "<extra_id_54>": 73,
52
+ "<extra_id_55>": 72,
53
+ "<extra_id_56>": 71,
54
+ "<extra_id_57>": 70,
55
+ "<extra_id_58>": 69,
56
+ "<extra_id_59>": 68,
57
+ "<extra_id_5>": 122,
58
+ "<extra_id_60>": 67,
59
+ "<extra_id_61>": 66,
60
+ "<extra_id_62>": 65,
61
+ "<extra_id_63>": 64,
62
+ "<extra_id_64>": 63,
63
+ "<extra_id_65>": 62,
64
+ "<extra_id_66>": 61,
65
+ "<extra_id_67>": 60,
66
+ "<extra_id_68>": 59,
67
+ "<extra_id_69>": 58,
68
+ "<extra_id_6>": 121,
69
+ "<extra_id_70>": 57,
70
+ "<extra_id_71>": 56,
71
+ "<extra_id_72>": 55,
72
+ "<extra_id_73>": 54,
73
+ "<extra_id_74>": 53,
74
+ "<extra_id_75>": 52,
75
+ "<extra_id_76>": 51,
76
+ "<extra_id_77>": 50,
77
+ "<extra_id_78>": 49,
78
+ "<extra_id_79>": 48,
79
+ "<extra_id_7>": 120,
80
+ "<extra_id_80>": 47,
81
+ "<extra_id_81>": 46,
82
+ "<extra_id_82>": 45,
83
+ "<extra_id_83>": 44,
84
+ "<extra_id_84>": 43,
85
+ "<extra_id_85>": 42,
86
+ "<extra_id_86>": 41,
87
+ "<extra_id_87>": 40,
88
+ "<extra_id_88>": 39,
89
+ "<extra_id_89>": 38,
90
+ "<extra_id_8>": 119,
91
+ "<extra_id_90>": 37,
92
+ "<extra_id_91>": 36,
93
+ "<extra_id_92>": 35,
94
+ "<extra_id_93>": 34,
95
+ "<extra_id_94>": 33,
96
+ "<extra_id_95>": 32,
97
+ "<extra_id_96>": 31,
98
+ "<extra_id_97>": 30,
99
+ "<extra_id_98>": 29,
100
+ "<extra_id_99>": 28,
101
+ "<extra_id_9>": 118
102
+ }
config.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "architectures": [
+     "T5BinaryClassifier"
+   ],
+   "model_type": "t5",
+   "d_model": 1024,
+   "is_encoder_decoder": false
+ }
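Note that `T5BinaryClassifier` is a custom classification head defined in `finetuning_bc_prott5.py` (below), not a stock `transformers` architecture, so this config alone is not enough for `AutoModel.from_pretrained` to rebuild the model. A minimal loading sketch, assuming the class definition has been copied into the current module (importing the training script directly would re-run the training) and the repository files are available locally:

```python
import torch

# Assumes the T5BinaryClassifier class from finetuning_bc_prott5.py is in scope.
model = T5BinaryClassifier("Rostlab/prot_t5_xl_uniref50")

# pytorch_model.bin in this repo holds the fine-tuned encoder + classifier weights.
state_dict = torch.load("pytorch_model.bin", map_location="cpu")
model.load_state_dict(state_dict)
model.eval()
```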
finetuning_bc_prott5.py ADDED
@@ -0,0 +1,149 @@
+ import re
+
+ import pandas as pd
+ import torch
+ import torch.nn as nn
+ import wandb
+ from datasets import Dataset
+ from sklearn.metrics import accuracy_score
+ from sklearn.model_selection import train_test_split
+ from transformers import (T5EncoderModel, T5Tokenizer,
+                           Trainer, TrainingArguments)
+ from transformers.modeling_outputs import SequenceClassifierOutput
+
+ # ---------------------------
+ # 1. I/O AND LOGIN
+ # ---------------------------
+
+ wandb.login()
+ wandb.init(project='finetuning-bc-protT5')
+
+ # ---------------------------
+ # 2. DATA PREPARATION
+ # ---------------------------
+ # Keep only 15-residue windows (site +/-7 residues): positives come from
+ # SITE_+/-7_AA, negatives from NON_PH_SITE.
+ data = pd.read_csv("ready_to_train.csv")
+ pos = data.loc[data["SITE_+/-7_AA"].str.len() == 15]["SITE_+/-7_AA"].tolist()
+ neg = data.loc[data["NON_PH_SITE"].str.len() == 15]["NON_PH_SITE"].tolist()
+ labels = [1] * len(pos) + [0] * len(neg)
+ texts = pos + neg
+
+ # ProtT5 expects upper-case, space-separated residues; map the rare amino
+ # acids U, Z, O, B to X and padding underscores to "-".
+ prep_texts = [" ".join(list(t.upper())) for t in texts]
+ prep_texts = [re.sub(r"[UZOB]", "X", pt).replace("_", "-") for pt in prep_texts]
+
+ # 70 % train, 15 % validation, 15 % test
+ X_train, X_temp, y_train, y_temp = train_test_split(prep_texts, labels, test_size=0.30, random_state=42)
+ X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42)
+
+ tokenizer = T5Tokenizer.from_pretrained("Rostlab/prot_t5_xl_uniref50")
+
+ def tokenize(batch):
+     return tokenizer(batch["text"],
+                      padding="max_length",
+                      truncation=True,
+                      max_length=64)
+
+ train_ds = Dataset.from_dict({"text": X_train, "label": y_train})
+ val_ds = Dataset.from_dict({"text": X_val, "label": y_val})
+
+ train_ds = train_ds.map(tokenize, batched=True).with_format("torch")
+ val_ds = val_ds.map(tokenize, batched=True).with_format("torch")
+
+ # ---------------------------
+ # 3. MODEL: T5 ENCODER + CLASSIFICATION HEAD
+ # ---------------------------
+ class T5BinaryClassifier(nn.Module):
+     def __init__(self, model_name, dropout=0.1):
+         super().__init__()
+         self.encoder = T5EncoderModel.from_pretrained(model_name)
+         enc_dim = self.encoder.config.d_model   # 1024 for prot_t5_xl
+         self.dropout = nn.Dropout(dropout)
+         self.cls = nn.Linear(enc_dim, 2)        # binary classification
+
+     def forward(self,
+                 input_ids=None,
+                 attention_mask=None,
+                 labels=None,
+                 **kwargs):
+         enc_out = self.encoder(input_ids=input_ids,
+                                attention_mask=attention_mask,
+                                return_dict=True)
+         # CLS-like sequence vector: mean-pool the encoder states instead of
+         # reading a single token position (e.g. the <pad> token, id=0).
+         hidden = enc_out.last_hidden_state      # (B, L, D)
+         pooled = hidden.mean(dim=1)             # (B, D)
+
+         logits = self.cls(self.dropout(pooled))
+
+         loss = None
+         if labels is not None:
+             loss_fct = nn.CrossEntropyLoss()
+             loss = loss_fct(logits, labels)
+
+         return SequenceClassifierOutput(
+             loss=loss,
+             logits=logits,
+             hidden_states=enc_out.hidden_states,
+             attentions=enc_out.attentions,
+         )
+
+ model = T5BinaryClassifier("Rostlab/prot_t5_xl_uniref50").cuda()
+
+ # ---------------------------
+ # 4. TRAINING ARGUMENTS
+ # ---------------------------
+ args = TrainingArguments(
+     output_dir="t5-bc-out",
+     num_train_epochs=3,
+     learning_rate=5e-5,
+     per_device_train_batch_size=8,   # prot_t5_xl is large; 8-16 is recommended instead of 512
+     per_device_eval_batch_size=8,
+     gradient_accumulation_steps=4,   # effective batch size 32
+     evaluation_strategy="epoch",
+     load_best_model_at_end=True,
+     save_strategy="epoch",
+     save_safetensors=False,
+     report_to=["wandb"],
+     fp16=True,
+ )
+
+ def compute_metrics(eval_pred):
+     logits, labels = eval_pred
+     preds = logits.argmax(-1)
+     acc = accuracy_score(labels, preds)
+     return {"accuracy": acc}
+
+ trainer = Trainer(
+     model=model,
+     args=args,
+     train_dataset=train_ds,
+     eval_dataset=val_ds,
+     compute_metrics=compute_metrics,
+ )
+
+ trainer.train()
+
+ # ---------------------------
+ # 5. TEST & SAVE
+ # ---------------------------
+
+ # Tokenize the held-out test split and evaluate the best checkpoint on it.
+ test_ds = Dataset.from_dict({"text": X_test, "label": y_test})
+ test_ds = test_ds.map(tokenize, batched=True).with_format("torch")
+
+ metrics = trainer.evaluate(test_ds)
+ print(metrics)
+
+ # ---- Manual save ----
+ trainer.save_model("/arf/scratch/zisik/prott5_bc_ft")
+ tokenizer.save_pretrained("/arf/scratch/zisik/prott5_bc_ft")
+
+ #model.push_to_hub("isikz/prot_t5_binary_classifier")
+ #tokenizer.push_to_hub("isikz/prot_t5_binary_classifier")
+ #wandb.finish()
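As a usage note, a minimal inference sketch follows. It assumes the fine-tuned weights saved above are available at the same path, that the `T5BinaryClassifier` class from the script is in scope, and it reuses the training-time preprocessing; the example sequence is a placeholder.

```python
import re
import torch
from transformers import T5Tokenizer

ckpt_dir = "/arf/scratch/zisik/prott5_bc_ft"    # directory written by trainer.save_model above

tokenizer = T5Tokenizer.from_pretrained(ckpt_dir)
model = T5BinaryClassifier("Rostlab/prot_t5_xl_uniref50")   # class defined in the script above
model.load_state_dict(torch.load(f"{ckpt_dir}/pytorch_model.bin", map_location="cpu"))
model.eval()

seq = "AKRSASPEKQELLSR"                          # placeholder 15-residue window around a candidate site
prep = " ".join(re.sub(r"[UZOB]", "X", seq.upper()).replace("_", "-"))
inputs = tokenizer(prep, return_tensors="pt",
                   padding="max_length", truncation=True, max_length=64)

with torch.no_grad():
    logits = model(**inputs).logits
print(torch.softmax(logits, dim=-1)[0, 1].item())   # probability of the positive class
```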
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb84e54c51f53eb1a49e0d52446d9e470b5ea320ae7174917832ab5aef4d31a2
+ size 4832674810
ready_to_train.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:498eaceee30faf2510396e17a4f8417ce65c37e576c8792a80da432313f03c0e
+ size 18584710
special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74da7b4afcde53faa570114b530c726135bdfcdb813dec3abfb27f9d44db7324
+ size 237990
t5-bc-out/checkpoint-47916/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3923cb1c3204d99805be4282d57866443cbdd1f5f71ad6af1c81ee4a783d7e9d
+ size 9665321730
t5-bc-out/checkpoint-47916/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:80957033108061961f1d326abe9e2829f4d78524a478d52ecec37db106fbe5cc
+ size 4832674810
t5-bc-out/checkpoint-47916/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d1af688f89b64a7c9246d9d5848b03b2543dd68c97861fab57333014cd508ec2
+ size 14244
t5-bc-out/checkpoint-47916/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62074fe1abf3e8558aec193d31cdd76f6c2650659b0c8d62d4b5ff6d20fd6edd
+ size 1064
t5-bc-out/checkpoint-47916/trainer_state.json ADDED
@@ -0,0 +1,725 @@
1
+ {
2
+ "best_metric": 0.1829579919576645,
3
+ "best_model_checkpoint": "t5-bc-out/checkpoint-31944",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 47916,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03130478337089907,
13
+ "grad_norm": 1.3348039388656616,
14
+ "learning_rate": 4.947825361048502e-05,
15
+ "loss": 0.5856,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.06260956674179814,
20
+ "grad_norm": 2.473144292831421,
21
+ "learning_rate": 4.8956507220970036e-05,
22
+ "loss": 0.5183,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.09391435011269722,
27
+ "grad_norm": 3.6210598945617676,
28
+ "learning_rate": 4.843476083145505e-05,
29
+ "loss": 0.4879,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.12521913348359628,
34
+ "grad_norm": 6.336288928985596,
35
+ "learning_rate": 4.791405793471909e-05,
36
+ "loss": 0.4579,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.15652391685449538,
41
+ "grad_norm": 2.6699299812316895,
42
+ "learning_rate": 4.739231154520411e-05,
43
+ "loss": 0.4421,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.18782870022539444,
48
+ "grad_norm": 7.918868064880371,
49
+ "learning_rate": 4.6870565155689124e-05,
50
+ "loss": 0.4205,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.2191334835962935,
55
+ "grad_norm": 2.9816083908081055,
56
+ "learning_rate": 4.634881876617414e-05,
57
+ "loss": 0.4044,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.25043826696719257,
62
+ "grad_norm": 7.581803321838379,
63
+ "learning_rate": 4.582707237665916e-05,
64
+ "loss": 0.3901,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.28174305033809166,
69
+ "grad_norm": 6.031352996826172,
70
+ "learning_rate": 4.5305325987144174e-05,
71
+ "loss": 0.3834,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 0.31304783370899075,
76
+ "grad_norm": 2.581623077392578,
77
+ "learning_rate": 4.478357959762919e-05,
78
+ "loss": 0.3601,
79
+ "step": 5000
80
+ },
81
+ {
82
+ "epoch": 0.3443526170798898,
83
+ "grad_norm": 4.7024245262146,
84
+ "learning_rate": 4.42618332081142e-05,
85
+ "loss": 0.3492,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 0.3756574004507889,
90
+ "grad_norm": 8.929915428161621,
91
+ "learning_rate": 4.374217380415728e-05,
92
+ "loss": 0.3435,
93
+ "step": 6000
94
+ },
95
+ {
96
+ "epoch": 0.406962183821688,
97
+ "grad_norm": 3.694370985031128,
98
+ "learning_rate": 4.32204274146423e-05,
99
+ "loss": 0.3366,
100
+ "step": 6500
101
+ },
102
+ {
103
+ "epoch": 0.438266967192587,
104
+ "grad_norm": 5.6961350440979,
105
+ "learning_rate": 4.2698681025127307e-05,
106
+ "loss": 0.3259,
107
+ "step": 7000
108
+ },
109
+ {
110
+ "epoch": 0.4695717505634861,
111
+ "grad_norm": 2.740339756011963,
112
+ "learning_rate": 4.217693463561232e-05,
113
+ "loss": 0.3224,
114
+ "step": 7500
115
+ },
116
+ {
117
+ "epoch": 0.5008765339343851,
118
+ "grad_norm": 3.7285494804382324,
119
+ "learning_rate": 4.165518824609734e-05,
120
+ "loss": 0.3103,
121
+ "step": 8000
122
+ },
123
+ {
124
+ "epoch": 0.5321813173052843,
125
+ "grad_norm": 5.1480326652526855,
126
+ "learning_rate": 4.1133441856582356e-05,
127
+ "loss": 0.3107,
128
+ "step": 8500
129
+ },
130
+ {
131
+ "epoch": 0.5634861006761833,
132
+ "grad_norm": 4.8817620277404785,
133
+ "learning_rate": 4.0611695467067366e-05,
134
+ "loss": 0.2945,
135
+ "step": 9000
136
+ },
137
+ {
138
+ "epoch": 0.5947908840470824,
139
+ "grad_norm": 5.003459453582764,
140
+ "learning_rate": 4.008994907755238e-05,
141
+ "loss": 0.2903,
142
+ "step": 9500
143
+ },
144
+ {
145
+ "epoch": 0.6260956674179815,
146
+ "grad_norm": 6.451533317565918,
147
+ "learning_rate": 3.95682026880374e-05,
148
+ "loss": 0.284,
149
+ "step": 10000
150
+ },
151
+ {
152
+ "epoch": 0.6574004507888805,
153
+ "grad_norm": 7.442136287689209,
154
+ "learning_rate": 3.9046456298522416e-05,
155
+ "loss": 0.276,
156
+ "step": 10500
157
+ },
158
+ {
159
+ "epoch": 0.6887052341597796,
160
+ "grad_norm": 3.617513656616211,
161
+ "learning_rate": 3.852575340178646e-05,
162
+ "loss": 0.27,
163
+ "step": 11000
164
+ },
165
+ {
166
+ "epoch": 0.7200100175306787,
167
+ "grad_norm": 5.776317596435547,
168
+ "learning_rate": 3.800400701227148e-05,
169
+ "loss": 0.2666,
170
+ "step": 11500
171
+ },
172
+ {
173
+ "epoch": 0.7513148009015778,
174
+ "grad_norm": 6.264099597930908,
175
+ "learning_rate": 3.7482260622756494e-05,
176
+ "loss": 0.257,
177
+ "step": 12000
178
+ },
179
+ {
180
+ "epoch": 0.7826195842724768,
181
+ "grad_norm": 4.222651481628418,
182
+ "learning_rate": 3.6960514233241504e-05,
183
+ "loss": 0.2566,
184
+ "step": 12500
185
+ },
186
+ {
187
+ "epoch": 0.813924367643376,
188
+ "grad_norm": 6.953704833984375,
189
+ "learning_rate": 3.643876784372652e-05,
190
+ "loss": 0.2502,
191
+ "step": 13000
192
+ },
193
+ {
194
+ "epoch": 0.845229151014275,
195
+ "grad_norm": 3.2264351844787598,
196
+ "learning_rate": 3.591806494699057e-05,
197
+ "loss": 0.2364,
198
+ "step": 13500
199
+ },
200
+ {
201
+ "epoch": 0.876533934385174,
202
+ "grad_norm": 6.233669281005859,
203
+ "learning_rate": 3.539631855747558e-05,
204
+ "loss": 0.2451,
205
+ "step": 14000
206
+ },
207
+ {
208
+ "epoch": 0.9078387177560732,
209
+ "grad_norm": 8.540342330932617,
210
+ "learning_rate": 3.48745721679606e-05,
211
+ "loss": 0.2364,
212
+ "step": 14500
213
+ },
214
+ {
215
+ "epoch": 0.9391435011269722,
216
+ "grad_norm": 4.3881516456604,
217
+ "learning_rate": 3.4352825778445616e-05,
218
+ "loss": 0.2312,
219
+ "step": 15000
220
+ },
221
+ {
222
+ "epoch": 0.9704482844978712,
223
+ "grad_norm": 6.7153167724609375,
224
+ "learning_rate": 3.383107938893063e-05,
225
+ "loss": 0.2323,
226
+ "step": 15500
227
+ },
228
+ {
229
+ "epoch": 1.0,
230
+ "eval_accuracy": 0.9204725991125071,
231
+ "eval_loss": 0.2026778757572174,
232
+ "eval_runtime": 180.0542,
233
+ "eval_samples_per_second": 608.272,
234
+ "eval_steps_per_second": 76.038,
235
+ "step": 15972
236
+ },
237
+ {
238
+ "epoch": 1.0017530678687703,
239
+ "grad_norm": 4.329936504364014,
240
+ "learning_rate": 3.331037649219468e-05,
241
+ "loss": 0.2163,
242
+ "step": 16000
243
+ },
244
+ {
245
+ "epoch": 1.0330578512396693,
246
+ "grad_norm": 8.806492805480957,
247
+ "learning_rate": 3.278863010267969e-05,
248
+ "loss": 0.139,
249
+ "step": 16500
250
+ },
251
+ {
252
+ "epoch": 1.0643626346105686,
253
+ "grad_norm": 9.733407020568848,
254
+ "learning_rate": 3.226688371316471e-05,
255
+ "loss": 0.1419,
256
+ "step": 17000
257
+ },
258
+ {
259
+ "epoch": 1.0956674179814676,
260
+ "grad_norm": 3.5503616333007812,
261
+ "learning_rate": 3.174513732364972e-05,
262
+ "loss": 0.1361,
263
+ "step": 17500
264
+ },
265
+ {
266
+ "epoch": 1.1269722013523666,
267
+ "grad_norm": 5.853847503662109,
268
+ "learning_rate": 3.122339093413474e-05,
269
+ "loss": 0.1398,
270
+ "step": 18000
271
+ },
272
+ {
273
+ "epoch": 1.1582769847232657,
274
+ "grad_norm": 1.6936904191970825,
275
+ "learning_rate": 3.0701644544619754e-05,
276
+ "loss": 0.1373,
277
+ "step": 18500
278
+ },
279
+ {
280
+ "epoch": 1.1895817680941647,
281
+ "grad_norm": 1.5299335718154907,
282
+ "learning_rate": 3.017989815510477e-05,
283
+ "loss": 0.1423,
284
+ "step": 19000
285
+ },
286
+ {
287
+ "epoch": 1.220886551465064,
288
+ "grad_norm": 3.899322986602783,
289
+ "learning_rate": 2.965815176558978e-05,
290
+ "loss": 0.1391,
291
+ "step": 19500
292
+ },
293
+ {
294
+ "epoch": 1.252191334835963,
295
+ "grad_norm": 2.3118438720703125,
296
+ "learning_rate": 2.913744886885383e-05,
297
+ "loss": 0.1408,
298
+ "step": 20000
299
+ },
300
+ {
301
+ "epoch": 1.283496118206862,
302
+ "grad_norm": 0.6930440068244934,
303
+ "learning_rate": 2.8615702479338845e-05,
304
+ "loss": 0.1408,
305
+ "step": 20500
306
+ },
307
+ {
308
+ "epoch": 1.314800901577761,
309
+ "grad_norm": 2.851909875869751,
310
+ "learning_rate": 2.8093956089823858e-05,
311
+ "loss": 0.1404,
312
+ "step": 21000
313
+ },
314
+ {
315
+ "epoch": 1.3461056849486601,
316
+ "grad_norm": 0.22848767042160034,
317
+ "learning_rate": 2.7572209700308875e-05,
318
+ "loss": 0.1382,
319
+ "step": 21500
320
+ },
321
+ {
322
+ "epoch": 1.3774104683195592,
323
+ "grad_norm": 3.973886489868164,
324
+ "learning_rate": 2.7050463310793888e-05,
325
+ "loss": 0.1396,
326
+ "step": 22000
327
+ },
328
+ {
329
+ "epoch": 1.4087152516904582,
330
+ "grad_norm": 3.140080451965332,
331
+ "learning_rate": 2.6529760414057936e-05,
332
+ "loss": 0.127,
333
+ "step": 22500
334
+ },
335
+ {
336
+ "epoch": 1.4400200350613575,
337
+ "grad_norm": 5.468123435974121,
338
+ "learning_rate": 2.6008014024542953e-05,
339
+ "loss": 0.1276,
340
+ "step": 23000
341
+ },
342
+ {
343
+ "epoch": 1.4713248184322565,
344
+ "grad_norm": 0.626640260219574,
345
+ "learning_rate": 2.5486267635027966e-05,
346
+ "loss": 0.1219,
347
+ "step": 23500
348
+ },
349
+ {
350
+ "epoch": 1.5026296018031555,
351
+ "grad_norm": 3.1899547576904297,
352
+ "learning_rate": 2.496452124551298e-05,
353
+ "loss": 0.1319,
354
+ "step": 24000
355
+ },
356
+ {
357
+ "epoch": 1.5339343851740546,
358
+ "grad_norm": 3.199150562286377,
359
+ "learning_rate": 2.4442774855997996e-05,
360
+ "loss": 0.1298,
361
+ "step": 24500
362
+ },
363
+ {
364
+ "epoch": 1.5652391685449536,
365
+ "grad_norm": 5.129565715789795,
366
+ "learning_rate": 2.3921028466483013e-05,
367
+ "loss": 0.1217,
368
+ "step": 25000
369
+ },
370
+ {
371
+ "epoch": 1.5965439519158529,
372
+ "grad_norm": 4.223311424255371,
373
+ "learning_rate": 2.339928207696803e-05,
374
+ "loss": 0.1288,
375
+ "step": 25500
376
+ },
377
+ {
378
+ "epoch": 1.6278487352867517,
379
+ "grad_norm": 10.741965293884277,
380
+ "learning_rate": 2.2877535687453046e-05,
381
+ "loss": 0.1263,
382
+ "step": 26000
383
+ },
384
+ {
385
+ "epoch": 1.659153518657651,
386
+ "grad_norm": 3.0217132568359375,
387
+ "learning_rate": 2.235578929793806e-05,
388
+ "loss": 0.122,
389
+ "step": 26500
390
+ },
391
+ {
392
+ "epoch": 1.69045830202855,
393
+ "grad_norm": 7.847172737121582,
394
+ "learning_rate": 2.1835086401202104e-05,
395
+ "loss": 0.122,
396
+ "step": 27000
397
+ },
398
+ {
399
+ "epoch": 1.721763085399449,
400
+ "grad_norm": 9.223713874816895,
401
+ "learning_rate": 2.1313340011687117e-05,
402
+ "loss": 0.1266,
403
+ "step": 27500
404
+ },
405
+ {
406
+ "epoch": 1.7530678687703483,
407
+ "grad_norm": 2.0706963539123535,
408
+ "learning_rate": 2.0791593622172137e-05,
409
+ "loss": 0.1274,
410
+ "step": 28000
411
+ },
412
+ {
413
+ "epoch": 1.784372652141247,
414
+ "grad_norm": 3.1475393772125244,
415
+ "learning_rate": 2.0270890725436182e-05,
416
+ "loss": 0.1214,
417
+ "step": 28500
418
+ },
419
+ {
420
+ "epoch": 1.8156774355121463,
421
+ "grad_norm": 3.7348415851593018,
422
+ "learning_rate": 1.9749144335921196e-05,
423
+ "loss": 0.1191,
424
+ "step": 29000
425
+ },
426
+ {
427
+ "epoch": 1.8469822188830454,
428
+ "grad_norm": 3.230713129043579,
429
+ "learning_rate": 1.9227397946406212e-05,
430
+ "loss": 0.1199,
431
+ "step": 29500
432
+ },
433
+ {
434
+ "epoch": 1.8782870022539444,
435
+ "grad_norm": 0.4691683351993561,
436
+ "learning_rate": 1.8705651556891226e-05,
437
+ "loss": 0.1176,
438
+ "step": 30000
439
+ },
440
+ {
441
+ "epoch": 1.9095917856248434,
442
+ "grad_norm": 4.382262706756592,
443
+ "learning_rate": 1.8183905167376242e-05,
444
+ "loss": 0.1176,
445
+ "step": 30500
446
+ },
447
+ {
448
+ "epoch": 1.9408965689957425,
449
+ "grad_norm": 9.810182571411133,
450
+ "learning_rate": 1.7662158777861255e-05,
451
+ "loss": 0.1083,
452
+ "step": 31000
453
+ },
454
+ {
455
+ "epoch": 1.9722013523666417,
456
+ "grad_norm": 8.107538223266602,
457
+ "learning_rate": 1.7140412388346275e-05,
458
+ "loss": 0.1103,
459
+ "step": 31500
460
+ },
461
+ {
462
+ "epoch": 2.0,
463
+ "eval_accuracy": 0.9478369642628878,
464
+ "eval_loss": 0.1829579919576645,
465
+ "eval_runtime": 179.9731,
466
+ "eval_samples_per_second": 608.547,
467
+ "eval_steps_per_second": 76.072,
468
+ "step": 31944
469
+ },
470
+ {
471
+ "epoch": 2.0035061357375405,
472
+ "grad_norm": 0.5452843308448792,
473
+ "learning_rate": 1.661866599883129e-05,
474
+ "loss": 0.1087,
475
+ "step": 32000
476
+ },
477
+ {
478
+ "epoch": 2.03481091910844,
479
+ "grad_norm": 1.0569943189620972,
480
+ "learning_rate": 1.6097963102095334e-05,
481
+ "loss": 0.0456,
482
+ "step": 32500
483
+ },
484
+ {
485
+ "epoch": 2.0661157024793386,
486
+ "grad_norm": 0.22022764384746552,
487
+ "learning_rate": 1.557621671258035e-05,
488
+ "loss": 0.0523,
489
+ "step": 33000
490
+ },
491
+ {
492
+ "epoch": 2.097420485850238,
493
+ "grad_norm": 9.75222396850586,
494
+ "learning_rate": 1.5054470323065365e-05,
495
+ "loss": 0.0492,
496
+ "step": 33500
497
+ },
498
+ {
499
+ "epoch": 2.128725269221137,
500
+ "grad_norm": 3.1281306743621826,
501
+ "learning_rate": 1.453272393355038e-05,
502
+ "loss": 0.0498,
503
+ "step": 34000
504
+ },
505
+ {
506
+ "epoch": 2.160030052592036,
507
+ "grad_norm": 0.012396792881190777,
508
+ "learning_rate": 1.4012021036814427e-05,
509
+ "loss": 0.0506,
510
+ "step": 34500
511
+ },
512
+ {
513
+ "epoch": 2.191334835962935,
514
+ "grad_norm": 6.527154922485352,
515
+ "learning_rate": 1.3490274647299442e-05,
516
+ "loss": 0.0569,
517
+ "step": 35000
518
+ },
519
+ {
520
+ "epoch": 2.222639619333834,
521
+ "grad_norm": 3.5429670810699463,
522
+ "learning_rate": 1.2968528257784457e-05,
523
+ "loss": 0.0548,
524
+ "step": 35500
525
+ },
526
+ {
527
+ "epoch": 2.2539444027047333,
528
+ "grad_norm": 1.333369255065918,
529
+ "learning_rate": 1.2446781868269472e-05,
530
+ "loss": 0.0558,
531
+ "step": 36000
532
+ },
533
+ {
534
+ "epoch": 2.2852491860756325,
535
+ "grad_norm": 0.10260029882192612,
536
+ "learning_rate": 1.1926078971533518e-05,
537
+ "loss": 0.0464,
538
+ "step": 36500
539
+ },
540
+ {
541
+ "epoch": 2.3165539694465314,
542
+ "grad_norm": 0.14060164988040924,
543
+ "learning_rate": 1.1404332582018533e-05,
544
+ "loss": 0.0515,
545
+ "step": 37000
546
+ },
547
+ {
548
+ "epoch": 2.3478587528174306,
549
+ "grad_norm": 1.031032919883728,
550
+ "learning_rate": 1.0882586192503548e-05,
551
+ "loss": 0.0448,
552
+ "step": 37500
553
+ },
554
+ {
555
+ "epoch": 2.3791635361883294,
556
+ "grad_norm": 0.20121368765830994,
557
+ "learning_rate": 1.0360839802988565e-05,
558
+ "loss": 0.0475,
559
+ "step": 38000
560
+ },
561
+ {
562
+ "epoch": 2.4104683195592287,
563
+ "grad_norm": 0.06531311571598053,
564
+ "learning_rate": 9.84013690625261e-06,
565
+ "loss": 0.0522,
566
+ "step": 38500
567
+ },
568
+ {
569
+ "epoch": 2.441773102930128,
570
+ "grad_norm": 0.04498385637998581,
571
+ "learning_rate": 9.318390516737625e-06,
572
+ "loss": 0.0434,
573
+ "step": 39000
574
+ },
575
+ {
576
+ "epoch": 2.4730778863010268,
577
+ "grad_norm": 0.3482716679573059,
578
+ "learning_rate": 8.796644127222641e-06,
579
+ "loss": 0.0468,
580
+ "step": 39500
581
+ },
582
+ {
583
+ "epoch": 2.504382669671926,
584
+ "grad_norm": 4.0475053787231445,
585
+ "learning_rate": 8.274897737707656e-06,
586
+ "loss": 0.0505,
587
+ "step": 40000
588
+ },
589
+ {
590
+ "epoch": 2.535687453042825,
591
+ "grad_norm": 0.6960127353668213,
592
+ "learning_rate": 7.753151348192671e-06,
593
+ "loss": 0.0421,
594
+ "step": 40500
595
+ },
596
+ {
597
+ "epoch": 2.566992236413724,
598
+ "grad_norm": 0.8902493119239807,
599
+ "learning_rate": 7.231404958677686e-06,
600
+ "loss": 0.0451,
601
+ "step": 41000
602
+ },
603
+ {
604
+ "epoch": 2.5982970197846234,
605
+ "grad_norm": 0.46462351083755493,
606
+ "learning_rate": 6.710702061941732e-06,
607
+ "loss": 0.0522,
608
+ "step": 41500
609
+ },
610
+ {
611
+ "epoch": 2.629601803155522,
612
+ "grad_norm": 0.07463126629590988,
613
+ "learning_rate": 6.1889556724267476e-06,
614
+ "loss": 0.0468,
615
+ "step": 42000
616
+ },
617
+ {
618
+ "epoch": 2.660906586526421,
619
+ "grad_norm": 0.05138092488050461,
620
+ "learning_rate": 5.6672092829117625e-06,
621
+ "loss": 0.0429,
622
+ "step": 42500
623
+ },
624
+ {
625
+ "epoch": 2.6922113698973202,
626
+ "grad_norm": 0.06017659977078438,
627
+ "learning_rate": 5.145462893396778e-06,
628
+ "loss": 0.038,
629
+ "step": 43000
630
+ },
631
+ {
632
+ "epoch": 2.7235161532682195,
633
+ "grad_norm": 3.794154405593872,
634
+ "learning_rate": 4.624759996660823e-06,
635
+ "loss": 0.0418,
636
+ "step": 43500
637
+ },
638
+ {
639
+ "epoch": 2.7548209366391183,
640
+ "grad_norm": 9.929149627685547,
641
+ "learning_rate": 4.103013607145838e-06,
642
+ "loss": 0.0418,
643
+ "step": 44000
644
+ },
645
+ {
646
+ "epoch": 2.7861257200100176,
647
+ "grad_norm": 0.10156802833080292,
648
+ "learning_rate": 3.5812672176308544e-06,
649
+ "loss": 0.0435,
650
+ "step": 44500
651
+ },
652
+ {
653
+ "epoch": 2.8174305033809164,
654
+ "grad_norm": 15.590471267700195,
655
+ "learning_rate": 3.0595208281158697e-06,
656
+ "loss": 0.039,
657
+ "step": 45000
658
+ },
659
+ {
660
+ "epoch": 2.8487352867518156,
661
+ "grad_norm": 0.1026441678404808,
662
+ "learning_rate": 2.5377744386008846e-06,
663
+ "loss": 0.0451,
664
+ "step": 45500
665
+ },
666
+ {
667
+ "epoch": 2.880040070122715,
668
+ "grad_norm": 0.08782440423965454,
669
+ "learning_rate": 2.0160280490859004e-06,
670
+ "loss": 0.0408,
671
+ "step": 46000
672
+ },
673
+ {
674
+ "epoch": 2.9113448534936137,
675
+ "grad_norm": 17.5203857421875,
676
+ "learning_rate": 1.494281659570916e-06,
677
+ "loss": 0.0372,
678
+ "step": 46500
679
+ },
680
+ {
681
+ "epoch": 2.942649636864513,
682
+ "grad_norm": 0.08832889050245285,
683
+ "learning_rate": 9.735787628349612e-07,
684
+ "loss": 0.041,
685
+ "step": 47000
686
+ },
687
+ {
688
+ "epoch": 2.973954420235412,
689
+ "grad_norm": 10.057083129882812,
690
+ "learning_rate": 4.518323733199766e-07,
691
+ "loss": 0.0417,
692
+ "step": 47500
693
+ },
694
+ {
695
+ "epoch": 3.0,
696
+ "eval_accuracy": 0.9541735906941071,
697
+ "eval_loss": 0.2335142344236374,
698
+ "eval_runtime": 176.4196,
699
+ "eval_samples_per_second": 620.804,
700
+ "eval_steps_per_second": 77.605,
701
+ "step": 47916
702
+ }
703
+ ],
704
+ "logging_steps": 500,
705
+ "max_steps": 47916,
706
+ "num_input_tokens_seen": 0,
707
+ "num_train_epochs": 3,
708
+ "save_steps": 500,
709
+ "stateful_callbacks": {
710
+ "TrainerControl": {
711
+ "args": {
712
+ "should_epoch_stop": false,
713
+ "should_evaluate": false,
714
+ "should_log": false,
715
+ "should_save": true,
716
+ "should_training_stop": true
717
+ },
718
+ "attributes": {}
719
+ }
720
+ },
721
+ "total_flos": 0.0,
722
+ "train_batch_size": 8,
723
+ "trial_name": null,
724
+ "trial_params": null
725
+ }
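A quick sanity check on these numbers (assuming a single GPU and that `global_step` counts optimizer updates after gradient accumulation): 47,916 steps over 3 epochs is 15,972 updates per epoch, and with the effective batch size of 32 (per-device batch 8 × accumulation 4) that corresponds to roughly 511,000 training windows per epoch. The evaluation throughput (about 608 samples/s over about 180 s) likewise implies a validation split of roughly 109,500 windows, which is consistent with the 70/15/15 split used in `finetuning_bc_prott5.py`.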
t5-bc-out/checkpoint-47916/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:705750eb5050da7b859b299363db4324be92a3af2ba4a8530c69e964f52524d7
+ size 5176
tokenizer_config.json ADDED
@@ -0,0 +1,941 @@
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<pad>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<unk>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "28": {
29
+ "content": "<extra_id_99>",
30
+ "lstrip": true,
31
+ "normalized": false,
32
+ "rstrip": true,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "29": {
37
+ "content": "<extra_id_98>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": true,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "30": {
45
+ "content": "<extra_id_97>",
46
+ "lstrip": true,
47
+ "normalized": false,
48
+ "rstrip": true,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "31": {
53
+ "content": "<extra_id_96>",
54
+ "lstrip": true,
55
+ "normalized": false,
56
+ "rstrip": true,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "32": {
61
+ "content": "<extra_id_95>",
62
+ "lstrip": true,
63
+ "normalized": false,
64
+ "rstrip": true,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "33": {
69
+ "content": "<extra_id_94>",
70
+ "lstrip": true,
71
+ "normalized": false,
72
+ "rstrip": true,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "34": {
77
+ "content": "<extra_id_93>",
78
+ "lstrip": true,
79
+ "normalized": false,
80
+ "rstrip": true,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "35": {
85
+ "content": "<extra_id_92>",
86
+ "lstrip": true,
87
+ "normalized": false,
88
+ "rstrip": true,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "36": {
93
+ "content": "<extra_id_91>",
94
+ "lstrip": true,
95
+ "normalized": false,
96
+ "rstrip": true,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "37": {
101
+ "content": "<extra_id_90>",
102
+ "lstrip": true,
103
+ "normalized": false,
104
+ "rstrip": true,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "38": {
109
+ "content": "<extra_id_89>",
110
+ "lstrip": true,
111
+ "normalized": false,
112
+ "rstrip": true,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "39": {
117
+ "content": "<extra_id_88>",
118
+ "lstrip": true,
119
+ "normalized": false,
120
+ "rstrip": true,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "40": {
125
+ "content": "<extra_id_87>",
126
+ "lstrip": true,
127
+ "normalized": false,
128
+ "rstrip": true,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "41": {
133
+ "content": "<extra_id_86>",
134
+ "lstrip": true,
135
+ "normalized": false,
136
+ "rstrip": true,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "42": {
141
+ "content": "<extra_id_85>",
142
+ "lstrip": true,
143
+ "normalized": false,
144
+ "rstrip": true,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "43": {
149
+ "content": "<extra_id_84>",
150
+ "lstrip": true,
151
+ "normalized": false,
152
+ "rstrip": true,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "44": {
157
+ "content": "<extra_id_83>",
158
+ "lstrip": true,
159
+ "normalized": false,
160
+ "rstrip": true,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "45": {
165
+ "content": "<extra_id_82>",
166
+ "lstrip": true,
167
+ "normalized": false,
168
+ "rstrip": true,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "46": {
173
+ "content": "<extra_id_81>",
174
+ "lstrip": true,
175
+ "normalized": false,
176
+ "rstrip": true,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "47": {
181
+ "content": "<extra_id_80>",
182
+ "lstrip": true,
183
+ "normalized": false,
184
+ "rstrip": true,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "48": {
189
+ "content": "<extra_id_79>",
190
+ "lstrip": true,
191
+ "normalized": false,
192
+ "rstrip": true,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "49": {
197
+ "content": "<extra_id_78>",
198
+ "lstrip": true,
199
+ "normalized": false,
200
+ "rstrip": true,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "50": {
205
+ "content": "<extra_id_77>",
206
+ "lstrip": true,
207
+ "normalized": false,
208
+ "rstrip": true,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "51": {
213
+ "content": "<extra_id_76>",
214
+ "lstrip": true,
215
+ "normalized": false,
216
+ "rstrip": true,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "52": {
221
+ "content": "<extra_id_75>",
222
+ "lstrip": true,
223
+ "normalized": false,
224
+ "rstrip": true,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "53": {
229
+ "content": "<extra_id_74>",
230
+ "lstrip": true,
231
+ "normalized": false,
232
+ "rstrip": true,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "54": {
237
+ "content": "<extra_id_73>",
238
+ "lstrip": true,
239
+ "normalized": false,
240
+ "rstrip": true,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "55": {
245
+ "content": "<extra_id_72>",
246
+ "lstrip": true,
247
+ "normalized": false,
248
+ "rstrip": true,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "56": {
253
+ "content": "<extra_id_71>",
254
+ "lstrip": true,
255
+ "normalized": false,
256
+ "rstrip": true,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "57": {
261
+ "content": "<extra_id_70>",
262
+ "lstrip": true,
263
+ "normalized": false,
264
+ "rstrip": true,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "58": {
269
+ "content": "<extra_id_69>",
270
+ "lstrip": true,
271
+ "normalized": false,
272
+ "rstrip": true,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "59": {
277
+ "content": "<extra_id_68>",
278
+ "lstrip": true,
279
+ "normalized": false,
280
+ "rstrip": true,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "60": {
285
+ "content": "<extra_id_67>",
286
+ "lstrip": true,
287
+ "normalized": false,
288
+ "rstrip": true,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "61": {
293
+ "content": "<extra_id_66>",
294
+ "lstrip": true,
295
+ "normalized": false,
296
+ "rstrip": true,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "62": {
301
+ "content": "<extra_id_65>",
302
+ "lstrip": true,
303
+ "normalized": false,
304
+ "rstrip": true,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "63": {
309
+ "content": "<extra_id_64>",
310
+ "lstrip": true,
311
+ "normalized": false,
312
+ "rstrip": true,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "64": {
317
+ "content": "<extra_id_63>",
318
+ "lstrip": true,
319
+ "normalized": false,
320
+ "rstrip": true,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "65": {
325
+ "content": "<extra_id_62>",
326
+ "lstrip": true,
327
+ "normalized": false,
328
+ "rstrip": true,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "66": {
333
+ "content": "<extra_id_61>",
334
+ "lstrip": true,
335
+ "normalized": false,
336
+ "rstrip": true,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "67": {
341
+ "content": "<extra_id_60>",
342
+ "lstrip": true,
343
+ "normalized": false,
344
+ "rstrip": true,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "68": {
349
+ "content": "<extra_id_59>",
350
+ "lstrip": true,
351
+ "normalized": false,
352
+ "rstrip": true,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "69": {
357
+ "content": "<extra_id_58>",
358
+ "lstrip": true,
359
+ "normalized": false,
360
+ "rstrip": true,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "70": {
365
+ "content": "<extra_id_57>",
366
+ "lstrip": true,
367
+ "normalized": false,
368
+ "rstrip": true,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "71": {
373
+ "content": "<extra_id_56>",
374
+ "lstrip": true,
375
+ "normalized": false,
376
+ "rstrip": true,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "72": {
381
+ "content": "<extra_id_55>",
382
+ "lstrip": true,
383
+ "normalized": false,
384
+ "rstrip": true,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "73": {
389
+ "content": "<extra_id_54>",
390
+ "lstrip": true,
391
+ "normalized": false,
392
+ "rstrip": true,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "74": {
397
+ "content": "<extra_id_53>",
398
+ "lstrip": true,
399
+ "normalized": false,
400
+ "rstrip": true,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "75": {
405
+ "content": "<extra_id_52>",
406
+ "lstrip": true,
407
+ "normalized": false,
408
+ "rstrip": true,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "76": {
413
+ "content": "<extra_id_51>",
414
+ "lstrip": true,
415
+ "normalized": false,
416
+ "rstrip": true,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "77": {
421
+ "content": "<extra_id_50>",
422
+ "lstrip": true,
423
+ "normalized": false,
424
+ "rstrip": true,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "78": {
429
+ "content": "<extra_id_49>",
430
+ "lstrip": true,
431
+ "normalized": false,
432
+ "rstrip": true,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "79": {
437
+ "content": "<extra_id_48>",
438
+ "lstrip": true,
439
+ "normalized": false,
440
+ "rstrip": true,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "80": {
445
+ "content": "<extra_id_47>",
446
+ "lstrip": true,
447
+ "normalized": false,
448
+ "rstrip": true,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "81": {
453
+ "content": "<extra_id_46>",
454
+ "lstrip": true,
455
+ "normalized": false,
456
+ "rstrip": true,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "82": {
461
+ "content": "<extra_id_45>",
462
+ "lstrip": true,
463
+ "normalized": false,
464
+ "rstrip": true,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "83": {
469
+ "content": "<extra_id_44>",
470
+ "lstrip": true,
471
+ "normalized": false,
472
+ "rstrip": true,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "84": {
477
+ "content": "<extra_id_43>",
478
+ "lstrip": true,
479
+ "normalized": false,
480
+ "rstrip": true,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "85": {
485
+ "content": "<extra_id_42>",
486
+ "lstrip": true,
487
+ "normalized": false,
488
+ "rstrip": true,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "86": {
493
+ "content": "<extra_id_41>",
494
+ "lstrip": true,
495
+ "normalized": false,
496
+ "rstrip": true,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "87": {
501
+ "content": "<extra_id_40>",
502
+ "lstrip": true,
503
+ "normalized": false,
504
+ "rstrip": true,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "88": {
509
+ "content": "<extra_id_39>",
510
+ "lstrip": true,
511
+ "normalized": false,
512
+ "rstrip": true,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "89": {
517
+ "content": "<extra_id_38>",
518
+ "lstrip": true,
519
+ "normalized": false,
520
+ "rstrip": true,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "90": {
525
+ "content": "<extra_id_37>",
526
+ "lstrip": true,
527
+ "normalized": false,
528
+ "rstrip": true,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "91": {
533
+ "content": "<extra_id_36>",
534
+ "lstrip": true,
535
+ "normalized": false,
536
+ "rstrip": true,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "92": {
541
+ "content": "<extra_id_35>",
542
+ "lstrip": true,
543
+ "normalized": false,
544
+ "rstrip": true,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "93": {
549
+ "content": "<extra_id_34>",
550
+ "lstrip": true,
551
+ "normalized": false,
552
+ "rstrip": true,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "94": {
557
+ "content": "<extra_id_33>",
558
+ "lstrip": true,
559
+ "normalized": false,
560
+ "rstrip": true,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "95": {
565
+ "content": "<extra_id_32>",
566
+ "lstrip": true,
567
+ "normalized": false,
568
+ "rstrip": true,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "96": {
573
+ "content": "<extra_id_31>",
574
+ "lstrip": true,
575
+ "normalized": false,
576
+ "rstrip": true,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "97": {
581
+ "content": "<extra_id_30>",
582
+ "lstrip": true,
583
+ "normalized": false,
584
+ "rstrip": true,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "98": {
589
+ "content": "<extra_id_29>",
590
+ "lstrip": true,
591
+ "normalized": false,
592
+ "rstrip": true,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "99": {
597
+ "content": "<extra_id_28>",
598
+ "lstrip": true,
599
+ "normalized": false,
600
+ "rstrip": true,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "100": {
605
+ "content": "<extra_id_27>",
606
+ "lstrip": true,
607
+ "normalized": false,
608
+ "rstrip": true,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "101": {
613
+ "content": "<extra_id_26>",
614
+ "lstrip": true,
615
+ "normalized": false,
616
+ "rstrip": true,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "102": {
621
+ "content": "<extra_id_25>",
622
+ "lstrip": true,
623
+ "normalized": false,
624
+ "rstrip": true,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "103": {
629
+ "content": "<extra_id_24>",
630
+ "lstrip": true,
631
+ "normalized": false,
632
+ "rstrip": true,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "104": {
637
+ "content": "<extra_id_23>",
638
+ "lstrip": true,
639
+ "normalized": false,
640
+ "rstrip": true,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "105": {
645
+ "content": "<extra_id_22>",
646
+ "lstrip": true,
647
+ "normalized": false,
648
+ "rstrip": true,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "106": {
653
+ "content": "<extra_id_21>",
654
+ "lstrip": true,
655
+ "normalized": false,
656
+ "rstrip": true,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "107": {
661
+ "content": "<extra_id_20>",
662
+ "lstrip": true,
663
+ "normalized": false,
664
+ "rstrip": true,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "108": {
669
+ "content": "<extra_id_19>",
670
+ "lstrip": true,
671
+ "normalized": false,
672
+ "rstrip": true,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "109": {
677
+ "content": "<extra_id_18>",
678
+ "lstrip": true,
679
+ "normalized": false,
680
+ "rstrip": true,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "110": {
685
+ "content": "<extra_id_17>",
686
+ "lstrip": true,
687
+ "normalized": false,
688
+ "rstrip": true,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "111": {
693
+ "content": "<extra_id_16>",
694
+ "lstrip": true,
695
+ "normalized": false,
696
+ "rstrip": true,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "112": {
701
+ "content": "<extra_id_15>",
702
+ "lstrip": true,
703
+ "normalized": false,
704
+ "rstrip": true,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "113": {
709
+ "content": "<extra_id_14>",
710
+ "lstrip": true,
711
+ "normalized": false,
712
+ "rstrip": true,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "114": {
717
+ "content": "<extra_id_13>",
718
+ "lstrip": true,
719
+ "normalized": false,
720
+ "rstrip": true,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "115": {
725
+ "content": "<extra_id_12>",
726
+ "lstrip": true,
727
+ "normalized": false,
728
+ "rstrip": true,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "116": {
733
+ "content": "<extra_id_11>",
734
+ "lstrip": true,
735
+ "normalized": false,
736
+ "rstrip": true,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "117": {
741
+ "content": "<extra_id_10>",
742
+ "lstrip": true,
743
+ "normalized": false,
744
+ "rstrip": true,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "118": {
749
+ "content": "<extra_id_9>",
750
+ "lstrip": true,
751
+ "normalized": false,
752
+ "rstrip": true,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "119": {
757
+ "content": "<extra_id_8>",
758
+ "lstrip": true,
759
+ "normalized": false,
760
+ "rstrip": true,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "120": {
765
+ "content": "<extra_id_7>",
766
+ "lstrip": true,
767
+ "normalized": false,
768
+ "rstrip": true,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "121": {
773
+ "content": "<extra_id_6>",
774
+ "lstrip": true,
775
+ "normalized": false,
776
+ "rstrip": true,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "122": {
781
+ "content": "<extra_id_5>",
782
+ "lstrip": true,
783
+ "normalized": false,
784
+ "rstrip": true,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "123": {
789
+ "content": "<extra_id_4>",
790
+ "lstrip": true,
791
+ "normalized": false,
792
+ "rstrip": true,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "124": {
797
+ "content": "<extra_id_3>",
798
+ "lstrip": true,
799
+ "normalized": false,
800
+ "rstrip": true,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "125": {
805
+ "content": "<extra_id_2>",
806
+ "lstrip": true,
807
+ "normalized": false,
808
+ "rstrip": true,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "126": {
813
+ "content": "<extra_id_1>",
814
+ "lstrip": true,
815
+ "normalized": false,
816
+ "rstrip": true,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "127": {
821
+ "content": "<extra_id_0>",
822
+ "lstrip": true,
823
+ "normalized": false,
824
+ "rstrip": true,
825
+ "single_word": false,
826
+ "special": true
827
+ }
828
+ },
829
+ "additional_special_tokens": [
830
+ "<extra_id_0>",
831
+ "<extra_id_1>",
832
+ "<extra_id_2>",
833
+ "<extra_id_3>",
834
+ "<extra_id_4>",
835
+ "<extra_id_5>",
836
+ "<extra_id_6>",
837
+ "<extra_id_7>",
838
+ "<extra_id_8>",
839
+ "<extra_id_9>",
840
+ "<extra_id_10>",
841
+ "<extra_id_11>",
842
+ "<extra_id_12>",
843
+ "<extra_id_13>",
844
+ "<extra_id_14>",
845
+ "<extra_id_15>",
846
+ "<extra_id_16>",
847
+ "<extra_id_17>",
848
+ "<extra_id_18>",
849
+ "<extra_id_19>",
850
+ "<extra_id_20>",
851
+ "<extra_id_21>",
852
+ "<extra_id_22>",
853
+ "<extra_id_23>",
854
+ "<extra_id_24>",
855
+ "<extra_id_25>",
856
+ "<extra_id_26>",
857
+ "<extra_id_27>",
858
+ "<extra_id_28>",
859
+ "<extra_id_29>",
860
+ "<extra_id_30>",
861
+ "<extra_id_31>",
862
+ "<extra_id_32>",
863
+ "<extra_id_33>",
864
+ "<extra_id_34>",
865
+ "<extra_id_35>",
866
+ "<extra_id_36>",
867
+ "<extra_id_37>",
868
+ "<extra_id_38>",
869
+ "<extra_id_39>",
870
+ "<extra_id_40>",
871
+ "<extra_id_41>",
872
+ "<extra_id_42>",
873
+ "<extra_id_43>",
874
+ "<extra_id_44>",
875
+ "<extra_id_45>",
876
+ "<extra_id_46>",
877
+ "<extra_id_47>",
878
+ "<extra_id_48>",
879
+ "<extra_id_49>",
880
+ "<extra_id_50>",
881
+ "<extra_id_51>",
882
+ "<extra_id_52>",
883
+ "<extra_id_53>",
884
+ "<extra_id_54>",
885
+ "<extra_id_55>",
886
+ "<extra_id_56>",
887
+ "<extra_id_57>",
888
+ "<extra_id_58>",
889
+ "<extra_id_59>",
890
+ "<extra_id_60>",
891
+ "<extra_id_61>",
892
+ "<extra_id_62>",
893
+ "<extra_id_63>",
894
+ "<extra_id_64>",
895
+ "<extra_id_65>",
896
+ "<extra_id_66>",
897
+ "<extra_id_67>",
898
+ "<extra_id_68>",
899
+ "<extra_id_69>",
900
+ "<extra_id_70>",
901
+ "<extra_id_71>",
902
+ "<extra_id_72>",
903
+ "<extra_id_73>",
904
+ "<extra_id_74>",
905
+ "<extra_id_75>",
906
+ "<extra_id_76>",
907
+ "<extra_id_77>",
908
+ "<extra_id_78>",
909
+ "<extra_id_79>",
910
+ "<extra_id_80>",
911
+ "<extra_id_81>",
912
+ "<extra_id_82>",
913
+ "<extra_id_83>",
914
+ "<extra_id_84>",
915
+ "<extra_id_85>",
916
+ "<extra_id_86>",
917
+ "<extra_id_87>",
918
+ "<extra_id_88>",
919
+ "<extra_id_89>",
920
+ "<extra_id_90>",
921
+ "<extra_id_91>",
922
+ "<extra_id_92>",
923
+ "<extra_id_93>",
924
+ "<extra_id_94>",
925
+ "<extra_id_95>",
926
+ "<extra_id_96>",
927
+ "<extra_id_97>",
928
+ "<extra_id_98>",
929
+ "<extra_id_99>"
930
+ ],
931
+ "clean_up_tokenization_spaces": false,
932
+ "do_lower_case": false,
933
+ "eos_token": "</s>",
934
+ "extra_ids": 100,
935
+ "legacy": true,
936
+ "model_max_length": 1000000000000000019884624838656,
937
+ "pad_token": "<pad>",
938
+ "sp_model_kwargs": {},
939
+ "tokenizer_class": "T5Tokenizer",
940
+ "unk_token": "<unk>"
941
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:705750eb5050da7b859b299363db4324be92a3af2ba4a8530c69e964f52524d7
3
+ size 5176
wandb/debug-internal.log ADDED
@@ -0,0 +1,21 @@
1
+ {"time":"2025-05-04T17:25:03.375857654+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2025-05-04T17:25:03.375905253+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug-core.log"}
3
+ {"time":"2025-05-04T17:25:03.501241143+03:00","level":"INFO","msg":"created new stream","id":"0ictlmwf"}
4
+ {"time":"2025-05-04T17:25:03.501294637+03:00","level":"INFO","msg":"stream: started","id":"0ictlmwf"}
5
+ {"time":"2025-05-04T17:25:03.501448652+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"0ictlmwf"}
6
+ {"time":"2025-05-04T17:25:03.501451145+03:00","level":"INFO","msg":"handler: started","stream_id":"0ictlmwf"}
7
+ {"time":"2025-05-04T17:25:03.501574427+03:00","level":"INFO","msg":"sender: started","stream_id":"0ictlmwf"}
8
+ {"time":"2025-05-04T17:25:03.865922055+03:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2025-05-04T22:47:43.191425732+03:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/isikz/finetuning-bc-protT5/0ictlmwf/file_stream\": dial tcp 35.186.228.49:443: connect: connection timed out"}
10
+ {"time":"2025-05-05T00:01:47.351449692+03:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/isikz/finetuning-bc-protT5/0ictlmwf/file_stream\": dial tcp 35.186.228.49:443: connect: connection timed out"}
11
+ {"time":"2025-05-05T00:49:32.57779148+03:00","level":"INFO","msg":"stream: closing","id":"0ictlmwf"}
12
+ {"time":"2025-05-05T00:49:32.577842715+03:00","level":"INFO","msg":"Stopping system monitor"}
13
+ {"time":"2025-05-05T00:49:32.578849729+03:00","level":"INFO","msg":"Stopped system monitor"}
14
+ {"time":"2025-05-05T00:49:32.781968337+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
15
+ {"time":"2025-05-05T00:49:32.781997123+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
16
+ {"time":"2025-05-05T00:49:32.782008311+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
17
+ {"time":"2025-05-05T00:49:33.357099059+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
18
+ {"time":"2025-05-05T00:49:33.741524339+03:00","level":"INFO","msg":"handler: closed","stream_id":"0ictlmwf"}
19
+ {"time":"2025-05-05T00:49:33.741583153+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"0ictlmwf"}
20
+ {"time":"2025-05-05T00:49:33.741593811+03:00","level":"INFO","msg":"sender: closed","stream_id":"0ictlmwf"}
21
+ {"time":"2025-05-05T00:49:33.741652369+03:00","level":"INFO","msg":"stream: closed","id":"0ictlmwf"}
wandb/debug.log ADDED
@@ -0,0 +1,27 @@
1
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Configure stats pid to 3189710
3
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Applying login settings: {}
8
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug.log
10
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug-internal.log
11
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():619] calling init triggers
12
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():669] starting backend
15
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():673] sending inform_init request
16
+ 2025-05-04 17:25:03,371 INFO MainThread:3189710 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2025-05-04 17:25:03,371 INFO MainThread:3189710 [wandb_init.py:init():686] backend started and connected
18
+ 2025-05-04 17:25:03,379 INFO MainThread:3189710 [wandb_init.py:init():781] updated telemetry
19
+ 2025-05-04 17:25:03,382 INFO MainThread:3189710 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2025-05-04 17:25:03,852 INFO MainThread:3189710 [wandb_init.py:init():867] starting run threads in backend
21
+ 2025-05-04 17:25:05,277 INFO MainThread:3189710 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2025-05-04 17:25:05,278 INFO MainThread:3189710 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2025-05-04 17:25:05,278 INFO MainThread:3189710 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2025-05-04 17:25:05,278 INFO MainThread:3189710 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2025-05-04 17:25:05,283 INFO MainThread:3189710 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2025-05-04 17:25:53,069 INFO MainThread:3189710 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_17-25-43_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': 
None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
27
+ 2025-05-05 00:49:32,578 WARNING MsgRouterThr:3189710 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250504_132610-pxg645u5/files/config.yaml ADDED
@@ -0,0 +1,44 @@
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m: []
5
+ python_version: 3.10.15
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 2
10
+ - 3
11
+ - 5
12
+ - 11
13
+ - 12
14
+ - 49
15
+ - 51
16
+ - 53
17
+ - 55
18
+ - 71
19
+ - 98
20
+ - 105
21
+ "2":
22
+ - 1
23
+ - 2
24
+ - 3
25
+ - 5
26
+ - 11
27
+ - 12
28
+ - 49
29
+ - 51
30
+ - 53
31
+ - 55
32
+ - 71
33
+ - 98
34
+ - 105
35
+ "3":
36
+ - 23
37
+ - 55
38
+ "4": 3.10.15
39
+ "5": 0.18.7
40
+ "6": 4.45.2
41
+ "8":
42
+ - 5
43
+ "12": 0.18.7
44
+ "13": linux-x86_64
wandb/run-20250504_132610-pxg645u5/files/output.log ADDED
@@ -0,0 +1,37 @@
1
+ You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
+ Traceback (most recent call last):
3
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 45, in <module>
4
+ train_ds = load_dataset("json", data_files={"train": "-"},
5
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 2132, in load_dataset
6
+ builder_instance = load_dataset_builder(
7
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1853, in load_dataset_builder
8
+ dataset_module = dataset_module_factory(
9
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1562, in dataset_module_factory
10
+ ).get_module()
11
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 942, in get_module
12
+ data_files = DataFilesDict.from_patterns(
13
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 721, in from_patterns
14
+ else DataFilesList.from_patterns(
15
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 624, in from_patterns
16
+ resolve_pattern(
17
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 411, in resolve_pattern
18
+ raise FileNotFoundError(error_msg)
19
+ FileNotFoundError: Unable to find '/arf/scratch/zisik/prott5_bc_ft/-'
20
+ Traceback (most recent call last):
21
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 45, in <module>
22
+ train_ds = load_dataset("json", data_files={"train": "-"},
23
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 2132, in load_dataset
24
+ builder_instance = load_dataset_builder(
25
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1853, in load_dataset_builder
26
+ dataset_module = dataset_module_factory(
27
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1562, in dataset_module_factory
28
+ ).get_module()
29
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 942, in get_module
30
+ data_files = DataFilesDict.from_patterns(
31
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 721, in from_patterns
32
+ else DataFilesList.from_patterns(
33
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 624, in from_patterns
34
+ resolve_pattern(
35
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 411, in resolve_pattern
36
+ raise FileNotFoundError(error_msg)
37
+ FileNotFoundError: Unable to find '/arf/scratch/zisik/prott5_bc_ft/-'
wandb/run-20250504_132610-pxg645u5/files/requirements.txt ADDED
@@ -0,0 +1,541 @@
1
+ nvidia-cuda-cupti-cu12==12.4.127
2
+ nvidia-cuda-nvrtc-cu12==12.4.127
3
+ pyg-lib==0.4.0+pt20cu117
4
+ biopython==1.85
5
+ iniconfig==2.0.0
6
+ tokenizers==0.20.0
7
+ accelerate==1.3.0
8
+ torch==2.6.0
9
+ nvidia-nccl-cu12==2.21.5
10
+ transformers==4.45.2
11
+ nvidia-cusparse-cu12==12.3.1.170
12
+ torch-scatter==2.1.2+pt20cu117
13
+ nvidia-cusparselt-cu12==0.6.2
14
+ nvidia-nvtx-cu12==12.4.127
15
+ zstd==1.5.6.6
16
+ fair-esm==2.0.0
17
+ omegaconf==2.3.0
18
+ pluggy==1.5.0
19
+ pytest==8.3.5
20
+ nvidia-curand-cu12==10.3.5.147
21
+ nvidia-cufft-cu12==11.2.1.3
22
+ torch-cluster==1.6.3+pt20cu117
23
+ regex==2024.9.11
24
+ nvidia-cudnn-cu12==9.1.0.70
25
+ torch-spline-conv==1.2.2+pt20cu117
26
+ nvidia-cusolver-cu12==11.6.1.9
27
+ antlr4-python3-runtime==4.9.3
28
+ msgpack-numpy==0.4.8
29
+ nlp==0.2.0
30
+ einops==0.8.1
31
+ nvidia-cublas-cu12==12.4.5.8
32
+ triton==3.2.0
33
+ ninja==1.11.1.3
34
+ hydra-core==1.3.2
35
+ nvidia-nvjitlink-cu12==12.4.127
36
+ biotite==0.41.2
37
+ torch-sparse==0.6.18+pt20cu117
38
+ esm==3.1.4
39
+ sympy==1.13.1
40
+ nvidia-cuda-runtime-cu12==12.4.127
41
+ jupyter-lsp==2.2.5
42
+ jupyter-events==0.10.0
43
+ ipykernel==6.29.5
44
+ Mako==1.3.5
45
+ proto-plus==1.25.0
46
+ fst-pso==1.8.1
47
+ gensim==4.3.3
48
+ htmlmin==0.1.12
49
+ tokenizers==0.13.3
50
+ timm==1.0.11
51
+ MarkupSafe==3.0.2
52
+ safetensors==0.4.5
53
+ requests==2.32.3
54
+ gast==0.5.5
55
+ cuml==24.12.0a33
56
+ jaxlib==0.4.23.dev20240214
57
+ spacy-loggers==1.0.5
58
+ pytz==2024.1
59
+ idna==3.10
60
+ python-dateutil==2.9.0
61
+ mdurl==0.1.2
62
+ blis==0.7.10
63
+ jupyter==1.1.1
64
+ pyerfa==2.0.1.5
65
+ comm==0.2.2
66
+ pygraphviz==1.14
67
+ dill==0.3.8
68
+ paramiko==3.5.0
69
+ llama-index==0.8.36
70
+ mdit-py-plugins==0.4.2
71
+ Werkzeug==3.1.3
72
+ pyu2f==0.1.5
73
+ dask-glm==0.2.0
74
+ httpx==0.27.2
75
+ typeguard==4.4.1
76
+ mypy-extensions==1.0.0
77
+ kmodes==0.12.2
78
+ keras==2.15.0
79
+ ydata-profiling==0.0.dev0
80
+ regex==2024.11.6
81
+ xarray==2024.11.0
82
+ setuptools==75.3.0
83
+ charset-normalizer==3.4.0
84
+ jupyterlab_nvdashboard==0.11.0
85
+ pylibraft==24.12.0a36
86
+ spacy==3.7.6
87
+ mlflow-skinny==2.17.2
88
+ nvtx==0.2.10
89
+ multimethod==1.12
90
+ pexpect==4.9.0
91
+ torch==2.1.0.post301
92
+ flatbuffers==24.3.25
93
+ python-json-logger==2.0.7
94
+ PyJWT==2.9.0
95
+ multiprocess==0.70.16
96
+ colorlover==0.3.0
97
+ yarl==1.16.0
98
+ locket==1.0.0
99
+ patsy==1.0.0
100
+ rapids-dask-dependency==24.12.0a0
101
+ stanza==1.9.2
102
+ debugpy==1.8.8
103
+ jupyterlab_pygments==0.3.0
104
+ pylibcudf==24.12.0a337
105
+ lz4==4.3.3
106
+ pandas==2.2.3
107
+ tifffile==2024.9.20
108
+ pynvml==11.4.1
109
+ cufflinks==0.17.3
110
+ ipywidgets==8.1.5
111
+ requests-oauthlib==2.0.0
112
+ google-auth-oauthlib==1.2.1
113
+ rsa==4.9
114
+ webcolors==24.8.0
115
+ jsonschema-specifications==2024.10.1
116
+ scikit-learn==1.5.2
117
+ langchain-text-splitters==0.3.2
118
+ pandas-datareader==0.10.0
119
+ tomli==2.0.2
120
+ tzdata==2024.2
121
+ scikit-image==0.24.0
122
+ tensorboard_data_server==0.7.0
123
+ kiwisolver==1.4.7
124
+ cloudpathlib==0.20.0
125
+ isodate==0.6.1
126
+ adversarial-robustness-toolbox==1.19.1
127
+ SQLAlchemy==2.0.36
128
+ pytest-runner==6.0.0
129
+ pycairo==1.27.0
130
+ treelite==4.3.0
131
+ jiter==0.7.0
132
+ threadpoolctl==3.5.0
133
+ pandocfilters==1.5.0
134
+ loguru==0.7.2
135
+ smart_open==7.0.5
136
+ shellingham==1.5.4
137
+ deepspeed==0.15.4
138
+ prompt_toolkit==3.0.48
139
+ databricks-sdk==0.34.0
140
+ langchain-core==0.3.15
141
+ imageio==2.36.0
142
+ openapi-schema-pydantic==1.2.4
143
+ zict==3.0.0
144
+ cachetools==5.5.0
145
+ colorful==0.5.6
146
+ mpmath==1.3.0
147
+ nest_asyncio==1.6.0
148
+ pyFUME==0.2.25
149
+ opencv-python-headless==4.9.0
150
+ fastai==2.7.18
151
+ importlib_resources==6.4.5
152
+ binaryornot==0.4.4
153
+ evaluate==0.4.1
154
+ matplotlib-inline==0.1.7
155
+ wasabi==1.1.2
156
+ pycparser==2.22
157
+ GitPython==3.1.43
158
+ pluggy==1.5.0
159
+ async-lru==2.0.4
160
+ pgmpy==0.1.24
161
+ anyio==4.4.0
162
+ executing==2.1.0
163
+ orjson==3.10.11
164
+ humanfriendly==10.0
165
+ tornado==6.4.1
166
+ gmpy2==2.1.5
167
+ rlPyCairo==0.2.0
168
+ distributed==2024.11.0
169
+ FuzzyTM==2.0.5
170
+ torchtext==0.15.2a0+5ce3163
171
+ pytest==8.3.5
172
+ pyod==2.0.2
173
+ ImageHash==4.3.1
174
+ soupsieve==2.5
175
+ tblib==3.0.0
176
+ emoji==2.14.0
177
+ aiohappyeyeballs==2.4.3
178
+ uri-template==1.3.0
179
+ tensorflow_estimator==2.15.0
180
+ babel==2.16.0
181
+ dask-cuda==24.12.0a12
182
+ overrides==7.7.0
183
+ opencensus==0.11.3
184
+ openai==0.28.1
185
+ language_data==1.2.0
186
+ jedi==0.19.2
187
+ cookiecutter==2.6.0
188
+ entrypoints==0.4
189
+ exceptiongroup==1.2.2
190
+ marisa-trie==1.2.0
191
+ uvloop==0.20.0
192
+ aiosignal==1.3.1
193
+ Flask==3.0.3
194
+ tensorboard==2.15.2
195
+ cffi==1.17.1
196
+ tf_keras==2.15.0
197
+ absl-py==2.1.0
198
+ blinker==1.9.0
199
+ types-python-dateutil==2.9.0.20241003
200
+ opencv-python==4.9.0
201
+ frozendict==2.4.6
202
+ aiohttp-cors==0.7.0
203
+ statsmodels==0.14.4
204
+ tinycss2==1.4.0
205
+ terminado==0.18.1
206
+ pycaret==2.2.3
207
+ aiohttp==3.10.10
208
+ distributed-ucxx==0.41.0
209
+ prometheus_client==0.21.0
210
+ fastdownload==0.0.7
211
+ grpcio==1.59.3
212
+ google-api-core==2.22.0
213
+ jupyterlab_widgets==3.0.13
214
+ appdirs==1.4.4
215
+ littleutils==0.0.0
216
+ ray==2.24.0
217
+ kaggle==1.6.17
218
+ jsonschema==4.23.0
219
+ google-auth==2.36.0
220
+ scikit-base==0.11.0
221
+ visions==0.7.6
222
+ pyarrow==15.0.0
223
+ transformers==4.33.0
224
+ prometheus_flask_exporter==0.23.1
225
+ dm-tree==0.1.8
226
+ colorama==0.4.6
227
+ requests-toolbelt==1.0.0
228
+ cached-property==1.5.2
229
+ cymem==2.0.8
230
+ PyNaCl==1.5.0
231
+ PyWavelets==1.7.0
232
+ httptools==0.6.1
233
+ typing-utils==0.1.0
234
+ email_validator==2.2.0
235
+ marshmallow==3.23.1
236
+ Deprecated==1.2.14
237
+ virtualenv==20.4.7
238
+ optuna==3.6.1
239
+ jupyter_server==2.14.2
240
+ termcolor==2.5.0
241
+ mpi4py==4.0.1
242
+ torchdata==0.7.1+8cea82f
243
+ dataclasses==0.8
244
+ cloudpickle==3.1.0
245
+ tree_sitter_languages==1.10.2
246
+ tabulate==0.9.0
247
+ ipython==8.29.0
248
+ lightgbm==4.3.0
249
+ captum==0.6.0
250
+ confuse==2.0.1
251
+ torchvision==0.16.1+adc3221
252
+ lxml==4.9.4
253
+ fastapi==0.115.4
254
+ python-multipart==0.0.17
255
+ dnspython==2.7.0
256
+ jupyter-console==6.6.3
257
+ preshed==3.0.9
258
+ py-cpuinfo==9.0.0
259
+ Send2Trash==1.8.3
260
+ murmurhash==1.0.10
261
+ sniffio==1.3.1
262
+ websockets==13.1
263
+ h11==0.14.0
264
+ smmap==5.0.0
265
+ textual==0.85.2
266
+ jsonpatch==1.33
267
+ opencensus-context==0.1.3
268
+ nbconvert==7.16.4
269
+ sentry-sdk==2.19.0
270
+ opentelemetry-semantic-conventions==0.37b0
271
+ pandas-profiling==2.8.0
272
+ pillow==10.3.0
273
+ peft==0.13.2
274
+ rpds-py==0.21.0
275
+ bokeh==3.6.1
276
+ distro==1.9.0
277
+ itsdangerous==2.2.0
278
+ wandb==0.18.7
279
+ jsonpointer==3.0.0
280
+ astropy-iers-data==0.2024.11.11.0.32.38
281
+ horovod==0.28.1
282
+ graphviz==0.20.3
283
+ vtk==9.3.1
284
+ bleach==6.2.0
285
+ numexpr==2.8.7
286
+ pydantic_core==2.23.4
287
+ Jinja2==3.1.4
288
+ widgetsnbextension==4.0.13
289
+ filelock==3.16.1
290
+ catboost==1.2.7
291
+ raft-dask==24.12.0a36
292
+ async-timeout==4.0.3
293
+ datefinder==0.7.3
294
+ coloredlogs==15.0.1
295
+ platformdirs==4.3.6
296
+ spacy-legacy==3.0.12
297
+ chardet==5.2.0
298
+ jupyter_client==8.6.3
299
+ importlib_metadata==8.5.0
300
+ rfc3986-validator==0.1.1
301
+ huggingface_hub==0.26.2
302
+ PySocks==1.7.1
303
+ mlxtend==0.23.2
304
+ outdated==0.2.2
305
+ partd==1.4.2
306
+ thinc==8.2.5
307
+ astropy==6.1.6
308
+ rdflib==6.3.2
309
+ h2==4.1.0
310
+ typer==0.13.0
311
+ xyzservices==2024.9.0
312
+ toolz==0.12.1
313
+ frozenlist==1.5.0
314
+ rdkit==2024.9.2
315
+ pyasn1==0.6.1
316
+ jupyter_server_terminals==0.5.3
317
+ ucx-py==0.41.0a11
318
+ astunparse==1.6.3
319
+ simpful==2.12.0
320
+ notebook_shim==0.2.4
321
+ scipy==1.13.1
322
+ colorlog==6.9.0
323
+ tiktoken==0.3.3
324
+ plotly==5.24.1
325
+ fastrlock==0.8.2
326
+ chart-studio==1.1.0
327
+ stack-data==0.6.2
328
+ google-pasta==0.2.0
329
+ sktime==0.34.0
330
+ PyYAML==6.0.2
331
+ sympy==1.13.3
332
+ multidict==6.1.0
333
+ ml-dtypes==0.2.0
334
+ tensorboardX==2.6.2.2
335
+ decorator==5.1.1
336
+ cytoolz==1.0.0
337
+ ase==3.23.0
338
+ isoduration==20.11.0
339
+ html5lib==1.1
340
+ langsmith==0.1.142
341
+ future==1.0.0
342
+ onnx2torch==1.5.15
343
+ multipledispatch==0.6.0
344
+ protobuf==4.24.4
345
+ ucxx==0.41.0
346
+ pandas_flavor==0.6.0
347
+ msgpack==1.1.0
348
+ pyasn1_modules==0.4.1
349
+ imagecodecs==2024.1.1
350
+ mlflow==2.17.2
351
+ watchfiles==0.24.0
352
+ dm-sonnet==2.0.2
353
+ langcodes==3.4.1
354
+ freetype-py==2.3.0
355
+ argon2-cffi-bindings==21.2.0
356
+ trimesh==4.5.2
357
+ opt_einsum==3.4.0
358
+ tenacity==8.5.0
359
+ h5py==3.12.1
360
+ fastapi-cli==0.0.5
361
+ oauthlib==3.2.2
362
+ parso==0.8.4
363
+ weasel==0.4.1
364
+ yfinance==0.2.49
365
+ networkx==2.8.8
366
+ bitsandbytes==0.44.1
367
+ lazy_loader==0.4
368
+ querystring_parser==1.2.4
369
+ contourpy==1.3.0
370
+ unicodedata2==15.1.0
371
+ bcrypt==4.2.0
372
+ munkres==1.1.4
373
+ langchain==0.0.298
374
+ hpack==4.0.0
375
+ cryptography==43.0.3
376
+ umap-learn==0.5.7
377
+ arrow==1.3.0
378
+ docker==7.1.0
379
+ certifi==2025.1.31
380
+ fastjsonschema==2.20.0
381
+ tensorflow==2.15.0
382
+ googleapis-common-protos==1.65.0
383
+ iniconfig==2.0.0
384
+ Markdown==3.6
385
+ llvmlite==0.43.0
386
+ wslink==2.3.2
387
+ attrs==24.2.0
388
+ rich==13.9.4
389
+ cupy==13.3.0
390
+ uc-micro-py==1.0.3
391
+ alembic==1.14.0
392
+ joblib==1.4.2
393
+ reportlab==4.2.5
394
+ miniful==0.0.6
395
+ jupyter_core==5.7.2
396
+ wheel==0.45.0
397
+ phik==0.12.3
398
+ mistune==3.0.2
399
+ wcwidth==0.2.13
400
+ dacite==1.8.1
401
+ accelerate==0.22.0
402
+ sacremoses==0.0.53
403
+ revtok==0.0.3
404
+ python-slugify==8.0.4
405
+ tangled-up-in-unicode==0.2.0
406
+ dask==2024.11.0
407
+ markdown-it-py==3.0.0
408
+ sentencepiece==0.1.99
409
+ beautifulsoup4==4.12.3
410
+ six==1.16.0
411
+ numba-cuda==0.0.17
412
+ argon2-cffi==23.1.0
413
+ xxhash==3.5.0
414
+ hjson==3.1.0
415
+ fonttools==4.54.1
416
+ graphql-core==3.2.5
417
+ pyparsing==3.2.0
418
+ pure_eval==0.2.3
419
+ distlib==0.3.9
420
+ lightning==2.4.0
421
+ wordcloud==0.0.0
422
+ catalogue==2.0.10
423
+ jax==0.4.27
424
+ tree-sitter==0.23.2
425
+ notebook==7.2.2
426
+ dataclasses-json==0.6.7
427
+ propcache==0.2.0
428
+ numba==0.60.0
429
+ dask-expr==1.1.17
430
+ pydantic==2.9.2
431
+ gunicorn==22.0.0
432
+ missingno==0.5.2
433
+ pyOpenSSL==24.2.1
434
+ openpyxl==3.1.5
435
+ packaging==24.1
436
+ python-dotenv==1.0.1
437
+ cycler==0.12.1
438
+ types-pytz==2024.2.0.20241003
439
+ yellowbrick==1.5
440
+ referencing==0.35.1
441
+ pyLDAvis==3.4.1
442
+ lazypredict==0.2.16
443
+ fqdn==1.5.1
444
+ websocket-client==1.8.0
445
+ fastcore==1.7.19
446
+ pynvjitlink-cu12==0.3.0
447
+ pingouin==0.5.5
448
+ numpy==1.26.4
449
+ typing-inspect==0.9.0
450
+ nltk==3.9.1
451
+ onnxruntime==1.19.2
452
+ tensorflow-probability==0.23.0
453
+ datasets==3.0.2
454
+ pickleshare==0.7.5
455
+ peewee==3.17.7
456
+ torch-geometric==2.6.1
457
+ ptyprocess==0.7.0
458
+ greenlet==3.1.1
459
+ graphql-relay==3.2.0
460
+ graphene==3.4.3
461
+ et_xmlfile==2.0.0
462
+ webencodings==0.5.1
463
+ hyperframe==6.0.1
464
+ multitasking==0.0.9
465
+ typer-slim==0.13.0
466
+ onnx==1.15.0
467
+ uvicorn==0.32.0
468
+ memray==1.13.4
469
+ xgboost==2.1.2
470
+ Brotli==1.1.0
471
+ zipp==3.21.0
472
+ nbformat==5.10.4
473
+ responses==0.18.0
474
+ funcy==2.0
475
+ Pygments==2.18.0
476
+ tqdm==4.67.0
477
+ linkify-it-py==2.0.3
478
+ srsly==2.4.8
479
+ cuda-python==12.6.0
480
+ lightning-utilities==0.11.8
481
+ cudf==24.12.0a337
482
+ dask-ml==2024.4.4
483
+ docker-pycreds==0.4.0
484
+ pkgutil_resolve_name==1.3.10
485
+ opentelemetry-api==1.16.0
486
+ fsspec==2024.9.0
487
+ nbclient==0.10.0
488
+ psutil==5.9.8
489
+ pytorch-lightning==2.4.0
490
+ sortedcontainers==2.4.0
491
+ matplotlib==3.9.2
492
+ defusedxml==0.7.1
493
+ urllib3==1.26.19
494
+ jupyterlab_server==2.27.3
495
+ retrying==1.3.3
496
+ dask-cudf==24.12.0a337
497
+ sqlparse==0.5.1
498
+ text-unidecode==1.3
499
+ seaborn==0.13.2
500
+ typing_extensions==4.12.2
501
+ pyzmq==26.2.0
502
+ rfc3339-validator==0.1.4
503
+ pynndescent==0.5.13
504
+ pip==24.3.1
505
+ confection==0.1.4
506
+ wrapt==1.14.1
507
+ fastprogress==1.0.3
508
+ traitlets==5.14.3
509
+ asttokens==2.4.1
510
+ json5==0.9.28
511
+ pandas-stubs==2.2.3.241126
512
+ torchmetrics==1.2.1
513
+ gitdb==4.0.11
514
+ annotated-types==0.7.0
515
+ ipython-autotime==0.1
516
+ httpcore==1.0.6
517
+ click==8.1.7
518
+ setproctitle==1.3.3
519
+ starlette==0.41.2
520
+ jupyterlab==4.2.5
521
+ rmm==24.12.0a27
522
+ opentelemetry-sdk==1.16.0
523
+ textblob==0.15.3
524
+ imbalanced-learn==0.12.4
525
+ typeguard==4.3.0
526
+ more-itertools==10.3.0
527
+ zipp==3.19.2
528
+ autocommand==2.2.2
529
+ jaraco.context==5.3.0
530
+ packaging==24.1
531
+ importlib_metadata==8.0.0
532
+ platformdirs==4.2.2
533
+ jaraco.functools==4.0.1
534
+ importlib_resources==6.4.0
535
+ tomli==2.0.1
536
+ jaraco.text==3.12.1
537
+ wheel==0.43.0
538
+ jaraco.collections==5.1.0
539
+ typing_extensions==4.12.2
540
+ inflect==7.3.1
541
+ backports.tarfile==1.2.0
wandb/run-20250504_132610-pxg645u5/files/wandb-metadata.json ADDED
@@ -0,0 +1,77 @@
1
+ {
2
+ "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
+ "python": "3.10.15",
4
+ "startedAt": "2025-05-04T10:26:10.053836Z",
5
+ "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
+ "codePath": "finetuning_bc_prott5.py",
7
+ "email": "zeynep.isik1@sabanciuniv.edu",
8
+ "root": "/arf/scratch/zisik/prott5_bc_ft",
9
+ "host": "kolyoz1",
10
+ "username": "zisik",
11
+ "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
+ "codePathLocal": "finetuning_bc_prott5.py",
13
+ "cpu_count": 64,
14
+ "cpu_count_logical": 64,
15
+ "gpu": "NVIDIA H100 80GB HBM3",
16
+ "gpu_count": 1,
17
+ "disk": {
18
+ "/": {
19
+ "total": "7643995308032",
20
+ "used": "274767593472"
21
+ }
22
+ },
23
+ "memory": {
24
+ "total": "1081373220864"
25
+ },
26
+ "cpu": {
27
+ "count": 64,
28
+ "countLogical": 64
29
+ },
30
+ "gpu_nvidia": [
31
+ {
32
+ "name": "NVIDIA H100 80GB HBM3",
33
+ "memoryTotal": "85520809984",
34
+ "cudaCores": 16896,
35
+ "architecture": "Hopper"
36
+ }
37
+ ],
38
+ "slurm": {
39
+ "cluster_name": "cuda",
40
+ "conf": "/etc/slurm/slurm.conf",
41
+ "cpus_on_node": "16",
42
+ "cpus_per_task": "16",
43
+ "gpus_on_node": "1",
44
+ "gtids": "0",
45
+ "job_account": "tbag154",
46
+ "job_cpus_per_node": "16",
47
+ "job_end_time": "1746613538",
48
+ "job_gid": "11636",
49
+ "job_gpus": "1",
50
+ "job_id": "1027932",
51
+ "job_name": "msa_ph_pt",
52
+ "job_nodelist": "kolyoz1",
53
+ "job_num_nodes": "1",
54
+ "job_partition": "kolyoz-cuda",
55
+ "job_qos": "tbag",
56
+ "job_start_time": "1746354338",
57
+ "job_uid": "11636",
58
+ "job_user": "zisik",
59
+ "jobid": "1027932",
60
+ "localid": "0",
61
+ "mem_per_cpu": "14000",
62
+ "nnodes": "1",
63
+ "node_aliases": "(null)",
64
+ "nodeid": "0",
65
+ "nodelist": "kolyoz1",
66
+ "prio_process": "0",
67
+ "procid": "0",
68
+ "submit_dir": "/arf/scratch/zisik",
69
+ "submit_host": "cuda-ui",
70
+ "task_pid": "3156950",
71
+ "tasks_per_node": "1",
72
+ "topology_addr": "kolyoz1",
73
+ "topology_addr_pattern": "node",
74
+ "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
+ },
76
+ "cudaVersion": "12.6"
77
+ }
wandb/run-20250504_132610-pxg645u5/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"_wandb":{"runtime":6}}
wandb/run-20250504_132610-pxg645u5/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
1
+ {"time":"2025-05-04T13:26:09.392354119+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmppack6571/port-3156976.txt","pid":3156976,"debug":false,"disable-analytics":false}
2
+ {"time":"2025-05-04T13:26:09.392402628+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2025-05-04T13:26:09.393200765+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":36685,"Zone":""}}
4
+ {"time":"2025-05-04T13:26:09.393299078+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3156976}
5
+ {"time":"2025-05-04T13:26:09.570123715+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:37852"}
6
+ {"time":"2025-05-04T13:26:10.055349971+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"pxg645u5","id":"127.0.0.1:37852"}
7
+ {"time":"2025-05-04T13:26:10.180212249+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"pxg645u5","id":"127.0.0.1:37852"}
8
+ {"time":"2025-05-04T13:26:16.993053475+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:37852"}
9
+ {"time":"2025-05-04T13:26:16.994546738+03:00","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-05-04T13:26:16.993862146+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:37852"}
11
+ {"time":"2025-05-04T13:26:16.994899765+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:37852"}
12
+ {"time":"2025-05-04T13:26:17.953982632+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:37852"}
13
+ {"time":"2025-05-04T13:26:17.954000039+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:37852"}
14
+ {"time":"2025-05-04T13:26:17.954015604+03:00","level":"INFO","msg":"server is closed"}
wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log ADDED
@@ -0,0 +1,19 @@
1
+ {"time":"2025-05-04T13:26:10.056874799+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2025-05-04T13:26:10.056920353+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug-core.log"}
3
+ {"time":"2025-05-04T13:26:10.180146537+03:00","level":"INFO","msg":"created new stream","id":"pxg645u5"}
4
+ {"time":"2025-05-04T13:26:10.180200098+03:00","level":"INFO","msg":"stream: started","id":"pxg645u5"}
5
+ {"time":"2025-05-04T13:26:10.180372555+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"pxg645u5"}
6
+ {"time":"2025-05-04T13:26:10.180478207+03:00","level":"INFO","msg":"sender: started","stream_id":"pxg645u5"}
7
+ {"time":"2025-05-04T13:26:10.18057531+03:00","level":"INFO","msg":"handler: started","stream_id":"pxg645u5"}
8
+ {"time":"2025-05-04T13:26:10.587540794+03:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2025-05-04T13:26:16.993666261+03:00","level":"INFO","msg":"stream: closing","id":"pxg645u5"}
10
+ {"time":"2025-05-04T13:26:16.993748173+03:00","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2025-05-04T13:26:16.995793958+03:00","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2025-05-04T13:26:17.198876326+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
+ {"time":"2025-05-04T13:26:17.198909473+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
+ {"time":"2025-05-04T13:26:17.198920913+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
+ {"time":"2025-05-04T13:26:17.694743818+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
16
+ {"time":"2025-05-04T13:26:17.953755664+03:00","level":"INFO","msg":"handler: closed","stream_id":"pxg645u5"}
17
+ {"time":"2025-05-04T13:26:17.953802728+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"pxg645u5"}
18
+ {"time":"2025-05-04T13:26:17.953828101+03:00","level":"INFO","msg":"sender: closed","stream_id":"pxg645u5"}
19
+ {"time":"2025-05-04T13:26:17.953904675+03:00","level":"INFO","msg":"stream: closed","id":"pxg645u5"}
wandb/run-20250504_132610-pxg645u5/logs/debug.log ADDED
@@ -0,0 +1,26 @@
1
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Configure stats pid to 3156976
3
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Applying login settings: {}
8
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug.log
10
+ 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log
11
+ 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():619] calling init triggers
12
+ 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():669] starting backend
15
+ 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():673] sending inform_init request
16
+ 2025-05-04 13:26:10,052 INFO MainThread:3156976 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2025-05-04 13:26:10,053 INFO MainThread:3156976 [wandb_init.py:init():686] backend started and connected
18
+ 2025-05-04 13:26:10,061 INFO MainThread:3156976 [wandb_init.py:init():781] updated telemetry
19
+ 2025-05-04 13:26:10,064 INFO MainThread:3156976 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2025-05-04 13:26:10,574 INFO MainThread:3156976 [wandb_init.py:init():867] starting run threads in backend
21
+ 2025-05-04 13:26:12,208 INFO MainThread:3156976 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2025-05-04 13:26:12,209 INFO MainThread:3156976 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2025-05-04 13:26:12,209 INFO MainThread:3156976 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2025-05-04 13:26:12,209 INFO MainThread:3156976 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2025-05-04 13:26:12,220 INFO MainThread:3156976 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2025-05-04 13:26:16,995 WARNING MsgRouterThr:3156976 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250504_132610-pxg645u5/run-pxg645u5.wandb ADDED
Binary file (5.5 kB)
wandb/run-20250504_132912-1agsw1y8/files/config.yaml ADDED
@@ -0,0 +1,374 @@
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m:
5
+ - "1": train/epoch
6
+ "5": 2
7
+ "6":
8
+ - 1
9
+ - 3
10
+ "7": []
11
+ - "1": train/global_step
12
+ "6":
13
+ - 3
14
+ "7": []
15
+ - "1": eval/runtime
16
+ "5": 2
17
+ "6":
18
+ - 1
19
+ - 3
20
+ "7": []
21
+ - "1": train/loss
22
+ "5": 2
23
+ "6":
24
+ - 1
25
+ - 3
26
+ "7": []
27
+ - "1": train/grad_norm
28
+ "5": 2
29
+ "6":
30
+ - 1
31
+ - 3
32
+ "7": []
33
+ - "1": train/learning_rate
34
+ "5": 2
35
+ "6":
36
+ - 1
37
+ - 3
38
+ "7": []
39
+ - "1": eval/loss
40
+ "5": 2
41
+ "6":
42
+ - 1
43
+ - 3
44
+ "7": []
45
+ - "1": eval/samples_per_second
46
+ "5": 2
47
+ "6":
48
+ - 1
49
+ - 3
50
+ "7": []
51
+ - "1": eval/steps_per_second
52
+ "5": 2
53
+ "6":
54
+ - 1
55
+ - 3
56
+ "7": []
57
+ - "1": eval/accuracy
58
+ "5": 2
59
+ "6":
60
+ - 1
61
+ - 3
62
+ "7": []
63
+ python_version: 3.10.15
64
+ t:
65
+ "1":
66
+ - 1
67
+ - 2
68
+ - 3
69
+ - 5
70
+ - 11
71
+ - 12
72
+ - 49
73
+ - 51
74
+ - 53
75
+ - 55
76
+ - 71
77
+ - 98
78
+ - 105
79
+ "2":
80
+ - 1
81
+ - 2
82
+ - 3
83
+ - 5
84
+ - 6
85
+ - 11
86
+ - 12
87
+ - 49
88
+ - 51
89
+ - 53
90
+ - 55
91
+ - 71
92
+ - 98
93
+ - 105
94
+ "3":
95
+ - 7
96
+ - 23
97
+ - 55
98
+ - 66
99
+ "4": 3.10.15
100
+ "5": 0.18.7
101
+ "6": 4.45.2
102
+ "8":
103
+ - 5
104
+ "9":
105
+ "1": transformers_trainer
106
+ "12": 0.18.7
107
+ "13": linux-x86_64
108
+ accelerator_config:
109
+ value:
110
+ dispatch_batches: null
111
+ even_batches: true
112
+ gradient_accumulation_kwargs: null
113
+ non_blocking: false
114
+ split_batches: false
115
+ use_seedable_sampler: true
116
+ adafactor:
117
+ value: false
118
+ adam_beta1:
119
+ value: 0.9
120
+ adam_beta2:
121
+ value: 0.999
122
+ adam_epsilon:
123
+ value: 1e-08
124
+ auto_find_batch_size:
125
+ value: false
126
+ batch_eval_metrics:
127
+ value: false
128
+ bf16:
129
+ value: false
130
+ bf16_full_eval:
131
+ value: false
132
+ data_seed:
133
+ value: null
134
+ dataloader_drop_last:
135
+ value: false
136
+ dataloader_num_workers:
137
+ value: 0
138
+ dataloader_persistent_workers:
139
+ value: false
140
+ dataloader_pin_memory:
141
+ value: true
142
+ dataloader_prefetch_factor:
143
+ value: null
144
+ ddp_backend:
145
+ value: null
146
+ ddp_broadcast_buffers:
147
+ value: null
148
+ ddp_bucket_cap_mb:
149
+ value: null
150
+ ddp_find_unused_parameters:
151
+ value: null
152
+ ddp_timeout:
153
+ value: 1800
154
+ debug:
155
+ value: []
156
+ deepspeed:
157
+ value: null
158
+ disable_tqdm:
159
+ value: false
160
+ dispatch_batches:
161
+ value: null
162
+ do_eval:
163
+ value: true
164
+ do_predict:
165
+ value: false
166
+ do_train:
167
+ value: false
168
+ eval_accumulation_steps:
169
+ value: null
170
+ eval_delay:
171
+ value: 0
172
+ eval_do_concat_batches:
173
+ value: true
174
+ eval_on_start:
175
+ value: false
176
+ eval_steps:
177
+ value: null
178
+ eval_strategy:
179
+ value: epoch
180
+ eval_use_gather_object:
181
+ value: false
182
+ evaluation_strategy:
183
+ value: epoch
184
+ fp16:
185
+ value: true
186
+ fp16_backend:
187
+ value: auto
188
+ fp16_full_eval:
189
+ value: false
190
+ fp16_opt_level:
191
+ value: O1
192
+ fsdp:
193
+ value: []
194
+ fsdp_config:
195
+ value:
196
+ min_num_params: 0
197
+ xla: false
198
+ xla_fsdp_grad_ckpt: false
199
+ xla_fsdp_v2: false
200
+ fsdp_min_num_params:
201
+ value: 0
202
+ fsdp_transformer_layer_cls_to_wrap:
203
+ value: null
204
+ full_determinism:
205
+ value: false
206
+ gradient_accumulation_steps:
207
+ value: 4
208
+ gradient_checkpointing:
209
+ value: false
210
+ gradient_checkpointing_kwargs:
211
+ value: null
212
+ greater_is_better:
213
+ value: false
214
+ group_by_length:
215
+ value: false
216
+ half_precision_backend:
217
+ value: auto
218
+ hub_always_push:
219
+ value: false
220
+ hub_model_id:
221
+ value: null
222
+ hub_private_repo:
223
+ value: false
224
+ hub_strategy:
225
+ value: every_save
226
+ hub_token:
227
+ value: <HUB_TOKEN>
228
+ ignore_data_skip:
229
+ value: false
230
+ include_inputs_for_metrics:
231
+ value: false
232
+ include_num_input_tokens_seen:
233
+ value: false
234
+ include_tokens_per_second:
235
+ value: false
236
+ jit_mode_eval:
237
+ value: false
238
+ label_names:
239
+ value: null
240
+ label_smoothing_factor:
241
+ value: 0
242
+ learning_rate:
243
+ value: 5e-05
244
+ length_column_name:
245
+ value: length
246
+ load_best_model_at_end:
247
+ value: true
248
+ local_rank:
249
+ value: 0
250
+ log_level:
251
+ value: passive
252
+ log_level_replica:
253
+ value: warning
254
+ log_on_each_node:
255
+ value: true
256
+ logging_dir:
257
+ value: t5-bc-out/runs/May04_13-33-08_kolyoz1
258
+ logging_first_step:
259
+ value: false
260
+ logging_nan_inf_filter:
261
+ value: true
262
+ logging_steps:
263
+ value: 500
264
+ logging_strategy:
265
+ value: steps
266
+ lr_scheduler_type:
267
+ value: linear
268
+ max_grad_norm:
269
+ value: 1
270
+ max_steps:
271
+ value: -1
272
+ metric_for_best_model:
273
+ value: loss
274
+ mp_parameters:
275
+ value: ""
276
+ neftune_noise_alpha:
277
+ value: null
278
+ no_cuda:
279
+ value: false
280
+ num_train_epochs:
281
+ value: 3
282
+ optim:
283
+ value: adamw_torch
284
+ optim_args:
285
+ value: null
286
+ optim_target_modules:
287
+ value: null
288
+ output_dir:
289
+ value: t5-bc-out
290
+ overwrite_output_dir:
291
+ value: false
292
+ past_index:
293
+ value: -1
294
+ per_device_eval_batch_size:
295
+ value: 8
296
+ per_device_train_batch_size:
297
+ value: 8
298
+ per_gpu_eval_batch_size:
299
+ value: null
300
+ per_gpu_train_batch_size:
301
+ value: null
302
+ prediction_loss_only:
303
+ value: false
304
+ push_to_hub:
305
+ value: false
306
+ push_to_hub_model_id:
307
+ value: null
308
+ push_to_hub_organization:
309
+ value: null
310
+ push_to_hub_token:
311
+ value: <PUSH_TO_HUB_TOKEN>
312
+ ray_scope:
313
+ value: last
314
+ remove_unused_columns:
315
+ value: true
316
+ report_to:
317
+ value:
318
+ - wandb
319
+ restore_callback_states_from_checkpoint:
320
+ value: false
321
+ resume_from_checkpoint:
322
+ value: null
323
+ run_name:
324
+ value: t5-bc-out
325
+ save_on_each_node:
326
+ value: false
327
+ save_only_model:
328
+ value: false
329
+ save_safetensors:
330
+ value: true
331
+ save_steps:
332
+ value: 500
333
+ save_strategy:
334
+ value: epoch
335
+ save_total_limit:
336
+ value: null
337
+ seed:
338
+ value: 42
339
+ skip_memory_metrics:
340
+ value: true
341
+ split_batches:
342
+ value: null
343
+ tf32:
344
+ value: null
345
+ torch_compile:
346
+ value: false
347
+ torch_compile_backend:
348
+ value: null
349
+ torch_compile_mode:
350
+ value: null
351
+ torch_empty_cache_steps:
352
+ value: null
353
+ torchdynamo:
354
+ value: null
355
+ tpu_metrics_debug:
356
+ value: false
357
+ tpu_num_cores:
358
+ value: null
359
+ use_cpu:
360
+ value: false
361
+ use_ipex:
362
+ value: false
363
+ use_legacy_prediction_loop:
364
+ value: false
365
+ use_liger_kernel:
366
+ value: false
367
+ use_mps_device:
368
+ value: false
369
+ warmup_ratio:
370
+ value: 0
371
+ warmup_steps:
372
+ value: 0
373
+ weight_decay:
374
+ value: 0
wandb/run-20250504_132912-1agsw1y8/files/output.log ADDED
@@ -0,0 +1,87 @@
1
+ You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
+ Map: 100%|██████████| 511104/511104 [00:20<00:00, 25525.81 examples/s]
3
+ Map: 100%|██████████| 109522/109522 [00:04<00:00, 26956.64 examples/s]
4
+ /arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
5
+ warnings.warn(
6
+ [2025-05-04 13:33:14,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
7
+ wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
8
+ 33%|███▎ | 15972/47916 [2:22:01<4:54:49, 1.81it/s]
9
+ {'loss': 0.6947, 'grad_norm': 0.09912440180778503, 'learning_rate': 4.947825361048502e-05, 'epoch': 0.03}
10
+ {'loss': 0.6939, 'grad_norm': 0.23786939680576324, 'learning_rate': 4.8956507220970036e-05, 'epoch': 0.06}
11
+ {'loss': 0.6936, 'grad_norm': 0.10555226355791092, 'learning_rate': 4.843476083145505e-05, 'epoch': 0.09}
12
+ {'loss': 0.6935, 'grad_norm': 0.28058305382728577, 'learning_rate': 4.791301444194006e-05, 'epoch': 0.13}
13
+ {'loss': 0.6937, 'grad_norm': 0.13599741458892822, 'learning_rate': 4.739126805242508e-05, 'epoch': 0.16}
14
+ {'loss': 0.6935, 'grad_norm': 0.13076388835906982, 'learning_rate': 4.6869521662910095e-05, 'epoch': 0.19}
15
+ {'loss': 0.6934, 'grad_norm': 0.1778457760810852, 'learning_rate': 4.634777527339511e-05, 'epoch': 0.22}
16
+ {'loss': 0.6935, 'grad_norm': 0.4112167954444885, 'learning_rate': 4.582602888388012e-05, 'epoch': 0.25}
17
+ {'loss': 0.6934, 'grad_norm': 0.1330016702413559, 'learning_rate': 4.530428249436514e-05, 'epoch': 0.28}
18
+ {'loss': 0.6935, 'grad_norm': 0.09426847100257874, 'learning_rate': 4.478253610485016e-05, 'epoch': 0.31}
19
+ {'loss': 0.6933, 'grad_norm': 0.3686296343803406, 'learning_rate': 4.426078971533517e-05, 'epoch': 0.34}
20
+ {'loss': 0.6933, 'grad_norm': 0.21278153359889984, 'learning_rate': 4.373904332582019e-05, 'epoch': 0.38}
21
+ {'loss': 0.6935, 'grad_norm': 0.23074378073215485, 'learning_rate': 4.321834042908423e-05, 'epoch': 0.41}
22
+ {'loss': 0.6932, 'grad_norm': 0.5192509293556213, 'learning_rate': 4.269659403956925e-05, 'epoch': 0.44}
23
+ {'loss': 0.6932, 'grad_norm': 0.07643919438123703, 'learning_rate': 4.217484765005426e-05, 'epoch': 0.47}
24
+ {'loss': 0.6935, 'grad_norm': 0.09435634315013885, 'learning_rate': 4.1653101260539276e-05, 'epoch': 0.5}
25
+ {'loss': 0.6932, 'grad_norm': 0.3456329107284546, 'learning_rate': 4.113239836380333e-05, 'epoch': 0.53}
26
+ {'loss': 0.6934, 'grad_norm': 0.11689063161611557, 'learning_rate': 4.061065197428834e-05, 'epoch': 0.56}
27
+ {'loss': 0.6934, 'grad_norm': 0.25019219517707825, 'learning_rate': 4.0088905584773355e-05, 'epoch': 0.59}
28
+ {'loss': 0.6933, 'grad_norm': 0.12248441576957703, 'learning_rate': 3.956715919525837e-05, 'epoch': 0.63}
29
+ {'loss': 0.6933, 'grad_norm': 0.11549345403909683, 'learning_rate': 3.9046456298522416e-05, 'epoch': 0.66}
30
+ {'loss': 0.6934, 'grad_norm': 0.27383607625961304, 'learning_rate': 3.852470990900743e-05, 'epoch': 0.69}
31
+ {'loss': 0.6935, 'grad_norm': 0.21311810612678528, 'learning_rate': 3.800296351949245e-05, 'epoch': 0.72}
32
+ {'loss': 0.6933, 'grad_norm': 0.25916823744773865, 'learning_rate': 3.7481217129977466e-05, 'epoch': 0.75}
33
+ {'loss': 0.6934, 'grad_norm': 0.13208124041557312, 'learning_rate': 3.6960514233241504e-05, 'epoch': 0.78}
34
+ {'loss': 0.6934, 'grad_norm': 0.4182877242565155, 'learning_rate': 3.643876784372652e-05, 'epoch': 0.81}
35
+ {'loss': 0.6933, 'grad_norm': 0.19375275075435638, 'learning_rate': 3.5917021454211544e-05, 'epoch': 0.85}
36
+ {'loss': 0.6933, 'grad_norm': 0.1647150218486786, 'learning_rate': 3.5395275064696554e-05, 'epoch': 0.88}
37
+ {'loss': 0.6933, 'grad_norm': 0.458692729473114, 'learning_rate': 3.48745721679606e-05, 'epoch': 0.91}
38
+ {'loss': 0.6933, 'grad_norm': 0.24417555332183838, 'learning_rate': 3.4352825778445616e-05, 'epoch': 0.94}
39
+ {'loss': 0.6932, 'grad_norm': 0.10788150876760483, 'learning_rate': 3.383107938893063e-05, 'epoch': 0.97}
40
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 125, in <module>
41
+ {'eval_loss': 0.6931192278862, 'eval_accuracy': 0.4992604225635032, 'eval_runtime': 182.4166, 'eval_samples_per_second': 600.395, 'eval_steps_per_second': 75.053, 'epoch': 1.0}
42
+ trainer.train()
43
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2052, in train
44
+ return inner_training_loop(
45
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2487, in _inner_training_loop
46
+ self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
47
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2918, in _maybe_log_save_evaluate
48
+ self._save_checkpoint(model, trial, metrics=metrics)
49
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3008, in _save_checkpoint
50
+ self.save_model(output_dir, _internal_call=True)
51
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3623, in save_model
52
+ self._save(output_dir)
53
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3721, in _save
54
+ safetensors.torch.save_file(
55
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 286, in save_file
56
+ serialize_file(_flatten(tensors), filename, metadata=metadata)
57
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 488, in _flatten
58
+ raise RuntimeError(
59
+ RuntimeError:
60
+ Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'encoder.encoder.embed_tokens.weight', 'encoder.shared.weight'}].
61
+ A potential way to correctly save your model is to use `save_model`.
62
+ More information at https://huggingface.co/docs/safetensors/torch_shared_tensors
63
+
64
+ Traceback (most recent call last):
65
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 125, in <module>
66
+ trainer.train()
67
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2052, in train
68
+ return inner_training_loop(
69
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2487, in _inner_training_loop
70
+ self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
71
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2918, in _maybe_log_save_evaluate
72
+ self._save_checkpoint(model, trial, metrics=metrics)
73
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3008, in _save_checkpoint
74
+ self.save_model(output_dir, _internal_call=True)
75
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3623, in save_model
76
+ self._save(output_dir)
77
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3721, in _save
78
+ safetensors.torch.save_file(
79
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 286, in save_file
80
+ serialize_file(_flatten(tensors), filename, metadata=metadata)
81
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 488, in _flatten
82
+ raise RuntimeError(
83
+ RuntimeError:
84
+ Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'encoder.encoder.embed_tokens.weight', 'encoder.shared.weight'}].
85
+ A potential way to correctly save your model is to use `save_model`.
86
+ More information at https://huggingface.co/docs/safetensors/torch_shared_tensors
87
+
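Note on the RuntimeError above: safetensors refuses to serialize tied tensors, and in this ProtT5 encoder the shared embedding (`encoder.shared.weight`) and `encoder.encoder.embed_tokens.weight` point to the same storage. A minimal sketch of one workaround, assuming the standard `Trainer` setup in `finetuning_bc_prott5.py`, is to fall back to `torch.save`-based checkpoints; this matches the `save_safetensors: False` visible in the later run's logged config:

    from transformers import TrainingArguments

    # Sketch only: values other than save_safetensors mirror the logged config.
    training_args = TrainingArguments(
        output_dir="t5-bc-out",
        save_strategy="epoch",
        save_safetensors=False,  # skip safetensors so the tied embedding can be checkpointed
    )

Untying or cloning the shared embedding before saving would also avoid the error, but that path is not shown in these logs.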
wandb/run-20250504_132912-1agsw1y8/files/requirements.txt ADDED
@@ -0,0 +1,541 @@
1
+ nvidia-cuda-cupti-cu12==12.4.127
2
+ nvidia-cuda-nvrtc-cu12==12.4.127
3
+ pyg-lib==0.4.0+pt20cu117
4
+ biopython==1.85
5
+ iniconfig==2.0.0
6
+ tokenizers==0.20.0
7
+ accelerate==1.3.0
8
+ torch==2.6.0
9
+ nvidia-nccl-cu12==2.21.5
10
+ transformers==4.45.2
11
+ nvidia-cusparse-cu12==12.3.1.170
12
+ torch-scatter==2.1.2+pt20cu117
13
+ nvidia-cusparselt-cu12==0.6.2
14
+ nvidia-nvtx-cu12==12.4.127
15
+ zstd==1.5.6.6
16
+ fair-esm==2.0.0
17
+ omegaconf==2.3.0
18
+ pluggy==1.5.0
19
+ pytest==8.3.5
20
+ nvidia-curand-cu12==10.3.5.147
21
+ nvidia-cufft-cu12==11.2.1.3
22
+ torch-cluster==1.6.3+pt20cu117
23
+ regex==2024.9.11
24
+ nvidia-cudnn-cu12==9.1.0.70
25
+ torch-spline-conv==1.2.2+pt20cu117
26
+ nvidia-cusolver-cu12==11.6.1.9
27
+ antlr4-python3-runtime==4.9.3
28
+ msgpack-numpy==0.4.8
29
+ nlp==0.2.0
30
+ einops==0.8.1
31
+ nvidia-cublas-cu12==12.4.5.8
32
+ triton==3.2.0
33
+ ninja==1.11.1.3
34
+ hydra-core==1.3.2
35
+ nvidia-nvjitlink-cu12==12.4.127
36
+ biotite==0.41.2
37
+ torch-sparse==0.6.18+pt20cu117
38
+ esm==3.1.4
39
+ sympy==1.13.1
40
+ nvidia-cuda-runtime-cu12==12.4.127
41
+ jupyter-lsp==2.2.5
42
+ jupyter-events==0.10.0
43
+ ipykernel==6.29.5
44
+ Mako==1.3.5
45
+ proto-plus==1.25.0
46
+ fst-pso==1.8.1
47
+ gensim==4.3.3
48
+ htmlmin==0.1.12
49
+ tokenizers==0.13.3
50
+ timm==1.0.11
51
+ MarkupSafe==3.0.2
52
+ safetensors==0.4.5
53
+ requests==2.32.3
54
+ gast==0.5.5
55
+ cuml==24.12.0a33
56
+ jaxlib==0.4.23.dev20240214
57
+ spacy-loggers==1.0.5
58
+ pytz==2024.1
59
+ idna==3.10
60
+ python-dateutil==2.9.0
61
+ mdurl==0.1.2
62
+ blis==0.7.10
63
+ jupyter==1.1.1
64
+ pyerfa==2.0.1.5
65
+ comm==0.2.2
66
+ pygraphviz==1.14
67
+ dill==0.3.8
68
+ paramiko==3.5.0
69
+ llama-index==0.8.36
70
+ mdit-py-plugins==0.4.2
71
+ Werkzeug==3.1.3
72
+ pyu2f==0.1.5
73
+ dask-glm==0.2.0
74
+ httpx==0.27.2
75
+ typeguard==4.4.1
76
+ mypy-extensions==1.0.0
77
+ kmodes==0.12.2
78
+ keras==2.15.0
79
+ ydata-profiling==0.0.dev0
80
+ regex==2024.11.6
81
+ xarray==2024.11.0
82
+ setuptools==75.3.0
83
+ charset-normalizer==3.4.0
84
+ jupyterlab_nvdashboard==0.11.0
85
+ pylibraft==24.12.0a36
86
+ spacy==3.7.6
87
+ mlflow-skinny==2.17.2
88
+ nvtx==0.2.10
89
+ multimethod==1.12
90
+ pexpect==4.9.0
91
+ torch==2.1.0.post301
92
+ flatbuffers==24.3.25
93
+ python-json-logger==2.0.7
94
+ PyJWT==2.9.0
95
+ multiprocess==0.70.16
96
+ colorlover==0.3.0
97
+ yarl==1.16.0
98
+ locket==1.0.0
99
+ patsy==1.0.0
100
+ rapids-dask-dependency==24.12.0a0
101
+ stanza==1.9.2
102
+ debugpy==1.8.8
103
+ jupyterlab_pygments==0.3.0
104
+ pylibcudf==24.12.0a337
105
+ lz4==4.3.3
106
+ pandas==2.2.3
107
+ tifffile==2024.9.20
108
+ pynvml==11.4.1
109
+ cufflinks==0.17.3
110
+ ipywidgets==8.1.5
111
+ requests-oauthlib==2.0.0
112
+ google-auth-oauthlib==1.2.1
113
+ rsa==4.9
114
+ webcolors==24.8.0
115
+ jsonschema-specifications==2024.10.1
116
+ scikit-learn==1.5.2
117
+ langchain-text-splitters==0.3.2
118
+ pandas-datareader==0.10.0
119
+ tomli==2.0.2
120
+ tzdata==2024.2
121
+ scikit-image==0.24.0
122
+ tensorboard_data_server==0.7.0
123
+ kiwisolver==1.4.7
124
+ cloudpathlib==0.20.0
125
+ isodate==0.6.1
126
+ adversarial-robustness-toolbox==1.19.1
127
+ SQLAlchemy==2.0.36
128
+ pytest-runner==6.0.0
129
+ pycairo==1.27.0
130
+ treelite==4.3.0
131
+ jiter==0.7.0
132
+ threadpoolctl==3.5.0
133
+ pandocfilters==1.5.0
134
+ loguru==0.7.2
135
+ smart_open==7.0.5
136
+ shellingham==1.5.4
137
+ deepspeed==0.15.4
138
+ prompt_toolkit==3.0.48
139
+ databricks-sdk==0.34.0
140
+ langchain-core==0.3.15
141
+ imageio==2.36.0
142
+ openapi-schema-pydantic==1.2.4
143
+ zict==3.0.0
144
+ cachetools==5.5.0
145
+ colorful==0.5.6
146
+ mpmath==1.3.0
147
+ nest_asyncio==1.6.0
148
+ pyFUME==0.2.25
149
+ opencv-python-headless==4.9.0
150
+ fastai==2.7.18
151
+ importlib_resources==6.4.5
152
+ binaryornot==0.4.4
153
+ evaluate==0.4.1
154
+ matplotlib-inline==0.1.7
155
+ wasabi==1.1.2
156
+ pycparser==2.22
157
+ GitPython==3.1.43
158
+ pluggy==1.5.0
159
+ async-lru==2.0.4
160
+ pgmpy==0.1.24
161
+ anyio==4.4.0
162
+ executing==2.1.0
163
+ orjson==3.10.11
164
+ humanfriendly==10.0
165
+ tornado==6.4.1
166
+ gmpy2==2.1.5
167
+ rlPyCairo==0.2.0
168
+ distributed==2024.11.0
169
+ FuzzyTM==2.0.5
170
+ torchtext==0.15.2a0+5ce3163
171
+ pytest==8.3.5
172
+ pyod==2.0.2
173
+ ImageHash==4.3.1
174
+ soupsieve==2.5
175
+ tblib==3.0.0
176
+ emoji==2.14.0
177
+ aiohappyeyeballs==2.4.3
178
+ uri-template==1.3.0
179
+ tensorflow_estimator==2.15.0
180
+ babel==2.16.0
181
+ dask-cuda==24.12.0a12
182
+ overrides==7.7.0
183
+ opencensus==0.11.3
184
+ openai==0.28.1
185
+ language_data==1.2.0
186
+ jedi==0.19.2
187
+ cookiecutter==2.6.0
188
+ entrypoints==0.4
189
+ exceptiongroup==1.2.2
190
+ marisa-trie==1.2.0
191
+ uvloop==0.20.0
192
+ aiosignal==1.3.1
193
+ Flask==3.0.3
194
+ tensorboard==2.15.2
195
+ cffi==1.17.1
196
+ tf_keras==2.15.0
197
+ absl-py==2.1.0
198
+ blinker==1.9.0
199
+ types-python-dateutil==2.9.0.20241003
200
+ opencv-python==4.9.0
201
+ frozendict==2.4.6
202
+ aiohttp-cors==0.7.0
203
+ statsmodels==0.14.4
204
+ tinycss2==1.4.0
205
+ terminado==0.18.1
206
+ pycaret==2.2.3
207
+ aiohttp==3.10.10
208
+ distributed-ucxx==0.41.0
209
+ prometheus_client==0.21.0
210
+ fastdownload==0.0.7
211
+ grpcio==1.59.3
212
+ google-api-core==2.22.0
213
+ jupyterlab_widgets==3.0.13
214
+ appdirs==1.4.4
215
+ littleutils==0.0.0
216
+ ray==2.24.0
217
+ kaggle==1.6.17
218
+ jsonschema==4.23.0
219
+ google-auth==2.36.0
220
+ scikit-base==0.11.0
221
+ visions==0.7.6
222
+ pyarrow==15.0.0
223
+ transformers==4.33.0
224
+ prometheus_flask_exporter==0.23.1
225
+ dm-tree==0.1.8
226
+ colorama==0.4.6
227
+ requests-toolbelt==1.0.0
228
+ cached-property==1.5.2
229
+ cymem==2.0.8
230
+ PyNaCl==1.5.0
231
+ PyWavelets==1.7.0
232
+ httptools==0.6.1
233
+ typing-utils==0.1.0
234
+ email_validator==2.2.0
235
+ marshmallow==3.23.1
236
+ Deprecated==1.2.14
237
+ virtualenv==20.4.7
238
+ optuna==3.6.1
239
+ jupyter_server==2.14.2
240
+ termcolor==2.5.0
241
+ mpi4py==4.0.1
242
+ torchdata==0.7.1+8cea82f
243
+ dataclasses==0.8
244
+ cloudpickle==3.1.0
245
+ tree_sitter_languages==1.10.2
246
+ tabulate==0.9.0
247
+ ipython==8.29.0
248
+ lightgbm==4.3.0
249
+ captum==0.6.0
250
+ confuse==2.0.1
251
+ torchvision==0.16.1+adc3221
252
+ lxml==4.9.4
253
+ fastapi==0.115.4
254
+ python-multipart==0.0.17
255
+ dnspython==2.7.0
256
+ jupyter-console==6.6.3
257
+ preshed==3.0.9
258
+ py-cpuinfo==9.0.0
259
+ Send2Trash==1.8.3
260
+ murmurhash==1.0.10
261
+ sniffio==1.3.1
262
+ websockets==13.1
263
+ h11==0.14.0
264
+ smmap==5.0.0
265
+ textual==0.85.2
266
+ jsonpatch==1.33
267
+ opencensus-context==0.1.3
268
+ nbconvert==7.16.4
269
+ sentry-sdk==2.19.0
270
+ opentelemetry-semantic-conventions==0.37b0
271
+ pandas-profiling==2.8.0
272
+ pillow==10.3.0
273
+ peft==0.13.2
274
+ rpds-py==0.21.0
275
+ bokeh==3.6.1
276
+ distro==1.9.0
277
+ itsdangerous==2.2.0
278
+ wandb==0.18.7
279
+ jsonpointer==3.0.0
280
+ astropy-iers-data==0.2024.11.11.0.32.38
281
+ horovod==0.28.1
282
+ graphviz==0.20.3
283
+ vtk==9.3.1
284
+ bleach==6.2.0
285
+ numexpr==2.8.7
286
+ pydantic_core==2.23.4
287
+ Jinja2==3.1.4
288
+ widgetsnbextension==4.0.13
289
+ filelock==3.16.1
290
+ catboost==1.2.7
291
+ raft-dask==24.12.0a36
292
+ async-timeout==4.0.3
293
+ datefinder==0.7.3
294
+ coloredlogs==15.0.1
295
+ platformdirs==4.3.6
296
+ spacy-legacy==3.0.12
297
+ chardet==5.2.0
298
+ jupyter_client==8.6.3
299
+ importlib_metadata==8.5.0
300
+ rfc3986-validator==0.1.1
301
+ huggingface_hub==0.26.2
302
+ PySocks==1.7.1
303
+ mlxtend==0.23.2
304
+ outdated==0.2.2
305
+ partd==1.4.2
306
+ thinc==8.2.5
307
+ astropy==6.1.6
308
+ rdflib==6.3.2
309
+ h2==4.1.0
310
+ typer==0.13.0
311
+ xyzservices==2024.9.0
312
+ toolz==0.12.1
313
+ frozenlist==1.5.0
314
+ rdkit==2024.9.2
315
+ pyasn1==0.6.1
316
+ jupyter_server_terminals==0.5.3
317
+ ucx-py==0.41.0a11
318
+ astunparse==1.6.3
319
+ simpful==2.12.0
320
+ notebook_shim==0.2.4
321
+ scipy==1.13.1
322
+ colorlog==6.9.0
323
+ tiktoken==0.3.3
324
+ plotly==5.24.1
325
+ fastrlock==0.8.2
326
+ chart-studio==1.1.0
327
+ stack-data==0.6.2
328
+ google-pasta==0.2.0
329
+ sktime==0.34.0
330
+ PyYAML==6.0.2
331
+ sympy==1.13.3
332
+ multidict==6.1.0
333
+ ml-dtypes==0.2.0
334
+ tensorboardX==2.6.2.2
335
+ decorator==5.1.1
336
+ cytoolz==1.0.0
337
+ ase==3.23.0
338
+ isoduration==20.11.0
339
+ html5lib==1.1
340
+ langsmith==0.1.142
341
+ future==1.0.0
342
+ onnx2torch==1.5.15
343
+ multipledispatch==0.6.0
344
+ protobuf==4.24.4
345
+ ucxx==0.41.0
346
+ pandas_flavor==0.6.0
347
+ msgpack==1.1.0
348
+ pyasn1_modules==0.4.1
349
+ imagecodecs==2024.1.1
350
+ mlflow==2.17.2
351
+ watchfiles==0.24.0
352
+ dm-sonnet==2.0.2
353
+ langcodes==3.4.1
354
+ freetype-py==2.3.0
355
+ argon2-cffi-bindings==21.2.0
356
+ trimesh==4.5.2
357
+ opt_einsum==3.4.0
358
+ tenacity==8.5.0
359
+ h5py==3.12.1
360
+ fastapi-cli==0.0.5
361
+ oauthlib==3.2.2
362
+ parso==0.8.4
363
+ weasel==0.4.1
364
+ yfinance==0.2.49
365
+ networkx==2.8.8
366
+ bitsandbytes==0.44.1
367
+ lazy_loader==0.4
368
+ querystring_parser==1.2.4
369
+ contourpy==1.3.0
370
+ unicodedata2==15.1.0
371
+ bcrypt==4.2.0
372
+ munkres==1.1.4
373
+ langchain==0.0.298
374
+ hpack==4.0.0
375
+ cryptography==43.0.3
376
+ umap-learn==0.5.7
377
+ arrow==1.3.0
378
+ docker==7.1.0
379
+ certifi==2025.1.31
380
+ fastjsonschema==2.20.0
381
+ tensorflow==2.15.0
382
+ googleapis-common-protos==1.65.0
383
+ iniconfig==2.0.0
384
+ Markdown==3.6
385
+ llvmlite==0.43.0
386
+ wslink==2.3.2
387
+ attrs==24.2.0
388
+ rich==13.9.4
389
+ cupy==13.3.0
390
+ uc-micro-py==1.0.3
391
+ alembic==1.14.0
392
+ joblib==1.4.2
393
+ reportlab==4.2.5
394
+ miniful==0.0.6
395
+ jupyter_core==5.7.2
396
+ wheel==0.45.0
397
+ phik==0.12.3
398
+ mistune==3.0.2
399
+ wcwidth==0.2.13
400
+ dacite==1.8.1
401
+ accelerate==0.22.0
402
+ sacremoses==0.0.53
403
+ revtok==0.0.3
404
+ python-slugify==8.0.4
405
+ tangled-up-in-unicode==0.2.0
406
+ dask==2024.11.0
407
+ markdown-it-py==3.0.0
408
+ sentencepiece==0.1.99
409
+ beautifulsoup4==4.12.3
410
+ six==1.16.0
411
+ numba-cuda==0.0.17
412
+ argon2-cffi==23.1.0
413
+ xxhash==3.5.0
414
+ hjson==3.1.0
415
+ fonttools==4.54.1
416
+ graphql-core==3.2.5
417
+ pyparsing==3.2.0
418
+ pure_eval==0.2.3
419
+ distlib==0.3.9
420
+ lightning==2.4.0
421
+ wordcloud==0.0.0
422
+ catalogue==2.0.10
423
+ jax==0.4.27
424
+ tree-sitter==0.23.2
425
+ notebook==7.2.2
426
+ dataclasses-json==0.6.7
427
+ propcache==0.2.0
428
+ numba==0.60.0
429
+ dask-expr==1.1.17
430
+ pydantic==2.9.2
431
+ gunicorn==22.0.0
432
+ missingno==0.5.2
433
+ pyOpenSSL==24.2.1
434
+ openpyxl==3.1.5
435
+ packaging==24.1
436
+ python-dotenv==1.0.1
437
+ cycler==0.12.1
438
+ types-pytz==2024.2.0.20241003
439
+ yellowbrick==1.5
440
+ referencing==0.35.1
441
+ pyLDAvis==3.4.1
442
+ lazypredict==0.2.16
443
+ fqdn==1.5.1
444
+ websocket-client==1.8.0
445
+ fastcore==1.7.19
446
+ pynvjitlink-cu12==0.3.0
447
+ pingouin==0.5.5
448
+ numpy==1.26.4
449
+ typing-inspect==0.9.0
450
+ nltk==3.9.1
451
+ onnxruntime==1.19.2
452
+ tensorflow-probability==0.23.0
453
+ datasets==3.0.2
454
+ pickleshare==0.7.5
455
+ peewee==3.17.7
456
+ torch-geometric==2.6.1
457
+ ptyprocess==0.7.0
458
+ greenlet==3.1.1
459
+ graphql-relay==3.2.0
460
+ graphene==3.4.3
461
+ et_xmlfile==2.0.0
462
+ webencodings==0.5.1
463
+ hyperframe==6.0.1
464
+ multitasking==0.0.9
465
+ typer-slim==0.13.0
466
+ onnx==1.15.0
467
+ uvicorn==0.32.0
468
+ memray==1.13.4
469
+ xgboost==2.1.2
470
+ Brotli==1.1.0
471
+ zipp==3.21.0
472
+ nbformat==5.10.4
473
+ responses==0.18.0
474
+ funcy==2.0
475
+ Pygments==2.18.0
476
+ tqdm==4.67.0
477
+ linkify-it-py==2.0.3
478
+ srsly==2.4.8
479
+ cuda-python==12.6.0
480
+ lightning-utilities==0.11.8
481
+ cudf==24.12.0a337
482
+ dask-ml==2024.4.4
483
+ docker-pycreds==0.4.0
484
+ pkgutil_resolve_name==1.3.10
485
+ opentelemetry-api==1.16.0
486
+ fsspec==2024.9.0
487
+ nbclient==0.10.0
488
+ psutil==5.9.8
489
+ pytorch-lightning==2.4.0
490
+ sortedcontainers==2.4.0
491
+ matplotlib==3.9.2
492
+ defusedxml==0.7.1
493
+ urllib3==1.26.19
494
+ jupyterlab_server==2.27.3
495
+ retrying==1.3.3
496
+ dask-cudf==24.12.0a337
497
+ sqlparse==0.5.1
498
+ text-unidecode==1.3
499
+ seaborn==0.13.2
500
+ typing_extensions==4.12.2
501
+ pyzmq==26.2.0
502
+ rfc3339-validator==0.1.4
503
+ pynndescent==0.5.13
504
+ pip==24.3.1
505
+ confection==0.1.4
506
+ wrapt==1.14.1
507
+ fastprogress==1.0.3
508
+ traitlets==5.14.3
509
+ asttokens==2.4.1
510
+ json5==0.9.28
511
+ pandas-stubs==2.2.3.241126
512
+ torchmetrics==1.2.1
513
+ gitdb==4.0.11
514
+ annotated-types==0.7.0
515
+ ipython-autotime==0.1
516
+ httpcore==1.0.6
517
+ click==8.1.7
518
+ setproctitle==1.3.3
519
+ starlette==0.41.2
520
+ jupyterlab==4.2.5
521
+ rmm==24.12.0a27
522
+ opentelemetry-sdk==1.16.0
523
+ textblob==0.15.3
524
+ imbalanced-learn==0.12.4
525
+ typeguard==4.3.0
526
+ more-itertools==10.3.0
527
+ zipp==3.19.2
528
+ autocommand==2.2.2
529
+ jaraco.context==5.3.0
530
+ packaging==24.1
531
+ importlib_metadata==8.0.0
532
+ platformdirs==4.2.2
533
+ jaraco.functools==4.0.1
534
+ importlib_resources==6.4.0
535
+ tomli==2.0.1
536
+ jaraco.text==3.12.1
537
+ wheel==0.43.0
538
+ jaraco.collections==5.1.0
539
+ typing_extensions==4.12.2
540
+ inflect==7.3.1
541
+ backports.tarfile==1.2.0
wandb/run-20250504_132912-1agsw1y8/files/wandb-metadata.json ADDED
@@ -0,0 +1,77 @@
1
+ {
2
+ "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
+ "python": "3.10.15",
4
+ "startedAt": "2025-05-04T10:29:13.019628Z",
5
+ "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
+ "codePath": "finetuning_bc_prott5.py",
7
+ "email": "zeynep.isik1@sabanciuniv.edu",
8
+ "root": "/arf/scratch/zisik/prott5_bc_ft",
9
+ "host": "kolyoz1",
10
+ "username": "zisik",
11
+ "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
+ "codePathLocal": "finetuning_bc_prott5.py",
13
+ "cpu_count": 64,
14
+ "cpu_count_logical": 64,
15
+ "gpu": "NVIDIA H100 80GB HBM3",
16
+ "gpu_count": 1,
17
+ "disk": {
18
+ "/": {
19
+ "total": "7643995308032",
20
+ "used": "274768302080"
21
+ }
22
+ },
23
+ "memory": {
24
+ "total": "1081373220864"
25
+ },
26
+ "cpu": {
27
+ "count": 64,
28
+ "countLogical": 64
29
+ },
30
+ "gpu_nvidia": [
31
+ {
32
+ "name": "NVIDIA H100 80GB HBM3",
33
+ "memoryTotal": "85520809984",
34
+ "cudaCores": 16896,
35
+ "architecture": "Hopper"
36
+ }
37
+ ],
38
+ "slurm": {
39
+ "cluster_name": "cuda",
40
+ "conf": "/etc/slurm/slurm.conf",
41
+ "cpus_on_node": "16",
42
+ "cpus_per_task": "16",
43
+ "gpus_on_node": "1",
44
+ "gtids": "0",
45
+ "job_account": "tbag154",
46
+ "job_cpus_per_node": "16",
47
+ "job_end_time": "1746613727",
48
+ "job_gid": "11636",
49
+ "job_gpus": "1",
50
+ "job_id": "1027934",
51
+ "job_name": "msa_ph_pt",
52
+ "job_nodelist": "kolyoz1",
53
+ "job_num_nodes": "1",
54
+ "job_partition": "kolyoz-cuda",
55
+ "job_qos": "tbag",
56
+ "job_start_time": "1746354527",
57
+ "job_uid": "11636",
58
+ "job_user": "zisik",
59
+ "jobid": "1027934",
60
+ "localid": "0",
61
+ "mem_per_cpu": "14000",
62
+ "nnodes": "1",
63
+ "node_aliases": "(null)",
64
+ "nodeid": "0",
65
+ "nodelist": "kolyoz1",
66
+ "prio_process": "0",
67
+ "procid": "0",
68
+ "submit_dir": "/arf/scratch/zisik",
69
+ "submit_host": "cuda-ui",
70
+ "task_pid": "3157550",
71
+ "tasks_per_node": "1",
72
+ "topology_addr": "kolyoz1",
73
+ "topology_addr_pattern": "node",
74
+ "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
+ },
76
+ "cudaVersion": "12.6"
77
+ }
wandb/run-20250504_132912-1agsw1y8/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"train/learning_rate":3.383107938893063e-05,"train/global_step":15972,"eval/steps_per_second":75.053,"_timestamp":1.7463635035359182e+09,"eval/accuracy":0.4992604225635032,"_step":31,"eval/loss":0.6931192278862,"train/grad_norm":0.10788150876760483,"train/epoch":1,"_wandb":{"runtime":8950},"_runtime":8950.516897928,"train/loss":0.6932,"eval/runtime":182.4166,"eval/samples_per_second":600.395}
wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
1
+ {"time":"2025-05-04T13:29:12.35887463+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp1u83hfoi/port-3157577.txt","pid":3157577,"debug":false,"disable-analytics":false}
2
+ {"time":"2025-05-04T13:29:12.358923345+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2025-05-04T13:29:12.35977753+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45947,"Zone":""}}
4
+ {"time":"2025-05-04T13:29:12.359879073+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3157577}
5
+ {"time":"2025-05-04T13:29:12.546636547+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34718"}
6
+ {"time":"2025-05-04T13:29:13.02161239+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"1agsw1y8","id":"127.0.0.1:34718"}
7
+ {"time":"2025-05-04T13:29:13.145638422+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"1agsw1y8","id":"127.0.0.1:34718"}
8
+ {"time":"2025-05-04T15:58:23.607250248+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34718"}
9
+ {"time":"2025-05-04T15:58:23.607435128+03:00","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-05-04T15:58:23.607401252+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:34718"}
11
+ {"time":"2025-05-04T15:58:23.607720003+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:34718"}
12
+ {"time":"2025-05-04T15:58:24.801882716+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34718"}
13
+ {"time":"2025-05-04T15:58:24.801915389+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34718"}
14
+ {"time":"2025-05-04T15:58:24.801937893+03:00","level":"INFO","msg":"server is closed"}
wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log ADDED
@@ -0,0 +1,19 @@
1
+ {"time":"2025-05-04T13:29:13.023253759+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2025-05-04T13:29:13.023302807+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log"}
3
+ {"time":"2025-05-04T13:29:13.145570529+03:00","level":"INFO","msg":"created new stream","id":"1agsw1y8"}
4
+ {"time":"2025-05-04T13:29:13.145625833+03:00","level":"INFO","msg":"stream: started","id":"1agsw1y8"}
5
+ {"time":"2025-05-04T13:29:13.145806528+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"1agsw1y8"}
6
+ {"time":"2025-05-04T13:29:13.145923955+03:00","level":"INFO","msg":"handler: started","stream_id":"1agsw1y8"}
7
+ {"time":"2025-05-04T13:29:13.146011145+03:00","level":"INFO","msg":"sender: started","stream_id":"1agsw1y8"}
8
+ {"time":"2025-05-04T13:29:13.51656923+03:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2025-05-04T15:58:23.607363166+03:00","level":"INFO","msg":"stream: closing","id":"1agsw1y8"}
10
+ {"time":"2025-05-04T15:58:23.607412721+03:00","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2025-05-04T15:58:23.608736938+03:00","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2025-05-04T15:58:23.995834762+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
+ {"time":"2025-05-04T15:58:23.995863601+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
+ {"time":"2025-05-04T15:58:23.995874256+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
+ {"time":"2025-05-04T15:58:24.53730388+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
16
+ {"time":"2025-05-04T15:58:24.801427373+03:00","level":"INFO","msg":"handler: closed","stream_id":"1agsw1y8"}
17
+ {"time":"2025-05-04T15:58:24.801476891+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"1agsw1y8"}
18
+ {"time":"2025-05-04T15:58:24.801525233+03:00","level":"INFO","msg":"sender: closed","stream_id":"1agsw1y8"}
19
+ {"time":"2025-05-04T15:58:24.801589463+03:00","level":"INFO","msg":"stream: closed","id":"1agsw1y8"}
wandb/run-20250504_132912-1agsw1y8/logs/debug.log ADDED
@@ -0,0 +1,27 @@
1
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Configure stats pid to 3157577
3
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Applying login settings: {}
8
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug.log
10
+ 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log
11
+ 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():619] calling init triggers
12
+ 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():669] starting backend
15
+ 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():673] sending inform_init request
16
+ 2025-05-04 13:29:13,018 INFO MainThread:3157577 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2025-05-04 13:29:13,019 INFO MainThread:3157577 [wandb_init.py:init():686] backend started and connected
18
+ 2025-05-04 13:29:13,026 INFO MainThread:3157577 [wandb_init.py:init():781] updated telemetry
19
+ 2025-05-04 13:29:13,030 INFO MainThread:3157577 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2025-05-04 13:29:13,503 INFO MainThread:3157577 [wandb_init.py:init():867] starting run threads in backend
21
+ 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2025-05-04 13:29:14,954 INFO MainThread:3157577 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2025-05-04 13:33:19,417 INFO MainThread:3157577 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_13-33-08_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': 
None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
27
+ 2025-05-04 15:58:23,607 WARNING MsgRouterThr:3157577 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71cf2569d2e508f45833ce35b1904bcc5325f9369eef0a76ea074fad88d8621d
3
+ size 5615901
wandb/run-20250504_160615-f65jh2lv/files/output.log ADDED
@@ -0,0 +1,8 @@
1
+ You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
+ Map: 100%|██████████| 511104/511104 [00:20<00:00, 25304.42 examples/s]
3
+ Map: 100%|██████████| 109522/109522 [00:02<00:00, 36704.44 examples/s]
4
+ /arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
5
+ warnings.warn(
6
+ [2025-05-04 16:06:52,248] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
7
+ wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
8
+ 1%| | 246/47916 [02:12<7:08:44, 1.85it/s]
wandb/run-20250504_160615-f65jh2lv/files/requirements.txt ADDED
@@ -0,0 +1,541 @@
1
+ nvidia-cuda-cupti-cu12==12.4.127
2
+ nvidia-cuda-nvrtc-cu12==12.4.127
3
+ pyg-lib==0.4.0+pt20cu117
4
+ biopython==1.85
5
+ iniconfig==2.0.0
6
+ tokenizers==0.20.0
7
+ accelerate==1.3.0
8
+ torch==2.6.0
9
+ nvidia-nccl-cu12==2.21.5
10
+ transformers==4.45.2
11
+ nvidia-cusparse-cu12==12.3.1.170
12
+ torch-scatter==2.1.2+pt20cu117
13
+ nvidia-cusparselt-cu12==0.6.2
14
+ nvidia-nvtx-cu12==12.4.127
15
+ zstd==1.5.6.6
16
+ fair-esm==2.0.0
17
+ omegaconf==2.3.0
18
+ pluggy==1.5.0
19
+ pytest==8.3.5
20
+ nvidia-curand-cu12==10.3.5.147
21
+ nvidia-cufft-cu12==11.2.1.3
22
+ torch-cluster==1.6.3+pt20cu117
23
+ regex==2024.9.11
24
+ nvidia-cudnn-cu12==9.1.0.70
25
+ torch-spline-conv==1.2.2+pt20cu117
26
+ nvidia-cusolver-cu12==11.6.1.9
27
+ antlr4-python3-runtime==4.9.3
28
+ msgpack-numpy==0.4.8
29
+ nlp==0.2.0
30
+ einops==0.8.1
31
+ nvidia-cublas-cu12==12.4.5.8
32
+ triton==3.2.0
33
+ ninja==1.11.1.3
34
+ hydra-core==1.3.2
35
+ nvidia-nvjitlink-cu12==12.4.127
36
+ biotite==0.41.2
37
+ torch-sparse==0.6.18+pt20cu117
38
+ esm==3.1.4
39
+ sympy==1.13.1
40
+ nvidia-cuda-runtime-cu12==12.4.127
41
+ jupyter-lsp==2.2.5
42
+ jupyter-events==0.10.0
43
+ ipykernel==6.29.5
44
+ Mako==1.3.5
45
+ proto-plus==1.25.0
46
+ fst-pso==1.8.1
47
+ gensim==4.3.3
48
+ htmlmin==0.1.12
49
+ tokenizers==0.13.3
50
+ timm==1.0.11
51
+ MarkupSafe==3.0.2
52
+ safetensors==0.4.5
53
+ requests==2.32.3
54
+ gast==0.5.5
55
+ cuml==24.12.0a33
56
+ jaxlib==0.4.23.dev20240214
57
+ spacy-loggers==1.0.5
58
+ pytz==2024.1
59
+ idna==3.10
60
+ python-dateutil==2.9.0
61
+ mdurl==0.1.2
62
+ blis==0.7.10
63
+ jupyter==1.1.1
64
+ pyerfa==2.0.1.5
65
+ comm==0.2.2
66
+ pygraphviz==1.14
67
+ dill==0.3.8
68
+ paramiko==3.5.0
69
+ llama-index==0.8.36
70
+ mdit-py-plugins==0.4.2
71
+ Werkzeug==3.1.3
72
+ pyu2f==0.1.5
73
+ dask-glm==0.2.0
74
+ httpx==0.27.2
75
+ typeguard==4.4.1
76
+ mypy-extensions==1.0.0
77
+ kmodes==0.12.2
78
+ keras==2.15.0
79
+ ydata-profiling==0.0.dev0
80
+ regex==2024.11.6
81
+ xarray==2024.11.0
82
+ setuptools==75.3.0
83
+ charset-normalizer==3.4.0
84
+ jupyterlab_nvdashboard==0.11.0
85
+ pylibraft==24.12.0a36
86
+ spacy==3.7.6
87
+ mlflow-skinny==2.17.2
88
+ nvtx==0.2.10
89
+ multimethod==1.12
90
+ pexpect==4.9.0
91
+ torch==2.1.0.post301
92
+ flatbuffers==24.3.25
93
+ python-json-logger==2.0.7
94
+ PyJWT==2.9.0
95
+ multiprocess==0.70.16
96
+ colorlover==0.3.0
97
+ yarl==1.16.0
98
+ locket==1.0.0
99
+ patsy==1.0.0
100
+ rapids-dask-dependency==24.12.0a0
101
+ stanza==1.9.2
102
+ debugpy==1.8.8
103
+ jupyterlab_pygments==0.3.0
104
+ pylibcudf==24.12.0a337
105
+ lz4==4.3.3
106
+ pandas==2.2.3
107
+ tifffile==2024.9.20
108
+ pynvml==11.4.1
109
+ cufflinks==0.17.3
110
+ ipywidgets==8.1.5
111
+ requests-oauthlib==2.0.0
112
+ google-auth-oauthlib==1.2.1
113
+ rsa==4.9
114
+ webcolors==24.8.0
115
+ jsonschema-specifications==2024.10.1
116
+ scikit-learn==1.5.2
117
+ langchain-text-splitters==0.3.2
118
+ pandas-datareader==0.10.0
119
+ tomli==2.0.2
120
+ tzdata==2024.2
121
+ scikit-image==0.24.0
122
+ tensorboard_data_server==0.7.0
123
+ kiwisolver==1.4.7
124
+ cloudpathlib==0.20.0
125
+ isodate==0.6.1
126
+ adversarial-robustness-toolbox==1.19.1
127
+ SQLAlchemy==2.0.36
128
+ pytest-runner==6.0.0
129
+ pycairo==1.27.0
130
+ treelite==4.3.0
131
+ jiter==0.7.0
132
+ threadpoolctl==3.5.0
133
+ pandocfilters==1.5.0
134
+ loguru==0.7.2
135
+ smart_open==7.0.5
136
+ shellingham==1.5.4
137
+ deepspeed==0.15.4
138
+ prompt_toolkit==3.0.48
139
+ databricks-sdk==0.34.0
140
+ langchain-core==0.3.15
141
+ imageio==2.36.0
142
+ openapi-schema-pydantic==1.2.4
143
+ zict==3.0.0
144
+ cachetools==5.5.0
145
+ colorful==0.5.6
146
+ mpmath==1.3.0
147
+ nest_asyncio==1.6.0
148
+ pyFUME==0.2.25
149
+ opencv-python-headless==4.9.0
150
+ fastai==2.7.18
151
+ importlib_resources==6.4.5
152
+ binaryornot==0.4.4
153
+ evaluate==0.4.1
154
+ matplotlib-inline==0.1.7
155
+ wasabi==1.1.2
156
+ pycparser==2.22
157
+ GitPython==3.1.43
158
+ pluggy==1.5.0
159
+ async-lru==2.0.4
160
+ pgmpy==0.1.24
161
+ anyio==4.4.0
162
+ executing==2.1.0
163
+ orjson==3.10.11
164
+ humanfriendly==10.0
165
+ tornado==6.4.1
166
+ gmpy2==2.1.5
167
+ rlPyCairo==0.2.0
168
+ distributed==2024.11.0
169
+ FuzzyTM==2.0.5
170
+ torchtext==0.15.2a0+5ce3163
171
+ pytest==8.3.5
172
+ pyod==2.0.2
173
+ ImageHash==4.3.1
174
+ soupsieve==2.5
175
+ tblib==3.0.0
176
+ emoji==2.14.0
177
+ aiohappyeyeballs==2.4.3
178
+ uri-template==1.3.0
179
+ tensorflow_estimator==2.15.0
180
+ babel==2.16.0
181
+ dask-cuda==24.12.0a12
182
+ overrides==7.7.0
183
+ opencensus==0.11.3
184
+ openai==0.28.1
185
+ language_data==1.2.0
186
+ jedi==0.19.2
187
+ cookiecutter==2.6.0
188
+ entrypoints==0.4
189
+ exceptiongroup==1.2.2
190
+ marisa-trie==1.2.0
191
+ uvloop==0.20.0
192
+ aiosignal==1.3.1
193
+ Flask==3.0.3
194
+ tensorboard==2.15.2
195
+ cffi==1.17.1
196
+ tf_keras==2.15.0
197
+ absl-py==2.1.0
198
+ blinker==1.9.0
199
+ types-python-dateutil==2.9.0.20241003
200
+ opencv-python==4.9.0
201
+ frozendict==2.4.6
202
+ aiohttp-cors==0.7.0
203
+ statsmodels==0.14.4
204
+ tinycss2==1.4.0
205
+ terminado==0.18.1
206
+ pycaret==2.2.3
207
+ aiohttp==3.10.10
208
+ distributed-ucxx==0.41.0
209
+ prometheus_client==0.21.0
210
+ fastdownload==0.0.7
211
+ grpcio==1.59.3
212
+ google-api-core==2.22.0
213
+ jupyterlab_widgets==3.0.13
214
+ appdirs==1.4.4
215
+ littleutils==0.0.0
216
+ ray==2.24.0
217
+ kaggle==1.6.17
218
+ jsonschema==4.23.0
219
+ google-auth==2.36.0
220
+ scikit-base==0.11.0
221
+ visions==0.7.6
222
+ pyarrow==15.0.0
223
+ transformers==4.33.0
224
+ prometheus_flask_exporter==0.23.1
225
+ dm-tree==0.1.8
226
+ colorama==0.4.6
227
+ requests-toolbelt==1.0.0
228
+ cached-property==1.5.2
229
+ cymem==2.0.8
230
+ PyNaCl==1.5.0
231
+ PyWavelets==1.7.0
232
+ httptools==0.6.1
233
+ typing-utils==0.1.0
234
+ email_validator==2.2.0
235
+ marshmallow==3.23.1
236
+ Deprecated==1.2.14
237
+ virtualenv==20.4.7
238
+ optuna==3.6.1
239
+ jupyter_server==2.14.2
240
+ termcolor==2.5.0
241
+ mpi4py==4.0.1
242
+ torchdata==0.7.1+8cea82f
243
+ dataclasses==0.8
244
+ cloudpickle==3.1.0
245
+ tree_sitter_languages==1.10.2
246
+ tabulate==0.9.0
247
+ ipython==8.29.0
248
+ lightgbm==4.3.0
249
+ captum==0.6.0
250
+ confuse==2.0.1
251
+ torchvision==0.16.1+adc3221
252
+ lxml==4.9.4
253
+ fastapi==0.115.4
254
+ python-multipart==0.0.17
255
+ dnspython==2.7.0
256
+ jupyter-console==6.6.3
257
+ preshed==3.0.9
258
+ py-cpuinfo==9.0.0
259
+ Send2Trash==1.8.3
260
+ murmurhash==1.0.10
261
+ sniffio==1.3.1
262
+ websockets==13.1
263
+ h11==0.14.0
264
+ smmap==5.0.0
265
+ textual==0.85.2
266
+ jsonpatch==1.33
267
+ opencensus-context==0.1.3
268
+ nbconvert==7.16.4
269
+ sentry-sdk==2.19.0
270
+ opentelemetry-semantic-conventions==0.37b0
271
+ pandas-profiling==2.8.0
272
+ pillow==10.3.0
273
+ peft==0.13.2
274
+ rpds-py==0.21.0
275
+ bokeh==3.6.1
276
+ distro==1.9.0
277
+ itsdangerous==2.2.0
278
+ wandb==0.18.7
279
+ jsonpointer==3.0.0
280
+ astropy-iers-data==0.2024.11.11.0.32.38
281
+ horovod==0.28.1
282
+ graphviz==0.20.3
283
+ vtk==9.3.1
284
+ bleach==6.2.0
285
+ numexpr==2.8.7
286
+ pydantic_core==2.23.4
287
+ Jinja2==3.1.4
288
+ widgetsnbextension==4.0.13
289
+ filelock==3.16.1
290
+ catboost==1.2.7
291
+ raft-dask==24.12.0a36
292
+ async-timeout==4.0.3
293
+ datefinder==0.7.3
294
+ coloredlogs==15.0.1
295
+ platformdirs==4.3.6
296
+ spacy-legacy==3.0.12
297
+ chardet==5.2.0
298
+ jupyter_client==8.6.3
299
+ importlib_metadata==8.5.0
300
+ rfc3986-validator==0.1.1
301
+ huggingface_hub==0.26.2
302
+ PySocks==1.7.1
303
+ mlxtend==0.23.2
304
+ outdated==0.2.2
305
+ partd==1.4.2
306
+ thinc==8.2.5
307
+ astropy==6.1.6
308
+ rdflib==6.3.2
309
+ h2==4.1.0
310
+ typer==0.13.0
311
+ xyzservices==2024.9.0
312
+ toolz==0.12.1
313
+ frozenlist==1.5.0
314
+ rdkit==2024.9.2
315
+ pyasn1==0.6.1
316
+ jupyter_server_terminals==0.5.3
317
+ ucx-py==0.41.0a11
318
+ astunparse==1.6.3
319
+ simpful==2.12.0
320
+ notebook_shim==0.2.4
321
+ scipy==1.13.1
322
+ colorlog==6.9.0
323
+ tiktoken==0.3.3
324
+ plotly==5.24.1
325
+ fastrlock==0.8.2
326
+ chart-studio==1.1.0
327
+ stack-data==0.6.2
328
+ google-pasta==0.2.0
329
+ sktime==0.34.0
330
+ PyYAML==6.0.2
331
+ sympy==1.13.3
332
+ multidict==6.1.0
333
+ ml-dtypes==0.2.0
334
+ tensorboardX==2.6.2.2
335
+ decorator==5.1.1
336
+ cytoolz==1.0.0
337
+ ase==3.23.0
338
+ isoduration==20.11.0
339
+ html5lib==1.1
340
+ langsmith==0.1.142
341
+ future==1.0.0
342
+ onnx2torch==1.5.15
343
+ multipledispatch==0.6.0
344
+ protobuf==4.24.4
345
+ ucxx==0.41.0
346
+ pandas_flavor==0.6.0
347
+ msgpack==1.1.0
348
+ pyasn1_modules==0.4.1
349
+ imagecodecs==2024.1.1
350
+ mlflow==2.17.2
351
+ watchfiles==0.24.0
352
+ dm-sonnet==2.0.2
353
+ langcodes==3.4.1
354
+ freetype-py==2.3.0
355
+ argon2-cffi-bindings==21.2.0
356
+ trimesh==4.5.2
357
+ opt_einsum==3.4.0
358
+ tenacity==8.5.0
359
+ h5py==3.12.1
360
+ fastapi-cli==0.0.5
361
+ oauthlib==3.2.2
362
+ parso==0.8.4
363
+ weasel==0.4.1
364
+ yfinance==0.2.49
365
+ networkx==2.8.8
366
+ bitsandbytes==0.44.1
367
+ lazy_loader==0.4
368
+ querystring_parser==1.2.4
369
+ contourpy==1.3.0
370
+ unicodedata2==15.1.0
371
+ bcrypt==4.2.0
372
+ munkres==1.1.4
373
+ langchain==0.0.298
374
+ hpack==4.0.0
375
+ cryptography==43.0.3
376
+ umap-learn==0.5.7
377
+ arrow==1.3.0
378
+ docker==7.1.0
379
+ certifi==2025.1.31
380
+ fastjsonschema==2.20.0
381
+ tensorflow==2.15.0
382
+ googleapis-common-protos==1.65.0
383
+ iniconfig==2.0.0
384
+ Markdown==3.6
385
+ llvmlite==0.43.0
386
+ wslink==2.3.2
387
+ attrs==24.2.0
388
+ rich==13.9.4
389
+ cupy==13.3.0
390
+ uc-micro-py==1.0.3
391
+ alembic==1.14.0
392
+ joblib==1.4.2
393
+ reportlab==4.2.5
394
+ miniful==0.0.6
395
+ jupyter_core==5.7.2
396
+ wheel==0.45.0
397
+ phik==0.12.3
398
+ mistune==3.0.2
399
+ wcwidth==0.2.13
400
+ dacite==1.8.1
401
+ accelerate==0.22.0
402
+ sacremoses==0.0.53
403
+ revtok==0.0.3
404
+ python-slugify==8.0.4
405
+ tangled-up-in-unicode==0.2.0
406
+ dask==2024.11.0
407
+ markdown-it-py==3.0.0
408
+ sentencepiece==0.1.99
409
+ beautifulsoup4==4.12.3
410
+ six==1.16.0
411
+ numba-cuda==0.0.17
412
+ argon2-cffi==23.1.0
413
+ xxhash==3.5.0
414
+ hjson==3.1.0
415
+ fonttools==4.54.1
416
+ graphql-core==3.2.5
417
+ pyparsing==3.2.0
418
+ pure_eval==0.2.3
419
+ distlib==0.3.9
420
+ lightning==2.4.0
421
+ wordcloud==0.0.0
422
+ catalogue==2.0.10
423
+ jax==0.4.27
424
+ tree-sitter==0.23.2
425
+ notebook==7.2.2
426
+ dataclasses-json==0.6.7
427
+ propcache==0.2.0
428
+ numba==0.60.0
429
+ dask-expr==1.1.17
430
+ pydantic==2.9.2
431
+ gunicorn==22.0.0
432
+ missingno==0.5.2
433
+ pyOpenSSL==24.2.1
434
+ openpyxl==3.1.5
435
+ packaging==24.1
436
+ python-dotenv==1.0.1
437
+ cycler==0.12.1
438
+ types-pytz==2024.2.0.20241003
439
+ yellowbrick==1.5
440
+ referencing==0.35.1
441
+ pyLDAvis==3.4.1
442
+ lazypredict==0.2.16
443
+ fqdn==1.5.1
444
+ websocket-client==1.8.0
445
+ fastcore==1.7.19
446
+ pynvjitlink-cu12==0.3.0
447
+ pingouin==0.5.5
448
+ numpy==1.26.4
449
+ typing-inspect==0.9.0
450
+ nltk==3.9.1
451
+ onnxruntime==1.19.2
452
+ tensorflow-probability==0.23.0
453
+ datasets==3.0.2
454
+ pickleshare==0.7.5
455
+ peewee==3.17.7
456
+ torch-geometric==2.6.1
457
+ ptyprocess==0.7.0
458
+ greenlet==3.1.1
459
+ graphql-relay==3.2.0
460
+ graphene==3.4.3
461
+ et_xmlfile==2.0.0
462
+ webencodings==0.5.1
463
+ hyperframe==6.0.1
464
+ multitasking==0.0.9
465
+ typer-slim==0.13.0
466
+ onnx==1.15.0
467
+ uvicorn==0.32.0
468
+ memray==1.13.4
469
+ xgboost==2.1.2
470
+ Brotli==1.1.0
471
+ zipp==3.21.0
472
+ nbformat==5.10.4
473
+ responses==0.18.0
474
+ funcy==2.0
475
+ Pygments==2.18.0
476
+ tqdm==4.67.0
477
+ linkify-it-py==2.0.3
478
+ srsly==2.4.8
479
+ cuda-python==12.6.0
480
+ lightning-utilities==0.11.8
481
+ cudf==24.12.0a337
482
+ dask-ml==2024.4.4
483
+ docker-pycreds==0.4.0
484
+ pkgutil_resolve_name==1.3.10
485
+ opentelemetry-api==1.16.0
486
+ fsspec==2024.9.0
487
+ nbclient==0.10.0
488
+ psutil==5.9.8
489
+ pytorch-lightning==2.4.0
490
+ sortedcontainers==2.4.0
491
+ matplotlib==3.9.2
492
+ defusedxml==0.7.1
493
+ urllib3==1.26.19
494
+ jupyterlab_server==2.27.3
495
+ retrying==1.3.3
496
+ dask-cudf==24.12.0a337
497
+ sqlparse==0.5.1
498
+ text-unidecode==1.3
499
+ seaborn==0.13.2
500
+ typing_extensions==4.12.2
501
+ pyzmq==26.2.0
502
+ rfc3339-validator==0.1.4
503
+ pynndescent==0.5.13
504
+ pip==24.3.1
505
+ confection==0.1.4
506
+ wrapt==1.14.1
507
+ fastprogress==1.0.3
508
+ traitlets==5.14.3
509
+ asttokens==2.4.1
510
+ json5==0.9.28
511
+ pandas-stubs==2.2.3.241126
512
+ torchmetrics==1.2.1
513
+ gitdb==4.0.11
514
+ annotated-types==0.7.0
515
+ ipython-autotime==0.1
516
+ httpcore==1.0.6
517
+ click==8.1.7
518
+ setproctitle==1.3.3
519
+ starlette==0.41.2
520
+ jupyterlab==4.2.5
521
+ rmm==24.12.0a27
522
+ opentelemetry-sdk==1.16.0
523
+ textblob==0.15.3
524
+ imbalanced-learn==0.12.4
525
+ typeguard==4.3.0
526
+ more-itertools==10.3.0
527
+ zipp==3.19.2
528
+ autocommand==2.2.2
529
+ jaraco.context==5.3.0
530
+ packaging==24.1
531
+ importlib_metadata==8.0.0
532
+ platformdirs==4.2.2
533
+ jaraco.functools==4.0.1
534
+ importlib_resources==6.4.0
535
+ tomli==2.0.1
536
+ jaraco.text==3.12.1
537
+ wheel==0.43.0
538
+ jaraco.collections==5.1.0
539
+ typing_extensions==4.12.2
540
+ inflect==7.3.1
541
+ backports.tarfile==1.2.0
wandb/run-20250504_160615-f65jh2lv/files/wandb-metadata.json ADDED
@@ -0,0 +1,77 @@
1
+ {
2
+ "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
+ "python": "3.10.15",
4
+ "startedAt": "2025-05-04T13:06:15.895027Z",
5
+ "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
+ "codePath": "finetuning_bc_prott5.py",
7
+ "email": "zeynep.isik1@sabanciuniv.edu",
8
+ "root": "/arf/scratch/zisik/prott5_bc_ft",
9
+ "host": "kolyoz1",
10
+ "username": "zisik",
11
+ "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
+ "codePathLocal": "finetuning_bc_prott5.py",
13
+ "cpu_count": 64,
14
+ "cpu_count_logical": 64,
15
+ "gpu": "NVIDIA H100 80GB HBM3",
16
+ "gpu_count": 1,
17
+ "disk": {
18
+ "/": {
19
+ "total": "7643995308032",
20
+ "used": "274886729728"
21
+ }
22
+ },
23
+ "memory": {
24
+ "total": "1081373220864"
25
+ },
26
+ "cpu": {
27
+ "count": 64,
28
+ "countLogical": 64
29
+ },
30
+ "gpu_nvidia": [
31
+ {
32
+ "name": "NVIDIA H100 80GB HBM3",
33
+ "memoryTotal": "85520809984",
34
+ "cudaCores": 16896,
35
+ "architecture": "Hopper"
36
+ }
37
+ ],
38
+ "slurm": {
39
+ "cluster_name": "cuda",
40
+ "conf": "/etc/slurm/slurm.conf",
41
+ "cpus_on_node": "16",
42
+ "cpus_per_task": "16",
43
+ "gpus_on_node": "1",
44
+ "gtids": "0",
45
+ "job_account": "tbag154",
46
+ "job_cpus_per_node": "16",
47
+ "job_end_time": "1746623147",
48
+ "job_gid": "11636",
49
+ "job_gpus": "1",
50
+ "job_id": "1027945",
51
+ "job_name": "msa_ph_pt",
52
+ "job_nodelist": "kolyoz1",
53
+ "job_num_nodes": "1",
54
+ "job_partition": "kolyoz-cuda",
55
+ "job_qos": "tbag",
56
+ "job_start_time": "1746363947",
57
+ "job_uid": "11636",
58
+ "job_user": "zisik",
59
+ "jobid": "1027945",
60
+ "localid": "0",
61
+ "mem_per_cpu": "14000",
62
+ "nnodes": "1",
63
+ "node_aliases": "(null)",
64
+ "nodeid": "0",
65
+ "nodelist": "kolyoz1",
66
+ "prio_process": "0",
67
+ "procid": "0",
68
+ "submit_dir": "/arf/scratch/zisik",
69
+ "submit_host": "cuda-ui",
70
+ "task_pid": "3178532",
71
+ "tasks_per_node": "1",
72
+ "topology_addr": "kolyoz1",
73
+ "topology_addr_pattern": "node",
74
+ "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
+ },
76
+ "cudaVersion": "12.6"
77
+ }
wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
1
+ {"time":"2025-05-04T16:06:15.269316376+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp6sywt0mb/port-3178556.txt","pid":3178556,"debug":false,"disable-analytics":false}
2
+ {"time":"2025-05-04T16:06:15.269366219+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2025-05-04T16:06:15.2702663+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3178556}
4
+ {"time":"2025-05-04T16:06:15.270143057+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37579,"Zone":""}}
5
+ {"time":"2025-05-04T16:06:15.448913658+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:49916"}
6
+ {"time":"2025-05-04T16:06:15.898453126+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"f65jh2lv","id":"127.0.0.1:49916"}
7
+ {"time":"2025-05-04T16:06:16.021719647+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"f65jh2lv","id":"127.0.0.1:49916"}
wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log ADDED
@@ -0,0 +1,8 @@
1
+ {"time":"2025-05-04T16:06:15.899998659+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2025-05-04T16:06:15.900045512+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log"}
3
+ {"time":"2025-05-04T16:06:16.021644692+03:00","level":"INFO","msg":"created new stream","id":"f65jh2lv"}
4
+ {"time":"2025-05-04T16:06:16.021706945+03:00","level":"INFO","msg":"stream: started","id":"f65jh2lv"}
5
+ {"time":"2025-05-04T16:06:16.021839756+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"f65jh2lv"}
6
+ {"time":"2025-05-04T16:06:16.02194891+03:00","level":"INFO","msg":"handler: started","stream_id":"f65jh2lv"}
7
+ {"time":"2025-05-04T16:06:16.022034888+03:00","level":"INFO","msg":"sender: started","stream_id":"f65jh2lv"}
8
+ {"time":"2025-05-04T16:06:16.421916148+03:00","level":"INFO","msg":"Starting system monitor"}
wandb/run-20250504_160615-f65jh2lv/logs/debug.log ADDED
@@ -0,0 +1,26 @@
1
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Configure stats pid to 3178556
3
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Applying login settings: {}
8
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug.log
10
+ 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log
11
+ 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():619] calling init triggers
12
+ 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():669] starting backend
15
+ 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():673] sending inform_init request
16
+ 2025-05-04 16:06:15,893 INFO MainThread:3178556 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2025-05-04 16:06:15,894 INFO MainThread:3178556 [wandb_init.py:init():686] backend started and connected
18
+ 2025-05-04 16:06:15,902 INFO MainThread:3178556 [wandb_init.py:init():781] updated telemetry
19
+ 2025-05-04 16:06:15,905 INFO MainThread:3178556 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2025-05-04 16:06:16,414 INFO MainThread:3178556 [wandb_init.py:init():867] starting run threads in backend
21
+ 2025-05-04 16:06:17,992 INFO MainThread:3178556 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2025-05-04 16:06:17,993 INFO MainThread:3178556 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2025-05-04 16:06:17,993 INFO MainThread:3178556 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2025-05-04 16:06:17,993 INFO MainThread:3178556 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2025-05-04 16:06:18,004 INFO MainThread:3178556 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2025-05-04 16:06:56,772 INFO MainThread:3178556 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_16-06-46_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': 
None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
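The config dump above is the Trainer's logged TrainingArguments for the t5-bc-out run. For reference, a minimal sketch of how those arguments would be constructed in the fine-tuning script is shown below; every value is copied from the log, and anything not listed is assumed to stay at its transformers 4.45.2 default.

```python
# Sketch of the TrainingArguments implied by the logged config above.
# Values are taken from the config dump; omitted arguments keep their defaults.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="t5-bc-out",
    run_name="t5-bc-out",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    fp16=True,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=500,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    save_safetensors=False,
    seed=42,
    report_to=["wandb"],
)
```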
wandb/run-20250504_160615-f65jh2lv/run-f65jh2lv.wandb ADDED
Binary file (98.3 kB).
 
wandb/run-20250504_160955-rqk2hbkf/files/config.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m: []
5
+ python_version: 3.10.15
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 2
10
+ - 3
11
+ - 5
12
+ - 11
13
+ - 12
14
+ - 49
15
+ - 51
16
+ - 53
17
+ - 55
18
+ - 71
19
+ - 98
20
+ - 105
21
+ "2":
22
+ - 1
23
+ - 2
24
+ - 3
25
+ - 5
26
+ - 11
27
+ - 12
28
+ - 49
29
+ - 51
30
+ - 53
31
+ - 55
32
+ - 71
33
+ - 98
34
+ - 105
35
+ "3":
36
+ - 23
37
+ - 55
38
+ "4": 3.10.15
39
+ "5": 0.18.7
40
+ "6": 4.45.2
41
+ "8":
42
+ - 5
43
+ "12": 0.18.7
44
+ "13": linux-x86_64
wandb/run-20250504_160955-rqk2hbkf/files/output.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Traceback (most recent call last):
2
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 33, in <module>
3
+ X_train, X_temp, y_train, y_temp = train_test_split(prep_texts, labels, test_size=0.30, random_state=42)
4
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
5
+ return func(*args, **kwargs)
6
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/model_selection/_split.py", line 2782, in train_test_split
7
+ arrays = indexable(*arrays)
8
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 514, in indexable
9
+ check_consistent_length(*result)
10
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 457, in check_consistent_length
11
+ raise ValueError(
12
+ ValueError: Found input variables with inconsistent numbers of samples: [10, 730149]
13
+ Traceback (most recent call last):
14
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 33, in <module>
15
+ X_train, X_temp, y_train, y_temp = train_test_split(prep_texts, labels, test_size=0.30, random_state=42)
16
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
17
+ return func(*args, **kwargs)
18
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/model_selection/_split.py", line 2782, in train_test_split
19
+ arrays = indexable(*arrays)
20
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 514, in indexable
21
+ check_consistent_length(*result)
22
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 457, in check_consistent_length
23
+ raise ValueError(
24
+ ValueError: Found input variables with inconsistent numbers of samples: [10, 730149]
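The two identical tracebacks above show train_test_split failing because prep_texts and labels reached it with different lengths (10 vs. 730,149). A minimal synthetic sketch of that failure mode, with a length check in front of the split, is given below; all data here is made up, and only the variable names and the call pattern mirror finetuning_bc_prott5.py.

```python
# Synthetic sketch of the length-mismatch failure reported above.
# Only prep_texts/labels and the train_test_split call mirror the script;
# the toy data and the guard are illustrative assumptions.
from sklearn.model_selection import train_test_split

prep_texts = ["M K T A Y"] * 10    # e.g. only 10 prepared sequences
labels = [0, 1] * 365              # 730 labels, so the lengths disagree

if len(prep_texts) != len(labels):
    # This is the condition behind the ValueError ([10, 730149] in the log).
    print(f"length mismatch: {len(prep_texts)} texts vs {len(labels)} labels")
else:
    X_train, X_temp, y_train, y_temp = train_test_split(
        prep_texts, labels, test_size=0.30, random_state=42
    )
```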
wandb/run-20250504_160955-rqk2hbkf/files/requirements.txt ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nvidia-cuda-cupti-cu12==12.4.127
2
+ nvidia-cuda-nvrtc-cu12==12.4.127
3
+ pyg-lib==0.4.0+pt20cu117
4
+ biopython==1.85
5
+ iniconfig==2.0.0
6
+ tokenizers==0.20.0
7
+ accelerate==1.3.0
8
+ torch==2.6.0
9
+ nvidia-nccl-cu12==2.21.5
10
+ transformers==4.45.2
11
+ nvidia-cusparse-cu12==12.3.1.170
12
+ torch-scatter==2.1.2+pt20cu117
13
+ nvidia-cusparselt-cu12==0.6.2
14
+ nvidia-nvtx-cu12==12.4.127
15
+ zstd==1.5.6.6
16
+ fair-esm==2.0.0
17
+ omegaconf==2.3.0
18
+ pluggy==1.5.0
19
+ pytest==8.3.5
20
+ nvidia-curand-cu12==10.3.5.147
21
+ nvidia-cufft-cu12==11.2.1.3
22
+ torch-cluster==1.6.3+pt20cu117
23
+ regex==2024.9.11
24
+ nvidia-cudnn-cu12==9.1.0.70
25
+ torch-spline-conv==1.2.2+pt20cu117
26
+ nvidia-cusolver-cu12==11.6.1.9
27
+ antlr4-python3-runtime==4.9.3
28
+ msgpack-numpy==0.4.8
29
+ nlp==0.2.0
30
+ einops==0.8.1
31
+ nvidia-cublas-cu12==12.4.5.8
32
+ triton==3.2.0
33
+ ninja==1.11.1.3
34
+ hydra-core==1.3.2
35
+ nvidia-nvjitlink-cu12==12.4.127
36
+ biotite==0.41.2
37
+ torch-sparse==0.6.18+pt20cu117
38
+ esm==3.1.4
39
+ sympy==1.13.1
40
+ nvidia-cuda-runtime-cu12==12.4.127
41
+ jupyter-lsp==2.2.5
42
+ jupyter-events==0.10.0
43
+ ipykernel==6.29.5
44
+ Mako==1.3.5
45
+ proto-plus==1.25.0
46
+ fst-pso==1.8.1
47
+ gensim==4.3.3
48
+ htmlmin==0.1.12
49
+ tokenizers==0.13.3
50
+ timm==1.0.11
51
+ MarkupSafe==3.0.2
52
+ safetensors==0.4.5
53
+ requests==2.32.3
54
+ gast==0.5.5
55
+ cuml==24.12.0a33
56
+ jaxlib==0.4.23.dev20240214
57
+ spacy-loggers==1.0.5
58
+ pytz==2024.1
59
+ idna==3.10
60
+ python-dateutil==2.9.0
61
+ mdurl==0.1.2
62
+ blis==0.7.10
63
+ jupyter==1.1.1
64
+ pyerfa==2.0.1.5
65
+ comm==0.2.2
66
+ pygraphviz==1.14
67
+ dill==0.3.8
68
+ paramiko==3.5.0
69
+ llama-index==0.8.36
70
+ mdit-py-plugins==0.4.2
71
+ Werkzeug==3.1.3
72
+ pyu2f==0.1.5
73
+ dask-glm==0.2.0
74
+ httpx==0.27.2
75
+ typeguard==4.4.1
76
+ mypy-extensions==1.0.0
77
+ kmodes==0.12.2
78
+ keras==2.15.0
79
+ ydata-profiling==0.0.dev0
80
+ regex==2024.11.6
81
+ xarray==2024.11.0
82
+ setuptools==75.3.0
83
+ charset-normalizer==3.4.0
84
+ jupyterlab_nvdashboard==0.11.0
85
+ pylibraft==24.12.0a36
86
+ spacy==3.7.6
87
+ mlflow-skinny==2.17.2
88
+ nvtx==0.2.10
89
+ multimethod==1.12
90
+ pexpect==4.9.0
91
+ torch==2.1.0.post301
92
+ flatbuffers==24.3.25
93
+ python-json-logger==2.0.7
94
+ PyJWT==2.9.0
95
+ multiprocess==0.70.16
96
+ colorlover==0.3.0
97
+ yarl==1.16.0
98
+ locket==1.0.0
99
+ patsy==1.0.0
100
+ rapids-dask-dependency==24.12.0a0
101
+ stanza==1.9.2
102
+ debugpy==1.8.8
103
+ jupyterlab_pygments==0.3.0
104
+ pylibcudf==24.12.0a337
105
+ lz4==4.3.3
106
+ pandas==2.2.3
107
+ tifffile==2024.9.20
108
+ pynvml==11.4.1
109
+ cufflinks==0.17.3
110
+ ipywidgets==8.1.5
111
+ requests-oauthlib==2.0.0
112
+ google-auth-oauthlib==1.2.1
113
+ rsa==4.9
114
+ webcolors==24.8.0
115
+ jsonschema-specifications==2024.10.1
116
+ scikit-learn==1.5.2
117
+ langchain-text-splitters==0.3.2
118
+ pandas-datareader==0.10.0
119
+ tomli==2.0.2
120
+ tzdata==2024.2
121
+ scikit-image==0.24.0
122
+ tensorboard_data_server==0.7.0
123
+ kiwisolver==1.4.7
124
+ cloudpathlib==0.20.0
125
+ isodate==0.6.1
126
+ adversarial-robustness-toolbox==1.19.1
127
+ SQLAlchemy==2.0.36
128
+ pytest-runner==6.0.0
129
+ pycairo==1.27.0
130
+ treelite==4.3.0
131
+ jiter==0.7.0
132
+ threadpoolctl==3.5.0
133
+ pandocfilters==1.5.0
134
+ loguru==0.7.2
135
+ smart_open==7.0.5
136
+ shellingham==1.5.4
137
+ deepspeed==0.15.4
138
+ prompt_toolkit==3.0.48
139
+ databricks-sdk==0.34.0
140
+ langchain-core==0.3.15
141
+ imageio==2.36.0
142
+ openapi-schema-pydantic==1.2.4
143
+ zict==3.0.0
144
+ cachetools==5.5.0
145
+ colorful==0.5.6
146
+ mpmath==1.3.0
147
+ nest_asyncio==1.6.0
148
+ pyFUME==0.2.25
149
+ opencv-python-headless==4.9.0
150
+ fastai==2.7.18
151
+ importlib_resources==6.4.5
152
+ binaryornot==0.4.4
153
+ evaluate==0.4.1
154
+ matplotlib-inline==0.1.7
155
+ wasabi==1.1.2
156
+ pycparser==2.22
157
+ GitPython==3.1.43
158
+ pluggy==1.5.0
159
+ async-lru==2.0.4
160
+ pgmpy==0.1.24
161
+ anyio==4.4.0
162
+ executing==2.1.0
163
+ orjson==3.10.11
164
+ humanfriendly==10.0
165
+ tornado==6.4.1
166
+ gmpy2==2.1.5
167
+ rlPyCairo==0.2.0
168
+ distributed==2024.11.0
169
+ FuzzyTM==2.0.5
170
+ torchtext==0.15.2a0+5ce3163
171
+ pytest==8.3.5
172
+ pyod==2.0.2
173
+ ImageHash==4.3.1
174
+ soupsieve==2.5
175
+ tblib==3.0.0
176
+ emoji==2.14.0
177
+ aiohappyeyeballs==2.4.3
178
+ uri-template==1.3.0
179
+ tensorflow_estimator==2.15.0
180
+ babel==2.16.0
181
+ dask-cuda==24.12.0a12
182
+ overrides==7.7.0
183
+ opencensus==0.11.3
184
+ openai==0.28.1
185
+ language_data==1.2.0
186
+ jedi==0.19.2
187
+ cookiecutter==2.6.0
188
+ entrypoints==0.4
189
+ exceptiongroup==1.2.2
190
+ marisa-trie==1.2.0
191
+ uvloop==0.20.0
192
+ aiosignal==1.3.1
193
+ Flask==3.0.3
194
+ tensorboard==2.15.2
195
+ cffi==1.17.1
196
+ tf_keras==2.15.0
197
+ absl-py==2.1.0
198
+ blinker==1.9.0
199
+ types-python-dateutil==2.9.0.20241003
200
+ opencv-python==4.9.0
201
+ frozendict==2.4.6
202
+ aiohttp-cors==0.7.0
203
+ statsmodels==0.14.4
204
+ tinycss2==1.4.0
205
+ terminado==0.18.1
206
+ pycaret==2.2.3
207
+ aiohttp==3.10.10
208
+ distributed-ucxx==0.41.0
209
+ prometheus_client==0.21.0
210
+ fastdownload==0.0.7
211
+ grpcio==1.59.3
212
+ google-api-core==2.22.0
213
+ jupyterlab_widgets==3.0.13
214
+ appdirs==1.4.4
215
+ littleutils==0.0.0
216
+ ray==2.24.0
217
+ kaggle==1.6.17
218
+ jsonschema==4.23.0
219
+ google-auth==2.36.0
220
+ scikit-base==0.11.0
221
+ visions==0.7.6
222
+ pyarrow==15.0.0
223
+ transformers==4.33.0
224
+ prometheus_flask_exporter==0.23.1
225
+ dm-tree==0.1.8
226
+ colorama==0.4.6
227
+ requests-toolbelt==1.0.0
228
+ cached-property==1.5.2
229
+ cymem==2.0.8
230
+ PyNaCl==1.5.0
231
+ PyWavelets==1.7.0
232
+ httptools==0.6.1
233
+ typing-utils==0.1.0
234
+ email_validator==2.2.0
235
+ marshmallow==3.23.1
236
+ Deprecated==1.2.14
237
+ virtualenv==20.4.7
238
+ optuna==3.6.1
239
+ jupyter_server==2.14.2
240
+ termcolor==2.5.0
241
+ mpi4py==4.0.1
242
+ torchdata==0.7.1+8cea82f
243
+ dataclasses==0.8
244
+ cloudpickle==3.1.0
245
+ tree_sitter_languages==1.10.2
246
+ tabulate==0.9.0
247
+ ipython==8.29.0
248
+ lightgbm==4.3.0
249
+ captum==0.6.0
250
+ confuse==2.0.1
251
+ torchvision==0.16.1+adc3221
252
+ lxml==4.9.4
253
+ fastapi==0.115.4
254
+ python-multipart==0.0.17
255
+ dnspython==2.7.0
256
+ jupyter-console==6.6.3
257
+ preshed==3.0.9
258
+ py-cpuinfo==9.0.0
259
+ Send2Trash==1.8.3
260
+ murmurhash==1.0.10
261
+ sniffio==1.3.1
262
+ websockets==13.1
263
+ h11==0.14.0
264
+ smmap==5.0.0
265
+ textual==0.85.2
266
+ jsonpatch==1.33
267
+ opencensus-context==0.1.3
268
+ nbconvert==7.16.4
269
+ sentry-sdk==2.19.0
270
+ opentelemetry-semantic-conventions==0.37b0
271
+ pandas-profiling==2.8.0
272
+ pillow==10.3.0
273
+ peft==0.13.2
274
+ rpds-py==0.21.0
275
+ bokeh==3.6.1
276
+ distro==1.9.0
277
+ itsdangerous==2.2.0
278
+ wandb==0.18.7
279
+ jsonpointer==3.0.0
280
+ astropy-iers-data==0.2024.11.11.0.32.38
281
+ horovod==0.28.1
282
+ graphviz==0.20.3
283
+ vtk==9.3.1
284
+ bleach==6.2.0
285
+ numexpr==2.8.7
286
+ pydantic_core==2.23.4
287
+ Jinja2==3.1.4
288
+ widgetsnbextension==4.0.13
289
+ filelock==3.16.1
290
+ catboost==1.2.7
291
+ raft-dask==24.12.0a36
292
+ async-timeout==4.0.3
293
+ datefinder==0.7.3
294
+ coloredlogs==15.0.1
295
+ platformdirs==4.3.6
296
+ spacy-legacy==3.0.12
297
+ chardet==5.2.0
298
+ jupyter_client==8.6.3
299
+ importlib_metadata==8.5.0
300
+ rfc3986-validator==0.1.1
301
+ huggingface_hub==0.26.2
302
+ PySocks==1.7.1
303
+ mlxtend==0.23.2
304
+ outdated==0.2.2
305
+ partd==1.4.2
306
+ thinc==8.2.5
307
+ astropy==6.1.6
308
+ rdflib==6.3.2
309
+ h2==4.1.0
310
+ typer==0.13.0
311
+ xyzservices==2024.9.0
312
+ toolz==0.12.1
313
+ frozenlist==1.5.0
314
+ rdkit==2024.9.2
315
+ pyasn1==0.6.1
316
+ jupyter_server_terminals==0.5.3
317
+ ucx-py==0.41.0a11
318
+ astunparse==1.6.3
319
+ simpful==2.12.0
320
+ notebook_shim==0.2.4
321
+ scipy==1.13.1
322
+ colorlog==6.9.0
323
+ tiktoken==0.3.3
324
+ plotly==5.24.1
325
+ fastrlock==0.8.2
326
+ chart-studio==1.1.0
327
+ stack-data==0.6.2
328
+ google-pasta==0.2.0
329
+ sktime==0.34.0
330
+ PyYAML==6.0.2
331
+ sympy==1.13.3
332
+ multidict==6.1.0
333
+ ml-dtypes==0.2.0
334
+ tensorboardX==2.6.2.2
335
+ decorator==5.1.1
336
+ cytoolz==1.0.0
337
+ ase==3.23.0
338
+ isoduration==20.11.0
339
+ html5lib==1.1
340
+ langsmith==0.1.142
341
+ future==1.0.0
342
+ onnx2torch==1.5.15
343
+ multipledispatch==0.6.0
344
+ protobuf==4.24.4
345
+ ucxx==0.41.0
346
+ pandas_flavor==0.6.0
347
+ msgpack==1.1.0
348
+ pyasn1_modules==0.4.1
349
+ imagecodecs==2024.1.1
350
+ mlflow==2.17.2
351
+ watchfiles==0.24.0
352
+ dm-sonnet==2.0.2
353
+ langcodes==3.4.1
354
+ freetype-py==2.3.0
355
+ argon2-cffi-bindings==21.2.0
356
+ trimesh==4.5.2
357
+ opt_einsum==3.4.0
358
+ tenacity==8.5.0
359
+ h5py==3.12.1
360
+ fastapi-cli==0.0.5
361
+ oauthlib==3.2.2
362
+ parso==0.8.4
363
+ weasel==0.4.1
364
+ yfinance==0.2.49
365
+ networkx==2.8.8
366
+ bitsandbytes==0.44.1
367
+ lazy_loader==0.4
368
+ querystring_parser==1.2.4
369
+ contourpy==1.3.0
370
+ unicodedata2==15.1.0
371
+ bcrypt==4.2.0
372
+ munkres==1.1.4
373
+ langchain==0.0.298
374
+ hpack==4.0.0
375
+ cryptography==43.0.3
376
+ umap-learn==0.5.7
377
+ arrow==1.3.0
378
+ docker==7.1.0
379
+ certifi==2025.1.31
380
+ fastjsonschema==2.20.0
381
+ tensorflow==2.15.0
382
+ googleapis-common-protos==1.65.0
383
+ iniconfig==2.0.0
384
+ Markdown==3.6
385
+ llvmlite==0.43.0
386
+ wslink==2.3.2
387
+ attrs==24.2.0
388
+ rich==13.9.4
389
+ cupy==13.3.0
390
+ uc-micro-py==1.0.3
391
+ alembic==1.14.0
392
+ joblib==1.4.2
393
+ reportlab==4.2.5
394
+ miniful==0.0.6
395
+ jupyter_core==5.7.2
396
+ wheel==0.45.0
397
+ phik==0.12.3
398
+ mistune==3.0.2
399
+ wcwidth==0.2.13
400
+ dacite==1.8.1
401
+ accelerate==0.22.0
402
+ sacremoses==0.0.53
403
+ revtok==0.0.3
404
+ python-slugify==8.0.4
405
+ tangled-up-in-unicode==0.2.0
406
+ dask==2024.11.0
407
+ markdown-it-py==3.0.0
408
+ sentencepiece==0.1.99
409
+ beautifulsoup4==4.12.3
410
+ six==1.16.0
411
+ numba-cuda==0.0.17
412
+ argon2-cffi==23.1.0
413
+ xxhash==3.5.0
414
+ hjson==3.1.0
415
+ fonttools==4.54.1
416
+ graphql-core==3.2.5
417
+ pyparsing==3.2.0
418
+ pure_eval==0.2.3
419
+ distlib==0.3.9
420
+ lightning==2.4.0
421
+ wordcloud==0.0.0
422
+ catalogue==2.0.10
423
+ jax==0.4.27
424
+ tree-sitter==0.23.2
425
+ notebook==7.2.2
426
+ dataclasses-json==0.6.7
427
+ propcache==0.2.0
428
+ numba==0.60.0
429
+ dask-expr==1.1.17
430
+ pydantic==2.9.2
431
+ gunicorn==22.0.0
432
+ missingno==0.5.2
433
+ pyOpenSSL==24.2.1
434
+ openpyxl==3.1.5
435
+ packaging==24.1
436
+ python-dotenv==1.0.1
437
+ cycler==0.12.1
438
+ types-pytz==2024.2.0.20241003
439
+ yellowbrick==1.5
440
+ referencing==0.35.1
441
+ pyLDAvis==3.4.1
442
+ lazypredict==0.2.16
443
+ fqdn==1.5.1
444
+ websocket-client==1.8.0
445
+ fastcore==1.7.19
446
+ pynvjitlink-cu12==0.3.0
447
+ pingouin==0.5.5
448
+ numpy==1.26.4
449
+ typing-inspect==0.9.0
450
+ nltk==3.9.1
451
+ onnxruntime==1.19.2
452
+ tensorflow-probability==0.23.0
453
+ datasets==3.0.2
454
+ pickleshare==0.7.5
455
+ peewee==3.17.7
456
+ torch-geometric==2.6.1
457
+ ptyprocess==0.7.0
458
+ greenlet==3.1.1
459
+ graphql-relay==3.2.0
460
+ graphene==3.4.3
461
+ et_xmlfile==2.0.0
462
+ webencodings==0.5.1
463
+ hyperframe==6.0.1
464
+ multitasking==0.0.9
465
+ typer-slim==0.13.0
466
+ onnx==1.15.0
467
+ uvicorn==0.32.0
468
+ memray==1.13.4
469
+ xgboost==2.1.2
470
+ Brotli==1.1.0
471
+ zipp==3.21.0
472
+ nbformat==5.10.4
473
+ responses==0.18.0
474
+ funcy==2.0
475
+ Pygments==2.18.0
476
+ tqdm==4.67.0
477
+ linkify-it-py==2.0.3
478
+ srsly==2.4.8
479
+ cuda-python==12.6.0
480
+ lightning-utilities==0.11.8
481
+ cudf==24.12.0a337
482
+ dask-ml==2024.4.4
483
+ docker-pycreds==0.4.0
484
+ pkgutil_resolve_name==1.3.10
485
+ opentelemetry-api==1.16.0
486
+ fsspec==2024.9.0
487
+ nbclient==0.10.0
488
+ psutil==5.9.8
489
+ pytorch-lightning==2.4.0
490
+ sortedcontainers==2.4.0
491
+ matplotlib==3.9.2
492
+ defusedxml==0.7.1
493
+ urllib3==1.26.19
494
+ jupyterlab_server==2.27.3
495
+ retrying==1.3.3
496
+ dask-cudf==24.12.0a337
497
+ sqlparse==0.5.1
498
+ text-unidecode==1.3
499
+ seaborn==0.13.2
500
+ typing_extensions==4.12.2
501
+ pyzmq==26.2.0
502
+ rfc3339-validator==0.1.4
503
+ pynndescent==0.5.13
504
+ pip==24.3.1
505
+ confection==0.1.4
506
+ wrapt==1.14.1
507
+ fastprogress==1.0.3
508
+ traitlets==5.14.3
509
+ asttokens==2.4.1
510
+ json5==0.9.28
511
+ pandas-stubs==2.2.3.241126
512
+ torchmetrics==1.2.1
513
+ gitdb==4.0.11
514
+ annotated-types==0.7.0
515
+ ipython-autotime==0.1
516
+ httpcore==1.0.6
517
+ click==8.1.7
518
+ setproctitle==1.3.3
519
+ starlette==0.41.2
520
+ jupyterlab==4.2.5
521
+ rmm==24.12.0a27
522
+ opentelemetry-sdk==1.16.0
523
+ textblob==0.15.3
524
+ imbalanced-learn==0.12.4
525
+ typeguard==4.3.0
526
+ more-itertools==10.3.0
527
+ zipp==3.19.2
528
+ autocommand==2.2.2
529
+ jaraco.context==5.3.0
530
+ packaging==24.1
531
+ importlib_metadata==8.0.0
532
+ platformdirs==4.2.2
533
+ jaraco.functools==4.0.1
534
+ importlib_resources==6.4.0
535
+ tomli==2.0.1
536
+ jaraco.text==3.12.1
537
+ wheel==0.43.0
538
+ jaraco.collections==5.1.0
539
+ typing_extensions==4.12.2
540
+ inflect==7.3.1
541
+ backports.tarfile==1.2.0
wandb/run-20250504_160955-rqk2hbkf/files/wandb-metadata.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
+ "python": "3.10.15",
4
+ "startedAt": "2025-05-04T13:09:55.928947Z",
5
+ "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
+ "codePath": "finetuning_bc_prott5.py",
7
+ "email": "zeynep.isik1@sabanciuniv.edu",
8
+ "root": "/arf/scratch/zisik/prott5_bc_ft",
9
+ "host": "kolyoz1",
10
+ "username": "zisik",
11
+ "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
+ "codePathLocal": "finetuning_bc_prott5.py",
13
+ "cpu_count": 64,
14
+ "cpu_count_logical": 64,
15
+ "gpu": "NVIDIA H100 80GB HBM3",
16
+ "gpu_count": 1,
17
+ "disk": {
18
+ "/": {
19
+ "total": "7643995308032",
20
+ "used": "272740364288"
21
+ }
22
+ },
23
+ "memory": {
24
+ "total": "1081373220864"
25
+ },
26
+ "cpu": {
27
+ "count": 64,
28
+ "countLogical": 64
29
+ },
30
+ "gpu_nvidia": [
31
+ {
32
+ "name": "NVIDIA H100 80GB HBM3",
33
+ "memoryTotal": "85520809984",
34
+ "cudaCores": 16896,
35
+ "architecture": "Hopper"
36
+ }
37
+ ],
38
+ "slurm": {
39
+ "cluster_name": "cuda",
40
+ "conf": "/etc/slurm/slurm.conf",
41
+ "cpus_on_node": "16",
42
+ "cpus_per_task": "16",
43
+ "gpus_on_node": "1",
44
+ "gtids": "0",
45
+ "job_account": "tbag154",
46
+ "job_cpus_per_node": "16",
47
+ "job_end_time": "1746623370",
48
+ "job_gid": "11636",
49
+ "job_gpus": "1",
50
+ "job_id": "1027946",
51
+ "job_name": "msa_ph_pt",
52
+ "job_nodelist": "kolyoz1",
53
+ "job_num_nodes": "1",
54
+ "job_partition": "kolyoz-cuda",
55
+ "job_qos": "tbag",
56
+ "job_start_time": "1746364170",
57
+ "job_uid": "11636",
58
+ "job_user": "zisik",
59
+ "jobid": "1027946",
60
+ "localid": "0",
61
+ "mem_per_cpu": "14000",
62
+ "nnodes": "1",
63
+ "node_aliases": "(null)",
64
+ "nodeid": "0",
65
+ "nodelist": "kolyoz1",
66
+ "prio_process": "0",
67
+ "procid": "0",
68
+ "submit_dir": "/arf/scratch/zisik",
69
+ "submit_host": "cuda-ui",
70
+ "task_pid": "3179106",
71
+ "tasks_per_node": "1",
72
+ "topology_addr": "kolyoz1",
73
+ "topology_addr_pattern": "node",
74
+ "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
+ },
76
+ "cudaVersion": "12.6"
77
+ }
wandb/run-20250504_160955-rqk2hbkf/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":2}}
wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-05-04T16:09:55.241065297+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmplpbc9pnb/port-3179132.txt","pid":3179132,"debug":false,"disable-analytics":false}
2
+ {"time":"2025-05-04T16:09:55.241124751+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2025-05-04T16:09:55.241864+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37981,"Zone":""}}
4
+ {"time":"2025-05-04T16:09:55.241967868+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3179132}
5
+ {"time":"2025-05-04T16:09:55.428960455+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:40950"}
6
+ {"time":"2025-05-04T16:09:55.928508592+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"rqk2hbkf","id":"127.0.0.1:40950"}
7
+ {"time":"2025-05-04T16:09:56.056026556+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"rqk2hbkf","id":"127.0.0.1:40950"}
8
+ {"time":"2025-05-04T16:09:58.597503038+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:40950"}
9
+ {"time":"2025-05-04T16:09:58.597631333+03:00","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-05-04T16:09:58.597601675+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:40950"}
11
+ {"time":"2025-05-04T16:09:58.597793186+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:40950"}
12
+ {"time":"2025-05-04T16:09:59.528863432+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:40950"}
13
+ {"time":"2025-05-04T16:09:59.528880642+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:40950"}
14
+ {"time":"2025-05-04T16:09:59.528893164+03:00","level":"INFO","msg":"server is closed"}
wandb/run-20250504_160955-rqk2hbkf/logs/debug-internal.log ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-05-04T16:09:55.930352223+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2025-05-04T16:09:55.930398642+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log"}
3
+ {"time":"2025-05-04T16:09:56.055953645+03:00","level":"INFO","msg":"created new stream","id":"rqk2hbkf"}
4
+ {"time":"2025-05-04T16:09:56.056013829+03:00","level":"INFO","msg":"stream: started","id":"rqk2hbkf"}
5
+ {"time":"2025-05-04T16:09:56.056183059+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"rqk2hbkf"}
6
+ {"time":"2025-05-04T16:09:56.056291373+03:00","level":"INFO","msg":"sender: started","stream_id":"rqk2hbkf"}
7
+ {"time":"2025-05-04T16:09:56.056498843+03:00","level":"INFO","msg":"handler: started","stream_id":"rqk2hbkf"}
8
+ {"time":"2025-05-04T16:09:56.455842701+03:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2025-05-04T16:09:58.597599181+03:00","level":"INFO","msg":"stream: closing","id":"rqk2hbkf"}
10
+ {"time":"2025-05-04T16:09:58.597716873+03:00","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2025-05-04T16:09:58.598825235+03:00","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2025-05-04T16:09:58.792882763+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
+ {"time":"2025-05-04T16:09:58.792915401+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
+ {"time":"2025-05-04T16:09:58.792926694+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
+ {"time":"2025-05-04T16:09:59.286977407+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
16
+ {"time":"2025-05-04T16:09:59.528666057+03:00","level":"INFO","msg":"handler: closed","stream_id":"rqk2hbkf"}
17
+ {"time":"2025-05-04T16:09:59.528710573+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"rqk2hbkf"}
18
+ {"time":"2025-05-04T16:09:59.528726369+03:00","level":"INFO","msg":"sender: closed","stream_id":"rqk2hbkf"}
19
+ {"time":"2025-05-04T16:09:59.528792264+03:00","level":"INFO","msg":"stream: closed","id":"rqk2hbkf"}