isikz committed
Commit 79dffb2 · verified · 1 parent: eec74ee

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +3 -0
  2. added_tokens.json +102 -0
  3. config.json +8 -0
  4. finetuning_bc_prott5.py +149 -0
  5. pytorch_model.bin +3 -0
  6. ready_to_train.csv +3 -0
  7. special_tokens_map.json +125 -0
  8. spiece.model +3 -0
  9. t5-bc-out/checkpoint-47916/optimizer.pt +3 -0
  10. t5-bc-out/checkpoint-47916/pytorch_model.bin +3 -0
  11. t5-bc-out/checkpoint-47916/rng_state.pth +3 -0
  12. t5-bc-out/checkpoint-47916/scheduler.pt +3 -0
  13. t5-bc-out/checkpoint-47916/trainer_state.json +725 -0
  14. t5-bc-out/checkpoint-47916/training_args.bin +3 -0
  15. tokenizer_config.json +941 -0
  16. training_args.bin +3 -0
  17. wandb/debug-internal.log +21 -0
  18. wandb/debug.log +27 -0
  19. wandb/run-20250504_132610-pxg645u5/files/config.yaml +44 -0
  20. wandb/run-20250504_132610-pxg645u5/files/output.log +37 -0
  21. wandb/run-20250504_132610-pxg645u5/files/requirements.txt +541 -0
  22. wandb/run-20250504_132610-pxg645u5/files/wandb-metadata.json +77 -0
  23. wandb/run-20250504_132610-pxg645u5/files/wandb-summary.json +1 -0
  24. wandb/run-20250504_132610-pxg645u5/logs/debug-core.log +14 -0
  25. wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log +19 -0
  26. wandb/run-20250504_132610-pxg645u5/logs/debug.log +26 -0
  27. wandb/run-20250504_132610-pxg645u5/run-pxg645u5.wandb +0 -0
  28. wandb/run-20250504_132912-1agsw1y8/files/config.yaml +374 -0
  29. wandb/run-20250504_132912-1agsw1y8/files/output.log +87 -0
  30. wandb/run-20250504_132912-1agsw1y8/files/requirements.txt +541 -0
  31. wandb/run-20250504_132912-1agsw1y8/files/wandb-metadata.json +77 -0
  32. wandb/run-20250504_132912-1agsw1y8/files/wandb-summary.json +1 -0
  33. wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log +14 -0
  34. wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log +19 -0
  35. wandb/run-20250504_132912-1agsw1y8/logs/debug.log +27 -0
  36. wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb +3 -0
  37. wandb/run-20250504_160615-f65jh2lv/files/output.log +8 -0
  38. wandb/run-20250504_160615-f65jh2lv/files/requirements.txt +541 -0
  39. wandb/run-20250504_160615-f65jh2lv/files/wandb-metadata.json +77 -0
  40. wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log +7 -0
  41. wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log +8 -0
  42. wandb/run-20250504_160615-f65jh2lv/logs/debug.log +26 -0
  43. wandb/run-20250504_160615-f65jh2lv/run-f65jh2lv.wandb +0 -0
  44. wandb/run-20250504_160955-rqk2hbkf/files/config.yaml +44 -0
  45. wandb/run-20250504_160955-rqk2hbkf/files/output.log +24 -0
  46. wandb/run-20250504_160955-rqk2hbkf/files/requirements.txt +541 -0
  47. wandb/run-20250504_160955-rqk2hbkf/files/wandb-metadata.json +77 -0
  48. wandb/run-20250504_160955-rqk2hbkf/files/wandb-summary.json +1 -0
  49. wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log +14 -0
  50. wandb/run-20250504_160955-rqk2hbkf/logs/debug-internal.log +19 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ ready_to_train.csv filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20250504_172503-0ictlmwf/run-0ictlmwf.wandb filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,102 @@
1
+ {
2
+ "<extra_id_0>": 127,
3
+ "<extra_id_10>": 117,
4
+ "<extra_id_11>": 116,
5
+ "<extra_id_12>": 115,
6
+ "<extra_id_13>": 114,
7
+ "<extra_id_14>": 113,
8
+ "<extra_id_15>": 112,
9
+ "<extra_id_16>": 111,
10
+ "<extra_id_17>": 110,
11
+ "<extra_id_18>": 109,
12
+ "<extra_id_19>": 108,
13
+ "<extra_id_1>": 126,
14
+ "<extra_id_20>": 107,
15
+ "<extra_id_21>": 106,
16
+ "<extra_id_22>": 105,
17
+ "<extra_id_23>": 104,
18
+ "<extra_id_24>": 103,
19
+ "<extra_id_25>": 102,
20
+ "<extra_id_26>": 101,
21
+ "<extra_id_27>": 100,
22
+ "<extra_id_28>": 99,
23
+ "<extra_id_29>": 98,
24
+ "<extra_id_2>": 125,
25
+ "<extra_id_30>": 97,
26
+ "<extra_id_31>": 96,
27
+ "<extra_id_32>": 95,
28
+ "<extra_id_33>": 94,
29
+ "<extra_id_34>": 93,
30
+ "<extra_id_35>": 92,
31
+ "<extra_id_36>": 91,
32
+ "<extra_id_37>": 90,
33
+ "<extra_id_38>": 89,
34
+ "<extra_id_39>": 88,
35
+ "<extra_id_3>": 124,
36
+ "<extra_id_40>": 87,
37
+ "<extra_id_41>": 86,
38
+ "<extra_id_42>": 85,
39
+ "<extra_id_43>": 84,
40
+ "<extra_id_44>": 83,
41
+ "<extra_id_45>": 82,
42
+ "<extra_id_46>": 81,
43
+ "<extra_id_47>": 80,
44
+ "<extra_id_48>": 79,
45
+ "<extra_id_49>": 78,
46
+ "<extra_id_4>": 123,
47
+ "<extra_id_50>": 77,
48
+ "<extra_id_51>": 76,
49
+ "<extra_id_52>": 75,
50
+ "<extra_id_53>": 74,
51
+ "<extra_id_54>": 73,
52
+ "<extra_id_55>": 72,
53
+ "<extra_id_56>": 71,
54
+ "<extra_id_57>": 70,
55
+ "<extra_id_58>": 69,
56
+ "<extra_id_59>": 68,
57
+ "<extra_id_5>": 122,
58
+ "<extra_id_60>": 67,
59
+ "<extra_id_61>": 66,
60
+ "<extra_id_62>": 65,
61
+ "<extra_id_63>": 64,
62
+ "<extra_id_64>": 63,
63
+ "<extra_id_65>": 62,
64
+ "<extra_id_66>": 61,
65
+ "<extra_id_67>": 60,
66
+ "<extra_id_68>": 59,
67
+ "<extra_id_69>": 58,
68
+ "<extra_id_6>": 121,
69
+ "<extra_id_70>": 57,
70
+ "<extra_id_71>": 56,
71
+ "<extra_id_72>": 55,
72
+ "<extra_id_73>": 54,
73
+ "<extra_id_74>": 53,
74
+ "<extra_id_75>": 52,
75
+ "<extra_id_76>": 51,
76
+ "<extra_id_77>": 50,
77
+ "<extra_id_78>": 49,
78
+ "<extra_id_79>": 48,
79
+ "<extra_id_7>": 120,
80
+ "<extra_id_80>": 47,
81
+ "<extra_id_81>": 46,
82
+ "<extra_id_82>": 45,
83
+ "<extra_id_83>": 44,
84
+ "<extra_id_84>": 43,
85
+ "<extra_id_85>": 42,
86
+ "<extra_id_86>": 41,
87
+ "<extra_id_87>": 40,
88
+ "<extra_id_88>": 39,
89
+ "<extra_id_89>": 38,
90
+ "<extra_id_8>": 119,
91
+ "<extra_id_90>": 37,
92
+ "<extra_id_91>": 36,
93
+ "<extra_id_92>": 35,
94
+ "<extra_id_93>": 34,
95
+ "<extra_id_94>": 33,
96
+ "<extra_id_95>": 32,
97
+ "<extra_id_96>": 31,
98
+ "<extra_id_97>": 30,
99
+ "<extra_id_98>": 29,
100
+ "<extra_id_99>": 28,
101
+ "<extra_id_9>": 118
102
+ }
config.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "architectures": [
+     "T5BinaryClassifier"
+   ],
+   "model_type": "t5",
+   "d_model": 1024,
+   "is_encoder_decoder": false
+ }
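Note that `T5BinaryClassifier` is a custom classification head defined in `finetuning_bc_prott5.py` (below), not a stock `transformers` architecture, so this config alone is not enough for `AutoModel.from_pretrained` to rebuild the model. A minimal loading sketch, assuming the class definition has been copied into the current module (importing the training script directly would re-run the training) and the repository files are available locally:

```python
import torch

# Assumes the T5BinaryClassifier class from finetuning_bc_prott5.py is in scope.
model = T5BinaryClassifier("Rostlab/prot_t5_xl_uniref50")

# pytorch_model.bin in this repo holds the fine-tuned encoder + classifier weights.
state_dict = torch.load("pytorch_model.bin", map_location="cpu")
model.load_state_dict(state_dict)
model.eval()
```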
finetuning_bc_prott5.py ADDED
@@ -0,0 +1,149 @@
+ import re
+
+ import pandas as pd
+ import torch
+ import torch.nn as nn
+ import wandb
+ from datasets import Dataset
+ from sklearn.metrics import accuracy_score
+ from sklearn.model_selection import train_test_split
+ from transformers import (T5EncoderModel, T5Tokenizer,
+                           Trainer, TrainingArguments)
+ from transformers.modeling_outputs import SequenceClassifierOutput
+
+ # ---------------------------
+ # 1. I/O AND LOGIN
+ # ---------------------------
+
+ wandb.login()
+ wandb.init(project='finetuning-bc-protT5')
+
+ # ---------------------------
+ # 2. DATA PREPARATION
+ # ---------------------------
+ # Keep only 15-residue windows (site +/-7 residues): positives come from
+ # SITE_+/-7_AA, negatives from NON_PH_SITE.
+ data = pd.read_csv("ready_to_train.csv")
+ pos = data.loc[data["SITE_+/-7_AA"].str.len() == 15]["SITE_+/-7_AA"].tolist()
+ neg = data.loc[data["NON_PH_SITE"].str.len() == 15]["NON_PH_SITE"].tolist()
+ labels = [1] * len(pos) + [0] * len(neg)
+ texts = pos + neg
+
+ # ProtT5 expects upper-case, space-separated residues; map the rare amino
+ # acids U, Z, O, B to X and padding underscores to "-".
+ prep_texts = [" ".join(list(t.upper())) for t in texts]
+ prep_texts = [re.sub(r"[UZOB]", "X", pt).replace("_", "-") for pt in prep_texts]
+
+ # 70 % train, 15 % validation, 15 % test
+ X_train, X_temp, y_train, y_temp = train_test_split(prep_texts, labels, test_size=0.30, random_state=42)
+ X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42)
+
+ tokenizer = T5Tokenizer.from_pretrained("Rostlab/prot_t5_xl_uniref50")
+
+ def tokenize(batch):
+     return tokenizer(batch["text"],
+                      padding="max_length",
+                      truncation=True,
+                      max_length=64)
+
+ train_ds = Dataset.from_dict({"text": X_train, "label": y_train})
+ val_ds = Dataset.from_dict({"text": X_val, "label": y_val})
+
+ train_ds = train_ds.map(tokenize, batched=True).with_format("torch")
+ val_ds = val_ds.map(tokenize, batched=True).with_format("torch")
+
+ # ---------------------------
+ # 3. MODEL: T5 ENCODER + CLASSIFICATION HEAD
+ # ---------------------------
+ class T5BinaryClassifier(nn.Module):
+     def __init__(self, model_name, dropout=0.1):
+         super().__init__()
+         self.encoder = T5EncoderModel.from_pretrained(model_name)
+         enc_dim = self.encoder.config.d_model   # 1024 for prot_t5_xl
+         self.dropout = nn.Dropout(dropout)
+         self.cls = nn.Linear(enc_dim, 2)        # binary classification
+
+     def forward(self,
+                 input_ids=None,
+                 attention_mask=None,
+                 labels=None,
+                 **kwargs):
+         enc_out = self.encoder(input_ids=input_ids,
+                                attention_mask=attention_mask,
+                                return_dict=True)
+         # CLS-like sequence vector: mean-pool the encoder states instead of
+         # reading a single token position (e.g. the <pad> token, id=0).
+         hidden = enc_out.last_hidden_state      # (B, L, D)
+         pooled = hidden.mean(dim=1)             # (B, D)
+
+         logits = self.cls(self.dropout(pooled))
+
+         loss = None
+         if labels is not None:
+             loss_fct = nn.CrossEntropyLoss()
+             loss = loss_fct(logits, labels)
+
+         return SequenceClassifierOutput(
+             loss=loss,
+             logits=logits,
+             hidden_states=enc_out.hidden_states,
+             attentions=enc_out.attentions,
+         )
+
+ model = T5BinaryClassifier("Rostlab/prot_t5_xl_uniref50").cuda()
+
+ # ---------------------------
+ # 4. TRAINING ARGUMENTS
+ # ---------------------------
+ args = TrainingArguments(
+     output_dir="t5-bc-out",
+     num_train_epochs=3,
+     learning_rate=5e-5,
+     per_device_train_batch_size=8,   # prot_t5_xl is large; 8-16 is recommended instead of 512
+     per_device_eval_batch_size=8,
+     gradient_accumulation_steps=4,   # effective batch size 32
+     evaluation_strategy="epoch",
+     load_best_model_at_end=True,
+     save_strategy="epoch",
+     save_safetensors=False,
+     report_to=["wandb"],
+     fp16=True,
+ )
+
+ def compute_metrics(eval_pred):
+     logits, labels = eval_pred
+     preds = logits.argmax(-1)
+     acc = accuracy_score(labels, preds)
+     return {"accuracy": acc}
+
+ trainer = Trainer(
+     model=model,
+     args=args,
+     train_dataset=train_ds,
+     eval_dataset=val_ds,
+     compute_metrics=compute_metrics,
+ )
+
+ trainer.train()
+
+ # ---------------------------
+ # 5. TEST & SAVE
+ # ---------------------------
+
+ # Tokenize the held-out test split and evaluate the best checkpoint on it.
+ test_ds = Dataset.from_dict({"text": X_test, "label": y_test})
+ test_ds = test_ds.map(tokenize, batched=True).with_format("torch")
+
+ metrics = trainer.evaluate(test_ds)
+ print(metrics)
+
+ # ---- Manual save ----
+ trainer.save_model("/arf/scratch/zisik/prott5_bc_ft")
+ tokenizer.save_pretrained("/arf/scratch/zisik/prott5_bc_ft")
+
+ #model.push_to_hub("isikz/prot_t5_binary_classifier")
+ #tokenizer.push_to_hub("isikz/prot_t5_binary_classifier")
+ #wandb.finish()
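As a usage note, a minimal inference sketch follows. It assumes the fine-tuned weights saved above are available at the same path, that the `T5BinaryClassifier` class from the script is in scope, and it reuses the training-time preprocessing; the example sequence is a placeholder.

```python
import re
import torch
from transformers import T5Tokenizer

ckpt_dir = "/arf/scratch/zisik/prott5_bc_ft"    # directory written by trainer.save_model above

tokenizer = T5Tokenizer.from_pretrained(ckpt_dir)
model = T5BinaryClassifier("Rostlab/prot_t5_xl_uniref50")   # class defined in the script above
model.load_state_dict(torch.load(f"{ckpt_dir}/pytorch_model.bin", map_location="cpu"))
model.eval()

seq = "AKRSASPEKQELLSR"                          # placeholder 15-residue window around a candidate site
prep = " ".join(re.sub(r"[UZOB]", "X", seq.upper()).replace("_", "-"))
inputs = tokenizer(prep, return_tensors="pt",
                   padding="max_length", truncation=True, max_length=64)

with torch.no_grad():
    logits = model(**inputs).logits
print(torch.softmax(logits, dim=-1)[0, 1].item())   # probability of the positive class
```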
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb84e54c51f53eb1a49e0d52446d9e470b5ea320ae7174917832ab5aef4d31a2
+ size 4832674810
ready_to_train.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:498eaceee30faf2510396e17a4f8417ce65c37e576c8792a80da432313f03c0e
+ size 18584710
special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74da7b4afcde53faa570114b530c726135bdfcdb813dec3abfb27f9d44db7324
+ size 237990
t5-bc-out/checkpoint-47916/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3923cb1c3204d99805be4282d57866443cbdd1f5f71ad6af1c81ee4a783d7e9d
+ size 9665321730
t5-bc-out/checkpoint-47916/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:80957033108061961f1d326abe9e2829f4d78524a478d52ecec37db106fbe5cc
+ size 4832674810
t5-bc-out/checkpoint-47916/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d1af688f89b64a7c9246d9d5848b03b2543dd68c97861fab57333014cd508ec2
+ size 14244
t5-bc-out/checkpoint-47916/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:62074fe1abf3e8558aec193d31cdd76f6c2650659b0c8d62d4b5ff6d20fd6edd
+ size 1064
t5-bc-out/checkpoint-47916/trainer_state.json ADDED
@@ -0,0 +1,725 @@
1
+ {
2
+ "best_metric": 0.1829579919576645,
3
+ "best_model_checkpoint": "t5-bc-out/checkpoint-31944",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 47916,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03130478337089907,
13
+ "grad_norm": 1.3348039388656616,
14
+ "learning_rate": 4.947825361048502e-05,
15
+ "loss": 0.5856,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.06260956674179814,
20
+ "grad_norm": 2.473144292831421,
21
+ "learning_rate": 4.8956507220970036e-05,
22
+ "loss": 0.5183,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.09391435011269722,
27
+ "grad_norm": 3.6210598945617676,
28
+ "learning_rate": 4.843476083145505e-05,
29
+ "loss": 0.4879,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.12521913348359628,
34
+ "grad_norm": 6.336288928985596,
35
+ "learning_rate": 4.791405793471909e-05,
36
+ "loss": 0.4579,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.15652391685449538,
41
+ "grad_norm": 2.6699299812316895,
42
+ "learning_rate": 4.739231154520411e-05,
43
+ "loss": 0.4421,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.18782870022539444,
48
+ "grad_norm": 7.918868064880371,
49
+ "learning_rate": 4.6870565155689124e-05,
50
+ "loss": 0.4205,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.2191334835962935,
55
+ "grad_norm": 2.9816083908081055,
56
+ "learning_rate": 4.634881876617414e-05,
57
+ "loss": 0.4044,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.25043826696719257,
62
+ "grad_norm": 7.581803321838379,
63
+ "learning_rate": 4.582707237665916e-05,
64
+ "loss": 0.3901,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.28174305033809166,
69
+ "grad_norm": 6.031352996826172,
70
+ "learning_rate": 4.5305325987144174e-05,
71
+ "loss": 0.3834,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 0.31304783370899075,
76
+ "grad_norm": 2.581623077392578,
77
+ "learning_rate": 4.478357959762919e-05,
78
+ "loss": 0.3601,
79
+ "step": 5000
80
+ },
81
+ {
82
+ "epoch": 0.3443526170798898,
83
+ "grad_norm": 4.7024245262146,
84
+ "learning_rate": 4.42618332081142e-05,
85
+ "loss": 0.3492,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 0.3756574004507889,
90
+ "grad_norm": 8.929915428161621,
91
+ "learning_rate": 4.374217380415728e-05,
92
+ "loss": 0.3435,
93
+ "step": 6000
94
+ },
95
+ {
96
+ "epoch": 0.406962183821688,
97
+ "grad_norm": 3.694370985031128,
98
+ "learning_rate": 4.32204274146423e-05,
99
+ "loss": 0.3366,
100
+ "step": 6500
101
+ },
102
+ {
103
+ "epoch": 0.438266967192587,
104
+ "grad_norm": 5.6961350440979,
105
+ "learning_rate": 4.2698681025127307e-05,
106
+ "loss": 0.3259,
107
+ "step": 7000
108
+ },
109
+ {
110
+ "epoch": 0.4695717505634861,
111
+ "grad_norm": 2.740339756011963,
112
+ "learning_rate": 4.217693463561232e-05,
113
+ "loss": 0.3224,
114
+ "step": 7500
115
+ },
116
+ {
117
+ "epoch": 0.5008765339343851,
118
+ "grad_norm": 3.7285494804382324,
119
+ "learning_rate": 4.165518824609734e-05,
120
+ "loss": 0.3103,
121
+ "step": 8000
122
+ },
123
+ {
124
+ "epoch": 0.5321813173052843,
125
+ "grad_norm": 5.1480326652526855,
126
+ "learning_rate": 4.1133441856582356e-05,
127
+ "loss": 0.3107,
128
+ "step": 8500
129
+ },
130
+ {
131
+ "epoch": 0.5634861006761833,
132
+ "grad_norm": 4.8817620277404785,
133
+ "learning_rate": 4.0611695467067366e-05,
134
+ "loss": 0.2945,
135
+ "step": 9000
136
+ },
137
+ {
138
+ "epoch": 0.5947908840470824,
139
+ "grad_norm": 5.003459453582764,
140
+ "learning_rate": 4.008994907755238e-05,
141
+ "loss": 0.2903,
142
+ "step": 9500
143
+ },
144
+ {
145
+ "epoch": 0.6260956674179815,
146
+ "grad_norm": 6.451533317565918,
147
+ "learning_rate": 3.95682026880374e-05,
148
+ "loss": 0.284,
149
+ "step": 10000
150
+ },
151
+ {
152
+ "epoch": 0.6574004507888805,
153
+ "grad_norm": 7.442136287689209,
154
+ "learning_rate": 3.9046456298522416e-05,
155
+ "loss": 0.276,
156
+ "step": 10500
157
+ },
158
+ {
159
+ "epoch": 0.6887052341597796,
160
+ "grad_norm": 3.617513656616211,
161
+ "learning_rate": 3.852575340178646e-05,
162
+ "loss": 0.27,
163
+ "step": 11000
164
+ },
165
+ {
166
+ "epoch": 0.7200100175306787,
167
+ "grad_norm": 5.776317596435547,
168
+ "learning_rate": 3.800400701227148e-05,
169
+ "loss": 0.2666,
170
+ "step": 11500
171
+ },
172
+ {
173
+ "epoch": 0.7513148009015778,
174
+ "grad_norm": 6.264099597930908,
175
+ "learning_rate": 3.7482260622756494e-05,
176
+ "loss": 0.257,
177
+ "step": 12000
178
+ },
179
+ {
180
+ "epoch": 0.7826195842724768,
181
+ "grad_norm": 4.222651481628418,
182
+ "learning_rate": 3.6960514233241504e-05,
183
+ "loss": 0.2566,
184
+ "step": 12500
185
+ },
186
+ {
187
+ "epoch": 0.813924367643376,
188
+ "grad_norm": 6.953704833984375,
189
+ "learning_rate": 3.643876784372652e-05,
190
+ "loss": 0.2502,
191
+ "step": 13000
192
+ },
193
+ {
194
+ "epoch": 0.845229151014275,
195
+ "grad_norm": 3.2264351844787598,
196
+ "learning_rate": 3.591806494699057e-05,
197
+ "loss": 0.2364,
198
+ "step": 13500
199
+ },
200
+ {
201
+ "epoch": 0.876533934385174,
202
+ "grad_norm": 6.233669281005859,
203
+ "learning_rate": 3.539631855747558e-05,
204
+ "loss": 0.2451,
205
+ "step": 14000
206
+ },
207
+ {
208
+ "epoch": 0.9078387177560732,
209
+ "grad_norm": 8.540342330932617,
210
+ "learning_rate": 3.48745721679606e-05,
211
+ "loss": 0.2364,
212
+ "step": 14500
213
+ },
214
+ {
215
+ "epoch": 0.9391435011269722,
216
+ "grad_norm": 4.3881516456604,
217
+ "learning_rate": 3.4352825778445616e-05,
218
+ "loss": 0.2312,
219
+ "step": 15000
220
+ },
221
+ {
222
+ "epoch": 0.9704482844978712,
223
+ "grad_norm": 6.7153167724609375,
224
+ "learning_rate": 3.383107938893063e-05,
225
+ "loss": 0.2323,
226
+ "step": 15500
227
+ },
228
+ {
229
+ "epoch": 1.0,
230
+ "eval_accuracy": 0.9204725991125071,
231
+ "eval_loss": 0.2026778757572174,
232
+ "eval_runtime": 180.0542,
233
+ "eval_samples_per_second": 608.272,
234
+ "eval_steps_per_second": 76.038,
235
+ "step": 15972
236
+ },
237
+ {
238
+ "epoch": 1.0017530678687703,
239
+ "grad_norm": 4.329936504364014,
240
+ "learning_rate": 3.331037649219468e-05,
241
+ "loss": 0.2163,
242
+ "step": 16000
243
+ },
244
+ {
245
+ "epoch": 1.0330578512396693,
246
+ "grad_norm": 8.806492805480957,
247
+ "learning_rate": 3.278863010267969e-05,
248
+ "loss": 0.139,
249
+ "step": 16500
250
+ },
251
+ {
252
+ "epoch": 1.0643626346105686,
253
+ "grad_norm": 9.733407020568848,
254
+ "learning_rate": 3.226688371316471e-05,
255
+ "loss": 0.1419,
256
+ "step": 17000
257
+ },
258
+ {
259
+ "epoch": 1.0956674179814676,
260
+ "grad_norm": 3.5503616333007812,
261
+ "learning_rate": 3.174513732364972e-05,
262
+ "loss": 0.1361,
263
+ "step": 17500
264
+ },
265
+ {
266
+ "epoch": 1.1269722013523666,
267
+ "grad_norm": 5.853847503662109,
268
+ "learning_rate": 3.122339093413474e-05,
269
+ "loss": 0.1398,
270
+ "step": 18000
271
+ },
272
+ {
273
+ "epoch": 1.1582769847232657,
274
+ "grad_norm": 1.6936904191970825,
275
+ "learning_rate": 3.0701644544619754e-05,
276
+ "loss": 0.1373,
277
+ "step": 18500
278
+ },
279
+ {
280
+ "epoch": 1.1895817680941647,
281
+ "grad_norm": 1.5299335718154907,
282
+ "learning_rate": 3.017989815510477e-05,
283
+ "loss": 0.1423,
284
+ "step": 19000
285
+ },
286
+ {
287
+ "epoch": 1.220886551465064,
288
+ "grad_norm": 3.899322986602783,
289
+ "learning_rate": 2.965815176558978e-05,
290
+ "loss": 0.1391,
291
+ "step": 19500
292
+ },
293
+ {
294
+ "epoch": 1.252191334835963,
295
+ "grad_norm": 2.3118438720703125,
296
+ "learning_rate": 2.913744886885383e-05,
297
+ "loss": 0.1408,
298
+ "step": 20000
299
+ },
300
+ {
301
+ "epoch": 1.283496118206862,
302
+ "grad_norm": 0.6930440068244934,
303
+ "learning_rate": 2.8615702479338845e-05,
304
+ "loss": 0.1408,
305
+ "step": 20500
306
+ },
307
+ {
308
+ "epoch": 1.314800901577761,
309
+ "grad_norm": 2.851909875869751,
310
+ "learning_rate": 2.8093956089823858e-05,
311
+ "loss": 0.1404,
312
+ "step": 21000
313
+ },
314
+ {
315
+ "epoch": 1.3461056849486601,
316
+ "grad_norm": 0.22848767042160034,
317
+ "learning_rate": 2.7572209700308875e-05,
318
+ "loss": 0.1382,
319
+ "step": 21500
320
+ },
321
+ {
322
+ "epoch": 1.3774104683195592,
323
+ "grad_norm": 3.973886489868164,
324
+ "learning_rate": 2.7050463310793888e-05,
325
+ "loss": 0.1396,
326
+ "step": 22000
327
+ },
328
+ {
329
+ "epoch": 1.4087152516904582,
330
+ "grad_norm": 3.140080451965332,
331
+ "learning_rate": 2.6529760414057936e-05,
332
+ "loss": 0.127,
333
+ "step": 22500
334
+ },
335
+ {
336
+ "epoch": 1.4400200350613575,
337
+ "grad_norm": 5.468123435974121,
338
+ "learning_rate": 2.6008014024542953e-05,
339
+ "loss": 0.1276,
340
+ "step": 23000
341
+ },
342
+ {
343
+ "epoch": 1.4713248184322565,
344
+ "grad_norm": 0.626640260219574,
345
+ "learning_rate": 2.5486267635027966e-05,
346
+ "loss": 0.1219,
347
+ "step": 23500
348
+ },
349
+ {
350
+ "epoch": 1.5026296018031555,
351
+ "grad_norm": 3.1899547576904297,
352
+ "learning_rate": 2.496452124551298e-05,
353
+ "loss": 0.1319,
354
+ "step": 24000
355
+ },
356
+ {
357
+ "epoch": 1.5339343851740546,
358
+ "grad_norm": 3.199150562286377,
359
+ "learning_rate": 2.4442774855997996e-05,
360
+ "loss": 0.1298,
361
+ "step": 24500
362
+ },
363
+ {
364
+ "epoch": 1.5652391685449536,
365
+ "grad_norm": 5.129565715789795,
366
+ "learning_rate": 2.3921028466483013e-05,
367
+ "loss": 0.1217,
368
+ "step": 25000
369
+ },
370
+ {
371
+ "epoch": 1.5965439519158529,
372
+ "grad_norm": 4.223311424255371,
373
+ "learning_rate": 2.339928207696803e-05,
374
+ "loss": 0.1288,
375
+ "step": 25500
376
+ },
377
+ {
378
+ "epoch": 1.6278487352867517,
379
+ "grad_norm": 10.741965293884277,
380
+ "learning_rate": 2.2877535687453046e-05,
381
+ "loss": 0.1263,
382
+ "step": 26000
383
+ },
384
+ {
385
+ "epoch": 1.659153518657651,
386
+ "grad_norm": 3.0217132568359375,
387
+ "learning_rate": 2.235578929793806e-05,
388
+ "loss": 0.122,
389
+ "step": 26500
390
+ },
391
+ {
392
+ "epoch": 1.69045830202855,
393
+ "grad_norm": 7.847172737121582,
394
+ "learning_rate": 2.1835086401202104e-05,
395
+ "loss": 0.122,
396
+ "step": 27000
397
+ },
398
+ {
399
+ "epoch": 1.721763085399449,
400
+ "grad_norm": 9.223713874816895,
401
+ "learning_rate": 2.1313340011687117e-05,
402
+ "loss": 0.1266,
403
+ "step": 27500
404
+ },
405
+ {
406
+ "epoch": 1.7530678687703483,
407
+ "grad_norm": 2.0706963539123535,
408
+ "learning_rate": 2.0791593622172137e-05,
409
+ "loss": 0.1274,
410
+ "step": 28000
411
+ },
412
+ {
413
+ "epoch": 1.784372652141247,
414
+ "grad_norm": 3.1475393772125244,
415
+ "learning_rate": 2.0270890725436182e-05,
416
+ "loss": 0.1214,
417
+ "step": 28500
418
+ },
419
+ {
420
+ "epoch": 1.8156774355121463,
421
+ "grad_norm": 3.7348415851593018,
422
+ "learning_rate": 1.9749144335921196e-05,
423
+ "loss": 0.1191,
424
+ "step": 29000
425
+ },
426
+ {
427
+ "epoch": 1.8469822188830454,
428
+ "grad_norm": 3.230713129043579,
429
+ "learning_rate": 1.9227397946406212e-05,
430
+ "loss": 0.1199,
431
+ "step": 29500
432
+ },
433
+ {
434
+ "epoch": 1.8782870022539444,
435
+ "grad_norm": 0.4691683351993561,
436
+ "learning_rate": 1.8705651556891226e-05,
437
+ "loss": 0.1176,
438
+ "step": 30000
439
+ },
440
+ {
441
+ "epoch": 1.9095917856248434,
442
+ "grad_norm": 4.382262706756592,
443
+ "learning_rate": 1.8183905167376242e-05,
444
+ "loss": 0.1176,
445
+ "step": 30500
446
+ },
447
+ {
448
+ "epoch": 1.9408965689957425,
449
+ "grad_norm": 9.810182571411133,
450
+ "learning_rate": 1.7662158777861255e-05,
451
+ "loss": 0.1083,
452
+ "step": 31000
453
+ },
454
+ {
455
+ "epoch": 1.9722013523666417,
456
+ "grad_norm": 8.107538223266602,
457
+ "learning_rate": 1.7140412388346275e-05,
458
+ "loss": 0.1103,
459
+ "step": 31500
460
+ },
461
+ {
462
+ "epoch": 2.0,
463
+ "eval_accuracy": 0.9478369642628878,
464
+ "eval_loss": 0.1829579919576645,
465
+ "eval_runtime": 179.9731,
466
+ "eval_samples_per_second": 608.547,
467
+ "eval_steps_per_second": 76.072,
468
+ "step": 31944
469
+ },
470
+ {
471
+ "epoch": 2.0035061357375405,
472
+ "grad_norm": 0.5452843308448792,
473
+ "learning_rate": 1.661866599883129e-05,
474
+ "loss": 0.1087,
475
+ "step": 32000
476
+ },
477
+ {
478
+ "epoch": 2.03481091910844,
479
+ "grad_norm": 1.0569943189620972,
480
+ "learning_rate": 1.6097963102095334e-05,
481
+ "loss": 0.0456,
482
+ "step": 32500
483
+ },
484
+ {
485
+ "epoch": 2.0661157024793386,
486
+ "grad_norm": 0.22022764384746552,
487
+ "learning_rate": 1.557621671258035e-05,
488
+ "loss": 0.0523,
489
+ "step": 33000
490
+ },
491
+ {
492
+ "epoch": 2.097420485850238,
493
+ "grad_norm": 9.75222396850586,
494
+ "learning_rate": 1.5054470323065365e-05,
495
+ "loss": 0.0492,
496
+ "step": 33500
497
+ },
498
+ {
499
+ "epoch": 2.128725269221137,
500
+ "grad_norm": 3.1281306743621826,
501
+ "learning_rate": 1.453272393355038e-05,
502
+ "loss": 0.0498,
503
+ "step": 34000
504
+ },
505
+ {
506
+ "epoch": 2.160030052592036,
507
+ "grad_norm": 0.012396792881190777,
508
+ "learning_rate": 1.4012021036814427e-05,
509
+ "loss": 0.0506,
510
+ "step": 34500
511
+ },
512
+ {
513
+ "epoch": 2.191334835962935,
514
+ "grad_norm": 6.527154922485352,
515
+ "learning_rate": 1.3490274647299442e-05,
516
+ "loss": 0.0569,
517
+ "step": 35000
518
+ },
519
+ {
520
+ "epoch": 2.222639619333834,
521
+ "grad_norm": 3.5429670810699463,
522
+ "learning_rate": 1.2968528257784457e-05,
523
+ "loss": 0.0548,
524
+ "step": 35500
525
+ },
526
+ {
527
+ "epoch": 2.2539444027047333,
528
+ "grad_norm": 1.333369255065918,
529
+ "learning_rate": 1.2446781868269472e-05,
530
+ "loss": 0.0558,
531
+ "step": 36000
532
+ },
533
+ {
534
+ "epoch": 2.2852491860756325,
535
+ "grad_norm": 0.10260029882192612,
536
+ "learning_rate": 1.1926078971533518e-05,
537
+ "loss": 0.0464,
538
+ "step": 36500
539
+ },
540
+ {
541
+ "epoch": 2.3165539694465314,
542
+ "grad_norm": 0.14060164988040924,
543
+ "learning_rate": 1.1404332582018533e-05,
544
+ "loss": 0.0515,
545
+ "step": 37000
546
+ },
547
+ {
548
+ "epoch": 2.3478587528174306,
549
+ "grad_norm": 1.031032919883728,
550
+ "learning_rate": 1.0882586192503548e-05,
551
+ "loss": 0.0448,
552
+ "step": 37500
553
+ },
554
+ {
555
+ "epoch": 2.3791635361883294,
556
+ "grad_norm": 0.20121368765830994,
557
+ "learning_rate": 1.0360839802988565e-05,
558
+ "loss": 0.0475,
559
+ "step": 38000
560
+ },
561
+ {
562
+ "epoch": 2.4104683195592287,
563
+ "grad_norm": 0.06531311571598053,
564
+ "learning_rate": 9.84013690625261e-06,
565
+ "loss": 0.0522,
566
+ "step": 38500
567
+ },
568
+ {
569
+ "epoch": 2.441773102930128,
570
+ "grad_norm": 0.04498385637998581,
571
+ "learning_rate": 9.318390516737625e-06,
572
+ "loss": 0.0434,
573
+ "step": 39000
574
+ },
575
+ {
576
+ "epoch": 2.4730778863010268,
577
+ "grad_norm": 0.3482716679573059,
578
+ "learning_rate": 8.796644127222641e-06,
579
+ "loss": 0.0468,
580
+ "step": 39500
581
+ },
582
+ {
583
+ "epoch": 2.504382669671926,
584
+ "grad_norm": 4.0475053787231445,
585
+ "learning_rate": 8.274897737707656e-06,
586
+ "loss": 0.0505,
587
+ "step": 40000
588
+ },
589
+ {
590
+ "epoch": 2.535687453042825,
591
+ "grad_norm": 0.6960127353668213,
592
+ "learning_rate": 7.753151348192671e-06,
593
+ "loss": 0.0421,
594
+ "step": 40500
595
+ },
596
+ {
597
+ "epoch": 2.566992236413724,
598
+ "grad_norm": 0.8902493119239807,
599
+ "learning_rate": 7.231404958677686e-06,
600
+ "loss": 0.0451,
601
+ "step": 41000
602
+ },
603
+ {
604
+ "epoch": 2.5982970197846234,
605
+ "grad_norm": 0.46462351083755493,
606
+ "learning_rate": 6.710702061941732e-06,
607
+ "loss": 0.0522,
608
+ "step": 41500
609
+ },
610
+ {
611
+ "epoch": 2.629601803155522,
612
+ "grad_norm": 0.07463126629590988,
613
+ "learning_rate": 6.1889556724267476e-06,
614
+ "loss": 0.0468,
615
+ "step": 42000
616
+ },
617
+ {
618
+ "epoch": 2.660906586526421,
619
+ "grad_norm": 0.05138092488050461,
620
+ "learning_rate": 5.6672092829117625e-06,
621
+ "loss": 0.0429,
622
+ "step": 42500
623
+ },
624
+ {
625
+ "epoch": 2.6922113698973202,
626
+ "grad_norm": 0.06017659977078438,
627
+ "learning_rate": 5.145462893396778e-06,
628
+ "loss": 0.038,
629
+ "step": 43000
630
+ },
631
+ {
632
+ "epoch": 2.7235161532682195,
633
+ "grad_norm": 3.794154405593872,
634
+ "learning_rate": 4.624759996660823e-06,
635
+ "loss": 0.0418,
636
+ "step": 43500
637
+ },
638
+ {
639
+ "epoch": 2.7548209366391183,
640
+ "grad_norm": 9.929149627685547,
641
+ "learning_rate": 4.103013607145838e-06,
642
+ "loss": 0.0418,
643
+ "step": 44000
644
+ },
645
+ {
646
+ "epoch": 2.7861257200100176,
647
+ "grad_norm": 0.10156802833080292,
648
+ "learning_rate": 3.5812672176308544e-06,
649
+ "loss": 0.0435,
650
+ "step": 44500
651
+ },
652
+ {
653
+ "epoch": 2.8174305033809164,
654
+ "grad_norm": 15.590471267700195,
655
+ "learning_rate": 3.0595208281158697e-06,
656
+ "loss": 0.039,
657
+ "step": 45000
658
+ },
659
+ {
660
+ "epoch": 2.8487352867518156,
661
+ "grad_norm": 0.1026441678404808,
662
+ "learning_rate": 2.5377744386008846e-06,
663
+ "loss": 0.0451,
664
+ "step": 45500
665
+ },
666
+ {
667
+ "epoch": 2.880040070122715,
668
+ "grad_norm": 0.08782440423965454,
669
+ "learning_rate": 2.0160280490859004e-06,
670
+ "loss": 0.0408,
671
+ "step": 46000
672
+ },
673
+ {
674
+ "epoch": 2.9113448534936137,
675
+ "grad_norm": 17.5203857421875,
676
+ "learning_rate": 1.494281659570916e-06,
677
+ "loss": 0.0372,
678
+ "step": 46500
679
+ },
680
+ {
681
+ "epoch": 2.942649636864513,
682
+ "grad_norm": 0.08832889050245285,
683
+ "learning_rate": 9.735787628349612e-07,
684
+ "loss": 0.041,
685
+ "step": 47000
686
+ },
687
+ {
688
+ "epoch": 2.973954420235412,
689
+ "grad_norm": 10.057083129882812,
690
+ "learning_rate": 4.518323733199766e-07,
691
+ "loss": 0.0417,
692
+ "step": 47500
693
+ },
694
+ {
695
+ "epoch": 3.0,
696
+ "eval_accuracy": 0.9541735906941071,
697
+ "eval_loss": 0.2335142344236374,
698
+ "eval_runtime": 176.4196,
699
+ "eval_samples_per_second": 620.804,
700
+ "eval_steps_per_second": 77.605,
701
+ "step": 47916
702
+ }
703
+ ],
704
+ "logging_steps": 500,
705
+ "max_steps": 47916,
706
+ "num_input_tokens_seen": 0,
707
+ "num_train_epochs": 3,
708
+ "save_steps": 500,
709
+ "stateful_callbacks": {
710
+ "TrainerControl": {
711
+ "args": {
712
+ "should_epoch_stop": false,
713
+ "should_evaluate": false,
714
+ "should_log": false,
715
+ "should_save": true,
716
+ "should_training_stop": true
717
+ },
718
+ "attributes": {}
719
+ }
720
+ },
721
+ "total_flos": 0.0,
722
+ "train_batch_size": 8,
723
+ "trial_name": null,
724
+ "trial_params": null
725
+ }
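A quick sanity check on these numbers (assuming a single GPU and that `global_step` counts optimizer updates after gradient accumulation): 47,916 steps over 3 epochs is 15,972 updates per epoch, and with the effective batch size of 32 (per-device batch 8 × accumulation 4) that corresponds to roughly 511,000 training windows per epoch. The evaluation throughput (about 608 samples/s over about 180 s) likewise implies a validation split of roughly 109,500 windows, which is consistent with the 70/15/15 split used in `finetuning_bc_prott5.py`.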
t5-bc-out/checkpoint-47916/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:705750eb5050da7b859b299363db4324be92a3af2ba4a8530c69e964f52524d7
+ size 5176
tokenizer_config.json ADDED
@@ -0,0 +1,941 @@
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<pad>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<unk>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "28": {
29
+ "content": "<extra_id_99>",
30
+ "lstrip": true,
31
+ "normalized": false,
32
+ "rstrip": true,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "29": {
37
+ "content": "<extra_id_98>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": true,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "30": {
45
+ "content": "<extra_id_97>",
46
+ "lstrip": true,
47
+ "normalized": false,
48
+ "rstrip": true,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "31": {
53
+ "content": "<extra_id_96>",
54
+ "lstrip": true,
55
+ "normalized": false,
56
+ "rstrip": true,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "32": {
61
+ "content": "<extra_id_95>",
62
+ "lstrip": true,
63
+ "normalized": false,
64
+ "rstrip": true,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "33": {
69
+ "content": "<extra_id_94>",
70
+ "lstrip": true,
71
+ "normalized": false,
72
+ "rstrip": true,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "34": {
77
+ "content": "<extra_id_93>",
78
+ "lstrip": true,
79
+ "normalized": false,
80
+ "rstrip": true,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "35": {
85
+ "content": "<extra_id_92>",
86
+ "lstrip": true,
87
+ "normalized": false,
88
+ "rstrip": true,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "36": {
93
+ "content": "<extra_id_91>",
94
+ "lstrip": true,
95
+ "normalized": false,
96
+ "rstrip": true,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "37": {
101
+ "content": "<extra_id_90>",
102
+ "lstrip": true,
103
+ "normalized": false,
104
+ "rstrip": true,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "38": {
109
+ "content": "<extra_id_89>",
110
+ "lstrip": true,
111
+ "normalized": false,
112
+ "rstrip": true,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "39": {
117
+ "content": "<extra_id_88>",
118
+ "lstrip": true,
119
+ "normalized": false,
120
+ "rstrip": true,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "40": {
125
+ "content": "<extra_id_87>",
126
+ "lstrip": true,
127
+ "normalized": false,
128
+ "rstrip": true,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "41": {
133
+ "content": "<extra_id_86>",
134
+ "lstrip": true,
135
+ "normalized": false,
136
+ "rstrip": true,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "42": {
141
+ "content": "<extra_id_85>",
142
+ "lstrip": true,
143
+ "normalized": false,
144
+ "rstrip": true,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "43": {
149
+ "content": "<extra_id_84>",
150
+ "lstrip": true,
151
+ "normalized": false,
152
+ "rstrip": true,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "44": {
157
+ "content": "<extra_id_83>",
158
+ "lstrip": true,
159
+ "normalized": false,
160
+ "rstrip": true,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "45": {
165
+ "content": "<extra_id_82>",
166
+ "lstrip": true,
167
+ "normalized": false,
168
+ "rstrip": true,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "46": {
173
+ "content": "<extra_id_81>",
174
+ "lstrip": true,
175
+ "normalized": false,
176
+ "rstrip": true,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "47": {
181
+ "content": "<extra_id_80>",
182
+ "lstrip": true,
183
+ "normalized": false,
184
+ "rstrip": true,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "48": {
189
+ "content": "<extra_id_79>",
190
+ "lstrip": true,
191
+ "normalized": false,
192
+ "rstrip": true,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "49": {
197
+ "content": "<extra_id_78>",
198
+ "lstrip": true,
199
+ "normalized": false,
200
+ "rstrip": true,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "50": {
205
+ "content": "<extra_id_77>",
206
+ "lstrip": true,
207
+ "normalized": false,
208
+ "rstrip": true,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "51": {
213
+ "content": "<extra_id_76>",
214
+ "lstrip": true,
215
+ "normalized": false,
216
+ "rstrip": true,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "52": {
221
+ "content": "<extra_id_75>",
222
+ "lstrip": true,
223
+ "normalized": false,
224
+ "rstrip": true,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "53": {
229
+ "content": "<extra_id_74>",
230
+ "lstrip": true,
231
+ "normalized": false,
232
+ "rstrip": true,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "54": {
237
+ "content": "<extra_id_73>",
238
+ "lstrip": true,
239
+ "normalized": false,
240
+ "rstrip": true,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "55": {
245
+ "content": "<extra_id_72>",
246
+ "lstrip": true,
247
+ "normalized": false,
248
+ "rstrip": true,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "56": {
253
+ "content": "<extra_id_71>",
254
+ "lstrip": true,
255
+ "normalized": false,
256
+ "rstrip": true,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "57": {
261
+ "content": "<extra_id_70>",
262
+ "lstrip": true,
263
+ "normalized": false,
264
+ "rstrip": true,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "58": {
269
+ "content": "<extra_id_69>",
270
+ "lstrip": true,
271
+ "normalized": false,
272
+ "rstrip": true,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "59": {
277
+ "content": "<extra_id_68>",
278
+ "lstrip": true,
279
+ "normalized": false,
280
+ "rstrip": true,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "60": {
285
+ "content": "<extra_id_67>",
286
+ "lstrip": true,
287
+ "normalized": false,
288
+ "rstrip": true,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "61": {
293
+ "content": "<extra_id_66>",
294
+ "lstrip": true,
295
+ "normalized": false,
296
+ "rstrip": true,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "62": {
301
+ "content": "<extra_id_65>",
302
+ "lstrip": true,
303
+ "normalized": false,
304
+ "rstrip": true,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "63": {
309
+ "content": "<extra_id_64>",
310
+ "lstrip": true,
311
+ "normalized": false,
312
+ "rstrip": true,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "64": {
317
+ "content": "<extra_id_63>",
318
+ "lstrip": true,
319
+ "normalized": false,
320
+ "rstrip": true,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "65": {
325
+ "content": "<extra_id_62>",
326
+ "lstrip": true,
327
+ "normalized": false,
328
+ "rstrip": true,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "66": {
333
+ "content": "<extra_id_61>",
334
+ "lstrip": true,
335
+ "normalized": false,
336
+ "rstrip": true,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "67": {
341
+ "content": "<extra_id_60>",
342
+ "lstrip": true,
343
+ "normalized": false,
344
+ "rstrip": true,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "68": {
349
+ "content": "<extra_id_59>",
350
+ "lstrip": true,
351
+ "normalized": false,
352
+ "rstrip": true,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "69": {
357
+ "content": "<extra_id_58>",
358
+ "lstrip": true,
359
+ "normalized": false,
360
+ "rstrip": true,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "70": {
365
+ "content": "<extra_id_57>",
366
+ "lstrip": true,
367
+ "normalized": false,
368
+ "rstrip": true,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "71": {
373
+ "content": "<extra_id_56>",
374
+ "lstrip": true,
375
+ "normalized": false,
376
+ "rstrip": true,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "72": {
381
+ "content": "<extra_id_55>",
382
+ "lstrip": true,
383
+ "normalized": false,
384
+ "rstrip": true,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "73": {
389
+ "content": "<extra_id_54>",
390
+ "lstrip": true,
391
+ "normalized": false,
392
+ "rstrip": true,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "74": {
397
+ "content": "<extra_id_53>",
398
+ "lstrip": true,
399
+ "normalized": false,
400
+ "rstrip": true,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "75": {
405
+ "content": "<extra_id_52>",
406
+ "lstrip": true,
407
+ "normalized": false,
408
+ "rstrip": true,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "76": {
413
+ "content": "<extra_id_51>",
414
+ "lstrip": true,
415
+ "normalized": false,
416
+ "rstrip": true,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "77": {
421
+ "content": "<extra_id_50>",
422
+ "lstrip": true,
423
+ "normalized": false,
424
+ "rstrip": true,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "78": {
429
+ "content": "<extra_id_49>",
430
+ "lstrip": true,
431
+ "normalized": false,
432
+ "rstrip": true,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "79": {
437
+ "content": "<extra_id_48>",
438
+ "lstrip": true,
439
+ "normalized": false,
440
+ "rstrip": true,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "80": {
445
+ "content": "<extra_id_47>",
446
+ "lstrip": true,
447
+ "normalized": false,
448
+ "rstrip": true,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "81": {
453
+ "content": "<extra_id_46>",
454
+ "lstrip": true,
455
+ "normalized": false,
456
+ "rstrip": true,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "82": {
461
+ "content": "<extra_id_45>",
462
+ "lstrip": true,
463
+ "normalized": false,
464
+ "rstrip": true,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "83": {
469
+ "content": "<extra_id_44>",
470
+ "lstrip": true,
471
+ "normalized": false,
472
+ "rstrip": true,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "84": {
477
+ "content": "<extra_id_43>",
478
+ "lstrip": true,
479
+ "normalized": false,
480
+ "rstrip": true,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "85": {
485
+ "content": "<extra_id_42>",
486
+ "lstrip": true,
487
+ "normalized": false,
488
+ "rstrip": true,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "86": {
493
+ "content": "<extra_id_41>",
494
+ "lstrip": true,
495
+ "normalized": false,
496
+ "rstrip": true,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "87": {
501
+ "content": "<extra_id_40>",
502
+ "lstrip": true,
503
+ "normalized": false,
504
+ "rstrip": true,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "88": {
509
+ "content": "<extra_id_39>",
510
+ "lstrip": true,
511
+ "normalized": false,
512
+ "rstrip": true,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "89": {
517
+ "content": "<extra_id_38>",
518
+ "lstrip": true,
519
+ "normalized": false,
520
+ "rstrip": true,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "90": {
525
+ "content": "<extra_id_37>",
526
+ "lstrip": true,
527
+ "normalized": false,
528
+ "rstrip": true,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "91": {
533
+ "content": "<extra_id_36>",
534
+ "lstrip": true,
535
+ "normalized": false,
536
+ "rstrip": true,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "92": {
541
+ "content": "<extra_id_35>",
542
+ "lstrip": true,
543
+ "normalized": false,
544
+ "rstrip": true,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "93": {
549
+ "content": "<extra_id_34>",
550
+ "lstrip": true,
551
+ "normalized": false,
552
+ "rstrip": true,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "94": {
557
+ "content": "<extra_id_33>",
558
+ "lstrip": true,
559
+ "normalized": false,
560
+ "rstrip": true,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "95": {
565
+ "content": "<extra_id_32>",
566
+ "lstrip": true,
567
+ "normalized": false,
568
+ "rstrip": true,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "96": {
573
+ "content": "<extra_id_31>",
574
+ "lstrip": true,
575
+ "normalized": false,
576
+ "rstrip": true,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "97": {
581
+ "content": "<extra_id_30>",
582
+ "lstrip": true,
583
+ "normalized": false,
584
+ "rstrip": true,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "98": {
589
+ "content": "<extra_id_29>",
590
+ "lstrip": true,
591
+ "normalized": false,
592
+ "rstrip": true,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "99": {
597
+ "content": "<extra_id_28>",
598
+ "lstrip": true,
599
+ "normalized": false,
600
+ "rstrip": true,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "100": {
605
+ "content": "<extra_id_27>",
606
+ "lstrip": true,
607
+ "normalized": false,
608
+ "rstrip": true,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "101": {
613
+ "content": "<extra_id_26>",
614
+ "lstrip": true,
615
+ "normalized": false,
616
+ "rstrip": true,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "102": {
621
+ "content": "<extra_id_25>",
622
+ "lstrip": true,
623
+ "normalized": false,
624
+ "rstrip": true,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "103": {
629
+ "content": "<extra_id_24>",
630
+ "lstrip": true,
631
+ "normalized": false,
632
+ "rstrip": true,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "104": {
637
+ "content": "<extra_id_23>",
638
+ "lstrip": true,
639
+ "normalized": false,
640
+ "rstrip": true,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "105": {
645
+ "content": "<extra_id_22>",
646
+ "lstrip": true,
647
+ "normalized": false,
648
+ "rstrip": true,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "106": {
653
+ "content": "<extra_id_21>",
654
+ "lstrip": true,
655
+ "normalized": false,
656
+ "rstrip": true,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "107": {
661
+ "content": "<extra_id_20>",
662
+ "lstrip": true,
663
+ "normalized": false,
664
+ "rstrip": true,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "108": {
669
+ "content": "<extra_id_19>",
670
+ "lstrip": true,
671
+ "normalized": false,
672
+ "rstrip": true,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "109": {
677
+ "content": "<extra_id_18>",
678
+ "lstrip": true,
679
+ "normalized": false,
680
+ "rstrip": true,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "110": {
685
+ "content": "<extra_id_17>",
686
+ "lstrip": true,
687
+ "normalized": false,
688
+ "rstrip": true,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "111": {
693
+ "content": "<extra_id_16>",
694
+ "lstrip": true,
695
+ "normalized": false,
696
+ "rstrip": true,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "112": {
701
+ "content": "<extra_id_15>",
702
+ "lstrip": true,
703
+ "normalized": false,
704
+ "rstrip": true,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "113": {
709
+ "content": "<extra_id_14>",
710
+ "lstrip": true,
711
+ "normalized": false,
712
+ "rstrip": true,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "114": {
717
+ "content": "<extra_id_13>",
718
+ "lstrip": true,
719
+ "normalized": false,
720
+ "rstrip": true,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "115": {
725
+ "content": "<extra_id_12>",
726
+ "lstrip": true,
727
+ "normalized": false,
728
+ "rstrip": true,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "116": {
733
+ "content": "<extra_id_11>",
734
+ "lstrip": true,
735
+ "normalized": false,
736
+ "rstrip": true,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "117": {
741
+ "content": "<extra_id_10>",
742
+ "lstrip": true,
743
+ "normalized": false,
744
+ "rstrip": true,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "118": {
749
+ "content": "<extra_id_9>",
750
+ "lstrip": true,
751
+ "normalized": false,
752
+ "rstrip": true,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "119": {
757
+ "content": "<extra_id_8>",
758
+ "lstrip": true,
759
+ "normalized": false,
760
+ "rstrip": true,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "120": {
765
+ "content": "<extra_id_7>",
766
+ "lstrip": true,
767
+ "normalized": false,
768
+ "rstrip": true,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "121": {
773
+ "content": "<extra_id_6>",
774
+ "lstrip": true,
775
+ "normalized": false,
776
+ "rstrip": true,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "122": {
781
+ "content": "<extra_id_5>",
782
+ "lstrip": true,
783
+ "normalized": false,
784
+ "rstrip": true,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "123": {
789
+ "content": "<extra_id_4>",
790
+ "lstrip": true,
791
+ "normalized": false,
792
+ "rstrip": true,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "124": {
797
+ "content": "<extra_id_3>",
798
+ "lstrip": true,
799
+ "normalized": false,
800
+ "rstrip": true,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "125": {
805
+ "content": "<extra_id_2>",
806
+ "lstrip": true,
807
+ "normalized": false,
808
+ "rstrip": true,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "126": {
813
+ "content": "<extra_id_1>",
814
+ "lstrip": true,
815
+ "normalized": false,
816
+ "rstrip": true,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "127": {
821
+ "content": "<extra_id_0>",
822
+ "lstrip": true,
823
+ "normalized": false,
824
+ "rstrip": true,
825
+ "single_word": false,
826
+ "special": true
827
+ }
828
+ },
829
+ "additional_special_tokens": [
830
+ "<extra_id_0>",
831
+ "<extra_id_1>",
832
+ "<extra_id_2>",
833
+ "<extra_id_3>",
834
+ "<extra_id_4>",
835
+ "<extra_id_5>",
836
+ "<extra_id_6>",
837
+ "<extra_id_7>",
838
+ "<extra_id_8>",
839
+ "<extra_id_9>",
840
+ "<extra_id_10>",
841
+ "<extra_id_11>",
842
+ "<extra_id_12>",
843
+ "<extra_id_13>",
844
+ "<extra_id_14>",
845
+ "<extra_id_15>",
846
+ "<extra_id_16>",
847
+ "<extra_id_17>",
848
+ "<extra_id_18>",
849
+ "<extra_id_19>",
850
+ "<extra_id_20>",
851
+ "<extra_id_21>",
852
+ "<extra_id_22>",
853
+ "<extra_id_23>",
854
+ "<extra_id_24>",
855
+ "<extra_id_25>",
856
+ "<extra_id_26>",
857
+ "<extra_id_27>",
858
+ "<extra_id_28>",
859
+ "<extra_id_29>",
860
+ "<extra_id_30>",
861
+ "<extra_id_31>",
862
+ "<extra_id_32>",
863
+ "<extra_id_33>",
864
+ "<extra_id_34>",
865
+ "<extra_id_35>",
866
+ "<extra_id_36>",
867
+ "<extra_id_37>",
868
+ "<extra_id_38>",
869
+ "<extra_id_39>",
870
+ "<extra_id_40>",
871
+ "<extra_id_41>",
872
+ "<extra_id_42>",
873
+ "<extra_id_43>",
874
+ "<extra_id_44>",
875
+ "<extra_id_45>",
876
+ "<extra_id_46>",
877
+ "<extra_id_47>",
878
+ "<extra_id_48>",
879
+ "<extra_id_49>",
880
+ "<extra_id_50>",
881
+ "<extra_id_51>",
882
+ "<extra_id_52>",
883
+ "<extra_id_53>",
884
+ "<extra_id_54>",
885
+ "<extra_id_55>",
886
+ "<extra_id_56>",
887
+ "<extra_id_57>",
888
+ "<extra_id_58>",
889
+ "<extra_id_59>",
890
+ "<extra_id_60>",
891
+ "<extra_id_61>",
892
+ "<extra_id_62>",
893
+ "<extra_id_63>",
894
+ "<extra_id_64>",
895
+ "<extra_id_65>",
896
+ "<extra_id_66>",
897
+ "<extra_id_67>",
898
+ "<extra_id_68>",
899
+ "<extra_id_69>",
900
+ "<extra_id_70>",
901
+ "<extra_id_71>",
902
+ "<extra_id_72>",
903
+ "<extra_id_73>",
904
+ "<extra_id_74>",
905
+ "<extra_id_75>",
906
+ "<extra_id_76>",
907
+ "<extra_id_77>",
908
+ "<extra_id_78>",
909
+ "<extra_id_79>",
910
+ "<extra_id_80>",
911
+ "<extra_id_81>",
912
+ "<extra_id_82>",
913
+ "<extra_id_83>",
914
+ "<extra_id_84>",
915
+ "<extra_id_85>",
916
+ "<extra_id_86>",
917
+ "<extra_id_87>",
918
+ "<extra_id_88>",
919
+ "<extra_id_89>",
920
+ "<extra_id_90>",
921
+ "<extra_id_91>",
922
+ "<extra_id_92>",
923
+ "<extra_id_93>",
924
+ "<extra_id_94>",
925
+ "<extra_id_95>",
926
+ "<extra_id_96>",
927
+ "<extra_id_97>",
928
+ "<extra_id_98>",
929
+ "<extra_id_99>"
930
+ ],
931
+ "clean_up_tokenization_spaces": false,
932
+ "do_lower_case": false,
933
+ "eos_token": "</s>",
934
+ "extra_ids": 100,
935
+ "legacy": true,
936
+ "model_max_length": 1000000000000000019884624838656,
937
+ "pad_token": "<pad>",
938
+ "sp_model_kwargs": {},
939
+ "tokenizer_class": "T5Tokenizer",
940
+ "unk_token": "<unk>"
941
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:705750eb5050da7b859b299363db4324be92a3af2ba4a8530c69e964f52524d7
3
+ size 5176
wandb/debug-internal.log ADDED
@@ -0,0 +1,21 @@
1
+ {"time":"2025-05-04T17:25:03.375857654+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2025-05-04T17:25:03.375905253+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug-core.log"}
3
+ {"time":"2025-05-04T17:25:03.501241143+03:00","level":"INFO","msg":"created new stream","id":"0ictlmwf"}
4
+ {"time":"2025-05-04T17:25:03.501294637+03:00","level":"INFO","msg":"stream: started","id":"0ictlmwf"}
5
+ {"time":"2025-05-04T17:25:03.501448652+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"0ictlmwf"}
6
+ {"time":"2025-05-04T17:25:03.501451145+03:00","level":"INFO","msg":"handler: started","stream_id":"0ictlmwf"}
7
+ {"time":"2025-05-04T17:25:03.501574427+03:00","level":"INFO","msg":"sender: started","stream_id":"0ictlmwf"}
8
+ {"time":"2025-05-04T17:25:03.865922055+03:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2025-05-04T22:47:43.191425732+03:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/isikz/finetuning-bc-protT5/0ictlmwf/file_stream\": dial tcp 35.186.228.49:443: connect: connection timed out"}
10
+ {"time":"2025-05-05T00:01:47.351449692+03:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/isikz/finetuning-bc-protT5/0ictlmwf/file_stream\": dial tcp 35.186.228.49:443: connect: connection timed out"}
11
+ {"time":"2025-05-05T00:49:32.57779148+03:00","level":"INFO","msg":"stream: closing","id":"0ictlmwf"}
12
+ {"time":"2025-05-05T00:49:32.577842715+03:00","level":"INFO","msg":"Stopping system monitor"}
13
+ {"time":"2025-05-05T00:49:32.578849729+03:00","level":"INFO","msg":"Stopped system monitor"}
14
+ {"time":"2025-05-05T00:49:32.781968337+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
15
+ {"time":"2025-05-05T00:49:32.781997123+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
16
+ {"time":"2025-05-05T00:49:32.782008311+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
17
+ {"time":"2025-05-05T00:49:33.357099059+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
18
+ {"time":"2025-05-05T00:49:33.741524339+03:00","level":"INFO","msg":"handler: closed","stream_id":"0ictlmwf"}
19
+ {"time":"2025-05-05T00:49:33.741583153+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"0ictlmwf"}
20
+ {"time":"2025-05-05T00:49:33.741593811+03:00","level":"INFO","msg":"sender: closed","stream_id":"0ictlmwf"}
21
+ {"time":"2025-05-05T00:49:33.741652369+03:00","level":"INFO","msg":"stream: closed","id":"0ictlmwf"}
wandb/debug.log ADDED
@@ -0,0 +1,27 @@
1
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Configure stats pid to 3189710
3
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2025-05-04 17:25:03,365 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Applying login settings: {}
8
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug.log
10
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_172503-0ictlmwf/logs/debug-internal.log
11
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():619] calling init triggers
12
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():669] starting backend
15
+ 2025-05-04 17:25:03,366 INFO MainThread:3189710 [wandb_init.py:init():673] sending inform_init request
16
+ 2025-05-04 17:25:03,371 INFO MainThread:3189710 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2025-05-04 17:25:03,371 INFO MainThread:3189710 [wandb_init.py:init():686] backend started and connected
18
+ 2025-05-04 17:25:03,379 INFO MainThread:3189710 [wandb_init.py:init():781] updated telemetry
19
+ 2025-05-04 17:25:03,382 INFO MainThread:3189710 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2025-05-04 17:25:03,852 INFO MainThread:3189710 [wandb_init.py:init():867] starting run threads in backend
21
+ 2025-05-04 17:25:05,277 INFO MainThread:3189710 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2025-05-04 17:25:05,278 INFO MainThread:3189710 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2025-05-04 17:25:05,278 INFO MainThread:3189710 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2025-05-04 17:25:05,278 INFO MainThread:3189710 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2025-05-04 17:25:05,283 INFO MainThread:3189710 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2025-05-04 17:25:53,069 INFO MainThread:3189710 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_17-25-43_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': 
None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
27
+ 2025-05-05 00:49:32,578 WARNING MsgRouterThr:3189710 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250504_132610-pxg645u5/files/config.yaml ADDED
@@ -0,0 +1,44 @@
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m: []
5
+ python_version: 3.10.15
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 2
10
+ - 3
11
+ - 5
12
+ - 11
13
+ - 12
14
+ - 49
15
+ - 51
16
+ - 53
17
+ - 55
18
+ - 71
19
+ - 98
20
+ - 105
21
+ "2":
22
+ - 1
23
+ - 2
24
+ - 3
25
+ - 5
26
+ - 11
27
+ - 12
28
+ - 49
29
+ - 51
30
+ - 53
31
+ - 55
32
+ - 71
33
+ - 98
34
+ - 105
35
+ "3":
36
+ - 23
37
+ - 55
38
+ "4": 3.10.15
39
+ "5": 0.18.7
40
+ "6": 4.45.2
41
+ "8":
42
+ - 5
43
+ "12": 0.18.7
44
+ "13": linux-x86_64
wandb/run-20250504_132610-pxg645u5/files/output.log ADDED
@@ -0,0 +1,37 @@
1
+ You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
+ Traceback (most recent call last):
3
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 45, in <module>
4
+ train_ds = load_dataset("json", data_files={"train": "-"},
5
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 2132, in load_dataset
6
+ builder_instance = load_dataset_builder(
7
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1853, in load_dataset_builder
8
+ dataset_module = dataset_module_factory(
9
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1562, in dataset_module_factory
10
+ ).get_module()
11
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 942, in get_module
12
+ data_files = DataFilesDict.from_patterns(
13
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 721, in from_patterns
14
+ else DataFilesList.from_patterns(
15
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 624, in from_patterns
16
+ resolve_pattern(
17
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 411, in resolve_pattern
18
+ raise FileNotFoundError(error_msg)
19
+ FileNotFoundError: Unable to find '/arf/scratch/zisik/prott5_bc_ft/-'
20
+ Traceback (most recent call last):
21
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 45, in <module>
22
+ train_ds = load_dataset("json", data_files={"train": "-"},
23
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 2132, in load_dataset
24
+ builder_instance = load_dataset_builder(
25
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1853, in load_dataset_builder
26
+ dataset_module = dataset_module_factory(
27
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 1562, in dataset_module_factory
28
+ ).get_module()
29
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/load.py", line 942, in get_module
30
+ data_files = DataFilesDict.from_patterns(
31
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 721, in from_patterns
32
+ else DataFilesList.from_patterns(
33
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 624, in from_patterns
34
+ resolve_pattern(
35
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/datasets/data_files.py", line 411, in resolve_pattern
36
+ raise FileNotFoundError(error_msg)
37
+ FileNotFoundError: Unable to find '/arf/scratch/zisik/prott5_bc_ft/-'
wandb/run-20250504_132610-pxg645u5/files/requirements.txt ADDED
@@ -0,0 +1,541 @@
1
+ nvidia-cuda-cupti-cu12==12.4.127
2
+ nvidia-cuda-nvrtc-cu12==12.4.127
3
+ pyg-lib==0.4.0+pt20cu117
4
+ biopython==1.85
5
+ iniconfig==2.0.0
6
+ tokenizers==0.20.0
7
+ accelerate==1.3.0
8
+ torch==2.6.0
9
+ nvidia-nccl-cu12==2.21.5
10
+ transformers==4.45.2
11
+ nvidia-cusparse-cu12==12.3.1.170
12
+ torch-scatter==2.1.2+pt20cu117
13
+ nvidia-cusparselt-cu12==0.6.2
14
+ nvidia-nvtx-cu12==12.4.127
15
+ zstd==1.5.6.6
16
+ fair-esm==2.0.0
17
+ omegaconf==2.3.0
18
+ pluggy==1.5.0
19
+ pytest==8.3.5
20
+ nvidia-curand-cu12==10.3.5.147
21
+ nvidia-cufft-cu12==11.2.1.3
22
+ torch-cluster==1.6.3+pt20cu117
23
+ regex==2024.9.11
24
+ nvidia-cudnn-cu12==9.1.0.70
25
+ torch-spline-conv==1.2.2+pt20cu117
26
+ nvidia-cusolver-cu12==11.6.1.9
27
+ antlr4-python3-runtime==4.9.3
28
+ msgpack-numpy==0.4.8
29
+ nlp==0.2.0
30
+ einops==0.8.1
31
+ nvidia-cublas-cu12==12.4.5.8
32
+ triton==3.2.0
33
+ ninja==1.11.1.3
34
+ hydra-core==1.3.2
35
+ nvidia-nvjitlink-cu12==12.4.127
36
+ biotite==0.41.2
37
+ torch-sparse==0.6.18+pt20cu117
38
+ esm==3.1.4
39
+ sympy==1.13.1
40
+ nvidia-cuda-runtime-cu12==12.4.127
41
+ jupyter-lsp==2.2.5
42
+ jupyter-events==0.10.0
43
+ ipykernel==6.29.5
44
+ Mako==1.3.5
45
+ proto-plus==1.25.0
46
+ fst-pso==1.8.1
47
+ gensim==4.3.3
48
+ htmlmin==0.1.12
49
+ tokenizers==0.13.3
50
+ timm==1.0.11
51
+ MarkupSafe==3.0.2
52
+ safetensors==0.4.5
53
+ requests==2.32.3
54
+ gast==0.5.5
55
+ cuml==24.12.0a33
56
+ jaxlib==0.4.23.dev20240214
57
+ spacy-loggers==1.0.5
58
+ pytz==2024.1
59
+ idna==3.10
60
+ python-dateutil==2.9.0
61
+ mdurl==0.1.2
62
+ blis==0.7.10
63
+ jupyter==1.1.1
64
+ pyerfa==2.0.1.5
65
+ comm==0.2.2
66
+ pygraphviz==1.14
67
+ dill==0.3.8
68
+ paramiko==3.5.0
69
+ llama-index==0.8.36
70
+ mdit-py-plugins==0.4.2
71
+ Werkzeug==3.1.3
72
+ pyu2f==0.1.5
73
+ dask-glm==0.2.0
74
+ httpx==0.27.2
75
+ typeguard==4.4.1
76
+ mypy-extensions==1.0.0
77
+ kmodes==0.12.2
78
+ keras==2.15.0
79
+ ydata-profiling==0.0.dev0
80
+ regex==2024.11.6
81
+ xarray==2024.11.0
82
+ setuptools==75.3.0
83
+ charset-normalizer==3.4.0
84
+ jupyterlab_nvdashboard==0.11.0
85
+ pylibraft==24.12.0a36
86
+ spacy==3.7.6
87
+ mlflow-skinny==2.17.2
88
+ nvtx==0.2.10
89
+ multimethod==1.12
90
+ pexpect==4.9.0
91
+ torch==2.1.0.post301
92
+ flatbuffers==24.3.25
93
+ python-json-logger==2.0.7
94
+ PyJWT==2.9.0
95
+ multiprocess==0.70.16
96
+ colorlover==0.3.0
97
+ yarl==1.16.0
98
+ locket==1.0.0
99
+ patsy==1.0.0
100
+ rapids-dask-dependency==24.12.0a0
101
+ stanza==1.9.2
102
+ debugpy==1.8.8
103
+ jupyterlab_pygments==0.3.0
104
+ pylibcudf==24.12.0a337
105
+ lz4==4.3.3
106
+ pandas==2.2.3
107
+ tifffile==2024.9.20
108
+ pynvml==11.4.1
109
+ cufflinks==0.17.3
110
+ ipywidgets==8.1.5
111
+ requests-oauthlib==2.0.0
112
+ google-auth-oauthlib==1.2.1
113
+ rsa==4.9
114
+ webcolors==24.8.0
115
+ jsonschema-specifications==2024.10.1
116
+ scikit-learn==1.5.2
117
+ langchain-text-splitters==0.3.2
118
+ pandas-datareader==0.10.0
119
+ tomli==2.0.2
120
+ tzdata==2024.2
121
+ scikit-image==0.24.0
122
+ tensorboard_data_server==0.7.0
123
+ kiwisolver==1.4.7
124
+ cloudpathlib==0.20.0
125
+ isodate==0.6.1
126
+ adversarial-robustness-toolbox==1.19.1
127
+ SQLAlchemy==2.0.36
128
+ pytest-runner==6.0.0
129
+ pycairo==1.27.0
130
+ treelite==4.3.0
131
+ jiter==0.7.0
132
+ threadpoolctl==3.5.0
133
+ pandocfilters==1.5.0
134
+ loguru==0.7.2
135
+ smart_open==7.0.5
136
+ shellingham==1.5.4
137
+ deepspeed==0.15.4
138
+ prompt_toolkit==3.0.48
139
+ databricks-sdk==0.34.0
140
+ langchain-core==0.3.15
141
+ imageio==2.36.0
142
+ openapi-schema-pydantic==1.2.4
143
+ zict==3.0.0
144
+ cachetools==5.5.0
145
+ colorful==0.5.6
146
+ mpmath==1.3.0
147
+ nest_asyncio==1.6.0
148
+ pyFUME==0.2.25
149
+ opencv-python-headless==4.9.0
150
+ fastai==2.7.18
151
+ importlib_resources==6.4.5
152
+ binaryornot==0.4.4
153
+ evaluate==0.4.1
154
+ matplotlib-inline==0.1.7
155
+ wasabi==1.1.2
156
+ pycparser==2.22
157
+ GitPython==3.1.43
158
+ pluggy==1.5.0
159
+ async-lru==2.0.4
160
+ pgmpy==0.1.24
161
+ anyio==4.4.0
162
+ executing==2.1.0
163
+ orjson==3.10.11
164
+ humanfriendly==10.0
165
+ tornado==6.4.1
166
+ gmpy2==2.1.5
167
+ rlPyCairo==0.2.0
168
+ distributed==2024.11.0
169
+ FuzzyTM==2.0.5
170
+ torchtext==0.15.2a0+5ce3163
171
+ pytest==8.3.5
172
+ pyod==2.0.2
173
+ ImageHash==4.3.1
174
+ soupsieve==2.5
175
+ tblib==3.0.0
176
+ emoji==2.14.0
177
+ aiohappyeyeballs==2.4.3
178
+ uri-template==1.3.0
179
+ tensorflow_estimator==2.15.0
180
+ babel==2.16.0
181
+ dask-cuda==24.12.0a12
182
+ overrides==7.7.0
183
+ opencensus==0.11.3
184
+ openai==0.28.1
185
+ language_data==1.2.0
186
+ jedi==0.19.2
187
+ cookiecutter==2.6.0
188
+ entrypoints==0.4
189
+ exceptiongroup==1.2.2
190
+ marisa-trie==1.2.0
191
+ uvloop==0.20.0
192
+ aiosignal==1.3.1
193
+ Flask==3.0.3
194
+ tensorboard==2.15.2
195
+ cffi==1.17.1
196
+ tf_keras==2.15.0
197
+ absl-py==2.1.0
198
+ blinker==1.9.0
199
+ types-python-dateutil==2.9.0.20241003
200
+ opencv-python==4.9.0
201
+ frozendict==2.4.6
202
+ aiohttp-cors==0.7.0
203
+ statsmodels==0.14.4
204
+ tinycss2==1.4.0
205
+ terminado==0.18.1
206
+ pycaret==2.2.3
207
+ aiohttp==3.10.10
208
+ distributed-ucxx==0.41.0
209
+ prometheus_client==0.21.0
210
+ fastdownload==0.0.7
211
+ grpcio==1.59.3
212
+ google-api-core==2.22.0
213
+ jupyterlab_widgets==3.0.13
214
+ appdirs==1.4.4
215
+ littleutils==0.0.0
216
+ ray==2.24.0
217
+ kaggle==1.6.17
218
+ jsonschema==4.23.0
219
+ google-auth==2.36.0
220
+ scikit-base==0.11.0
221
+ visions==0.7.6
222
+ pyarrow==15.0.0
223
+ transformers==4.33.0
224
+ prometheus_flask_exporter==0.23.1
225
+ dm-tree==0.1.8
226
+ colorama==0.4.6
227
+ requests-toolbelt==1.0.0
228
+ cached-property==1.5.2
229
+ cymem==2.0.8
230
+ PyNaCl==1.5.0
231
+ PyWavelets==1.7.0
232
+ httptools==0.6.1
233
+ typing-utils==0.1.0
234
+ email_validator==2.2.0
235
+ marshmallow==3.23.1
236
+ Deprecated==1.2.14
237
+ virtualenv==20.4.7
238
+ optuna==3.6.1
239
+ jupyter_server==2.14.2
240
+ termcolor==2.5.0
241
+ mpi4py==4.0.1
242
+ torchdata==0.7.1+8cea82f
243
+ dataclasses==0.8
244
+ cloudpickle==3.1.0
245
+ tree_sitter_languages==1.10.2
246
+ tabulate==0.9.0
247
+ ipython==8.29.0
248
+ lightgbm==4.3.0
249
+ captum==0.6.0
250
+ confuse==2.0.1
251
+ torchvision==0.16.1+adc3221
252
+ lxml==4.9.4
253
+ fastapi==0.115.4
254
+ python-multipart==0.0.17
255
+ dnspython==2.7.0
256
+ jupyter-console==6.6.3
257
+ preshed==3.0.9
258
+ py-cpuinfo==9.0.0
259
+ Send2Trash==1.8.3
260
+ murmurhash==1.0.10
261
+ sniffio==1.3.1
262
+ websockets==13.1
263
+ h11==0.14.0
264
+ smmap==5.0.0
265
+ textual==0.85.2
266
+ jsonpatch==1.33
267
+ opencensus-context==0.1.3
268
+ nbconvert==7.16.4
269
+ sentry-sdk==2.19.0
270
+ opentelemetry-semantic-conventions==0.37b0
271
+ pandas-profiling==2.8.0
272
+ pillow==10.3.0
273
+ peft==0.13.2
274
+ rpds-py==0.21.0
275
+ bokeh==3.6.1
276
+ distro==1.9.0
277
+ itsdangerous==2.2.0
278
+ wandb==0.18.7
279
+ jsonpointer==3.0.0
280
+ astropy-iers-data==0.2024.11.11.0.32.38
281
+ horovod==0.28.1
282
+ graphviz==0.20.3
283
+ vtk==9.3.1
284
+ bleach==6.2.0
285
+ numexpr==2.8.7
286
+ pydantic_core==2.23.4
287
+ Jinja2==3.1.4
288
+ widgetsnbextension==4.0.13
289
+ filelock==3.16.1
290
+ catboost==1.2.7
291
+ raft-dask==24.12.0a36
292
+ async-timeout==4.0.3
293
+ datefinder==0.7.3
294
+ coloredlogs==15.0.1
295
+ platformdirs==4.3.6
296
+ spacy-legacy==3.0.12
297
+ chardet==5.2.0
298
+ jupyter_client==8.6.3
299
+ importlib_metadata==8.5.0
300
+ rfc3986-validator==0.1.1
301
+ huggingface_hub==0.26.2
302
+ PySocks==1.7.1
303
+ mlxtend==0.23.2
304
+ outdated==0.2.2
305
+ partd==1.4.2
306
+ thinc==8.2.5
307
+ astropy==6.1.6
308
+ rdflib==6.3.2
309
+ h2==4.1.0
310
+ typer==0.13.0
311
+ xyzservices==2024.9.0
312
+ toolz==0.12.1
313
+ frozenlist==1.5.0
314
+ rdkit==2024.9.2
315
+ pyasn1==0.6.1
316
+ jupyter_server_terminals==0.5.3
317
+ ucx-py==0.41.0a11
318
+ astunparse==1.6.3
319
+ simpful==2.12.0
320
+ notebook_shim==0.2.4
321
+ scipy==1.13.1
322
+ colorlog==6.9.0
323
+ tiktoken==0.3.3
324
+ plotly==5.24.1
325
+ fastrlock==0.8.2
326
+ chart-studio==1.1.0
327
+ stack-data==0.6.2
328
+ google-pasta==0.2.0
329
+ sktime==0.34.0
330
+ PyYAML==6.0.2
331
+ sympy==1.13.3
332
+ multidict==6.1.0
333
+ ml-dtypes==0.2.0
334
+ tensorboardX==2.6.2.2
335
+ decorator==5.1.1
336
+ cytoolz==1.0.0
337
+ ase==3.23.0
338
+ isoduration==20.11.0
339
+ html5lib==1.1
340
+ langsmith==0.1.142
341
+ future==1.0.0
342
+ onnx2torch==1.5.15
343
+ multipledispatch==0.6.0
344
+ protobuf==4.24.4
345
+ ucxx==0.41.0
346
+ pandas_flavor==0.6.0
347
+ msgpack==1.1.0
348
+ pyasn1_modules==0.4.1
349
+ imagecodecs==2024.1.1
350
+ mlflow==2.17.2
351
+ watchfiles==0.24.0
352
+ dm-sonnet==2.0.2
353
+ langcodes==3.4.1
354
+ freetype-py==2.3.0
355
+ argon2-cffi-bindings==21.2.0
356
+ trimesh==4.5.2
357
+ opt_einsum==3.4.0
358
+ tenacity==8.5.0
359
+ h5py==3.12.1
360
+ fastapi-cli==0.0.5
361
+ oauthlib==3.2.2
362
+ parso==0.8.4
363
+ weasel==0.4.1
364
+ yfinance==0.2.49
365
+ networkx==2.8.8
366
+ bitsandbytes==0.44.1
367
+ lazy_loader==0.4
368
+ querystring_parser==1.2.4
369
+ contourpy==1.3.0
370
+ unicodedata2==15.1.0
371
+ bcrypt==4.2.0
372
+ munkres==1.1.4
373
+ langchain==0.0.298
374
+ hpack==4.0.0
375
+ cryptography==43.0.3
376
+ umap-learn==0.5.7
377
+ arrow==1.3.0
378
+ docker==7.1.0
379
+ certifi==2025.1.31
380
+ fastjsonschema==2.20.0
381
+ tensorflow==2.15.0
382
+ googleapis-common-protos==1.65.0
383
+ iniconfig==2.0.0
384
+ Markdown==3.6
385
+ llvmlite==0.43.0
386
+ wslink==2.3.2
387
+ attrs==24.2.0
388
+ rich==13.9.4
389
+ cupy==13.3.0
390
+ uc-micro-py==1.0.3
391
+ alembic==1.14.0
392
+ joblib==1.4.2
393
+ reportlab==4.2.5
394
+ miniful==0.0.6
395
+ jupyter_core==5.7.2
396
+ wheel==0.45.0
397
+ phik==0.12.3
398
+ mistune==3.0.2
399
+ wcwidth==0.2.13
400
+ dacite==1.8.1
401
+ accelerate==0.22.0
402
+ sacremoses==0.0.53
403
+ revtok==0.0.3
404
+ python-slugify==8.0.4
405
+ tangled-up-in-unicode==0.2.0
406
+ dask==2024.11.0
407
+ markdown-it-py==3.0.0
408
+ sentencepiece==0.1.99
409
+ beautifulsoup4==4.12.3
410
+ six==1.16.0
411
+ numba-cuda==0.0.17
412
+ argon2-cffi==23.1.0
413
+ xxhash==3.5.0
414
+ hjson==3.1.0
415
+ fonttools==4.54.1
416
+ graphql-core==3.2.5
417
+ pyparsing==3.2.0
418
+ pure_eval==0.2.3
419
+ distlib==0.3.9
420
+ lightning==2.4.0
421
+ wordcloud==0.0.0
422
+ catalogue==2.0.10
423
+ jax==0.4.27
424
+ tree-sitter==0.23.2
425
+ notebook==7.2.2
426
+ dataclasses-json==0.6.7
427
+ propcache==0.2.0
428
+ numba==0.60.0
429
+ dask-expr==1.1.17
430
+ pydantic==2.9.2
431
+ gunicorn==22.0.0
432
+ missingno==0.5.2
433
+ pyOpenSSL==24.2.1
434
+ openpyxl==3.1.5
435
+ packaging==24.1
436
+ python-dotenv==1.0.1
437
+ cycler==0.12.1
438
+ types-pytz==2024.2.0.20241003
439
+ yellowbrick==1.5
440
+ referencing==0.35.1
441
+ pyLDAvis==3.4.1
442
+ lazypredict==0.2.16
443
+ fqdn==1.5.1
444
+ websocket-client==1.8.0
445
+ fastcore==1.7.19
446
+ pynvjitlink-cu12==0.3.0
447
+ pingouin==0.5.5
448
+ numpy==1.26.4
449
+ typing-inspect==0.9.0
450
+ nltk==3.9.1
451
+ onnxruntime==1.19.2
452
+ tensorflow-probability==0.23.0
453
+ datasets==3.0.2
454
+ pickleshare==0.7.5
455
+ peewee==3.17.7
456
+ torch-geometric==2.6.1
457
+ ptyprocess==0.7.0
458
+ greenlet==3.1.1
459
+ graphql-relay==3.2.0
460
+ graphene==3.4.3
461
+ et_xmlfile==2.0.0
462
+ webencodings==0.5.1
463
+ hyperframe==6.0.1
464
+ multitasking==0.0.9
465
+ typer-slim==0.13.0
466
+ onnx==1.15.0
467
+ uvicorn==0.32.0
468
+ memray==1.13.4
469
+ xgboost==2.1.2
470
+ Brotli==1.1.0
471
+ zipp==3.21.0
472
+ nbformat==5.10.4
473
+ responses==0.18.0
474
+ funcy==2.0
475
+ Pygments==2.18.0
476
+ tqdm==4.67.0
477
+ linkify-it-py==2.0.3
478
+ srsly==2.4.8
479
+ cuda-python==12.6.0
480
+ lightning-utilities==0.11.8
481
+ cudf==24.12.0a337
482
+ dask-ml==2024.4.4
483
+ docker-pycreds==0.4.0
484
+ pkgutil_resolve_name==1.3.10
485
+ opentelemetry-api==1.16.0
486
+ fsspec==2024.9.0
487
+ nbclient==0.10.0
488
+ psutil==5.9.8
489
+ pytorch-lightning==2.4.0
490
+ sortedcontainers==2.4.0
491
+ matplotlib==3.9.2
492
+ defusedxml==0.7.1
493
+ urllib3==1.26.19
494
+ jupyterlab_server==2.27.3
495
+ retrying==1.3.3
496
+ dask-cudf==24.12.0a337
497
+ sqlparse==0.5.1
498
+ text-unidecode==1.3
499
+ seaborn==0.13.2
500
+ typing_extensions==4.12.2
501
+ pyzmq==26.2.0
502
+ rfc3339-validator==0.1.4
503
+ pynndescent==0.5.13
504
+ pip==24.3.1
505
+ confection==0.1.4
506
+ wrapt==1.14.1
507
+ fastprogress==1.0.3
508
+ traitlets==5.14.3
509
+ asttokens==2.4.1
510
+ json5==0.9.28
511
+ pandas-stubs==2.2.3.241126
512
+ torchmetrics==1.2.1
513
+ gitdb==4.0.11
514
+ annotated-types==0.7.0
515
+ ipython-autotime==0.1
516
+ httpcore==1.0.6
517
+ click==8.1.7
518
+ setproctitle==1.3.3
519
+ starlette==0.41.2
520
+ jupyterlab==4.2.5
521
+ rmm==24.12.0a27
522
+ opentelemetry-sdk==1.16.0
523
+ textblob==0.15.3
524
+ imbalanced-learn==0.12.4
525
+ typeguard==4.3.0
526
+ more-itertools==10.3.0
527
+ zipp==3.19.2
528
+ autocommand==2.2.2
529
+ jaraco.context==5.3.0
530
+ packaging==24.1
531
+ importlib_metadata==8.0.0
532
+ platformdirs==4.2.2
533
+ jaraco.functools==4.0.1
534
+ importlib_resources==6.4.0
535
+ tomli==2.0.1
536
+ jaraco.text==3.12.1
537
+ wheel==0.43.0
538
+ jaraco.collections==5.1.0
539
+ typing_extensions==4.12.2
540
+ inflect==7.3.1
541
+ backports.tarfile==1.2.0
wandb/run-20250504_132610-pxg645u5/files/wandb-metadata.json ADDED
@@ -0,0 +1,77 @@
1
+ {
2
+ "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
+ "python": "3.10.15",
4
+ "startedAt": "2025-05-04T10:26:10.053836Z",
5
+ "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
+ "codePath": "finetuning_bc_prott5.py",
7
+ "email": "zeynep.isik1@sabanciuniv.edu",
8
+ "root": "/arf/scratch/zisik/prott5_bc_ft",
9
+ "host": "kolyoz1",
10
+ "username": "zisik",
11
+ "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
+ "codePathLocal": "finetuning_bc_prott5.py",
13
+ "cpu_count": 64,
14
+ "cpu_count_logical": 64,
15
+ "gpu": "NVIDIA H100 80GB HBM3",
16
+ "gpu_count": 1,
17
+ "disk": {
18
+ "/": {
19
+ "total": "7643995308032",
20
+ "used": "274767593472"
21
+ }
22
+ },
23
+ "memory": {
24
+ "total": "1081373220864"
25
+ },
26
+ "cpu": {
27
+ "count": 64,
28
+ "countLogical": 64
29
+ },
30
+ "gpu_nvidia": [
31
+ {
32
+ "name": "NVIDIA H100 80GB HBM3",
33
+ "memoryTotal": "85520809984",
34
+ "cudaCores": 16896,
35
+ "architecture": "Hopper"
36
+ }
37
+ ],
38
+ "slurm": {
39
+ "cluster_name": "cuda",
40
+ "conf": "/etc/slurm/slurm.conf",
41
+ "cpus_on_node": "16",
42
+ "cpus_per_task": "16",
43
+ "gpus_on_node": "1",
44
+ "gtids": "0",
45
+ "job_account": "tbag154",
46
+ "job_cpus_per_node": "16",
47
+ "job_end_time": "1746613538",
48
+ "job_gid": "11636",
49
+ "job_gpus": "1",
50
+ "job_id": "1027932",
51
+ "job_name": "msa_ph_pt",
52
+ "job_nodelist": "kolyoz1",
53
+ "job_num_nodes": "1",
54
+ "job_partition": "kolyoz-cuda",
55
+ "job_qos": "tbag",
56
+ "job_start_time": "1746354338",
57
+ "job_uid": "11636",
58
+ "job_user": "zisik",
59
+ "jobid": "1027932",
60
+ "localid": "0",
61
+ "mem_per_cpu": "14000",
62
+ "nnodes": "1",
63
+ "node_aliases": "(null)",
64
+ "nodeid": "0",
65
+ "nodelist": "kolyoz1",
66
+ "prio_process": "0",
67
+ "procid": "0",
68
+ "submit_dir": "/arf/scratch/zisik",
69
+ "submit_host": "cuda-ui",
70
+ "task_pid": "3156950",
71
+ "tasks_per_node": "1",
72
+ "topology_addr": "kolyoz1",
73
+ "topology_addr_pattern": "node",
74
+ "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
+ },
76
+ "cudaVersion": "12.6"
77
+ }
wandb/run-20250504_132610-pxg645u5/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"_wandb":{"runtime":6}}
wandb/run-20250504_132610-pxg645u5/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
1
+ {"time":"2025-05-04T13:26:09.392354119+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmppack6571/port-3156976.txt","pid":3156976,"debug":false,"disable-analytics":false}
2
+ {"time":"2025-05-04T13:26:09.392402628+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2025-05-04T13:26:09.393200765+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":36685,"Zone":""}}
4
+ {"time":"2025-05-04T13:26:09.393299078+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3156976}
5
+ {"time":"2025-05-04T13:26:09.570123715+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:37852"}
6
+ {"time":"2025-05-04T13:26:10.055349971+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"pxg645u5","id":"127.0.0.1:37852"}
7
+ {"time":"2025-05-04T13:26:10.180212249+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"pxg645u5","id":"127.0.0.1:37852"}
8
+ {"time":"2025-05-04T13:26:16.993053475+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:37852"}
9
+ {"time":"2025-05-04T13:26:16.994546738+03:00","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-05-04T13:26:16.993862146+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:37852"}
11
+ {"time":"2025-05-04T13:26:16.994899765+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:37852"}
12
+ {"time":"2025-05-04T13:26:17.953982632+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:37852"}
13
+ {"time":"2025-05-04T13:26:17.954000039+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:37852"}
14
+ {"time":"2025-05-04T13:26:17.954015604+03:00","level":"INFO","msg":"server is closed"}
wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log ADDED
@@ -0,0 +1,19 @@
1
+ {"time":"2025-05-04T13:26:10.056874799+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2025-05-04T13:26:10.056920353+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug-core.log"}
3
+ {"time":"2025-05-04T13:26:10.180146537+03:00","level":"INFO","msg":"created new stream","id":"pxg645u5"}
4
+ {"time":"2025-05-04T13:26:10.180200098+03:00","level":"INFO","msg":"stream: started","id":"pxg645u5"}
5
+ {"time":"2025-05-04T13:26:10.180372555+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"pxg645u5"}
6
+ {"time":"2025-05-04T13:26:10.180478207+03:00","level":"INFO","msg":"sender: started","stream_id":"pxg645u5"}
7
+ {"time":"2025-05-04T13:26:10.18057531+03:00","level":"INFO","msg":"handler: started","stream_id":"pxg645u5"}
8
+ {"time":"2025-05-04T13:26:10.587540794+03:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2025-05-04T13:26:16.993666261+03:00","level":"INFO","msg":"stream: closing","id":"pxg645u5"}
10
+ {"time":"2025-05-04T13:26:16.993748173+03:00","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2025-05-04T13:26:16.995793958+03:00","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2025-05-04T13:26:17.198876326+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
+ {"time":"2025-05-04T13:26:17.198909473+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
+ {"time":"2025-05-04T13:26:17.198920913+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
+ {"time":"2025-05-04T13:26:17.694743818+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
16
+ {"time":"2025-05-04T13:26:17.953755664+03:00","level":"INFO","msg":"handler: closed","stream_id":"pxg645u5"}
17
+ {"time":"2025-05-04T13:26:17.953802728+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"pxg645u5"}
18
+ {"time":"2025-05-04T13:26:17.953828101+03:00","level":"INFO","msg":"sender: closed","stream_id":"pxg645u5"}
19
+ {"time":"2025-05-04T13:26:17.953904675+03:00","level":"INFO","msg":"stream: closed","id":"pxg645u5"}
wandb/run-20250504_132610-pxg645u5/logs/debug.log ADDED
@@ -0,0 +1,26 @@
1
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Configure stats pid to 3156976
3
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Applying login settings: {}
8
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2025-05-04 13:26:10,046 INFO MainThread:3156976 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug.log
10
+ 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132610-pxg645u5/logs/debug-internal.log
11
+ 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():619] calling init triggers
12
+ 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():669] starting backend
15
+ 2025-05-04 13:26:10,047 INFO MainThread:3156976 [wandb_init.py:init():673] sending inform_init request
16
+ 2025-05-04 13:26:10,052 INFO MainThread:3156976 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2025-05-04 13:26:10,053 INFO MainThread:3156976 [wandb_init.py:init():686] backend started and connected
18
+ 2025-05-04 13:26:10,061 INFO MainThread:3156976 [wandb_init.py:init():781] updated telemetry
19
+ 2025-05-04 13:26:10,064 INFO MainThread:3156976 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2025-05-04 13:26:10,574 INFO MainThread:3156976 [wandb_init.py:init():867] starting run threads in backend
21
+ 2025-05-04 13:26:12,208 INFO MainThread:3156976 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2025-05-04 13:26:12,209 INFO MainThread:3156976 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2025-05-04 13:26:12,209 INFO MainThread:3156976 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2025-05-04 13:26:12,209 INFO MainThread:3156976 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2025-05-04 13:26:12,220 INFO MainThread:3156976 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2025-05-04 13:26:16,995 WARNING MsgRouterThr:3156976 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250504_132610-pxg645u5/run-pxg645u5.wandb ADDED
Binary file (5.5 kB)
wandb/run-20250504_132912-1agsw1y8/files/config.yaml ADDED
@@ -0,0 +1,374 @@
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m:
5
+ - "1": train/epoch
6
+ "5": 2
7
+ "6":
8
+ - 1
9
+ - 3
10
+ "7": []
11
+ - "1": train/global_step
12
+ "6":
13
+ - 3
14
+ "7": []
15
+ - "1": eval/runtime
16
+ "5": 2
17
+ "6":
18
+ - 1
19
+ - 3
20
+ "7": []
21
+ - "1": train/loss
22
+ "5": 2
23
+ "6":
24
+ - 1
25
+ - 3
26
+ "7": []
27
+ - "1": train/grad_norm
28
+ "5": 2
29
+ "6":
30
+ - 1
31
+ - 3
32
+ "7": []
33
+ - "1": train/learning_rate
34
+ "5": 2
35
+ "6":
36
+ - 1
37
+ - 3
38
+ "7": []
39
+ - "1": eval/loss
40
+ "5": 2
41
+ "6":
42
+ - 1
43
+ - 3
44
+ "7": []
45
+ - "1": eval/samples_per_second
46
+ "5": 2
47
+ "6":
48
+ - 1
49
+ - 3
50
+ "7": []
51
+ - "1": eval/steps_per_second
52
+ "5": 2
53
+ "6":
54
+ - 1
55
+ - 3
56
+ "7": []
57
+ - "1": eval/accuracy
58
+ "5": 2
59
+ "6":
60
+ - 1
61
+ - 3
62
+ "7": []
63
+ python_version: 3.10.15
64
+ t:
65
+ "1":
66
+ - 1
67
+ - 2
68
+ - 3
69
+ - 5
70
+ - 11
71
+ - 12
72
+ - 49
73
+ - 51
74
+ - 53
75
+ - 55
76
+ - 71
77
+ - 98
78
+ - 105
79
+ "2":
80
+ - 1
81
+ - 2
82
+ - 3
83
+ - 5
84
+ - 6
85
+ - 11
86
+ - 12
87
+ - 49
88
+ - 51
89
+ - 53
90
+ - 55
91
+ - 71
92
+ - 98
93
+ - 105
94
+ "3":
95
+ - 7
96
+ - 23
97
+ - 55
98
+ - 66
99
+ "4": 3.10.15
100
+ "5": 0.18.7
101
+ "6": 4.45.2
102
+ "8":
103
+ - 5
104
+ "9":
105
+ "1": transformers_trainer
106
+ "12": 0.18.7
107
+ "13": linux-x86_64
108
+ accelerator_config:
109
+ value:
110
+ dispatch_batches: null
111
+ even_batches: true
112
+ gradient_accumulation_kwargs: null
113
+ non_blocking: false
114
+ split_batches: false
115
+ use_seedable_sampler: true
116
+ adafactor:
117
+ value: false
118
+ adam_beta1:
119
+ value: 0.9
120
+ adam_beta2:
121
+ value: 0.999
122
+ adam_epsilon:
123
+ value: 1e-08
124
+ auto_find_batch_size:
125
+ value: false
126
+ batch_eval_metrics:
127
+ value: false
128
+ bf16:
129
+ value: false
130
+ bf16_full_eval:
131
+ value: false
132
+ data_seed:
133
+ value: null
134
+ dataloader_drop_last:
135
+ value: false
136
+ dataloader_num_workers:
137
+ value: 0
138
+ dataloader_persistent_workers:
139
+ value: false
140
+ dataloader_pin_memory:
141
+ value: true
142
+ dataloader_prefetch_factor:
143
+ value: null
144
+ ddp_backend:
145
+ value: null
146
+ ddp_broadcast_buffers:
147
+ value: null
148
+ ddp_bucket_cap_mb:
149
+ value: null
150
+ ddp_find_unused_parameters:
151
+ value: null
152
+ ddp_timeout:
153
+ value: 1800
154
+ debug:
155
+ value: []
156
+ deepspeed:
157
+ value: null
158
+ disable_tqdm:
159
+ value: false
160
+ dispatch_batches:
161
+ value: null
162
+ do_eval:
163
+ value: true
164
+ do_predict:
165
+ value: false
166
+ do_train:
167
+ value: false
168
+ eval_accumulation_steps:
169
+ value: null
170
+ eval_delay:
171
+ value: 0
172
+ eval_do_concat_batches:
173
+ value: true
174
+ eval_on_start:
175
+ value: false
176
+ eval_steps:
177
+ value: null
178
+ eval_strategy:
179
+ value: epoch
180
+ eval_use_gather_object:
181
+ value: false
182
+ evaluation_strategy:
183
+ value: epoch
184
+ fp16:
185
+ value: true
186
+ fp16_backend:
187
+ value: auto
188
+ fp16_full_eval:
189
+ value: false
190
+ fp16_opt_level:
191
+ value: O1
192
+ fsdp:
193
+ value: []
194
+ fsdp_config:
195
+ value:
196
+ min_num_params: 0
197
+ xla: false
198
+ xla_fsdp_grad_ckpt: false
199
+ xla_fsdp_v2: false
200
+ fsdp_min_num_params:
201
+ value: 0
202
+ fsdp_transformer_layer_cls_to_wrap:
203
+ value: null
204
+ full_determinism:
205
+ value: false
206
+ gradient_accumulation_steps:
207
+ value: 4
208
+ gradient_checkpointing:
209
+ value: false
210
+ gradient_checkpointing_kwargs:
211
+ value: null
212
+ greater_is_better:
213
+ value: false
214
+ group_by_length:
215
+ value: false
216
+ half_precision_backend:
217
+ value: auto
218
+ hub_always_push:
219
+ value: false
220
+ hub_model_id:
221
+ value: null
222
+ hub_private_repo:
223
+ value: false
224
+ hub_strategy:
225
+ value: every_save
226
+ hub_token:
227
+ value: <HUB_TOKEN>
228
+ ignore_data_skip:
229
+ value: false
230
+ include_inputs_for_metrics:
231
+ value: false
232
+ include_num_input_tokens_seen:
233
+ value: false
234
+ include_tokens_per_second:
235
+ value: false
236
+ jit_mode_eval:
237
+ value: false
238
+ label_names:
239
+ value: null
240
+ label_smoothing_factor:
241
+ value: 0
242
+ learning_rate:
243
+ value: 5e-05
244
+ length_column_name:
245
+ value: length
246
+ load_best_model_at_end:
247
+ value: true
248
+ local_rank:
249
+ value: 0
250
+ log_level:
251
+ value: passive
252
+ log_level_replica:
253
+ value: warning
254
+ log_on_each_node:
255
+ value: true
256
+ logging_dir:
257
+ value: t5-bc-out/runs/May04_13-33-08_kolyoz1
258
+ logging_first_step:
259
+ value: false
260
+ logging_nan_inf_filter:
261
+ value: true
262
+ logging_steps:
263
+ value: 500
264
+ logging_strategy:
265
+ value: steps
266
+ lr_scheduler_type:
267
+ value: linear
268
+ max_grad_norm:
269
+ value: 1
270
+ max_steps:
271
+ value: -1
272
+ metric_for_best_model:
273
+ value: loss
274
+ mp_parameters:
275
+ value: ""
276
+ neftune_noise_alpha:
277
+ value: null
278
+ no_cuda:
279
+ value: false
280
+ num_train_epochs:
281
+ value: 3
282
+ optim:
283
+ value: adamw_torch
284
+ optim_args:
285
+ value: null
286
+ optim_target_modules:
287
+ value: null
288
+ output_dir:
289
+ value: t5-bc-out
290
+ overwrite_output_dir:
291
+ value: false
292
+ past_index:
293
+ value: -1
294
+ per_device_eval_batch_size:
295
+ value: 8
296
+ per_device_train_batch_size:
297
+ value: 8
298
+ per_gpu_eval_batch_size:
299
+ value: null
300
+ per_gpu_train_batch_size:
301
+ value: null
302
+ prediction_loss_only:
303
+ value: false
304
+ push_to_hub:
305
+ value: false
306
+ push_to_hub_model_id:
307
+ value: null
308
+ push_to_hub_organization:
309
+ value: null
310
+ push_to_hub_token:
311
+ value: <PUSH_TO_HUB_TOKEN>
312
+ ray_scope:
313
+ value: last
314
+ remove_unused_columns:
315
+ value: true
316
+ report_to:
317
+ value:
318
+ - wandb
319
+ restore_callback_states_from_checkpoint:
320
+ value: false
321
+ resume_from_checkpoint:
322
+ value: null
323
+ run_name:
324
+ value: t5-bc-out
325
+ save_on_each_node:
326
+ value: false
327
+ save_only_model:
328
+ value: false
329
+ save_safetensors:
330
+ value: true
331
+ save_steps:
332
+ value: 500
333
+ save_strategy:
334
+ value: epoch
335
+ save_total_limit:
336
+ value: null
337
+ seed:
338
+ value: 42
339
+ skip_memory_metrics:
340
+ value: true
341
+ split_batches:
342
+ value: null
343
+ tf32:
344
+ value: null
345
+ torch_compile:
346
+ value: false
347
+ torch_compile_backend:
348
+ value: null
349
+ torch_compile_mode:
350
+ value: null
351
+ torch_empty_cache_steps:
352
+ value: null
353
+ torchdynamo:
354
+ value: null
355
+ tpu_metrics_debug:
356
+ value: false
357
+ tpu_num_cores:
358
+ value: null
359
+ use_cpu:
360
+ value: false
361
+ use_ipex:
362
+ value: false
363
+ use_legacy_prediction_loop:
364
+ value: false
365
+ use_liger_kernel:
366
+ value: false
367
+ use_mps_device:
368
+ value: false
369
+ warmup_ratio:
370
+ value: 0
371
+ warmup_steps:
372
+ value: 0
373
+ weight_decay:
374
+ value: 0
wandb/run-20250504_132912-1agsw1y8/files/output.log ADDED
@@ -0,0 +1,87 @@
1
+ You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
+ Map: 100%|██████████| 511104/511104 [00:20<00:00, 25525.81 examples/s]
3
+ Map: 100%|██████████| 109522/109522 [00:04<00:00, 26956.64 examples/s]
4
+ /arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
5
+ warnings.warn(
6
+ [2025-05-04 13:33:14,758] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
7
+ wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
8
+ 33%|███▎ | 15972/47916 [2:22:01<4:54:49, 1.81it/s]
9
+ {'loss': 0.6947, 'grad_norm': 0.09912440180778503, 'learning_rate': 4.947825361048502e-05, 'epoch': 0.03}
10
+ {'loss': 0.6939, 'grad_norm': 0.23786939680576324, 'learning_rate': 4.8956507220970036e-05, 'epoch': 0.06}
11
+ {'loss': 0.6936, 'grad_norm': 0.10555226355791092, 'learning_rate': 4.843476083145505e-05, 'epoch': 0.09}
12
+ {'loss': 0.6935, 'grad_norm': 0.28058305382728577, 'learning_rate': 4.791301444194006e-05, 'epoch': 0.13}
13
+ {'loss': 0.6937, 'grad_norm': 0.13599741458892822, 'learning_rate': 4.739126805242508e-05, 'epoch': 0.16}
14
+ {'loss': 0.6935, 'grad_norm': 0.13076388835906982, 'learning_rate': 4.6869521662910095e-05, 'epoch': 0.19}
15
+ {'loss': 0.6934, 'grad_norm': 0.1778457760810852, 'learning_rate': 4.634777527339511e-05, 'epoch': 0.22}
16
+ {'loss': 0.6935, 'grad_norm': 0.4112167954444885, 'learning_rate': 4.582602888388012e-05, 'epoch': 0.25}
17
+ {'loss': 0.6934, 'grad_norm': 0.1330016702413559, 'learning_rate': 4.530428249436514e-05, 'epoch': 0.28}
18
+ {'loss': 0.6935, 'grad_norm': 0.09426847100257874, 'learning_rate': 4.478253610485016e-05, 'epoch': 0.31}
19
+ {'loss': 0.6933, 'grad_norm': 0.3686296343803406, 'learning_rate': 4.426078971533517e-05, 'epoch': 0.34}
20
+ {'loss': 0.6933, 'grad_norm': 0.21278153359889984, 'learning_rate': 4.373904332582019e-05, 'epoch': 0.38}
21
+ {'loss': 0.6935, 'grad_norm': 0.23074378073215485, 'learning_rate': 4.321834042908423e-05, 'epoch': 0.41}
22
+ {'loss': 0.6932, 'grad_norm': 0.5192509293556213, 'learning_rate': 4.269659403956925e-05, 'epoch': 0.44}
23
+ {'loss': 0.6932, 'grad_norm': 0.07643919438123703, 'learning_rate': 4.217484765005426e-05, 'epoch': 0.47}
24
+ {'loss': 0.6935, 'grad_norm': 0.09435634315013885, 'learning_rate': 4.1653101260539276e-05, 'epoch': 0.5}
25
+ {'loss': 0.6932, 'grad_norm': 0.3456329107284546, 'learning_rate': 4.113239836380333e-05, 'epoch': 0.53}
26
+ {'loss': 0.6934, 'grad_norm': 0.11689063161611557, 'learning_rate': 4.061065197428834e-05, 'epoch': 0.56}
27
+ {'loss': 0.6934, 'grad_norm': 0.25019219517707825, 'learning_rate': 4.0088905584773355e-05, 'epoch': 0.59}
28
+ {'loss': 0.6933, 'grad_norm': 0.12248441576957703, 'learning_rate': 3.956715919525837e-05, 'epoch': 0.63}
29
+ {'loss': 0.6933, 'grad_norm': 0.11549345403909683, 'learning_rate': 3.9046456298522416e-05, 'epoch': 0.66}
30
+ {'loss': 0.6934, 'grad_norm': 0.27383607625961304, 'learning_rate': 3.852470990900743e-05, 'epoch': 0.69}
31
+ {'loss': 0.6935, 'grad_norm': 0.21311810612678528, 'learning_rate': 3.800296351949245e-05, 'epoch': 0.72}
32
+ {'loss': 0.6933, 'grad_norm': 0.25916823744773865, 'learning_rate': 3.7481217129977466e-05, 'epoch': 0.75}
33
+ {'loss': 0.6934, 'grad_norm': 0.13208124041557312, 'learning_rate': 3.6960514233241504e-05, 'epoch': 0.78}
34
+ {'loss': 0.6934, 'grad_norm': 0.4182877242565155, 'learning_rate': 3.643876784372652e-05, 'epoch': 0.81}
35
+ {'loss': 0.6933, 'grad_norm': 0.19375275075435638, 'learning_rate': 3.5917021454211544e-05, 'epoch': 0.85}
36
+ {'loss': 0.6933, 'grad_norm': 0.1647150218486786, 'learning_rate': 3.5395275064696554e-05, 'epoch': 0.88}
37
+ {'loss': 0.6933, 'grad_norm': 0.458692729473114, 'learning_rate': 3.48745721679606e-05, 'epoch': 0.91}
38
+ {'loss': 0.6933, 'grad_norm': 0.24417555332183838, 'learning_rate': 3.4352825778445616e-05, 'epoch': 0.94}
39
+ {'loss': 0.6932, 'grad_norm': 0.10788150876760483, 'learning_rate': 3.383107938893063e-05, 'epoch': 0.97}
40
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 125, in <module>
41
+ {'eval_loss': 0.6931192278862, 'eval_accuracy': 0.4992604225635032, 'eval_runtime': 182.4166, 'eval_samples_per_second': 600.395, 'eval_steps_per_second': 75.053, 'epoch': 1.0}
42
+ trainer.train()
43
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2052, in train
44
+ return inner_training_loop(
45
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2487, in _inner_training_loop
46
+ self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
47
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2918, in _maybe_log_save_evaluate
48
+ self._save_checkpoint(model, trial, metrics=metrics)
49
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3008, in _save_checkpoint
50
+ self.save_model(output_dir, _internal_call=True)
51
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3623, in save_model
52
+ self._save(output_dir)
53
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3721, in _save
54
+ safetensors.torch.save_file(
55
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 286, in save_file
56
+ serialize_file(_flatten(tensors), filename, metadata=metadata)
57
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 488, in _flatten
58
+ raise RuntimeError(
59
+ RuntimeError:
60
+ Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'encoder.encoder.embed_tokens.weight', 'encoder.shared.weight'}].
61
+ A potential way to correctly save your model is to use `save_model`.
62
+ More information at https://huggingface.co/docs/safetensors/torch_shared_tensors
63
+
64
+ Traceback (most recent call last):
65
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 125, in <module>
66
+ trainer.train()
67
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2052, in train
68
+ return inner_training_loop(
69
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2487, in _inner_training_loop
70
+ self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
71
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 2918, in _maybe_log_save_evaluate
72
+ self._save_checkpoint(model, trial, metrics=metrics)
73
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3008, in _save_checkpoint
74
+ self.save_model(output_dir, _internal_call=True)
75
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3623, in save_model
76
+ self._save(output_dir)
77
+ File "/arf/home/zisik/.local/lib/python3.10/site-packages/transformers/trainer.py", line 3721, in _save
78
+ safetensors.torch.save_file(
79
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 286, in save_file
80
+ serialize_file(_flatten(tensors), filename, metadata=metadata)
81
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/safetensors/torch.py", line 488, in _flatten
82
+ raise RuntimeError(
83
+ RuntimeError:
84
+ Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'encoder.encoder.embed_tokens.weight', 'encoder.shared.weight'}].
85
+ A potential way to correctly save your model is to use `save_model`.
86
+ More information at https://huggingface.co/docs/safetensors/torch_shared_tensors
87
+
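Note on the RuntimeError above: safetensors refuses to serialize tied tensors, and in this ProtT5 encoder the shared embedding (`encoder.shared.weight`) and `encoder.encoder.embed_tokens.weight` point to the same storage. A minimal sketch of one workaround, assuming the standard `Trainer` setup in `finetuning_bc_prott5.py`, is to fall back to `torch.save`-based checkpoints; this matches the `save_safetensors: False` visible in the later run's logged config:

    from transformers import TrainingArguments

    # Sketch only: values other than save_safetensors mirror the logged config.
    training_args = TrainingArguments(
        output_dir="t5-bc-out",
        save_strategy="epoch",
        save_safetensors=False,  # skip safetensors so the tied embedding can be checkpointed
    )

Untying or cloning the shared embedding before saving would also avoid the error, but that path is not shown in these logs.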
wandb/run-20250504_132912-1agsw1y8/files/requirements.txt ADDED
@@ -0,0 +1,541 @@
1
+ nvidia-cuda-cupti-cu12==12.4.127
2
+ nvidia-cuda-nvrtc-cu12==12.4.127
3
+ pyg-lib==0.4.0+pt20cu117
4
+ biopython==1.85
5
+ iniconfig==2.0.0
6
+ tokenizers==0.20.0
7
+ accelerate==1.3.0
8
+ torch==2.6.0
9
+ nvidia-nccl-cu12==2.21.5
10
+ transformers==4.45.2
11
+ nvidia-cusparse-cu12==12.3.1.170
12
+ torch-scatter==2.1.2+pt20cu117
13
+ nvidia-cusparselt-cu12==0.6.2
14
+ nvidia-nvtx-cu12==12.4.127
15
+ zstd==1.5.6.6
16
+ fair-esm==2.0.0
17
+ omegaconf==2.3.0
18
+ pluggy==1.5.0
19
+ pytest==8.3.5
20
+ nvidia-curand-cu12==10.3.5.147
21
+ nvidia-cufft-cu12==11.2.1.3
22
+ torch-cluster==1.6.3+pt20cu117
23
+ regex==2024.9.11
24
+ nvidia-cudnn-cu12==9.1.0.70
25
+ torch-spline-conv==1.2.2+pt20cu117
26
+ nvidia-cusolver-cu12==11.6.1.9
27
+ antlr4-python3-runtime==4.9.3
28
+ msgpack-numpy==0.4.8
29
+ nlp==0.2.0
30
+ einops==0.8.1
31
+ nvidia-cublas-cu12==12.4.5.8
32
+ triton==3.2.0
33
+ ninja==1.11.1.3
34
+ hydra-core==1.3.2
35
+ nvidia-nvjitlink-cu12==12.4.127
36
+ biotite==0.41.2
37
+ torch-sparse==0.6.18+pt20cu117
38
+ esm==3.1.4
39
+ sympy==1.13.1
40
+ nvidia-cuda-runtime-cu12==12.4.127
41
+ jupyter-lsp==2.2.5
42
+ jupyter-events==0.10.0
43
+ ipykernel==6.29.5
44
+ Mako==1.3.5
45
+ proto-plus==1.25.0
46
+ fst-pso==1.8.1
47
+ gensim==4.3.3
48
+ htmlmin==0.1.12
49
+ tokenizers==0.13.3
50
+ timm==1.0.11
51
+ MarkupSafe==3.0.2
52
+ safetensors==0.4.5
53
+ requests==2.32.3
54
+ gast==0.5.5
55
+ cuml==24.12.0a33
56
+ jaxlib==0.4.23.dev20240214
57
+ spacy-loggers==1.0.5
58
+ pytz==2024.1
59
+ idna==3.10
60
+ python-dateutil==2.9.0
61
+ mdurl==0.1.2
62
+ blis==0.7.10
63
+ jupyter==1.1.1
64
+ pyerfa==2.0.1.5
65
+ comm==0.2.2
66
+ pygraphviz==1.14
67
+ dill==0.3.8
68
+ paramiko==3.5.0
69
+ llama-index==0.8.36
70
+ mdit-py-plugins==0.4.2
71
+ Werkzeug==3.1.3
72
+ pyu2f==0.1.5
73
+ dask-glm==0.2.0
74
+ httpx==0.27.2
75
+ typeguard==4.4.1
76
+ mypy-extensions==1.0.0
77
+ kmodes==0.12.2
78
+ keras==2.15.0
79
+ ydata-profiling==0.0.dev0
80
+ regex==2024.11.6
81
+ xarray==2024.11.0
82
+ setuptools==75.3.0
83
+ charset-normalizer==3.4.0
84
+ jupyterlab_nvdashboard==0.11.0
85
+ pylibraft==24.12.0a36
86
+ spacy==3.7.6
87
+ mlflow-skinny==2.17.2
88
+ nvtx==0.2.10
89
+ multimethod==1.12
90
+ pexpect==4.9.0
91
+ torch==2.1.0.post301
92
+ flatbuffers==24.3.25
93
+ python-json-logger==2.0.7
94
+ PyJWT==2.9.0
95
+ multiprocess==0.70.16
96
+ colorlover==0.3.0
97
+ yarl==1.16.0
98
+ locket==1.0.0
99
+ patsy==1.0.0
100
+ rapids-dask-dependency==24.12.0a0
101
+ stanza==1.9.2
102
+ debugpy==1.8.8
103
+ jupyterlab_pygments==0.3.0
104
+ pylibcudf==24.12.0a337
105
+ lz4==4.3.3
106
+ pandas==2.2.3
107
+ tifffile==2024.9.20
108
+ pynvml==11.4.1
109
+ cufflinks==0.17.3
110
+ ipywidgets==8.1.5
111
+ requests-oauthlib==2.0.0
112
+ google-auth-oauthlib==1.2.1
113
+ rsa==4.9
114
+ webcolors==24.8.0
115
+ jsonschema-specifications==2024.10.1
116
+ scikit-learn==1.5.2
117
+ langchain-text-splitters==0.3.2
118
+ pandas-datareader==0.10.0
119
+ tomli==2.0.2
120
+ tzdata==2024.2
121
+ scikit-image==0.24.0
122
+ tensorboard_data_server==0.7.0
123
+ kiwisolver==1.4.7
124
+ cloudpathlib==0.20.0
125
+ isodate==0.6.1
126
+ adversarial-robustness-toolbox==1.19.1
127
+ SQLAlchemy==2.0.36
128
+ pytest-runner==6.0.0
129
+ pycairo==1.27.0
130
+ treelite==4.3.0
131
+ jiter==0.7.0
132
+ threadpoolctl==3.5.0
133
+ pandocfilters==1.5.0
134
+ loguru==0.7.2
135
+ smart_open==7.0.5
136
+ shellingham==1.5.4
137
+ deepspeed==0.15.4
138
+ prompt_toolkit==3.0.48
139
+ databricks-sdk==0.34.0
140
+ langchain-core==0.3.15
141
+ imageio==2.36.0
142
+ openapi-schema-pydantic==1.2.4
143
+ zict==3.0.0
144
+ cachetools==5.5.0
145
+ colorful==0.5.6
146
+ mpmath==1.3.0
147
+ nest_asyncio==1.6.0
148
+ pyFUME==0.2.25
149
+ opencv-python-headless==4.9.0
150
+ fastai==2.7.18
151
+ importlib_resources==6.4.5
152
+ binaryornot==0.4.4
153
+ evaluate==0.4.1
154
+ matplotlib-inline==0.1.7
155
+ wasabi==1.1.2
156
+ pycparser==2.22
157
+ GitPython==3.1.43
158
+ pluggy==1.5.0
159
+ async-lru==2.0.4
160
+ pgmpy==0.1.24
161
+ anyio==4.4.0
162
+ executing==2.1.0
163
+ orjson==3.10.11
164
+ humanfriendly==10.0
165
+ tornado==6.4.1
166
+ gmpy2==2.1.5
167
+ rlPyCairo==0.2.0
168
+ distributed==2024.11.0
169
+ FuzzyTM==2.0.5
170
+ torchtext==0.15.2a0+5ce3163
171
+ pytest==8.3.5
172
+ pyod==2.0.2
173
+ ImageHash==4.3.1
174
+ soupsieve==2.5
175
+ tblib==3.0.0
176
+ emoji==2.14.0
177
+ aiohappyeyeballs==2.4.3
178
+ uri-template==1.3.0
179
+ tensorflow_estimator==2.15.0
180
+ babel==2.16.0
181
+ dask-cuda==24.12.0a12
182
+ overrides==7.7.0
183
+ opencensus==0.11.3
184
+ openai==0.28.1
185
+ language_data==1.2.0
186
+ jedi==0.19.2
187
+ cookiecutter==2.6.0
188
+ entrypoints==0.4
189
+ exceptiongroup==1.2.2
190
+ marisa-trie==1.2.0
191
+ uvloop==0.20.0
192
+ aiosignal==1.3.1
193
+ Flask==3.0.3
194
+ tensorboard==2.15.2
195
+ cffi==1.17.1
196
+ tf_keras==2.15.0
197
+ absl-py==2.1.0
198
+ blinker==1.9.0
199
+ types-python-dateutil==2.9.0.20241003
200
+ opencv-python==4.9.0
201
+ frozendict==2.4.6
202
+ aiohttp-cors==0.7.0
203
+ statsmodels==0.14.4
204
+ tinycss2==1.4.0
205
+ terminado==0.18.1
206
+ pycaret==2.2.3
207
+ aiohttp==3.10.10
208
+ distributed-ucxx==0.41.0
209
+ prometheus_client==0.21.0
210
+ fastdownload==0.0.7
211
+ grpcio==1.59.3
212
+ google-api-core==2.22.0
213
+ jupyterlab_widgets==3.0.13
214
+ appdirs==1.4.4
215
+ littleutils==0.0.0
216
+ ray==2.24.0
217
+ kaggle==1.6.17
218
+ jsonschema==4.23.0
219
+ google-auth==2.36.0
220
+ scikit-base==0.11.0
221
+ visions==0.7.6
222
+ pyarrow==15.0.0
223
+ transformers==4.33.0
224
+ prometheus_flask_exporter==0.23.1
225
+ dm-tree==0.1.8
226
+ colorama==0.4.6
227
+ requests-toolbelt==1.0.0
228
+ cached-property==1.5.2
229
+ cymem==2.0.8
230
+ PyNaCl==1.5.0
231
+ PyWavelets==1.7.0
232
+ httptools==0.6.1
233
+ typing-utils==0.1.0
234
+ email_validator==2.2.0
235
+ marshmallow==3.23.1
236
+ Deprecated==1.2.14
237
+ virtualenv==20.4.7
238
+ optuna==3.6.1
239
+ jupyter_server==2.14.2
240
+ termcolor==2.5.0
241
+ mpi4py==4.0.1
242
+ torchdata==0.7.1+8cea82f
243
+ dataclasses==0.8
244
+ cloudpickle==3.1.0
245
+ tree_sitter_languages==1.10.2
246
+ tabulate==0.9.0
247
+ ipython==8.29.0
248
+ lightgbm==4.3.0
249
+ captum==0.6.0
250
+ confuse==2.0.1
251
+ torchvision==0.16.1+adc3221
252
+ lxml==4.9.4
253
+ fastapi==0.115.4
254
+ python-multipart==0.0.17
255
+ dnspython==2.7.0
256
+ jupyter-console==6.6.3
257
+ preshed==3.0.9
258
+ py-cpuinfo==9.0.0
259
+ Send2Trash==1.8.3
260
+ murmurhash==1.0.10
261
+ sniffio==1.3.1
262
+ websockets==13.1
263
+ h11==0.14.0
264
+ smmap==5.0.0
265
+ textual==0.85.2
266
+ jsonpatch==1.33
267
+ opencensus-context==0.1.3
268
+ nbconvert==7.16.4
269
+ sentry-sdk==2.19.0
270
+ opentelemetry-semantic-conventions==0.37b0
271
+ pandas-profiling==2.8.0
272
+ pillow==10.3.0
273
+ peft==0.13.2
274
+ rpds-py==0.21.0
275
+ bokeh==3.6.1
276
+ distro==1.9.0
277
+ itsdangerous==2.2.0
278
+ wandb==0.18.7
279
+ jsonpointer==3.0.0
280
+ astropy-iers-data==0.2024.11.11.0.32.38
281
+ horovod==0.28.1
282
+ graphviz==0.20.3
283
+ vtk==9.3.1
284
+ bleach==6.2.0
285
+ numexpr==2.8.7
286
+ pydantic_core==2.23.4
287
+ Jinja2==3.1.4
288
+ widgetsnbextension==4.0.13
289
+ filelock==3.16.1
290
+ catboost==1.2.7
291
+ raft-dask==24.12.0a36
292
+ async-timeout==4.0.3
293
+ datefinder==0.7.3
294
+ coloredlogs==15.0.1
295
+ platformdirs==4.3.6
296
+ spacy-legacy==3.0.12
297
+ chardet==5.2.0
298
+ jupyter_client==8.6.3
299
+ importlib_metadata==8.5.0
300
+ rfc3986-validator==0.1.1
301
+ huggingface_hub==0.26.2
302
+ PySocks==1.7.1
303
+ mlxtend==0.23.2
304
+ outdated==0.2.2
305
+ partd==1.4.2
306
+ thinc==8.2.5
307
+ astropy==6.1.6
308
+ rdflib==6.3.2
309
+ h2==4.1.0
310
+ typer==0.13.0
311
+ xyzservices==2024.9.0
312
+ toolz==0.12.1
313
+ frozenlist==1.5.0
314
+ rdkit==2024.9.2
315
+ pyasn1==0.6.1
316
+ jupyter_server_terminals==0.5.3
317
+ ucx-py==0.41.0a11
318
+ astunparse==1.6.3
319
+ simpful==2.12.0
320
+ notebook_shim==0.2.4
321
+ scipy==1.13.1
322
+ colorlog==6.9.0
323
+ tiktoken==0.3.3
324
+ plotly==5.24.1
325
+ fastrlock==0.8.2
326
+ chart-studio==1.1.0
327
+ stack-data==0.6.2
328
+ google-pasta==0.2.0
329
+ sktime==0.34.0
330
+ PyYAML==6.0.2
331
+ sympy==1.13.3
332
+ multidict==6.1.0
333
+ ml-dtypes==0.2.0
334
+ tensorboardX==2.6.2.2
335
+ decorator==5.1.1
336
+ cytoolz==1.0.0
337
+ ase==3.23.0
338
+ isoduration==20.11.0
339
+ html5lib==1.1
340
+ langsmith==0.1.142
341
+ future==1.0.0
342
+ onnx2torch==1.5.15
343
+ multipledispatch==0.6.0
344
+ protobuf==4.24.4
345
+ ucxx==0.41.0
346
+ pandas_flavor==0.6.0
347
+ msgpack==1.1.0
348
+ pyasn1_modules==0.4.1
349
+ imagecodecs==2024.1.1
350
+ mlflow==2.17.2
351
+ watchfiles==0.24.0
352
+ dm-sonnet==2.0.2
353
+ langcodes==3.4.1
354
+ freetype-py==2.3.0
355
+ argon2-cffi-bindings==21.2.0
356
+ trimesh==4.5.2
357
+ opt_einsum==3.4.0
358
+ tenacity==8.5.0
359
+ h5py==3.12.1
360
+ fastapi-cli==0.0.5
361
+ oauthlib==3.2.2
362
+ parso==0.8.4
363
+ weasel==0.4.1
364
+ yfinance==0.2.49
365
+ networkx==2.8.8
366
+ bitsandbytes==0.44.1
367
+ lazy_loader==0.4
368
+ querystring_parser==1.2.4
369
+ contourpy==1.3.0
370
+ unicodedata2==15.1.0
371
+ bcrypt==4.2.0
372
+ munkres==1.1.4
373
+ langchain==0.0.298
374
+ hpack==4.0.0
375
+ cryptography==43.0.3
376
+ umap-learn==0.5.7
377
+ arrow==1.3.0
378
+ docker==7.1.0
379
+ certifi==2025.1.31
380
+ fastjsonschema==2.20.0
381
+ tensorflow==2.15.0
382
+ googleapis-common-protos==1.65.0
383
+ iniconfig==2.0.0
384
+ Markdown==3.6
385
+ llvmlite==0.43.0
386
+ wslink==2.3.2
387
+ attrs==24.2.0
388
+ rich==13.9.4
389
+ cupy==13.3.0
390
+ uc-micro-py==1.0.3
391
+ alembic==1.14.0
392
+ joblib==1.4.2
393
+ reportlab==4.2.5
394
+ miniful==0.0.6
395
+ jupyter_core==5.7.2
396
+ wheel==0.45.0
397
+ phik==0.12.3
398
+ mistune==3.0.2
399
+ wcwidth==0.2.13
400
+ dacite==1.8.1
401
+ accelerate==0.22.0
402
+ sacremoses==0.0.53
403
+ revtok==0.0.3
404
+ python-slugify==8.0.4
405
+ tangled-up-in-unicode==0.2.0
406
+ dask==2024.11.0
407
+ markdown-it-py==3.0.0
408
+ sentencepiece==0.1.99
409
+ beautifulsoup4==4.12.3
410
+ six==1.16.0
411
+ numba-cuda==0.0.17
412
+ argon2-cffi==23.1.0
413
+ xxhash==3.5.0
414
+ hjson==3.1.0
415
+ fonttools==4.54.1
416
+ graphql-core==3.2.5
417
+ pyparsing==3.2.0
418
+ pure_eval==0.2.3
419
+ distlib==0.3.9
420
+ lightning==2.4.0
421
+ wordcloud==0.0.0
422
+ catalogue==2.0.10
423
+ jax==0.4.27
424
+ tree-sitter==0.23.2
425
+ notebook==7.2.2
426
+ dataclasses-json==0.6.7
427
+ propcache==0.2.0
428
+ numba==0.60.0
429
+ dask-expr==1.1.17
430
+ pydantic==2.9.2
431
+ gunicorn==22.0.0
432
+ missingno==0.5.2
433
+ pyOpenSSL==24.2.1
434
+ openpyxl==3.1.5
435
+ packaging==24.1
436
+ python-dotenv==1.0.1
437
+ cycler==0.12.1
438
+ types-pytz==2024.2.0.20241003
439
+ yellowbrick==1.5
440
+ referencing==0.35.1
441
+ pyLDAvis==3.4.1
442
+ lazypredict==0.2.16
443
+ fqdn==1.5.1
444
+ websocket-client==1.8.0
445
+ fastcore==1.7.19
446
+ pynvjitlink-cu12==0.3.0
447
+ pingouin==0.5.5
448
+ numpy==1.26.4
449
+ typing-inspect==0.9.0
450
+ nltk==3.9.1
451
+ onnxruntime==1.19.2
452
+ tensorflow-probability==0.23.0
453
+ datasets==3.0.2
454
+ pickleshare==0.7.5
455
+ peewee==3.17.7
456
+ torch-geometric==2.6.1
457
+ ptyprocess==0.7.0
458
+ greenlet==3.1.1
459
+ graphql-relay==3.2.0
460
+ graphene==3.4.3
461
+ et_xmlfile==2.0.0
462
+ webencodings==0.5.1
463
+ hyperframe==6.0.1
464
+ multitasking==0.0.9
465
+ typer-slim==0.13.0
466
+ onnx==1.15.0
467
+ uvicorn==0.32.0
468
+ memray==1.13.4
469
+ xgboost==2.1.2
470
+ Brotli==1.1.0
471
+ zipp==3.21.0
472
+ nbformat==5.10.4
473
+ responses==0.18.0
474
+ funcy==2.0
475
+ Pygments==2.18.0
476
+ tqdm==4.67.0
477
+ linkify-it-py==2.0.3
478
+ srsly==2.4.8
479
+ cuda-python==12.6.0
480
+ lightning-utilities==0.11.8
481
+ cudf==24.12.0a337
482
+ dask-ml==2024.4.4
483
+ docker-pycreds==0.4.0
484
+ pkgutil_resolve_name==1.3.10
485
+ opentelemetry-api==1.16.0
486
+ fsspec==2024.9.0
487
+ nbclient==0.10.0
488
+ psutil==5.9.8
489
+ pytorch-lightning==2.4.0
490
+ sortedcontainers==2.4.0
491
+ matplotlib==3.9.2
492
+ defusedxml==0.7.1
493
+ urllib3==1.26.19
494
+ jupyterlab_server==2.27.3
495
+ retrying==1.3.3
496
+ dask-cudf==24.12.0a337
497
+ sqlparse==0.5.1
498
+ text-unidecode==1.3
499
+ seaborn==0.13.2
500
+ typing_extensions==4.12.2
501
+ pyzmq==26.2.0
502
+ rfc3339-validator==0.1.4
503
+ pynndescent==0.5.13
504
+ pip==24.3.1
505
+ confection==0.1.4
506
+ wrapt==1.14.1
507
+ fastprogress==1.0.3
508
+ traitlets==5.14.3
509
+ asttokens==2.4.1
510
+ json5==0.9.28
511
+ pandas-stubs==2.2.3.241126
512
+ torchmetrics==1.2.1
513
+ gitdb==4.0.11
514
+ annotated-types==0.7.0
515
+ ipython-autotime==0.1
516
+ httpcore==1.0.6
517
+ click==8.1.7
518
+ setproctitle==1.3.3
519
+ starlette==0.41.2
520
+ jupyterlab==4.2.5
521
+ rmm==24.12.0a27
522
+ opentelemetry-sdk==1.16.0
523
+ textblob==0.15.3
524
+ imbalanced-learn==0.12.4
525
+ typeguard==4.3.0
526
+ more-itertools==10.3.0
527
+ zipp==3.19.2
528
+ autocommand==2.2.2
529
+ jaraco.context==5.3.0
530
+ packaging==24.1
531
+ importlib_metadata==8.0.0
532
+ platformdirs==4.2.2
533
+ jaraco.functools==4.0.1
534
+ importlib_resources==6.4.0
535
+ tomli==2.0.1
536
+ jaraco.text==3.12.1
537
+ wheel==0.43.0
538
+ jaraco.collections==5.1.0
539
+ typing_extensions==4.12.2
540
+ inflect==7.3.1
541
+ backports.tarfile==1.2.0
wandb/run-20250504_132912-1agsw1y8/files/wandb-metadata.json ADDED
@@ -0,0 +1,77 @@
1
+ {
2
+ "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
+ "python": "3.10.15",
4
+ "startedAt": "2025-05-04T10:29:13.019628Z",
5
+ "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
+ "codePath": "finetuning_bc_prott5.py",
7
+ "email": "zeynep.isik1@sabanciuniv.edu",
8
+ "root": "/arf/scratch/zisik/prott5_bc_ft",
9
+ "host": "kolyoz1",
10
+ "username": "zisik",
11
+ "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
+ "codePathLocal": "finetuning_bc_prott5.py",
13
+ "cpu_count": 64,
14
+ "cpu_count_logical": 64,
15
+ "gpu": "NVIDIA H100 80GB HBM3",
16
+ "gpu_count": 1,
17
+ "disk": {
18
+ "/": {
19
+ "total": "7643995308032",
20
+ "used": "274768302080"
21
+ }
22
+ },
23
+ "memory": {
24
+ "total": "1081373220864"
25
+ },
26
+ "cpu": {
27
+ "count": 64,
28
+ "countLogical": 64
29
+ },
30
+ "gpu_nvidia": [
31
+ {
32
+ "name": "NVIDIA H100 80GB HBM3",
33
+ "memoryTotal": "85520809984",
34
+ "cudaCores": 16896,
35
+ "architecture": "Hopper"
36
+ }
37
+ ],
38
+ "slurm": {
39
+ "cluster_name": "cuda",
40
+ "conf": "/etc/slurm/slurm.conf",
41
+ "cpus_on_node": "16",
42
+ "cpus_per_task": "16",
43
+ "gpus_on_node": "1",
44
+ "gtids": "0",
45
+ "job_account": "tbag154",
46
+ "job_cpus_per_node": "16",
47
+ "job_end_time": "1746613727",
48
+ "job_gid": "11636",
49
+ "job_gpus": "1",
50
+ "job_id": "1027934",
51
+ "job_name": "msa_ph_pt",
52
+ "job_nodelist": "kolyoz1",
53
+ "job_num_nodes": "1",
54
+ "job_partition": "kolyoz-cuda",
55
+ "job_qos": "tbag",
56
+ "job_start_time": "1746354527",
57
+ "job_uid": "11636",
58
+ "job_user": "zisik",
59
+ "jobid": "1027934",
60
+ "localid": "0",
61
+ "mem_per_cpu": "14000",
62
+ "nnodes": "1",
63
+ "node_aliases": "(null)",
64
+ "nodeid": "0",
65
+ "nodelist": "kolyoz1",
66
+ "prio_process": "0",
67
+ "procid": "0",
68
+ "submit_dir": "/arf/scratch/zisik",
69
+ "submit_host": "cuda-ui",
70
+ "task_pid": "3157550",
71
+ "tasks_per_node": "1",
72
+ "topology_addr": "kolyoz1",
73
+ "topology_addr_pattern": "node",
74
+ "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
+ },
76
+ "cudaVersion": "12.6"
77
+ }
wandb/run-20250504_132912-1agsw1y8/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
1
+ {"train/learning_rate":3.383107938893063e-05,"train/global_step":15972,"eval/steps_per_second":75.053,"_timestamp":1.7463635035359182e+09,"eval/accuracy":0.4992604225635032,"_step":31,"eval/loss":0.6931192278862,"train/grad_norm":0.10788150876760483,"train/epoch":1,"_wandb":{"runtime":8950},"_runtime":8950.516897928,"train/loss":0.6932,"eval/runtime":182.4166,"eval/samples_per_second":600.395}
wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
1
+ {"time":"2025-05-04T13:29:12.35887463+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp1u83hfoi/port-3157577.txt","pid":3157577,"debug":false,"disable-analytics":false}
2
+ {"time":"2025-05-04T13:29:12.358923345+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2025-05-04T13:29:12.35977753+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45947,"Zone":""}}
4
+ {"time":"2025-05-04T13:29:12.359879073+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3157577}
5
+ {"time":"2025-05-04T13:29:12.546636547+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:34718"}
6
+ {"time":"2025-05-04T13:29:13.02161239+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"1agsw1y8","id":"127.0.0.1:34718"}
7
+ {"time":"2025-05-04T13:29:13.145638422+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"1agsw1y8","id":"127.0.0.1:34718"}
8
+ {"time":"2025-05-04T15:58:23.607250248+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:34718"}
9
+ {"time":"2025-05-04T15:58:23.607435128+03:00","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-05-04T15:58:23.607401252+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:34718"}
11
+ {"time":"2025-05-04T15:58:23.607720003+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:34718"}
12
+ {"time":"2025-05-04T15:58:24.801882716+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:34718"}
13
+ {"time":"2025-05-04T15:58:24.801915389+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:34718"}
14
+ {"time":"2025-05-04T15:58:24.801937893+03:00","level":"INFO","msg":"server is closed"}
wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log ADDED
@@ -0,0 +1,19 @@
1
+ {"time":"2025-05-04T13:29:13.023253759+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2025-05-04T13:29:13.023302807+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug-core.log"}
3
+ {"time":"2025-05-04T13:29:13.145570529+03:00","level":"INFO","msg":"created new stream","id":"1agsw1y8"}
4
+ {"time":"2025-05-04T13:29:13.145625833+03:00","level":"INFO","msg":"stream: started","id":"1agsw1y8"}
5
+ {"time":"2025-05-04T13:29:13.145806528+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"1agsw1y8"}
6
+ {"time":"2025-05-04T13:29:13.145923955+03:00","level":"INFO","msg":"handler: started","stream_id":"1agsw1y8"}
7
+ {"time":"2025-05-04T13:29:13.146011145+03:00","level":"INFO","msg":"sender: started","stream_id":"1agsw1y8"}
8
+ {"time":"2025-05-04T13:29:13.51656923+03:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2025-05-04T15:58:23.607363166+03:00","level":"INFO","msg":"stream: closing","id":"1agsw1y8"}
10
+ {"time":"2025-05-04T15:58:23.607412721+03:00","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2025-05-04T15:58:23.608736938+03:00","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2025-05-04T15:58:23.995834762+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
+ {"time":"2025-05-04T15:58:23.995863601+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
+ {"time":"2025-05-04T15:58:23.995874256+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
+ {"time":"2025-05-04T15:58:24.53730388+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
16
+ {"time":"2025-05-04T15:58:24.801427373+03:00","level":"INFO","msg":"handler: closed","stream_id":"1agsw1y8"}
17
+ {"time":"2025-05-04T15:58:24.801476891+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"1agsw1y8"}
18
+ {"time":"2025-05-04T15:58:24.801525233+03:00","level":"INFO","msg":"sender: closed","stream_id":"1agsw1y8"}
19
+ {"time":"2025-05-04T15:58:24.801589463+03:00","level":"INFO","msg":"stream: closed","id":"1agsw1y8"}
wandb/run-20250504_132912-1agsw1y8/logs/debug.log ADDED
@@ -0,0 +1,27 @@
1
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Configure stats pid to 3157577
3
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Applying login settings: {}
8
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2025-05-04 13:29:13,013 INFO MainThread:3157577 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug.log
10
+ 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_132912-1agsw1y8/logs/debug-internal.log
11
+ 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():619] calling init triggers
12
+ 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():669] starting backend
15
+ 2025-05-04 13:29:13,014 INFO MainThread:3157577 [wandb_init.py:init():673] sending inform_init request
16
+ 2025-05-04 13:29:13,018 INFO MainThread:3157577 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2025-05-04 13:29:13,019 INFO MainThread:3157577 [wandb_init.py:init():686] backend started and connected
18
+ 2025-05-04 13:29:13,026 INFO MainThread:3157577 [wandb_init.py:init():781] updated telemetry
19
+ 2025-05-04 13:29:13,030 INFO MainThread:3157577 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2025-05-04 13:29:13,503 INFO MainThread:3157577 [wandb_init.py:init():867] starting run threads in backend
21
+ 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2025-05-04 13:29:14,946 INFO MainThread:3157577 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2025-05-04 13:29:14,954 INFO MainThread:3157577 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2025-05-04 13:33:19,417 INFO MainThread:3157577 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_13-33-08_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': 
None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
27
+ 2025-05-04 15:58:23,607 WARNING MsgRouterThr:3157577 [router.py:message_loop():75] message_loop has been closed
wandb/run-20250504_132912-1agsw1y8/run-1agsw1y8.wandb ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71cf2569d2e508f45833ce35b1904bcc5325f9369eef0a76ea074fad88d8621d
3
+ size 5615901
wandb/run-20250504_160615-f65jh2lv/files/output.log ADDED
@@ -0,0 +1,8 @@
1
+ You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
+ Map: 100%|██████████| 511104/511104 [00:20<00:00, 25304.42 examples/s]
3
+ Map: 100%|██████████| 109522/109522 [00:02<00:00, 36704.44 examples/s]
4
+ /arf/home/zisik/.local/lib/python3.10/site-packages/transformers/training_args.py:1545: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
5
+ warnings.warn(
6
+ [2025-05-04 16:06:52,248] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
7
+ wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
8
+ 1%| | 246/47916 [02:12<7:08:44, 1.85it/s]
wandb/run-20250504_160615-f65jh2lv/files/requirements.txt ADDED
@@ -0,0 +1,541 @@
1
+ nvidia-cuda-cupti-cu12==12.4.127
2
+ nvidia-cuda-nvrtc-cu12==12.4.127
3
+ pyg-lib==0.4.0+pt20cu117
4
+ biopython==1.85
5
+ iniconfig==2.0.0
6
+ tokenizers==0.20.0
7
+ accelerate==1.3.0
8
+ torch==2.6.0
9
+ nvidia-nccl-cu12==2.21.5
10
+ transformers==4.45.2
11
+ nvidia-cusparse-cu12==12.3.1.170
12
+ torch-scatter==2.1.2+pt20cu117
13
+ nvidia-cusparselt-cu12==0.6.2
14
+ nvidia-nvtx-cu12==12.4.127
15
+ zstd==1.5.6.6
16
+ fair-esm==2.0.0
17
+ omegaconf==2.3.0
18
+ pluggy==1.5.0
19
+ pytest==8.3.5
20
+ nvidia-curand-cu12==10.3.5.147
21
+ nvidia-cufft-cu12==11.2.1.3
22
+ torch-cluster==1.6.3+pt20cu117
23
+ regex==2024.9.11
24
+ nvidia-cudnn-cu12==9.1.0.70
25
+ torch-spline-conv==1.2.2+pt20cu117
26
+ nvidia-cusolver-cu12==11.6.1.9
27
+ antlr4-python3-runtime==4.9.3
28
+ msgpack-numpy==0.4.8
29
+ nlp==0.2.0
30
+ einops==0.8.1
31
+ nvidia-cublas-cu12==12.4.5.8
32
+ triton==3.2.0
33
+ ninja==1.11.1.3
34
+ hydra-core==1.3.2
35
+ nvidia-nvjitlink-cu12==12.4.127
36
+ biotite==0.41.2
37
+ torch-sparse==0.6.18+pt20cu117
38
+ esm==3.1.4
39
+ sympy==1.13.1
40
+ nvidia-cuda-runtime-cu12==12.4.127
41
+ jupyter-lsp==2.2.5
42
+ jupyter-events==0.10.0
43
+ ipykernel==6.29.5
44
+ Mako==1.3.5
45
+ proto-plus==1.25.0
46
+ fst-pso==1.8.1
47
+ gensim==4.3.3
48
+ htmlmin==0.1.12
49
+ tokenizers==0.13.3
50
+ timm==1.0.11
51
+ MarkupSafe==3.0.2
52
+ safetensors==0.4.5
53
+ requests==2.32.3
54
+ gast==0.5.5
55
+ cuml==24.12.0a33
56
+ jaxlib==0.4.23.dev20240214
57
+ spacy-loggers==1.0.5
58
+ pytz==2024.1
59
+ idna==3.10
60
+ python-dateutil==2.9.0
61
+ mdurl==0.1.2
62
+ blis==0.7.10
63
+ jupyter==1.1.1
64
+ pyerfa==2.0.1.5
65
+ comm==0.2.2
66
+ pygraphviz==1.14
67
+ dill==0.3.8
68
+ paramiko==3.5.0
69
+ llama-index==0.8.36
70
+ mdit-py-plugins==0.4.2
71
+ Werkzeug==3.1.3
72
+ pyu2f==0.1.5
73
+ dask-glm==0.2.0
74
+ httpx==0.27.2
75
+ typeguard==4.4.1
76
+ mypy-extensions==1.0.0
77
+ kmodes==0.12.2
78
+ keras==2.15.0
79
+ ydata-profiling==0.0.dev0
80
+ regex==2024.11.6
81
+ xarray==2024.11.0
82
+ setuptools==75.3.0
83
+ charset-normalizer==3.4.0
84
+ jupyterlab_nvdashboard==0.11.0
85
+ pylibraft==24.12.0a36
86
+ spacy==3.7.6
87
+ mlflow-skinny==2.17.2
88
+ nvtx==0.2.10
89
+ multimethod==1.12
90
+ pexpect==4.9.0
91
+ torch==2.1.0.post301
92
+ flatbuffers==24.3.25
93
+ python-json-logger==2.0.7
94
+ PyJWT==2.9.0
95
+ multiprocess==0.70.16
96
+ colorlover==0.3.0
97
+ yarl==1.16.0
98
+ locket==1.0.0
99
+ patsy==1.0.0
100
+ rapids-dask-dependency==24.12.0a0
101
+ stanza==1.9.2
102
+ debugpy==1.8.8
103
+ jupyterlab_pygments==0.3.0
104
+ pylibcudf==24.12.0a337
105
+ lz4==4.3.3
106
+ pandas==2.2.3
107
+ tifffile==2024.9.20
108
+ pynvml==11.4.1
109
+ cufflinks==0.17.3
110
+ ipywidgets==8.1.5
111
+ requests-oauthlib==2.0.0
112
+ google-auth-oauthlib==1.2.1
113
+ rsa==4.9
114
+ webcolors==24.8.0
115
+ jsonschema-specifications==2024.10.1
116
+ scikit-learn==1.5.2
117
+ langchain-text-splitters==0.3.2
118
+ pandas-datareader==0.10.0
119
+ tomli==2.0.2
120
+ tzdata==2024.2
121
+ scikit-image==0.24.0
122
+ tensorboard_data_server==0.7.0
123
+ kiwisolver==1.4.7
124
+ cloudpathlib==0.20.0
125
+ isodate==0.6.1
126
+ adversarial-robustness-toolbox==1.19.1
127
+ SQLAlchemy==2.0.36
128
+ pytest-runner==6.0.0
129
+ pycairo==1.27.0
130
+ treelite==4.3.0
131
+ jiter==0.7.0
132
+ threadpoolctl==3.5.0
133
+ pandocfilters==1.5.0
134
+ loguru==0.7.2
135
+ smart_open==7.0.5
136
+ shellingham==1.5.4
137
+ deepspeed==0.15.4
138
+ prompt_toolkit==3.0.48
139
+ databricks-sdk==0.34.0
140
+ langchain-core==0.3.15
141
+ imageio==2.36.0
142
+ openapi-schema-pydantic==1.2.4
143
+ zict==3.0.0
144
+ cachetools==5.5.0
145
+ colorful==0.5.6
146
+ mpmath==1.3.0
147
+ nest_asyncio==1.6.0
148
+ pyFUME==0.2.25
149
+ opencv-python-headless==4.9.0
150
+ fastai==2.7.18
151
+ importlib_resources==6.4.5
152
+ binaryornot==0.4.4
153
+ evaluate==0.4.1
154
+ matplotlib-inline==0.1.7
155
+ wasabi==1.1.2
156
+ pycparser==2.22
157
+ GitPython==3.1.43
158
+ pluggy==1.5.0
159
+ async-lru==2.0.4
160
+ pgmpy==0.1.24
161
+ anyio==4.4.0
162
+ executing==2.1.0
163
+ orjson==3.10.11
164
+ humanfriendly==10.0
165
+ tornado==6.4.1
166
+ gmpy2==2.1.5
167
+ rlPyCairo==0.2.0
168
+ distributed==2024.11.0
169
+ FuzzyTM==2.0.5
170
+ torchtext==0.15.2a0+5ce3163
171
+ pytest==8.3.5
172
+ pyod==2.0.2
173
+ ImageHash==4.3.1
174
+ soupsieve==2.5
175
+ tblib==3.0.0
176
+ emoji==2.14.0
177
+ aiohappyeyeballs==2.4.3
178
+ uri-template==1.3.0
179
+ tensorflow_estimator==2.15.0
180
+ babel==2.16.0
181
+ dask-cuda==24.12.0a12
182
+ overrides==7.7.0
183
+ opencensus==0.11.3
184
+ openai==0.28.1
185
+ language_data==1.2.0
186
+ jedi==0.19.2
187
+ cookiecutter==2.6.0
188
+ entrypoints==0.4
189
+ exceptiongroup==1.2.2
190
+ marisa-trie==1.2.0
191
+ uvloop==0.20.0
192
+ aiosignal==1.3.1
193
+ Flask==3.0.3
194
+ tensorboard==2.15.2
195
+ cffi==1.17.1
196
+ tf_keras==2.15.0
197
+ absl-py==2.1.0
198
+ blinker==1.9.0
199
+ types-python-dateutil==2.9.0.20241003
200
+ opencv-python==4.9.0
201
+ frozendict==2.4.6
202
+ aiohttp-cors==0.7.0
203
+ statsmodels==0.14.4
204
+ tinycss2==1.4.0
205
+ terminado==0.18.1
206
+ pycaret==2.2.3
207
+ aiohttp==3.10.10
208
+ distributed-ucxx==0.41.0
209
+ prometheus_client==0.21.0
210
+ fastdownload==0.0.7
211
+ grpcio==1.59.3
212
+ google-api-core==2.22.0
213
+ jupyterlab_widgets==3.0.13
214
+ appdirs==1.4.4
215
+ littleutils==0.0.0
216
+ ray==2.24.0
217
+ kaggle==1.6.17
218
+ jsonschema==4.23.0
219
+ google-auth==2.36.0
220
+ scikit-base==0.11.0
221
+ visions==0.7.6
222
+ pyarrow==15.0.0
223
+ transformers==4.33.0
224
+ prometheus_flask_exporter==0.23.1
225
+ dm-tree==0.1.8
226
+ colorama==0.4.6
227
+ requests-toolbelt==1.0.0
228
+ cached-property==1.5.2
229
+ cymem==2.0.8
230
+ PyNaCl==1.5.0
231
+ PyWavelets==1.7.0
232
+ httptools==0.6.1
233
+ typing-utils==0.1.0
234
+ email_validator==2.2.0
235
+ marshmallow==3.23.1
236
+ Deprecated==1.2.14
237
+ virtualenv==20.4.7
238
+ optuna==3.6.1
239
+ jupyter_server==2.14.2
240
+ termcolor==2.5.0
241
+ mpi4py==4.0.1
242
+ torchdata==0.7.1+8cea82f
243
+ dataclasses==0.8
244
+ cloudpickle==3.1.0
245
+ tree_sitter_languages==1.10.2
246
+ tabulate==0.9.0
247
+ ipython==8.29.0
248
+ lightgbm==4.3.0
249
+ captum==0.6.0
250
+ confuse==2.0.1
251
+ torchvision==0.16.1+adc3221
252
+ lxml==4.9.4
253
+ fastapi==0.115.4
254
+ python-multipart==0.0.17
255
+ dnspython==2.7.0
256
+ jupyter-console==6.6.3
257
+ preshed==3.0.9
258
+ py-cpuinfo==9.0.0
259
+ Send2Trash==1.8.3
260
+ murmurhash==1.0.10
261
+ sniffio==1.3.1
262
+ websockets==13.1
263
+ h11==0.14.0
264
+ smmap==5.0.0
265
+ textual==0.85.2
266
+ jsonpatch==1.33
267
+ opencensus-context==0.1.3
268
+ nbconvert==7.16.4
269
+ sentry-sdk==2.19.0
270
+ opentelemetry-semantic-conventions==0.37b0
271
+ pandas-profiling==2.8.0
272
+ pillow==10.3.0
273
+ peft==0.13.2
274
+ rpds-py==0.21.0
275
+ bokeh==3.6.1
276
+ distro==1.9.0
277
+ itsdangerous==2.2.0
278
+ wandb==0.18.7
279
+ jsonpointer==3.0.0
280
+ astropy-iers-data==0.2024.11.11.0.32.38
281
+ horovod==0.28.1
282
+ graphviz==0.20.3
283
+ vtk==9.3.1
284
+ bleach==6.2.0
285
+ numexpr==2.8.7
286
+ pydantic_core==2.23.4
287
+ Jinja2==3.1.4
288
+ widgetsnbextension==4.0.13
289
+ filelock==3.16.1
290
+ catboost==1.2.7
291
+ raft-dask==24.12.0a36
292
+ async-timeout==4.0.3
293
+ datefinder==0.7.3
294
+ coloredlogs==15.0.1
295
+ platformdirs==4.3.6
296
+ spacy-legacy==3.0.12
297
+ chardet==5.2.0
298
+ jupyter_client==8.6.3
299
+ importlib_metadata==8.5.0
300
+ rfc3986-validator==0.1.1
301
+ huggingface_hub==0.26.2
302
+ PySocks==1.7.1
303
+ mlxtend==0.23.2
304
+ outdated==0.2.2
305
+ partd==1.4.2
306
+ thinc==8.2.5
307
+ astropy==6.1.6
308
+ rdflib==6.3.2
309
+ h2==4.1.0
310
+ typer==0.13.0
311
+ xyzservices==2024.9.0
312
+ toolz==0.12.1
313
+ frozenlist==1.5.0
314
+ rdkit==2024.9.2
315
+ pyasn1==0.6.1
316
+ jupyter_server_terminals==0.5.3
317
+ ucx-py==0.41.0a11
318
+ astunparse==1.6.3
319
+ simpful==2.12.0
320
+ notebook_shim==0.2.4
321
+ scipy==1.13.1
322
+ colorlog==6.9.0
323
+ tiktoken==0.3.3
324
+ plotly==5.24.1
325
+ fastrlock==0.8.2
326
+ chart-studio==1.1.0
327
+ stack-data==0.6.2
328
+ google-pasta==0.2.0
329
+ sktime==0.34.0
330
+ PyYAML==6.0.2
331
+ sympy==1.13.3
332
+ multidict==6.1.0
333
+ ml-dtypes==0.2.0
334
+ tensorboardX==2.6.2.2
335
+ decorator==5.1.1
336
+ cytoolz==1.0.0
337
+ ase==3.23.0
338
+ isoduration==20.11.0
339
+ html5lib==1.1
340
+ langsmith==0.1.142
341
+ future==1.0.0
342
+ onnx2torch==1.5.15
343
+ multipledispatch==0.6.0
344
+ protobuf==4.24.4
345
+ ucxx==0.41.0
346
+ pandas_flavor==0.6.0
347
+ msgpack==1.1.0
348
+ pyasn1_modules==0.4.1
349
+ imagecodecs==2024.1.1
350
+ mlflow==2.17.2
351
+ watchfiles==0.24.0
352
+ dm-sonnet==2.0.2
353
+ langcodes==3.4.1
354
+ freetype-py==2.3.0
355
+ argon2-cffi-bindings==21.2.0
356
+ trimesh==4.5.2
357
+ opt_einsum==3.4.0
358
+ tenacity==8.5.0
359
+ h5py==3.12.1
360
+ fastapi-cli==0.0.5
361
+ oauthlib==3.2.2
362
+ parso==0.8.4
363
+ weasel==0.4.1
364
+ yfinance==0.2.49
365
+ networkx==2.8.8
366
+ bitsandbytes==0.44.1
367
+ lazy_loader==0.4
368
+ querystring_parser==1.2.4
369
+ contourpy==1.3.0
370
+ unicodedata2==15.1.0
371
+ bcrypt==4.2.0
372
+ munkres==1.1.4
373
+ langchain==0.0.298
374
+ hpack==4.0.0
375
+ cryptography==43.0.3
376
+ umap-learn==0.5.7
377
+ arrow==1.3.0
378
+ docker==7.1.0
379
+ certifi==2025.1.31
380
+ fastjsonschema==2.20.0
381
+ tensorflow==2.15.0
382
+ googleapis-common-protos==1.65.0
383
+ iniconfig==2.0.0
384
+ Markdown==3.6
385
+ llvmlite==0.43.0
386
+ wslink==2.3.2
387
+ attrs==24.2.0
388
+ rich==13.9.4
389
+ cupy==13.3.0
390
+ uc-micro-py==1.0.3
391
+ alembic==1.14.0
392
+ joblib==1.4.2
393
+ reportlab==4.2.5
394
+ miniful==0.0.6
395
+ jupyter_core==5.7.2
396
+ wheel==0.45.0
397
+ phik==0.12.3
398
+ mistune==3.0.2
399
+ wcwidth==0.2.13
400
+ dacite==1.8.1
401
+ accelerate==0.22.0
402
+ sacremoses==0.0.53
403
+ revtok==0.0.3
404
+ python-slugify==8.0.4
405
+ tangled-up-in-unicode==0.2.0
406
+ dask==2024.11.0
407
+ markdown-it-py==3.0.0
408
+ sentencepiece==0.1.99
409
+ beautifulsoup4==4.12.3
410
+ six==1.16.0
411
+ numba-cuda==0.0.17
412
+ argon2-cffi==23.1.0
413
+ xxhash==3.5.0
414
+ hjson==3.1.0
415
+ fonttools==4.54.1
416
+ graphql-core==3.2.5
417
+ pyparsing==3.2.0
418
+ pure_eval==0.2.3
419
+ distlib==0.3.9
420
+ lightning==2.4.0
421
+ wordcloud==0.0.0
422
+ catalogue==2.0.10
423
+ jax==0.4.27
424
+ tree-sitter==0.23.2
425
+ notebook==7.2.2
426
+ dataclasses-json==0.6.7
427
+ propcache==0.2.0
428
+ numba==0.60.0
429
+ dask-expr==1.1.17
430
+ pydantic==2.9.2
431
+ gunicorn==22.0.0
432
+ missingno==0.5.2
433
+ pyOpenSSL==24.2.1
434
+ openpyxl==3.1.5
435
+ packaging==24.1
436
+ python-dotenv==1.0.1
437
+ cycler==0.12.1
438
+ types-pytz==2024.2.0.20241003
439
+ yellowbrick==1.5
440
+ referencing==0.35.1
441
+ pyLDAvis==3.4.1
442
+ lazypredict==0.2.16
443
+ fqdn==1.5.1
444
+ websocket-client==1.8.0
445
+ fastcore==1.7.19
446
+ pynvjitlink-cu12==0.3.0
447
+ pingouin==0.5.5
448
+ numpy==1.26.4
449
+ typing-inspect==0.9.0
450
+ nltk==3.9.1
451
+ onnxruntime==1.19.2
452
+ tensorflow-probability==0.23.0
453
+ datasets==3.0.2
454
+ pickleshare==0.7.5
455
+ peewee==3.17.7
456
+ torch-geometric==2.6.1
457
+ ptyprocess==0.7.0
458
+ greenlet==3.1.1
459
+ graphql-relay==3.2.0
460
+ graphene==3.4.3
461
+ et_xmlfile==2.0.0
462
+ webencodings==0.5.1
463
+ hyperframe==6.0.1
464
+ multitasking==0.0.9
465
+ typer-slim==0.13.0
466
+ onnx==1.15.0
467
+ uvicorn==0.32.0
468
+ memray==1.13.4
469
+ xgboost==2.1.2
470
+ Brotli==1.1.0
471
+ zipp==3.21.0
472
+ nbformat==5.10.4
473
+ responses==0.18.0
474
+ funcy==2.0
475
+ Pygments==2.18.0
476
+ tqdm==4.67.0
477
+ linkify-it-py==2.0.3
478
+ srsly==2.4.8
479
+ cuda-python==12.6.0
480
+ lightning-utilities==0.11.8
481
+ cudf==24.12.0a337
482
+ dask-ml==2024.4.4
483
+ docker-pycreds==0.4.0
484
+ pkgutil_resolve_name==1.3.10
485
+ opentelemetry-api==1.16.0
486
+ fsspec==2024.9.0
487
+ nbclient==0.10.0
488
+ psutil==5.9.8
489
+ pytorch-lightning==2.4.0
490
+ sortedcontainers==2.4.0
491
+ matplotlib==3.9.2
492
+ defusedxml==0.7.1
493
+ urllib3==1.26.19
494
+ jupyterlab_server==2.27.3
495
+ retrying==1.3.3
496
+ dask-cudf==24.12.0a337
497
+ sqlparse==0.5.1
498
+ text-unidecode==1.3
499
+ seaborn==0.13.2
500
+ typing_extensions==4.12.2
501
+ pyzmq==26.2.0
502
+ rfc3339-validator==0.1.4
503
+ pynndescent==0.5.13
504
+ pip==24.3.1
505
+ confection==0.1.4
506
+ wrapt==1.14.1
507
+ fastprogress==1.0.3
508
+ traitlets==5.14.3
509
+ asttokens==2.4.1
510
+ json5==0.9.28
511
+ pandas-stubs==2.2.3.241126
512
+ torchmetrics==1.2.1
513
+ gitdb==4.0.11
514
+ annotated-types==0.7.0
515
+ ipython-autotime==0.1
516
+ httpcore==1.0.6
517
+ click==8.1.7
518
+ setproctitle==1.3.3
519
+ starlette==0.41.2
520
+ jupyterlab==4.2.5
521
+ rmm==24.12.0a27
522
+ opentelemetry-sdk==1.16.0
523
+ textblob==0.15.3
524
+ imbalanced-learn==0.12.4
525
+ typeguard==4.3.0
526
+ more-itertools==10.3.0
527
+ zipp==3.19.2
528
+ autocommand==2.2.2
529
+ jaraco.context==5.3.0
530
+ packaging==24.1
531
+ importlib_metadata==8.0.0
532
+ platformdirs==4.2.2
533
+ jaraco.functools==4.0.1
534
+ importlib_resources==6.4.0
535
+ tomli==2.0.1
536
+ jaraco.text==3.12.1
537
+ wheel==0.43.0
538
+ jaraco.collections==5.1.0
539
+ typing_extensions==4.12.2
540
+ inflect==7.3.1
541
+ backports.tarfile==1.2.0
wandb/run-20250504_160615-f65jh2lv/files/wandb-metadata.json ADDED
@@ -0,0 +1,77 @@
1
+ {
2
+ "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
+ "python": "3.10.15",
4
+ "startedAt": "2025-05-04T13:06:15.895027Z",
5
+ "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
+ "codePath": "finetuning_bc_prott5.py",
7
+ "email": "zeynep.isik1@sabanciuniv.edu",
8
+ "root": "/arf/scratch/zisik/prott5_bc_ft",
9
+ "host": "kolyoz1",
10
+ "username": "zisik",
11
+ "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
+ "codePathLocal": "finetuning_bc_prott5.py",
13
+ "cpu_count": 64,
14
+ "cpu_count_logical": 64,
15
+ "gpu": "NVIDIA H100 80GB HBM3",
16
+ "gpu_count": 1,
17
+ "disk": {
18
+ "/": {
19
+ "total": "7643995308032",
20
+ "used": "274886729728"
21
+ }
22
+ },
23
+ "memory": {
24
+ "total": "1081373220864"
25
+ },
26
+ "cpu": {
27
+ "count": 64,
28
+ "countLogical": 64
29
+ },
30
+ "gpu_nvidia": [
31
+ {
32
+ "name": "NVIDIA H100 80GB HBM3",
33
+ "memoryTotal": "85520809984",
34
+ "cudaCores": 16896,
35
+ "architecture": "Hopper"
36
+ }
37
+ ],
38
+ "slurm": {
39
+ "cluster_name": "cuda",
40
+ "conf": "/etc/slurm/slurm.conf",
41
+ "cpus_on_node": "16",
42
+ "cpus_per_task": "16",
43
+ "gpus_on_node": "1",
44
+ "gtids": "0",
45
+ "job_account": "tbag154",
46
+ "job_cpus_per_node": "16",
47
+ "job_end_time": "1746623147",
48
+ "job_gid": "11636",
49
+ "job_gpus": "1",
50
+ "job_id": "1027945",
51
+ "job_name": "msa_ph_pt",
52
+ "job_nodelist": "kolyoz1",
53
+ "job_num_nodes": "1",
54
+ "job_partition": "kolyoz-cuda",
55
+ "job_qos": "tbag",
56
+ "job_start_time": "1746363947",
57
+ "job_uid": "11636",
58
+ "job_user": "zisik",
59
+ "jobid": "1027945",
60
+ "localid": "0",
61
+ "mem_per_cpu": "14000",
62
+ "nnodes": "1",
63
+ "node_aliases": "(null)",
64
+ "nodeid": "0",
65
+ "nodelist": "kolyoz1",
66
+ "prio_process": "0",
67
+ "procid": "0",
68
+ "submit_dir": "/arf/scratch/zisik",
69
+ "submit_host": "cuda-ui",
70
+ "task_pid": "3178532",
71
+ "tasks_per_node": "1",
72
+ "topology_addr": "kolyoz1",
73
+ "topology_addr_pattern": "node",
74
+ "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
+ },
76
+ "cudaVersion": "12.6"
77
+ }
wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
1
+ {"time":"2025-05-04T16:06:15.269316376+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp6sywt0mb/port-3178556.txt","pid":3178556,"debug":false,"disable-analytics":false}
2
+ {"time":"2025-05-04T16:06:15.269366219+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2025-05-04T16:06:15.2702663+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3178556}
4
+ {"time":"2025-05-04T16:06:15.270143057+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37579,"Zone":""}}
5
+ {"time":"2025-05-04T16:06:15.448913658+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:49916"}
6
+ {"time":"2025-05-04T16:06:15.898453126+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"f65jh2lv","id":"127.0.0.1:49916"}
7
+ {"time":"2025-05-04T16:06:16.021719647+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"f65jh2lv","id":"127.0.0.1:49916"}
wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log ADDED
@@ -0,0 +1,8 @@
1
+ {"time":"2025-05-04T16:06:15.899998659+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2025-05-04T16:06:15.900045512+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug-core.log"}
3
+ {"time":"2025-05-04T16:06:16.021644692+03:00","level":"INFO","msg":"created new stream","id":"f65jh2lv"}
4
+ {"time":"2025-05-04T16:06:16.021706945+03:00","level":"INFO","msg":"stream: started","id":"f65jh2lv"}
5
+ {"time":"2025-05-04T16:06:16.021839756+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"f65jh2lv"}
6
+ {"time":"2025-05-04T16:06:16.02194891+03:00","level":"INFO","msg":"handler: started","stream_id":"f65jh2lv"}
7
+ {"time":"2025-05-04T16:06:16.022034888+03:00","level":"INFO","msg":"sender: started","stream_id":"f65jh2lv"}
8
+ {"time":"2025-05-04T16:06:16.421916148+03:00","level":"INFO","msg":"Starting system monitor"}
wandb/run-20250504_160615-f65jh2lv/logs/debug.log ADDED
@@ -0,0 +1,26 @@
1
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
2
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Configure stats pid to 3178556
3
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from /arf/home/zisik/.config/wandb/settings
4
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from /arf/scratch/zisik/prott5_bc_ft/wandb/settings
5
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
6
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'finetuning_bc_prott5.py', 'program_abspath': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py', 'program': '/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py'}
7
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Applying login settings: {}
8
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2025-05-04 16:06:15,888 INFO MainThread:3178556 [wandb_init.py:_log_setup():533] Logging user logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug.log
10
+ 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:_log_setup():534] Logging internal logs to /arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160615-f65jh2lv/logs/debug-internal.log
11
+ 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():619] calling init triggers
12
+ 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
13
+ config: {}
14
+ 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():669] starting backend
15
+ 2025-05-04 16:06:15,889 INFO MainThread:3178556 [wandb_init.py:init():673] sending inform_init request
16
+ 2025-05-04 16:06:15,893 INFO MainThread:3178556 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
17
+ 2025-05-04 16:06:15,894 INFO MainThread:3178556 [wandb_init.py:init():686] backend started and connected
18
+ 2025-05-04 16:06:15,902 INFO MainThread:3178556 [wandb_init.py:init():781] updated telemetry
19
+ 2025-05-04 16:06:15,905 INFO MainThread:3178556 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
20
+ 2025-05-04 16:06:16,414 INFO MainThread:3178556 [wandb_init.py:init():867] starting run threads in backend
21
+ 2025-05-04 16:06:17,992 INFO MainThread:3178556 [wandb_run.py:_console_start():2456] atexit reg
22
+ 2025-05-04 16:06:17,993 INFO MainThread:3178556 [wandb_run.py:_redirect():2305] redirect: wrap_raw
23
+ 2025-05-04 16:06:17,993 INFO MainThread:3178556 [wandb_run.py:_redirect():2370] Wrapping output streams.
24
+ 2025-05-04 16:06:17,993 INFO MainThread:3178556 [wandb_run.py:_redirect():2395] Redirects installed.
25
+ 2025-05-04 16:06:18,004 INFO MainThread:3178556 [wandb_init.py:init():911] run started, returning control to user process
26
+ 2025-05-04 16:06:56,772 INFO MainThread:3178556 [wandb_run.py:_config_callback():1387] config_cb None None {'output_dir': 't5-bc-out', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': 't5-bc-out/runs/May04_16-06-46_kolyoz1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': False, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 't5-bc-out', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'loss', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'epoch', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': 
None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False}
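The config dump above is the Trainer's logged TrainingArguments for the t5-bc-out run. For reference, a minimal sketch of how those arguments would be constructed in the fine-tuning script is shown below; every value is copied from the log, and anything not listed is assumed to stay at its transformers 4.45.2 default.

```python
# Sketch of the TrainingArguments implied by the logged config above.
# Values are taken from the config dump; omitted arguments keep their defaults.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="t5-bc-out",
    run_name="t5-bc-out",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    fp16=True,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=500,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    save_safetensors=False,
    seed=42,
    report_to=["wandb"],
)
```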
wandb/run-20250504_160615-f65jh2lv/run-f65jh2lv.wandb ADDED
Binary file (98.3 kB).
 
wandb/run-20250504_160955-rqk2hbkf/files/config.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.18.7
4
+ m: []
5
+ python_version: 3.10.15
6
+ t:
7
+ "1":
8
+ - 1
9
+ - 2
10
+ - 3
11
+ - 5
12
+ - 11
13
+ - 12
14
+ - 49
15
+ - 51
16
+ - 53
17
+ - 55
18
+ - 71
19
+ - 98
20
+ - 105
21
+ "2":
22
+ - 1
23
+ - 2
24
+ - 3
25
+ - 5
26
+ - 11
27
+ - 12
28
+ - 49
29
+ - 51
30
+ - 53
31
+ - 55
32
+ - 71
33
+ - 98
34
+ - 105
35
+ "3":
36
+ - 23
37
+ - 55
38
+ "4": 3.10.15
39
+ "5": 0.18.7
40
+ "6": 4.45.2
41
+ "8":
42
+ - 5
43
+ "12": 0.18.7
44
+ "13": linux-x86_64
wandb/run-20250504_160955-rqk2hbkf/files/output.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Traceback (most recent call last):
2
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 33, in <module>
3
+ X_train, X_temp, y_train, y_temp = train_test_split(prep_texts, labels, test_size=0.30, random_state=42)
4
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
5
+ return func(*args, **kwargs)
6
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/model_selection/_split.py", line 2782, in train_test_split
7
+ arrays = indexable(*arrays)
8
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 514, in indexable
9
+ check_consistent_length(*result)
10
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 457, in check_consistent_length
11
+ raise ValueError(
12
+ ValueError: Found input variables with inconsistent numbers of samples: [10, 730149]
13
+ Traceback (most recent call last):
14
+ File "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py", line 33, in <module>
15
+ X_train, X_temp, y_train, y_temp = train_test_split(prep_texts, labels, test_size=0.30, random_state=42)
16
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
17
+ return func(*args, **kwargs)
18
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/model_selection/_split.py", line 2782, in train_test_split
19
+ arrays = indexable(*arrays)
20
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 514, in indexable
21
+ check_consistent_length(*result)
22
+ File "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/lib/python3.10/site-packages/sklearn/utils/validation.py", line 457, in check_consistent_length
23
+ raise ValueError(
24
+ ValueError: Found input variables with inconsistent numbers of samples: [10, 730149]
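The two identical tracebacks above show train_test_split failing because prep_texts and labels reached it with different lengths (10 vs. 730,149). A minimal synthetic sketch of that failure mode, with a length check in front of the split, is given below; all data here is made up, and only the variable names and the call pattern mirror finetuning_bc_prott5.py.

```python
# Synthetic sketch of the length-mismatch failure reported above.
# Only prep_texts/labels and the train_test_split call mirror the script;
# the toy data and the guard are illustrative assumptions.
from sklearn.model_selection import train_test_split

prep_texts = ["M K T A Y"] * 10    # e.g. only 10 prepared sequences
labels = [0, 1] * 365              # 730 labels, so the lengths disagree

if len(prep_texts) != len(labels):
    # This is the condition behind the ValueError ([10, 730149] in the log).
    print(f"length mismatch: {len(prep_texts)} texts vs {len(labels)} labels")
else:
    X_train, X_temp, y_train, y_temp = train_test_split(
        prep_texts, labels, test_size=0.30, random_state=42
    )
```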
wandb/run-20250504_160955-rqk2hbkf/files/requirements.txt ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nvidia-cuda-cupti-cu12==12.4.127
2
+ nvidia-cuda-nvrtc-cu12==12.4.127
3
+ pyg-lib==0.4.0+pt20cu117
4
+ biopython==1.85
5
+ iniconfig==2.0.0
6
+ tokenizers==0.20.0
7
+ accelerate==1.3.0
8
+ torch==2.6.0
9
+ nvidia-nccl-cu12==2.21.5
10
+ transformers==4.45.2
11
+ nvidia-cusparse-cu12==12.3.1.170
12
+ torch-scatter==2.1.2+pt20cu117
13
+ nvidia-cusparselt-cu12==0.6.2
14
+ nvidia-nvtx-cu12==12.4.127
15
+ zstd==1.5.6.6
16
+ fair-esm==2.0.0
17
+ omegaconf==2.3.0
18
+ pluggy==1.5.0
19
+ pytest==8.3.5
20
+ nvidia-curand-cu12==10.3.5.147
21
+ nvidia-cufft-cu12==11.2.1.3
22
+ torch-cluster==1.6.3+pt20cu117
23
+ regex==2024.9.11
24
+ nvidia-cudnn-cu12==9.1.0.70
25
+ torch-spline-conv==1.2.2+pt20cu117
26
+ nvidia-cusolver-cu12==11.6.1.9
27
+ antlr4-python3-runtime==4.9.3
28
+ msgpack-numpy==0.4.8
29
+ nlp==0.2.0
30
+ einops==0.8.1
31
+ nvidia-cublas-cu12==12.4.5.8
32
+ triton==3.2.0
33
+ ninja==1.11.1.3
34
+ hydra-core==1.3.2
35
+ nvidia-nvjitlink-cu12==12.4.127
36
+ biotite==0.41.2
37
+ torch-sparse==0.6.18+pt20cu117
38
+ esm==3.1.4
39
+ sympy==1.13.1
40
+ nvidia-cuda-runtime-cu12==12.4.127
41
+ jupyter-lsp==2.2.5
42
+ jupyter-events==0.10.0
43
+ ipykernel==6.29.5
44
+ Mako==1.3.5
45
+ proto-plus==1.25.0
46
+ fst-pso==1.8.1
47
+ gensim==4.3.3
48
+ htmlmin==0.1.12
49
+ tokenizers==0.13.3
50
+ timm==1.0.11
51
+ MarkupSafe==3.0.2
52
+ safetensors==0.4.5
53
+ requests==2.32.3
54
+ gast==0.5.5
55
+ cuml==24.12.0a33
56
+ jaxlib==0.4.23.dev20240214
57
+ spacy-loggers==1.0.5
58
+ pytz==2024.1
59
+ idna==3.10
60
+ python-dateutil==2.9.0
61
+ mdurl==0.1.2
62
+ blis==0.7.10
63
+ jupyter==1.1.1
64
+ pyerfa==2.0.1.5
65
+ comm==0.2.2
66
+ pygraphviz==1.14
67
+ dill==0.3.8
68
+ paramiko==3.5.0
69
+ llama-index==0.8.36
70
+ mdit-py-plugins==0.4.2
71
+ Werkzeug==3.1.3
72
+ pyu2f==0.1.5
73
+ dask-glm==0.2.0
74
+ httpx==0.27.2
75
+ typeguard==4.4.1
76
+ mypy-extensions==1.0.0
77
+ kmodes==0.12.2
78
+ keras==2.15.0
79
+ ydata-profiling==0.0.dev0
80
+ regex==2024.11.6
81
+ xarray==2024.11.0
82
+ setuptools==75.3.0
83
+ charset-normalizer==3.4.0
84
+ jupyterlab_nvdashboard==0.11.0
85
+ pylibraft==24.12.0a36
86
+ spacy==3.7.6
87
+ mlflow-skinny==2.17.2
88
+ nvtx==0.2.10
89
+ multimethod==1.12
90
+ pexpect==4.9.0
91
+ torch==2.1.0.post301
92
+ flatbuffers==24.3.25
93
+ python-json-logger==2.0.7
94
+ PyJWT==2.9.0
95
+ multiprocess==0.70.16
96
+ colorlover==0.3.0
97
+ yarl==1.16.0
98
+ locket==1.0.0
99
+ patsy==1.0.0
100
+ rapids-dask-dependency==24.12.0a0
101
+ stanza==1.9.2
102
+ debugpy==1.8.8
103
+ jupyterlab_pygments==0.3.0
104
+ pylibcudf==24.12.0a337
105
+ lz4==4.3.3
106
+ pandas==2.2.3
107
+ tifffile==2024.9.20
108
+ pynvml==11.4.1
109
+ cufflinks==0.17.3
110
+ ipywidgets==8.1.5
111
+ requests-oauthlib==2.0.0
112
+ google-auth-oauthlib==1.2.1
113
+ rsa==4.9
114
+ webcolors==24.8.0
115
+ jsonschema-specifications==2024.10.1
116
+ scikit-learn==1.5.2
117
+ langchain-text-splitters==0.3.2
118
+ pandas-datareader==0.10.0
119
+ tomli==2.0.2
120
+ tzdata==2024.2
121
+ scikit-image==0.24.0
122
+ tensorboard_data_server==0.7.0
123
+ kiwisolver==1.4.7
124
+ cloudpathlib==0.20.0
125
+ isodate==0.6.1
126
+ adversarial-robustness-toolbox==1.19.1
127
+ SQLAlchemy==2.0.36
128
+ pytest-runner==6.0.0
129
+ pycairo==1.27.0
130
+ treelite==4.3.0
131
+ jiter==0.7.0
132
+ threadpoolctl==3.5.0
133
+ pandocfilters==1.5.0
134
+ loguru==0.7.2
135
+ smart_open==7.0.5
136
+ shellingham==1.5.4
137
+ deepspeed==0.15.4
138
+ prompt_toolkit==3.0.48
139
+ databricks-sdk==0.34.0
140
+ langchain-core==0.3.15
141
+ imageio==2.36.0
142
+ openapi-schema-pydantic==1.2.4
143
+ zict==3.0.0
144
+ cachetools==5.5.0
145
+ colorful==0.5.6
146
+ mpmath==1.3.0
147
+ nest_asyncio==1.6.0
148
+ pyFUME==0.2.25
149
+ opencv-python-headless==4.9.0
150
+ fastai==2.7.18
151
+ importlib_resources==6.4.5
152
+ binaryornot==0.4.4
153
+ evaluate==0.4.1
154
+ matplotlib-inline==0.1.7
155
+ wasabi==1.1.2
156
+ pycparser==2.22
157
+ GitPython==3.1.43
158
+ pluggy==1.5.0
159
+ async-lru==2.0.4
160
+ pgmpy==0.1.24
161
+ anyio==4.4.0
162
+ executing==2.1.0
163
+ orjson==3.10.11
164
+ humanfriendly==10.0
165
+ tornado==6.4.1
166
+ gmpy2==2.1.5
167
+ rlPyCairo==0.2.0
168
+ distributed==2024.11.0
169
+ FuzzyTM==2.0.5
170
+ torchtext==0.15.2a0+5ce3163
171
+ pytest==8.3.5
172
+ pyod==2.0.2
173
+ ImageHash==4.3.1
174
+ soupsieve==2.5
175
+ tblib==3.0.0
176
+ emoji==2.14.0
177
+ aiohappyeyeballs==2.4.3
178
+ uri-template==1.3.0
179
+ tensorflow_estimator==2.15.0
180
+ babel==2.16.0
181
+ dask-cuda==24.12.0a12
182
+ overrides==7.7.0
183
+ opencensus==0.11.3
184
+ openai==0.28.1
185
+ language_data==1.2.0
186
+ jedi==0.19.2
187
+ cookiecutter==2.6.0
188
+ entrypoints==0.4
189
+ exceptiongroup==1.2.2
190
+ marisa-trie==1.2.0
191
+ uvloop==0.20.0
192
+ aiosignal==1.3.1
193
+ Flask==3.0.3
194
+ tensorboard==2.15.2
195
+ cffi==1.17.1
196
+ tf_keras==2.15.0
197
+ absl-py==2.1.0
198
+ blinker==1.9.0
199
+ types-python-dateutil==2.9.0.20241003
200
+ opencv-python==4.9.0
201
+ frozendict==2.4.6
202
+ aiohttp-cors==0.7.0
203
+ statsmodels==0.14.4
204
+ tinycss2==1.4.0
205
+ terminado==0.18.1
206
+ pycaret==2.2.3
207
+ aiohttp==3.10.10
208
+ distributed-ucxx==0.41.0
209
+ prometheus_client==0.21.0
210
+ fastdownload==0.0.7
211
+ grpcio==1.59.3
212
+ google-api-core==2.22.0
213
+ jupyterlab_widgets==3.0.13
214
+ appdirs==1.4.4
215
+ littleutils==0.0.0
216
+ ray==2.24.0
217
+ kaggle==1.6.17
218
+ jsonschema==4.23.0
219
+ google-auth==2.36.0
220
+ scikit-base==0.11.0
221
+ visions==0.7.6
222
+ pyarrow==15.0.0
223
+ transformers==4.33.0
224
+ prometheus_flask_exporter==0.23.1
225
+ dm-tree==0.1.8
226
+ colorama==0.4.6
227
+ requests-toolbelt==1.0.0
228
+ cached-property==1.5.2
229
+ cymem==2.0.8
230
+ PyNaCl==1.5.0
231
+ PyWavelets==1.7.0
232
+ httptools==0.6.1
233
+ typing-utils==0.1.0
234
+ email_validator==2.2.0
235
+ marshmallow==3.23.1
236
+ Deprecated==1.2.14
237
+ virtualenv==20.4.7
238
+ optuna==3.6.1
239
+ jupyter_server==2.14.2
240
+ termcolor==2.5.0
241
+ mpi4py==4.0.1
242
+ torchdata==0.7.1+8cea82f
243
+ dataclasses==0.8
244
+ cloudpickle==3.1.0
245
+ tree_sitter_languages==1.10.2
246
+ tabulate==0.9.0
247
+ ipython==8.29.0
248
+ lightgbm==4.3.0
249
+ captum==0.6.0
250
+ confuse==2.0.1
251
+ torchvision==0.16.1+adc3221
252
+ lxml==4.9.4
253
+ fastapi==0.115.4
254
+ python-multipart==0.0.17
255
+ dnspython==2.7.0
256
+ jupyter-console==6.6.3
257
+ preshed==3.0.9
258
+ py-cpuinfo==9.0.0
259
+ Send2Trash==1.8.3
260
+ murmurhash==1.0.10
261
+ sniffio==1.3.1
262
+ websockets==13.1
263
+ h11==0.14.0
264
+ smmap==5.0.0
265
+ textual==0.85.2
266
+ jsonpatch==1.33
267
+ opencensus-context==0.1.3
268
+ nbconvert==7.16.4
269
+ sentry-sdk==2.19.0
270
+ opentelemetry-semantic-conventions==0.37b0
271
+ pandas-profiling==2.8.0
272
+ pillow==10.3.0
273
+ peft==0.13.2
274
+ rpds-py==0.21.0
275
+ bokeh==3.6.1
276
+ distro==1.9.0
277
+ itsdangerous==2.2.0
278
+ wandb==0.18.7
279
+ jsonpointer==3.0.0
280
+ astropy-iers-data==0.2024.11.11.0.32.38
281
+ horovod==0.28.1
282
+ graphviz==0.20.3
283
+ vtk==9.3.1
284
+ bleach==6.2.0
285
+ numexpr==2.8.7
286
+ pydantic_core==2.23.4
287
+ Jinja2==3.1.4
288
+ widgetsnbextension==4.0.13
289
+ filelock==3.16.1
290
+ catboost==1.2.7
291
+ raft-dask==24.12.0a36
292
+ async-timeout==4.0.3
293
+ datefinder==0.7.3
294
+ coloredlogs==15.0.1
295
+ platformdirs==4.3.6
296
+ spacy-legacy==3.0.12
297
+ chardet==5.2.0
298
+ jupyter_client==8.6.3
299
+ importlib_metadata==8.5.0
300
+ rfc3986-validator==0.1.1
301
+ huggingface_hub==0.26.2
302
+ PySocks==1.7.1
303
+ mlxtend==0.23.2
304
+ outdated==0.2.2
305
+ partd==1.4.2
306
+ thinc==8.2.5
307
+ astropy==6.1.6
308
+ rdflib==6.3.2
309
+ h2==4.1.0
310
+ typer==0.13.0
311
+ xyzservices==2024.9.0
312
+ toolz==0.12.1
313
+ frozenlist==1.5.0
314
+ rdkit==2024.9.2
315
+ pyasn1==0.6.1
316
+ jupyter_server_terminals==0.5.3
317
+ ucx-py==0.41.0a11
318
+ astunparse==1.6.3
319
+ simpful==2.12.0
320
+ notebook_shim==0.2.4
321
+ scipy==1.13.1
322
+ colorlog==6.9.0
323
+ tiktoken==0.3.3
324
+ plotly==5.24.1
325
+ fastrlock==0.8.2
326
+ chart-studio==1.1.0
327
+ stack-data==0.6.2
328
+ google-pasta==0.2.0
329
+ sktime==0.34.0
330
+ PyYAML==6.0.2
331
+ sympy==1.13.3
332
+ multidict==6.1.0
333
+ ml-dtypes==0.2.0
334
+ tensorboardX==2.6.2.2
335
+ decorator==5.1.1
336
+ cytoolz==1.0.0
337
+ ase==3.23.0
338
+ isoduration==20.11.0
339
+ html5lib==1.1
340
+ langsmith==0.1.142
341
+ future==1.0.0
342
+ onnx2torch==1.5.15
343
+ multipledispatch==0.6.0
344
+ protobuf==4.24.4
345
+ ucxx==0.41.0
346
+ pandas_flavor==0.6.0
347
+ msgpack==1.1.0
348
+ pyasn1_modules==0.4.1
349
+ imagecodecs==2024.1.1
350
+ mlflow==2.17.2
351
+ watchfiles==0.24.0
352
+ dm-sonnet==2.0.2
353
+ langcodes==3.4.1
354
+ freetype-py==2.3.0
355
+ argon2-cffi-bindings==21.2.0
356
+ trimesh==4.5.2
357
+ opt_einsum==3.4.0
358
+ tenacity==8.5.0
359
+ h5py==3.12.1
360
+ fastapi-cli==0.0.5
361
+ oauthlib==3.2.2
362
+ parso==0.8.4
363
+ weasel==0.4.1
364
+ yfinance==0.2.49
365
+ networkx==2.8.8
366
+ bitsandbytes==0.44.1
367
+ lazy_loader==0.4
368
+ querystring_parser==1.2.4
369
+ contourpy==1.3.0
370
+ unicodedata2==15.1.0
371
+ bcrypt==4.2.0
372
+ munkres==1.1.4
373
+ langchain==0.0.298
374
+ hpack==4.0.0
375
+ cryptography==43.0.3
376
+ umap-learn==0.5.7
377
+ arrow==1.3.0
378
+ docker==7.1.0
379
+ certifi==2025.1.31
380
+ fastjsonschema==2.20.0
381
+ tensorflow==2.15.0
382
+ googleapis-common-protos==1.65.0
383
+ iniconfig==2.0.0
384
+ Markdown==3.6
385
+ llvmlite==0.43.0
386
+ wslink==2.3.2
387
+ attrs==24.2.0
388
+ rich==13.9.4
389
+ cupy==13.3.0
390
+ uc-micro-py==1.0.3
391
+ alembic==1.14.0
392
+ joblib==1.4.2
393
+ reportlab==4.2.5
394
+ miniful==0.0.6
395
+ jupyter_core==5.7.2
396
+ wheel==0.45.0
397
+ phik==0.12.3
398
+ mistune==3.0.2
399
+ wcwidth==0.2.13
400
+ dacite==1.8.1
401
+ accelerate==0.22.0
402
+ sacremoses==0.0.53
403
+ revtok==0.0.3
404
+ python-slugify==8.0.4
405
+ tangled-up-in-unicode==0.2.0
406
+ dask==2024.11.0
407
+ markdown-it-py==3.0.0
408
+ sentencepiece==0.1.99
409
+ beautifulsoup4==4.12.3
410
+ six==1.16.0
411
+ numba-cuda==0.0.17
412
+ argon2-cffi==23.1.0
413
+ xxhash==3.5.0
414
+ hjson==3.1.0
415
+ fonttools==4.54.1
416
+ graphql-core==3.2.5
417
+ pyparsing==3.2.0
418
+ pure_eval==0.2.3
419
+ distlib==0.3.9
420
+ lightning==2.4.0
421
+ wordcloud==0.0.0
422
+ catalogue==2.0.10
423
+ jax==0.4.27
424
+ tree-sitter==0.23.2
425
+ notebook==7.2.2
426
+ dataclasses-json==0.6.7
427
+ propcache==0.2.0
428
+ numba==0.60.0
429
+ dask-expr==1.1.17
430
+ pydantic==2.9.2
431
+ gunicorn==22.0.0
432
+ missingno==0.5.2
433
+ pyOpenSSL==24.2.1
434
+ openpyxl==3.1.5
435
+ packaging==24.1
436
+ python-dotenv==1.0.1
437
+ cycler==0.12.1
438
+ types-pytz==2024.2.0.20241003
439
+ yellowbrick==1.5
440
+ referencing==0.35.1
441
+ pyLDAvis==3.4.1
442
+ lazypredict==0.2.16
443
+ fqdn==1.5.1
444
+ websocket-client==1.8.0
445
+ fastcore==1.7.19
446
+ pynvjitlink-cu12==0.3.0
447
+ pingouin==0.5.5
448
+ numpy==1.26.4
449
+ typing-inspect==0.9.0
450
+ nltk==3.9.1
451
+ onnxruntime==1.19.2
452
+ tensorflow-probability==0.23.0
453
+ datasets==3.0.2
454
+ pickleshare==0.7.5
455
+ peewee==3.17.7
456
+ torch-geometric==2.6.1
457
+ ptyprocess==0.7.0
458
+ greenlet==3.1.1
459
+ graphql-relay==3.2.0
460
+ graphene==3.4.3
461
+ et_xmlfile==2.0.0
462
+ webencodings==0.5.1
463
+ hyperframe==6.0.1
464
+ multitasking==0.0.9
465
+ typer-slim==0.13.0
466
+ onnx==1.15.0
467
+ uvicorn==0.32.0
468
+ memray==1.13.4
469
+ xgboost==2.1.2
470
+ Brotli==1.1.0
471
+ zipp==3.21.0
472
+ nbformat==5.10.4
473
+ responses==0.18.0
474
+ funcy==2.0
475
+ Pygments==2.18.0
476
+ tqdm==4.67.0
477
+ linkify-it-py==2.0.3
478
+ srsly==2.4.8
479
+ cuda-python==12.6.0
480
+ lightning-utilities==0.11.8
481
+ cudf==24.12.0a337
482
+ dask-ml==2024.4.4
483
+ docker-pycreds==0.4.0
484
+ pkgutil_resolve_name==1.3.10
485
+ opentelemetry-api==1.16.0
486
+ fsspec==2024.9.0
487
+ nbclient==0.10.0
488
+ psutil==5.9.8
489
+ pytorch-lightning==2.4.0
490
+ sortedcontainers==2.4.0
491
+ matplotlib==3.9.2
492
+ defusedxml==0.7.1
493
+ urllib3==1.26.19
494
+ jupyterlab_server==2.27.3
495
+ retrying==1.3.3
496
+ dask-cudf==24.12.0a337
497
+ sqlparse==0.5.1
498
+ text-unidecode==1.3
499
+ seaborn==0.13.2
500
+ typing_extensions==4.12.2
501
+ pyzmq==26.2.0
502
+ rfc3339-validator==0.1.4
503
+ pynndescent==0.5.13
504
+ pip==24.3.1
505
+ confection==0.1.4
506
+ wrapt==1.14.1
507
+ fastprogress==1.0.3
508
+ traitlets==5.14.3
509
+ asttokens==2.4.1
510
+ json5==0.9.28
511
+ pandas-stubs==2.2.3.241126
512
+ torchmetrics==1.2.1
513
+ gitdb==4.0.11
514
+ annotated-types==0.7.0
515
+ ipython-autotime==0.1
516
+ httpcore==1.0.6
517
+ click==8.1.7
518
+ setproctitle==1.3.3
519
+ starlette==0.41.2
520
+ jupyterlab==4.2.5
521
+ rmm==24.12.0a27
522
+ opentelemetry-sdk==1.16.0
523
+ textblob==0.15.3
524
+ imbalanced-learn==0.12.4
525
+ typeguard==4.3.0
526
+ more-itertools==10.3.0
527
+ zipp==3.19.2
528
+ autocommand==2.2.2
529
+ jaraco.context==5.3.0
530
+ packaging==24.1
531
+ importlib_metadata==8.0.0
532
+ platformdirs==4.2.2
533
+ jaraco.functools==4.0.1
534
+ importlib_resources==6.4.0
535
+ tomli==2.0.1
536
+ jaraco.text==3.12.1
537
+ wheel==0.43.0
538
+ jaraco.collections==5.1.0
539
+ typing_extensions==4.12.2
540
+ inflect==7.3.1
541
+ backports.tarfile==1.2.0
wandb/run-20250504_160955-rqk2hbkf/files/wandb-metadata.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.14.0-427.13.1.el9_4.x86_64-x86_64-with-glibc2.34",
3
+ "python": "3.10.15",
4
+ "startedAt": "2025-05-04T13:09:55.928947Z",
5
+ "program": "/arf/scratch/zisik/prott5_bc_ft/finetuning_bc_prott5.py",
6
+ "codePath": "finetuning_bc_prott5.py",
7
+ "email": "zeynep.isik1@sabanciuniv.edu",
8
+ "root": "/arf/scratch/zisik/prott5_bc_ft",
9
+ "host": "kolyoz1",
10
+ "username": "zisik",
11
+ "executable": "/arf/sw/apps/truba-ai/gpu/miniforge3-2024/envs/gpu-2024.0/bin/python3",
12
+ "codePathLocal": "finetuning_bc_prott5.py",
13
+ "cpu_count": 64,
14
+ "cpu_count_logical": 64,
15
+ "gpu": "NVIDIA H100 80GB HBM3",
16
+ "gpu_count": 1,
17
+ "disk": {
18
+ "/": {
19
+ "total": "7643995308032",
20
+ "used": "272740364288"
21
+ }
22
+ },
23
+ "memory": {
24
+ "total": "1081373220864"
25
+ },
26
+ "cpu": {
27
+ "count": 64,
28
+ "countLogical": 64
29
+ },
30
+ "gpu_nvidia": [
31
+ {
32
+ "name": "NVIDIA H100 80GB HBM3",
33
+ "memoryTotal": "85520809984",
34
+ "cudaCores": 16896,
35
+ "architecture": "Hopper"
36
+ }
37
+ ],
38
+ "slurm": {
39
+ "cluster_name": "cuda",
40
+ "conf": "/etc/slurm/slurm.conf",
41
+ "cpus_on_node": "16",
42
+ "cpus_per_task": "16",
43
+ "gpus_on_node": "1",
44
+ "gtids": "0",
45
+ "job_account": "tbag154",
46
+ "job_cpus_per_node": "16",
47
+ "job_end_time": "1746623370",
48
+ "job_gid": "11636",
49
+ "job_gpus": "1",
50
+ "job_id": "1027946",
51
+ "job_name": "msa_ph_pt",
52
+ "job_nodelist": "kolyoz1",
53
+ "job_num_nodes": "1",
54
+ "job_partition": "kolyoz-cuda",
55
+ "job_qos": "tbag",
56
+ "job_start_time": "1746364170",
57
+ "job_uid": "11636",
58
+ "job_user": "zisik",
59
+ "jobid": "1027946",
60
+ "localid": "0",
61
+ "mem_per_cpu": "14000",
62
+ "nnodes": "1",
63
+ "node_aliases": "(null)",
64
+ "nodeid": "0",
65
+ "nodelist": "kolyoz1",
66
+ "prio_process": "0",
67
+ "procid": "0",
68
+ "submit_dir": "/arf/scratch/zisik",
69
+ "submit_host": "cuda-ui",
70
+ "task_pid": "3179106",
71
+ "tasks_per_node": "1",
72
+ "topology_addr": "kolyoz1",
73
+ "topology_addr_pattern": "node",
74
+ "working_cluster": "cuda:slurmcontroller3.ib:6800:9984:109"
75
+ },
76
+ "cudaVersion": "12.6"
77
+ }
wandb/run-20250504_160955-rqk2hbkf/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":2}}
wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-05-04T16:09:55.241065297+03:00","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmplpbc9pnb/port-3179132.txt","pid":3179132,"debug":false,"disable-analytics":false}
2
+ {"time":"2025-05-04T16:09:55.241124751+03:00","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2025-05-04T16:09:55.241864+03:00","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":37981,"Zone":""}}
4
+ {"time":"2025-05-04T16:09:55.241967868+03:00","level":"INFO","msg":"Will exit if parent process dies.","ppid":3179132}
5
+ {"time":"2025-05-04T16:09:55.428960455+03:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:40950"}
6
+ {"time":"2025-05-04T16:09:55.928508592+03:00","level":"INFO","msg":"handleInformInit: received","streamId":"rqk2hbkf","id":"127.0.0.1:40950"}
7
+ {"time":"2025-05-04T16:09:56.056026556+03:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"rqk2hbkf","id":"127.0.0.1:40950"}
8
+ {"time":"2025-05-04T16:09:58.597503038+03:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:40950"}
9
+ {"time":"2025-05-04T16:09:58.597631333+03:00","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-05-04T16:09:58.597601675+03:00","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:40950"}
11
+ {"time":"2025-05-04T16:09:58.597793186+03:00","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:40950"}
12
+ {"time":"2025-05-04T16:09:59.528863432+03:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:40950"}
13
+ {"time":"2025-05-04T16:09:59.528880642+03:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:40950"}
14
+ {"time":"2025-05-04T16:09:59.528893164+03:00","level":"INFO","msg":"server is closed"}
wandb/run-20250504_160955-rqk2hbkf/logs/debug-internal.log ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-05-04T16:09:55.930352223+03:00","level":"INFO","msg":"using version","core version":"0.18.7"}
2
+ {"time":"2025-05-04T16:09:55.930398642+03:00","level":"INFO","msg":"created symlink","path":"/arf/scratch/zisik/prott5_bc_ft/wandb/run-20250504_160955-rqk2hbkf/logs/debug-core.log"}
3
+ {"time":"2025-05-04T16:09:56.055953645+03:00","level":"INFO","msg":"created new stream","id":"rqk2hbkf"}
4
+ {"time":"2025-05-04T16:09:56.056013829+03:00","level":"INFO","msg":"stream: started","id":"rqk2hbkf"}
5
+ {"time":"2025-05-04T16:09:56.056183059+03:00","level":"INFO","msg":"writer: Do: started","stream_id":"rqk2hbkf"}
6
+ {"time":"2025-05-04T16:09:56.056291373+03:00","level":"INFO","msg":"sender: started","stream_id":"rqk2hbkf"}
7
+ {"time":"2025-05-04T16:09:56.056498843+03:00","level":"INFO","msg":"handler: started","stream_id":"rqk2hbkf"}
8
+ {"time":"2025-05-04T16:09:56.455842701+03:00","level":"INFO","msg":"Starting system monitor"}
9
+ {"time":"2025-05-04T16:09:58.597599181+03:00","level":"INFO","msg":"stream: closing","id":"rqk2hbkf"}
10
+ {"time":"2025-05-04T16:09:58.597716873+03:00","level":"INFO","msg":"Stopping system monitor"}
11
+ {"time":"2025-05-04T16:09:58.598825235+03:00","level":"INFO","msg":"Stopped system monitor"}
12
+ {"time":"2025-05-04T16:09:58.792882763+03:00","level":"WARN","msg":"No job ingredients found, not creating job artifact"}
13
+ {"time":"2025-05-04T16:09:58.792915401+03:00","level":"WARN","msg":"No source type found, not creating job artifact"}
14
+ {"time":"2025-05-04T16:09:58.792926694+03:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
15
+ {"time":"2025-05-04T16:09:59.286977407+03:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
16
+ {"time":"2025-05-04T16:09:59.528666057+03:00","level":"INFO","msg":"handler: closed","stream_id":"rqk2hbkf"}
17
+ {"time":"2025-05-04T16:09:59.528710573+03:00","level":"INFO","msg":"writer: Close: closed","stream_id":"rqk2hbkf"}
18
+ {"time":"2025-05-04T16:09:59.528726369+03:00","level":"INFO","msg":"sender: closed","stream_id":"rqk2hbkf"}
19
+ {"time":"2025-05-04T16:09:59.528792264+03:00","level":"INFO","msg":"stream: closed","id":"rqk2hbkf"}