Anthony Miyaguchi
committed on
Commit
•
8e899a8
1
Parent(s):
2759b86
Use mode, updated model, and larger batch size
Browse files
- fetch_model.sh +1 -1
- last.ckpt +2 -2
- script.py +15 -5
fetch_model.sh
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
#!/usr/bin/bash
|
2 |
|
3 |
-
path=gs://dsgt-clef-snakeclef-2024/models/torch-petastorm-
|
4 |
gcloud storage cp $path .
|
|
|
1 |
#!/usr/bin/bash
|
2 |
|
3 |
+
path=gs://dsgt-clef-snakeclef-2024/models/torch-petastorm-v2-cls-token/checkpoints/last.ckpt
|
4 |
gcloud storage cp $path .
|
last.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93e5258ca3f1b82cd84882e435a064c463cc4e69e015b4ea91be23f7f43195b9
|
3 |
+
size 16145784
|
script.py
CHANGED
@@ -10,6 +10,7 @@ from PIL import Image
|
|
10 |
from torch import nn
|
11 |
from torch.utils.data import DataLoader, Dataset
|
12 |
from transformers import AutoImageProcessor, AutoModel
|
|
|
13 |
|
14 |
|
15 |
class ImageDataset(Dataset):
|
@@ -61,19 +62,28 @@ def make_submission(
|
|
61 |
hparams = checkpoint["hyper_parameters"]
|
62 |
model = LinearClassifier(hparams["num_features"], hparams["num_classes"])
|
63 |
model.load_state_dict(checkpoint["state_dict"])
|
|
|
|
|
64 |
|
65 |
dataloader = DataLoader(
|
66 |
-
ImageDataset(test_metadata, images_root_path), batch_size=
|
67 |
)
|
68 |
rows = []
|
69 |
-
for batch in dataloader:
|
70 |
-
observation_ids = batch["observation_id"]
|
71 |
-
logits = model(batch["features"])
|
72 |
class_ids = torch.argmax(logits, dim=1)
|
73 |
for observation_id, class_id in zip(observation_ids, class_ids):
|
74 |
row = {"observation_id": int(observation_id), "class_id": int(class_id)}
|
75 |
rows.append(row)
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
submission_df.to_csv(output_csv_path, index=False)
|
78 |
|
79 |
|
|
|
10 |
from torch import nn
|
11 |
from torch.utils.data import DataLoader, Dataset
|
12 |
from transformers import AutoImageProcessor, AutoModel
|
13 |
+
import tqdm
|
14 |
|
15 |
|
16 |
class ImageDataset(Dataset):
|
|
|
62 |
hparams = checkpoint["hyper_parameters"]
|
63 |
model = LinearClassifier(hparams["num_features"], hparams["num_classes"])
|
64 |
model.load_state_dict(checkpoint["state_dict"])
|
65 |
+
# to gpu
|
66 |
+
model = model.cuda()
|
67 |
|
68 |
dataloader = DataLoader(
|
69 |
+
ImageDataset(test_metadata, images_root_path), batch_size=250
|
70 |
)
|
71 |
rows = []
|
72 |
+
for batch in tqdm.tqdm(dataloader):
|
73 |
+
observation_ids = batch["observation_id"].cuda()
|
74 |
+
logits = model(batch["features"].cuda())
|
75 |
class_ids = torch.argmax(logits, dim=1)
|
76 |
for observation_id, class_id in zip(observation_ids, class_ids):
|
77 |
row = {"observation_id": int(observation_id), "class_id": int(class_id)}
|
78 |
rows.append(row)
|
79 |
+
# group by observation take the mode of the class_id
|
80 |
+
# make sure to keep the observation id
|
81 |
+
submission_df = (
|
82 |
+
pd.DataFrame(rows)
|
83 |
+
.groupby("observation_id")
|
84 |
+
.agg(lambda x: pd.Series.mode(x)[0])
|
85 |
+
.reset_index()
|
86 |
+
)
|
87 |
submission_df.to_csv(output_csv_path, index=False)
|
88 |
|
89 |
|