asdf98
/

iris-image-gen

asdf98 committed 8 days ago

Commit

e90110a

verified ·

1 Parent(s): 654d061

Fix conv2d bf16 crash on T4: colab_train_iris.py

Files changed (1) hide show

colab_train_iris.py CHANGED Viewed

@@ -72,14 +72,25 @@ import gc
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 if device.type == "cuda":
     gpu_name = torch.cuda.get_device_name(0)
-    gpu_mem = torch.cuda.get_device_properties(0).total_mem / 1e9
     print(f"GPU: {gpu_name} ({gpu_mem:.1f} GB)")
 else:
     print("WARNING: No GPU detected. Training will be very slow.")
     print("In Colab: Runtime -> Change runtime type -> T4 GPU")
 use_amp = device.type == "cuda"
-amp_dtype = torch.bfloat16 if (use_amp and torch.cuda.is_bf16_supported()) else torch.float16 if use_amp else torch.float32
 print(f"AMP dtype: {amp_dtype}")
 # ============================================================
@@ -224,7 +235,7 @@ print(f"  Core: {counts['core']:,}")
 print(f"  Decoder: {counts['tiny_decoder']:,}")
 if device.type == "cuda":
-    print(f"VRAM used: {torch.cuda.memory_allocated()/1e9:.2f} GB / {torch.cuda.get_device_properties(0).total_mem/1e9:.1f} GB")
 # ============================================================
 # CELL 9: Train!

 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 if device.type == "cuda":
     gpu_name = torch.cuda.get_device_name(0)
+    gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
     print(f"GPU: {gpu_name} ({gpu_mem:.1f} GB)")
 else:
     print("WARNING: No GPU detected. Training will be very slow.")
     print("In Colab: Runtime -> Change runtime type -> T4 GPU")
 use_amp = device.type == "cuda"
+# T4 (compute capability 7.5) reports bf16 supported but cuDNN conv2d kernels
+# lack bf16 engines → crashes at runtime. Force fp16 which T4 natively supports.
+if use_amp:
+    cc = torch.cuda.get_device_capability(0)
+    if cc[0] < 8:  # Ampere (8.0+) has native bf16; Turing (7.5) does not
+        amp_dtype = torch.float16
+        print(f"GPU compute capability {cc[0]}.{cc[1]} — using fp16 (bf16 conv kernels unavailable)")
+    else:
+        amp_dtype = torch.bfloat16
+        print(f"GPU compute capability {cc[0]}.{cc[1]} — using bf16")
+else:
+    amp_dtype = torch.float32
 print(f"AMP dtype: {amp_dtype}")
 # ============================================================
 print(f"  Decoder: {counts['tiny_decoder']:,}")
 if device.type == "cuda":
+    print(f"VRAM used: {torch.cuda.memory_allocated()/1e9:.2f} GB / {torch.cuda.get_device_properties(0).total_memory/1e9:.1f} GB")
 # ============================================================
 # CELL 9: Train!