Spaces:

drscotthawley
/

PicturesOfMIDI

Running on Zero

drscotthawley committed on Jun 28, 2024

Commit

d19a04f

1 Parent(s): 1dd43bb

almost runs

Files changed (2) hide show

app.py CHANGED Viewed

@@ -28,7 +28,7 @@ import k_diffusion as K
 from pom.pianoroll import regroup_lines, img_file_2_midi_file, square_to_rect, rect_to_square
 from pom.square_to_rect import square_to_rect
 def infer_mask_from_init_img(img, mask_with='grey'):
     "note, this works whether image is normalized on 0..1 or -1..1, but not 0..255"
@@ -103,14 +103,11 @@ def process_image(image, repaint, busyness):
     bs = num
     repaint = repaint
     seed_scale = 1.0
-    DEVICES = 'CUDA_VISIBLE_DEVICES=3'
-    USER = 'shawley'
-    RUN_HOME = f'/runs/{USER}/k-diffusion/pop909/full_chords'
-    CKPT = f'{RUN_HOME}/256_chords_00130000.pth'
     PREFIX = 'gradiodemo'
     # !echo {DEVICES} {CT_HOME} {CKPT} {PREFIX} {masked_img_file}
     print("Reading init image from ", masked_img_file,", repaint = ",repaint)
-    cmd = f'/home/shawley/envs/hs/bin/python {CT_HOME}/sample.py --batch-size {bs} --checkpoint {CKPT} --config {CT_HOME}/configs/config_pop909_256x256_chords.json -n {num} --prefix {PREFIX} --init-image {masked_img_file} --steps=100 --repaint={repaint}'
     print("Will run command: ", cmd)
     args = cmd.split(' ')
     #call(cmd, shell=True)

 from pom.pianoroll import regroup_lines, img_file_2_midi_file, square_to_rect, rect_to_square
 from pom.square_to_rect import square_to_rect
+CT_HOME = '.'
 def infer_mask_from_init_img(img, mask_with='grey'):
     "note, this works whether image is normalized on 0..1 or -1..1, but not 0..255"
     bs = num
     repaint = repaint
     seed_scale = 1.0
+    CKPT = f'ckpt/256_chords_00130000.pth'
     PREFIX = 'gradiodemo'
     # !echo {DEVICES} {CT_HOME} {CKPT} {PREFIX} {masked_img_file}
     print("Reading init image from ", masked_img_file,", repaint = ",repaint)
+    cmd = f'python {CT_HOME}/sample.py --batch-size {bs} --checkpoint {CKPT} --config {CT_HOME}/configs/config_pop909_256x256_chords.json -n {num} --prefix {PREFIX} --init-image {masked_img_file} --steps=100 --repaint={repaint}'
     print("Will run command: ", cmd)
     args = cmd.split(' ')
     #call(cmd, shell=True)

configs/config_pop909_256x256_chords.json ADDED Viewed

+{
+    "model": {
+        "type": "image_transformer_v2",
+        "input_channels": 3,
+        "input_size": [256, 256],
+        "patch_size": [4, 4],
+        "depths": [2, 2, 4],
+        "widths": [128, 256, 512],
+        "self_attns": [
+            {"type": "neighborhood", "d_head": 64, "kernel_size": 7},
+            {"type": "neighborhood", "d_head": 64, "kernel_size": 7},
+            {"type": "global", "d_head": 64}
+        ],
+        "loss_config": "karras",
+        "loss_weighting": "soft-min-snr",
+        "dropout_rate": [0.0, 0.0, 0.1],
+        "mapping_dropout_rate": 0.0,
+        "augment_prob": 0.0,
+        "vert_trans_prob": 0.0,
+        "sigma_data": 0.5,
+        "sigma_min": 1e-2,
+        "sigma_max": 160,
+        "sigma_sample_density": {
+            "type": "cosine-interpolated"
+        }
+    },
+    "dataset": {
+        "type": "midi_pianoroll",
+        "location": "/data/POP909-Dataset/images_128_rg_chords_TOTAL",
+        "crop_size": [128, 512]
+    },
+    "optimizer": {
+        "type": "adamw",
+        "lr": 5e-4,
+        "betas": [0.9, 0.95],
+        "eps": 1e-8,
+        "weight_decay": 1e-3
+    },
+    "lr_sched": {
+        "type": "constant",
+        "warmup": 0.0
+    },
+    "ema_sched": {
+        "type": "inverse",
+        "power": 0.75,
+        "max_value": 0.9999
+    }
+}