drscotthawley committed on
Commit
d19a04f
1 Parent(s): 1dd43bb

almost runs

Browse files
app.py CHANGED
@@ -28,7 +28,7 @@ import k_diffusion as K
28
  from pom.pianoroll import regroup_lines, img_file_2_midi_file, square_to_rect, rect_to_square
29
  from pom.square_to_rect import square_to_rect
30
 
31
-
32
 
33
  def infer_mask_from_init_img(img, mask_with='grey'):
34
  "note, this works whether image is normalized on 0..1 or -1..1, but not 0..255"
@@ -103,14 +103,11 @@ def process_image(image, repaint, busyness):
103
  bs = num
104
  repaint = repaint
105
  seed_scale = 1.0
106
- DEVICES = 'CUDA_VISIBLE_DEVICES=3'
107
- USER = 'shawley'
108
- RUN_HOME = f'/runs/{USER}/k-diffusion/pop909/full_chords'
109
- CKPT = f'{RUN_HOME}/256_chords_00130000.pth'
110
  PREFIX = 'gradiodemo'
111
  # !echo {DEVICES} {CT_HOME} {CKPT} {PREFIX} {masked_img_file}
112
  print("Reading init image from ", masked_img_file,", repaint = ",repaint)
113
- cmd = f'/home/shawley/envs/hs/bin/python {CT_HOME}/sample.py --batch-size {bs} --checkpoint {CKPT} --config {CT_HOME}/configs/config_pop909_256x256_chords.json -n {num} --prefix {PREFIX} --init-image {masked_img_file} --steps=100 --repaint={repaint}'
114
  print("Will run command: ", cmd)
115
  args = cmd.split(' ')
116
  #call(cmd, shell=True)
 
28
  from pom.pianoroll import regroup_lines, img_file_2_midi_file, square_to_rect, rect_to_square
29
  from pom.square_to_rect import square_to_rect
30
 
31
+ CT_HOME = '.'
32
 
33
  def infer_mask_from_init_img(img, mask_with='grey'):
34
  "note, this works whether image is normalized on 0..1 or -1..1, but not 0..255"
 
103
  bs = num
104
  repaint = repaint
105
  seed_scale = 1.0
106
+ CKPT = f'ckpt/256_chords_00130000.pth'
 
 
 
107
  PREFIX = 'gradiodemo'
108
  # !echo {DEVICES} {CT_HOME} {CKPT} {PREFIX} {masked_img_file}
109
  print("Reading init image from ", masked_img_file,", repaint = ",repaint)
110
+ cmd = f'python {CT_HOME}/sample.py --batch-size {bs} --checkpoint {CKPT} --config {CT_HOME}/configs/config_pop909_256x256_chords.json -n {num} --prefix {PREFIX} --init-image {masked_img_file} --steps=100 --repaint={repaint}'
111
  print("Will run command: ", cmd)
112
  args = cmd.split(' ')
113
  #call(cmd, shell=True)
configs/config_pop909_256x256_chords.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "type": "image_transformer_v2",
4
+ "input_channels": 3,
5
+ "input_size": [256, 256],
6
+ "patch_size": [4, 4],
7
+ "depths": [2, 2, 4],
8
+ "widths": [128, 256, 512],
9
+ "self_attns": [
10
+ {"type": "neighborhood", "d_head": 64, "kernel_size": 7},
11
+ {"type": "neighborhood", "d_head": 64, "kernel_size": 7},
12
+ {"type": "global", "d_head": 64}
13
+ ],
14
+ "loss_config": "karras",
15
+ "loss_weighting": "soft-min-snr",
16
+ "dropout_rate": [0.0, 0.0, 0.1],
17
+ "mapping_dropout_rate": 0.0,
18
+ "augment_prob": 0.0,
19
+ "vert_trans_prob": 0.0,
20
+ "sigma_data": 0.5,
21
+ "sigma_min": 1e-2,
22
+ "sigma_max": 160,
23
+ "sigma_sample_density": {
24
+ "type": "cosine-interpolated"
25
+ }
26
+ },
27
+ "dataset": {
28
+ "type": "midi_pianoroll",
29
+ "location": "/data/POP909-Dataset/images_128_rg_chords_TOTAL",
30
+ "crop_size": [128, 512]
31
+ },
32
+ "optimizer": {
33
+ "type": "adamw",
34
+ "lr": 5e-4,
35
+ "betas": [0.9, 0.95],
36
+ "eps": 1e-8,
37
+ "weight_decay": 1e-3
38
+ },
39
+ "lr_sched": {
40
+ "type": "constant",
41
+ "warmup": 0.0
42
+ },
43
+ "ema_sched": {
44
+ "type": "inverse",
45
+ "power": 0.75,
46
+ "max_value": 0.9999
47
+ }
48
+ }