nostalgebraist committed on
Commit
41cc149
1 Parent(s): c0a92b9

un-tar'd version of nostalgebraist/nostalgebraist-autoresponder-diffusion-captions

Browse files
meta.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint_64": "logdir_frank_64_capts_finalattn/ema_0.9999_412900.pt",
3
+ "checkpoint_128": "logdir_frank_64_128_capts/ema_0.9999_190300.pt",
4
+ "checkpoint_256": "logdir_sres2_256_beyond_noise_cond/ema_0.9999_077100.pt",
5
+ "checkpoint_512": "logdir_sres_2x_august_2022_384/ema_0.9999_046400.pt",
6
+ "tested_on_commit": "a172f4b"
7
+ }
nostalgebraist-autoresponder-diffusion/config_sres1.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "is_super_res": false,
3
+ "tokenizer_config": {
4
+ "max_seq_len": 384,
5
+ "char_level": true,
6
+ "legacy_padding_behavior": false
7
+ },
8
+ "image_size": 64,
9
+ "num_channels": 256,
10
+ "num_res_blocks": 2,
11
+ "num_heads": 16,
12
+ "num_heads_upsample": -1,
13
+ "attention_resolutions": "4,8,16",
14
+ "dropout": 0.0,
15
+ "learn_sigma": true,
16
+ "sigma_small": false,
17
+ "class_cond": false,
18
+ "diffusion_steps": 1000,
19
+ "noise_schedule": "cosine",
20
+ "timestep_respacing": "",
21
+ "use_kl": false,
22
+ "predict_xstart": false,
23
+ "rescale_timesteps": false,
24
+ "rescale_learned_sigmas": false,
25
+ "resblock_updown": true,
26
+ "use_checkpoint": false,
27
+ "use_scale_shift_norm": true,
28
+ "channels_per_head": 64,
29
+ "channels_per_head_upsample": -1,
30
+ "channel_mult": "1,2,2,4,4",
31
+ "use_checkpoint_down": false,
32
+ "use_checkpoint_middle": false,
33
+ "use_checkpoint_up": false,
34
+ "txt": true,
35
+ "txt_dim": 512,
36
+ "txt_depth": 4,
37
+ "max_seq_len": 384,
38
+ "txt_resolutions": "8,16,32",
39
+ "cross_attn_channels_per_head": 128,
40
+ "cross_attn_init_gain": 1.0,
41
+ "cross_attn_gain_scale": 1.0,
42
+ "text_lr_mult": null,
43
+ "txt_output_layers_only": true,
44
+ "monochrome": false,
45
+ "monochrome_adapter": false,
46
+ "txt_attn_before_attn": false,
47
+ "txt_avoid_groupnorm": false,
48
+ "cross_attn_orth_init": true,
49
+ "cross_attn_q_t_emb": true,
50
+ "txt_rezero": false,
51
+ "cross_attn_rezero": false,
52
+ "cross_attn_rezero_keeps_prenorm": true,
53
+ "cross_attn_use_layerscale": false,
54
+ "verbose": false,
55
+ "txt_t5": true,
56
+ "txt_rotary": false,
57
+ "rgb_adapter": false,
58
+ "weave_attn": true,
59
+ "weave_use_ff": true,
60
+ "weave_ff_rezero": false,
61
+ "weave_ff_force_prenorm": false,
62
+ "weave_ff_mult": 2,
63
+ "weave_ff_glu": false,
64
+ "weave_qkv_dim_always_text": true,
65
+ "channels_last_mem": false,
66
+ "txt_ff_glu": true,
67
+ "txt_ff_mult": 3,
68
+ "weave_v2": false,
69
+ "use_checkpoint_lowcost": false,
70
+ "weave_use_ff_gain": true,
71
+ "return_diffusion_factory": false,
72
+ "use_balanced_loss": false,
73
+ "use_v_loss": false,
74
+ "use_snr_plus_one_loss": false,
75
+ "bread_adapter_at_ds": -1,
76
+ "bread_adapter_nearest_in": false,
77
+ "bread_adapter_zero_conv_in": false,
78
+ "bread_adapter_only": false,
79
+ "expand_timestep_base_dim": 192,
80
+ "silu_impl": "fused",
81
+ "using_capt": true,
82
+ "xattn_capt": true,
83
+ "weave_capt": false,
84
+ "glide_style_capt_attn": true,
85
+ "glide_style_capt_emb": false,
86
+ "glide_style_capt_emb_init_scale": 0.1,
87
+ "glide_style_capt_emb_nonlin": false,
88
+ "clipname": "ViT-L/14@336px",
89
+ "clip_use_penultimate_layer": true,
90
+ "use_checkpoint_below_res": -1,
91
+ "vb_loss_ratio": 1000.0,
92
+ "no_attn": false,
93
+ "no_attn_substitute_resblock": false,
94
+ "freeze_capt_encoder": true,
95
+ "clipmod": null,
96
+ "post_txt_image_attn": "final_res",
97
+ "txt_groupnorm_1group": false
98
+ }
nostalgebraist-autoresponder-diffusion/config_sres1p5.json ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "is_super_res": true,
3
+ "tokenizer_config": {
4
+ "max_seq_len": 384,
5
+ "char_level": true,
6
+ "legacy_padding_behavior": false
7
+ },
8
+ "num_channels": 256,
9
+ "num_res_blocks": 2,
10
+ "num_heads": 16,
11
+ "num_heads_upsample": -1,
12
+ "attention_resolutions": "8,16,32",
13
+ "dropout": 0.0,
14
+ "learn_sigma": true,
15
+ "class_cond": false,
16
+ "diffusion_steps": 1000,
17
+ "noise_schedule": "linear",
18
+ "timestep_respacing": "",
19
+ "use_kl": false,
20
+ "predict_xstart": false,
21
+ "rescale_timesteps": false,
22
+ "rescale_learned_sigmas": false,
23
+ "resblock_updown": true,
24
+ "use_checkpoint": false,
25
+ "use_scale_shift_norm": true,
26
+ "channels_per_head": 64,
27
+ "channels_per_head_upsample": -1,
28
+ "channel_mult": "1,2,2,4,4",
29
+ "use_checkpoint_down": false,
30
+ "use_checkpoint_middle": false,
31
+ "use_checkpoint_up": false,
32
+ "txt": true,
33
+ "txt_dim": 512,
34
+ "txt_depth": 4,
35
+ "max_seq_len": 384,
36
+ "txt_resolutions": "8,16,32",
37
+ "cross_attn_channels_per_head": 128,
38
+ "cross_attn_init_gain": 1.0,
39
+ "cross_attn_gain_scale": 1.0,
40
+ "text_lr_mult": -1.0,
41
+ "txt_output_layers_only": true,
42
+ "monochrome": false,
43
+ "monochrome_adapter": false,
44
+ "txt_attn_before_attn": false,
45
+ "txt_avoid_groupnorm": false,
46
+ "cross_attn_orth_init": true,
47
+ "cross_attn_q_t_emb": true,
48
+ "txt_rezero": false,
49
+ "cross_attn_rezero": false,
50
+ "cross_attn_rezero_keeps_prenorm": true,
51
+ "cross_attn_use_layerscale": false,
52
+ "verbose": false,
53
+ "txt_t5": true,
54
+ "txt_rotary": false,
55
+ "rgb_adapter": false,
56
+ "weave_attn": true,
57
+ "weave_use_ff": true,
58
+ "weave_ff_rezero": false,
59
+ "weave_ff_force_prenorm": false,
60
+ "weave_ff_mult": 2,
61
+ "weave_ff_glu": false,
62
+ "weave_qkv_dim_always_text": true,
63
+ "channels_last_mem": false,
64
+ "txt_ff_glu": true,
65
+ "txt_ff_mult": 3,
66
+ "weave_v2": false,
67
+ "use_checkpoint_lowcost": false,
68
+ "weave_use_ff_gain": true,
69
+ "return_diffusion_factory": false,
70
+ "use_balanced_loss": false,
71
+ "use_v_loss": false,
72
+ "use_snr_plus_one_loss": false,
73
+ "expand_timestep_base_dim": 192,
74
+ "silu_impl": "fused",
75
+ "using_capt": true,
76
+ "xattn_capt": true,
77
+ "weave_capt": false,
78
+ "glide_style_capt_attn": true,
79
+ "glide_style_capt_emb": false,
80
+ "glide_style_capt_emb_init_scale": 0.1,
81
+ "glide_style_capt_emb_nonlin": false,
82
+ "clipname": "ViT-L/14@336px",
83
+ "clip_use_penultimate_layer": true,
84
+ "use_checkpoint_below_res": -1,
85
+ "vb_loss_ratio": 1000.0,
86
+ "no_attn": false,
87
+ "no_attn_substitute_resblock": false,
88
+ "freeze_capt_encoder": true,
89
+ "clipmod": null,
90
+ "post_txt_image_attn": "none",
91
+ "txt_groupnorm_1group": false,
92
+ "large_size": 128,
93
+ "small_size": 64,
94
+ "colorize": false,
95
+ "up_interp_mode": "bilinear",
96
+ "noise_cond": true
97
+ }
nostalgebraist-autoresponder-diffusion/config_sres2.json ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "is_super_res": true,
3
+ "tokenizer_config": {
4
+ "max_seq_len": 384,
5
+ "char_level": true,
6
+ "legacy_padding_behavior": false
7
+ },
8
+ "num_channels": 192,
9
+ "num_res_blocks": 2,
10
+ "num_heads": 12,
11
+ "num_heads_upsample": -1,
12
+ "attention_resolutions": "8,16,32",
13
+ "dropout": 0.0,
14
+ "learn_sigma": true,
15
+ "class_cond": false,
16
+ "diffusion_steps": 1000,
17
+ "noise_schedule": "linear",
18
+ "timestep_respacing": "",
19
+ "use_kl": false,
20
+ "predict_xstart": false,
21
+ "rescale_timesteps": false,
22
+ "rescale_learned_sigmas": false,
23
+ "resblock_updown": true,
24
+ "use_checkpoint": false,
25
+ "use_scale_shift_norm": true,
26
+ "channels_per_head": 64,
27
+ "channels_per_head_upsample": -1,
28
+ "channel_mult": "1,1,2,2,4,4",
29
+ "use_checkpoint_down": false,
30
+ "use_checkpoint_middle": false,
31
+ "use_checkpoint_up": false,
32
+ "txt": true,
33
+ "txt_dim": 512,
34
+ "txt_depth": 4,
35
+ "max_seq_len": 384,
36
+ "txt_resolutions": "8,16,32",
37
+ "cross_attn_channels_per_head": 128,
38
+ "cross_attn_init_gain": 1.0,
39
+ "cross_attn_gain_scale": 1.0,
40
+ "text_lr_mult": -1.0,
41
+ "txt_output_layers_only": true,
42
+ "monochrome": false,
43
+ "monochrome_adapter": false,
44
+ "txt_attn_before_attn": false,
45
+ "txt_avoid_groupnorm": false,
46
+ "cross_attn_orth_init": true,
47
+ "cross_attn_q_t_emb": true,
48
+ "txt_rezero": false,
49
+ "cross_attn_rezero": false,
50
+ "cross_attn_rezero_keeps_prenorm": true,
51
+ "cross_attn_use_layerscale": false,
52
+ "verbose": false,
53
+ "txt_t5": true,
54
+ "txt_rotary": false,
55
+ "rgb_adapter": false,
56
+ "weave_attn": true,
57
+ "weave_use_ff": true,
58
+ "weave_ff_rezero": false,
59
+ "weave_ff_force_prenorm": false,
60
+ "weave_ff_mult": 2,
61
+ "weave_ff_glu": false,
62
+ "weave_qkv_dim_always_text": true,
63
+ "channels_last_mem": false,
64
+ "txt_ff_glu": true,
65
+ "txt_ff_mult": 3,
66
+ "weave_v2": false,
67
+ "use_checkpoint_lowcost": false,
68
+ "weave_use_ff_gain": true,
69
+ "return_diffusion_factory": false,
70
+ "use_balanced_loss": false,
71
+ "use_v_loss": false,
72
+ "use_snr_plus_one_loss": false,
73
+ "expand_timestep_base_dim": -1,
74
+ "silu_impl": "fused",
75
+ "using_capt": false,
76
+ "xattn_capt": true,
77
+ "weave_capt": false,
78
+ "glide_style_capt_attn": false,
79
+ "glide_style_capt_emb": false,
80
+ "glide_style_capt_emb_init_scale": 0.1,
81
+ "glide_style_capt_emb_nonlin": false,
82
+ "clipname": "RN50",
83
+ "clip_use_penultimate_layer": false,
84
+ "use_checkpoint_below_res": -1,
85
+ "vb_loss_ratio": 1000.0,
86
+ "no_attn": false,
87
+ "no_attn_substitute_resblock": false,
88
+ "freeze_capt_encoder": false,
89
+ "clipmod": null,
90
+ "post_txt_image_attn": "none",
91
+ "txt_groupnorm_1group": false,
92
+ "large_size": 256,
93
+ "small_size": 128,
94
+ "colorize": false,
95
+ "up_interp_mode": "bilinear",
96
+ "noise_cond": true
97
+ }
nostalgebraist-autoresponder-diffusion/config_sres3.json ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "is_super_res": true,
3
+ "tokenizer_config": {
4
+ "max_seq_len": 64,
5
+ "char_level": false,
6
+ "legacy_padding_behavior": true
7
+ },
8
+ "num_channels": 192,
9
+ "num_res_blocks": 2,
10
+ "num_heads": 12,
11
+ "num_heads_upsample": -1,
12
+ "attention_resolutions": "64",
13
+ "dropout": 0.0,
14
+ "learn_sigma": true,
15
+ "class_cond": false,
16
+ "diffusion_steps": 1000,
17
+ "noise_schedule": "linear",
18
+ "timestep_respacing": "",
19
+ "use_kl": false,
20
+ "predict_xstart": false,
21
+ "rescale_timesteps": false,
22
+ "rescale_learned_sigmas": false,
23
+ "resblock_updown": true,
24
+ "use_checkpoint": false,
25
+ "use_scale_shift_norm": true,
26
+ "channels_per_head": 64,
27
+ "channels_per_head_upsample": -1,
28
+ "channel_mult": "1,1,2,2,4,4",
29
+ "use_checkpoint_down": false,
30
+ "use_checkpoint_middle": false,
31
+ "use_checkpoint_up": false,
32
+ "txt": false,
33
+ "txt_dim": 128,
34
+ "txt_depth": 2,
35
+ "max_seq_len": 64,
36
+ "txt_resolutions": "8",
37
+ "cross_attn_channels_per_head": -1,
38
+ "cross_attn_init_gain": 1.0,
39
+ "cross_attn_gain_scale": 200.0,
40
+ "text_lr_mult": -1.0,
41
+ "txt_output_layers_only": false,
42
+ "monochrome": false,
43
+ "monochrome_adapter": false,
44
+ "txt_attn_before_attn": false,
45
+ "txt_avoid_groupnorm": false,
46
+ "cross_attn_orth_init": false,
47
+ "cross_attn_q_t_emb": false,
48
+ "txt_rezero": false,
49
+ "cross_attn_rezero": false,
50
+ "cross_attn_rezero_keeps_prenorm": false,
51
+ "cross_attn_use_layerscale": false,
52
+ "verbose": false,
53
+ "txt_t5": false,
54
+ "txt_rotary": false,
55
+ "rgb_adapter": false,
56
+ "weave_attn": false,
57
+ "weave_use_ff": true,
58
+ "weave_ff_rezero": true,
59
+ "weave_ff_force_prenorm": false,
60
+ "weave_ff_mult": 4,
61
+ "weave_ff_glu": false,
62
+ "weave_qkv_dim_always_text": false,
63
+ "channels_last_mem": false,
64
+ "txt_ff_glu": false,
65
+ "txt_ff_mult": 4,
66
+ "weave_v2": false,
67
+ "use_checkpoint_lowcost": false,
68
+ "weave_use_ff_gain": false,
69
+ "return_diffusion_factory": false,
70
+ "use_balanced_loss": false,
71
+ "use_v_loss": false,
72
+ "use_snr_plus_one_loss": false,
73
+ "expand_timestep_base_dim": -1,
74
+ "silu_impl": "fused",
75
+ "using_capt": false,
76
+ "xattn_capt": true,
77
+ "weave_capt": false,
78
+ "glide_style_capt_attn": false,
79
+ "glide_style_capt_emb": false,
80
+ "glide_style_capt_emb_init_scale": 0.1,
81
+ "glide_style_capt_emb_nonlin": false,
82
+ "clipname": "RN50",
83
+ "clip_use_penultimate_layer": false,
84
+ "use_checkpoint_below_res": -1,
85
+ "vb_loss_ratio": 1000.0,
86
+ "no_attn": true,
87
+ "no_attn_substitute_resblock": false,
88
+ "freeze_capt_encoder": false,
89
+ "clipmod": null,
90
+ "post_txt_image_attn": "none",
91
+ "txt_groupnorm_1group": true,
92
+ "large_size": 512,
93
+ "small_size": 256,
94
+ "colorize": false,
95
+ "up_interp_mode": "bilinear",
96
+ "noise_cond": true
97
+ }
nostalgebraist-autoresponder-diffusion/sres1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1b046d556059ad950995bd9953b822b793cb400b1c44448e02fd53c2b172c46
3
+ size 2538552032
nostalgebraist-autoresponder-diffusion/sres1p5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04ca499098183835a5d7daa2347d1e1dd0ca512b04d6728da6123767809f506b
3
+ size 2567556026
nostalgebraist-autoresponder-diffusion/sres2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:022f83346b28bf8f6ad1fe32a59ddbe81dd5eaf614f1f55ee6ad0f00dd99ae20
3
+ size 1498473747
nostalgebraist-autoresponder-diffusion/sres3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be7b0156fbf63cc98f7d5ea58050162c4a19d55ed09defdaaeaa0c504af6f547
3
+ size 1131415821