diffusers not working

#6
by daniel5984 - opened

I get this error when loading the model:


Full error:
```
The config attributes {'reverse_transformer_layers_per_block': [[4, 4, 10], [2, 1, 1], 1]} were passed to UNet2DConditionModel, but are not expected and will be ignored. Please verify your config.json configuration file.

TypeError Traceback (most recent call last)
Cell In[17], line 3
1 from diffusers import StableDiffusionXLPipeline
2 import torch
----> 3 pipe = StableDiffusionXLPipeline.from_pretrained("segmind/SSD-1B", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
4 pipe.to("cuda")
6 #model_path = "/notebooks/lora-trained-xl/pytorch_lora_weights.safetensors"
7 #pipe.unet.load_attn_procs(model_path)
8 #pipe.to("cuda")

File /usr/local/lib/python3.10/dist-packages/diffusers/pipelines/pipeline_utils.py:1105, in DiffusionPipeline.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
1102 loaded_sub_model = passed_class_obj[name]
1103 else:
1104 # load sub model
-> 1105 loaded_sub_model = load_sub_model(
1106 library_name=library_name,
1107 class_name=class_name,
1108 importable_classes=importable_classes,
1109 pipelines=pipelines,
1110 is_pipeline_module=is_pipeline_module,
1111 pipeline_class=pipeline_class,
1112 torch_dtype=torch_dtype,
1113 provider=provider,
1114 sess_options=sess_options,
1115 device_map=device_map,
1116 max_memory=max_memory,
1117 offload_folder=offload_folder,
1118 offload_state_dict=offload_state_dict,
1119 model_variants=model_variants,
1120 name=name,
1121 from_flax=from_flax,
1122 variant=variant,
1123 low_cpu_mem_usage=low_cpu_mem_usage,
1124 cached_folder=cached_folder,
1125 )
1126 logger.info(
1127 f"Loaded {name} as {class_name} from {name} subfolder of {pretrained_model_name_or_path}."
1128 )
1130 init_kwargs[name] = loaded_sub_model # UNet(...), # DiffusionSchedule(...)

File /usr/local/lib/python3.10/dist-packages/diffusers/pipelines/pipeline_utils.py:472, in load_sub_model(library_name, class_name, importable_classes, pipelines, is_pipeline_module, pipeline_class, torch_dtype, provider, sess_options, device_map, max_memory, offload_folder, offload_state_dict, model_variants, name, from_flax, variant, low_cpu_mem_usage, cached_folder)
470 # check if the module is in a subdirectory
471 if os.path.isdir(os.path.join(cached_folder, name)):
--> 472 loaded_sub_model = load_method(os.path.join(cached_folder, name), **loading_kwargs)
473 else:
474 # else load from the root directory
475 loaded_sub_model = load_method(cached_folder, **loading_kwargs)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/modeling_utils.py:636, in ModelMixin.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
633 if low_cpu_mem_usage:
634 # Instantiate model with empty weights
635 with accelerate.init_empty_weights():
--> 636 model = cls.from_config(config, **unused_kwargs)
638 # if device_map is None, load the state dict and move the params from meta device to the cpu
639 if device_map is None:

File /usr/local/lib/python3.10/dist-packages/diffusers/configuration_utils.py:254, in ConfigMixin.from_config(cls, config, return_unused_kwargs, **kwargs)
251 init_dict[deprecated_kwarg] = unused_kwargs.pop(deprecated_kwarg)
253 # Return model and optionally state and/or unused_kwargs
--> 254 model = cls(**init_dict)
256 # make sure to also save config parameters that might be used for compatible classes
257 model.register_to_config(**hidden_dict)

File /usr/local/lib/python3.10/dist-packages/diffusers/configuration_utils.py:636, in register_to_config.<locals>.inner_init(self, *args, **kwargs)
634 new_kwargs = {**config_init_kwargs, **new_kwargs}
635 getattr(self, "register_to_config")(**new_kwargs)
--> 636 init(self, *args, **init_kwargs)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/unet_2d_condition.py:440, in UNet2DConditionModel.__init__(self, sample_size, in_channels, out_channels, center_input_sample, flip_sin_to_cos, freq_shift, down_block_types, mid_block_type, up_block_types, only_cross_attention, block_out_channels, layers_per_block, downsample_padding, mid_block_scale_factor, dropout, act_fn, norm_num_groups, norm_eps, cross_attention_dim, transformer_layers_per_block, encoder_hid_dim, encoder_hid_dim_type, attention_head_dim, num_attention_heads, dual_cross_attention, use_linear_projection, class_embed_type, addition_embed_type, addition_time_embed_dim, num_class_embeds, upcast_attention, resnet_time_scale_shift, resnet_skip_time_act, resnet_out_scale_factor, time_embedding_type, time_embedding_dim, time_embedding_act_fn, timestep_post_act, time_cond_proj_dim, conv_in_kernel, conv_out_kernel, projection_class_embeddings_input_dim, attention_type, class_embeddings_concat, mid_block_only_cross_attention, cross_attention_norm, addition_embed_type_num_heads)
437 output_channel = block_out_channels[i]
438 is_final_block = i == len(block_out_channels) - 1
--> 440 down_block = get_down_block(
441 down_block_type,
442 num_layers=layers_per_block[i],
443 transformer_layers_per_block=transformer_layers_per_block[i],
444 in_channels=input_channel,
445 out_channels=output_channel,
446 temb_channels=blocks_time_embed_dim,
447 add_downsample=not is_final_block,
448 resnet_eps=norm_eps,
449 resnet_act_fn=act_fn,
450 resnet_groups=norm_num_groups,
451 cross_attention_dim=cross_attention_dim[i],
452 num_attention_heads=num_attention_heads[i],
453 downsample_padding=downsample_padding,
454 dual_cross_attention=dual_cross_attention,
455 use_linear_projection=use_linear_projection,
456 only_cross_attention=only_cross_attention[i],
457 upcast_attention=upcast_attention,
458 resnet_time_scale_shift=resnet_time_scale_shift,
459 attention_type=attention_type,
460 resnet_skip_time_act=resnet_skip_time_act,
461 resnet_out_scale_factor=resnet_out_scale_factor,
462 cross_attention_norm=cross_attention_norm,
463 attention_head_dim=attention_head_dim[i] if attention_head_dim[i] is not None else output_channel,
464 dropout=dropout,
465 )
466 self.down_blocks.append(down_block)
468 # mid

File /usr/local/lib/python3.10/dist-packages/diffusers/models/unet_2d_blocks.py:119, in get_down_block(down_block_type, num_layers, in_channels, out_channels, temb_channels, add_downsample, resnet_eps, resnet_act_fn, transformer_layers_per_block, num_attention_heads, resnet_groups, cross_attention_dim, downsample_padding, dual_cross_attention, use_linear_projection, only_cross_attention, upcast_attention, resnet_time_scale_shift, attention_type, resnet_skip_time_act, resnet_out_scale_factor, cross_attention_norm, attention_head_dim, downsample_type, dropout)
117 if cross_attention_dim is None:
118 raise ValueError("cross_attention_dim must be specified for CrossAttnDownBlock2D")
--> 119 return CrossAttnDownBlock2D(
120 num_layers=num_layers,
121 transformer_layers_per_block=transformer_layers_per_block,
122 in_channels=in_channels,
123 out_channels=out_channels,
124 temb_channels=temb_channels,
125 dropout=dropout,
126 add_downsample=add_downsample,
127 resnet_eps=resnet_eps,
128 resnet_act_fn=resnet_act_fn,
129 resnet_groups=resnet_groups,
130 downsample_padding=downsample_padding,
131 cross_attention_dim=cross_attention_dim,
132 num_attention_heads=num_attention_heads,
133 dual_cross_attention=dual_cross_attention,
134 use_linear_projection=use_linear_projection,
135 only_cross_attention=only_cross_attention,
136 upcast_attention=upcast_attention,
137 resnet_time_scale_shift=resnet_time_scale_shift,
138 attention_type=attention_type,
139 )
140 elif down_block_type == "SimpleCrossAttnDownBlock2D":
141 if cross_attention_dim is None:

File /usr/local/lib/python3.10/dist-packages/diffusers/models/unet_2d_blocks.py:1001, in CrossAttnDownBlock2D.__init__(self, in_channels, out_channels, temb_channels, dropout, num_layers, transformer_layers_per_block, resnet_eps, resnet_time_scale_shift, resnet_act_fn, resnet_groups, resnet_pre_norm, num_attention_heads, cross_attention_dim, output_scale_factor, downsample_padding, add_downsample, dual_cross_attention, use_linear_projection, only_cross_attention, upcast_attention, attention_type)
985 resnets.append(
986 ResnetBlock2D(
987 in_channels=in_channels,
(...)
997 )
998 )
999 if not dual_cross_attention:
1000 attentions.append(
-> 1001 Transformer2DModel(
1002 num_attention_heads,
1003 out_channels // num_attention_heads,
1004 in_channels=out_channels,
1005 num_layers=transformer_layers_per_block,
1006 cross_attention_dim=cross_attention_dim,
1007 norm_num_groups=resnet_groups,
1008 use_linear_projection=use_linear_projection,
1009 only_cross_attention=only_cross_attention,
1010 upcast_attention=upcast_attention,
1011 attention_type=attention_type,
1012 )
1013 )
1014 else:
1015 attentions.append(
1016 DualTransformer2DModel(
1017 num_attention_heads,
(...)
1023 )
1024 )

File /usr/local/lib/python3.10/dist-packages/diffusers/configuration_utils.py:636, in register_to_config.<locals>.inner_init(self, *args, **kwargs)
634 new_kwargs = {**config_init_kwargs, **new_kwargs}
635 getattr(self, "register_to_config")(**new_kwargs)
--> 636 init(self, *args, **init_kwargs)

File /usr/local/lib/python3.10/dist-packages/diffusers/models/transformer_2d.py:191, in Transformer2DModel.__init__(self, num_attention_heads, attention_head_dim, in_channels, out_channels, num_layers, dropout, norm_num_groups, cross_attention_dim, attention_bias, sample_size, num_vector_embeds, patch_size, activation_fn, num_embeds_ada_norm, use_linear_projection, only_cross_attention, double_self_attention, upcast_attention, norm_type, norm_elementwise_affine, attention_type)
164 self.pos_embed = PatchEmbed(
165 height=sample_size,
166 width=sample_size,
(...)
169 embed_dim=inner_dim,
170 )
172 # 3. Define transformers blocks
173 self.transformer_blocks = nn.ModuleList(
174 [
175 BasicTransformerBlock(
176 inner_dim,
177 num_attention_heads,
178 attention_head_dim,
179 dropout=dropout,
180 cross_attention_dim=cross_attention_dim,
181 activation_fn=activation_fn,
182 num_embeds_ada_norm=num_embeds_ada_norm,
183 attention_bias=attention_bias,
184 only_cross_attention=only_cross_attention,
185 double_self_attention=double_self_attention,
186 upcast_attention=upcast_attention,
187 norm_type=norm_type,
188 norm_elementwise_affine=norm_elementwise_affine,
189 attention_type=attention_type,
190 )
--> 191 for d in range(num_layers)
192 ]
193 )
195 # 4. Define output layers
196 self.out_channels = in_channels if out_channels is None else out_channels

TypeError: 'list' object cannot be interpreted as an integer
```
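The final frame gives the cause away: `num_layers` reaches `Transformer2DModel` as a list rather than an int, because the released diffusers doesn't understand the nested per-block layer counts in SSD-1B's config (note the ignored `reverse_transformer_layers_per_block` warning at the top of the trace). A minimal reproduction of just that TypeError, with the `[4, 4, 10]` value borrowed from that warning:

```python
# The bottom frame runs `for d in range(num_layers)`, but SSD-1B supplies
# per-block layer counts as nested lists, so a list lands in range():
for d in range([4, 4, 10]):
    pass
# TypeError: 'list' object cannot be interpreted as an integer
```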

You must install diffusers from source with `pip install git+https://github.com/huggingface/diffusers`; the PyPI release (`pip install diffusers`) doesn't support this model's config yet.

Segmind org

Like daniel5984 says, you must use a diffusers source install:

`pip install git+https://github.com/huggingface/diffusers`
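After the source install, the snippet from the traceback should load as-is; a minimal sketch (assumes a CUDA GPU, and the prompt is just an example):

```python
import torch
from diffusers import StableDiffusionXLPipeline

# With a source install, the nested transformer_layers_per_block config
# is understood and the UNet builds without the TypeError above.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "segmind/SSD-1B",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
pipe.to("cuda")

image = pipe("An astronaut riding a green horse").images[0]
image.save("ssd1b.png")
```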
Icar changed discussion status to closed
