Felixstro-dev committed
Commit f13dc80 · verified · 1 Parent(s): 3722d89

Update app.py

Files changed (1)
  1. app.py +203 -198
app.py CHANGED
@@ -3,18 +3,13 @@ Minecraft Skin Generator – HuggingFace Spaces Demo
 ====================================================
 Loads model.pt (EMA weights) from the repo and generates skins from a prompt.
 Required files in the Space repo:
-    app.py          ← this file
-    model.pt        ← exported model (via option 6 → "Modell exportieren")
     requirements.txt
-
-    requirements.txt contents:
-        torch
-        gradio
-        Pillow
-        numpy
 """
 
 import math
 import random
 import numpy as np
 import gradio as gr
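The docstring on the old side spells out the four dependencies; as a plain requirements.txt listing (versions left unpinned here, since the actual file in the Space repo is not part of this diff):

    # requirements.txt
    torch
    gradio
    Pillow
    numpy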
@@ -23,7 +18,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 from PIL import Image
 
-# ─── Constants (MUST match train_diffusion.py exactly) ───────────────────────
 IMG_SIZE = 64
 CHANNELS = 4
 EMBED_DIM = 256
@@ -33,18 +28,12 @@ BETA_END = 0.02
 
 # ─── Tags (identical to training) ─────────────────────────────────────────────
 BEREICHE = ["head","body","arm_l","arm_r","leg_l","leg_r"]
-FARBEN = ["orange","red","blue","green","cyan","yellow","pink","purple",
-          "black","white","gray","brown","beige"]
 HELL = ["bright","medium","dark"]
-KLEIDUNG = ["hoodie","shirt","tshirt","jacket","coat","armor","robe","suit",
-            "dress","cape","vest","sweater","uniform","casual","formal",
-            "jeans","pants","shorts","skirt"]
-STIL = ["player_skin","mob_skin","zombie","enderman","skeleton_like",
-        "custom","unknown","fantasy","modern","medieval","sci_fi",
-        "ninja","pirate","wizard","knight","archer","mage"]
 HAUTTONE = ["skin_light","skin_medium","skin_dark","skin_pale","skin_tan"]
-ACCESSOIRES = ["hat","helmet","crown","glasses","beard","hair_long","hair_short",
-               "wings","tail","horns","mask"]
 
 ALL_TAGS = []
 for b in BEREICHE:
@@ -66,32 +55,29 @@ PROMPT_KEYWORDS = {
     "red":"red","blue":"blue","green":"green","yellow":"yellow","cyan":"cyan",
     "pink":"pink","purple":"purple","black":"black","white":"white",
     "gray":"gray","grey":"gray","brown":"brown",
-    "hell":"bright","bright":"bright","dunkel":"dark","dark":"dark",
-    "mittel":"medium","medium":"medium",
-    "zombie":"zombie","enderman":"enderman","skelett":"skeleton_like",
-    "skeleton":"skeleton_like","armor":"armor","player":"player_skin","custom":"custom",
-    "hoodie":"hoodie","hemd":"shirt","shirt":"shirt","tshirt":"tshirt",
-    "jacke":"jacket","jacket":"jacket","mantel":"coat","coat":"coat",
-    "robe":"robe","anzug":"suit","suit":"suit","kleid":"dress","dress":"dress",
-    "umhang":"cape","cape":"cape","weste":"vest","vest":"vest",
-    "pullover":"sweater","sweater":"sweater","uniform":"uniform",
-    "casual":"casual","formal":"formal","jeans":"jeans","hose":"pants",
-    "pants":"pants","shorts":"shorts","skirt":"skirt",
     "fantasy":"fantasy","modern":"modern","medieval":"medieval",
     "scifi":"sci_fi","ninja":"ninja","pirate":"pirate",
     "wizard":"wizard","knight":"knight","archer":"archer","mage":"mage",
     "pale":"skin_pale","tan":"skin_tan",
-    "hat":"hat","helmet":"helmet","crown":"crown","glasses":"glasses",
-    "beard":"beard","wings":"wings","horns":"horns","mask":"mask",
 }
 _COLOR_BODY_PARTS = {
-    "hoodie": ["body","arm_l","arm_r"],
-    "shirt": ["body"], "tshirt": ["body"],
-    "jacket": ["body","arm_l","arm_r"],
-    "coat": ["body","arm_l","arm_r"],
-    "jeans": ["leg_l","leg_r"], "pants": ["leg_l","leg_r"],
-    "shorts": ["leg_l","leg_r"], "skirt": ["leg_l","leg_r"],
-    "default": ["head","body","arm_l","arm_r","leg_l","leg_r"],
 }
 
 def parse_prompt(prompt: str) -> list:
@@ -102,8 +88,7 @@ def parse_prompt(prompt: str) -> list:
         if resolved in FARBEN:
             pending_color = resolved
             if pending_garment is None:
-                for b in _COLOR_BODY_PARTS["default"]:
-                    tags.add(f"{b}_{resolved}")
         elif resolved in KLEIDUNG:
             pending_garment = resolved
             tags.add(resolved)
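Both sides share the same conditioning path: parse_prompt turns free text into tag strings, tags_to_vector (next hunk) encodes them as a 0/1 vector, and the UNet consumes that vector through its conditioning embedding. A minimal sketch of the round trip, illustrative only since parts of parse_prompt are outside this hunk:

    # Hypothetical usage of the helpers defined in app.py.
    tags = parse_prompt("red hoodie blue jeans")
    # a collection of tag strings, e.g. "hoodie", "jeans", "body_red", ...
    # (the garment/colour pairing happens in the unshown remainder of parse_prompt)
    cond = tags_to_vector(tags)      # float tensor of shape (NUM_TAGS,) with 0/1 entries
    cond = cond.unsqueeze(0)         # batch dimension, as generate() adds before sampling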
@@ -126,17 +111,17 @@ def tags_to_vector(tags: list) -> torch.Tensor:
         if t in TAG2IDX: vec[TAG2IDX[t]] = 1.0
     return vec
 
-# ─── UV masks (identical to training) ─────────────────────────────────────────
 SKIN_REGIONS = {
-    "head": (0, 0, 32, 16),
-    "body": (16, 16, 40, 32),
-    "arm_r": (40, 16, 56, 32),
-    "leg_r": (0, 16, 16, 32),
-    "arm_l": (32, 48, 48, 64),
-    "leg_l": (16, 48, 32, 64),
 }
 OVERLAY_REGIONS = {
-    "head_overlay": (32, 0, 64, 16),
     "body_overlay": (16, 32, 40, 48),
     "arm_r_overlay": (40, 32, 56, 48),
     "leg_r_overlay": (0, 32, 16, 48),
@@ -145,13 +130,13 @@ OVERLAY_REGIONS = {
 }
 
 def _build_base_mask(device):
-    mask = torch.zeros(1,1,IMG_SIZE,IMG_SIZE,device=device)
     for x1,y1,x2,y2 in SKIN_REGIONS.values():
         mask[0,0,y1:y2,x1:x2] = 1.0
     return mask
 
 def _build_overlay_mask(device):
-    mask = torch.zeros(1,1,IMG_SIZE,IMG_SIZE,device=device)
     for x1,y1,x2,y2 in OVERLAY_REGIONS.values():
         mask[0,0,y1:y2,x1:x2] = 1.0
     return mask
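SKIN_REGIONS and OVERLAY_REGIONS carve the 64×64 texture into base and overlay rectangles, and force_alpha_mask in the next hunk uses them to force the alpha channel to opaque on the base, model-controlled on the overlay, and transparent everywhere else. A quick self-contained check of that layout (illustrative, assuming the constants and helpers above):

    import torch

    base = _build_base_mask("cpu")        # 1.0 inside every SKIN_REGIONS rectangle
    overlay = _build_overlay_mask("cpu")  # 1.0 inside every OVERLAY_REGIONS rectangle

    # On the standard 64x64 skin layout the two sets of rectangles are disjoint,
    # so the summed mask never exceeds 1 at any pixel.
    assert float((base + overlay).max()) <= 1.0

    outside = (1.0 - base - overlay).clamp(0, 1)
    print(int(base.sum()), int(overlay.sum()), int(outside.sum()))  # pixels per zone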
@@ -159,141 +144,142 @@ def _build_overlay_mask(device):
 def force_alpha_mask(img: torch.Tensor) -> torch.Tensor:
     base = _build_base_mask(img.device)
     overlay = _build_overlay_mask(img.device)
-    outside = (1.0 - base - overlay).clamp(0,1)
-    alpha = (base * torch.ones_like(img[:,3:4])
-             + overlay * img[:,3:4]
-             + outside * torch.full_like(img[:,3:4], -1.0))
-    return torch.cat([img[:,:3], alpha], dim=1)
-
-# ─── Architecture (EXACTLY identical to train_diffusion.py) ───────────────────
 
 class SinusoidalPE(nn.Module):
     def __init__(self, dim):
         super().__init__()
         self.dim = dim
     def forward(self, t):
-        device = t.device
-        half = self.dim // 2
-        freqs = torch.exp(-math.log(10000) * torch.arange(half, device=device) / half)
-        args = t[:,None].float() * freqs[None]
         return torch.cat([args.sin(), args.cos()], dim=-1)
 
 
 class CondEmbed(nn.Module):
     def __init__(self, num_tags, embed_dim):
         super().__init__()
         self.net = nn.Sequential(
-            nn.Linear(num_tags, embed_dim*2), nn.SiLU(),
-            nn.Linear(embed_dim*2, embed_dim), nn.SiLU(),
         )
     def forward(self, c): return self.net(c)
 
-class RMSNorm(nn.Module):
-    def __init__(self, num_channels, eps=1e-8):
-        super().__init__()
-        self.eps = eps
-        # IMPORTANT: shape (1, num_channels, 1, 1) – identical to training
-        self.scale = nn.Parameter(torch.ones(1, num_channels, 1, 1))
-    def forward(self, x):
-        rms = x.pow(2).mean(dim=1, keepdim=True).add(self.eps).sqrt()
-        return x / rms * self.scale
 class ResBlock(nn.Module):
-    # IMPORTANT: separate time_mlp + cond_mlp – identical to training
-    def __init__(self, in_ch, out_ch, time_dim, dropout=0.1, cond_dim=None):
         super().__init__()
-        self.norm1 = RMSNorm(in_ch)
         self.conv1 = nn.Conv2d(in_ch, out_ch, 3, padding=1)
-        self.norm2 = RMSNorm(out_ch)
         self.conv2 = nn.Conv2d(out_ch, out_ch, 3, padding=1)
-        self.time_mlp = nn.Sequential(nn.SiLU(), nn.Linear(time_dim, out_ch*2))
-        self.cond_mlp = nn.Sequential(nn.SiLU(), nn.Linear(cond_dim if cond_dim else time_dim, out_ch*2))
         self.skip = nn.Conv2d(in_ch, out_ch, 1) if in_ch != out_ch else nn.Identity()
         self.dropout = nn.Dropout(dropout)
         self.act = nn.SiLU()
-    def forward(self, x, t_emb, c_emb=None):
         h = self.conv1(self.act(self.norm1(x)))
-        t = self.time_mlp(t_emb)[:,:,None,None]
-        t_scale, t_shift = t.chunk(2, dim=1)
-        h = self.norm2(h) * (1 + t_scale) + t_shift
-        if c_emb is not None:
-            c = self.cond_mlp(c_emb)[:,:,None,None]
-            c_scale, c_shift = c.chunk(2, dim=1)
-            h = h * (1 + c_scale) + c_shift
         h = self.conv2(self.dropout(self.act(h)))
         return h + self.skip(x)
 
 
 class AttentionBlock(nn.Module):
     def __init__(self, ch, heads=4):
         super().__init__()
-        self.norm = RMSNorm(ch)
         self.attn = nn.MultiheadAttention(ch, heads, batch_first=True)
         self.proj = nn.Conv2d(ch, ch, 1)
-        nn.init.zeros_(self.proj.weight)
-        nn.init.zeros_(self.proj.bias)
     def forward(self, x):
-        B,C,H,W = x.shape
-        h = self.norm(x).view(B,C,H*W).permute(0,2,1)
-        h,_ = self.attn(h,h,h)
-        h = h.permute(0,2,1).view(B,C,H,W)
         return x + self.proj(h)
 
 class UNet(nn.Module):
     def __init__(self, channels=CHANNELS, base_ch=96, embed_dim=EMBED_DIM):
         super().__init__()
         time_dim = embed_dim * 2
         cond_dim = embed_dim
         self.time_pe = SinusoidalPE(embed_dim)
         self.time_mlp = nn.Sequential(
-            nn.Linear(embed_dim, time_dim), nn.SiLU(),
-            nn.Linear(time_dim, time_dim),
         )
         self.cond_emb = CondEmbed(NUM_TAGS, cond_dim)
-        self.cond_mlp = nn.Linear(cond_dim, time_dim)  # unused in forward, but present in the state_dict
         ch = base_ch
         self.enc_in = nn.Conv2d(channels, ch, 3, padding=1)
-        self.enc1 = ResBlock(ch, ch, time_dim, dropout=0.05, cond_dim=cond_dim)
-        self.enc1b = ResBlock(ch, ch, time_dim, dropout=0.05, cond_dim=cond_dim)
         self.down1 = nn.Conv2d(ch, ch, 4, stride=2, padding=1)
-        self.enc2 = ResBlock(ch, ch*2, time_dim, dropout=0.05, cond_dim=cond_dim)
-        self.enc2b = ResBlock(ch*2, ch*2, time_dim, dropout=0.05, cond_dim=cond_dim)
         self.down2 = nn.Conv2d(ch*2, ch*2, 4, stride=2, padding=1)
-        self.enc3 = ResBlock(ch*2, ch*4, time_dim, dropout=0.05, cond_dim=cond_dim)
-        self.enc3b = ResBlock(ch*4, ch*4, time_dim, dropout=0.05, cond_dim=cond_dim)
         self.attn3 = AttentionBlock(ch*4)
         self.down3 = nn.Conv2d(ch*4, ch*4, 4, stride=2, padding=1)
-        self.mid1 = ResBlock(ch*4, ch*4, time_dim, cond_dim=cond_dim)
         self.mid_att = AttentionBlock(ch*4)
-        self.mid2 = ResBlock(ch*4, ch*4, time_dim, cond_dim=cond_dim)
         self.up3 = nn.ConvTranspose2d(ch*4, ch*4, 4, stride=2, padding=1)
-        self.dec3 = ResBlock(ch*8, ch*4, time_dim, dropout=0.15, cond_dim=cond_dim)
-        self.dec3b = ResBlock(ch*4, ch*4, time_dim, dropout=0.15, cond_dim=cond_dim)
         self.attn_d3 = AttentionBlock(ch*4)
         self.up2 = nn.ConvTranspose2d(ch*4, ch*2, 4, stride=2, padding=1)
-        self.dec2 = ResBlock(ch*4, ch*2, time_dim, dropout=0.15, cond_dim=cond_dim)
-        self.dec2b = ResBlock(ch*2, ch*2, time_dim, dropout=0.15, cond_dim=cond_dim)
         self.up1 = nn.ConvTranspose2d(ch*2, ch, 4, stride=2, padding=1)
-        self.dec1 = ResBlock(ch*2, ch, time_dim, cond_dim=cond_dim)
-        self.dec1b = ResBlock(ch, ch, time_dim, cond_dim=cond_dim)
-        self.out = nn.Sequential(
-            nn.GroupNorm(min(8,ch), ch), nn.SiLU(),
-            nn.Conv2d(ch, channels, 3, padding=1),
         )
-        nn.init.zeros_(self.out[-1].bias)
 
     def forward(self, x, t, cond):
         t_emb = self.time_mlp(self.time_pe(t))
-        c_emb = self.cond_emb(cond)  # cond_dim=embed_dim, fed directly to the ResBlocks
 
         h0 = self.enc_in(x)
-        h1 = self.enc1b(self.enc1(h0, t_emb, c_emb), t_emb, c_emb)
-        h2 = self.enc2b(self.enc2(self.down1(h1), t_emb, c_emb), t_emb, c_emb)
-        h3 = self.attn3(self.enc3b(self.enc3(self.down2(h2), t_emb, c_emb), t_emb, c_emb))
-        h = self.mid2(self.mid_att(self.mid1(self.down3(h3), t_emb, c_emb)), t_emb, c_emb)
-        h = self.attn_d3(self.dec3b(self.dec3(torch.cat([self.up3(h), h3], 1), t_emb, c_emb), t_emb, c_emb))
-        h = self.dec2b(self.dec2(torch.cat([self.up2(h), h2], 1), t_emb, c_emb), t_emb, c_emb)
-        h = self.dec1b(self.dec1(torch.cat([self.up1(h), h1], 1), t_emb, c_emb), t_emb, c_emb)
         return self.out(h)
 
-# ─── Diffusion schedule (EXACTLY identical to train_diffusion.py) ─────────────
 class DiffusionSchedule:
     def __init__(self, T=T_STEPS, device="cpu"):
         self.T = T
@@ -303,132 +289,151 @@ class DiffusionSchedule:
         alphas = torch.cos(((x / T) + 0.008) / 1.008 * math.pi / 2) ** 2
         alphas = alphas / alphas[0]
         betas = (1 - alphas[1:] / alphas[:-1]).clamp(0, 0.999)
 
         self.betas = betas.to(device)
         self.alphas = 1.0 - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
-        self.alphas_cumprod_prev = F.pad(self.alphas_cumprod[:-1], (1,0), value=1.0)
 
     @torch.no_grad()
-    def _predict_noise(self, model, x, t_idx, cond, guidance_scale):
         t_tensor = torch.full((x.shape[0],), t_idx, device=self.device, dtype=torch.long)
         null_cond = torch.zeros_like(cond)
-        x2 = torch.cat([x, x])
-        t2 = torch.cat([t_tensor, t_tensor])
-        c2 = torch.cat([cond, null_cond])
-        out = model(x2, t2, c2)
-        noise_cond, noise_uncond = out.chunk(2)
-        return noise_uncond + guidance_scale * (noise_cond - noise_uncond)
 
-    @torch.no_grad()
-    def ddim_step(self, model, x, t_idx, t_prev_idx, cond, guidance_scale=6.0, eta=0.0):
-        noise_pred = self._predict_noise(model, x, t_idx, cond, guidance_scale)
-        alpha_bar = self.alphas_cumprod[t_idx]
-        alpha_bar_prev = self.alphas_cumprod[t_prev_idx] if t_prev_idx >= 0 else torch.ones(1, device=self.device)
-        x0_pred = (x - (1 - alpha_bar).sqrt() * noise_pred) / alpha_bar.sqrt()
-        x0_pred = x0_pred.clamp(-1.0, 1.0)
-        sigma = eta * ((1 - alpha_bar_prev)/(1 - alpha_bar)).sqrt() * (1 - alpha_bar/alpha_bar_prev).sqrt()
-        dir_xt = (1 - alpha_bar_prev - sigma**2).clamp(min=0).sqrt() * noise_pred
-        x_prev = alpha_bar_prev.sqrt() * x0_pred + dir_xt
-        if eta > 0 and t_prev_idx > 0:
-            x_prev = x_prev + sigma * torch.randn_like(x)
-        return x_prev
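The removed _predict_noise/ddim_step pair is a standard DDIM update with classifier-free guidance. Written out, with s the guidance scale, ᾱ the cumulative product of the alphas and η the stochasticity parameter, it matches the code line for line:

    \epsilon_\theta = \epsilon_{\text{uncond}} + s\,(\epsilon_{\text{cond}} - \epsilon_{\text{uncond}})
    \hat{x}_0 = \big(x_t - \sqrt{1-\bar\alpha_t}\,\epsilon_\theta\big) / \sqrt{\bar\alpha_t}
    \sigma_t = \eta\,\sqrt{(1-\bar\alpha_{t_\text{prev}})/(1-\bar\alpha_t)}\;\sqrt{1-\bar\alpha_t/\bar\alpha_{t_\text{prev}}}
    x_{t_\text{prev}} = \sqrt{\bar\alpha_{t_\text{prev}}}\,\hat{x}_0 + \sqrt{1-\bar\alpha_{t_\text{prev}}-\sigma_t^2}\,\epsilon_\theta + \sigma_t z,\qquad z \sim \mathcal{N}(0, I)

with x̂₀ clamped to [-1, 1] before reuse and the noise term only added when η > 0 and t_prev > 0, exactly as the guard in the code does.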
     @torch.no_grad()
-    def sample(self, model, cond, n=1, steps=80, guidance_scale=6.0):
         model.eval()
         x = torch.randn(n, CHANNELS, IMG_SIZE, IMG_SIZE, device=self.device)
-        timesteps = torch.linspace(self.T - 1, 0, steps, device=self.device).round().long()
-        timesteps = torch.unique_consecutive(timesteps)
-        for i in range(len(timesteps)):
-            t_idx = int(timesteps[i].item())
-            t_prev_idx = int(timesteps[i+1].item()) if i+1 < len(timesteps) else -1
-            x = self.ddim_step(model, x, t_idx, t_prev_idx, cond, guidance_scale)
         return force_alpha_mask(x).clamp(-1, 1)
 
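The removed sampler walks a thinned, evenly spaced subset of the T training timesteps rather than all of them. A standalone look at what that schedule produces (T_STEPS itself is defined above this excerpt; 1000 is assumed here purely for illustration):

    import torch

    T, steps = 1000, 80                # T is an assumption; steps matches the old default
    ts = torch.linspace(T - 1, 0, steps).round().long()
    ts = torch.unique_consecutive(ts)  # drops duplicates that appear once steps approaches T

    print(ts[:5].tolist())             # [999, 986, 974, 961, 948]
    print(len(ts))                     # 80 indices; consecutive pairs feed ddim_step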
 # ─── Load model ───────────────────────────────────────────────────────────────
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Device: {device}")
 
 ckpt = torch.load("model.pt", map_location=device, weights_only=False)
 base_ch = ckpt.get("base_ch", 96)
-
-# Prefer the EMA weights (more stable), fall back to "model"
-sd = ckpt.get("ema") or ckpt.get("model") or ckpt
-# Strip the _orig_mod. prefix (torch.compile artifact)
-cleaned_sd = {k.replace("_orig_mod.", ""): v for k, v in sd.items()}
-
-model_obj = UNet(base_ch=base_ch).to(device)
-missing, unexpected = model_obj.load_state_dict(cleaned_sd, strict=False)
-if missing:
-    print(f"⚠️ {len(missing)} Keys nicht geladen: {missing[:3]}")
-model_obj.eval()
-try:
-    torch._dynamo.disable(model_obj)
-except Exception:
-    pass
 
 schedule = DiffusionSchedule(device=device)
-n_params = sum(p.numel() for p in model_obj.parameters()) / 1e6
-print(f"✅ Modell geladen: base_ch={base_ch}, {n_params:.1f}M Parameter, {len(cleaned_sd)-len(missing)}/{len(cleaned_sd)} Keys")
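The removed loader only needs a checkpoint dict with the keys it probes for: "ema" (preferred), "model" (fallback) and optionally "base_ch". A minimal sketch of an export that would satisfy it — the real export lives in train_diffusion.py (option 6) and is not shown in this diff, so ema_model and model here are stand-in names:

    import torch

    # Hypothetical export step; ema_model / model come from the training script.
    torch.save(
        {
            "ema": ema_model.state_dict(),    # EMA weights, preferred by the Space
            "model": model.state_dict(),      # raw weights as a fallback
            "base_ch": 96,                    # lets the Space rebuild the matching UNet width
        },
        "model.pt",
    )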
 # ─── Generation function ──────────────────────────────────────────────────────
 def generate(prompt, num_skins, steps, guidance_scale, seed, randomize_seed):
     if randomize_seed:
         seed = random.randint(0, 2**31)
-    torch.manual_seed(seed)
 
     tags = parse_prompt(prompt)
-    tag_str = ", ".join(sorted(tags)) if tags else "–"
-    cond = tags_to_vector(tags).to(device).unsqueeze(0).repeat(num_skins, 1)
 
     with torch.inference_mode():
-        imgs = schedule.sample(model_obj, cond, n=num_skins,
-                               steps=steps, guidance_scale=guidance_scale)
 
     results = []
     for img_t in imgs:
-        arr = ((img_t.cpu().permute(1,2,0).numpy() + 1) * 127.5).clip(0,255).astype(np.uint8)
-        # 8× upscale (nearest neighbor, no blur) for visibility
         pil = Image.fromarray(arr, "RGBA").resize((512, 512), Image.NEAREST)
         results.append(pil)
 
-    return results, f"Tags: {tag_str}", int(seed)
 # ─── Gradio UI ────────────────────────────────────────────────────────────────
 EXAMPLES = [
-    ["red hoodie blue jeans", 4, 80, 6.0],
-    ["zombie", 4, 80, 7.0],
-    ["wizard fantasy purple", 4, 80, 6.5],
-    ["knight medieval armor", 4, 80, 6.0],
-    ["ninja black dark", 4, 80, 7.0],
-    ["enderman", 2, 80, 6.0],
 ]
 
-with gr.Blocks(title="Minecraft Skin Generator") as demo:
     gr.Markdown("""
     # 🎮 Minecraft Skin Generator
-    Generiert **64×64 Minecraft Skins** aus einem Text-Prompt via DDPM Diffusion Model (~35M Parameter, trainiert auf ~44k Skins).
 
-    **Prompts:** `red hoodie blue jeans` · `zombie` · `knight medieval armor` · `wizard fantasy purple` · `ninja black dark`
     """)
     with gr.Row():
         with gr.Column(scale=2):
-            prompt = gr.Text(label="Prompt", placeholder="z.B. red hoodie blue jeans", lines=1)
-            run_btn = gr.Button("🎨 Generieren", variant="primary", size="lg")
-            with gr.Accordion("⚙️ Einstellungen", open=False):
-                num_skins = gr.Slider(label="Anzahl Skins", minimum=1, maximum=8, step=1, value=4)
-                steps = gr.Slider(label="Diffusion-Schritte", minimum=20, maximum=200, step=10, value=80)
-                guidance = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=15.0, step=0.5, value=6.0)
-                seed = gr.Slider(label="Seed", minimum=0, maximum=2**31,step=1, value=42)
                 rand_seed = gr.Checkbox(label="Seed zufällig", value=True)
-            tag_info = gr.Text(label="Erkannte Tags", interactive=False)
-            seed_out = gr.Number(label="Verwendeter Seed", interactive=False)
         with gr.Column(scale=3):
             gallery = gr.Gallery(
-                label="Generierte Skins (512×512 hochskaliert, nearest-neighbor)",
-                columns=4, rows=2, object_fit="contain", height=420,
             )
-    gr.Examples(examples=EXAMPLES, inputs=[prompt, num_skins, steps, guidance], label="Beispiele")
 
     gr.on(
         triggers=[run_btn.click, prompt.submit],
         fn=generate,
 
 ====================================================
 Loads model.pt (EMA weights) from the repo and generates skins from a prompt.
 Required files in the Space repo:
+    app.py          ← this file
+    model.pt        ← your exported EMA model
     requirements.txt
 """
 
 import math
+import copy
 import random
 import numpy as np
 import gradio as gr
 import torch.nn.functional as F
 from PIL import Image
 
+# ─── Constants (must match train_diffusion.py exactly) ───────────────────────
 IMG_SIZE = 64
 CHANNELS = 4
 EMBED_DIM = 256
 
 
 # ─── Tags (identical to training) ─────────────────────────────────────────────
 BEREICHE = ["head","body","arm_l","arm_r","leg_l","leg_r"]
+FARBEN = ["orange","red","blue","green","cyan","yellow","pink","purple","black","white","gray","brown","beige"]
 HELL = ["bright","medium","dark"]
+KLEIDUNG = ["hoodie","shirt","tshirt","jacket","coat","armor","robe","suit","dress","cape","vest","sweater","uniform","casual","formal","jeans","pants","shorts","skirt"]
+STIL = ["player_skin","mob_skin","zombie","enderman","skeleton_like","custom","unknown","fantasy","modern","medieval","sci_fi","ninja","pirate","wizard","knight","archer","mage"]
 HAUTTONE = ["skin_light","skin_medium","skin_dark","skin_pale","skin_tan"]
+ACCESSOIRES = ["hat","helmet","crown","glasses","beard","hair_long","hair_short","wings","tail","horns","mask"]
 
 ALL_TAGS = []
 for b in BEREICHE:
 
     "red":"red","blue":"blue","green":"green","yellow":"yellow","cyan":"cyan",
     "pink":"pink","purple":"purple","black":"black","white":"white",
     "gray":"gray","grey":"gray","brown":"brown",
+    "hell":"bright","bright":"bright","dunkel":"dark","dark":"dark","mittel":"medium","medium":"medium",
+    "zombie":"zombie","enderman":"enderman","skelett":"skeleton_like","skeleton":"skeleton_like",
+    "armor":"armor","player":"player_skin","custom":"custom",
+    "hoodie":"hoodie","hemd":"shirt","shirt":"shirt",
+    "tshirt":"tshirt","jacke":"jacket","jacket":"jacket",
+    "mantel":"coat","coat":"coat","robe":"robe","anzug":"suit","suit":"suit",
+    "kleid":"dress","dress":"dress","umhang":"cape","cape":"cape",
+    "weste":"vest","vest":"vest","pullover":"sweater","sweater":"sweater",
+    "uniform":"uniform","casual":"casual","formal":"formal",
+    "jeans":"jeans","hose":"pants","pants":"pants","shorts":"shorts","skirt":"skirt",
     "fantasy":"fantasy","modern":"modern","medieval":"medieval",
     "scifi":"sci_fi","ninja":"ninja","pirate":"pirate",
     "wizard":"wizard","knight":"knight","archer":"archer","mage":"mage",
     "pale":"skin_pale","tan":"skin_tan",
+    "hat":"hat","helmet":"helmet","crown":"crown",
+    "glasses":"glasses","beard":"beard",
+    "wings":"wings","horns":"horns","mask":"mask",
 }
 _COLOR_BODY_PARTS = {
+    "hoodie":["body","arm_l","arm_r"],"shirt":["body"],"tshirt":["body"],
+    "jacket":["body","arm_l","arm_r"],"coat":["body","arm_l","arm_r"],
+    "jeans":["leg_l","leg_r"],"pants":["leg_l","leg_r"],"shorts":["leg_l","leg_r"],"skirt":["leg_l","leg_r"],
+    "default":["head","body","arm_l","arm_r","leg_l","leg_r"],
 }
 
 def parse_prompt(prompt: str) -> list:
 
         if resolved in FARBEN:
             pending_color = resolved
             if pending_garment is None:
+                for b in _COLOR_BODY_PARTS["default"]: tags.add(f"{b}_{resolved}")
         elif resolved in KLEIDUNG:
             pending_garment = resolved
             tags.add(resolved)
 
         if t in TAG2IDX: vec[TAG2IDX[t]] = 1.0
     return vec
 
+# ─── UV masks ──────────────────────────────────────────────────────────────────
 SKIN_REGIONS = {
+    "head": (0, 0, 32, 16),
+    "body": (16, 16, 40, 32),
+    "arm_r": (40, 16, 56, 32),
+    "leg_r": (0, 16, 16, 32),
+    "arm_l": (32, 48, 48, 64),
+    "leg_l": (16, 48, 32, 64),
 }
 OVERLAY_REGIONS = {
+    "head_overlay": (32, 0, 64, 16),
     "body_overlay": (16, 32, 40, 48),
     "arm_r_overlay": (40, 32, 56, 48),
     "leg_r_overlay": (0, 32, 16, 48),
 }
 
 def _build_base_mask(device):
+    mask = torch.zeros(1, 1, IMG_SIZE, IMG_SIZE, device=device)
     for x1,y1,x2,y2 in SKIN_REGIONS.values():
         mask[0,0,y1:y2,x1:x2] = 1.0
     return mask
 
 def _build_overlay_mask(device):
+    mask = torch.zeros(1, 1, IMG_SIZE, IMG_SIZE, device=device)
     for x1,y1,x2,y2 in OVERLAY_REGIONS.values():
         mask[0,0,y1:y2,x1:x2] = 1.0
     return mask
 
 def force_alpha_mask(img: torch.Tensor) -> torch.Tensor:
     base = _build_base_mask(img.device)
     overlay = _build_overlay_mask(img.device)
+    outside = (1.0 - base - overlay).clamp(0, 1)
+    alpha = (
+        base * torch.ones_like(img[:, 3:4])
+        + overlay * img[:, 3:4]
+        + outside * torch.full_like(img[:, 3:4], -1.0)
+    )
+    return torch.cat([img[:, :3], alpha], dim=1)
 
+# ─── UNet (identical to train_diffusion.py) ───────────────────────────────────
 class SinusoidalPE(nn.Module):
     def __init__(self, dim):
         super().__init__()
         self.dim = dim
+
     def forward(self, t):
+        half = self.dim // 2
+        freqs = torch.exp(-math.log(10000) * torch.arange(half, device=t.device) / half)
+        args = t[:, None].float() * freqs[None]
         return torch.cat([args.sin(), args.cos()], dim=-1)
 
+
 class CondEmbed(nn.Module):
     def __init__(self, num_tags, embed_dim):
         super().__init__()
         self.net = nn.Sequential(
+            nn.Linear(num_tags, embed_dim * 2), nn.SiLU(),
+            nn.Linear(embed_dim * 2, embed_dim), nn.SiLU(),
         )
     def forward(self, c): return self.net(c)
 
 class ResBlock(nn.Module):
+    def __init__(self, in_ch, out_ch, time_dim, dropout=0.1):
         super().__init__()
+        self.norm1 = nn.GroupNorm(min(8, in_ch), in_ch)
         self.conv1 = nn.Conv2d(in_ch, out_ch, 3, padding=1)
+        self.norm2 = nn.GroupNorm(min(8, out_ch), out_ch)
         self.conv2 = nn.Conv2d(out_ch, out_ch, 3, padding=1)
+        self.time_mlp = nn.Sequential(nn.SiLU(), nn.Linear(time_dim, out_ch * 2))
         self.skip = nn.Conv2d(in_ch, out_ch, 1) if in_ch != out_ch else nn.Identity()
         self.dropout = nn.Dropout(dropout)
         self.act = nn.SiLU()
+
+    def forward(self, x, t_emb):
         h = self.conv1(self.act(self.norm1(x)))
+        t = self.time_mlp(t_emb)[:, :, None, None]
+        scale, shift = t.chunk(2, dim=1)
+        h = self.norm2(h) * (1 + scale) + shift
         h = self.conv2(self.dropout(self.act(h)))
         return h + self.skip(x)
 
+
 class AttentionBlock(nn.Module):
     def __init__(self, ch, heads=4):
         super().__init__()
+        self.norm = nn.GroupNorm(min(8, ch), ch)
         self.attn = nn.MultiheadAttention(ch, heads, batch_first=True)
         self.proj = nn.Conv2d(ch, ch, 1)
+
     def forward(self, x):
+        B, C, H, W = x.shape
+        h = self.norm(x).view(B, C, H * W).permute(0, 2, 1)
+        h, _ = self.attn(h, h, h)
+        h = h.permute(0, 2, 1).view(B, C, H, W)
         return x + self.proj(h)
 
+
 class UNet(nn.Module):
     def __init__(self, channels=CHANNELS, base_ch=96, embed_dim=EMBED_DIM):
         super().__init__()
         time_dim = embed_dim * 2
         cond_dim = embed_dim
+
         self.time_pe = SinusoidalPE(embed_dim)
         self.time_mlp = nn.Sequential(
+            nn.Linear(embed_dim, time_dim), nn.SiLU(), nn.Linear(time_dim, time_dim)
         )
         self.cond_emb = CondEmbed(NUM_TAGS, cond_dim)
+        self.cond_mlp = nn.Linear(cond_dim, time_dim)
+
         ch = base_ch
         self.enc_in = nn.Conv2d(channels, ch, 3, padding=1)
+
+        self.enc1 = ResBlock(ch, ch, time_dim)
+        self.enc1b = ResBlock(ch, ch, time_dim)
         self.down1 = nn.Conv2d(ch, ch, 4, stride=2, padding=1)
+
+        self.enc2 = ResBlock(ch, ch*2, time_dim)
+        self.enc2b = ResBlock(ch*2, ch*2, time_dim)
         self.down2 = nn.Conv2d(ch*2, ch*2, 4, stride=2, padding=1)
+
+        self.enc3 = ResBlock(ch*2, ch*4, time_dim)
+        self.enc3b = ResBlock(ch*4, ch*4, time_dim)
         self.attn3 = AttentionBlock(ch*4)
         self.down3 = nn.Conv2d(ch*4, ch*4, 4, stride=2, padding=1)
+
+        self.mid1 = ResBlock(ch*4, ch*4, time_dim)
         self.mid_att = AttentionBlock(ch*4)
+        self.mid2 = ResBlock(ch*4, ch*4, time_dim)
+
         self.up3 = nn.ConvTranspose2d(ch*4, ch*4, 4, stride=2, padding=1)
+        self.dec3 = ResBlock(ch*8, ch*4, time_dim)
+        self.dec3b = ResBlock(ch*4, ch*4, time_dim)
         self.attn_d3 = AttentionBlock(ch*4)
+
         self.up2 = nn.ConvTranspose2d(ch*4, ch*2, 4, stride=2, padding=1)
+        self.dec2 = ResBlock(ch*4, ch*2, time_dim)
+        self.dec2b = ResBlock(ch*2, ch*2, time_dim)
+
         self.up1 = nn.ConvTranspose2d(ch*2, ch, 4, stride=2, padding=1)
+        self.dec1 = ResBlock(ch*2, ch, time_dim)
+        self.dec1b = ResBlock(ch, ch, time_dim)
+
+        self.out = nn.Sequential(
+            nn.GroupNorm(min(8, ch), ch), nn.SiLU(), nn.Conv2d(ch, channels, 3, padding=1)
         )
 
     def forward(self, x, t, cond):
         t_emb = self.time_mlp(self.time_pe(t))
+        c_emb = self.cond_mlp(self.cond_emb(cond))
+        emb = t_emb + c_emb
+
         h0 = self.enc_in(x)
+        h1 = self.enc1b(self.enc1(h0, emb), emb)
+        h2 = self.enc2b(self.enc2(self.down1(h1), emb), emb)
+        h3 = self.attn3(self.enc3b(self.enc3(self.down2(h2), emb), emb))
+
+        h = self.mid2(self.mid_att(self.mid1(self.down3(h3), emb)), emb)
+
+        h = self.attn_d3(self.dec3b(self.dec3(torch.cat([self.up3(h), h3], 1), emb), emb))
+        h = self.dec2b(self.dec2(torch.cat([self.up2(h), h2], 1), emb), emb)
+        h = self.dec1b(self.dec1(torch.cat([self.up1(h), h1], 1), emb), emb)
         return self.out(h)
 
+
+# ─── Diffusion Schedule ────────────────────────────────────────────────────────
 class DiffusionSchedule:
     def __init__(self, T=T_STEPS, device="cpu"):
         self.T = T
 
         alphas = torch.cos(((x / T) + 0.008) / 1.008 * math.pi / 2) ** 2
         alphas = alphas / alphas[0]
         betas = (1 - alphas[1:] / alphas[:-1]).clamp(0, 0.999)
+
         self.betas = betas.to(device)
         self.alphas = 1.0 - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
+        self.alphas_cumprod_prev = F.pad(self.alphas_cumprod[:-1], (1, 0), value=1.0)
+        self.posterior_variance = (
+            self.betas * (1 - self.alphas_cumprod_prev) / (1 - self.alphas_cumprod)
+        )
 
     @torch.no_grad()
+    def p_sample(self, model, x, t_idx, cond, guidance_scale):
         t_tensor = torch.full((x.shape[0],), t_idx, device=self.device, dtype=torch.long)
         null_cond = torch.zeros_like(cond)
 
+        x2 = torch.cat([x, x])
+        t2 = torch.cat([t_tensor, t_tensor])
+        c2 = torch.cat([cond, null_cond])
+
+        out = model(x2, t2, c2)
+        n_cond, n_uncond = out.chunk(2)
+        noise_pred = n_uncond + guidance_scale * (n_cond - n_uncond)
+
+        alpha = self.alphas[t_idx]
+        alpha_bar = self.alphas_cumprod[t_idx]
+        beta = self.betas[t_idx]
+
+        coef = beta / (1 - alpha_bar).sqrt()
+        mean = (1 / alpha.sqrt()) * (x - coef * noise_pred)
+
+        if t_idx > 0:
+            return mean + self.posterior_variance[t_idx].sqrt() * torch.randn_like(x)
+        return mean
 
     @torch.no_grad()
+    def sample(self, model, cond, n=1, steps=50, guidance_scale=3.0):
         model.eval()
         x = torch.randn(n, CHANNELS, IMG_SIZE, IMG_SIZE, device=self.device)
+        for t_idx in torch.linspace(self.T - 1, 0, steps, dtype=torch.long, device=self.device):
+            x = self.p_sample(model, x, t_idx.item(), cond, guidance_scale)
         return force_alpha_mask(x).clamp(-1, 1)
 
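The new p_sample swaps the DDIM update for plain DDPM ancestral sampling while keeping the same classifier-free guidance. In equations, matching the code line for line:

    \epsilon_\theta = \epsilon_{\text{uncond}} + s\,(\epsilon_{\text{cond}} - \epsilon_{\text{uncond}})
    \mu_\theta(x_t) = \frac{1}{\sqrt{\alpha_t}}\Big(x_t - \frac{\beta_t}{\sqrt{1-\bar\alpha_t}}\,\epsilon_\theta\Big)
    \tilde\beta_t = \beta_t\,\frac{1-\bar\alpha_{t-1}}{1-\bar\alpha_t}
    x_{t-1} = \mu_\theta(x_t) + \sqrt{\tilde\beta_t}\,z,\qquad z \sim \mathcal{N}(0, I)

with the noise term added only for t > 0, as the guard in p_sample does. The sampling defaults also change (steps 80 → 50, guidance 6.0 → 3.0), which shows up again in the EXAMPLES list further down.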
+
 # ─── Load model ───────────────────────────────────────────────────────────────
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Device: {device}")
 
 ckpt = torch.load("model.pt", map_location=device, weights_only=False)
 base_ch = ckpt.get("base_ch", 96)
+if base_ch is None:
+    for key in ("enc_in.weight", "_orig_mod.enc_in.weight"):
+        sd_check = ckpt.get("model", ckpt)
+        if key in sd_check:
+            base_ch = sd_check[key].shape[0]
+            break
+    base_ch = base_ch or 96
+
+model = UNet(base_ch=base_ch).to(device)
+sd = ckpt.get("model", ckpt)
+model.load_state_dict(sd, strict=False)
+model.eval()
+try: torch._dynamo.disable(model)
+except Exception: pass
 
 schedule = DiffusionSchedule(device=device)
+print(f"Modell geladen: base_ch={base_ch}, {sum(p.numel() for p in model.parameters())/1e6:.1f}M Parameter")
+
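The base_ch fallback works because enc_in is nn.Conv2d(channels, base_ch, 3, padding=1), so its weight tensor has shape (base_ch, CHANNELS, 3, 3) and the first dimension recovers the UNet width. A tiny sanity check of that assumption (illustrative only, with CHANNELS=4 and base_ch=96 as in this app):

    import torch.nn as nn

    probe = nn.Conv2d(4, 96, 3, padding=1)   # Conv2d weight shape is (out_ch, in_ch, kH, kW)
    assert probe.weight.shape[0] == 96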
 # ─── Generation function ──────────────────────────────────────────────────────
 def generate(prompt, num_skins, steps, guidance_scale, seed, randomize_seed):
     if randomize_seed:
         seed = random.randint(0, 2**31)
 
+    torch.manual_seed(seed)
     tags = parse_prompt(prompt)
+    tag_str = ", ".join(tags) if tags else "–"
+
+    cond = tags_to_vector(tags).to(device).unsqueeze(0).expand(num_skins, -1)
 
     with torch.inference_mode():
+        imgs = schedule.sample(model, cond, n=num_skins, steps=steps, guidance_scale=guidance_scale)
 
     results = []
     for img_t in imgs:
+        arr = ((img_t.cpu().permute(1, 2, 0).numpy() + 1) * 127.5).clip(0, 255).astype(np.uint8)
         pil = Image.fromarray(arr, "RGBA").resize((512, 512), Image.NEAREST)
         results.append(pil)
 
+    return results, f"Tags erkannt: {tag_str}", seed
+
 # ─── Gradio UI ────────────────────────────────────────────────────────────────
 EXAMPLES = [
+    ["roter hoodie blaue jeans", 4, 50, 3.0],
+    ["zombie", 4, 50, 4.0],
+    ["wizard fantasy purple", 4, 50, 3.5],
+    ["knight medieval armor", 4, 50, 3.0],
+    ["ninja black dark", 4, 50, 4.0],
+    ["enderman", 2, 50, 3.0],
 ]
 
+css = "#gallery { min-height: 300px; }"
+
+with gr.Blocks(css=css, title="Minecraft Skin Generator") as demo:
     gr.Markdown("""
     # 🎮 Minecraft Skin Generator
+    Generiert 64×64 Minecraft Skins aus einem Text-Prompt. Trainiert mit DDPM auf ~41k Skins.
 
+    **Beispiel-Prompts:** `roter hoodie blaue jeans` · `zombie` · `knight medieval armor` · `wizard fantasy purple`
     """)
+
     with gr.Row():
         with gr.Column(scale=2):
+            prompt = gr.Text(
+                label="Prompt",
+                placeholder="z.B. roter hoodie blaue jeans",
+                lines=1,
+            )
+            run_btn = gr.Button("Generieren", variant="primary", size="lg")
+
+            with gr.Accordion("Einstellungen", open=False):
+                num_skins = gr.Slider(label="Anzahl Skins", minimum=1, maximum=8, step=1, value=4)
+                steps = gr.Slider(label="Diffusion-Schritte", minimum=10, maximum=100, step=5, value=50)
+                guidance = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=15.0,step=0.5, value=3.0)
+                seed = gr.Slider(label="Seed", minimum=0, maximum=2**31,step=1, value=42)
                 rand_seed = gr.Checkbox(label="Seed zufällig", value=True)
+
+            tag_info = gr.Text(label="Erkannte Tags", interactive=False)
+            seed_out = gr.Number(label="Verwendeter Seed", interactive=False)
+
         with gr.Column(scale=3):
             gallery = gr.Gallery(
+                label="Generierte Skins (512×512 hochskaliert)",
+                elem_id="gallery",
+                columns=4,
+                rows=2,
+                object_fit="contain",
+                height=400,
             )
+
+    gr.Examples(
+        examples=EXAMPLES,
+        inputs=[prompt, num_skins, steps, guidance],
+        label="Beispiele",
+    )
+
     gr.on(
         triggers=[run_btn.click, prompt.submit],
         fn=generate,