aiqtech committed on
Commit
a7544c9
·
verified ·
1 Parent(s): 1f5cf77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -31
app.py CHANGED
@@ -32,27 +32,32 @@ def initialize_models():
32
  try:
33
  import torch
34
 
35
- # A100 최적화 설정
36
- torch.backends.cudnn.benchmark = True # A100μ—μ„œλŠ” μ„±λŠ₯ ν–₯상을 μœ„ν•΄ ν™œμ„±ν™”
37
- torch.backends.cuda.matmul.allow_tf32 = True # TF32 ν—ˆμš©
38
  torch.backends.cudnn.allow_tf32 = True
39
 
40
  print("Initializing Trellis pipeline...")
41
  pipeline = TrellisImageTo3DPipeline.from_pretrained(
42
- "JeffreyXiang/TRELLIS-image-large",
43
- torch_dtype=torch.float16 # A100μ—μ„œ FP16 μ‚¬μš©
44
  )
45
 
46
  if torch.cuda.is_available():
47
  pipeline = pipeline.to("cuda")
 
 
 
48
 
49
  print("Initializing translator...")
50
  translator = translation_pipeline(
51
  "translation",
52
  model="Helsinki-NLP/opus-mt-ko-en",
53
- device="cuda" # λ²ˆμ—­κΈ°λ„ GPU μ‚¬μš©
54
  )
55
 
 
 
 
56
  print("Models initialized successfully")
57
  return True
58
 
@@ -68,17 +73,15 @@ def get_flux_pipe():
68
  free_memory()
69
  flux_pipe = FluxPipeline.from_pretrained(
70
  "black-forest-labs/FLUX.1-dev",
71
- torch_dtype=torch.float16, # A100μ—μ„œ FP16 μ‚¬μš©
72
  use_safetensors=True
73
  ).to("cuda")
 
 
74
  except Exception as e:
75
  print(f"Error loading Flux pipeline: {e}")
76
  return None
77
  return flux_pipe
78
 
79
-
80
-
81
-
82
  def free_memory():
83
  """κ°•ν™”λœ λ©”λͺ¨λ¦¬ 정리 ν•¨μˆ˜"""
84
  import gc
@@ -108,7 +111,7 @@ def free_memory():
108
  except:
109
  pass
110
 
111
- @spaces.GPU
112
  def setup_gpu_model(model):
113
  """GPU 섀정이 ν•„μš”ν•œ λͺ¨λΈμ„ μ²˜λ¦¬ν•˜λŠ” ν•¨μˆ˜"""
114
  if torch.cuda.is_available():
@@ -122,7 +125,7 @@ def translate_if_korean(text):
122
  return translated
123
  return text
124
 
125
- @spaces.GPU
126
  def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
127
  try:
128
  if pipeline is None:
@@ -192,7 +195,6 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
192
 
193
  return gs, mesh, state['trial_id']
194
 
195
- @spaces.GPU
196
  def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
197
  ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
198
  try:
@@ -201,8 +203,8 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
201
 
202
  input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
203
 
204
- # 이미지 크기 제한
205
- max_size = 512
206
  if max(input_image.size) > max_size:
207
  ratio = max_size / max(input_image.size)
208
  input_image = input_image.resize(
@@ -214,31 +216,31 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
214
  if torch.cuda.is_available():
215
  pipeline.to("cuda")
216
 
217
- with torch.no_grad():
218
  outputs = pipeline.run(
219
  input_image,
220
  seed=seed,
221
  formats=["gaussian", "mesh"],
222
  preprocess_image=False,
223
  sparse_structure_sampler_params={
224
- "steps": min(ss_sampling_steps, 15),
225
  "cfg_strength": ss_guidance_strength,
226
  },
227
  slat_sampler_params={
228
- "steps": min(slat_sampling_steps, 15),
229
  "cfg_strength": slat_guidance_strength,
230
  }
231
  )
232
 
233
- # 비디오 프레임 수 감소
234
- video = render_utils.render_video(outputs['gaussian'][0], num_frames=30)['color']
235
- video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=30)['normal']
236
  video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
237
 
238
  trial_id = str(uuid.uuid4())
239
  video_path = f"{TMP_DIR}/{trial_id}.mp4"
240
  os.makedirs(os.path.dirname(video_path), exist_ok=True)
241
- imageio.mimsave(video_path, video, fps=15)
242
 
243
  state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
244
 
@@ -253,26 +255,23 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
253
  pipeline.to("cpu")
254
  raise e
255
 
256
- @spaces.GPU
257
  def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
258
  try:
259
  free_memory()
260
 
261
- # Flux 파이프라인 가져오기
262
  flux_pipe = get_flux_pipe()
263
  if flux_pipe is None:
264
  raise Exception("Failed to load Flux pipeline")
265
 
266
- # 이미지 크기 제한
267
- height = min(height, 1024) # A100μ—μ„œλŠ” 더 큰 이미지 ν—ˆμš©
268
  width = min(width, 1024)
269
 
270
- # 프롬프트 처리
271
- base_prompt = "wbgmsst, 3D, white background"
272
  translated_prompt = translate_if_korean(prompt)
273
- final_prompt = f"{translated_prompt}, {base_prompt}"
274
 
275
- with torch.cuda.amp.autocast(): # A100μ—μ„œ μžλ™ ν˜Όν•© 정밀도 μ‚¬μš©
276
  output = flux_pipe(
277
  prompt=[final_prompt],
278
  height=height,
@@ -292,7 +291,7 @@ def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
292
  free_memory()
293
  raise e
294
 
295
- @spaces.GPU
296
  def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
297
  gs, mesh, trial_id = unpack_state(state)
298
  glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
 
32
  try:
33
  import torch
34
 
35
+ # L40S GPU 최적화 설정
36
+ torch.backends.cudnn.benchmark = True
37
+ torch.backends.cuda.matmul.allow_tf32 = True
38
  torch.backends.cudnn.allow_tf32 = True
39
 
40
  print("Initializing Trellis pipeline...")
41
  pipeline = TrellisImageTo3DPipeline.from_pretrained(
42
+ "JeffreyXiang/TRELLIS-image-large"
 
43
  )
44
 
45
  if torch.cuda.is_available():
46
  pipeline = pipeline.to("cuda")
47
+ # 모델을 FP16으로 변환
48
+ for param in pipeline.parameters():
49
+ param.data = param.data.half()
50
 
51
  print("Initializing translator...")
52
  translator = translation_pipeline(
53
  "translation",
54
  model="Helsinki-NLP/opus-mt-ko-en",
55
+ device="cuda"
56
  )
57
 
58
+ # Flux 파이프라인은 나중에 초기화
59
+ flux_pipe = None
60
+
61
  print("Models initialized successfully")
62
  return True
63
 
 
73
  free_memory()
74
  flux_pipe = FluxPipeline.from_pretrained(
75
  "black-forest-labs/FLUX.1-dev",
 
76
  use_safetensors=True
77
  ).to("cuda")
78
+ # FP16으로 변환
79
+ flux_pipe.to(torch.float16)
80
  except Exception as e:
81
  print(f"Error loading Flux pipeline: {e}")
82
  return None
83
  return flux_pipe
84
 
 
 
 
85
  def free_memory():
86
  """κ°•ν™”λœ λ©”λͺ¨λ¦¬ 정리 ν•¨μˆ˜"""
87
  import gc
 
111
  except:
112
  pass
113
 
114
+
115
  def setup_gpu_model(model):
116
  """GPU 섀정이 ν•„μš”ν•œ λͺ¨λΈμ„ μ²˜λ¦¬ν•˜λŠ” ν•¨μˆ˜"""
117
  if torch.cuda.is_available():
 
125
  return translated
126
  return text
127
 
128
+
129
  def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
130
  try:
131
  if pipeline is None:
 
195
 
196
  return gs, mesh, state['trial_id']
197
 
 
198
  def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
199
  ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
200
  try:
 
203
 
204
  input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
205
 
206
+ # L40S에 맞게 이미지 크기 제한 조정
207
+ max_size = 768 # L40SλŠ” 더 큰 이미지 처리 κ°€λŠ₯
208
  if max(input_image.size) > max_size:
209
  ratio = max_size / max(input_image.size)
210
  input_image = input_image.resize(
 
216
  if torch.cuda.is_available():
217
  pipeline.to("cuda")
218
 
219
+ with torch.cuda.amp.autocast(): # μžλ™ ν˜Όν•© 정밀도 μ‚¬μš©
220
  outputs = pipeline.run(
221
  input_image,
222
  seed=seed,
223
  formats=["gaussian", "mesh"],
224
  preprocess_image=False,
225
  sparse_structure_sampler_params={
226
+ "steps": min(ss_sampling_steps, 20), # L40Sμ—μ„œ 더 λ§Žμ€ μŠ€ν… ν—ˆμš©
227
  "cfg_strength": ss_guidance_strength,
228
  },
229
  slat_sampler_params={
230
+ "steps": min(slat_sampling_steps, 20),
231
  "cfg_strength": slat_guidance_strength,
232
  }
233
  )
234
 
235
+ # 비디오 생성
236
+ video = render_utils.render_video(outputs['gaussian'][0], num_frames=40)['color']
237
+ video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=40)['normal']
238
  video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
239
 
240
  trial_id = str(uuid.uuid4())
241
  video_path = f"{TMP_DIR}/{trial_id}.mp4"
242
  os.makedirs(os.path.dirname(video_path), exist_ok=True)
243
+ imageio.mimsave(video_path, video, fps=20)
244
 
245
  state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
246
 
 
255
  pipeline.to("cpu")
256
  raise e
257
 
258
+
259
  def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
260
  try:
261
  free_memory()
262
 
 
263
  flux_pipe = get_flux_pipe()
264
  if flux_pipe is None:
265
  raise Exception("Failed to load Flux pipeline")
266
 
267
+ # L40S에 맞게 크기 제한 조정
268
+ height = min(height, 1024)
269
  width = min(width, 1024)
270
 
 
 
271
  translated_prompt = translate_if_korean(prompt)
272
+ final_prompt = f"{translated_prompt}, wbgmsst, 3D, white background"
273
 
274
+ with torch.cuda.amp.autocast():
275
  output = flux_pipe(
276
  prompt=[final_prompt],
277
  height=height,
 
291
  free_memory()
292
  raise e
293
 
294
+
295
  def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
296
  gs, mesh, trial_id = unpack_state(state)
297
  glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)