PommesPeter committed on
Commit
2aded43
1 Parent(s): 49f6f11

Update app.py

Files changed (1)
  1. app.py +11 -12
app.py CHANGED
@@ -80,7 +80,7 @@ def encode_prompt(
     return prompt_embeds, prompt_masks
 
 
-def load_model(args, master_port, rank, barrier):
+def load_model(args, master_port, rank):
     # import here to avoid huggingface Tokenizer parallelism warnings
     from diffusers.models import AutoencoderKL
     from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -161,20 +161,19 @@ def load_model(args, master_port, rank, barrier):
     )
     model.load_state_dict(ckpt, strict=True)
 
-    barrier.wait()
+    # barrier.wait()
     return text_encoder, tokenizer, vae, model
 
 
 @torch.no_grad()
-def model_main(args, master_port, rank, request_queue, response_queue, barrier, text_encoder, tokenizer, vae, model):
+def model_main(args, master_port, rank, request_queue, response_queue, text_encoder, tokenizer, vae, model):
     dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[
         args.precision
     ]
     train_args = torch.load(os.path.join(args.ckpt, "model_args.pth"))
 
     with torch.autocast("cuda", dtype):
-        barrier.wait()
-
+        # barrier.wait()
         while True:
             (
                 cap,
@@ -437,24 +436,24 @@ def main():
     request_queues = []
     response_queue = Queue()
     # mp_barrier = mp.Barrier(args.num_gpus + 1)
-    barrier = Barrier(args.num_gpus + 1)
+    # barrier = Barrier(args.num_gpus + 1)
     for i in range(args.num_gpus):
-        text_encoder, tokenizer, vae, model = load_model(args, master_port, i, barrier)
-        request_queues.append(Queue())
+        text_encoder, tokenizer, vae, model = load_model(args, master_port, i)
+        # request_queues.append(Queue())
         generation_kwargs = dict(
             args=args,
             master_port=master_port,
             rank=i,
            request_queue=request_queues[i],
             response_queue=response_queue if i == 0 else None,
-            barrier=barrier,
             text_encoder=text_encoder,
             tokenizer=tokenizer,
             vae=vae,
             model=model
         )
-        thread = Thread(target=model_main, kwargs=generation_kwargs)
-        thread.start()
+        model_main(**generation_kwargs)
+        # thread = Thread(target=model_main, kwargs=generation_kwargs)
+        # thread.start()
 
     with gr.Blocks() as demo:
         with gr.Row():
@@ -606,7 +605,7 @@ def main():
             [output_img],
         )
 
-    barrier.wait()
+    # barrier.wait()
    demo.queue(max_size=20).launch()
 
 
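For context, the pattern this commit removes is the barrier-synchronized worker setup: each rank loads its model in a separate thread, pulls prompts from a per-rank Queue, and a threading.Barrier holds the main (Gradio) thread back until every worker has finished loading. Below is a minimal, hypothetical sketch of that pattern; the names worker/main and the string stand-ins are illustrative only, not the app's actual load_model/model_main signatures.

import threading
from queue import Queue

def worker(rank, request_queue, response_queue, barrier):
    model = f"model-on-gpu-{rank}"      # stand-in for the real load_model()
    barrier.wait()                      # signal "loaded" to the main thread
    while True:
        prompt = request_queue.get()
        if prompt is None:              # shutdown sentinel
            break
        if response_queue is not None:  # only rank 0 reports results
            response_queue.put(f"{model} generated: {prompt}")

def main(num_gpus=2):
    barrier = threading.Barrier(num_gpus + 1)   # workers + main thread
    response_queue = Queue()
    request_queues = [Queue() for _ in range(num_gpus)]
    for rank in range(num_gpus):
        threading.Thread(
            target=worker,
            args=(rank, request_queues[rank],
                  response_queue if rank == 0 else None, barrier),
            daemon=True,
        ).start()
    barrier.wait()                      # block until all workers are ready
    for q in request_queues:
        q.put("a photo of a corgi")
    print(response_queue.get())
    for q in request_queues:
        q.put(None)                     # ask workers to exit

if __name__ == "__main__":
    main()

In the committed version, the barrier and worker threads are commented out and model_main(**generation_kwargs) is called directly inside the loop, so loading and generation happen in the main process with no startup handshake to coordinate.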