PommesPeter committed
Commit
f168dbf
1 Parent(s): f38ec19

Update app.py

Files changed (1)
  1. app.py +23 -20
app.py CHANGED
@@ -6,21 +6,27 @@ subprocess.run("huggingface-cli download --resume-download Alpha-VLLM/Lumina-Nex
 import argparse
 import builtins
 import json
-import multiprocessing as mp
+import threading
 import random
 import socket
 import spaces
 import traceback
 import os
+from queue import Queue
+
 import fairscale.nn.model_parallel.initialize as fs_init
 import gradio as gr
 import numpy as np
+
 import torch
 import torch.distributed as dist
 from torchvision.transforms.functional import to_pil_image
 
-import models
 from PIL import Image
+from threading import Thread
+
+import models
+
 from lumina_t2i.transport import create_transport, Sampler
 
 print(f"Is CUDA available: {torch.cuda.is_available()}")
@@ -113,7 +119,7 @@ def encode_prompt(
 
 
 @torch.no_grad()
-def model_main(args, master_port, rank, request_queue, response_queue, mp_barrier):
+def model_main(args, master_port, rank, request_queue, response_queue):
     # import here to avoid huggingface Tokenizer parallelism warnings
     from diffusers.models import AutoencoderKL
     from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -194,7 +200,7 @@ def model_main(args, master_port, rank, request_queue, response_queue, mp_barrier):
     )
     model.load_state_dict(ckpt, strict=True)
 
-    mp_barrier.wait()
+    # mp_barrier.wait()
 
     with torch.autocast("cuda", dtype):
         while True:
@@ -458,23 +464,20 @@ def main():
 
     processes = []
     request_queues = []
-    response_queue = mp.Queue()
-    mp_barrier = mp.Barrier(args.num_gpus + 1)
+    response_queue = Queue()
+    # mp_barrier = mp.Barrier(args.num_gpus + 1)
     for i in range(args.num_gpus):
-        request_queues.append(mp.Queue())
-        p = mp.Process(
-            target=model_main,
-            args=(
-                args,
-                master_port,
-                i,
-                request_queues[i],
-                response_queue if i == 0 else None,
-                mp_barrier,
-            ),
+        request_queues.append(Queue())
+        generation_kwargs = dict(
+            args,
+            master_port,
+            i,
+            request_queues[i],
+            response_queue if i == 0 else None,
         )
-        p.start()
-        processes.append(p)
+        thread = Thread(target=model_main, kwargs=generation_kwargs)
+        thread.start()
+        processes.append(thread)
 
     with gr.Blocks() as demo:
         with gr.Row():
@@ -596,7 +599,7 @@ def main():
             [output_img],
         )
 
-    mp_barrier.wait()
+    # mp_barrier.wait()
     demo.queue(max_size=20).launch()
 
 
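For reference, below is a minimal, self-contained sketch of the thread-plus-queue worker pattern this diff moves to, built only from the standard library. It is not code from the repository: `worker` and `num_workers` are illustrative stand-ins for `model_main` and `args.num_gpus`, and the real worker loads the model once and then loops over generation requests.

```python
from queue import Queue
from threading import Thread


def worker(rank, request_queue, response_queue):
    # Toy stand-in for model_main: consume requests until a None sentinel arrives.
    while True:
        item = request_queue.get()
        if item is None:
            break
        result = f"rank {rank} handled: {item!r}"
        # Mirror `response_queue if i == 0 else None`: only rank 0 reports back.
        if response_queue is not None:
            response_queue.put(result)


num_workers = 2  # illustrative; the app sizes this from args.num_gpus
request_queues = []
response_queue = Queue()
threads = []

for i in range(num_workers):
    request_queues.append(Queue())
    t = Thread(
        target=worker,
        args=(i, request_queues[i], response_queue if i == 0 else None),
    )
    t.start()
    threads.append(t)

# Fan one request out to every worker, then read the single reply from rank 0.
for q in request_queues:
    q.put("a text prompt")
print(response_queue.get())

# Shut the workers down with one sentinel per queue.
for q in request_queues:
    q.put(None)
for t in threads:
    t.join()
```

Note that `threading.Thread` takes positional arguments as a tuple via `args=` and keyword arguments as a mapping via `kwargs=`; the `None` sentinel placed on each request queue is one simple way to let such worker threads exit cleanly.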