Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -6,21 +6,27 @@ subprocess.run("huggingface-cli download --resume-download Alpha-VLLM/Lumina-Nex
|
|
6 |
import argparse
|
7 |
import builtins
|
8 |
import json
|
9 |
-
import
|
10 |
import random
|
11 |
import socket
|
12 |
import spaces
|
13 |
import traceback
|
14 |
import os
|
|
|
|
|
15 |
import fairscale.nn.model_parallel.initialize as fs_init
|
16 |
import gradio as gr
|
17 |
import numpy as np
|
|
|
18 |
import torch
|
19 |
import torch.distributed as dist
|
20 |
from torchvision.transforms.functional import to_pil_image
|
21 |
|
22 |
-
import models
|
23 |
from PIL import Image
|
|
|
|
|
|
|
|
|
24 |
from lumina_t2i.transport import create_transport, Sampler
|
25 |
|
26 |
print(f"Is CUDA available: {torch.cuda.is_available()}")
|
@@ -113,7 +119,7 @@ def encode_prompt(
|
|
113 |
|
114 |
|
115 |
@torch.no_grad()
|
116 |
-
def model_main(args, master_port, rank, request_queue, response_queue
|
117 |
# import here to avoid huggingface Tokenizer parallelism warnings
|
118 |
from diffusers.models import AutoencoderKL
|
119 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
@@ -194,7 +200,7 @@ def model_main(args, master_port, rank, request_queue, response_queue, mp_barrie
|
|
194 |
)
|
195 |
model.load_state_dict(ckpt, strict=True)
|
196 |
|
197 |
-
mp_barrier.wait()
|
198 |
|
199 |
with torch.autocast("cuda", dtype):
|
200 |
while True:
|
@@ -458,23 +464,20 @@ def main():
|
|
458 |
|
459 |
processes = []
|
460 |
request_queues = []
|
461 |
-
response_queue =
|
462 |
-
mp_barrier = mp.Barrier(args.num_gpus + 1)
|
463 |
for i in range(args.num_gpus):
|
464 |
-
request_queues.append(
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
request_queues[i],
|
472 |
-
response_queue if i == 0 else None,
|
473 |
-
mp_barrier,
|
474 |
-
),
|
475 |
)
|
476 |
-
|
477 |
-
|
|
|
478 |
|
479 |
with gr.Blocks() as demo:
|
480 |
with gr.Row():
|
@@ -596,7 +599,7 @@ def main():
|
|
596 |
[output_img],
|
597 |
)
|
598 |
|
599 |
-
mp_barrier.wait()
|
600 |
demo.queue(max_size=20).launch()
|
601 |
|
602 |
|
|
|
6 |
import argparse
|
7 |
import builtins
|
8 |
import json
|
9 |
+
import threading
|
10 |
import random
|
11 |
import socket
|
12 |
import spaces
|
13 |
import traceback
|
14 |
import os
|
15 |
+
from queue import Queue
|
16 |
+
|
17 |
import fairscale.nn.model_parallel.initialize as fs_init
|
18 |
import gradio as gr
|
19 |
import numpy as np
|
20 |
+
|
21 |
import torch
|
22 |
import torch.distributed as dist
|
23 |
from torchvision.transforms.functional import to_pil_image
|
24 |
|
|
|
25 |
from PIL import Image
|
26 |
+
from threading import Thread
|
27 |
+
|
28 |
+
import models
|
29 |
+
|
30 |
from lumina_t2i.transport import create_transport, Sampler
|
31 |
|
32 |
print(f"Is CUDA available: {torch.cuda.is_available()}")
|
|
|
119 |
|
120 |
|
121 |
@torch.no_grad()
|
122 |
+
def model_main(args, master_port, rank, request_queue, response_queue):
|
123 |
# import here to avoid huggingface Tokenizer parallelism warnings
|
124 |
from diffusers.models import AutoencoderKL
|
125 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
200 |
)
|
201 |
model.load_state_dict(ckpt, strict=True)
|
202 |
|
203 |
+
# mp_barrier.wait()
|
204 |
|
205 |
with torch.autocast("cuda", dtype):
|
206 |
while True:
|
|
|
464 |
|
465 |
processes = []
|
466 |
request_queues = []
|
467 |
+
response_queue = Queue()
|
468 |
+
# mp_barrier = mp.Barrier(args.num_gpus + 1)
|
469 |
for i in range(args.num_gpus):
|
470 |
+
request_queues.append(Queue())
|
471 |
+
generation_kwargs = dict(
|
472 |
+
args,
|
473 |
+
master_port,
|
474 |
+
i,
|
475 |
+
request_queues[i],
|
476 |
+
response_queue if i == 0 else None,
|
|
|
|
|
|
|
|
|
477 |
)
|
478 |
+
thread = Thread(target=model_main, kwargs=generation_kwargs)
|
479 |
+
thread.start()
|
480 |
+
processes.append(thread)
|
481 |
|
482 |
with gr.Blocks() as demo:
|
483 |
with gr.Row():
|
|
|
599 |
[output_img],
|
600 |
)
|
601 |
|
602 |
+
# mp_barrier.wait()
|
603 |
demo.queue(max_size=20).launch()
|
604 |
|
605 |
|