File size: 5,000 Bytes
916b126
 
606ab59
 
3f05732
 
 
 
6272555
3f05732
 
 
 
 
 
6272555
 
971b51c
916b126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9486ff0
916b126
 
 
 
 
 
9486ff0
 
 
 
 
 
 
 
916b126
 
 
9486ff0
916b126
 
 
 
 
 
 
 
 
 
 
 
73d2593
576d93d
 
9486ff0
916b126
 
 
9486ff0
d06ff19
4d527d5
d06ff19
916b126
 
 
cae28d7
916b126
 
9486ff0
36d51ec
3c86beb
576d93d
916b126
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import gradio as gr
import numpy as np
import os

try:
    from train import *
    print('==> simple-knn & diff-gaussian-rasterization already installed!')
except:
    print('==> simple-knn & diff-gaussian-rasterization are NOT installed!')
    # https://github.com/pytorch/extension-cpp/issues/71
    os.environ["TORCH_CUDA_ARCH_LIST"] = "3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
    print('==> TORCH_CUDA_ARCH_LIST =', os.environ.get('TORCH_CUDA_ARCH_LIST'))
    os.system("python -m pip install git+https://github.com/YixunLiang/simple-knn.git")
    print('==> simple-knn installed!')
    os.system("python -m pip install git+https://github.com/YixunLiang/diff-gaussian-rasterization.git")
    print('==> diff-gaussian-rasterization installed!')
    from train import *

example_inputs = [[
        "A DSLR photo of a Rugged, vintage-inspired hiking boots with a weathered leather finish, best quality, 4K, HD.",
        "Rugged, vintage-inspired hiking boots with a weathered leather finish."
    ], [
        "a DSLR photo of a Cream Cheese Donut.",
        "a Donut."
    ], [
        "A durian, 8k, HDR.",
        "A durian"
    ], [
        "A pillow with huskies printed on it",
        "A pillow"
    ], [
        "A DSLR photo of a wooden car, super detailed, best quality, 4K, HD.",
        "a wooden car."
]]
example_outputs_1 = [
    gr.Video(value=os.path.join(os.path.dirname(__file__), 'example/boots.mp4'), autoplay=True),
    gr.Video(value=os.path.join(os.path.dirname(__file__), 'example/Donut.mp4'), autoplay=True),
    gr.Video(value=os.path.join(os.path.dirname(__file__), 'example/durian.mp4'), autoplay=True),
    gr.Video(value=os.path.join(os.path.dirname(__file__), 'example/pillow_huskies.mp4'), autoplay=True),
    gr.Video(value=os.path.join(os.path.dirname(__file__), 'example/wooden_car.mp4'), autoplay=True)
]
example_outputs_2 = [
    gr.Video(value=os.path.join(os.path.dirname(__file__), 'example/boots_pro.mp4'), autoplay=True),
    gr.Video(value=os.path.join(os.path.dirname(__file__), 'example/Donut_pro.mp4'), autoplay=True),
    gr.Video(value=os.path.join(os.path.dirname(__file__), 'example/durian_pro.mp4'), autoplay=True),
    gr.Video(value=os.path.join(os.path.dirname(__file__), 'example/pillow_huskies_pro.mp4'), autoplay=True),
    gr.Video(value=os.path.join(os.path.dirname(__file__), 'example/wooden_car_pro.mp4'), autoplay=True)
]


def main(prompt, init_prompt, negative_prompt, num_iter, CFG, seed):
    if [prompt, init_prompt] in example_inputs:
        return example_outputs_1[example_inputs.index([prompt, init_prompt])], example_outputs_2[example_inputs.index([prompt, init_prompt])]
    args, lp, op, pp, gcp, gp = args_parser(default_opt=os.path.join(os.path.dirname(__file__), 'configs/white_hair_ironman.yaml'))
    gp.text = prompt
    gp.negative = negative_prompt
    if len(init_prompt) > 1: 
        gcp.init_shape = 'pointe' 
        gcp.init_prompt = init_prompt
    else:
        gcp.init_shape = 'sphere'
        gcp.init_prompt = '.'
    op.iterations = num_iter
    gp.guidance_scale = CFG
    gp.noise_seed = int(seed)
    print('==> User Prompt:', gp.text)
    lp.workspace = 'gradio_demo'
    video_path, pro_video_path = start_training(args, lp, op, pp, gcp, gp)
    return gr.Video(value=video_path, autoplay=True), gr.Video(value=pro_video_path, autoplay=True)

with gr.Blocks() as demo:
    gr.Markdown("# <center>LucidDreamer: Towards High-Fidelity Text-to-3D Generation via Interval Score Matching</center>")
    gr.Markdown("This live demo allows you to generate high-quality 3D content using text prompts. The outputs are 360° rendered 3d gaussian video and training progress visualization.<br> \
                It is based on Stable Diffusion 2.1. Please check out our <strong><a href=https://github.com/EnVision-Research/LucidDreamer>Project Page</a> / <a href=https://arxiv.org/abs/2311.11284>Paper</a> / <a href=https://github.com/EnVision-Research/LucidDreamer>Code</a></strong> if you want to learn more about our method!<br> \
                Note that this demo is running on A10G, the running time might be longer than the reported 35 minutes (5000 iterations) on A100.<br> \
                &copy; This Gradio space was developed by Haodong LI.")
    gr.Interface(fn=main, inputs=[gr.Textbox(lines=2, value="A portrait of IRONMAN, white hair, head, photorealistic, 8K, HDR.", label="Your prompt"),
            gr.Textbox(lines=1, value="a man head.", label="Point-E init prompt (optional)"),
            gr.Textbox(lines=2, value="unrealistic, blurry, low quality, out of focus, ugly, low contrast, dull, low-resolution.", label="Negative prompt (optional)"),
            gr.Slider(1000, 5000, value=3000, label="Number of iterations"),
            gr.Slider(7.5, 100, value=7.5, label="CFG"),
            gr.Number(value=0, label="Seed")], 
        outputs=["playable_video", "playable_video"],
        examples=example_inputs,
        cache_examples=True,
        concurrency_limit=1)
demo.launch()