hysts (HF staff) committed
Commit
feb3220
1 Parent(s): 9924827
Files changed (14)
  1. .gitignore +164 -0
  2. .pre-commit-config.yaml +36 -0
  3. .style.yapf +5 -0
  4. LICENSE +21 -0
  5. LICENSE.Shap-E +21 -0
  6. README.md +3 -1
  7. app.py +28 -0
  8. app_image_to_3d.py +84 -0
  9. app_text_to_3d.py +100 -0
  10. model.py +162 -0
  11. requirements.txt +6 -0
  12. settings.py +7 -0
  13. style.css +21 -0
  14. utils.py +9 -0
.gitignore ADDED
@@ -0,0 +1,164 @@
+ gradio_cached_examples/
+ shap_e_model_cache/
+ corgi.png
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
.pre-commit-config.yaml ADDED
@@ -0,0 +1,36 @@
+ repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+   rev: v4.2.0
+   hooks:
+   - id: check-executables-have-shebangs
+   - id: check-json
+   - id: check-merge-conflict
+   - id: check-shebang-scripts-are-executable
+   - id: check-toml
+   - id: check-yaml
+   - id: double-quote-string-fixer
+   - id: end-of-file-fixer
+   - id: mixed-line-ending
+     args: ['--fix=lf']
+   - id: requirements-txt-fixer
+   - id: trailing-whitespace
+ - repo: https://github.com/myint/docformatter
+   rev: v1.4
+   hooks:
+   - id: docformatter
+     args: ['--in-place']
+ - repo: https://github.com/pycqa/isort
+   rev: 5.12.0
+   hooks:
+   - id: isort
+ - repo: https://github.com/pre-commit/mirrors-mypy
+   rev: v0.991
+   hooks:
+   - id: mypy
+     args: ['--ignore-missing-imports']
+     additional_dependencies: ['types-python-slugify']
+ - repo: https://github.com/google/yapf
+   rev: v0.32.0
+   hooks:
+   - id: yapf
+     args: ['--parallel', '--in-place']
.style.yapf ADDED
@@ -0,0 +1,5 @@
+ [style]
+ based_on_style = pep8
+ blank_line_before_nested_class_or_def = false
+ spaces_before_comment = 2
+ split_before_logical_operator = true
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2023 hysts
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
LICENSE.Shap-E ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2023 OpenAI
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,14 @@
  ---
- title: Shap E
+ title: Shap-E
  emoji: 📉
  colorFrom: yellow
  colorTo: blue
  sdk: gradio
  sdk_version: 3.28.2
+ python_version: 3.10.11
  app_file: app.py
  pinned: false
+ license: mit
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,28 @@
+ #!/usr/bin/env python
+
+ import os
+
+ import gradio as gr
+ import torch
+
+ from app_image_to_3d import create_demo as create_demo_image_to_3d
+ from app_text_to_3d import create_demo as create_demo_text_to_3d
+ from model import Model
+
+ DESCRIPTION = '# [Shap-E](https://github.com/openai/shap-e)'
+
+ if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
+     DESCRIPTION += f'\n<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>'
+ if not torch.cuda.is_available():
+     DESCRIPTION += '\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>'
+
+ model = Model()
+
+ with gr.Blocks(css='style.css') as demo:
+     gr.Markdown(DESCRIPTION)
+     with gr.Tabs():
+         with gr.Tab(label='Text to 3D'):
+             create_demo_text_to_3d(model)
+         with gr.Tab(label='Image to 3D'):
+             create_demo_image_to_3d(model)
+ demo.queue(api_open=False, max_size=5).launch()
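Note (not part of the commit): the tab-level `create_demo` functions added below return plain `gr.Blocks`, so a single tab can also be launched outside the Space. A minimal sketch, assuming the pinned requirements are installed and a CUDA GPU is available (the sampler runs with `use_fp16=True`):

```python
# Hypothetical local launch script, reusing the modules added in this commit.
from app_text_to_3d import create_demo as create_demo_text_to_3d
from model import Model

model = Model()                       # loads the Shap-E transmitter and diffusion config
demo = create_demo_text_to_3d(model)  # gr.Blocks containing only the text-to-3D tab
demo.queue(max_size=5).launch()       # serves on http://127.0.0.1:7860 by default
```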
app_image_to_3d.py ADDED
@@ -0,0 +1,84 @@
+ #!/usr/bin/env python
+
+ import shlex
+ import subprocess
+
+ import gradio as gr
+
+ from model import Model
+ from settings import CACHE_EXAMPLES, MAX_SEED
+ from utils import randomize_seed_fn
+
+
+ def create_demo(model: Model) -> gr.Blocks:
+     subprocess.run(
+         shlex.split(
+             'wget https://raw.githubusercontent.com/openai/shap-e/d99cedaea18e0989e340163dbaeb4b109fa9e8ec/shap_e/examples/example_data/corgi.png -O corgi.png'
+         ))
+     examples = ['corgi.png']
+
+     def process_example_fn(image_path: str) -> str:
+         return model.run_image(image_path, output_image_size=128)
+
+     with gr.Blocks() as demo:
+         with gr.Box():
+             image = gr.Image(label='Input image',
+                              show_label=False,
+                              type='filepath')
+             run_button = gr.Button('Run')
+             result = gr.Video(label='Result', elem_id='result-2')
+             with gr.Accordion('Advanced options', open=False):
+                 seed = gr.Slider(label='Seed',
+                                  minimum=0,
+                                  maximum=MAX_SEED,
+                                  step=1,
+                                  value=0)
+                 randomize_seed = gr.Checkbox(label='Randomize seed',
+                                              value=True)
+                 guidance_scale = gr.Slider(label='Guidance scale',
+                                            minimum=1,
+                                            maximum=20,
+                                            step=0.1,
+                                            value=3.0)
+                 num_inference_steps = gr.Slider(
+                     label='Number of inference steps',
+                     minimum=1,
+                     maximum=100,
+                     step=1,
+                     value=64)
+                 image_size = gr.Slider(label='Image size',
+                                        minimum=64,
+                                        maximum=256,
+                                        step=64,
+                                        value=128)
+                 render_mode = gr.Dropdown(label='Render mode',
+                                           choices=['nerf', 'stf'],
+                                           value='nerf',
+                                           visible=False)
+
+         gr.Examples(examples=examples,
+                     inputs=image,
+                     outputs=result,
+                     fn=process_example_fn,
+                     cache_examples=CACHE_EXAMPLES)
+
+         inputs = [
+             image,
+             seed,
+             guidance_scale,
+             num_inference_steps,
+             image_size,
+             render_mode,
+         ]
+
+         run_button.click(
+             fn=randomize_seed_fn,
+             inputs=[seed, randomize_seed],
+             outputs=seed,
+             queue=False,
+         ).then(
+             fn=model.run_image,
+             inputs=inputs,
+             outputs=result,
+         )
+     return demo
app_text_to_3d.py ADDED
@@ -0,0 +1,100 @@
+ #!/usr/bin/env python
+
+ import gradio as gr
+
+ from model import Model
+ from settings import CACHE_EXAMPLES, MAX_SEED
+ from utils import randomize_seed_fn
+
+
+ def create_demo(model: Model) -> gr.Blocks:
+     examples = [
+         'A chair that looks like an avocado',
+         'An airplane that looks like a banana',
+         'A spaceship',
+         'A birthday cupcake',
+         'A chair that looks like a tree',
+         'A green boot',
+         'A penguin',
+         'Ube ice cream cone',
+         'A bowl of vegetables',
+     ]
+
+     def process_example_fn(prompt: str) -> str:
+         return model.run_text(prompt, output_image_size=128)
+
+     with gr.Blocks() as demo:
+         with gr.Box():
+             with gr.Row(elem_id='prompt-container'):
+                 prompt = gr.Text(
+                     label='Prompt',
+                     show_label=False,
+                     max_lines=1,
+                     placeholder='Enter your prompt').style(container=False)
+                 run_button = gr.Button('Run').style(full_width=False)
+             result = gr.Video(label='Result', elem_id='result-1')
+             with gr.Accordion('Advanced options', open=False):
+                 seed = gr.Slider(label='Seed',
+                                  minimum=0,
+                                  maximum=MAX_SEED,
+                                  step=1,
+                                  value=0)
+                 randomize_seed = gr.Checkbox(label='Randomize seed',
+                                              value=True)
+                 guidance_scale = gr.Slider(label='Guidance scale',
+                                            minimum=1,
+                                            maximum=20,
+                                            step=0.1,
+                                            value=15.0)
+                 num_inference_steps = gr.Slider(
+                     label='Number of inference steps',
+                     minimum=1,
+                     maximum=100,
+                     step=1,
+                     value=64)
+                 image_size = gr.Slider(label='Image size',
+                                        minimum=64,
+                                        maximum=256,
+                                        step=64,
+                                        value=128)
+                 render_mode = gr.Dropdown(label='Render mode',
+                                           choices=['nerf', 'stf'],
+                                           value='nerf',
+                                           visible=False)
+
+         gr.Examples(examples=examples,
+                     inputs=prompt,
+                     outputs=result,
+                     fn=process_example_fn,
+                     cache_examples=CACHE_EXAMPLES)
+
+         inputs = [
+             prompt,
+             seed,
+             guidance_scale,
+             num_inference_steps,
+             image_size,
+             render_mode,
+         ]
+         prompt.submit(
+             fn=randomize_seed_fn,
+             inputs=[seed, randomize_seed],
+             outputs=seed,
+             queue=False,
+         ).then(
+             fn=model.run_text,
+             inputs=inputs,
+             outputs=result,
+         )
+
+         run_button.click(
+             fn=randomize_seed_fn,
+             inputs=[seed, randomize_seed],
+             outputs=seed,
+             queue=False,
+         ).then(
+             fn=model.run_text,
+             inputs=inputs,
+             outputs=result,
+         )
+     return demo
model.py ADDED
@@ -0,0 +1,162 @@
+ import tempfile
+
+ import imageio
+ import numpy as np
+ import PIL.Image
+ import torch
+ from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
+ from shap_e.diffusion.sample import sample_latents
+ from shap_e.models.download import load_config, load_model
+ from shap_e.models.nn.camera import (DifferentiableCameraBatch,
+                                      DifferentiableProjectiveCamera)
+ from shap_e.models.transmitter.base import Transmitter, VectorDecoder
+ from shap_e.util.collections import AttrDict
+ from shap_e.util.image_util import load_image
+
+
+ # Copied from https://github.com/openai/shap-e/blob/d99cedaea18e0989e340163dbaeb4b109fa9e8ec/shap_e/util/notebooks.py#L15-L42
+ def create_pan_cameras(size: int,
+                        device: torch.device) -> DifferentiableCameraBatch:
+     origins = []
+     xs = []
+     ys = []
+     zs = []
+     for theta in np.linspace(0, 2 * np.pi, num=20):
+         z = np.array([np.sin(theta), np.cos(theta), -0.5])
+         z /= np.sqrt(np.sum(z**2))
+         origin = -z * 4
+         x = np.array([np.cos(theta), -np.sin(theta), 0.0])
+         y = np.cross(z, x)
+         origins.append(origin)
+         xs.append(x)
+         ys.append(y)
+         zs.append(z)
+     return DifferentiableCameraBatch(
+         shape=(1, len(xs)),
+         flat_camera=DifferentiableProjectiveCamera(
+             origin=torch.from_numpy(np.stack(origins,
+                                              axis=0)).float().to(device),
+             x=torch.from_numpy(np.stack(xs, axis=0)).float().to(device),
+             y=torch.from_numpy(np.stack(ys, axis=0)).float().to(device),
+             z=torch.from_numpy(np.stack(zs, axis=0)).float().to(device),
+             width=size,
+             height=size,
+             x_fov=0.7,
+             y_fov=0.7,
+         ),
+     )
+
+
+ # Copied from https://github.com/openai/shap-e/blob/d99cedaea18e0989e340163dbaeb4b109fa9e8ec/shap_e/util/notebooks.py#L45-L60
+ @torch.no_grad()
+ def decode_latent_images(
+     xm: Transmitter | VectorDecoder,
+     latent: torch.Tensor,
+     cameras: DifferentiableCameraBatch,
+     rendering_mode: str = 'stf',
+ ):
+     decoded = xm.renderer.render_views(
+         AttrDict(cameras=cameras),
+         params=(xm.encoder if isinstance(xm, Transmitter) else
+                 xm).bottleneck_to_params(latent[None]),
+         options=AttrDict(rendering_mode=rendering_mode,
+                          render_with_direction=False),
+     )
+     arr = decoded.channels.clamp(0, 255).to(torch.uint8)[0].cpu().numpy()
+     return [PIL.Image.fromarray(x) for x in arr]
+
+
+ class Model:
+     def __init__(self):
+         self.device = torch.device(
+             'cuda' if torch.cuda.is_available() else 'cpu')
+         self.xm = load_model('transmitter', device=self.device)
+         self.diffusion = diffusion_from_config(load_config('diffusion'))
+         self.model_name = ''
+         self.model = None
+
+     def load_model(self, model_name: str) -> None:
+         assert model_name in ['text300M', 'image300M']
+         if model_name == self.model_name:
+             return
+         self.model = load_model(model_name, device=self.device)
+         self.model_name = model_name
+
+     @staticmethod
+     def to_video(frames: list[PIL.Image.Image], fps: int = 5) -> str:
+         out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
+         writer = imageio.get_writer(out_file.name, format='FFMPEG', fps=fps)
+         for frame in frames:
+             writer.append_data(np.asarray(frame))
+         writer.close()
+         return out_file.name
+
+     def run_text(self,
+                  prompt: str,
+                  seed: int = 0,
+                  guidance_scale: float = 15.0,
+                  num_steps: int = 64,
+                  output_image_size: int = 64,
+                  render_mode: str = 'nerf') -> str:
+         self.load_model('text300M')
+
+         torch.manual_seed(seed)
+
+         latents = sample_latents(
+             batch_size=1,
+             model=self.model,
+             diffusion=self.diffusion,
+             guidance_scale=guidance_scale,
+             model_kwargs=dict(texts=[prompt]),
+             progress=True,
+             clip_denoised=True,
+             use_fp16=True,
+             use_karras=True,
+             karras_steps=num_steps,
+             sigma_min=1e-3,
+             sigma_max=160,
+             s_churn=0,
+         )
+
+         cameras = create_pan_cameras(output_image_size, self.device)
+         frames = decode_latent_images(self.xm,
+                                       latents[0],
+                                       cameras,
+                                       rendering_mode=render_mode)
+         return self.to_video(frames)
+
+     def run_image(self,
+                   image_path: str,
+                   seed: int = 0,
+                   guidance_scale: float = 3.0,
+                   num_steps: int = 64,
+                   output_image_size: int = 64,
+                   render_mode: str = 'nerf') -> str:
+         self.load_model('image300M')
+
+         torch.manual_seed(seed)
+
+         image = load_image(image_path)
+
+         latents = sample_latents(
+             batch_size=1,
+             model=self.model,
+             diffusion=self.diffusion,
+             guidance_scale=guidance_scale,
+             model_kwargs=dict(images=[image]),
+             progress=True,
+             clip_denoised=True,
+             use_fp16=True,
+             use_karras=True,
+             karras_steps=num_steps,
+             sigma_min=1e-3,
+             sigma_max=160,
+             s_churn=0,
+         )
+
+         cameras = create_pan_cameras(output_image_size, self.device)
+         frames = decode_latent_images(self.xm,
+                                       latents[0],
+                                       cameras,
+                                       rendering_mode=render_mode)
+         return self.to_video(frames)
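Note (not part of the commit): `model.py` can also be driven without the Gradio UI. A minimal sketch, assuming a CUDA GPU (sampling uses fp16) and that the Shap-E checkpoints download on first use (they land in `shap_e_model_cache/`, which the new `.gitignore` excludes):

```python
# Hypothetical standalone use of the Model class above: text prompt in, .mp4 turntable out.
from model import Model

model = Model()  # loads the transmitter and diffusion config; the text model loads lazily
video_path = model.run_text('A penguin',  # one of the demo's example prompts
                            seed=0,
                            guidance_scale=15.0,
                            num_steps=64,
                            output_image_size=64,
                            render_mode='nerf')
print(video_path)  # temporary .mp4 containing 20 rendered views circling the object
```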
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ blobfile==2.0.2
+ git+https://github.com/openai/shap-e@d99ceda
+ gradio==3.28.2
+ imageio[ffmpeg]==2.28.1
+ torch==2.0.0
+ torchvision==0.15.1
settings.py ADDED
@@ -0,0 +1,7 @@
+ import os
+
+ import numpy as np
+
+ CACHE_EXAMPLES = os.getenv('CACHE_EXAMPLES') == '1'
+
+ MAX_SEED = np.iinfo(np.int32).max
style.css ADDED
@@ -0,0 +1,21 @@
+ h1 {
+   text-align: center;
+ }
+
+ #component-0 {
+   max-width: 730px;
+   margin: auto;
+   padding-top: 1.5rem;
+ }
+
+ #result-1 video {
+   object-fit: scale-down;
+ }
+
+ #result-2 video {
+   object-fit: scale-down;
+ }
+
+ #prompt-container {
+   gap: 0;
+ }
utils.py ADDED
@@ -0,0 +1,9 @@
+ import random
+
+ from settings import MAX_SEED
+
+
+ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
+     if randomize_seed:
+         seed = random.randint(0, MAX_SEED)
+     return seed