Spaces:
Running
Running
Linoy Tsaban
committed on
Commit
•
f06d376
1
Parent(s):
0ba4738
Update index.html
Browse files — index.html (+120 −92)
index.html
CHANGED
@@ -27,19 +27,21 @@
|
|
27 |
<script src="./static/js/bulma-slider.min.js"></script>
|
28 |
<script src="./static/js/index.js"></script>
|
29 |
<style>
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
43 |
</style>
|
44 |
</head>
|
45 |
<body>
|
@@ -56,12 +58,13 @@
|
|
56 |
|
57 |
|
58 |
<section class="hero">
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
65 |
<span class="author-block">
|
66 |
<a href="https://scholar.google.com/citations?user=kJ9Abf8AAAAJ&hl=en">Manuel Brack</a>¹²,
|
67 |
</span>
|
@@ -84,7 +87,7 @@
|
|
84 |
<a href="https://twitter.com/multimodalart">Apolinário Passos</a>⁴
|
85 |
</span>
|
86 |
<p></p>
|
87 |
-
|
88 |
<div class="is-size-5 publication-authors">
|
89 |
<span class="author-block">¹ German Research Center for Artificial Intelligence (DFKI),</span>
|
90 |
<span class="author-block">² Computer Science Department, TU Darmstadt,</span>
|
@@ -93,10 +96,10 @@
|
|
93 |
<span class="author-block">⁵ Centre for Cognitive Science, TU Darmstadt,</span>
|
94 |
<span class="author-block">⁶ LAION</span>
|
95 |
</div>
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
<a href="#"
|
101 |
class="external-link button is-normal is-rounded is-dark">
|
102 |
<span class="icon">
|
@@ -105,16 +108,16 @@
|
|
105 |
<span>arXiv</span>
|
106 |
</a>
|
107 |
</span>
|
108 |
-
|
109 |
-
|
110 |
<a href="https://huggingface.co/spaces/editing-images/ledtisplusplus"
|
111 |
target="_blank"
|
112 |
class="external-link button is-normal is-rounded is-dark">
|
113 |
<span>🤗 Demo</span>
|
114 |
</a>
|
115 |
</span>
|
116 |
-
|
117 |
-
|
118 |
<a href="https://huggingface.co/spaces/editing-images/ledtisplusplus/tree/main"
|
119 |
target="_blank"
|
120 |
class="external-link button is-normal is-rounded is-dark">
|
@@ -124,12 +127,12 @@
|
|
124 |
<span>Code</span>
|
125 |
</a>
|
126 |
</span>
|
127 |
-
|
128 |
-
|
|
|
|
|
129 |
</div>
|
130 |
-
|
131 |
-
</div>
|
132 |
-
</div>
|
133 |
</section>
|
134 |
|
135 |
<section class="hero teaser">
|
@@ -140,10 +143,6 @@
|
|
140 |
<source src="static/videos/faces.mp4"
|
141 |
type="video/mp4">
|
142 |
</video>
|
143 |
-
<video autoplay muted loop playsinline height="100%">
|
144 |
-
<source src="static/videos/objects_styles.mp4"
|
145 |
-
type="video/mp4">
|
146 |
-
</video>
|
147 |
|
148 |
|
149 |
<h2 class="subtitle has-text-centered">
|
@@ -193,10 +192,9 @@
|
|
193 |
</section>
|
194 |
|
195 |
|
196 |
-
|
197 |
<section class="section">
|
198 |
<div class="container is-max-desktop">
|
199 |
-
|
200 |
<div class="columns is-centered has-text-centered">
|
201 |
<h2 class="title is-3">LEDITS++: Efficient and Versatile Textual Image Editing</h2>
|
202 |
</div>
|
@@ -270,7 +268,9 @@
|
|
270 |
<p>
|
271 |
Utilizing T2I models for editing real images is usually done by inverting the sampling
|
272 |
process to identify a noisy xT that will be denoised to the input image x0.
|
273 |
-
We draw characteristics from <a href="https://inbarhub.github.io/DDPM_inversion/"
|
|
|
|
|
274 |
inversion method that greatly reduces the required number
|
275 |
of steps while maintaining no reconstruction error.
|
276 |
DDPM can be viewed as a first-order
|
@@ -283,67 +283,95 @@
|
|
283 |
<img src="static/images/inversion.png"/>
|
284 |
</div>
|
285 |
<div class="content">
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
<div class="content">
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
</div>
|
317 |
-
|
318 |
</div>
|
319 |
</div>
|
320 |
<div class="columns is-centered has-text-centered">
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
</div>
|
|
|
|
|
|
|
|
|
|
|
327 |
</div>
|
328 |
|
329 |
</section>
|
330 |
<section class="section">
|
331 |
<div class="container is-max-desktop">
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
|
342 |
</div>
|
343 |
</section>
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
<!--<section class="hero teaser">-->
|
348 |
<!-- <div class="container is-max-desktop">-->
|
349 |
<!-- <div class="hero-body">-->
|
@@ -359,15 +387,15 @@
|
|
359 |
<!-- </div>-->
|
360 |
<!--</section>-->
|
361 |
|
362 |
-
|
363 |
|
364 |
|
365 |
<section class="section" id="BibTeX">
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
}</code></pre>
|
370 |
-
|
371 |
</section>
|
372 |
|
373 |
|
|
|
27 |
<script src="./static/js/bulma-slider.min.js"></script>
|
28 |
<script src="./static/js/index.js"></script>
|
29 |
<style>
|
30 |
+
.publication-links a {
|
31 |
+
color: white !important
|
32 |
+
}
|
33 |
+
|
34 |
+
@media only screen and (max-width: 900px) {
|
35 |
+
.columns {
|
36 |
+
overflow-y: scroll;
|
37 |
+
}
|
38 |
+
}
|
39 |
+
|
40 |
+
@media only screen and (min-width: 901px) {
|
41 |
+
.is-centered img {
|
42 |
+
width: 80vw !important
|
43 |
+
}
|
44 |
+
}
|
45 |
</style>
|
46 |
</head>
|
47 |
<body>
|
|
|
58 |
|
59 |
|
60 |
<section class="hero">
|
61 |
+
<div class="hero-body">
|
62 |
+
<div class="container is-max-desktop">
|
63 |
+
<div class="columns is-centered">
|
64 |
+
<div class="column has-text-centered">
|
65 |
+
<h1 class="title is-1 publication-title">LEDITS++: Limitless Image Editing using Text-to-Image
|
66 |
+
Models</h1>
|
67 |
+
<div class="is-size-5 publication-authors">
|
68 |
<span class="author-block">
|
69 |
<a href="https://scholar.google.com/citations?user=kJ9Abf8AAAAJ&hl=en">Manuel Brack</a>¹²,
|
70 |
</span>
|
|
|
87 |
<a href="https://twitter.com/multimodalart">Apolinário Passos</a>⁴
|
88 |
</span>
|
89 |
<p></p>
|
90 |
+
|
91 |
<div class="is-size-5 publication-authors">
|
92 |
<span class="author-block">¹ German Research Center for Artificial Intelligence (DFKI),</span>
|
93 |
<span class="author-block">² Computer Science Department, TU Darmstadt,</span>
|
|
|
96 |
<span class="author-block">⁵ Centre for Cognitive Science, TU Darmstadt,</span>
|
97 |
<span class="author-block">⁶ LAION</span>
|
98 |
</div>
|
99 |
+
<div class="column has-text-centered">
|
100 |
+
<div class="publication-links">
|
101 |
+
<!-- arxiv Link. -->
|
102 |
+
<span class="link-block">
|
103 |
<a href="#"
|
104 |
class="external-link button is-normal is-rounded is-dark">
|
105 |
<span class="icon">
|
|
|
108 |
<span>arXiv</span>
|
109 |
</a>
|
110 |
</span>
|
111 |
+
<!-- Demo Link. -->
|
112 |
+
<span class="link-block">
|
113 |
<a href="https://huggingface.co/spaces/editing-images/ledtisplusplus"
|
114 |
target="_blank"
|
115 |
class="external-link button is-normal is-rounded is-dark">
|
116 |
<span>🤗 Demo</span>
|
117 |
</a>
|
118 |
</span>
|
119 |
+
<!-- Code Link. -->
|
120 |
+
<span class="link-block">
|
121 |
<a href="https://huggingface.co/spaces/editing-images/ledtisplusplus/tree/main"
|
122 |
target="_blank"
|
123 |
class="external-link button is-normal is-rounded is-dark">
|
|
|
127 |
<span>Code</span>
|
128 |
</a>
|
129 |
</span>
|
130 |
+
</div>
|
131 |
+
</div>
|
132 |
+
</div>
|
133 |
+
</div>
|
134 |
</div>
|
135 |
+
</div>
|
|
|
|
|
136 |
</section>
|
137 |
|
138 |
<section class="hero teaser">
|
|
|
143 |
<source src="static/videos/faces.mp4"
|
144 |
type="video/mp4">
|
145 |
</video>
|
|
|
|
|
|
|
|
|
146 |
|
147 |
|
148 |
<h2 class="subtitle has-text-centered">
|
|
|
192 |
</section>
|
193 |
|
194 |
|
|
|
195 |
<section class="section">
|
196 |
<div class="container is-max-desktop">
|
197 |
+
<!-- Introduction -->
|
198 |
<div class="columns is-centered has-text-centered">
|
199 |
<h2 class="title is-3">LEDITS++: Efficient and Versatile Textual Image Editing</h2>
|
200 |
</div>
|
|
|
268 |
<p>
|
269 |
Utilizing T2I models for editing real images is usually done by inverting the sampling
|
270 |
process to identify a noisy xT that will be denoised to the input image x0.
|
271 |
+
We draw characteristics from <a href="https://inbarhub.github.io/DDPM_inversion/"
|
272 |
+
target="_blank">edit friendly DDPM inversion</a> and propose
|
273 |
+
an efficient
|
274 |
inversion method that greatly reduces the required number
|
275 |
of steps while maintaining no reconstruction error.
|
276 |
DDPM can be viewed as a first-order
|
|
|
283 |
<img src="static/images/inversion.png"/>
|
284 |
</div>
|
285 |
<div class="content">
|
286 |
+
<h2 class="title is-4">Component 2: Textual Editing</h2>
|
287 |
+
<p>
|
288 |
+
After creating our re-construction sequence, we can edit the image by manipulating
|
289 |
+
the noise estimate εθ based on a set of edit instructions. We devise a dedicated
|
290 |
+
guidance term for each concept based on conditioned and unconditioned estimate. We
|
291 |
+
define LEDITS++ guidance such that it both reflects the direction of the edit (if we
|
292 |
+
want
|
293 |
+
to push away from/towards the edit concept) and maximizes fine-grained control over
|
294 |
+
the effect of the desired edit.
|
295 |
+
|
296 |
+
</p>
|
297 |
+
<img src="static/images/textual_editing.png"/>
|
298 |
+
</div>
|
299 |
<div class="content">
|
300 |
+
<h2 class="title is-4">Component 3: Semantic Grounding</h2>
|
301 |
+
<p>
|
302 |
+
In our defined LEDITS++ guidance, we include a masking term composed of the
|
303 |
+
intersection between the mask generated from
|
304 |
+
the U-Net’s cross-attention layers and a mask derived from
|
305 |
+
the noise estimate - yielding a mask both focused on relevant image
|
306 |
+
regions and of fine granularity.
|
307 |
+
We empirically demonstrate that these maps can also capture regions
|
308 |
+
of an image relevant to an editing concept that is not already present.
|
309 |
+
Specifically for multiple edits, calculating a
|
310 |
+
dedicated mask for each edit prompt ensures that the corresponding
|
311 |
+
guidance terms remain largely isolated, limiting
|
312 |
+
interference between them.
|
313 |
+
|
314 |
+
</p>
|
315 |
+
|
316 |
</div>
|
317 |
+
|
318 |
</div>
|
319 |
</div>
|
320 |
<div class="columns is-centered has-text-centered">
|
321 |
+
<img
|
322 |
+
style="max-height:800px; max-width:800px"
|
323 |
+
src="static/images/semantic_grounding.png"
|
324 |
+
/>
|
325 |
+
</div>
|
326 |
+
|
327 |
+
<div class="columns is-centered has-text-centered">
|
328 |
+
<h2 class="title is-3">Properties of LEDITS++
|
329 |
+
</h2>
|
330 |
+
</div>
|
331 |
+
<div class="columns is-centered has-text-centered">
|
332 |
+
<div class="column">
|
333 |
+
<p>
|
334 |
+
Efficiency.
|
335 |
+
</p>
|
336 |
+
</div>
|
337 |
+
<div class="column">
|
338 |
+
<p>
|
339 |
+
Versatility.
|
340 |
+
</p>
|
341 |
+
</div>
|
342 |
+
<div class="column">
|
343 |
+
<p>
|
344 |
+
Precision.
|
345 |
+
</p>
|
346 |
+
</div>
|
347 |
+
</div>
|
348 |
+
|
349 |
</div>
|
350 |
+
<video autoplay muted loop playsinline height="100%">
|
351 |
+
<source src="static/videos/objects_styles.mp4"
|
352 |
+
type="video/mp4">
|
353 |
+
</video>
|
354 |
+
|
355 |
</div>
|
356 |
|
357 |
</section>
|
358 |
<section class="section">
|
359 |
<div class="container is-max-desktop">
|
360 |
+
<div class="columns is-centered has-text-centered">
|
361 |
+
<h2 class="title is-3">Interactive Demo</h2>
|
362 |
+
</div>
|
363 |
+
<script
|
364 |
+
type="module"
|
365 |
+
src="https://gradio.s3-us-west-2.amazonaws.com/3.43.0/gradio.js"
|
366 |
+
></script>
|
367 |
+
|
368 |
+
<gradio-app src="https://editing-images-ledtisplusplus.hf.space"></gradio-app>
|
369 |
|
370 |
</div>
|
371 |
</section>
|
372 |
+
|
373 |
+
|
374 |
+
<!-- portraits video -->
|
375 |
<!--<section class="hero teaser">-->
|
376 |
<!-- <div class="container is-max-desktop">-->
|
377 |
<!-- <div class="hero-body">-->
|
|
|
387 |
<!-- </div>-->
|
388 |
<!--</section>-->
|
389 |
|
390 |
+
<!-- 3 key observations -->
|
391 |
|
392 |
|
393 |
<section class="section" id="BibTeX">
|
394 |
+
<div class="container is-max-desktop content">
|
395 |
+
<h2 class="title">BibTeX</h2>
|
396 |
+
<pre><code>@article{
|
397 |
}</code></pre>
|
398 |
+
</div>
|
399 |
</section>
|
400 |
|
401 |
|