# app.py - Hugging Face Space by philschmid (HF staff); last commit "Update app.py" (23d5fdf).
import gradio as gr
import gradio.helpers
from datasets import load_dataset
import re
import os
import requests
import time
from typing import Tuple
from share_btn import community_icon_html, loading_icon_html, share_js
from filter_words import bad_words
from sagemaker.huggingface import HuggingFacePredictor
from sagemaker import Session
import boto3
from concurrent.futures import ThreadPoolExecutor, as_completed
# --- AWS / SageMaker configuration ------------------------------------------
# The two credentials have no fallback; the region and the endpoint name fall
# back to the literal defaults below when the environment does not set them.
aws_access_key_id = os.environ.get("AWS_ACCESS_KEY_ID")
aws_secret_access_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
region = os.environ.get("AWS_REGION", "us-east-2")
endpoint_name = os.environ.get(
    "SAGEMAKER_ENDPOINT_NAME",
    "huggingface-pytorch-inference-neuronx-2023-11-15-13-51-10-749",
)

# Fail at import time if any of the four settings resolved to None.
if any(
    setting is None
    for setting in (aws_access_key_id, aws_secret_access_key, region, endpoint_name)
):
    raise Exception(
        "Please set AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION and SAGEMAKER_ENDPOINT_NAME environment variables"
    )

# boto3 session -> SageMaker session -> predictor bound to the endpoint name.
boto_session = boto3.Session(
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    region_name=region,
)
session = Session(boto_session=boto_session)
print(f"sagemaker session region: {session.boto_region_name}")

# Module-level predictor used by ``send_request`` for every inference call.
predictor = HuggingFacePredictor(
    endpoint_name=endpoint_name,
    sagemaker_session=session,
)
# Style presets as (name, prompt template, negative prompt) rows. Each prompt
# template carries a ``{prompt}`` placeholder that ``apply_style`` substitutes
# with the user's text.
_STYLE_ROWS = (
    (
        "(No style)",
        "{prompt}",
        "",
    ),
    (
        "Cinematic",
        "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
        "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
    ),
    (
        "Photographic",
        "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
        "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly",
    ),
    (
        "Anime",
        "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed",
        "photo, deformed, black and white, realism, disfigured, low contrast",
    ),
    (
        "Manga",
        "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
        "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style",
    ),
    (
        "Digital Art",
        "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
        "photo, photorealistic, realism, ugly",
    ),
    (
        "Pixel art",
        "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
        "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic",
    ),
    (
        "Fantasy art",
        "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
        "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white",
    ),
    (
        "Neonpunk",
        "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
        "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",
    ),
    (
        "3D Model",
        "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
        "ugly, deformed, noisy, low poly, blurry, painting",
    ),
)

# Same list-of-dicts shape (and key order) the rest of the file expects.
style_list = [
    {"name": name, "prompt": template, "negative_prompt": avoid}
    for name, template, avoid in _STYLE_ROWS
]

# Lookup table: style name -> (prompt template, negative prompt).
styles = {
    entry["name"]: (entry["prompt"], entry["negative_prompt"]) for entry in style_list
}

# Names in declaration order; used as the choices of the style selector.
STYLE_NAMES = list(styles)

# Fallback style used when an unknown (or no) style name is supplied.
DEFAULT_STYLE_NAME = "(No style)"
def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
    """Expand a user prompt with the chosen style preset.

    Args:
        style_name: Key into ``styles``; unknown names (including ``None``)
            fall back to ``DEFAULT_STYLE_NAME``.
        positive: User prompt substituted for the ``{prompt}`` placeholder of
            the style's prompt template.
        negative: Optional user negative prompt appended to the style's own
            negative prompt.

    Returns:
        A ``(prompt, negative_prompt)`` tuple.
    """
    prompt_template, style_negative = styles.get(
        style_name, styles[DEFAULT_STYLE_NAME]
    )
    # Join with ", " so the last style term and the first user term do not
    # fuse into one word (plain concatenation produced e.g.
    # "disfiguredlow_quality"). Empty or None parts are skipped, so the
    # default style still returns the user's negative prompt unchanged.
    negative_parts = [part for part in (style_negative, negative) if part]
    return prompt_template.replace("{prompt}", positive), ", ".join(negative_parts)
def parllel_infer(payload):
    """Submit ``payload`` to ``send_request`` through a thread pool.

    The pool size and the number of submitted requests are both fixed at one.
    The first entry of each response's ``"generated_images"`` list is
    collected, in completion order.

    Returns:
        ``{"generated_images": [...]}`` holding one image per request.
    """
    worker_count = 1
    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        pending = [pool.submit(send_request, payload) for _ in range(worker_count)]
        # Drain the futures as they finish, keeping only each first image.
        first_images = [
            done.result()["generated_images"][0] for done in as_completed(pending)
        ]
    return {"generated_images": first_images}
def send_request(payload):
    """Call the module-level ``predictor`` with ``payload`` and return its response."""
    return predictor.predict(data=payload)
def infer(
    prompt,
    negative="low_quality",
    scale=7,
    style_name=None,
    num_steps=25,
):
    """Generate images for ``prompt`` and return them as base64 data URLs.

    Args:
        prompt: User prompt; rejected when it contains an entry of ``bad_words``.
        negative: User negative prompt, merged with the style's by ``apply_style``.
        scale: Sent to the endpoint as ``guidance_scale``.
        style_name: Style preset name passed to ``apply_style``.
        num_steps: Sent to the endpoint as ``num_inference_steps``.

    Returns:
        ``(images, update)`` where ``images`` is a list of
        ``data:image/jpeg;base64,...`` strings and ``update`` is
        ``gr.update(visible=True)`` for the share-button group.

    Raises:
        gr.Error: If the prompt matches a blocked word, or if the endpoint
            call fails or returns a response without ``"generated_images"``.
    """
    # Whole-word, case-sensitive match against every blocked word.
    # NOTE(review): entries are interpolated into the pattern unescaped --
    # confirm ``bad_words`` contains no regex metacharacters.
    if any(re.search(rf"\b{blocked}\b", prompt) for blocked in bad_words):
        raise gr.Error("Please try again with a different prompt")

    prompt, negative = apply_style(style_name, prompt, negative)
    payload = {
        "inputs": prompt,
        "parameters": {
            "negative_prompt": negative,
            "guidance_scale": scale,
            "num_inference_steps": num_steps,
        },
    }

    start_time = time.time()
    try:
        # The request and the response lookup are what can actually fail, so
        # they sit inside the handler. Previously the handler only wrapped
        # string formatting that could never raise.
        images_request = parllel_infer(payload)
        generated = images_request["generated_images"]
    except Exception as err:
        # UI boundary: log the real cause and show a readable message.
        print(f"inference request failed: {err!r}")
        raise gr.Error("SDXL did not return a valid result, try again") from err
    print(len(generated))
    print(time.time() - start_time)

    # The images are wrapped as JPEG data URLs for the gallery.
    images = [f"data:image/jpeg;base64,{image}" for image in generated]
    return images, gr.update(visible=True)
# Custom stylesheet for the demo page. It is passed to ``gr.Blocks(css=css)``
# where the app is assembled at the bottom of this file. Several selectors
# target ``elem_id`` values assigned to components in the UI section
# (``#gallery``, ``#prompt-container``, ``#gen-button``, ``#share-btn``,
# ``#share-btn-container``, ``#prompt-text-input``,
# ``#negative-prompt-text-input``).
css = """
.gradio-container {
font-family: 'IBM Plex Sans', sans-serif;
}
.gr-button {
color: white;
border-color: black;
background: black;
}
input[type='range'] {
accent-color: black;
}
.dark input[type='range'] {
accent-color: #dfdfdf;
}
.gradio-container {
max-width: 730px !important;
margin: auto;
padding-top: 1.5rem;
}
#gallery {
min-height: 22rem;
margin-bottom: 15px;
margin-left: auto;
margin-right: auto;
border-bottom-right-radius: .5rem !important;
border-bottom-left-radius: .5rem !important;
}
#gallery>div>.h-full {
min-height: 20rem;
}
.details:hover {
text-decoration: underline;
}
.gr-button {
white-space: nowrap;
}
.gr-button:focus {
border-color: rgb(147 197 253 / var(--tw-border-opacity));
outline: none;
box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
--tw-border-opacity: 1;
--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px var(--tw-ring-offset-width)) var(--tw-ring-color);
--tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
--tw-ring-opacity: .5;
}
#advanced-btn {
font-size: .7rem !important;
line-height: 19px;
margin-top: 12px;
margin-bottom: 12px;
padding: 2px 8px;
border-radius: 14px !important;
}
#advanced-options {
display: none;
margin-bottom: 20px;
}
.footer {
margin-bottom: 45px;
margin-top: 35px;
text-align: center;
border-bottom: 1px solid #e5e5e5;
}
.footer>p {
font-size: .8rem;
display: inline-block;
padding: 0 10px;
transform: translateY(10px);
background: white;
}
.dark .footer {
border-color: #303030;
}
.dark .footer>p {
background: #0b0f19;
}
.acknowledgments h4{
margin: 1.25em 0 .25em 0;
font-weight: bold;
font-size: 115%;
}
.animate-spin {
animation: spin 1s linear infinite;
}
@keyframes spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
#share-btn-container {padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; max-width: 13rem; margin-left: auto;}
div#share-btn-container > div {flex-direction: row;background: black;align-items: center}
#share-btn-container:hover {background-color: #060606}
#share-btn {all: initial; color: #ffffff;font-weight: 600; cursor:pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.5rem !important; padding-bottom: 0.5rem !important;right:0;}
#share-btn * {all: unset}
#share-btn-container div:nth-child(-n+2){width: auto !important;min-height: 0px !important;}
#share-btn-container .wrap {display: none !important}
#share-btn-container.hidden {display: none!important}
.gr-form{
flex: 1 1 50%; border-top-right-radius: 0; border-bottom-right-radius: 0;
}
#prompt-container{
gap: 0;
}
#prompt-container .form{
border-top-right-radius: 0;
border-bottom-right-radius: 0;
}
#gen-button{
border-top-left-radius:0;
border-bottom-left-radius:0;
}
#prompt-text-input, #negative-prompt-text-input{padding: .45rem 0.625rem}
#component-16{border-top-width: 1px!important;margin-top: 1em}
.image_duplication{position: absolute; width: 100px; left: 50px}
.tabitem{border: 0 !important}
"""
# Root container that the ``with block:`` section fills with components.
block = gr.Blocks()

# Example rows for ``gr.Examples``: the prompt, two unset (None) slots, and 25.
examples = [
    [example_prompt, None, None, 25]
    for example_prompt in (
        "A serious capybara at work, wearing a suit",
        "A Squirtle fine dining with a view to the London Eye",
        "A tamale food cart in front of a Japanese Castle",
        "a graffiti of a robot serving meals to people",
        "a beautiful cabin in Attersee, Austria, 3d animation style",
    )
]
# UI definition. Components are created in display order inside ``block``;
# the nesting under each ``with`` below groups the components that belong to
# that container.
with block:
    # Page header: logo, title and a short description of the demo.
    # NOTE(review): the logo's alt text mentions "Habana" while the image file
    # is named aws-neuron_hf.png -- confirm which is intended.
    gr.HTML(
        """
<div style="text-align: center; max-width: 650px; margin: 0 auto; display:grid; gap:25px;">
<img class="logo" src="https://huggingface.co/datasets/philschmid/assets/resolve/main/aws-neuron_hf.png" alt="Hugging Face Habana Logo"
style="margin: auto; max-width: 14rem;">
<h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">
Latent Consistent Stable Diffusion XL on AWS INF2 ⚡
</h1>
<p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">
Latent Consistency Models (LCMs) were proposed in Latent Consistency Models: Synthesizing High-Resolution Images with Few-Step Inference by Simian Luo, Yiqin Tan, Longbo Huang, Jian Li, and Hang Zhao. LCMs enable inference with fewer steps on any pre-trained LDMs, including Stable Diffusion and SDXL.
SDXL is a high quality text-to-image model from Stability AI. This demo is running on <a style="text-decoration: underline;" href="https://aws.amazon.com/ec2/instance-types/inf2/?nc1=h_ls">AWS Inferentia2</a>, to achieve efficient and cost-effective inference of 1024×1024 images. <a href="https://www.philschmid.de/inferentia2-stable-diffusion-xl" target="_blank">How does it work?</a>
</p>
</div>
"""
    )
    # Prompt row: single-line prompt box with the Generate button beside it.
    with gr.Row(elem_id="prompt-container").style(
        mobile_collapse=False, equal_height=True
    ):
        text = gr.Textbox(
            label="Enter your prompt",
            show_label=False,
            max_lines=1,
            placeholder="Enter your prompt",
            elem_id="prompt-text-input",
        )
        btn = gr.Button("Generate", scale=0, elem_id="gen-button")
    # Output gallery for the generated images.
    gallery = gr.Gallery(
        label="Generated images", show_label=False, elem_id="gallery", grid=[2]
    )
    # Share controls start hidden; ``infer`` returns ``gr.update(visible=True)``
    # for this group as its second output.
    with gr.Group(elem_id="share-btn-container", visible=False) as community_group:
        community_icon = gr.HTML(community_icon_html)
        loading_icon = gr.HTML(loading_icon_html)
        share_button = gr.Button("Share to community", elem_id="share-btn")
    # Optional generation settings, collapsed by default.
    with gr.Accordion("Advanced settings", open=False):
        style_selection = gr.Radio(
            show_label=True,
            container=True,
            interactive=True,
            choices=STYLE_NAMES,
            value=DEFAULT_STYLE_NAME,
            label="Image Style",
        )
        negative = gr.Textbox(
            label="Enter your negative prompt",
            show_label=False,
            max_lines=1,
            placeholder="Enter a negative prompt",
            elem_id="negative-prompt-text-input",
        )
        guidance_scale = gr.Slider(
            label="Guidance Scale", minimum=0, maximum=50, value=8, step=0.1
        )
        # The slider allows 1-16 steps (default 8); ``infer``'s own default of
        # 25 only applies when no value is passed for ``num_steps``.
        num_steps = gr.Slider(
            label="Number of steps", minimum=1, maximum=16, value=8, step=1
        )
    # Clickable example prompts with cached outputs.
    # NOTE(review): each row in ``examples`` has four values while only three
    # input components are listed here -- confirm how the installed Gradio
    # version treats the extra column.
    ex = gr.Examples(
        examples=examples,
        fn=infer,
        inputs=[text, negative, guidance_scale],
        outputs=[gallery, community_group],
        cache_examples=True,
        postprocess=False,
    )
    # Three triggers share one wiring: Enter in the negative-prompt box, Enter
    # in the prompt box, and a click on Generate all call ``infer``.
    negative.submit(
        infer,
        inputs=[text, negative, guidance_scale, style_selection, num_steps],
        outputs=[gallery, community_group],
        postprocess=False,
    )
    text.submit(
        infer,
        inputs=[text, negative, guidance_scale, style_selection, num_steps],
        outputs=[gallery, community_group],
        postprocess=False,
    )
    btn.click(
        infer,
        inputs=[text, negative, guidance_scale, style_selection, num_steps],
        outputs=[gallery, community_group],
        postprocess=False,
    )
    # The share button has no Python handler; it only runs ``share_js``.
    share_button.click(
        None,
        [],
        [],
        _js=share_js,
    )
    # Footer with attribution.
    gr.HTML(
        """
<div class="footer">
<p>Model by <a href="https://huggingface.co/stabilityai" style="text-decoration: underline;" target="_blank">StabilityAI</a> - backend running Optimum Neuron on AWS Inferentia2 - Gradio Demo by 🤗 Hugging Face - this is not an official AWS Product
</p>
</div>
"""
    )
    # Collapsed license and bias notice.
    with gr.Accordion(label="License", open=False):
        gr.HTML(
            """<div class="acknowledgments">
<p><h4>LICENSE</h4>
The model is licensed with a <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENSE.md" style="text-decoration: underline;" target="_blank">Stability AI CreativeML Open RAIL++-M</a> license. The License allows users to take advantage of the model in a wide range of settings (including free use and redistribution) as long as they respect the specific use case restrictions outlined, which correspond to model applications the licensor deems ill-suited for the model or are likely to cause harm. For the full list of restrictions please <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENSE.md" target="_blank" style="text-decoration: underline;" target="_blank">read the license</a></p>
<p><h4>Biases and content acknowledgment</h4>
Despite how impressive being able to turn text into image is, beware that this model may output content that reinforces or exacerbates societal biases, as well as realistic faces, pornography and violence. You can read more in the <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0" style="text-decoration: underline;" target="_blank">model card</a></p>
</div>
"""
        )
# Wrap the UI in an outer Blocks that carries the custom stylesheet.
with gr.Blocks(css=css) as block_with_history:
    block.render()

# Configure the request queue (8 concurrent workers, at most 10 waiting, API
# closed), then start the server with the API page hidden.
queued_app = block_with_history.queue(
    concurrency_count=8,
    max_size=10,
    api_open=False,
)
queued_app.launch(show_api=False)