huntingcarlisle committed on
Commit 31379ae · 1 Parent(s): ceca31f

Update app.py

Files changed (1)
  1. app.py +541 -24
app.py CHANGED
@@ -24,28 +24,28 @@ def display_image(image=None,width=500,height=500):
  # API Gateway endpoint URL
  api_url = 'https://a02q342s5b.execute-api.us-east-2.amazonaws.com/reinvent-demo-inf2-sm-20231114'

- # Define the CSS to change the text input background color
- input_field_style = """
- <style>
- /* Customize the text input field background and text color */
- .stTextInput input {
- background-color: #fbd8bf; /* 'Rind' color */
- color: #232F3E; /* Dark text color */
- }
- /* You might also want to change the color for textarea if you're using it */
- .stTextArea textarea {
- background-color: #fbd8bf; /* 'Rind' color */
- color: #232F3E; /* Dark text color */
- }
- </style>
- """

- # Inject custom styles into the Streamlit app
- st.markdown(input_field_style, unsafe_allow_html=True)


  # Creating Tabs
- tab1, tab2 = st.tabs(["Image Generation", "Architecture"])

  with tab1:
  # Create two columns for layout
@@ -122,10 +122,527 @@ with tab1:

  with tab2:
  # ===========
- # Define Streamlit UI elements
- st.title('Architecture')
- st.image('./architecture.png', caption=f"Application Architecture")
-
-
  # API Gateway endpoint URL
  api_url = 'https://a02q342s5b.execute-api.us-east-2.amazonaws.com/reinvent-demo-inf2-sm-20231114'

+ # # Define the CSS to change the text input background color
+ # input_field_style = """
+ # <style>
+ # /* Customize the text input field background and text color */
+ # .stTextInput input {
+ # background-color: #fbd8bf; /* 'Rind' color */
+ # color: #232F3E; /* Dark text color */
+ # }
+ # /* You might also want to change the color for textarea if you're using it */
+ # .stTextArea textarea {
+ # background-color: #fbd8bf; /* 'Rind' color */
+ # color: #232F3E; /* Dark text color */
+ # }
+ # </style>
+ # """

+ # # Inject custom styles into the Streamlit app
+ # st.markdown(input_field_style, unsafe_allow_html=True)


  # Creating Tabs
+ tab1, tab2, tab3 = st.tabs(["Image Generation", "Architecture", "Code"])

  with tab1:
  # Create two columns for layout

  with tab2:
  # ===========
+ left_column, _, right_column = st.columns([2,.2,3])
+
+ with right_column:
+ # Define Streamlit UI elements
+ st.markdown("""<br>""", unsafe_allow_html=True)
+ st.markdown("""<br>""", unsafe_allow_html=True)
+ st.markdown("""<br>""", unsafe_allow_html=True)
+ st.markdown("""<br>""", unsafe_allow_html=True)
+ st.markdown("""<br>""", unsafe_allow_html=True)
+ st.image('./architecture.png', caption=f"Application Architecture")
+
+ with left_column:
+ st.write("## Architecture Overview")
+ st.write("This diagram illustrates the architecture of our Generative AI service, which is composed of several interconnected AWS services, notably Amazon Elastic Compute Cloud (Amazon EC2). Here's a detailed look at each component:")
+
+ with st.expander("(1) Inference Models"):
+ st.markdown("""
+ - The architecture starts with our trained machine learning models hosted on Amazon SageMaker, running on an AWS Inferentia2 instance (`inf2.xlarge`).
+ - There are two models shown here: Stable Diffusion XL for image generation, and Llama 2 7B for text generation.
+ """)
+
+ with st.expander("(2) Amazon SageMaker Endpoints"):
+ st.markdown("""
+ - The models are exposed via SageMaker Endpoints, which provide scalable and secure real-time inference services.
+ - These endpoints are the interfaces through which the models receive input data and return predictions.
+ """)
+
+ with st.expander("(3) AWS Lambda"):
+ st.markdown("""
+ - AWS Lambda functions serve as the middle layer, handling the logic of communicating with the SageMaker Endpoints.
+ - Lambda can process the incoming requests, perform any necessary transformations, call the endpoints, and then process the results before sending them back.
+ """)
+
+ with st.expander("(4) Amazon API Gateway"):
+ st.markdown("""
+ - The processed results from Lambda are then routed through Amazon API Gateway.
+ - API Gateway acts as a front door to manage all incoming API requests, including authorization, throttling, and CORS handling.
+ """)
+
+ with st.expander("(5) Streamlit Frontend"):
+ st.markdown("""
+ - Finally, our Streamlit application provides a user-friendly interface for end-users to interact with the service.
+ - It sends requests to the API Gateway and displays the returned predictions from the machine learning models.
+ """)
+
+ st.write("""
+ In summary, this architecture enables a scalable, serverless, and responsive Generative AI service that can serve real-time predictions to users directly from a web interface.
+ """)
+
+ with tab3:
+ with st.expander("(1) Deploy GenAI Model to AWS Inferentia 2 Instance and Amazon SageMaker Endpoint"):
+ st.markdown(
+ """
+ [Source] This code is modified from this fantastic blog post by Phil Schmid at Hugging Face: https://www.philschmid.de/inferentia2-stable-diffusion-xl
+
+ # Deploy Stable Diffusion on AWS Inferentia2 with Amazon SageMaker
+
+ In this end-to-end tutorial, you will learn how to deploy and speed up Stable Diffusion XL inference using AWS Inferentia2 and [optimum-neuron](https://huggingface.co/docs/optimum-neuron/index) on Amazon SageMaker. [Optimum Neuron](https://huggingface.co/docs/optimum-neuron/index) is the interface between the Hugging Face Transformers & Diffusers libraries and AWS Accelerators, including AWS Trainium and AWS Inferentia2.
+
+ You will learn how to:
+
+ 1. Convert Stable Diffusion XL to AWS Neuron (Inferentia2) with `optimum-neuron`
+ 2. Create a custom `inference.py` script for Stable Diffusion
+ 3. Upload the neuron model and inference script to Amazon S3
+ 4. Deploy a Real-time Inference Endpoint on Amazon SageMaker
+ 5. Generate images using the deployed model
+
+ ## Quick intro: AWS Inferentia 2
+
+ [AWS Inferentia2 (Inf2)](https://aws.amazon.com/de/ec2/instance-types/inf2/) instances are purpose-built EC2 instances for deep learning (DL) inference workloads. Inferentia2 is the successor of [AWS Inferentia](https://aws.amazon.com/ec2/instance-types/inf1/?nc1=h_ls) and promises to deliver up to 4x higher throughput and up to 10x lower latency.
+
+ | instance size | accelerators | Neuron Cores | accelerator memory (GB) | vCPU | CPU memory (GiB) | on-demand price ($/h) |
+ | ------------- | ------------ | ------------ | ----------------------- | ---- | ---------------- | --------------------- |
+ | inf2.xlarge | 1 | 2 | 32 | 4 | 16 | 0.76 |
+ | inf2.8xlarge | 1 | 2 | 32 | 32 | 128 | 1.97 |
+ | inf2.24xlarge | 6 | 12 | 192 | 96 | 384 | 6.49 |
+ | inf2.48xlarge | 12 | 24 | 384 | 192 | 768 | 12.98 |
+
+ Additionally, Inferentia2 will support writing custom operators in C++ and new data types, including `FP8` (cFP8).
+
+ Let's get started! 🚀
+
+ *If you are going to use SageMaker in a local environment (not SageMaker Studio or Notebook Instances), you need access to an IAM role with the required permissions for SageMaker. You can find more about this [here](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html).*
+
+ ## 1. Convert Stable Diffusion to AWS Neuron (Inferentia2) with `optimum-neuron`
+
+ We are going to use [optimum-neuron](https://huggingface.co/docs/optimum-neuron/index) to compile/convert our model to neuronx. Optimum Neuron provides a set of tools enabling easy model loading, training and inference on single- and multi-accelerator settings for different downstream tasks.
+
+ As a first step, we need to install `optimum-neuron` and the other required packages.
+
+ *Tip: If you are using Amazon SageMaker Notebook Instances or Studio, you can go with the `conda_python3` conda kernel.*
+
+ ```python
+ # Install the required packages
+ %pip install "optimum-neuron==0.0.13" "diffusers==0.21.4" --upgrade
+ %pip install "sagemaker>=2.197.0" --upgrade
+ ```
+
+ After we have installed `optimum-neuron`, we can load and convert our model.
+
+ We are going to use the [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) model. Stable Diffusion XL (SDXL) from [Stability AI](https://stability.ai/) is the newest text-to-image generation model, which can create photorealistic images with more detailed imagery and composition than previous SD models, including SD 2.1.
+
+ At the time of writing, [AWS Inferentia2 does not support dynamic shapes for inference](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/arch/neuron-features/dynamic-shapes.html?highlight=dynamic%20shapes#), which means that we need to specify our image size in advance for compilation and inference.
+
+ In simpler terms, this means we need to define the input shapes for our prompt (sequence length), batch size, and the height and width of the image.
+
+ We precompiled the model with the following parameters and pushed it to the Hugging Face Hub:
+ * `height`: 1024
+ * `width`: 1024
+ * `sequence_length`: 128
+ * `num_images_per_prompt`: 1
+ * `batch_size`: 1
+ * `neuron`: 2.15.0
+
+ _Note: If you want to compile your own model or a different Stable Diffusion XL checkpoint, you need ~120GB of memory and the compilation can take ~45 minutes. We used an `inf2.8xlarge` EC2 instance with the [Hugging Face Neuron Deep Learning AMI](https://aws.amazon.com/marketplace/pp/prodview-gr3e6yiscria2) to compile the model._
+
+ ```python
+ from huggingface_hub import snapshot_download
+
+ # compiled model id
+ compiled_model_id = "aws-neuron/stable-diffusion-xl-base-1-0-1024x1024"
+
+ # save compiled model to local directory
+ save_directory = "sdxl_neuron"
+ # Downloads our compiled model from the HuggingFace Hub
+ # using the revision as neuron version reference
+ # and makes sure we exclude the symlink files and "hidden" files, like .DS_Store, .gitignore, etc.
+ snapshot_download(compiled_model_id, revision="2.15.0", local_dir=save_directory, local_dir_use_symlinks=False, allow_patterns=["[!.]*.*"])
+
+
+ ###############################################
+ # COMMENT IN BELOW TO COMPILE DIFFERENT MODEL #
+ ###############################################
+ #
+ # from optimum.neuron import NeuronStableDiffusionXLPipeline
+ #
+ # # model id you want to compile
+ # vanilla_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
+ #
+ # # configs for compiling model
+ # compiler_args = {"auto_cast": "all", "auto_cast_type": "bf16"}
+ # input_shapes = {
+ #     "height": 1024,               # height of the image
+ #     "width": 1024,                # width of the image
+ #     "num_images_per_prompt": 1,   # number of images to generate per prompt
+ #     "batch_size": 1               # batch size for the model
+ # }
+ #
+ # sd = NeuronStableDiffusionXLPipeline.from_pretrained(vanilla_model_id, export=True, **input_shapes, **compiler_args)
+ #
+ # # Save locally or upload to the HuggingFace Hub
+ # save_directory = "sdxl_neuron"
+ # sd.save_pretrained(save_directory)
+ ```
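
Before packaging the artifacts, it can be worth a quick sanity check that the downloaded Neuron files actually load and generate an image. The following is a minimal sketch, not part of the original notebook; it assumes you are running on an Inferentia2 instance with the Neuron runtime installed and reuses the `save_directory` from above:

```python
from optimum.neuron import NeuronStableDiffusionXLPipeline

# Illustrative smoke test: load the precompiled artifacts and generate one image.
# Requires an inf2 host with the Neuron SDK; mirrors the pipeline usage shown above.
pipeline = NeuronStableDiffusionXLPipeline.from_pretrained("sdxl_neuron")
image = pipeline("A raccoon astronaut in space, detailed, 8k")["images"][0]
image.save("smoke_test.jpg")
```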
+
+ ## 2. Create a custom `inference.py` script for Stable Diffusion
+
+ The [Hugging Face Inference Toolkit](https://github.com/aws/sagemaker-huggingface-inference-toolkit) supports zero-code deployments on top of the [pipeline feature](https://huggingface.co/transformers/main_classes/pipelines.html) from 🤗 Transformers. This allows users to deploy Hugging Face transformers without an inference script [[Example](https://github.com/huggingface/notebooks/blob/master/sagemaker/11_deploy_model_from_hf_hub/deploy_transformer_model_from_hf_hub.ipynb)].
+
+ Currently, this feature is not supported with AWS Inferentia2, which means we need to provide an `inference.py` script for running inference. But `optimum-neuron` has integrated support for the 🤗 Diffusers pipeline feature, so we can use `optimum-neuron` to create a pipeline for our model.
+
+ If you want to know more about the `inference.py` script, check out this [example](https://github.com/huggingface/notebooks/blob/master/sagemaker/17_custom_inference_script/sagemaker-notebook.ipynb). It explains, amongst other things, what `model_fn` and `predict_fn` are.
+
+ ```python
+ # create code directory in our model directory
+ !mkdir {save_directory}/code
+ ```
+
+ We are using `NEURON_RT_NUM_CORES=2` to make sure that each HTTP worker uses 2 Neuron cores to maximize throughput.
+
+ ```python
+ %%writefile {save_directory}/code/inference.py
+ import os
+ # To use two Neuron cores per worker
+ os.environ["NEURON_RT_NUM_CORES"] = "2"
+ import torch
+ import torch_neuronx
+ import base64
+ from io import BytesIO
+ from optimum.neuron import NeuronStableDiffusionXLPipeline
+
+
+ def model_fn(model_dir):
+     # load the local converted model into a pipeline
+     pipeline = NeuronStableDiffusionXLPipeline.from_pretrained(model_dir, device_ids=[0, 1])
+     return pipeline
+
+
+ def predict_fn(data, pipeline):
+     # extract prompt from data
+     prompt = data.pop("inputs", data)
+
+     parameters = data.pop("parameters", None)
+
+     if parameters is not None:
+         generated_images = pipeline(prompt, **parameters)["images"]
+     else:
+         generated_images = pipeline(prompt)["images"]
+
+     # postprocess: convert each image into a base64 string
+     encoded_images = []
+     for image in generated_images:
+         buffered = BytesIO()
+         image.save(buffered, format="JPEG")
+         encoded_images.append(base64.b64encode(buffered.getvalue()).decode())
+
+     # return the base64-encoded images
+     return {"generated_images": encoded_images}
+ ```
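
The `%%writefile` cell above only writes the handlers to disk. If you want to exercise them locally before packaging (on the same Inferentia2 host), a rough sketch, assuming you also paste `model_fn` and `predict_fn` into a live notebook cell first, looks like this:

```python
# Illustrative local check of the handlers above; not part of the original notebook.
pipeline = model_fn("sdxl_neuron")
result = predict_fn(
    {"inputs": "A raccoon astronaut in space", "parameters": {"num_inference_steps": 25}},
    pipeline,
)
print(len(result["generated_images"][0]))  # length of the base64-encoded image string
```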
+
+ ## 3. Upload the neuron model and inference script to Amazon S3
+
+ Before we can deploy our neuron model to Amazon SageMaker, we need to upload all our model artifacts to Amazon S3.
+
+ _Note: Currently `inf2` instances are only available in the `us-east-2` & `us-east-1` regions [[REF](https://aws.amazon.com/de/about-aws/whats-new/2023/05/sagemaker-ml-inf2-ml-trn1-instances-model-deployment/)]. Therefore we need to force the region to us-east-2._
+
+ Let's create our SageMaker session and upload our model to Amazon S3.
+
+ ```python
+ import sagemaker
+ import boto3
+ sess = sagemaker.Session()
+ # sagemaker session bucket -> used for uploading data, models and logs
+ # sagemaker will automatically create this bucket if it does not exist
+ sagemaker_session_bucket = None
+ if sagemaker_session_bucket is None and sess is not None:
+     # set to default bucket if a bucket name is not given
+     sagemaker_session_bucket = sess.default_bucket()
+
+ try:
+     role = sagemaker.get_execution_role()
+ except ValueError:
+     iam = boto3.client('iam')
+     role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']
+
+ sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)
+
+ print(f"sagemaker role arn: {role}")
+ print(f"sagemaker bucket: {sess.default_bucket()}")
+ print(f"sagemaker session region: {sess.boto_region_name}")
+ assert sess.boto_region_name in ["us-east-2", "us-east-1"], "region must be us-east-2 or us-east-1, due to instance availability"
+ ```
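
The cell above only asserts the region; it does not change it. If your default AWS region is something else, one way to pin the session to `us-east-2` (an illustrative sketch, not part of the original notebook) is to build it from an explicit `boto3` session:

```python
import boto3
import sagemaker

# Illustrative only: pin the SageMaker session to us-east-2 so the assertion above passes.
boto_sess = boto3.Session(region_name="us-east-2")
sess = sagemaker.Session(boto_session=boto_sess, default_bucket=sagemaker_session_bucket)
print(sess.boto_region_name)  # -> us-east-2
```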
+
+ We create our `model.tar.gz` with our `inference.py` script:
+
+ ```python
+ # create a model.tar.gz archive with all the model artifacts and the inference.py script.
+ %cd {save_directory}
+ !tar zcvf model.tar.gz *
+ %cd ..
+ ```
+
+ Next, we upload our `model.tar.gz` to Amazon S3 using our session bucket and the `sagemaker` SDK.
+
+ ```python
+ from sagemaker.s3 import S3Uploader
+
+ # create s3 uri
+ s3_model_path = f"s3://{sess.default_bucket()}/neuronx/sdxl"
+
+ # upload model.tar.gz
+ s3_model_uri = S3Uploader.upload(local_path=f"{save_directory}/model.tar.gz", desired_s3_uri=s3_model_path)
+ print(f"model artifacts uploaded to {s3_model_uri}")
+ ```
+
+ ## 4. Deploy a Real-time Inference Endpoint on Amazon SageMaker
+
+ After we have uploaded our model artifacts to Amazon S3, we can create a custom `HuggingFaceModel`. This class will be used to create and deploy our real-time inference endpoint on Amazon SageMaker.
+
+ The `inf2.xlarge` instance type is the smallest instance type with AWS Inferentia2 support. It comes with 1 Inferentia2 chip with 2 Neuron Cores. This means we can use 2 Neuron Cores to minimize latency for our image generation.
+
+ ```python
+ from sagemaker.huggingface.model import HuggingFaceModel
+
+ # create Hugging Face Model Class
+ huggingface_model = HuggingFaceModel(
+     model_data=s3_model_uri,        # path to your model.tar.gz on s3
+     role=role,                      # iam role with permissions to create an Endpoint
+     transformers_version="4.34.1",  # transformers version used
+     pytorch_version="1.13.1",       # pytorch version used
+     py_version='py310',             # python version used
+     model_server_workers=1,         # number of workers for the model server
+ )
+
+ # deploy the endpoint
+ predictor = huggingface_model.deploy(
+     initial_instance_count=1,        # number of instances
+     instance_type="ml.inf2.xlarge",  # AWS Inferentia instance
+     volume_size=100
+ )
+ # ignore the "Your model is not compiled. Please compile your model before using Inferentia." warning, we already compiled our model.
+ ```
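
If the notebook kernel is restarted later, there is no need to redeploy; a predictor can be re-attached to the running endpoint. A rough sketch (the endpoint name below is a placeholder you would read from the deploy output or the SageMaker console):

```python
from sagemaker.huggingface.model import HuggingFacePredictor

# Illustrative only: reattach to an already-running endpoint instead of redeploying.
predictor = HuggingFacePredictor(
    endpoint_name="<your-endpoint-name>",  # placeholder
    sagemaker_session=sess,
)
```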
+
+ ## 5. Generate images using the deployed model
+
+ `.deploy()` returns a `HuggingFacePredictor` object which can be used to request inference. Our endpoint expects a `json` payload with at least an `inputs` key. The `inputs` key is the input prompt for the model, which will be used to generate the image. Additionally, we can provide inference parameters, e.g. `num_inference_steps`.
+
+ The `predictor.predict()` function returns a `json` with the `generated_images` key, which contains the generated image(s) as `base64`-encoded strings. To decode the response we add a small helper function, `decode_base64_image`, which takes a `base64`-encoded string and returns a `PIL.Image` object, and `display_image`, which displays it.
+
+ ```python
+ from PIL import Image
+ from io import BytesIO
+ from IPython.display import display
+ import base64
+
+ # helper decoder
+ def decode_base64_image(image_string):
+     base64_image = base64.b64decode(image_string)
+     buffer = BytesIO(base64_image)
+     return Image.open(buffer)
+
+ # resize and display a PIL image
+ def display_image(image=None,width=500,height=500):
+     img = image.resize((width, height))
+     display(img)
+ ```
+
+ Now, let's generate some images, for example: `A dog trying catch a flying pizza in style of comic book, at a street corner.` Generating an image with 25 steps takes around ~6 seconds, except for the first request, which can take 45-60s.
+ _Note: If the request times out, just rerun it; only the first request takes a long time._
+
+ ```python
+ prompt = "A dog trying catch a flying pizza at a street corner, comic book, well lit, night time"
+
+ # run prediction
+ response = predictor.predict(data={
+     "inputs": prompt,
+     "parameters": {
+         "num_inference_steps": 25,
+         "negative_prompt": "disfigured, ugly, deformed"
+     }
+   }
+ )
+
+ # decode and display image
+ display_image(decode_base64_image(response["generated_images"][0]))
+ ```
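
If you also want to keep the result on disk rather than only displaying it inline, the decoded `PIL.Image` can simply be saved (a small illustrative addition, not in the original notebook; the filename is arbitrary):

```python
# Illustrative only: persist the decoded image next to the notebook.
img = decode_base64_image(response["generated_images"][0])
img.save("generated_image.jpg", format="JPEG")
```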
+
+ ### Delete model and endpoint
+
+ To clean up, we can delete the model and endpoint.
+
+ ```python
+ predictor.delete_model()
+ predictor.delete_endpoint()
+ ```
+
+ """
+
+ )

+ with st.expander("(2) AWS Lambda Function to handle inference requests"):
+ st.markdown(
+ """
+ ```python
+ import boto3
+ import json
+
+ def lambda_handler(event, context):
+     # SageMaker endpoint details
+     endpoint_name = 'INSERT_YOUR_SAGEMAKER_ENDPOINT_NAME_HERE'
+     runtime = boto3.client('sagemaker-runtime')
+
+     # Get the prompt and parameters from the API Gateway request body
+     print("======== event payload: ==========")
+     print(event['body'])
+
+     print("======== prompt payload: ==========")
+     event_parsed = json.loads(event['body'])
+     prompt = event_parsed.get('prompt', '')
+     print(prompt)
+     print("======== params payload: ==========")
+     params = event_parsed.get('parameters', '')
+     print(params)
+
+     # Prepare input data
+     model_input = {
+         'inputs': prompt,
+         'parameters': params
+     }
+
+     input_data = json.dumps(model_input)
+
+     # Make a prediction request to the SageMaker endpoint
+     response = runtime.invoke_endpoint(EndpointName=endpoint_name,
+                                        ContentType='application/json',
+                                        Body=input_data)
+
+     # Parse the response
+     result = response['Body'].read()
+     return {
+         'statusCode': 200,
+         'body': result
+     }
+ ```
+
+ """
+ )
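
For reference, the handler above expects an API Gateway event whose `body` is a JSON string with `prompt` and `parameters` keys (this is what the Streamlit frontend sends). A minimal sketch of how it could be exercised locally, assuming valid AWS credentials and a deployed endpoint; the event shape shown is an assumption based on the frontend code below:

```python
import json

# Illustrative only: simulate the API Gateway event this Lambda handler expects.
test_event = {
    "body": json.dumps({
        "prompt": "Raccoon astronaut in space, detailed, 8k",
        "parameters": {"num_inference_steps": 30, "seed": 555},
    })
}

result = lambda_handler(test_event, context=None)
print(result["statusCode"])
```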
+
+ with st.expander("(3) Streamlit app.py, running on Amazon EC2 t2.micro instance"):
+ st.markdown(
+ """
+ ```python
+ import streamlit as st
+ # Set the page layout to 'wide'
+ st.set_page_config(layout="wide")
+ import requests
+ from PIL import Image
+ from io import BytesIO
+ import base64
+ import time
+
+
+ # helper decoder
+ def decode_base64_image(image_string):
+     base64_image = base64.b64decode(image_string)
+     buffer = BytesIO(base64_image)
+     return Image.open(buffer)
+
+ # resize a PIL image for display
+ def display_image(image=None,width=500,height=500):
+     img = image.resize((width, height))
+     return img
+
+ # API Gateway endpoint URL
+ api_url = 'INSERT_YOUR_API_GATEWAY_ENDPOINT_URL_HERE'
+ # Create two columns for layout
+ left_column, right_column = st.columns(2)
+ # ===========
+ with left_column:
+     # Define Streamlit UI elements
+     st.title('Stable Diffusion XL Image Generation with AWS Inferentia')
+
+     prompt_one = st.text_area("Enter your prompt:",
+                               f"Raccoon astronaut in space, sci-fi, future, cold color palette, muted colors, detailed, 8k")
+
+     # Number of inference steps
+     num_inference_steps_one = st.slider("Number of Inference Steps",
+                                         min_value=1,
+                                         max_value=100,
+                                         value=30,
+                                         help="More steps might improve quality, with diminishing marginal returns. 30-50 seems best, but your mileage may vary.")
+
+     # Create an expandable section for optional parameters
+     with st.expander("Optional Parameters"):
+         # Random seed input
+         seed_one = st.number_input("Random seed",
+                                    value=555,
+                                    help="Set to the same value to generate the same image if other inputs are the same, change to generate a different image for same inputs.")
+
+         # Negative prompt input
+         negative_prompt_one = st.text_area("Enter your negative prompt:",
+                                            "cartoon, graphic, text, painting, crayon, graphite, abstract glitch, blurry")
+
+     if st.button('Generate Image'):
+         with st.spinner(f'Generating Image with {num_inference_steps_one} iterations'):
+             with right_column:
+                 start_time = time.time()
+                 # ===============
+                 # Example input data
+                 prompt_input_one = {
+                     "prompt": prompt_one,
+                     "parameters": {
+                         "num_inference_steps": num_inference_steps_one,
+                         "seed": seed_one,
+                         "negative_prompt": negative_prompt_one
+                     }
+                 }
+
+                 # Make API request
+                 response_one = requests.post(api_url, json=prompt_input_one)
+
+                 # Process and display the response
+                 if response_one.status_code == 200:
+                     result_one = response_one.json()
+                     # st.success(f"Prediction result: {result}")
+                     image_one = display_image(decode_base64_image(result_one["generated_images"][0]))
+                     st.image(image_one,
+                              caption=f"{prompt_one}")
+                     end_time = time.time()
+                     total_time = round(end_time - start_time, 2)
+                     st.text(f"Prompt: {prompt_one}")
+                     st.text(f"Number of Iterations: {num_inference_steps_one}")
+                     st.text(f"Random Seed: {seed_one}")
+                     st.text(f'Total time taken: {total_time} seconds')
+                     # Calculate and display the time per iteration in seconds
+                     time_per_iteration = total_time / num_inference_steps_one
+                     st.text(f'Time per iteration: {time_per_iteration:.2f} seconds')
+                 else:
+                     st.error(f"Error: {response_one.text}")
+ ```
+
+ """
+ )