Spaces:

scepter-studio
/

ACE-Chat

Running on Zero

App Files Files Community

chaojiemao committed on Nov 20, 2024

Commit

ec9288d

verified ·

1 Parent(s): 78e9f55

Update app.py

Browse files

Files changed (1) hide show

app.py +288 -109

app.py CHANGED Viewed

@@ -1,45 +1,40 @@
 # -*- coding: utf-8 -*-
 # Copyright (c) Alibaba, Inc. and its affiliates.
-import os
-import shlex
-import subprocess
-subprocess.run(shlex.split('pip install flash-attn --no-build-isolation'), env=os.environ | {'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"})
-import sys
-import csv
-csv.field_size_limit(sys.maxsize)
-import argparse
 import base64
 import copy
 import glob
 import io
-import os
 import random
 import re
 import string
 import threading
 import spaces
 import cv2
 import gradio as gr
 import numpy as np
 import torch
 import transformers
-from diffusers import CogVideoXImageToVideoPipeline
-from diffusers.utils import export_to_video
-from gradio_imageslider import ImageSlider
 from PIL import Image
 from transformers import AutoModel, AutoTokenizer
 from scepter.modules.utils.config import Config
 from scepter.modules.utils.directory import get_md5
 from scepter.modules.utils.file_system import FS
 from scepter.studio.utils.env import init_env
-from infer import ACEInference
 from example import get_examples
 from utils import load_image
 refresh_sty = '\U0001f504'  # 🔄
 clear_sty = '\U0001f5d1'  # 🗑️
@@ -53,33 +48,56 @@ lock = threading.Lock()
 class ChatBotUI(object):
     def __init__(self,
-                 cfg,
                  root_work_dir='./'):
         cfg.WORK_DIR = os.path.join(root_work_dir, cfg.WORK_DIR)
         if not FS.exists(cfg.WORK_DIR):
             FS.make_dir(cfg.WORK_DIR)
         cfg = init_env(cfg)
         self.cache_dir = cfg.WORK_DIR
-        self.chatbot_examples = get_examples(self.cache_dir)
         self.model_cfg_dir = cfg.MODEL.EDIT_MODEL.MODEL_CFG_DIR
         self.model_yamls = glob.glob(os.path.join(self.model_cfg_dir,
                                                   '*.yaml'))
         self.model_choices = dict()
         for i in self.model_yamls:
-            model_name = '.'.join(i.split('/')[-1].split('.')[:-1])
-            self.model_choices[model_name] = i
-        print('Models: ', self.model_choices)
-        self.model_name = cfg.MODEL.EDIT_MODEL.DEFAULT
-        assert self.model_name in self.model_choices
-        model_cfg = Config(load=True,
-                           cfg_file=self.model_choices[self.model_name])
         self.pipe = ACEInference()
-        self.pipe.init_from_cfg(model_cfg)
         self.max_msgs = 20
         self.enable_i2v = cfg.get('ENABLE_I2V', False)
         if self.enable_i2v:
             self.i2v_model_dir = cfg.MODEL.I2V.MODEL_DIR
             self.i2v_model_name = cfg.MODEL.I2V.MODEL_NAME
@@ -115,15 +133,11 @@ class ChatBotUI(object):
                 )
             sys_prompt = """You are part of a team of bots that creates videos. You work with an assistant bot that will draw anything you say in square brackets.
             For example , outputting " a beautiful morning in the woods with the sun peaking through the trees " will trigger your partner bot to output an video of a forest morning , as described. You will be prompted by people looking to create detailed , amazing videos. The way to accomplish this is to take their short prompts and make them extremely detailed and descriptive.
             There are a few rules to follow:
             You will only ever output a single video description per user request.
             When modifications are requested , you should not simply make the description longer . You should refactor the entire description to integrate the suggestions.
             Other times the user will not want modifications , but instead want a new image . In this case , you should ignore your previous conversation with the user.
             Video descriptions must have the same num of words as examples below. Extra words will be ignored.
             """
             self.enhance_ctx = [
@@ -170,6 +184,7 @@ class ChatBotUI(object):
             ]
     def create_ui(self):
         css = '.chatbot.prose.md {opacity: 1.0 !important} #chatbot {opacity: 1.0 !important}'
         with gr.Blocks(css=css,
                        title='Chatbot',
@@ -180,7 +195,8 @@ class ChatBotUI(object):
             self.history_result = gr.State(value={})
             self.retry_msg = gr.State(value='')
             with gr.Group():
-                with gr.Row(equal_height=True):
                     with gr.Column(visible=True) as self.chat_page:
                         self.chatbot = gr.Chatbot(
                             height=600,
@@ -195,7 +211,7 @@ class ChatBotUI(object):
                                                        size='sm')
                     with gr.Column(visible=False) as self.editor_page:
-                        with gr.Tabs():
                             with gr.Tab(id='ImageUploader',
                                         label='Image Uploader',
                                         visible=True) as self.upload_tab:
@@ -204,7 +220,7 @@ class ChatBotUI(object):
                                     interactive=True,
                                     type='pil',
                                     image_mode='RGB',
-                                    sources='upload',
                                     elem_id='image_uploader',
                                     format='png')
                                 with gr.Row():
@@ -212,10 +228,9 @@ class ChatBotUI(object):
                                         value='Submit',
                                         elem_id='upload_submit')
                                     self.ext_btn_1 = gr.Button(value='Exit')
                             with gr.Tab(id='ImageEditor',
-                                        label='Image Editor',
-                                        visible=False) as self.edit_tab:
                                 self.mask_type = gr.Dropdown(
                                     label='Mask Type',
                                     choices=[
@@ -278,13 +293,23 @@ class ChatBotUI(object):
                                     self.ext_btn_2 = gr.Button(value='Exit')
                             with gr.Tab(id='ImageViewer',
-                                        label='Image Viewer',
-                                        visible=False) as self.image_view_tab:
-                                self.image_viewer = ImageSlider(
-                                    label='Image',
-                                    type='pil',
-                                    show_download_button=True,
-                                    elem_id='image_viewer')
                                 self.ext_btn_3 = gr.Button(value='Exit')
@@ -303,11 +328,30 @@ class ChatBotUI(object):
                                 self.ext_btn_4 = gr.Button(value='Exit')
                 with gr.Accordion(label='Setting', open=False):
                     with gr.Row():
                         self.model_name_dd = gr.Dropdown(
                             choices=self.model_choices,
-                            value=self.model_name,
                             label='Model Version')
                     with gr.Row():
@@ -318,39 +362,63 @@ class ChatBotUI(object):
                             label='Negative Prompt',
                             container=False)
                     with gr.Row():
                         with gr.Column(scale=8, min_width=500):
                             with gr.Row():
                                 self.step = gr.Slider(minimum=1,
                                                       maximum=1000,
-                                                      value=20,
                                                       label='Sample Step')
                                 self.cfg_scale = gr.Slider(
                                     minimum=1.0,
                                     maximum=20.0,
-                                    value=4.5,
                                     label='Guidance Scale')
                                 self.rescale = gr.Slider(minimum=0.0,
                                                          maximum=1.0,
-                                                         value=0.5,
                                                          label='Rescale')
                                 self.seed = gr.Slider(minimum=-1,
                                                       maximum=10000000,
                                                       value=-1,
                                                       label='Seed')
                                 self.output_height = gr.Slider(
                                     minimum=256,
-                                    maximum=1024,
-                                    value=512,
                                     label='Output Height')
                                 self.output_width = gr.Slider(
                                     minimum=256,
-                                    maximum=1024,
-                                    value=512,
                                     label='Output Width')
                         with gr.Column(scale=1, min_width=50):
                             self.use_history = gr.Checkbox(value=False,
                                                            label='Use History')
                             self.video_auto = gr.Checkbox(
                                 value=False,
                                 label='Auto Gen Video',
@@ -387,9 +455,8 @@ class ChatBotUI(object):
                                                     visible=True)
                 with gr.Row():
-                    inst = """
                        **Instruction**:
                        1. Click 'Upload' button to upload one or more images as input images.
                        2. Enter '@' in the text box will exhibit all images in the gallery.
                        3. Select the image you wish to edit from the gallery, and its Image ID will be displayed in the text box.
@@ -399,14 +466,24 @@ class ChatBotUI(object):
                        6. **Important** To render text on an image, please ensure to include a space between each letter. For instance, "add text 'g i r l' on the mask area of @xxxxx".
                        7. To implement local editing based on a specified mask, simply click on the image within the chat window to access the image editor. Here, you can draw a mask and then click the 'Submit' button to upload the edited image along with the mask. For inpainting tasks, select the 'Composite' mask type, while for outpainting tasks, choose the 'Outpainting' mask type. For all other local editing tasks, please select the 'Background' mask type.
                        8. If you find our work valuable, we invite you to refer to the [ACE Page](https://ali-vilab.github.io/ace-page/) for comprehensive information.
                     """
-                    gr.Markdown(value=inst)
                 with gr.Row(variant='panel',
                             equal_height=True,
                             show_progress=False):
-                    with gr.Column(scale=1, min_width=100):
                         self.upload_btn = gr.Button(value=upload_sty +
                                                     ' Upload',
                                                     variant='secondary')
@@ -416,12 +493,16 @@ class ChatBotUI(object):
                             label='Instruction',
                             container=False)
                     with gr.Column(scale=1, min_width=100):
-                        self.chat_btn = gr.Button(value=chat_sty + ' Chat',
                                                   variant='primary')
                     with gr.Column(scale=1, min_width=100):
                         self.retry_btn = gr.Button(value=refresh_sty +
                                                    ' Retry',
                                                    variant='secondary')
                     with gr.Column(scale=(1 if self.enable_i2v else 0),
                                    min_width=0):
                         self.video_gen_btn = gr.Button(value=video_sty +
@@ -457,19 +538,77 @@ class ChatBotUI(object):
                 lock.acquire()
                 del self.pipe
                 torch.cuda.empty_cache()
-                model_cfg = Config(load=True,
-                                   cfg_file=self.model_choices[model_name])
                 self.pipe = ACEInference()
-                self.pipe.init_from_cfg(model_cfg)
                 self.model_name = model_name
                 lock.release()
-            return model_name, gr.update(), gr.update()
         self.model_name_dd.change(
             change_model,
             inputs=[self.model_name_dd],
-            outputs=[self.model_name_dd, self.chatbot, self.text])
         ########################################
         def generate_gallery(text, images):
@@ -516,7 +655,6 @@ class ChatBotUI(object):
                             outputs=[self.text, self.gallery])
         ########################################
-        @spaces.GPU(duration=120)
         def generate_video(message,
                            extend_prompt,
                            history,
@@ -527,6 +665,9 @@ class ChatBotUI(object):
                            fps,
                            seed,
                            progress=gr.Progress(track_tqdm=True)):
             generator = torch.Generator(device='cuda').manual_seed(seed)
             img_ids = re.findall('@(.*?)[ ,;.?$]', message)
             if len(img_ids) == 0:
@@ -597,8 +738,12 @@ class ChatBotUI(object):
             outputs=[self.history, self.chatbot, self.text, self.gallery])
         ########################################
-        @spaces.GPU(duration=60)
-        def run_chat(message,
                      extend_prompt,
                      history,
                      images,
@@ -607,6 +752,8 @@ class ChatBotUI(object):
                      negative_prompt,
                      cfg_scale,
                      rescale,
                      step,
                      seed,
                      output_h,
@@ -618,12 +765,25 @@ class ChatBotUI(object):
                      video_fps,
                      video_seed,
                      progress=gr.Progress(track_tqdm=True)):
             retry_msg = message
             gen_id = get_md5(message)[:12]
             save_path = os.path.join(self.cache_dir, f'{gen_id}.png')
             img_ids = re.findall('@(.*?)[ ,;.?$]', message)
             history_io = None
             new_message = message
             if len(img_ids) > 0:
@@ -655,9 +815,9 @@ class ChatBotUI(object):
                         history_io = history_result[img_id]
                 buffered = io.BytesIO()
-                edit_image[0].save(buffered, format='JPEG')
                 img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-                img_str = f'<img src="data:image/jpg;base64,{img_b64}" style="pointer-events: none;">'
                 pre_info = f'Received one or more images, so image editing is conducted.\n The first input image @{img_ids[0]} is:\n {img_str}'
             else:
                 pre_info = 'No image ids were found in the provided text prompt, so text-guided image generation is conducted. \n'
@@ -682,6 +842,9 @@ class ChatBotUI(object):
                 guide_scale=cfg_scale,
                 guide_rescale=rescale,
                 seed=seed,
             )
             img = imgs[0]
@@ -728,9 +891,9 @@ class ChatBotUI(object):
             }
             buffered = io.BytesIO()
-            img.convert('RGB').save(buffered, format='JPEG')
             img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-            img_str = f'<img src="data:image/jpg;base64,{img_b64}" style="pointer-events: none;">'
             history.append(
                 (message,
@@ -790,21 +953,25 @@ class ChatBotUI(object):
             while len(history) >= self.max_msgs:
                 history.pop(0)
-            return history, images, history_result, self.get_history(
-                history), gr.update(value=''), gr.update(
-                    visible=False), retry_msg
         chat_inputs = [
             self.extend_prompt, self.history, self.images, self.use_history,
             self.history_result, self.negative_prompt, self.cfg_scale,
-            self.rescale, self.step, self.seed, self.output_height,
             self.output_width, self.video_auto, self.video_step,
             self.video_frames, self.video_cfg_scale, self.video_fps,
             self.video_seed
         ]
         chat_outputs = [
-            self.history, self.images, self.history_result, self.chatbot,
             self.text, self.gallery, self.retry_msg
         ]
@@ -824,7 +991,7 @@ class ChatBotUI(object):
                              outputs=chat_outputs)
         ########################################
-        @spaces.GPU(duration=60)
         def run_example(task, img, img_mask, ref1, prompt, seed):
             edit_image, edit_image_mask, edit_task = [], [], []
             if img is not None:
@@ -848,9 +1015,9 @@ class ChatBotUI(object):
                     edit_task.append('')
                 buffered = io.BytesIO()
-                img.save(buffered, format='JPEG')
                 img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-                img_str = f'<img src="data:image/jpg;base64,{img_b64}" style="pointer-events: none;">'
                 pre_info = f'Received one or more images, so image editing is conducted.\n The first input image is:\n {img_str}'
             else:
                 pre_info = 'No image ids were found in the provided text prompt, so text-guided image generation is conducted. \n'
@@ -866,13 +1033,15 @@ class ChatBotUI(object):
                 prompt=[prompt] * img_num,
                 negative_prompt=[''] * img_num,
                 seed=seed,
             )
             img = imgs[0]
             buffered = io.BytesIO()
-            img.convert('RGB').save(buffered, format='JPEG')
             img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-            img_str = f'<img src="data:image/jpg;base64,{img_b64}" style="pointer-events: none;">'
             history = [(prompt,
                         f'{pre_info} The generated image is:\n {img_str}')]
             return self.get_history(history), gr.update(value=''), gr.update(
@@ -911,21 +1080,23 @@ class ChatBotUI(object):
             return (gr.update(visible=True,
                               scale=1), gr.update(visible=True, scale=1),
                     gr.update(visible=True), gr.update(visible=False),
-                    gr.update(visible=False), gr.update(visible=False))
         self.upload_btn.click(upload_image,
                               inputs=[],
                               outputs=[
                                   self.chat_page, self.editor_page,
                                   self.upload_tab, self.edit_tab,
-                                  self.image_view_tab, self.video_view_tab
                               ])
         ########################################
         def edit_image(evt: gr.SelectData):
             if isinstance(evt.value, str):
                 img_b64s = re.findall(
-                    '<img src="data:image/jpg;base64,(.*?)" style="pointer-events: none;">',
                     evt.value)
                 imgs = [
                     Image.open(io.BytesIO(base64.b64decode(copy.deepcopy(i))))
@@ -933,13 +1104,19 @@ class ChatBotUI(object):
                 ]
                 if len(imgs) > 0:
                     if len(imgs) == 2:
-                        view_img = copy.deepcopy(imgs)
                         edit_img = copy.deepcopy(imgs[-1])
                     else:
-                        view_img = [
-                            copy.deepcopy(imgs[-1]),
-                            copy.deepcopy(imgs[-1])
-                        ]
                         edit_img = copy.deepcopy(imgs[-1])
                     return (gr.update(visible=True,
@@ -948,11 +1125,12 @@ class ChatBotUI(object):
                             gr.update(visible=False), gr.update(visible=True),
                             gr.update(visible=True), gr.update(visible=False),
                             gr.update(value=edit_img),
-                            gr.update(value=view_img), gr.update(value=None))
                 else:
                     return (gr.update(), gr.update(), gr.update(), gr.update(),
                             gr.update(), gr.update(), gr.update(), gr.update(),
-                            gr.update())
             elif isinstance(evt.value, dict) and evt.value.get(
                     'component', '') == 'video':
                 value = evt.value['value']['video']['path']
@@ -960,11 +1138,12 @@ class ChatBotUI(object):
                                   scale=1), gr.update(visible=True, scale=1),
                         gr.update(visible=False), gr.update(visible=False),
                         gr.update(visible=False), gr.update(visible=True),
-                        gr.update(), gr.update(), gr.update(value=value))
             else:
                 return (gr.update(), gr.update(), gr.update(), gr.update(),
                         gr.update(), gr.update(), gr.update(), gr.update(),
-                        gr.update())
         self.chatbot.select(edit_image,
                             outputs=[
@@ -972,16 +1151,17 @@ class ChatBotUI(object):
                                 self.upload_tab, self.edit_tab,
                                 self.image_view_tab, self.video_view_tab,
                                 self.image_editor, self.image_viewer,
-                                self.video_viewer
                             ])
-        self.image_viewer.change(lambda x: x,
-                                 inputs=self.image_viewer,
-                                 outputs=self.image_viewer)
         ########################################
         def submit_upload_image(image, history, images):
-            history, images = self.add_uploaded_image_to_history(
                 image, history, images)
             return gr.update(visible=False), gr.update(
                 visible=True), gr.update(
@@ -1151,14 +1331,14 @@ class ChatBotUI(object):
         thumbnail.save(thumbnail_path, format='JPEG')
         buffered = io.BytesIO()
-        img.convert('RGB').save(buffered, format='JPEG')
         img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-        img_str = f'<img src="data:image/jpg;base64,{img_b64}" style="pointer-events: none;">'
         buffered = io.BytesIO()
-        mask.convert('RGB').save(buffered, format='JPEG')
         mask_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-        mask_str = f'<img src="data:image/jpg;base64,{mask_b64}" style="pointer-events: none;">'
         images[img_id] = {
             'image': save_path,
@@ -1207,22 +1387,21 @@ class ChatBotUI(object):
         }
         buffered = io.BytesIO()
-        img.convert('RGB').save(buffered, format='JPEG')
         img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-        img_str = f'<img src="data:image/jpg;base64,{img_b64}" style="pointer-events: none;">'
         history.append(
             (None,
              f'This is uploaded image:\n {img_str} image ID is: {img_id}'))
-        return history, images
 if __name__ == '__main__':
-    cfg = Config(cfg_file="config/chatbot_ui.yaml")
     with gr.Blocks() as demo:
         chatbot = ChatBotUI(cfg)
         chatbot.create_ui()
         chatbot.set_callbacks()
-    demo.launch()

 # -*- coding: utf-8 -*-
 # Copyright (c) Alibaba, Inc. and its affiliates.
 import base64
 import copy
 import glob
 import io
+import os, csv, sys
 import random
 import re
+import shlex
 import string
+import subprocess
 import threading
 import spaces
+subprocess.run(shlex.split('pip install flash-attn --no-build-isolation'),
+               env=os.environ | {'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"})
 import cv2
 import gradio as gr
 import numpy as np
 import torch
 import transformers
 from PIL import Image
 from transformers import AutoModel, AutoTokenizer
+from scepter.modules.inference.ace_inference import ACEInference
 from scepter.modules.utils.config import Config
 from scepter.modules.utils.directory import get_md5
 from scepter.modules.utils.file_system import FS
 from scepter.studio.utils.env import init_env
+from importlib.metadata import version
 from example import get_examples
 from utils import load_image
+csv.field_size_limit(sys.maxsize)
 refresh_sty = '\U0001f504'  # 🔄
 clear_sty = '\U0001f5d1'  # 🗑️
 class ChatBotUI(object):
     def __init__(self,
+                 cfg_general_file,
+                 is_debug=False,
+                 language='en',
                  root_work_dir='./'):
+        try:
+            from diffusers import CogVideoXImageToVideoPipeline
+            from diffusers.utils import export_to_video
+        except Exception as e:
+            print(f"Import diffusers failed, please install or upgrade diffusers. Error information: {e}")
+        cfg = Config(cfg_file=cfg_general_file)
+        if cfg.have("FILE_SYSTEM"):
+            for file_sys in cfg.FILE_SYSTEM:
+                fs_prefix = FS.init_fs_client(file_sys)
+        else:
+            fs_prefix = FS.init_fs_client(cfg)
         cfg.WORK_DIR = os.path.join(root_work_dir, cfg.WORK_DIR)
         if not FS.exists(cfg.WORK_DIR):
             FS.make_dir(cfg.WORK_DIR)
         cfg = init_env(cfg)
         self.cache_dir = cfg.WORK_DIR
+        self.chatbot_examples = get_examples(self.cache_dir) if not cfg.get('SKIP_EXAMPLES', False) else []
         self.model_cfg_dir = cfg.MODEL.EDIT_MODEL.MODEL_CFG_DIR
         self.model_yamls = glob.glob(os.path.join(self.model_cfg_dir,
                                                   '*.yaml'))
         self.model_choices = dict()
+        self.default_model_name = ''
         for i in self.model_yamls:
+            model_cfg = Config(load=True, cfg_file=i)
+            model_name = model_cfg.NAME
+            if model_cfg.IS_DEFAULT: self.default_model_name = model_name
+            self.model_choices[model_name] = model_cfg
+        print('Models: ', self.model_choices.keys())
+        #FS.get_from("ms://AI-ModelScope/FLUX.1-dev@flux1-dev.safetensors")
+        #FS.get_from("ms://AI-ModelScope/FLUX.1-dev@ae.safetensors")
+        #FS.get_dir_to_local_dir("ms://AI-ModelScope/FLUX.1-dev@text_encoder_2/")
+        #FS.get_dir_to_local_dir("ms://AI-ModelScope/FLUX.1-dev@tokenizer_2/")
+        #FS.get_dir_to_local_dir("ms://AI-ModelScope/FLUX.1-dev@text_encoder/")
+        #FS.get_dir_to_local_dir("ms://AI-ModelScope/FLUX.1-dev@tokenizer/")
+        assert len(self.model_choices) > 0
+        if self.default_model_name == "": self.default_model_name = self.model_choices.keys()[0]
+        self.model_name = self.default_model_name
         self.pipe = ACEInference()
+        self.pipe.init_from_cfg(self.model_choices[self.default_model_name])
         self.max_msgs = 20
         self.enable_i2v = cfg.get('ENABLE_I2V', False)
+        self.gradio_version = version('gradio')
         if self.enable_i2v:
             self.i2v_model_dir = cfg.MODEL.I2V.MODEL_DIR
             self.i2v_model_name = cfg.MODEL.I2V.MODEL_NAME
                 )
             sys_prompt = """You are part of a team of bots that creates videos. You work with an assistant bot that will draw anything you say in square brackets.
             For example , outputting " a beautiful morning in the woods with the sun peaking through the trees " will trigger your partner bot to output an video of a forest morning , as described. You will be prompted by people looking to create detailed , amazing videos. The way to accomplish this is to take their short prompts and make them extremely detailed and descriptive.
             There are a few rules to follow:
             You will only ever output a single video description per user request.
             When modifications are requested , you should not simply make the description longer . You should refactor the entire description to integrate the suggestions.
             Other times the user will not want modifications , but instead want a new image . In this case , you should ignore your previous conversation with the user.
             Video descriptions must have the same num of words as examples below. Extra words will be ignored.
             """
             self.enhance_ctx = [
             ]
     def create_ui(self):
         css = '.chatbot.prose.md {opacity: 1.0 !important} #chatbot {opacity: 1.0 !important}'
         with gr.Blocks(css=css,
                        title='Chatbot',
             self.history_result = gr.State(value={})
             self.retry_msg = gr.State(value='')
             with gr.Group():
+                self.ui_mode = gr.State(value='legacy')
+                with gr.Row(equal_height=True, visible=False) as self.chat_group:
                     with gr.Column(visible=True) as self.chat_page:
                         self.chatbot = gr.Chatbot(
                             height=600,
                                                        size='sm')
                     with gr.Column(visible=False) as self.editor_page:
+                        with gr.Tabs(visible=False) as self.upload_tabs:
                             with gr.Tab(id='ImageUploader',
                                         label='Image Uploader',
                                         visible=True) as self.upload_tab:
                                     interactive=True,
                                     type='pil',
                                     image_mode='RGB',
+                                    sources=['upload'],
                                     elem_id='image_uploader',
                                     format='png')
                                 with gr.Row():
                                         value='Submit',
                                         elem_id='upload_submit')
                                     self.ext_btn_1 = gr.Button(value='Exit')
+                        with gr.Tabs(visible=False) as self.edit_tabs:
                             with gr.Tab(id='ImageEditor',
+                                        label='Image Editor') as self.edit_tab:
                                 self.mask_type = gr.Dropdown(
                                     label='Mask Type',
                                     choices=[
                                     self.ext_btn_2 = gr.Button(value='Exit')
                             with gr.Tab(id='ImageViewer',
                                         label='Image Viewer') as self.image_view_tab:
                                 # Pick the viewer widget by Gradio version: a plain
                                 # gr.Image for >= '5.0.0', the third-party ImageSlider
                                 # otherwise.
                                 # NOTE(review): assuming gradio_version is a str, this
                                 # is a lexicographic comparison ('10.0.0' sorts before
                                 # '5.0.0'). The same check is repeated elsewhere in
                                 # this file, so change all occurrences together.
                                 if self.gradio_version >= '5.0.0':
                                     self.image_viewer = gr.Image(
                                         label='Image',
                                         type='pil',
                                         show_download_button=True,
                                         elem_id='image_viewer')
                                 else:
                                     # Imported lazily so the dependency is only needed
                                     # on this branch. Fail with an explicit error
                                     # instead of printing and then hitting a NameError
                                     # on the ImageSlider call below.
                                     try:
                                         from gradio_imageslider import ImageSlider
                                     except ImportError as e:
                                         raise ImportError(
                                             'Import gradio_imageslider failed, please install.'
                                         ) from e
                                     self.image_viewer = ImageSlider(
                                         label='Image',
                                         type='pil',
                                         show_download_button=True,
                                         elem_id='image_viewer')
                                 self.ext_btn_3 = gr.Button(value='Exit')
                                 self.ext_btn_4 = gr.Button(value='Exit')
+                with gr.Row(equal_height=True, visible=True) as self.legacy_group:
+                    with gr.Column():
+                        self.legacy_image_uploader = gr.Image(
+                            height=550,
+                            interactive=True,
+                            type='pil',
+                            image_mode='RGB',
+                            elem_id='legacy_image_uploader',
+                            format='png')
+                    with gr.Column():
+                        self.legacy_image_viewer = gr.Image(
+                            label='Image',
+                            height=550,
+                            type='pil',
+                            interactive=False,
+                            show_download_button=True,
+                            elem_id='image_viewer')
                 with gr.Accordion(label='Setting', open=False):
                     with gr.Row():
                         self.model_name_dd = gr.Dropdown(
                             choices=self.model_choices,
+                            value=self.default_model_name,
                             label='Model Version')
                     with gr.Row():
                             label='Negative Prompt',
                             container=False)
+                    with gr.Row():
+                        # REFINER_PROMPT
+                        self.refiner_prompt = gr.Textbox(
+                            value=self.pipe.input.get("refiner_prompt", ""),
+                            visible=self.pipe.input.get("refiner_prompt", None) is not None,
+                            placeholder=
+                            'Prompt used for refiner',
+                            label='Refiner Prompt',
+                            container=False)
                     with gr.Row():
                         with gr.Column(scale=8, min_width=500):
                             with gr.Row():
                                 self.step = gr.Slider(minimum=1,
                                                       maximum=1000,
+                                                      value=self.pipe.input.get("sample_steps", 20),
+                                                      visible=self.pipe.input.get("sample_steps", None) is not None,
                                                       label='Sample Step')
                                 self.cfg_scale = gr.Slider(
                                     minimum=1.0,
                                     maximum=20.0,
+                                    value=self.pipe.input.get("guide_scale", 4.5),
+                                    visible=self.pipe.input.get("guide_scale", None) is not None,
                                     label='Guidance Scale')
                                 self.rescale = gr.Slider(minimum=0.0,
                                                          maximum=1.0,
+                                                         value=self.pipe.input.get("guide_rescale", 0.5),
+                                                         visible=self.pipe.input.get("guide_rescale", None) is not None,
                                                          label='Rescale')
+                                self.refiner_scale = gr.Slider(minimum=-0.1,
+                                                         maximum=1.0,
+                                                         value=self.pipe.input.get("refiner_scale", 0.5),
+                                                         visible=self.pipe.input.get("refiner_scale", None) is not None,
+                                                         label='Refiner Scale')
                                 self.seed = gr.Slider(minimum=-1,
                                                       maximum=10000000,
                                                       value=-1,
                                                       label='Seed')
                                 self.output_height = gr.Slider(
                                     minimum=256,
+                                    maximum=1440,
+                                    value=self.pipe.input.get("output_height", 1024),
+                                    visible=self.pipe.input.get("output_height", None) is not None,
                                     label='Output Height')
                                 self.output_width = gr.Slider(
                                     minimum=256,
+                                    maximum=1440,
+                                    value=self.pipe.input.get("output_width", 1024),
+                                    visible=self.pipe.input.get("output_width", None) is not None,
                                     label='Output Width')
                         with gr.Column(scale=1, min_width=50):
                             self.use_history = gr.Checkbox(value=False,
                                                            label='Use History')
+                            self.use_ace = gr.Checkbox(value=self.pipe.input.get("use_ace", True),
+                                                       visible=self.pipe.input.get("use_ace", None) is not None,
+                                                       label='Use ACE')
                             self.video_auto = gr.Checkbox(
                                 value=False,
                                 label='Auto Gen Video',
                                                     visible=True)
                 with gr.Row():
+                    self.chatbot_inst = """
                        **Instruction**:
                        1. Click 'Upload' button to upload one or more images as input images.
                        2. Enter '@' in the text box will exhibit all images in the gallery.
                        3. Select the image you wish to edit from the gallery, and its Image ID will be displayed in the text box.
                        6. **Important** To render text on an image, please ensure to include a space between each letter. For instance, "add text 'g i r l' on the mask area of @xxxxx".
                        7. To implement local editing based on a specified mask, simply click on the image within the chat window to access the image editor. Here, you can draw a mask and then click the 'Submit' button to upload the edited image along with the mask. For inpainting tasks, select the 'Composite' mask type, while for outpainting tasks, choose the 'Outpainting' mask type. For all other local editing tasks, please select the 'Background' mask type.
                        8. If you find our work valuable, we invite you to refer to the [ACE Page](https://ali-vilab.github.io/ace-page/) for comprehensive information.
                     """
+                    self.legacy_inst = """
+                       **Instruction**:
+                       1. You can edit the image by uploading it; if no image is uploaded, an image will be generated from text..
+                       2. Enter '@' in the text box will exhibit all images in the gallery.
+                       3. Select the image you wish to edit from the gallery, and its Image ID will be displayed in the text box.
+                       4. **Important** To render text on an image, please ensure to include a space between each letter. For instance, "add text 'g i r l' on the mask area of @xxxxx".
+                       5. To perform multi-step editing, partial editing, inpainting, outpainting, and other operations, please click the Chatbot Checkbox to enable the conversational editing mode and follow the relevant instructions..
+                       6. If you find our work valuable, we invite you to refer to the [ACE Page](https://ali-vilab.github.io/ace-page/) for comprehensive information.
+                    """
+                    self.instruction = gr.Markdown(value=self.legacy_inst)
                 with gr.Row(variant='panel',
                             equal_height=True,
                             show_progress=False):
+                    with gr.Column(scale=1, min_width=100, visible=False) as self.upload_panel:
                         self.upload_btn = gr.Button(value=upload_sty +
                                                     ' Upload',
                                                     variant='secondary')
                             label='Instruction',
                             container=False)
                     with gr.Column(scale=1, min_width=100):
+                        self.chat_btn = gr.Button(value='Generate',
                                                   variant='primary')
                     with gr.Column(scale=1, min_width=100):
                         self.retry_btn = gr.Button(value=refresh_sty +
                                                    ' Retry',
                                                    variant='secondary')
+                    with gr.Column(scale=1, min_width=100):
+                        self.mode_checkbox = gr.Checkbox(
+                            value=False,
+                            label='ChatBot')
                     with gr.Column(scale=(1 if self.enable_i2v else 0),
                                    min_width=0):
                         self.video_gen_btn = gr.Button(value=video_sty +
                 lock.acquire()
                 del self.pipe
                 torch.cuda.empty_cache()
                 self.pipe = ACEInference()
+                self.pipe.init_from_cfg(self.model_choices[model_name])
                 self.model_name = model_name
                 lock.release()
+            return (model_name, gr.update(), gr.update(),
+                    gr.Slider(
+                              value=self.pipe.input.get("sample_steps", 20),
+                              visible=self.pipe.input.get("sample_steps", None) is not None),
+                    gr.Slider(
+                        value=self.pipe.input.get("guide_scale", 4.5),
+                        visible=self.pipe.input.get("guide_scale", None) is not None),
+                    gr.Slider(
+                              value=self.pipe.input.get("guide_rescale", 0.5),
+                              visible=self.pipe.input.get("guide_rescale", None) is not None),
+                    gr.Slider(
+                        value=self.pipe.input.get("output_height", 1024),
+                        visible=self.pipe.input.get("output_height", None) is not None),
+                    gr.Slider(
+                        value=self.pipe.input.get("output_width", 1024),
+                        visible=self.pipe.input.get("output_width", None) is not None),
+                    gr.Textbox(
+                        value=self.pipe.input.get("refiner_prompt", ""),
+                        visible=self.pipe.input.get("refiner_prompt", None) is not None),
+                    gr.Slider(
+                              value=self.pipe.input.get("refiner_scale", 0.5),
+                              visible=self.pipe.input.get("refiner_scale", None) is not None
+                        ),
+                    gr.Checkbox(
+                        value=self.pipe.input.get("use_ace", True),
+                        visible=self.pipe.input.get("use_ace", None) is not None
+                    )
+                    )
         self.model_name_dd.change(
             change_model,
             inputs=[self.model_name_dd],
+            outputs=[
+                self.model_name_dd, self.chatbot, self.text,
+                self.step,
+                self.cfg_scale, self.rescale, self.output_height,
+                self.output_width, self.refiner_prompt, self.refiner_scale,
+                self.use_ace])
+        def mode_change(mode_check):
+            if mode_check:
+                # ChatBot
+                return (
+                    gr.Row(visible=False),
+                    gr.Row(visible=True),
+                    gr.Button(value='Generate'),
+                    gr.State(value='chatbot'),
+                    gr.Column(visible=True),
+                    gr.Markdown(value=self.chatbot_inst)
+                )
+            else:
+                # Legacy
+                return (
+                    gr.Row(visible=True),
+                    gr.Row(visible=False),
+                    gr.Button(value=chat_sty + ' Chat'),
+                    gr.State(value='legacy'),
+                    gr.Column(visible=False),
+                    gr.Markdown(value=self.legacy_inst)
+                )
+        self.mode_checkbox.change(mode_change, inputs=[self.mode_checkbox],
+                                  outputs=[self.legacy_group, self.chat_group,
+                                           self.chat_btn, self.ui_mode,
+                                           self.upload_panel, self.instruction])
         ########################################
         def generate_gallery(text, images):
                             outputs=[self.text, self.gallery])
         ########################################
         def generate_video(message,
                            extend_prompt,
                            history,
                            fps,
                            seed,
                            progress=gr.Progress(track_tqdm=True)):
+            from diffusers.utils import export_to_video
             generator = torch.Generator(device='cuda').manual_seed(seed)
             img_ids = re.findall('@(.*?)[ ,;.?$]', message)
             if len(img_ids) == 0:
             outputs=[self.history, self.chatbot, self.text, self.gallery])
         ########################################
+        @spaces.GPU(duration=240)
+        def run_chat(
+                     message,
+                     legacy_image,
+                     ui_mode,
+                     use_ace,
                      extend_prompt,
                      history,
                      images,
                      negative_prompt,
                      cfg_scale,
                      rescale,
+                     refiner_prompt,
+                     refiner_scale,
                      step,
                      seed,
                      output_h,
                      video_fps,
                      video_seed,
                      progress=gr.Progress(track_tqdm=True)):
+            legacy_img_ids = []
+            if ui_mode == 'legacy':
+                if legacy_image is not None:
+                    history, images, img_id = self.add_uploaded_image_to_history(
+                        legacy_image, history, images)
+                    legacy_img_ids.append(img_id)
             retry_msg = message
             gen_id = get_md5(message)[:12]
             save_path = os.path.join(self.cache_dir, f'{gen_id}.png')
             img_ids = re.findall('@(.*?)[ ,;.?$]', message)
             history_io = None
+            if len(img_ids) < 1:
+                img_ids = legacy_img_ids
+                for img_id in img_ids:
+                    if f'@{img_id}' not in message:
+                        message = f'@{img_id} ' + message
             new_message = message
             if len(img_ids) > 0:
                         history_io = history_result[img_id]
                 buffered = io.BytesIO()
+                edit_image[0].save(buffered, format='PNG')
                 img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
+                img_str = f'<img src="data:image/png;base64,{img_b64}" style="pointer-events: none;">'
                 pre_info = f'Received one or more images, so image editing is conducted.\n The first input image @{img_ids[0]} is:\n {img_str}'
             else:
                 pre_info = 'No image ids were found in the provided text prompt, so text-guided image generation is conducted. \n'
                 guide_scale=cfg_scale,
                 guide_rescale=rescale,
                 seed=seed,
+                refiner_prompt=refiner_prompt,
+                refiner_scale=refiner_scale,
+                use_ace=use_ace
             )
             img = imgs[0]
             }
             buffered = io.BytesIO()
+            img.convert('RGB').save(buffered, format='PNG')
             img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
+            img_str = f'<img src="data:image/png;base64,{img_b64}" style="pointer-events: none;">'
             history.append(
                 (message,
             while len(history) >= self.max_msgs:
                 history.pop(0)
+            return (history, images, gr.Image(value=save_path),
+                    history_result, self.get_history(
+                history), gr.update(), gr.update(
+                    visible=False), retry_msg)
         chat_inputs = [
+            self.legacy_image_uploader, self.ui_mode, self.use_ace,
             self.extend_prompt, self.history, self.images, self.use_history,
             self.history_result, self.negative_prompt, self.cfg_scale,
+            self.rescale, self.refiner_prompt, self.refiner_scale,
+            self.step, self.seed, self.output_height,
             self.output_width, self.video_auto, self.video_step,
             self.video_frames, self.video_cfg_scale, self.video_fps,
             self.video_seed
         ]
         chat_outputs = [
+            self.history, self.images, self.legacy_image_viewer,
+            self.history_result, self.chatbot,
             self.text, self.gallery, self.retry_msg
         ]
                              outputs=chat_outputs)
         ########################################
+        @spaces.GPU(duration=120)
         def run_example(task, img, img_mask, ref1, prompt, seed):
             edit_image, edit_image_mask, edit_task = [], [], []
             if img is not None:
                     edit_task.append('')
                 buffered = io.BytesIO()
+                img.save(buffered, format='PNG')
                 img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
+                img_str = f'<img src="data:image/png;base64,{img_b64}" style="pointer-events: none;">'
                 pre_info = f'Received one or more images, so image editing is conducted.\n The first input image is:\n {img_str}'
             else:
                 pre_info = 'No image ids were found in the provided text prompt, so text-guided image generation is conducted. \n'
                 prompt=[prompt] * img_num,
                 negative_prompt=[''] * img_num,
                 seed=seed,
+                refiner_prompt=self.pipe.input.get("refiner_prompt", ""),
+                refiner_scale=self.pipe.input.get("refiner_scale", 0.0),
             )
             img = imgs[0]
             buffered = io.BytesIO()
+            img.convert('RGB').save(buffered, format='PNG')
             img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
+            img_str = f'<img src="data:image/png;base64,{img_b64}" style="pointer-events: none;">'
             history = [(prompt,
                         f'{pre_info} The generated image is:\n {img_str}')]
             return self.get_history(history), gr.update(value=''), gr.update(
             return (gr.update(visible=True,
                               scale=1), gr.update(visible=True, scale=1),
                     gr.update(visible=True), gr.update(visible=False),
+                    gr.update(visible=False), gr.update(visible=False),
+                    gr.update(visible=True))
         self.upload_btn.click(upload_image,
                               inputs=[],
                               outputs=[
                                   self.chat_page, self.editor_page,
                                   self.upload_tab, self.edit_tab,
+                                  self.image_view_tab, self.video_view_tab,
+                                  self.upload_tabs
                               ])
         ########################################
         def edit_image(evt: gr.SelectData):
             if isinstance(evt.value, str):
                 img_b64s = re.findall(
+                    '<img src="data:image/png;base64,(.*?)" style="pointer-events: none;">',
                     evt.value)
                 imgs = [
                     Image.open(io.BytesIO(base64.b64decode(copy.deepcopy(i))))
                 ]
                 if len(imgs) > 0:
                     if len(imgs) == 2:
+                        if self.gradio_version >= '5.0.0':
+                            view_img = copy.deepcopy(imgs[-1])
+                        else:
+                            view_img = copy.deepcopy(imgs)
                         edit_img = copy.deepcopy(imgs[-1])
                     else:
+                        if self.gradio_version >= '5.0.0':
+                            view_img = copy.deepcopy(imgs[-1])
+                        else:
+                            view_img = [
+                                copy.deepcopy(imgs[-1]),
+                                copy.deepcopy(imgs[-1])
+                            ]
                         edit_img = copy.deepcopy(imgs[-1])
                     return (gr.update(visible=True,
                             gr.update(visible=False), gr.update(visible=True),
                             gr.update(visible=True), gr.update(visible=False),
                             gr.update(value=edit_img),
+                            gr.update(value=view_img), gr.update(value=None),
+                            gr.update(visible=True))
                 else:
                     return (gr.update(), gr.update(), gr.update(), gr.update(),
                             gr.update(), gr.update(), gr.update(), gr.update(),
+                            gr.update(), gr.update())
             elif isinstance(evt.value, dict) and evt.value.get(
                     'component', '') == 'video':
                 value = evt.value['value']['video']['path']
                                   scale=1), gr.update(visible=True, scale=1),
                         gr.update(visible=False), gr.update(visible=False),
                         gr.update(visible=False), gr.update(visible=True),
+                        gr.update(), gr.update(), gr.update(value=value),
+                        gr.update())
             else:
                 return (gr.update(), gr.update(), gr.update(), gr.update(),
                         gr.update(), gr.update(), gr.update(), gr.update(),
+                        gr.update(), gr.update())
         self.chatbot.select(edit_image,
                             outputs=[
                                 self.upload_tab, self.edit_tab,
                                 self.image_view_tab, self.video_view_tab,
                                 self.image_editor, self.image_viewer,
+                                self.video_viewer, self.edit_tabs
                             ])
+        if self.gradio_version < '5.0.0':
+            self.image_viewer.change(lambda x: x,
+                                     inputs=self.image_viewer,
+                                     outputs=self.image_viewer)
         ########################################
         def submit_upload_image(image, history, images):
+            history, images, _ = self.add_uploaded_image_to_history(
                 image, history, images)
             return gr.update(visible=False), gr.update(
                 visible=True), gr.update(
         thumbnail.save(thumbnail_path, format='JPEG')
         buffered = io.BytesIO()
+        img.convert('RGB').save(buffered, format='PNG')
         img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
+        img_str = f'<img src="data:image/png;base64,{img_b64}" style="pointer-events: none;">'
         buffered = io.BytesIO()
+        mask.convert('RGB').save(buffered, format='PNG')
         mask_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
+        mask_str = f'<img src="data:image/png;base64,{mask_b64}" style="pointer-events: none;">'
         images[img_id] = {
             'image': save_path,
         }
         buffered = io.BytesIO()
+        img.convert('RGB').save(buffered, format='PNG')
         img_b64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
+        img_str = f'<img src="data:image/png;base64,{img_b64}" style="pointer-events: none;">'
         history.append(
             (None,
              f'This is uploaded image:\n {img_str} image ID is: {img_id}'))
+        return history, images, img_id
 if __name__ == '__main__':
     # Config path handed to ChatBotUI.
     cfg = "config/chatbot_ui.yaml"
     demo = gr.Blocks()
     # Build the components and register the callbacks inside the Blocks
     # context, then launch once the context has been closed.
     with demo:
         chatbot = ChatBotUI(cfg)
         chatbot.create_ui()
         chatbot.set_callbacks()
     demo.launch()