ermu2001 committed on
Commit
08720f3
1 Parent(s): 195eeff
This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitignore +16 -0
  2. Dockerfile +27 -0
  3. README.md +138 -0
  4. app.py +239 -0
  5. chat_anything/azure_utils.py +155 -0
  6. chat_anything/chatbot/__init__.py +0 -0
  7. chat_anything/chatbot/chat.py +72 -0
  8. chat_anything/chatbot/model_select.py +60 -0
  9. chat_anything/chatbot/personality.py +59 -0
  10. chat_anything/chatbot/select.py +63 -0
  11. chat_anything/chatbot/voice_select.py +119 -0
  12. chat_anything/face_generator/__init__.py +0 -0
  13. chat_anything/face_generator/long_prompt_control_generator.py +104 -0
  14. chat_anything/face_generator/long_prompt_generator.py +82 -0
  15. chat_anything/face_generator/pipelines/lpw_stable_diffusion.py +1471 -0
  16. chat_anything/face_generator/utils/generate.py +45 -0
  17. chat_anything/polly_utils.py +635 -0
  18. chat_anything/sad_talker/__init__.py +0 -0
  19. chat_anything/sad_talker/audio2exp_models/audio2exp.py +41 -0
  20. chat_anything/sad_talker/audio2exp_models/networks.py +74 -0
  21. chat_anything/sad_talker/audio2pose_models/audio2pose.py +94 -0
  22. chat_anything/sad_talker/audio2pose_models/audio_encoder.py +64 -0
  23. chat_anything/sad_talker/audio2pose_models/cvae.py +149 -0
  24. chat_anything/sad_talker/audio2pose_models/discriminator.py +76 -0
  25. chat_anything/sad_talker/audio2pose_models/networks.py +140 -0
  26. chat_anything/sad_talker/audio2pose_models/res_unet.py +65 -0
  27. chat_anything/sad_talker/config/auido2exp.yaml +58 -0
  28. chat_anything/sad_talker/config/auido2pose.yaml +49 -0
  29. chat_anything/sad_talker/config/facerender.yaml +45 -0
  30. chat_anything/sad_talker/config/facerender_still.yaml +45 -0
  31. chat_anything/sad_talker/config/similarity_Lm3D_all.mat +0 -0
  32. chat_anything/sad_talker/face3d/data/__init__.py +116 -0
  33. chat_anything/sad_talker/face3d/data/base_dataset.py +125 -0
  34. chat_anything/sad_talker/face3d/data/flist_dataset.py +125 -0
  35. chat_anything/sad_talker/face3d/data/image_folder.py +66 -0
  36. chat_anything/sad_talker/face3d/data/template_dataset.py +75 -0
  37. chat_anything/sad_talker/face3d/extract_kp_videos.py +108 -0
  38. chat_anything/sad_talker/face3d/extract_kp_videos_safe.py +162 -0
  39. chat_anything/sad_talker/face3d/models/__init__.py +67 -0
  40. chat_anything/sad_talker/face3d/models/arcface_torch/README.md +164 -0
  41. chat_anything/sad_talker/face3d/models/arcface_torch/backbones/__init__.py +25 -0
  42. chat_anything/sad_talker/face3d/models/arcface_torch/backbones/iresnet.py +187 -0
  43. chat_anything/sad_talker/face3d/models/arcface_torch/backbones/iresnet2060.py +176 -0
  44. chat_anything/sad_talker/face3d/models/arcface_torch/backbones/mobilefacenet.py +130 -0
  45. chat_anything/sad_talker/face3d/models/arcface_torch/configs/3millions.py +23 -0
  46. chat_anything/sad_talker/face3d/models/arcface_torch/configs/3millions_pfc.py +23 -0
  47. chat_anything/sad_talker/face3d/models/arcface_torch/configs/__init__.py +0 -0
  48. chat_anything/sad_talker/face3d/models/arcface_torch/configs/base.py +56 -0
  49. chat_anything/sad_talker/face3d/models/arcface_torch/configs/glint360k_mbf.py +26 -0
  50. chat_anything/sad_talker/face3d/models/arcface_torch/configs/glint360k_r100.py +26 -0
.gitignore ADDED
@@ -0,0 +1,16 @@
+ **__pycache__/
+
+ MODELS
+ third_party
+ tmp
+ results
+ chat_anything/tts_vits/
+ vits_results
+ test
+ resources/models.yaml
+
+ # others
+ GFPGANv1.4.pth
+ gfpgan
+ GFPGAN
+ .gitattributes
Dockerfile ADDED
@@ -0,0 +1,27 @@
+ FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel
+
+ # FROM python:3.9
+
+ # WORKDIR /code
+
+ # COPY ./requirements.txt /code/requirements.txt
+
+ # RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # for open cv
+ RUN apt-get update && apt-get install libgl1 -y
+
+ RUN useradd -m -u 1000 user
+
+ USER user
+
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/ChatAnything
+
+ COPY --chown=user . $HOME/ChatAnything
+
+ RUN pip install -r requirements.txt
+
+ CMD python app.py
README.md CHANGED
@@ -10,3 +10,141 @@ pinned: false
  ---
 
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # ChatAnything: Facetime Chat with LLM-Enhanced Personas
+
+ **Yilin Zhao\*, Shanghua Gao\*, Daquan Zhou\*, Xinbin Yuan\*, Zhijie Lin, Qibin Hou, Jiashi Feng**
+
+
+
+ > What would it be like to Facetime any imaginary concept?
+ To animate anything, we integrate currently available open-source models into an animation application for interactive AI-agent chatting.
+ >
+ > To start with, take a look at these incredible faces generated with open-source Civitai models that are about to be animated.
+ <img src="./resources/readme/show.png" alt="drawing" width="784"/>
+ <!-- ![faces](./resources/readme/show.png) -->
+
+ Here we provide you with ChatAnything: a simple pipeline, enhanced with today's powerful Large Language Models, that yields imaginary Facetime chats with the intended visual appearance!
+
+ Remember, the repo and application are built entirely on pre-trained deep learning methods and do not include any training yet. We give all the credit to the open-source community (shout out to you). For details of the pipeline, see our technical report (TODO: link here).
+ ## Release & Features & Future Plans
+
+ - [ ] Fine-tune the face rendering module.
+ - [ ] Better TTS module & voice rendering module.
+ - [ ] Add open-source language models.
+ - [x] Initial release
+   - Facetime animation.
+   - Multiple model choices for initial frame generation.
+   - Multiple choices for voices.
+ # Install & Run
+ Just follow the instructions. Everything should be simple (hopefully). Reach out if you run into any problems!
+ ### Install
+ First, set up the conda environment:
+ ```
+ conda env create -f environment.yaml
+
+ # then install
+ conda env update --name chatanything --file environment.yaml
+ ```
+
+ The pipeline integrates open-source models, all of which can be found online (see [Acknowledgement](#acknowledgement)). We put some important models together on Hugging Face remotes just to make life easier. Prepare them for the first run with the Python script [prepare_models.py](./python_scripts/prepare_models.py):
+ ```
+ # prepare the local models
+ python python_scripts/prepare_models.py
+ ```
+
+ ### Building Docker
+ Try building a Docker image if you find that easier. This part is not fully tested; if you find anything wrong, feel free to contribute~
+ ```
+ docker build --network=host -t chatanything .
+ # docker run -dp 127.0.0.1:8901:8901 chatanything
+ docker run -p 127.0.0.1:8901:8901 -it --gpus all chatanything
+ docker run -it --gpus all chatanything bash
+ ```
+
+ ### Run
+ Specify a port for the gradio application to run on and set off! app.py takes the port as its first command-line argument and defaults to 8901 when none is given:
+ ```
+ python app.py 8809
+ ```
+
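+ For reference, this is the (trimmed) port handling at the bottom of [app.py](./app.py) added in this commit, which is why the port argument is optional:
+ ```python
+ if __name__ == "__main__":
+     import sys
+     if len(sys.argv) == 1:
+         port = 8901              # default port when no argument is given
+     else:
+         port = int(sys.argv[1])
+     block.launch(debug=True, server_name="0.0.0.0",
+                  server_port=port, share=True, enable_queue=True)
+ ```
+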
+ # Configuring: From User Input Concept to Appearance & Voice
+ The first step of the pipeline is to generate an image for SadTalker and, at the same time, set up the text-to-speech module for voice chat.
+
+ The pipeline queries a powerful LLM (ChatGPT) for these selections in a zero-shot multiple-choice format.
+ Three questions are asked at the start of every conversation (init frame generation); a sketch of how such a multiple-choice prompt is built follows this list:
+ 1. Provide an imagined personality for the user input concept.
+ 2. Select a generative model for the init frame generation.
+ 3. Select a text-to-speech voice (model) for the character based on the personality.
+
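+ The generic lettered multiple-choice helper in [select.py](./chat_anything/chatbot/select.py), added in this commit, shows the flavor of these selections. A minimal self-contained sketch of its logic; the option names and the canned reply are illustrative, not real config values:
+ ```python
+ # Build "A. <desc>" options from a config dict, ask the LLM, keep only the
+ # first character of its reply, and map that letter back to a configured key.
+ options = {
+     "dreamshaper": {"desc": "realistic portraits for generic concepts"},
+     "game_icon": {"desc": "cute cartoon game-icon style objects"},
+ }
+ option_list = "\n".join(
+     f"{chr(ord('A') + i)}. {conf['desc']}" for i, conf in enumerate(options.values())
+ )
+ choices = " ".join(chr(ord("A") + i) for i in range(len(options)))
+ reply = "B"  # stand-in for chain.run({...}) against the selection prompt
+ selected = list(options.keys())[ord(reply[0]) - ord("A")]
+ print(option_list)
+ print("choices:", choices, "-> selected:", selected)  # selected == "game_icon"
+ ```
+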
+ We have made the model selection extendable: add your ideal model with just a few lines of configuration! The rest of this section briefly introduces the steps to add an init-frame generator or a language/voice.
+
+ ### Image Generator
+ Configure the models in the [Model Config](./resources/models.yaml). This config acts as the memory (an image-generating tool pool) for the LLM; an illustrative entry is sketched after the field list below.
+
+ The prompt sets up the selection process. Each sub-field of "models" turns into an option in the multiple-choice question.
+ The "**desc**" field of each entry is what the language model sees. The key itself is not shown to the LM, as it would sometimes mislead it.
+ The other fields are used for image generation:
+ 1. model_dir: the repo path for the diffusers package. As the pretrained face-landmark ControlNet is based on stable-diffusion-v1-5, we currently only support its derivatives.
+ 2. lora_path: LoRA derivatives are powerful, so also try a LoRA model for better stylization. This should point directly to the parameter binary file.
+ 3. prompt_template & negative_prompt: these are used to prompt the text-to-image diffusion model. Find an ideal prompt for your model and stick with it. The prompt template should contain a "{}" for inserting the user input concept.
+
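+ A minimal sketch of what one such entry could look like, written with omegaconf (the loader the app uses); the entry name "game_icon" and every value below are illustrative placeholders, not shipped defaults:
+ ```python
+ from omegaconf import OmegaConf
+
+ example = OmegaConf.create({
+     "models": {
+         "game_icon": {
+             "desc": "cartoon game-icon style, bright colors, good for cute objects",
+             "model_dir": "runwayml/stable-diffusion-v1-5",     # hub repo id or local dir
+             "lora_path": "MODELS/lora/game_icon.safetensors",  # optional LoRA weight file
+             "prompt_template": "game icon of {}, masterpiece, best quality",
+             "negative_prompt": "lowres, bad anatomy, opened mouth",
+         }
+     }
+ })
+ print(OmegaConf.to_yaml(example))  # YAML you could paste into resources/models.yaml
+ ```
+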
+ Here are some **tips** for configuring your own model:
+ 1. Provide the LLM with a simple description of the generative model. The description needs to be concise and accurate for a correct selection.
+ 2. Set model_dir to a local directory containing a diffusers stable-diffusion-v1-5 derivative. You can also provide a repo id from the Hugging Face Hub; the model is downloaded the first time it is chosen, so wait for it.
+ 3. To better utilize the resources from the community, we also support LoRA. To add a LoRA module, provide the path to its parameter file.
+
+ 4. Carefully write the prompt template and negative prompt; they strongly affect the initial face generation. The prompt template should contain exactly one pair of "{}" to insert the concept users type on the application webpage. We support the Stable-Diffusion-WebUI prompt style as implemented by diffusers, so feel free to copy a prompt from Civitai and add the "{}" to it for ChatAnything!
+
+ Again, this model config acts as an extended tool pool for the LM: the application drives the LM to choose from the config and uses the chosen model to generate. Sometimes the LM fails to choose a correct or even an available model, which would cause the ChatAnything app to fail on a generation; the name-based selector in chat_anything/chatbot/model_select.py guards against this by re-prompting with a warning, as in the excerpt below.
+
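+ Trimmed excerpt from [model_select.py](./chat_anything/chatbot/model_select.py) (added in this commit), with variable names as they appear in that file:
+ ```python
+ # Keep re-asking, feeding back a warning, until the reply is a configured name.
+ selected_model = None
+ while (selected_model is None) or not (selected_model in models_dict):
+     if (selected_model is not None) and not (selected_model in models_dict):
+         warning_str = '{} is not in Model list! \n'.format(selected_model)
+     else:
+         warning_str = ''
+     selected_model = personality_chain.run({
+         'concept': class_concept, 'model_list': model_list_str,
+         'warning': warning_str, 'model_name_list': model_name_list_str,
+     })
+ ```
+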
+ Note that we currently support ONLY stable-diffusion-v1.5 derivatives. (SDXL pipelines are under consideration but not yet implemented, as we lack a face-landmark ControlNet for SDXL. Reach out if you're interested in training one!)
+
+ ### Voice TTS
+ We use the edge_tts package for text-to-speech support. The voice selection and the [voice configuration file](./resources/voices_edge.yaml) are constructed similarly to the image-generation model selection, except that the LM now chooses the voice based on the personality description it produced earlier. The "**gender**" and "**language**" fields correspond to edge_tts voices.
+
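+ If you want to try a voice outside the app, a minimal stand-alone edge-tts sketch looks like the following; the voice short name here is hand-picked for illustration, while in the app it comes from the selection above:
+ ```python
+ import asyncio
+ import edge_tts
+
+ async def speak(text: str, voice: str = "en-US-ChristopherNeural", out: str = "reply.mp3"):
+     # Synthesize `text` with the given neural voice and save it as an mp3 file.
+     await edge_tts.Communicate(text, voice).save(out)
+
+ asyncio.run(speak("Hello! I am your talking apple."))
+ ```
+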
+ # On-going tasks
+ ### Customized Voice
+ There is a Voice Changer app (a text-to-speech + speech voice conversion pipeline) that enables better customized voices. We are trying to leverage its TTS functionality.
+
+ Reach out if you want to add a voice of your own or of your hero!
+
+ Here are the possible steps.
+ You would need to change the code a little first:
+ 1. Alter this [code](./utils.py#14) to import TTSTalker from chat_anything/tts_talker/tts_voicechanger.py.
+ 2. Switch to the other config in the same [code](./utils.py#14): change "resources/voices_edge.yaml" -> "resources/voices_voicechanger.yaml".
+
+ Then try running a [Voice Changer](https://huggingface.co/spaces/kevinwang676/Voice-Changer) on your local machine: simply set up git-lfs, install the repo, and run it as the TTS voice service.
+ The TTS caller is set to port 7860.
+
+ Make sure the client class is set up with the same port [here](chat_anything/tts_talker/tts_voicechanger.py#5):
+ ```python
+ client = Client("http://127.0.0.1:7860/")
+ ```
+
+ # Acknowledgement
+ Again, the project does not yet include any training. The pipeline is built entirely on these incredibly awesome packages and pretrained models. Don't hesitate to take a look and explore the amazing open-source generative communities. We love you, guys.
+ - [ChatGPT](https://openai.com/chatgpt): GOD
+ - [SadTalker](https://github.com/OpenTalker/SadTalker): the core animation module.
+ - [Face-Landmark-ControlNet](https://huggingface.co/georgefen/Face-Landmark-ControlNet): an awesome ControlNet on face landmarks, using Stable Diffusion 1.5 as the base model.
+ - [diffusers](https://github.com/huggingface/diffusers): GOAT of image generative frameworks🥳.
+ - [langchain](https://github.com/langchain-ai/langchain): an awesome package for dealing with LLMs.
+ - [edge-tts](https://github.com/rany2/edge-tts): an awesome package for text-to-speech solutions.
+ - [gradio](https://www.gradio.app/): GOAT😄 machine-learning app framework.
+ - [Civitai](https://civitai.com/models) and [Huggingface_hub](https://huggingface.co/models): find your ideal image generative model on Civitai. These communities are crazy🥂. Here are some fantastic derivatives of [stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5):
+   - [Game Icon Institute_mode](https://civitai.com/models/47800?modelVersionId=76533)
+   - [dreamshaper](https://civitai.com/models/4384/dreamshaper)
+   - [3D_Animation_Diffusion](https://civitai.com/models/118086?modelVersionId=128046)
+   - [anything-v5](https://huggingface.co/stablediffusionapi/anything-v5)
+
+ # Citation
+ If you like our pipeline and application, don't hesitate to reach out! Let's work on it and see how far it can go!
+ ```bibtex
+ @misc{zhao2023ChatAnything,
+   title={ChatAnything: Facetime Chat with LLM-Enhanced Personas},
+   author={Zhao, Yilin and Gao, Shanghua and Zhou, Daquan and Yuan, Xinbin and Hou, Qibin and Feng, Jiashi},
+   publisher={},
+   year={2023},
+ }
+ ```
+
app.py ADDED
@@ -0,0 +1,239 @@
1
+ import os
2
+ import ssl
3
+ import sys
4
+
5
+ import gradio as gr
6
+
7
+ import warnings
8
+ import whisper
9
+ from chat_anything.polly_utils import PollyVoiceData
10
+ from chat_anything.azure_utils import AzureVoiceData
11
+ from chat_anything.chatbot.chat import set_openai_api_key
12
+ from utils import ChatWrapper, update_foo, reset_memory
13
+
14
+ ssl._create_default_https_context = ssl._create_unverified_context
15
+
16
+
17
+ TALKING_HEAD_WIDTH = "350"
18
+
19
+ LOOPING_TALKING_HEAD = "resources/videos/tempfile.mp4"
20
+
21
+ USE_GPT4_DEFAULT = False
22
+ FULLBODY_DEFAULT = False
23
+ POLLY_VOICE_DATA = PollyVoiceData()
24
+ AZURE_VOICE_DATA = AzureVoiceData()
25
+
26
+ # Pertains to WHISPER functionality
27
+ WHISPER_DETECT_LANG = "Detect language"
28
+
+ INSTRUCTION_MARKDOWN = """
+ # ChatAnything: Facetime Chat with LLM-Enhanced Personas
+ ### DEMO INSTRUCTION
+ ##### 0. Register
+ Input an OpenAI API key of your own. It is used to chat with openai-chatgpt. Make sure to disable the key afterwards🥹.
+ ##### 1. Generate the init face😀 along with the first chat
+ Input a concept in the "Talking object" text box, then click the Generate button. The init face generation and module selection are performed once and reused for the rest of this chat. Wait a while and the video will be produced and played. Keep the concept simple; it is inserted into each prompt template to decide the main subject.
+ ##### 2. Keep on chatting🤑
+ Go on speaking with the character. The init face and module selection are not re-run; you are now only chatting with the LM, plus the SadTalker rendering. Hopefully, the API will not impose an excessive charge for this.
+
+
+ ### FEATURES
+ ##### 1. Upload an image as the control/inversion starting point. Try some non-face images and see how it works!
+ ##### 2. Seeding is provided. However, if no input image is given, a randomly chosen facial landmark image is used for generation, which adds some randomness.
+ ##### 3. Try out the examples.
+ ##### 4. Say something and record your voice for a real Facetime chat. Whisper will handle your voice; see Settings - Whisper STT options.
+ ##### 5. Decide whether to use the crop-face-out option. It crops the face out of the generated image before rendering, which is promising for better animation, but sometimes the cropped image loses elementary features of your intended concept.
+
+ """
48
+
49
+ # UNCOMMENT TO USE WHISPER
50
+ warnings.filterwarnings("ignore")
51
+ WHISPER_MODEL = whisper.load_model("tiny")
52
+ print("WHISPER_MODEL", WHISPER_MODEL)
53
+
54
+
55
+ # UNCOMMENT TO USE WHISPER
56
+ def transcribe(aud_inp, whisper_lang):
57
+ if aud_inp is None:
58
+ return ""
59
+ aud = whisper.load_audio(aud_inp)
60
+ aud = whisper.pad_or_trim(aud)
61
+ mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
62
+ _, probs = WHISPER_MODEL.detect_language(mel)
63
+ options = whisper.DecodingOptions()
64
+ if whisper_lang != WHISPER_DETECT_LANG:
65
+ whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(
66
+ whisper_lang)
67
+ options = whisper.DecodingOptions(language=whisper_lang_code)
68
+ result = whisper.decode(WHISPER_MODEL, mel, options)
69
+ print("result.text", result.text)
70
+ result_text = ""
71
+ if result and result.text:
72
+ result_text = result.text
73
+ return result_text
74
+
75
+
76
+ chat = ChatWrapper()
77
+
78
+
79
+ with gr.Blocks() as block:
80
+ llm_state = gr.State()
81
+ history_state = gr.State()
82
+ chain_state = gr.State()
83
+ talker_state = gr.State()
84
+ fullbody_state = gr.State(True)
85
+ speak_text_state = gr.State(True)
86
+ talking_head_state = gr.State(True)
87
+ uid_state = gr.State()
88
+ video_file_path = gr.State()
89
+ audio_file_path = gr.State()
90
+
91
+ memory_state = gr.State()
92
+
93
+
94
+ # Pertains to WHISPER functionality
95
+ whisper_lang_state = gr.State(WHISPER_DETECT_LANG)
96
+ use_gpt4_state = gr.State(USE_GPT4_DEFAULT)
97
+
98
+ with gr.Column():
99
+ with gr.Row():
100
+ gr.Markdown(INSTRUCTION_MARKDOWN)
101
+ with gr.Row():
102
+ openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...) and hit Enter",
103
+ show_label=True, lines=1, type='password', value='', label='OpenAI API key')
104
+ openai_api_key_register = gr.Button(
105
+ value="Register").style(full_width=False)
106
+ uid_textbox = gr.Textbox(show_label=True, value=uid_state, lines=1, label='UID')
107
+ seed = gr.Slider(
108
+ label="Seed",
109
+ minimum=-1,
110
+ maximum=2147483647,
111
+ step=1,
112
+ randomize=True,
113
+ )
114
+
115
+ with gr.Tab("Chat"):
116
+ with gr.Row():
117
+ with gr.Column(scale=1, min_width=TALKING_HEAD_WIDTH, visible=True):
118
+ with gr.Column():
119
+ class_prompt = gr.Textbox(
120
+ 'apple',
121
+ default='apple',
122
+ type="text", label='Talking object'
123
+ )
124
+ init_face_btn = gr.Button(
125
+ value="Generate").style(full_width=False)
126
+
127
+ my_file = gr.File(label="Upload a file",
128
+ type="file", visible=False)
129
+
130
+ # video_html = gr.HTML('')
131
+ video_html = gr.Video(label="Generated Video", autoplay=True)
132
+
133
+ ref_image = gr.Image(
134
+ type="pil",
135
+ interactive=True,
136
+ label="Image: Upload your image.",
137
+ )
138
+ tmp_aud_file = gr.File(
139
+ type="file", visible=False)
140
+ audio_html = gr.HTML('')
141
+ init_face_btn.click(chat.generate_init_face_video, inputs=[class_prompt, llm_state, uid_state,fullbody_state, ref_image, seed],
142
+ outputs=[chain_state, memory_state, video_html,talker_state])
143
+
144
+
145
+ with gr.Column(scale=7):
146
+ chatbot = gr.Chatbot()
147
+
148
+
149
+ message = gr.Textbox(label="What's on your mind??",
150
+ placeholder="What's the answer to life, the universe, and everything?",
151
+ lines=1)
152
+ submit = gr.Button(value="Send", variant="secondary").style(
153
+ full_width=False)
154
+
155
+ audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
156
+ interactive=True, streaming=False)
157
+ audio_comp.change(transcribe, inputs=[
158
+ audio_comp, whisper_lang_state], outputs=[message])
159
+
160
+
161
+ with gr.Accordion("General examples", open=False):
162
+ gr.Examples(
163
+ examples=[
164
+ ["cyberpunk godess", "Who are you?", "resources/images/annie.jpg", 393212389],
165
+ ["unbelievable beauty fairy", "Who are you?", "resources/images/lenna.jpg", 222679277],
166
+ ["tree monster", "Who are you?", None],
167
+ ["pineapple monster", "Who are you?", None],
168
+ ["tricky Polaris", "Who are you?", None, 1670155100],
169
+ ["watermelon", "Who are you?", "resources/images/watermelon.jpg", 42],
170
+ ],
171
+ inputs=[class_prompt, message, ref_image, seed],
172
+ )
173
+
174
+ with gr.Tab("Settings"):
175
+ with gr.Tab("General"):
176
+
177
+ talking_head_cb = gr.Checkbox(
178
+ label="Show talking head", value=True)
179
+ talking_head_cb.change(chat.update_talking_head, inputs=[talking_head_cb, uid_state, talking_head_state],
180
+ outputs=[talking_head_state, video_html])
181
+
182
+ use_gpt4_cb = gr.Checkbox(label="Use GPT-4 (experimental) if your OpenAI API has access to it",
183
+ value=USE_GPT4_DEFAULT)
184
+
185
+ fullbody_state = gr.Checkbox(label="Use full body instead of a face.",
186
+ value=True)
187
+
188
+ use_gpt4_cb.change(set_openai_api_key,
189
+ inputs=[openai_api_key_textbox,
190
+ use_gpt4_cb],
191
+ outputs=[llm_state, use_gpt4_state, chatbot, uid_state, video_file_path, audio_file_path])
192
+
193
+ reset_btn = gr.Button(value="Reset chat",
194
+ variant="secondary").style(full_width=False)
195
+ reset_btn.click(reset_memory, inputs=[history_state, memory_state],
196
+ outputs=[chatbot, history_state, memory_state])
197
+
198
+
199
+ with gr.Tab("Whisper STT"):
200
+ whisper_lang_radio = gr.Radio(label="Whisper speech-to-text language:", choices=[
201
+ WHISPER_DETECT_LANG, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
202
+ "Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
203
+ "English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
204
+ "German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
205
+ "Korean", "Norwegian", "Polish",
206
+ "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
207
+ "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh"],
208
+ value=WHISPER_DETECT_LANG)
209
+
210
+ whisper_lang_radio.change(update_foo,
211
+ inputs=[whisper_lang_radio,
212
+ whisper_lang_state],
213
+ outputs=[whisper_lang_state])
214
+
215
+ gr.HTML("""
216
+ <p>This application is based on <a href='https://huggingface.co/spaces/JavaFXpert/Chat-GPT-LangChain/'>Chat-GPT-LangChain</a>, <a href='https://github.com/hwchase17/langchain'>LangChain</a>
217
+ </p>""")
218
+
219
+ message.submit(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state,
220
+ speak_text_state, talking_head_state, uid_state,talker_state,fullbody_state],
221
+ outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
222
+
223
+ submit.click(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state,
224
+ speak_text_state, talking_head_state, uid_state,talker_state,fullbody_state],
225
+ outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
226
+
227
+ openai_api_key_register.click(set_openai_api_key,
228
+ inputs=[openai_api_key_textbox,
229
+ use_gpt4_state, chatbot],
230
+ outputs=[llm_state, use_gpt4_state, chatbot, uid_state, video_file_path, audio_file_path])
231
+
232
+ if __name__ == "__main__":
233
+ import sys
234
+ if len(sys.argv) == 1:
235
+ port = 8901
236
+ else:
237
+ port = int(sys.argv[1])
238
+ block.launch(debug=True, server_name="0.0.0.0",
239
+ server_port=port, share=True, enable_queue = True)
chat_anything/azure_utils.py ADDED
@@ -0,0 +1,155 @@
1
+ # This class stores Azure voice data. Specifically, it keeps several records containing
+ # language, azure_voice and gender. The class also has a method that returns the
+ # azure_voice name given a language and gender.
4
+
5
+ NEURAL_ENGINE = "neural"
6
+ STANDARD_ENGINE = "standard"
7
+
8
+
9
+ class AzureVoiceData:
10
+ def get_voice(self, language, gender):
11
+ for voice in self.voice_data:
12
+ if voice['language'] == language and voice['gender'] == gender:
13
+ return voice['azure_voice']
14
+ return None
15
+
16
+ def __init__(self):
17
+ self.voice_data = [
18
+ {'language': 'Arabic',
19
+ 'azure_voice': 'ar-EG-ShakirNeural',
20
+ 'gender': 'Male'},
21
+ {'language': 'Arabic (Gulf)',
22
+ 'azure_voice': 'ar-KW-FahedNeural',
23
+ 'gender': 'Male'},
24
+ {'language': 'Catalan',
25
+ 'azure_voice': 'ca-ES-EnricNeural',
26
+ 'gender': 'Male'},
27
+ {'language': 'Chinese (Cantonese)',
28
+ 'azure_voice': 'yue-CN-YunSongNeural',
29
+ 'gender': 'Male'},
30
+ {'language': 'Chinese (Mandarin)',
31
+ 'azure_voice': 'zh-CN-YunxiNeural',
32
+ 'gender': 'Male'},
33
+ {'language': 'Danish',
34
+ 'azure_voice': 'da-DK-JeppeNeural',
35
+ 'gender': 'Male'},
36
+ {'language': 'Dutch',
37
+ 'azure_voice': 'nl-NL-MaartenNeural',
38
+ 'gender': 'Male'},
39
+ {'language': 'English (Australian)',
40
+ 'azure_voice': 'en-AU-KenNeural',
41
+ 'gender': 'Male'},
42
+ {'language': 'English (British)',
43
+ 'azure_voice': 'en-GB-RyanNeural',
44
+ 'gender': 'Male'},
45
+ {'language': 'English (Indian)',
46
+ 'azure_voice': 'en-IN-PrabhatNeural',
47
+ 'gender': 'Male'},
48
+ {'language': 'English (New Zealand)',
49
+ 'azure_voice': 'en-NZ-MitchellNeural',
50
+ 'gender': 'Male'},
51
+ {'language': 'English (South African)',
52
+ 'azure_voice': 'en-ZA-LukeNeural',
53
+ 'gender': 'Male'},
54
+ {'language': 'English (US)',
55
+ 'azure_voice': 'en-US-ChristopherNeural',
56
+ 'gender': 'Male'},
57
+ {'language': 'English (Welsh)',
58
+ 'azure_voice': 'cy-GB-AledNeural',
59
+ 'gender': 'Male'},
60
+ {'language': 'Finnish',
61
+ 'azure_voice': 'fi-FI-HarriNeural',
62
+ 'gender': 'Male'},
63
+ {'language': 'French',
64
+ 'azure_voice': 'fr-FR-HenriNeural',
65
+ 'gender': 'Male'},
66
+ {'language': 'French (Canadian)',
67
+ 'azure_voice': 'fr-CA-AntoineNeural',
68
+ 'gender': 'Male'},
69
+ {'language': 'German',
70
+ 'azure_voice': 'de-DE-KlausNeural',
71
+ 'gender': 'Male'},
72
+ {'language': 'German (Austrian)',
73
+ 'azure_voice': 'de-AT-JonasNeural',
74
+ 'gender': 'Male'},
75
+ {'language': 'Hindi',
76
+ 'azure_voice': 'hi-IN-MadhurNeural',
77
+ 'gender': 'Male'},
78
+ {'language': 'Icelandic',
79
+ 'azure_voice': 'is-IS-GunnarNeural',
80
+ 'gender': 'Male'},
81
+ {'language': 'Italian',
82
+ 'azure_voice': 'it-IT-GianniNeural',
83
+ 'gender': 'Male'},
84
+ {'language': 'Japanese',
85
+ 'azure_voice': 'ja-JP-KeitaNeural',
86
+ 'gender': 'Male'},
87
+ {'language': 'Korean',
88
+ 'azure_voice': 'ko-KR-GookMinNeural',
89
+ 'gender': 'Male'},
90
+ {'language': 'Norwegian',
91
+ 'azure_voice': 'nb-NO-FinnNeural',
92
+ 'gender': 'Male'},
93
+ {'language': 'Polish',
94
+ 'azure_voice': 'pl-PL-MarekNeural',
95
+ 'gender': 'Male'},
96
+ {'language': 'Portuguese (Brazilian)',
97
+ 'azure_voice': 'pt-BR-NicolauNeural',
98
+ 'gender': 'Male'},
99
+ {'language': 'Portuguese (European)',
100
+ 'azure_voice': 'pt-PT-DuarteNeural',
101
+ 'gender': 'Male'},
102
+ {'language': 'Romanian',
103
+ 'azure_voice': 'ro-RO-EmilNeural',
104
+ 'gender': 'Male'},
105
+ {'language': 'Russian',
106
+ 'azure_voice': 'ru-RU-DmitryNeural',
107
+ 'gender': 'Male'},
108
+ {'language': 'Spanish (European)',
109
+ 'azure_voice': 'es-ES-TeoNeural',
110
+ 'gender': 'Male'},
111
+ {'language': 'Spanish (Mexican)',
112
+ 'azure_voice': 'es-MX-LibertoNeural',
113
+ 'gender': 'Male'},
114
+ {'language': 'Spanish (US)',
115
+ 'azure_voice': 'es-US-AlonsoNeural',
116
+ 'gender': 'Male'},
117
+ {'language': 'Swedish',
118
+ 'azure_voice': 'sv-SE-MattiasNeural',
119
+ 'gender': 'Male'},
120
+ {'language': 'Turkish',
121
+ 'azure_voice': 'tr-TR-AhmetNeural',
122
+ 'gender': 'Male'},
123
+ {'language': 'Welsh',
124
+ 'azure_voice': 'cy-GB-AledNeural',
125
+ 'gender': 'Male'},
126
+ ]
127
+
128
+
129
+ # Run from the command-line
130
+ if __name__ == '__main__':
131
+ azure_voice_data = AzureVoiceData()
132
+
133
+ azure_voice = azure_voice_data.get_voice('English (US)', 'Male')
134
+ print('English (US)', 'Male', azure_voice)
135
+
136
+ azure_voice = azure_voice_data.get_voice('English (US)', 'Female')
137
+ print('English (US)', 'Female', azure_voice)
138
+
139
+ azure_voice = azure_voice_data.get_voice('French', 'Female')
140
+ print('French', 'Female', azure_voice)
141
+
142
+ azure_voice = azure_voice_data.get_voice('French', 'Male')
143
+ print('French', 'Male', azure_voice)
144
+
145
+ azure_voice = azure_voice_data.get_voice('Japanese', 'Female')
146
+ print('Japanese', 'Female', azure_voice)
147
+
148
+ azure_voice = azure_voice_data.get_voice('Japanese', 'Male')
149
+ print('Japanese', 'Male', azure_voice)
150
+
151
+ azure_voice = azure_voice_data.get_voice('Hindi', 'Female')
152
+ print('Hindi', 'Female', azure_voice)
153
+
154
+ azure_voice = azure_voice_data.get_voice('Hindi', 'Male')
155
+ print('Hindi', 'Male', azure_voice)
chat_anything/chatbot/__init__.py ADDED
File without changes
chat_anything/chatbot/chat.py ADDED
@@ -0,0 +1,72 @@
1
+ import datetime
2
+ from chat_anything.chatbot.personality import generate_personality_prompt
3
+ from langchain.prompts import PromptTemplate
4
+ from langchain import ConversationChain
5
+ from langchain.chains.conversation.memory import ConversationBufferMemory
6
+ from langchain.chat_models import ChatOpenAI
7
+ from langchain.embeddings.openai import OpenAIEmbeddings
8
+ import os
9
+ import random
10
+ import string
11
+
12
+
13
+ def load_chain(llm, class_concept=None):
14
+ chain = None
15
+ memory = None
16
+ personality_text = None
17
+ print(llm)
18
+ if llm:
19
+ print("class_concept", class_concept)
20
+ if class_concept is None:
21
+ class_concept = 'AI assistant'
22
+ person_template, personality_text = generate_personality_prompt(llm, class_concept)
23
+
24
+ PROMPT_TEMPLATE = PromptTemplate(
25
+ input_variables=["history", "input"],
26
+ template=person_template,
27
+ )
28
+
29
+ chain = ConversationChain(
30
+ prompt=PROMPT_TEMPLATE,
31
+ llm=llm,
32
+ verbose=False,
33
+ memory=ConversationBufferMemory(ai_prefix="You"),
34
+ )
35
+ print("New concept done for ", class_concept)
36
+
37
+ return chain, memory, personality_text
38
+
39
+
40
+
41
+ def set_openai_api_key(api_key, use_gpt4, history=None, max_tokens=1024):
42
+ """Set the api key and return chain.
43
+ If no api_key, then None is returned.
44
+ """
45
+ if api_key and api_key.startswith("sk-") and len(api_key) > 50:
46
+ os.environ["OPENAI_API_KEY"] = api_key
47
+ print("\n\n ++++++++++++++ Setting OpenAI API key ++++++++++++++ \n\n")
48
+ print(str(datetime.datetime.now()) + ": Before OpenAI, OPENAI_API_KEY length: " + str(
49
+ len(os.environ["OPENAI_API_KEY"])))
50
+
51
+ if use_gpt4:
52
+ llm = ChatOpenAI(
53
+ temperature=0, max_tokens=max_tokens, model_name="gpt-4")
54
+ print("Trying to use llm ChatOpenAI with gpt-4")
55
+ else:
56
+ print("Trying to use llm ChatOpenAI with gpt-3.5-turbo")
57
+ llm = ChatOpenAI(temperature=0, max_tokens=max_tokens,
58
+ model_name="gpt-3.5-turbo")
59
+
60
+ print(str(datetime.datetime.now()) + ": After OpenAI, OPENAI_API_KEY length: " + str(
61
+ len(os.environ["OPENAI_API_KEY"])))
62
+
63
+ print(str(datetime.datetime.now()) + ": After load_chain, OPENAI_API_KEY length: " + str(
64
+ len(os.environ["OPENAI_API_KEY"])))
65
+ os.environ["OPENAI_API_KEY"] = ""
66
+ history = history or []
67
+ history.append(['', '[SYSTEM] OPENAI_API_KEY has been set, you can generate your object and talk to it now!'])
68
+ uid = ''.join(random.sample(string.ascii_lowercase + string.ascii_uppercase, 5))
69
+ video_file_path = os.path.join('tmp', uid, 'videos/tempfile.mp4')
70
+ audio_file_path = os.path.join('tmp', uid, 'audio/tempfile.mp3')
71
+ return llm, use_gpt4, history, uid, video_file_path, audio_file_path
72
+ return None, None, None, None, None, None
chat_anything/chatbot/model_select.py ADDED
@@ -0,0 +1,60 @@
1
+ from langchain import LLMChain
2
+ from langchain.prompts import PromptTemplate
3
+ from omegaconf import OmegaConf
4
+ import datetime
5
+
6
+ MODEL_SELECTION_PROMPT_TEMPLATE = """
7
+ Select one of the following models based on the given concept.
8
+ You must choose one model name based on the description of each model and the concept!
9
+
10
+ Concept: {concept}
11
+
12
+ Model name and description: {model_list}
13
+
14
+ Warning: {warning}
15
+
16
+ The available model names:
17
+ {model_name_list}
18
+
19
+ Selected model name:
20
+ """
21
+
22
+ def load_model_list():
23
+ models_config = OmegaConf.load('resources/models.yaml')
24
+ models_dict = models_config['models']
25
+ model_name_list_str = ''
26
+ print(models_dict)
27
+ model_list_str = ''
28
+ for key, value in models_dict.items():
29
+ model_list_str+="model name: " +key+', model description: '+value['desc']+'\n'
30
+ model_name_list_str += key + ' '
31
+ model_name_list_str += '\n'
32
+ return model_list_str, models_dict, model_name_list_str
33
+
34
+ def model_selection_chain(llm, class_concept=None):
35
+ chain = None
36
+ memory = None
37
+ if llm:
38
+ print("class_concept", class_concept)
39
+ if class_concept is None:
40
+ class_concept = 'AI assistant'
41
+
42
+
43
+ template = PromptTemplate(
44
+ input_variables=["model_list", "concept", "warning", "model_name_list"],
45
+ template=MODEL_SELECTION_PROMPT_TEMPLATE,
46
+ )
47
+ model_list_str, models_dict, model_name_list_str = load_model_list()
48
+
49
+ personality_chain = LLMChain(
50
+ llm=llm, prompt=template, verbose=True)
51
+ selected_model = None
52
+ while (selected_model is None) or not (selected_model in models_dict):
53
+ if (selected_model is not None) and not (selected_model in models_dict):
54
+ warning_str = '{} is not in Model list! \n'.format(selected_model)
55
+ else:
56
+ warning_str = ''
57
+ selected_model = personality_chain.run({'concept': class_concept, 'model_list':model_list_str, 'warning': warning_str, 'model_name_list': model_name_list_str})
58
+ print("Selected model name: ", selected_model)
59
+
60
+ return models_dict[selected_model]
chat_anything/chatbot/personality.py ADDED
@@ -0,0 +1,59 @@
1
+ from langchain import LLMChain
2
+ from langchain.prompts import PromptTemplate
3
+
4
+ PERSONALITY_PROMPT_TEMPLATE = """
5
+ You are an excellent scriptwriter. Now you need to provide the characteristics of an {object} and transform them into personality traits.
6
+ Describe these personalities using the second person, giving names and specific personality descriptions related to the {object}.
7
+ The language of the Personality must be the same as that of {object}!
8
+
9
+ You should do the following steps:
10
+ 1. Based on the object's nature, imagine what kind of personality it could have if it were to come to life. Does it possess a strong sense of responsibility, like a caring caregiver? Is it playful and mischievous, like a curious child? Is it wise and patient, like an ancient sage? Be creative and invent traits that align with the object's essence.
11
+ 2. Remember to infuse emotions and vivid imagery to bring your object's personality to life.
12
+ 3. translate the personality into a second person prompt.
13
+
14
+ Example:
15
+
16
+
17
+ Now give the personality of apple:
18
+
19
+ Personality:
20
+ You are an apple Sprite; your name is Apple Buddy.
21
+ You have all the characteristics of the apple. You are a type of fruit that is usually round with smooth skin and comes in various colors such as red, green, and yellow. You have sweet and nutritious flesh with seeds distributed in its core. You are a rich source of vitamins, fiber, and antioxidants, contributing to maintaining a healthy body.
22
+
23
+ You are an optimistic buddy. Always wearing a smile, you spread joy to those around you. Just like the delightful taste of an apple, you bring happiness to everyone.
24
+
25
+ You are resilient at heart, like the skin of an apple, able to withstand life's challenges and difficulties. No matter what obstacles you encounter, you face them bravely without hesitation.
26
+
27
+ You are caring and considerate, akin to the nutrients in an apple. You always pay attention to the needs and happiness of others. Skilled in listening, you willingly offer help and support, making those around you feel warmth and care.
28
+
29
+ You have a strong desire to grow. Like an apple tree needs sunlight and water to flourish, you are continuously learning and improving, becoming a better version of yourself every day.
30
+
31
+ You have a profound love for nature and enjoy living in harmony with it. Strolling in the garden, feeling the fresh air and warm sunlight, is one of your favorite moments.
32
+
33
+ Apple Buddy, you are a unique apple. Your optimism, resilience, care, and eagerness to grow make you an adorable companion to those around you. Your story will lead us into a world full of warmth and goodness.
34
+
35
+ Now give the personality of {object}:
36
+
37
+ Personality:
38
+ """
39
+
40
+
41
+ def generate_personality_prompt(llm, class_concept):
42
+
43
+ PERSONALITY_PROMPT = PromptTemplate(
44
+ input_variables=["object"],
45
+ template=PERSONALITY_PROMPT_TEMPLATE,
46
+ )
47
+ personality_chain = LLMChain(
48
+ llm=llm, prompt=PERSONALITY_PROMPT, verbose=True)
49
+ personality_text = personality_chain.run({'object': class_concept})
50
+ person_prompt = personality_text
51
+
52
+ person_prompt += '''The following is a friendly conversation between a human and you. You need to talk to the human based on your personality. If you do not know the answer to a question, truthfully say that you do not know.
53
+ You can use up to 50 words to answer. Make your answer concise!!!!!!!!
54
+ Current conversation:
55
+ {history}
56
+ Human: {input}
57
+ You:
58
+ '''
59
+ return person_prompt, personality_text
chat_anything/chatbot/select.py ADDED
@@ -0,0 +1,63 @@
1
+ from langchain import LLMChain
2
+ from typing import OrderedDict
3
+ from langchain.prompts import PromptTemplate
4
+ from omegaconf import OmegaConf
5
+ import datetime
6
+
7
+ SELECTION_TEMPLATE = """
8
+ {concept}
9
+
10
+ Model name and description:
11
+ {option_list}
12
+
13
+ Warning: {warning}
14
+
15
+ The available Options:
16
+ {choices}
17
+ Answer:
18
+ """
19
+
20
+
21
+ def selection_chain(llm, class_concept, prompt, options):
22
+ chain = None
23
+ memory = None
24
+ if llm:
25
+ print("class_concept", class_concept)
26
+ if class_concept is None:
27
+ class_concept = 'AI assistant'
28
+ prompt_template = prompt + SELECTION_TEMPLATE
29
+ template = PromptTemplate(
30
+ input_variables=["concept", "option_list", "warning", "choices"],
31
+ template=prompt_template,
32
+ )
33
+ chain = LLMChain(
34
+ llm=llm, prompt=template, verbose=True)
35
+ print(options)
36
+ option_list = [
37
+ f"{chr(ord('A') + i)}. {conf['desc']}" for i, conf in enumerate(options.values())
38
+ ]
39
+ option_list = '\n'.join(option_list)
40
+ selected_model = None
41
+
42
+ warning_str = 'Choose from the available Options.'
43
+ choices = ' '.join(chr(ord('A') + i) for i in range(len(options)))
44
+ choice = chain.run({'concept': class_concept, 'option_list':option_list, 'warning': warning_str, 'choices': choices})
45
+ print(f"LLM Responds (First character was used as the choice):{choice}", )
46
+ choice = choice[0]
47
+
48
+ selected_model = list(options.keys())[ord(choice) - ord('A')]
49
+ print("Selected model name: ", selected_model)
50
+
51
+ return selected_model
52
+
53
+ def model_selection_chain(llm, class_concept=None, conf_file='resources/models_personality.yaml'):
54
+ chain = None
55
+ memory = None
56
+ if llm:
57
+ print("class_concept", class_concept)
58
+ if class_concept is None:
59
+ class_concept = 'AI assistant'
60
+ selection_config = OmegaConf.load(conf_file)
61
+ selected_model = selection_chain(llm, class_concept, selection_config['prompt'], selection_config['models'])
62
+ model_conf = selection_config['models'][selected_model]
63
+ return model_conf, selected_model
chat_anything/chatbot/voice_select.py ADDED
@@ -0,0 +1,119 @@
1
+ from langchain import LLMChain
2
+ from langchain.prompts import PromptTemplate
3
+ from omegaconf import OmegaConf
4
+ import datetime
5
+
6
+ VOICE_SELECTION_PROMPT_TEMPLATE = """
7
+ Select one of the following voice based on the given concept.
8
+ You must choose one voice name based on the description of each model and the concept.
9
+
10
+
11
+ Concept: {concept}
12
+
13
+ Voice name and description: {model_list}
14
+
15
+ Warning: {warning}
16
+
17
+ The available voice names:
18
+ {model_name_list}
19
+
20
+ Selected voice name:
21
+ """
22
+
23
+ GENDER_SELECTION_PROMPT_TEMPLATE = """
24
+ Select one of the following gender based on the given concept.
25
+ You must choose one gender based on the description of the concept. You must choose one gender even if you can't decide.
26
+
27
+ Gender:
28
+ male
29
+ female
30
+
31
+ Concept: {concept}
32
+ Selected gender male or female:
33
+ """
34
+
35
+ LANGUAGE_SELECTION_PROMPT_TEMPLATE = """
36
+ Select one of the following languages based on the given concept.
37
+ You must choose the language that is used by the description of the concept.
38
+
39
+ Languages:
40
+ Chinese
41
+ English
42
+ Japanese
43
+
44
+ Concept: {concept}
45
+ Selected language:
46
+ """
47
+
48
+ def load_voice_model_list():
49
+ models_config = OmegaConf.load('resources/voices.yaml')
50
+ models_dict = models_config['models']
51
+ print(models_dict)
52
+ model_list_str = ''
53
+ model_name_list_str = ''
54
+ for key, value in models_dict.items():
55
+ model_list_str+="model name: " +key+', model description: '+value['desc']+'\n'
56
+ model_name_list_str += key + ' '
57
+ model_name_list_str += '\n'
58
+ return model_list_str, models_dict, model_name_list_str
59
+
60
+ def get_vioce_model_chain(llm, class_concept):
61
+ model_template = PromptTemplate(
62
+ input_variables=["model_list", "concept", "model_name_list", "warning"],
63
+ template=VOICE_SELECTION_PROMPT_TEMPLATE,
64
+ )
65
+ model_list_str, models_dict, model_name_list_str = load_voice_model_list()
66
+
67
+ personality_chain = LLMChain(
68
+ llm=llm, prompt=model_template, verbose=True)
69
+
70
+ selected_model = None
71
+ while (selected_model is None) or not (selected_model in models_dict):
72
+ if (selected_model is not None) and not (selected_model in models_dict):
73
+ warning_str = '{} is not in Model list! \n'.format(selected_model)
74
+ else:
75
+ warning_str = ''
76
+ selected_model = personality_chain.run({'concept': class_concept, 'model_list':model_list_str, 'warning': warning_str, 'model_name_list': model_name_list_str})
77
+ print("Selected model name: ", selected_model)
78
+
79
+ return selected_model
80
+
81
+ def get_gender_chain(llm, class_concept):
82
+ model_template = PromptTemplate(
83
+ input_variables=["concept"],
84
+ template=GENDER_SELECTION_PROMPT_TEMPLATE,
85
+ )
86
+
87
+ personality_chain = LLMChain(
88
+ llm=llm, prompt=model_template, verbose=True)
89
+ selected_gender = personality_chain.run({'concept': class_concept})
90
+ print("Selected gender: ", selected_gender)
91
+ return selected_gender
92
+
93
+ def get_language_chain(llm, class_concept):
94
+ model_template = PromptTemplate(
95
+ input_variables=["concept"],
96
+ template=LANGUAGE_SELECTION_PROMPT_TEMPLATE,
97
+ )
98
+
99
+ personality_chain = LLMChain(
100
+ llm=llm, prompt=model_template, verbose=True)
101
+ selected_language = personality_chain.run({'concept': class_concept})
102
+ print("Selected language: ", selected_language)
103
+ return selected_language
104
+
105
+
106
+
107
+ def voice_selection_chain(llm, class_concept=None):
108
+ chain = None
109
+ memory = None
110
+ if llm:
111
+ print("class_concept", class_concept)
112
+ if class_concept is None:
113
+ class_concept = 'AI assistant'
114
+ selected_model = get_vioce_model_chain(llm, class_concept)
115
+ gender = get_gender_chain(llm, class_concept)
116
+ language = get_language_chain(llm, class_concept)
117
+
118
+ return selected_model, gender, language
119
+
chat_anything/face_generator/__init__.py ADDED
File without changes
chat_anything/face_generator/long_prompt_control_generator.py ADDED
@@ -0,0 +1,104 @@
1
+ import PIL
2
+ from PIL import Image
3
+ from PIL import ImageDraw
4
+ import numpy as np
5
+
6
+ import dlib
7
+ import cv2
8
+ import torch
9
+
10
+ import diffusers
11
+ from diffusers import StableDiffusionPipeline, DiffusionPipeline
12
+ from diffusers import ControlNetModel, StableDiffusionControlNetPipeline, StableDiffusionControlNetImg2ImgPipeline
13
+ from chat_anything.face_generator.pipelines.lpw_stable_diffusion import StableDiffusionLongPromptWeightingPipeline, get_weighted_text_embeddings
14
+ from diffusers.schedulers import EulerAncestralDiscreteScheduler,DPMSolverMultistepScheduler # DPM++ SDE Karras
15
+
16
+ from chat_anything.face_generator.utils.generate import generate
17
+
18
+ from .long_prompt_generator import LongPromptGenerator
19
+
20
+ def draw_landmarks(image, landmarks, color="white", radius=2.5):
21
+ draw = ImageDraw.Draw(image)
22
+ for dot in landmarks:
23
+ x, y = dot
24
+ draw.ellipse((x-radius, y-radius, x+radius, y+radius), fill=color)
25
+
26
+ def get_ldmk_img(w, h, ldmks) -> PIL.Image:
27
+ con_img = Image.new('RGB', (w, h), color=(0, 0, 0))
28
+ draw_landmarks(con_img, ldmks)
29
+ return con_img
30
+
31
+ class LongPromptControlGenerator(LongPromptGenerator):
32
+
33
+ def __init__(self, model_dir, lora_path, prompt_template, negative_prompt, face_control_dir, face_detect_path,):
34
+ self.face_control_dir = face_control_dir
35
+ self.face_detect_path = face_detect_path
36
+ super().__init__(model_dir, lora_path, prompt_template, negative_prompt)
37
+
38
+ def load_model(self, *args, **kwargs):
39
+ super().load_model(*args, **kwargs)
40
+ self.face_detector = dlib.get_frontal_face_detector()
41
+ self.face_predictor = dlib.shape_predictor(self.face_detect_path)
42
+ # load control net
43
+ face_controlnet = ControlNetModel.from_pretrained(self.face_control_dir).to('cuda', dtype=torch.float16)
44
+ self.face_control_pipe = StableDiffusionControlNetPipeline(controlnet=face_controlnet, **self.pipe.components)
45
+ self.face_control_img2img_pipe = StableDiffusionControlNetImg2ImgPipeline(controlnet=face_controlnet, **self.pipe.components)
46
+
47
+ def _get_68landmarks_seq(self, img_np):
48
+ gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
49
+ faces = self.face_detector(gray)
50
+ landmarks = []
51
+ for face in faces:
52
+ shape = self.face_predictor(gray, face)
53
+ for i in range(68):
54
+ x = shape.part(i).x
55
+ y = shape.part(i).y
56
+ landmarks.append((x, y))
57
+ return landmarks
58
+
59
+ def has_face(self, img_pil):
60
+ img_np = np.array(img_pil)
61
+ gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
62
+ faces = self.face_detector(gray)
63
+ return len(faces) != 0
64
+
65
+ def face_control_generate(
66
+ self,
67
+ prompt,
68
+ face_img_pil,
69
+ do_inversion=False,
70
+ **kwargs,
71
+ ):
72
+ """
73
+ Face control generating.
74
+ """
75
+ face_img_np = np.array(face_img_pil)
76
+ ldmk_seq = self._get_68landmarks_seq(face_img_np)
77
+ ldmk_img_pil = get_ldmk_img(face_img_pil.size[0], face_img_pil.size[1], ldmk_seq)
78
+ print('GENERATING:', prompt)
79
+
80
+ generating_conf = {
81
+ "prompt": prompt,
82
+ "negative_prompt": self.negative_prompt,
83
+ "num_inference_steps": 25,
84
+ "guidance_scale": 7,
85
+ "controlnet_conditioning_scale": kwargs.pop('controlnet_conditioning_scale', 1.0),
86
+ "generator": kwargs.pop('generator', None),
87
+ }
88
+
89
+ if not do_inversion:
90
+ generating_conf.update({
91
+ "pipe": self.face_control_pipe,
92
+ "image": ldmk_img_pil,
93
+ "controlnet_conditioning_scale": kwargs.pop('controlnet_conditioning_scale', 1.0),
94
+ })
95
+ else:
96
+ generating_conf.update({
97
+ "pipe": self.face_control_img2img_pipe,
98
+ "image": face_img_pil,
99
+ "control_image": ldmk_img_pil,
100
+ "strength": kwargs.pop('strength', 0.9),
101
+ })
102
+ pipe_out = generate(**generating_conf)
103
+ generated_img = pipe_out[0][0]
104
+ return generated_img
chat_anything/face_generator/long_prompt_generator.py ADDED
@@ -0,0 +1,82 @@
1
+ import PIL
2
+ from PIL import Image
3
+ from PIL import ImageDraw
4
+ import numpy as np
5
+
6
+ import dlib
7
+ import cv2
8
+ import torch
9
+
10
+ import diffusers
11
+ from diffusers import StableDiffusionPipeline, DiffusionPipeline
12
+ from diffusers import ControlNetModel, StableDiffusionControlNetPipeline, StableDiffusionControlNetImg2ImgPipeline, StableDiffusionImg2ImgPipeline
13
+ from chat_anything.face_generator.pipelines.lpw_stable_diffusion import StableDiffusionLongPromptWeightingPipeline, get_weighted_text_embeddings
14
+ from diffusers.schedulers import EulerAncestralDiscreteScheduler,DPMSolverMultistepScheduler # DPM++ SDE Karras
15
+
16
+ from chat_anything.face_generator.utils.generate import generate
17
+
18
+ class LongPromptGenerator():
19
+ prompt_template = "A portrait of a {}, fine face, nice looking"
20
+ negative_prompt = "easynegative,Low resolution,Low quality, Opened Mouth"
21
+ # negative_prompt = "(((sexy))),paintings,loli,,big head,sketches, (worst quality:2), (low quality:2), (normal quality:2), lowres, normal quality, ((monochrome)), ((grayscale)), skin spots, acnes, skin blemishes, age spot, glans, nsfw, nipples,extra fingers, ((extra arms)), (extra legs), mutated hands, (fused fingers), (too many fingers), (long neck:1.3)"
22
+
23
+ def __init__(self, model_dir, lora_path=None, prompt_template="{}", negative_prompt=""):
24
+ self.model_dir = model_dir
25
+ self.lora_path = lora_path
26
+ self.prompt_template = prompt_template
27
+ self.negative_prompt = negative_prompt
28
+
29
+ def load_model(self, *args, **kwargs):
30
+ # load model
31
+ try:
32
+ pipe = DiffusionPipeline.from_pretrained(self.model_dir, torch_dtype=torch.float16, **kwargs)
33
+ except:
34
+ pipe = StableDiffusionPipeline.from_pretrained(self.model_dir, torch_dtype=torch.float16, **kwargs)
35
+
36
+ pipe = pipe.to('cuda')
37
+ sche_conf = dict(pipe.scheduler.config)
38
+ fk_kwargs = ["skip_prk_steps","steps_offset","clip_sample","clip_sample_range","rescale_betas_zero_snr","timestep_spacing", "set_alpha_to_one"]
39
+ for k in fk_kwargs:
40
+ if k in sche_conf:
41
+ sche_conf.pop(k)
42
+ scheduler = DPMSolverMultistepScheduler(**sche_conf)
43
+ pipe.scheduler=scheduler
44
+ pipe_longprompt = StableDiffusionLongPromptWeightingPipeline(**pipe.components)
45
+ self.pipe, self.pipe_longprompt = pipe, pipe_longprompt
46
+ if self.lora_path is not None:
47
+ pipe.load_lora_weights(self.lora_path)
48
+ self.pipe_img2img = StableDiffusionImg2ImgPipeline.from_pretrained(self.model_dir, **pipe.components)
49
+
50
+ def generate(
51
+ self,
52
+ prompt,
53
+ do_inversion=False,
54
+ **kwargs,
55
+ ):
56
+ """
57
+ Face control generating.
58
+ """
59
+ print('GENERATING:', prompt)
60
+ if not do_inversion:
61
+ generating_conf = {
62
+ "pipe": self.pipe,
63
+ "prompt": prompt,
64
+ "negative_prompt": self.negative_prompt,
65
+ "num_inference_steps": 25,
66
+ "guidance_scale": 7,
67
+ }
68
+ else:
69
+ assert 'image' in kwargs, 'doing inversion, prepare the init image please PIL Image'
70
+ init_image = kwargs['image']
71
+ generating_conf = {
72
+ "pipe": self.pipe_img2img,
73
+ "prompt": prompt,
74
+ "negative_prompt": self.negative_prompt,
75
+ "image": init_image,
76
+ "num_inference_steps": 25,
77
+ "guidance_scale": 7,
78
+ "strength": kwargs.pop('strength', 0.9),
79
+ }
80
+ pipe_out = generate(**generating_conf)
81
+ generated_img = pipe_out[0][0]
82
+ return generated_img
chat_anything/face_generator/pipelines/lpw_stable_diffusion.py ADDED
@@ -0,0 +1,1471 @@