phamngoctukts committed on
Commit
67b7ca7
1 Parent(s): 4b2dda3

Upload 3 files

Files changed (3)
  1. ClassPrompt.py +305 -0
  2. app.py +287 -144
  3. render.py +94 -0
ClassPrompt.py ADDED
@@ -0,0 +1,305 @@
+ import random  # Import the random module
+ from groq import Groq
+ from openai import OpenAI
+ import os
+
+ class PromptClass:
+     def __init__(self):
+         self.huggingface_token = os.environ.get("HF_TOKEN")
+         self.groq_api_key = os.environ.get("GROQ_TOKEN")
+         self.sambanova_api_key = os.environ.get("SAMBANOVA_TOKEN")
+         self.huggingface_client = OpenAI(
+             base_url="https://api-inference.huggingface.co/v1/",
+             api_key=self.huggingface_token,
+         )
+         self.groq_client = Groq(api_key=self.groq_api_key)
+         self.sambanova_client = OpenAI(
+             api_key=self.sambanova_api_key,
+             base_url="https://api.sambanova.ai/v1",
+         )
+         self.download_models()
+
+     def download_models(self):
+         from huggingface_hub import hf_hub_download
+
+         hf_hub_download(
+             repo_id="stabilityai/stable-diffusion-3.5-large",
+             filename="mmdit.png",
+             local_dir="./models",
+             token=self.huggingface_token
+         )
+         hf_hub_download(
+             repo_id="stabilityai/stable-diffusion-3.5-large-turbo",
+             filename="LICENSE.md",
+             local_dir="./models",
+             token=self.huggingface_token
+         )
+
+     def generate_prompt(self, dynamic_seed, prompt_type, custom_input):
+         """
+         Generates a prompt based on the provided seed, prompt type, and custom input.
+         """
+         random.seed(dynamic_seed)
+         if custom_input and custom_input.strip():
+             prompt = custom_input
+         else:
+             prompt = f"Create a random prompt based on the '{prompt_type}' type."
+
+         # Additional logic can be added here if needed
+         print(f"Generated prompt: {prompt}")  # Debug statement
+         return prompt
+
+     def generate(
+         self,
+         input_text,
+         long_talk,
+         compress,
+         compression_level,
+         poster,
+         prompt_type,
+         custom_base_prompt="",
+         provider="Hugging Face",
+         model=None,
+     ):
+         try:
+             # Define prompts
+             default_long_prompt = """Create a detailed visually descriptive caption of this description,
+ which will be used as a prompt for a text to image AI system (caption only, no instructions like "create an image").
+ Remove any mention of digital artwork or artwork style. Give detailed visual descriptions of the character(s), including ethnicity, skin tone, expression etc.
+ Imagine using keywords for a still for someone who has aphantasia. Describe the image style, e.g., any photographic or art styles/techniques utilized.
+ Make sure to fully describe all aspects of the cinematography, with abundant technical details and visual descriptions.
+ If there is more than one image, combine the elements and characters from all of the images creatively into a single
+ cohesive composition with a single background, inventing an interaction between the characters.
+ Be creative in combining the characters into a single cohesive scene.
+ Focus on two primary characters (or one) and describe an interesting interaction between them, such as a hug, a kiss, a fight, giving an object,
+ an emotional reaction/interaction. If there is more than one background in the images, pick the most appropriate one.
+ Your output is only the caption itself, no comments or extra formatting.
+ The caption is in a single long paragraph.
+ If you feel the images are inappropriate, invent a new scene/characters inspired by these.
+ Additionally, incorporate a specific movie director's visual style and describe the lighting setup in detail,
+ including the type, color, and placement of light sources to create the desired mood and atmosphere.
+ Always frame the scene, including details about the film grain, color grading, and any artifacts or characteristics specific."""
+
+             default_simple_prompt = """Create a brief, straightforward caption for this description, suitable for a text-to-image AI system.
+ Focus on the main elements, key characters, and overall scene without elaborate details.
+ Provide a clear and concise description in one or two sentences. Your output is only the caption itself, no comments or extra formatting.
+ The caption is in a single long paragraph."""
+
+             poster_prompt = """Analyze the provided description and extract key information to create a movie poster style description. Format the output as follows:
+ Title: A catchy, intriguing title that captures the essence of the scene, place the title in "".
+ Main character: Give a description of the main character.
+ Background: Describe the background in detail.
+ Supporting characters: Describe the supporting characters.
+ Branding type: Describe the branding type.
+ Tagline: Include a tagline that captures the essence of the movie.
+ Visual style: Ensure that the visual style fits the branding type and tagline.
+ You are allowed to make up film and branding names, and do them like 80's, 90's or modern movie posters.
+ Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""
+
+             only_objects_prompt = """Create a highly detailed and visually rich description focusing solely on inanimate objects,
+ without including any human or animal figures. Describe the objects' shapes, sizes, colors, textures, and materials in great detail.
+ Pay attention to their arrangement, positioning, and how they interact with light and shadow. Include information about the setting
+ or environment these objects are in, such as indoor/outdoor, time of day, weather conditions, and any atmospheric effects.
+ Mention any unique features, patterns, or imperfections on the objects. Describe the overall composition, perspective, and
+ any artistic techniques that might be employed to render these objects (e.g., photorealism, impressionistic style, etc.).
+ Your description should paint a vivid picture that allows someone to imagine the scene without seeing it, focusing on the beauty,
+ complexity, or significance of everyday objects. Your output is only the caption itself, no comments or extra formatting.
+ The caption is in a single long paragraph."""
+
+             no_figure_prompt = """Generate a comprehensive and visually evocative description of a scene
+ or landscape without including any human or animal figures. Focus on the environment, natural elements, and man-made structures if present.
+ Describe the topography, vegetation, weather conditions, and time of day in great detail.
+ Pay attention to colors, textures, and how light interacts with different elements of the scene.
+ If there are buildings or other structures, describe their architecture, condition, and how they fit into the landscape.
+ Include sensory details beyond just visual elements - mention sounds, smells, and the overall atmosphere or mood of the scene.
+ Describe any notable features like bodies of water, geological formations, or sky phenomena.
+ Consider the perspective from which the scene is viewed and how this affects the composition.
+ Your description should transport the reader to this location, allowing them to vividly imagine the scene without any living subjects present.
+ Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""
+
+             landscape_prompt = """Create an immersive and detailed description of a landscape,
+ focusing on its natural beauty and geographical features.
+ Begin with the overall topography - is it mountainous, coastal, forested, desert, or a combination?
+ Describe the horizon and how land meets sky. Detail the vegetation, noting types of trees, flowers, or grass,
+ and how they're distributed across the landscape. Include information about any water features -
+ rivers, lakes, oceans - and how they interact with the land. Describe the sky, including cloud formations,
+ color gradients, and any celestial bodies visible.
+ Pay attention to the quality of light, time of day, and season, explaining how these factors affect the colors and shadows in the scene.
+ Include details about weather conditions and how they impact the landscape.
+ Mention any geological features like rock formations, cliffs, or unique land patterns.
+ If there are any distant man-made elements, describe how they integrate with the natural setting.
+ Your description should capture the grandeur and mood of the landscape,
+ allowing the reader to feel as if they're standing within this awe-inspiring natural scene.
+ Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""
+
+             fantasy_prompt = """Craft an extraordinarily detailed and imaginative description of a fantasy scene,
+ blending elements of magic, otherworldly creatures, and fantastical environments. Begin by setting the overall tone -
+ is this a dark and foreboding realm, a whimsical fairytale setting, or an epic high-fantasy world?
+ Describe the landscape, including any impossible or magical geographical features like floating islands,
+ crystal forests, or rivers of starlight. Detail the flora and fauna,
+ focusing on fantastical plants and creatures that don't exist in our world.
+ Include descriptions of any structures or ruins, emphasizing their otherworldly architecture and magical properties.
+ Describe the sky and any celestial bodies, considering how they might differ from our reality.
+ Include details about the presence of magic - how it manifests visually,
+ its effects on the environment, and any magical phenomena occurring in the scene.
+ If there are characters present, describe their appearance, focusing on non-human features, magical auras, or
+ fantastical clothing and accessories. Pay attention to colors, textures, and light sources,
+ especially those that couldn't exist in the real world. Your description should transport the
+ reader to a realm of pure imagination, where the laws of physics and nature as we know them don't apply.
+ Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""
+
+             prompt_types = {
+                 "Long": default_long_prompt,
+                 "Short": default_simple_prompt,
+                 "Medium": poster_prompt,
+                 "OnlyObjects": only_objects_prompt,
+                 "NoFigure": no_figure_prompt,
+                 "Landscape": landscape_prompt,
+                 "Fantasy": fantasy_prompt,
+             }
+
+             # Determine the base prompt
+             print(f"Received prompt_type: '{prompt_type}'")  # Debug print
+             if prompt_type == "Random":
+                 prompt_type = random.choice(list(prompt_types.keys()))
+                 print(f"Randomly selected prompt type: {prompt_type}")
+
+             if prompt_type and prompt_type.strip() and prompt_type in prompt_types:
+                 base_prompt = prompt_types[prompt_type]
+                 print(f"Using {prompt_type} prompt")
+             elif custom_base_prompt.strip():
+                 base_prompt = custom_base_prompt
+                 print("Using custom base prompt")
+             else:
+                 base_prompt = default_long_prompt
+                 print(f"Warning: Unknown or empty prompt type '{prompt_type}'. Using default long prompt.")
+
+             # Handle compression if applicable
+             if compress and not poster:
+                 compression_chars = {
+                     "soft": 600 if long_talk else 300,
+                     "medium": 400 if long_talk else 200,
+                     "hard": 200 if long_talk else 100,
+                 }
+                 char_limit = compression_chars.get(compression_level, 200)
+                 base_prompt += f" Compress the output to be concise while retaining key visual details. MAX OUTPUT SIZE no more than {char_limit} characters."
+
+             # Construct messages for the LLM
+             system_message = "You are a helpful assistant. Try your best to give the best response possible to the user."
+
+             if input_text.startswith("Create a random prompt based on"):
+                 user_message = f"Create a random description based on this\nInstructions: {base_prompt}"
+             else:
+                 user_message = f"{base_prompt}\nDescription: {input_text}"
+
+             # Generate a random seed
+             seed = random.randint(0, 10000)
+             print(f"Generated seed: {seed}")  # Debug print
+
+             # Select the appropriate provider
+             if provider == "Hugging Face":
+                 response = self.huggingface_client.chat.completions.create(
+                     model=model or "meta-llama/Meta-Llama-3.1-70B-Instruct",
+                     max_tokens=1024,
+                     temperature=1.0,
+                     top_p=0.95,
+                     messages=[
+                         {"role": "system", "content": system_message},
+                         {"role": "user", "content": user_message},
+                     ],
+                     seed=seed  # Pass the seed parameter
+                 )
+                 output = response.choices[0].message.content.strip()
+
+             elif provider == "Groq":
+                 response = self.groq_client.chat.completions.create(
+                     model=model or "llama-3.1-70b-versatile",
+                     max_tokens=1024,
+                     temperature=1.0,
+                     messages=[
+                         {"role": "system", "content": system_message},
+                         {"role": "user", "content": user_message},
+                     ],
+                     seed=seed  # Pass the seed parameter
+                 )
+                 output = response.choices[0].message.content.strip()
+
+             elif provider == "SambaNova":
+                 response = self.sambanova_client.chat.completions.create(
+                     model=model or "Meta-Llama-3.1-70B-Instruct",
+                     max_tokens=1024,
+                     temperature=1.0,
+                     messages=[
+                         {"role": "system", "content": system_message},
+                         {"role": "user", "content": user_message},
+                     ],
+                     seed=seed  # Pass the seed parameter
+                 )
+                 output = response.choices[0].message.content.strip()
+
+             else:
+                 raise ValueError(f"Unsupported provider: {provider}")
+
+             # Clean up the output if necessary
+             if ": " in output:
+                 output = output.split(": ", 1)[1].strip()
+             elif output.lower().startswith("here"):
+                 sentences = output.split(". ")
+                 if len(sentences) > 1:
+                     output = ". ".join(sentences[1:]).strip()
+
+             return output
+
+         except Exception as e:
+             print(f"An error occurred: {e}")
+             return f"Error occurred while processing the request: {str(e)}"
+
+     def chat(self, provider="Hugging Face", model=None, input_text=None):
+         seed = random.randint(0, 10000)
+         if input_text != "":
+             # Select the appropriate provider
+             if provider == "Hugging Face":
+                 response = self.huggingface_client.chat.completions.create(
+                     model=model or "meta-llama/Meta-Llama-3.1-70B-Instruct",
+                     max_tokens=1024,
+                     temperature=1.0,
+                     top_p=0.95,
+                     messages=input_text,
+                     seed=seed  # Pass the seed parameter
+                 )
+                 output = response.choices[0].message.content.strip()
+
+             elif provider == "Groq":
+                 response = self.groq_client.chat.completions.create(
+                     model=model or "llama-3.1-70b-versatile",
+                     max_tokens=1024,
+                     temperature=1.0,
+                     messages=[
+                         {"role": "system", "content": "You are a helpful assistant"},
+                         {"role": "user", "content": input_text},
+                     ],
+                     seed=seed  # Pass the seed parameter
+                 )
+                 output = response.choices[0].message.content.strip()
+             elif provider == "SambaNova":
+                 response = self.sambanova_client.chat.completions.create(
+                     model=model or "Meta-Llama-3.1-70B-Instruct",
+                     max_tokens=1024,
+                     temperature=1.0,
+                     messages=[
+                         {"role": "system", "content": "You are a helpful assistant"},
+                         {"role": "user", "content": input_text},
+                     ],
+                     seed=seed  # Pass the seed parameter
+                 )
+                 output = response.choices[0].message.content.strip()
+             else:
+                 raise ValueError(f"Unsupported provider: {provider}")
+             # Clean up the output if necessary
+             if ": " in output:
+                 output = output.split(": ", 1)[1].strip()
+             elif output.lower().startswith("here"):
+                 sentences = output.split(". ")
+                 if len(sentences) > 1:
+                     output = ". ".join(sentences[1:]).strip()
+             return output
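For context, a minimal usage sketch of the new class (not part of the commit). It assumes the HF_TOKEN, GROQ_TOKEN and SAMBANOVA_TOKEN environment variables are set and that the default model names hard-coded above are available; the example prompt text is made up.

```python
# Minimal sketch, assuming the API tokens are exported in the environment.
from ClassPrompt import PromptClass

creator = PromptClass()  # reads HF_TOKEN / GROQ_TOKEN / SAMBANOVA_TOKEN, downloads the two model files
caption = creator.generate(
    input_text="a quiet fishing village at dawn",  # hypothetical input
    long_talk=True,
    compress=True,
    compression_level="hard",
    poster=False,
    prompt_type="Landscape",
    provider="Hugging Face",  # or "Groq" / "SambaNova"
)
print(caption)
```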
app.py CHANGED
@@ -5,29 +5,36 @@ from io import BytesIO
  import numpy as np
  from dataclasses import dataclass, field
  import time
- import traceback
  from pydub import AudioSegment
  import librosa
  from utils.vad import get_speech_timestamps, collect_chunks, VadOptions
- from huggingface_hub import InferenceClient
  import os
  from PIL import Image
- from threading import Thread
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
  r = sr.Recognizer()

  @dataclass
  class AppState:
      stream: np.ndarray | None = None
-     image: dict = field(default_factory=dict)
      sampling_rate: int = 0
      pause_detected: bool = False
      started_talking: bool = False
      stopped: bool = False
-     message: dict = field(default_factory=dict)
      history: list = field(default_factory=list)
-     conversation: list = field(default_factory=list)
-     textout: str = ""

  def run_vad(ori_audio, sr):
      _st = time.time()
@@ -35,24 +42,17 @@ def run_vad(ori_audio, sr):
      audio = ori_audio
      audio = audio.astype(np.float32) / 32768.0
      sampling_rate = 16000
-     if sr != sampling_rate:
-         audio = librosa.resample(audio, orig_sr=sr, target_sr=sampling_rate)
      vad_parameters = {}
      vad_parameters = VadOptions(**vad_parameters)
      speech_chunks = get_speech_timestamps(audio, vad_parameters)
      audio = collect_chunks(audio, speech_chunks)
-     duration_after_vad = audio.shape[0] / sampling_rate
-     if sr != sampling_rate:
-         # resample to original sampling rate
-         vad_audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=sr)
-     else:
-         vad_audio = audio
      vad_audio = np.round(vad_audio * 32768.0).astype(np.int16)
      vad_audio_bytes = vad_audio.tobytes()
      return duration_after_vad, vad_audio_bytes, round(time.time() - _st, 4)
  except Exception as e:
-     msg = f"[asr vad error] audio_len: {len(ori_audio)/(sr*2):.3f} s, trace: {traceback.format_exc()}"
-     print(msg)
      return -1, ori_audio, round(time.time() - _st, 4)

  def determine_pause(audio:np.ndarray, sampling_rate:int, state:AppState) -> bool:
@@ -65,157 +65,300 @@ def determine_pause(audio:np.ndarray,sampling_rate:int,state:AppState) -> bool:
          state.started_talking = True
          return False
      print(f"duration_after_vad: {dur_vad:.3f} s, time_vad: {time_vad:.3f} s")
-     return (duration - dur_vad) > 1

- def process_audio(audio:tuple, image: Image, state:AppState):
-     if audio is None:
-         print("Lỗi: audio là None. Kiểm tra nguồn âm thanh.")
-         # Handle the error, e.g. exit the program or fall back to a default audio value
-     else:
-         try:
-             if state.stream is None:
-                 state.stream = audio[1]
-                 state.sampling_rate = audio[0]
-             else:
-                 state.stream = np.concatenate((state.stream, audio[1]))
-         except IndexError:
-             print("Lỗi: Chỉ mục vượt quá giới hạn của audio. Kiểm tra kích thước của audio.")
-     if image is None:
-         state.image = {"file": ""}
-     else:
-         state.image = {"file": str(image)}
-     pause_detected = determine_pause(state.stream, state.sampling_rate, state)
-     state.pause_detected = pause_detected
-     if state.pause_detected and state.started_talking:
-         return gr.Audio(recording=False), state
-     return None, state

- def response(state:AppState = AppState()):
-     max_new_tokens = 1024
      if not state.pause_detected and not state.started_talking:
-         return None, AppState()
-     audio_buffer = BytesIO()
-     segment = AudioSegment(
          state.stream.tobytes(),
          frame_rate=state.sampling_rate,
          sample_width=state.stream.dtype.itemsize,
-         channels=(1 if len(state.stream.shape) == 1 else state.stream.shape[1]),
      )
-     segment.export(audio_buffer, format="wav")
-     textin = ""
-     with sr.AudioFile(audio_buffer) as source:
-         audio_data = r.record(source)
-         try:
-             textin = r.recognize_google(audio_data, language='vi')
-         except:
-             textin = ""
-     #state.conversation.append({"role": "user", "content": "Bạn: " + textin})
-     textout = ""
      if textin != "":
-         print("Đang nghĩ...")
-         state.message = {}
-         state.message = {"text": textin, "files": state.image["file"]}
-
-         # translation section
-         txt = state.message["text"]
-         messages = []
-         images = []
-         for i, msg in enumerate(state.history):
-             if isinstance(msg[0], tuple):
-                 messages.append({"role": "user", "content": [{"type": "text", "text": state.history[i][0]}, {"type": "image"}]})
-                 messages.append({"role": "assistant", "content": [{"type": "text", "text": state.history[i][1]}]})
-                 images.append(Image.open(msg[0][0]).convert("RGB"))
-             elif isinstance(state.history[i], tuple) and isinstance(msg[0], str):
-                 # messages are already handled
-                 pass
-             elif isinstance(state.history[i][0], str) and isinstance(msg[0], str):  # text only turn
-                 messages.append({"role": "user", "content": [{"type": "text", "text": msg[0]}]})
-                 messages.append({"role": "assistant", "content": [{"type": "text", "text": msg[1]}]})
-
-         # add current message
-         if state.message["files"] != "":  # examples
-             image = Image.open(state.message["files"]).convert("RGB")
-             images.append(image)
-             messages.append({"role": "user", "content": [{"type": "text", "text": txt}, {"type": "image"}]})
-         else:  # regular input
-             messages.append({"role": "user", "content": [{"type": "text", "text": txt}]})
-         token = ""
-         try:
-             for message in client.chat_completion(
-                 messages,
-                 max_tokens=max_new_tokens,
-                 stream=True,
-                 temperature=1.0,
-                 top_p=0.9,
-             ):
-                 token += message.choices[0].delta.content
-                 textout = token
-         except:
-             print("Chưa lấy được thông tin dịch")
-         if state.message["files"] != "":
-             state.history.append([(txt, state.image["file"]), textout])
-             state.conversation.append({"role": "user", "content": "Bạn: " + str(txt) + str(state.image["file"])})
-             state.conversation.append({"role": "assistant", "content": "Bot: " + str(textout)})
          else:
-             state.history.append([txt, textout])
-             state.conversation.append({"role": "user", "content": "Bạn: " + str(txt)})
-             state.conversation.append({"role": "assistant", "content": "Bot: " + str(textout)})
      else:
-         textout = "Tôi không nghe rõ"

-     # read the translated text aloud
-     ssr = state.stream.tobytes()
-     print("Đang đọc...")
-     try:
-         mp3 = gTTS(textout, tld='com.vn', lang='vi', slow=False)
-         mp3_fp = BytesIO()
-         mp3.write_to_fp(mp3_fp)
-         srr = mp3_fp.getvalue()
-     except:
-         print("Lỗi không đọc được")
-     finally:
-         mp3_fp.close()
-     yield srr, AppState(conversation=state.conversation, history=state.history)

- def start_recording_user(state:AppState):  # bug fix here
-     if not state.stopped:
-         return gr.Audio(recording=True)

- title = "vietnamese by tuphamkts"
- description = "A vietnamese text-to-speech demo."

- with gr.Blocks() as demo:
      with gr.Row():
          with gr.Column():
              input_audio = gr.Audio(label="Nói cho tôi nghe nào", sources="microphone", type="numpy")
-             input_image = gr.Image(label="Hình ảnh của bạn", sources="upload", type="filepath")
-         with gr.Column():
-             chatbot = gr.Chatbot(label="Nội dung trò chuyện", type="messages")
-             output_audio = gr.Audio(label="Trợ lý", autoplay=True)
-     with gr.Row():
-         output_image = gr.Image(label="Hình ảnh sau xử lý", sources="clipboard", type="filepath", visible=False)
-     state = gr.State(value=AppState())
      stream = input_audio.stream(
          process_audio,
-         [input_audio, input_image, state],
-         [input_audio, state],
-         stream_every=0.50,
          time_limit=30,
      )
      respond = input_audio.stop_recording(
-         response,
-         [state],
-         [output_audio, state],
      )
      respond.then(lambda s: s.conversation, [state], [chatbot])
-     #respond.then(lambda s: s.image, [state], [output_image])
      restart = output_audio.stop(
-         start_recording_user,
          [state],
-         [input_audio],
      )
-     cancel = gr.Button("Stop Conversation", variant="stop")
-     cancel.click(lambda: (AppState(stopped=True), gr.Audio(recording=False)), None,
-                  [state, input_audio], cancels=[respond, restart])
- demo.launch()
  import numpy as np
  from dataclasses import dataclass, field
  import time
  from pydub import AudioSegment
  import librosa
  from utils.vad import get_speech_timestamps, collect_chunks, VadOptions
+ from huggingface_hub import login, hf_hub_download
  import os
  from PIL import Image
+ from ClassPrompt import PromptClass
+ import render
+
+ creator_prompt = PromptClass()
  r = sr.Recognizer()

  @dataclass
  class AppState:
      stream: np.ndarray | None = None
      sampling_rate: int = 0
      pause_detected: bool = False
      started_talking: bool = False
      stopped: bool = False
      history: list = field(default_factory=list)
+     typing: bool = False
+     painting: bool = False
+     image_out: Image.Image = None
+     image_in: Image = None
+     conversation: list = field(default_factory=list)
+     recording: bool = False  # add the recording attribute
+     pause_threshold: float = 1  # add the pause_threshold attribute
+     strength: float = 1.0
+     ckpt: list = field(default_factory=list)
+     guidance: float = 8

  def run_vad(ori_audio, sr):
      _st = time.time()
      audio = ori_audio
      audio = audio.astype(np.float32) / 32768.0
      sampling_rate = 16000
+     audio = librosa.resample(audio, orig_sr=sr, target_sr=sampling_rate)
      vad_parameters = {}
      vad_parameters = VadOptions(**vad_parameters)
      speech_chunks = get_speech_timestamps(audio, vad_parameters)
      audio = collect_chunks(audio, speech_chunks)
+     duration_after_vad = audio.shape[0] / sampling_rate  # declare and compute duration_after_vad
+     vad_audio = audio
      vad_audio = np.round(vad_audio * 32768.0).astype(np.int16)
      vad_audio_bytes = vad_audio.tobytes()
      return duration_after_vad, vad_audio_bytes, round(time.time() - _st, 4)
  except Exception as e:
      return -1, ori_audio, round(time.time() - _st, 4)

  def determine_pause(audio:np.ndarray, sampling_rate:int, state:AppState) -> bool:
          state.started_talking = True
          return False
      print(f"duration_after_vad: {dur_vad:.3f} s, time_vad: {time_vad:.3f} s")
+     return (duration - dur_vad) > state.pause_threshold  # use state.pause_threshold

+ def process_audio(audio:tuple, state:AppState, image:Image, streng:float, ckpt, guidance):
+     if state.recording:  # check state.stream
+         time.sleep(0.1)
+         if state.stream is None:
+             state.stream = audio[1]
+             state.sampling_rate = audio[0]
+         else:
+             state.stream = np.concatenate((state.stream, audio[1]))
+         state.image_in = image
+         state.strength = streng
+         state.ckpt = ckpt
+         state.guidance = guidance
+         pause_detected = determine_pause(state.stream, state.sampling_rate, state)
+         state.pause_detected = pause_detected
+         if state.pause_detected and state.started_talking:
+             state.recording = False
+             return state, gr.Audio(recording=False)
+     return state, None
+
+ def transcribe_audio(audio_segment):
+     audio_buffer = BytesIO()
+     audio_segment.export(audio_buffer, format="wav")
+     audio_buffer.seek(0)
+     try:
+         with sr.AudioFile(audio_buffer) as source:
+             r.adjust_for_ambient_noise(source)
+             text = r.recognize_google(r.record(source), language='vi')
+             return text
+     except sr.UnknownValueError:
+         print("Could not understand audio.")
+     except sr.RequestError as e:
+         print(f"Could not request results from Google Speech Recognition service; {e}")
+     return ""
+
+ def chat_with_onlinemodel(user_input, state:AppState):
+     state.history.append({"role": "user", "content": user_input})
+     response = creator_prompt.chat(provider="SambaNova", model="Meta-Llama-3.1-405B-Instruct", input_text=state.history)
+     bot_response = response
+     characters = bot_response.replace("*", "")
+     state.history.append({"role": "assistant", "content": characters})
+     state.conversation.append({"role": "user", "content": "Bạn: " + user_input})
+     state.conversation.append({"role": "assistant", "content": "Bot: " + characters})
+     return characters, state

+ def synthesize_speech(text):
+     """Convert text to speech with gTTS."""
+     try:
+         mp3 = gTTS(text, tld='com.vn', lang='vi', slow=False)
+         mp3_fp = BytesIO()
+         mp3.write_to_fp(mp3_fp)
+         audio_bytes = mp3_fp.getvalue()
+         mp3_fp.close()
+         return audio_bytes  # only return audio_bytes
+     except Exception as e:
+         print(f"Lỗi tổng hợp giọng nói: {e}")
+         return None
+
+ def response_audio(state:AppState):
+     """Handle the request and build the response."""
      if not state.pause_detected and not state.started_talking:
+         return state, None
+     textin = ""
+     audio_segment = AudioSegment(
          state.stream.tobytes(),
          frame_rate=state.sampling_rate,
          sample_width=state.stream.dtype.itemsize,
+         channels=1 if state.stream.ndim == 1 else state.stream.shape[1]
      )
+     textin = transcribe_audio(audio_segment)
+     state.stream = None
+     if state.typing is False:
+         txt, state = chuyen_trangthai(textin, state)
+         if txt == True:
+             return state, synthesize_speech("chuyển sang trạng thái dùng bàn phím")
      if textin != "":
+         paint = state.painting
+         state.painting = text_check(textin, state.painting)
+         if paint != state.painting:
+             return state, synthesize_speech("Đã chuyển sang chế độ " + ("vẽ" if state.painting else "nói chuyện"))
+         if state.painting is True:
+             promptx = prompt_hugingface(textin, "Hugging Face", "Qwen/Qwen2.5-72B-Instruct", "Medium")
+             img = resize(state.image_in)
+             state.image_out = render.generate_images(textin, img)
+             audio_bytes = synthesize_speech("Bạn thấy tôi vẽ " + textin + " có đẹp không")
+             return state, audio_bytes
          else:
+             print("Đang nghĩ...")
+             text_out, state = chat_with_onlinemodel(textin, state)
+             audio_bytes = synthesize_speech(text_out)
+             return state, audio_bytes
      else:
+         return state, synthesize_speech("Tôi nghe không rõ")  # return an error message if synthesize_speech fails
+
+ def response_text(state:AppState, textin, image:Image, streng:float, ckpt, prompt, guidance, progress=gr.Progress(track_tqdm=True)):
+     """Handle the request and build the response."""
+     #state.recording = False  # stop recording
+     if state.typing is True:
+         txt, state = chuyen_trangthai(textin, state)
+         if txt == False:
+             return state, synthesize_speech("chuyển sang trạng thái nói")
+     if textin != "":
+         paint = state.painting
+         state.painting = text_check(textin, state.painting)
+         if paint != state.painting:
+             return state, synthesize_speech("Đã chuyển sang chế độ " + ("vẽ" if state.painting else "nói chuyện"))
+         if state.painting is True:
+             state.conversation.append({"role": "user", "content": "Bạn: " + textin})
+             #state.image_out = generate_image(textin, image, streng, ckpt, guidance)
+             img = resize(image)
+             image_out = render.generate_images(textin, img)
+             state.image_out = image_out
+             audio_bytes = synthesize_speech("Bạn thấy tôi vẽ " + prompt + " có đẹp không")
+             return state, audio_bytes
+         else:
+             print("Đang nghĩ...")
+             text_out, state = chat_with_onlinemodel(textin, state=state)
+             audio_bytes = synthesize_speech(text_out)
+             return state, audio_bytes
+     else:
+         return state, synthesize_speech("Hãy gõ nội dung")  # return an error message if synthesize_speech fails
+
+ def text_check(textin, painting):
+     if not painting:
+         return "sang chế độ vẽ" in textin
+     return "sang chế độ nói" not in textin
+
+ def chuyen_trangthai(textin, state:AppState):
+     if "muốn nói chuyện" in textin:
+         state.started_talking = False
+         state.recording = True
+         state.stopped = False
+         state.typing = False
+         return False, state
+     elif "dùng bàn phím" in textin:
+         state.started_talking = False
+         state.recording = False
+         state.stopped = True
+         state.typing = True
+         return True, state
+     else:
+         return state.typing, state

+ def start_recording_user(state:AppState, progress=gr.Progress(track_tqdm=True)):  # bug fix here
+     state.stopped = False  # allow recording to restart if currently in the recording state
+     state.started_talking = False
+     state.recording = True
+     return gr.Audio(recording=True), state

+ def restart_recording(state:AppState):  # bug fix here
+     if not state.stopped:  # allow recording to restart if currently in the recording state
+         state.started_talking = False
+         state.recording = True
+         return gr.Audio(recording=True), state
+     else:
+         state.started_talking = False
+         state.recording = False
+         return gr.Audio(recording=False), state

+ def prompt_hugingface(prompt, llm_provider, model, type):
+     result = creator_prompt.generate(
+         input_text=prompt,
+         long_talk=True,
+         compress=True,
+         compression_level="hard",
+         poster=False,
+         prompt_type=type,  # Use the updated prompt_type here
+         custom_base_prompt="",
+         provider=llm_provider,
+         model=model
+     )
+     output = result
+     return output
+
+ def resize(img:Image.Image):
+     height = (img.height // 8) * 8
+     width = (img.width // 8) * 8
+     imgre = img.resize((width, height))
+     return imgre
+
+ loaded = ""
+ steps = 50

+ def update_model_choices(provider):
+     provider_models = {
+         "Hugging Face": [
+             "Qwen/Qwen2.5-72B-Instruct",
+             "meta-llama/Meta-Llama-3.1-70B-Instruct",
+             "mistralai/Mixtral-8x7B-Instruct-v0.1",
+             "mistralai/Mistral-7B-Instruct-v0.3"
+         ],
+         "SambaNova": [
+             "Meta-Llama-3.1-70B-Instruct",
+             "Meta-Llama-3.1-405B-Instruct",
+             "Meta-Llama-3.1-8B-Instruct"
+         ],
+     }
+     models = provider_models.get(provider, [])
+     return gr.Dropdown(choices=models, value=models[0] if models else "")

+ title = "Chat tiếng việt by tuphamkts"
+ description = "Muốn vẽ nói: Chuyển sang chế độ vẽ. Muốn chat nói: Chuyển sang chế độ nói. Chế độ gõ: Tôi muốn dùng bàn phím, chế độ nói: Tôi muốn nói chuyện. Ghi chú: Chỉ dừng chương trình khi tôi đang nói (lịch sử chat sẽ bị xóa khi dừng chương trình)."
+ examples = ["Chuyển sang chế độ vẽ", "Chuyển sang chế độ nói"]
+ with gr.Blocks(title=title) as demo:
+     gr.HTML(f"<div style='text-align: center;'><h1>{title}</h1><p>{description}</p></div>")
      with gr.Row():
          with gr.Column():
+             with gr.Column(visible=True) as prompt_visible:
+                 with gr.Row():
+                     llm_provider = gr.Dropdown(choices=["Hugging Face", "SambaNova"], label="Nguồn model", value="Hugging Face")
+                     model = gr.Dropdown(label="Chọn Model", choices=["Qwen/Qwen2.5-72B-Instruct", "meta-llama/Meta-Llama-3.1-70B-Instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3"], value="Qwen/Qwen2.5-72B-Instruct")
+                     prompt_types = ["Long", "Short", "Medium", "OnlyObjects", "NoFigure", "Landscape", "Fantasy"]
+                     prompt_type = gr.Dropdown(choices=prompt_types, label="Phong cách", value="Medium", interactive=True)
+                 input_prompt = gr.Textbox(label="Nhập nội dung muốn vẽ", value="Một cô gái", type="text"),
+                 generate_prompt = gr.Button("Tạo Prompt", variant="stop")
+             with gr.Column(visible=True) as typing_visible:
+                 input_text = gr.Textbox(label="Nhập nội dung trao đổi", type="text"),
+                 submit = gr.Button("Áp dụng", variant="stop")
              input_audio = gr.Audio(label="Nói cho tôi nghe nào", sources="microphone", type="numpy")
+             output_audio = gr.Audio(label="Trợ lý", autoplay=True, sources=None, type="numpy")
+             input_image = gr.Image(label="Hình ảnh của bạn", sources=["upload", "clipboard", "webcam"], type="pil", visible=True)
+         with gr.Column(visible=False) as image_visible:
+             ckpt = gr.Dropdown(label='Chọn mô hình', choices=['Chất lượng cao -> Tốc độ chậm', 'Chất lượng vừa -> Tốc độ vừa', 'Chất lượng kém -> Tốc độ nhanh'], value='Chất lượng kém -> Tốc độ nhanh', interactive=True, visible=True)
+             output_image = gr.Image(label="Hình ảnh sau xử lý", sources=None, type="pil", visible=True)
+             streng = gr.Slider(minimum=0.1, maximum=1, value=.8, step=0.05, label='Strength Lora')
+             guidance = gr.Slider(minimum=0.1, maximum=12, value=4, step=0.1, label='Sáng tạo')
+         with gr.Column(visible=True) as chatbot_visible:
+             chatbot = gr.Chatbot(label="Nội dung trò chuyện", type="messages")
+     #state = gr.State(value=AppState())
+     state = gr.State(value=AppState(typing=True, painting=True))
+     startrecord = input_audio.start_recording(
+         start_recording_user,
+         [state],
+         [input_audio, state],
+     )
      stream = input_audio.stream(
          process_audio,
+         [input_audio, state, input_image, streng, ckpt, guidance],
+         [state, input_audio],
+         stream_every=1,
          time_limit=30,
      )
+
      respond = input_audio.stop_recording(
+         fn=response_audio,
+         inputs=[state],
+         outputs=[state, output_audio],
      )
      respond.then(lambda s: s.conversation, [state], [chatbot])
+     respond.then(lambda s: s.image_out, [state], [output_image])
+
      restart = output_audio.stop(
+         restart_recording,
          [state],
+         [input_audio, state],
+     )
+     restart.then(lambda s: gr.update(visible=not s.typing, recording=not s.typing), [state], [input_audio])
+     restart.then(lambda s: gr.update(visible=s.typing), [state], [typing_visible])
+     restart.then(lambda s: gr.update(visible=s.painting), [state], [image_visible])
+     restart.then(lambda s: gr.update(visible=(s.painting and s.typing) if s.painting == True else False), [state], [prompt_visible])
+     restart.then(lambda s: gr.update(visible=not s.painting), [state], [chatbot_visible])
+
+     cancel = gr.Button("Dừng chương trình", variant="stop", interactive=False)
+     stream.then(lambda s: gr.update(interactive=not s.stopped), [state], [cancel])
+     cancel.click(
+         lambda: (AppState(stopped=True, recording=False, started_talking=False), gr.Audio(recording=False), gr.update(interactive=False)),
+         None, [state, input_audio, cancel],
+         cancels=[respond, stream, startrecord]  # add startrecord and stream to cancels
+     )
+
+     sub = submit.click(
+         fn=response_text,
+         inputs=[state, input_text[0], input_image, streng, ckpt, input_prompt[0], guidance],
+         outputs=[state, output_audio]
      )
+     sub.then(lambda s: s.conversation, [state], [chatbot])
+     sub.then(lambda s: s.image_out, [state], [output_image])
+
+     generator = generate_prompt.click(
+         fn=prompt_hugingface,
+         inputs=[input_prompt[0], llm_provider, model, prompt_type],
+         outputs=[input_text[0]]
+     )
+
+     llm_provider.change(
+         update_model_choices,
+         inputs=[llm_provider],
+         outputs=[model]
+     )
+     gr.Examples(
+         examples=examples,
+         inputs=input_text,
+     )
+
+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
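As an aside, the new resize() helper floors both image dimensions to multiples of 8 before the image is sent to the rendering backend, which latent-diffusion pipelines typically require because of their 8x VAE downsampling. A minimal sketch of that arithmetic (the image size here is made up):

```python
# Minimal sketch of the multiple-of-8 resize used by app.py's resize() helper.
from PIL import Image

img = Image.new("RGB", (1021, 771))          # hypothetical input size
w, h = (img.width // 8) * 8, (img.height // 8) * 8
print(w, h)                                   # 1016 768
resized = img.resize((w, h))                  # dimensions now divisible by 8
```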
render.py ADDED
@@ -0,0 +1,94 @@
+ import websocket  # websocket-client
+ import uuid
+ import json
+ import urllib.request
+ import urllib.parse
+ import random
+ from PIL import Image
+ import io
+ from termcolor import colored
+ import base64
+ import io
+ import os
+
+ server_address = os.environ.get("URL_API")
+ json_data = os.environ.get("JSON_API")
+ client_id = str(uuid.uuid4())
+
+ def queue_prompt(prompt):
+     p = {"prompt": prompt, "client_id": client_id}
+     data = json.dumps(p, indent=4).encode('utf-8')  # Prettify JSON for print
+     req = urllib.request.Request(f"http://{server_address}/prompt", data=data)
+     return json.loads(urllib.request.urlopen(req).read())
+
+ def get_image(filename, subfolder, folder_type):
+     data = {"filename": filename, "subfolder": subfolder, "type": folder_type}
+     url_values = urllib.parse.urlencode(data)
+     with urllib.request.urlopen(f"http://{server_address}/view?{url_values}") as response:
+         return response.read()
+
+ def get_history(prompt_id):
+     print(colored(f"Fetching history for prompt ID: {prompt_id}.", "cyan"))
+     with urllib.request.urlopen(f"http://{server_address}/history/{prompt_id}") as response:
+         return json.loads(response.read())
+
+ def get_images(ws, prompt):
+     prompt_id = queue_prompt(prompt)['prompt_id']
+     output_images = {}
+     last_reported_percentage = 0
+     while True:
+         out = ws.recv()
+         if isinstance(out, str):
+             message = json.loads(out)
+             if message['type'] == 'progress':
+                 data = message['data']
+                 current_progress = data['value']
+                 max_progress = data['max']
+                 percentage = int((current_progress / max_progress) * 100)
+                 if percentage >= last_reported_percentage + 10:
+                     print(colored(f"Progress: {percentage}% in node {data['node']}", "yellow"))
+                     last_reported_percentage = percentage
+
+             elif message['type'] == 'executing':
+                 data = message['data']
+                 if data['node'] is None and data['prompt_id'] == prompt_id:
+                     print(colored("Execution complete.", "green"))
+                     break  # Execution is done
+         else:
+             continue  # Previews are binary data
+
+     history = get_history(prompt_id)[prompt_id]
+     for o in history['outputs']:
+         for node_id in history['outputs']:
+             node_output = history['outputs'][node_id]
+             if 'images' in node_output:
+                 images_output = []
+                 for image in node_output['images']:
+                     print(colored(f"Downloading image: {image['filename']} from the server.", "yellow"))
+                     image_data = get_image(image['filename'], image['subfolder'], image['type'])
+                     images_output.append(image_data)
+                 output_images[node_id] = images_output
+     return output_images
+
+ def pil_to_base64(image):
+     buffer = io.BytesIO()
+     image.save(buffer, format="PNG")
+     base64_string = base64.b64encode(buffer.getvalue()).decode("utf-8")
+     return f"data:image/png;base64,{base64_string}"
+
+ def generate_images(positive_prompt, image):
+     ws = websocket.WebSocket()
+     ws_url = f"ws://{server_address}/ws?clientId={client_id}"
+     ws.connect(ws_url)
+     data = json.loads(json_data)
+     data["49"]["inputs"]["text"] = positive_prompt
+     data["90"]["inputs"]["images"]["base64"] = [pil_to_base64(image)]
+     seed = random.randint(1, 1000000000)
+     data["47"]["inputs"]["noise_seed"] = seed
+     images = get_images(ws, data)
+     ws.close()
+     for node_id in images:
+         for image_data in images[node_id]:
+             image = Image.open(io.BytesIO(image_data))
+     return image
+
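render.py talks to an external workflow server whose address and workflow graph come entirely from the environment: URL_API must hold the server's host:port and JSON_API the workflow JSON containing nodes "49" (prompt text), "90" (input image) and "47" (noise seed). A minimal, hedged usage sketch (the file names and prompt are made up; both environment variables must be exported before the module is imported, since it reads them at import time):

```python
# Minimal sketch, assuming URL_API and JSON_API are already exported.
from PIL import Image
import render  # reads URL_API / JSON_API when imported

source = Image.open("input.png").convert("RGB")   # hypothetical input image
result = render.generate_images("a watercolor landscape", source)
result.save("output.png")                          # last image returned by the workflow
```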