Haofei Yu committed on
Commit acb3380
1 Parent(s): a917903

Feature/support ctm (#16)

* support inner loop ctm

* support pre-commit

* support running

* support ctm ai

.gitignore CHANGED
@@ -157,5 +157,4 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
-**/ctm/*
+#.idea/
 
app.py CHANGED
@@ -2,11 +2,8 @@ import os
 import sys
 
 import gradio as gr
-import base64
-import io
-from PIL import Image
-
-sys.path.append("../CTM/")
+import sys
+sys.path.append('./ctm')
 from ctm.ctms.ctm_base import BaseConsciousnessTuringMachine
 
 ctm = BaseConsciousnessTuringMachine()
@@ -35,14 +32,12 @@ def introduction():
         """
     )
 
-
 def add_processor(processor_name, display_name, state):
-    print("add processor ", processor_name)
+    print('add processor ', processor_name)
     ctm.add_processor(processor_name)
     print(ctm.processor_group_map)
    print(len(ctm.processor_list))
-    return display_name + " (added)"
-
+    return display_name + ' (added)'
 
 def processor_tab():
     # Categorized model names
@@ -50,14 +45,14 @@ def processor_tab():
         "gpt4_text_emotion_processor",
         "gpt4_text_summary_processor",
         "gpt4_speaker_intent_processor",
-        "roberta_text_sentiment_processor",
+        "roberta_text_sentiment_processor"
     ]
     vision_processors = [
         "gpt4v_cloth_fashion_processor",
         "gpt4v_face_emotion_processor",
         "gpt4v_ocr_processor",
-        "gpt4v_posture_processor",
-        "gpt4v_scene_location_processor",
+        "gpt4v_posture",
+        "gpt4v_scene_location_processor"
     ]
 
     with gr.Blocks():
@@ -65,54 +60,37 @@ def processor_tab():
            with gr.Column(scale=1):
                gr.Markdown("### Text Processors")
                for model_name in text_processors:
-                    display_name = (
-                        model_name.replace("processor", "")
-                        .replace("_", " ")
-                        .title()
-                    )
+                    display_name = model_name.replace("processor", "").replace("_", " ").title()
 
                    button = gr.Button(display_name)
-                    processor_name = gr.Textbox(
-                        value=model_name, visible=False
-                    )
-                    display_name = gr.Textbox(
-                        value=display_name, visible=False
-                    )
+                    processor_name = gr.Textbox(value=model_name, visible=False)
+                    display_name = gr.Textbox(value=display_name, visible=False)
                    button.click(
                        fn=add_processor,
                        inputs=[processor_name, display_name, gr.State()],
-                        outputs=[button],
+                        outputs=[button]
                    )
 
            with gr.Column(scale=1):
                gr.Markdown("### Vision Processors")
                for model_name in vision_processors:
-                    display_name = (
-                        model_name.replace("processor", "")
-                        .replace("_", " ")
-                        .title()
-                    )
+                    display_name = model_name.replace("processor", "").replace("_", " ").title()
 
                    button = gr.Button(display_name)
-                    processor_name = gr.Textbox(
-                        value=model_name, visible=False
-                    )
-                    display_name = gr.Textbox(
-                        value=display_name, visible=False
-                    )
+                    processor_name = gr.Textbox(value=model_name, visible=False)
+                    display_name = gr.Textbox(value=display_name, visible=False)
                    button.click(
                        fn=add_processor,
                        inputs=[processor_name, display_name, gr.State()],
-                        outputs=[button],
+                        outputs=[button]
                    )
 
 
-def forward(query, text, image, state):
-    state["question"] = query
-    image = convert_base64(image)
-    ask_processors_output_info, state = ask_processors(
-        query, text, image, state
-    )
+
+
+def forward(query, content, image, state):
+    state['question'] = query
+    ask_processors_output_info, state = ask_processors(query, content, image, state)
     uptree_competition_output_info, state = uptree_competition(state)
     ask_supervisor_output_info, state = ask_supervisor(state)
 
ctm DELETED
@@ -1 +0,0 @@
-/Users/yuhaofei/github_repo/CTM-AI/ctm/
 
 
ctm/configs/__init__.py ADDED
File without changes
ctm/configs/ctm_config_base.py ADDED
@@ -0,0 +1,44 @@
+import json
+
+
+class BaseConsciousnessTuringMachineConfig(object):
+    # Initialize with default values or those passed to the constructor
+    def __init__(
+        self,
+        ctm_name=None,
+        max_iter_num=3,
+        output_threshold=0.5,
+        groups_of_processors={},
+        supervisor="gpt4_supervisor",
+        **kwargs,
+    ):
+        self.ctm_name = ctm_name
+        self.max_iter_num = max_iter_num
+        self.output_threshold = output_threshold
+        self.groups_of_processors = groups_of_processors
+        self.supervisor = supervisor
+        # This allows for handling additional, possibly unknown configuration parameters
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+
+    def to_json_string(self):
+        """Serializes this instance to a JSON string."""
+        return json.dumps(self.__dict__, indent=2) + "\n"
+
+    @classmethod
+    def from_json_file(cls, json_file):
+        """Creates an instance from a JSON file."""
+        with open(json_file, "r", encoding="utf-8") as reader:
+            text = reader.read()
+        return cls(**json.loads(text))
+
+    @classmethod
+    def from_ctm(cls, ctm_name):
+        """
+        Simulate fetching a model configuration from a ctm model repository.
+        This example assumes the configuration is already downloaded and saved locally.
+        """
+        # This path would be generated dynamically based on `model_name_or_path`
+        # For simplicity, we're directly using it as a path to a local file
+        config_file = f"../ctm/configs/{ctm_name}_config.json"
+        return cls.from_json_file(config_file)

ctm/configs/sarcasm_ctm_config.json ADDED
@@ -0,0 +1,18 @@
+{
+  "ctm_name": "sarcasm_ctm",
+  "max_iter_num": 3,
+  "output_threshold": 0.5,
+  "groups_of_processors": {
+    "group_1": [
+      "gpt4v_scene_location_processor",
+      "gpt4v_cloth_fashion_processor"
+    ],
+    "group_2": [
+      "gpt4v_posture_processor"
+    ],
+    "group_3": [
+      "gpt4v_ocr_processor"
+    ]
+  },
+  "supervisor": "gpt4_supervisor"
+}

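A minimal sketch of how this JSON is consumed by the config class above, assuming the file sits where `from_ctm` builds its relative path (`../ctm/configs/sarcasm_ctm_config.json`):

```python
# Sketch: load the sarcasm CTM config through BaseConsciousnessTuringMachineConfig.
# Assumes the working directory matches the relative path used in from_ctm().
from ctm.configs.ctm_config_base import BaseConsciousnessTuringMachineConfig

config = BaseConsciousnessTuringMachineConfig.from_ctm("sarcasm_ctm")
print(config.ctm_name)                    # "sarcasm_ctm"
print(config.max_iter_num)                # 3
print(list(config.groups_of_processors))  # ['group_1', 'group_2', 'group_3']
print(config.to_json_string())            # serializes the instance back to JSON
```
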
ctm/ctms/__init__.py ADDED
@@ -0,0 +1,5 @@
+from .ctm_base import BaseConsciousnessTuringMachine
+
+__all__ = [
+    "BaseConsciousnessTuringMachine",
+]

ctm/ctms/ctm_base.py ADDED
@@ -0,0 +1,227 @@
+import concurrent.futures
+from collections import defaultdict
+
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+
+from ctm.configs.ctm_config_base import (
+    BaseConsciousnessTuringMachineConfig,
+)
+from ctm.processors.processor_base import BaseProcessor
+from ctm.supervisors.supervisor_base import BaseSupervisor
+
+
+class BaseConsciousnessTuringMachine(object):
+    def __call__(self, *args, **kwargs):
+        return self.forward(*args, **kwargs)
+
+    def __init__(self, ctm_name=None, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        if ctm_name:
+            self.config = BaseConsciousnessTuringMachineConfig.from_ctm(
+                ctm_name
+            )
+        else:
+            self.config = BaseConsciousnessTuringMachineConfig()
+        self.processor_list = []
+        self.processor_group_map = defaultdict(list)
+        self.load_ctm()
+
+    def add_processor(self, processor_name, group_name=None):
+        processor_instance = BaseProcessor(processor_name)
+        self.processor_list.append(
+            {
+                "processor_name": processor_name,
+                "processor_instance": processor_instance,
+            }
+        )
+        if group_name:
+            self.processor_group_map[processor_name] = group_name
+
+    def add_supervisor(self, supervisor_name):
+        supervisor_instance = BaseSupervisor(supervisor_name)
+        self.supervisor = {
+            "supervisor_name": supervisor_name,
+            "supervisor_instance": supervisor_instance,
+        }
+
+    @staticmethod
+    def ask_processor(
+        processor, question, context, image_path, audio_path, video_path
+    ):
+        processor_instance = processor["processor_instance"]
+        processor_name = processor["processor_name"]
+        gist, score = processor_instance.ask(
+            question, context, image_path, audio_path, video_path
+        )
+        return {"name": processor_name, "gist": gist, "score": score}
+
+    def ask_processors(
+        self, question, context, image_path, audio_path, video_path
+    ):
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = [
+                executor.submit(
+                    self.ask_processor,
+                    processor,
+                    question,
+                    context,
+                    image_path,
+                    audio_path,
+                    video_path,
+                )
+                for processor in self.processor_list
+            ]
+            results = [
+                future.result()
+                for future in concurrent.futures.as_completed(futures)
+            ]
+
+        output = {}
+        for result in results:
+            output[result["name"]] = {
+                "gist": result["gist"],
+                "score": result["score"],
+            }
+
+        assert len(output) == len(self.processor_list)
+        return output
+
+    def uptree_competition(self, processor_output):
+        # Unpack processor outputs into lists for easier processing
+        gists, scores, names = [], [], []
+        for name, info in processor_output.items():
+            gists.append(info["gist"])
+            scores.append(info["score"])
+            names.append(name)
+
+        # Determine the unique group for each processor
+        unique_groups = set(self.processor_group_map.values())
+
+        # Prepare to track the best processor by group
+        best_processor_by_group = {
+            group: (None, -1) for group in unique_groups
+        }  # (processor_name, score)
+
+        # Iterate through processors to find the best in each group
+        for name, score in zip(names, scores):
+            group = self.processor_group_map[name]
+            if score > best_processor_by_group[group][1]:
+                best_processor_by_group[group] = (name, score)
+
+        # Select the overall best across groups
+        best_overall = max(
+            best_processor_by_group.values(), key=lambda x: x[1]
+        )
+        best_name = best_overall[0]
+        index = names.index(best_name)
+
+        winning_info = {
+            "name": best_name,
+            "gist": gists[index],
+            "score": scores[index],
+        }
+        return winning_info
+
+    def ask_supervisor(self, question, processor_info):
+        final_answer, score = self.supervisor["supervisor_instance"].ask(
+            question, processor_info["gist"]
+        )
+        return final_answer, score
+
+    def downtree_broadcast(self, winning_output):
+        winning_processor_name = winning_output["name"]
+        winning_processor_gist = winning_output["gist"]
+        for processor in self.processor_list:
+            if processor["processor_name"] != winning_processor_name:
+                processor["processor_instance"].update_info(
+                    winning_processor_gist
+                )
+        return
+
+    def calc_processor_sim(self, processor_output):
+        processor_gists = [info["gist"] for info in processor_output.values()]
+        tfidf_vectorizer = TfidfVectorizer()
+        tfidf_matrix = tfidf_vectorizer.fit_transform(processor_gists)
+        cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
+        return cosine_sim
+
+    def link_form(self, processor_output):
+        sim = self.calc_processor_sim(processor_output)
+        print(sim)
+        # iterate on each sim pair
+        # if sim > threshold, then link the two processors by combining them into the same group
+        link_threshold = 0.5
+        for i in range(len(sim)):
+            for j in range(i + 1, len(sim)):
+                if sim[i][j] > 0.5:
+                    processor1_name = list(processor_output.keys())[i]
+                    processor2_name = list(processor_output.keys())[j]
+                    # choose the group that includes more processors
+                    # processor_group_map is a dict with processor_name as key and group_name as value
+                    group1 = self.processor_group_map[processor1_name]
+                    group2 = self.processor_group_map[processor2_name]
+                    # calculate the number of processors in each group
+                    group1_count = sum(
+                        [
+                            1
+                            for group in self.processor_group_map.values()
+                            if group == group1
+                        ]
+                    )
+                    group2_count = sum(
+                        [
+                            1
+                            for group in self.processor_group_map.values()
+                            if group == group2
+                        ]
+                    )
+                    # choose the group with more processors
+                    group_name = (
+                        group1 if group1_count > group2_count else group2
+                    )
+                    self.processor_group_map[processor1_name] = group_name
+                    self.processor_group_map[processor2_name] = group_name
+        return
+
+    def processor_fuse(self, infos, scores):
+        return infos, scores
+
+    def forward(
+        self,
+        question=None,
+        context=None,
+        image_path=None,
+        audio_path=None,
+        video_path=None,
+    ):
+        answer_threshold = 0.5
+        max_iter = 3
+
+        for i in range(max_iter):
+            print("start the {}-th iteration".format(i + 1))
+            processor_output = self.ask_processors(
+                question=question,
+                context=context,
+                image_path=image_path,
+                audio_path=audio_path,
+                video_path=video_path,
+            )
+            import pdb; pdb.set_trace()
+            winning_output = self.uptree_competition(processor_output)
+            answer, score = self.ask_supervisor(question, winning_output)
+            if score > answer_threshold:
+                break
+            else:
+                self.downtree_broadcast(winning_output)
+                self.link_form(processor_output)
+        return answer, score
+
+    def load_ctm(self):
+        for (
+            group_name,
+            processor_list,
+        ) in self.config.groups_of_processors.items():
+            for processor_name in processor_list:
+                self.add_processor(processor_name, group_name=group_name)
+        self.add_supervisor(self.config.supervisor)

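A minimal usage sketch of the inner loop this class implements. The processor choices, question, and file path below are assumptions for illustration, not part of the commit, and note that `forward()` as committed still contains a `pdb.set_trace()` breakpoint that will pause execution:

```python
# Sketch: build a CTM by hand and run one forward pass.
# Assumes OPENAI_API_KEY is set for the GPT-4/GPT-4V processors and supervisor.
from ctm.ctms.ctm_base import BaseConsciousnessTuringMachine

ctm = BaseConsciousnessTuringMachine()  # default config: no processors, gpt4_supervisor
ctm.add_processor("gpt4v_scene_location_processor", group_name="vision")
ctm.add_processor("gpt4_text_emotion_processor", group_name="language")

# forward() fans the query out to every processor in parallel, runs the
# up-tree competition, asks the supervisor, and repeats up to max_iter times,
# broadcasting the winning gist and re-linking processor groups in between.
answer, score = ctm(
    question="Is the speaker being sarcastic?",
    context="Oh great, another Monday.",
    image_path="path/to/frame.png",  # hypothetical image path
)
print(answer, score)
```
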
ctm/messengers/__init__.py ADDED
@@ -0,0 +1,15 @@
+from .messenger_bart_text_summ import BartTextSummarizationMessenger
+from .messenger_base import BaseMessenger
+from .messenger_gpt4 import GPT4Messenger
+from .messenger_gpt4v import GPT4VMessenger
+from .messenger_roberta_text_sentiment import (
+    RobertaTextSentimentMessenger,
+)
+
+__all__ = [
+    "BaseMessenger",
+    "GPT4VMessenger",
+    "GPT4Messenger",
+    "BartTextSummarizationMessenger",
+    "RobertaTextSentimentMessenger",
+]

ctm/messengers/messenger_bart_text_summ.py ADDED
@@ -0,0 +1,22 @@
+from typing import Dict, List, Union
+
+from ctm.messengers.messenger_base import BaseMessenger
+
+
+@BaseMessenger.register_messenger("bart_text_summ_messenger")  # type: ignore[no-untyped-call] # FIX ME
+class BartTextSummarizationMessenger(BaseMessenger):
+    def __init__(self, role=None, content=None, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_messenger(role, content)
+
+    def init_messenger(  # type: ignore[no-untyped-def] # FIX ME
+        self, role: str = None, content: Union[str, Dict, List] = None  # type: ignore[assignment, type-arg] # FIX ME
+    ):
+        self.messages = ""
+        if content and role:
+            self.update_messages(role, content)  # type: ignore[attr-defined] # FIX ME
+
+    def update_message(self, role: str, content: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
+        self.messages += content  # type: ignore[operator] # FIX ME
+
+    def check_iter_round_num(self):  # type: ignore[no-untyped-def] # FIX ME
+        return 1 if len(self.messages) > 0 else 0

ctm/messengers/messenger_base.py ADDED
@@ -0,0 +1,63 @@
+from typing import Dict, List, Union
+
+
+class BaseMessenger(object):
+    _messenger_registry = {}  # type: ignore[var-annotated] # FIX ME
+
+    @classmethod
+    def register_messenger(cls, messenger_name):  # type: ignore[no-untyped-def] # FIX ME
+        def decorator(subclass):  # type: ignore[no-untyped-def] # FIX ME
+            cls._messenger_registry[messenger_name] = subclass
+            return subclass
+
+        return decorator
+
+    def __new__(cls, messenger_name, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        if messenger_name not in cls._messenger_registry:
+            raise ValueError(
+                f"No messenger registered with name '{messenger_name}'"
+            )
+        return super(BaseMessenger, cls).__new__(
+            cls._messenger_registry[messenger_name]
+        )
+
+    def __init__(self, role=None, content=None, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_messenger(role, content)
+
+    def init_messenger(  # type: ignore[no-untyped-def] # FIX ME
+        self, role: str = None, content: Union[str, Dict, List] = None  # type: ignore[assignment, type-arg] # FIX ME
+    ):
+        pass
+
+    def update_message(self, role: str, content: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
+        pass
+
+    def check_iter_round_num(self):  # type: ignore[no-untyped-def] # FIX ME
+        pass
+
+    def add_system_message(self, message: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
+        self.update_message("system", message)
+
+    def add_assistant_message(self, message: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
+        self.update_message("assistant", message)
+
+    def add_user_message(self, message: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
+        self.update_message("user", message)
+
+    def add_user_image(self, image_base64: str):  # type: ignore[no-untyped-def] # FIX ME
+        self.add_message(  # type: ignore[attr-defined] # FIX ME
+            "user",
+            {
+                "type": "image_url",
+                "image_url": f"data:image/jpeg;base64,{image_base64}",
+            },
+        )
+
+    def add_feedback(self, feedback: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
+        self.add_message("system", feedback)  # type: ignore[attr-defined] # FIX ME
+
+    def clear(self):  # type: ignore[no-untyped-def] # FIX ME
+        self.messages.clear()  # type: ignore[attr-defined] # FIX ME
+
+    def get_messages(self):  # type: ignore[no-untyped-def] # FIX ME
+        return self.messages  # type: ignore[attr-defined] # FIX ME

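The messenger, processor, and supervisor base classes all share the same registry pattern: a `register_*` class decorator stores each subclass under a string key, and `__new__` on the base class dispatches `BaseMessenger("<key>")` to the registered subclass. A small illustrative sketch, where the `echo_messenger` name is hypothetical and not part of the commit:

```python
# Sketch of the registry/dispatch pattern used by BaseMessenger; "echo_messenger" is made up.
from ctm.messengers.messenger_base import BaseMessenger


@BaseMessenger.register_messenger("echo_messenger")
class EchoMessenger(BaseMessenger):
    def init_messenger(self, role=None, content=None):
        self.messages = []

    def update_message(self, role, content):
        self.messages.append({"role": role, "content": content})


messenger = BaseMessenger("echo_messenger")  # __new__ looks the key up in the registry
messenger.add_user_message("hello")          # helper routes through update_message
print(messenger.get_messages())              # [{'role': 'user', 'content': 'hello'}]
```
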
ctm/messengers/messenger_gpt4.py ADDED
@@ -0,0 +1,22 @@
+from typing import Dict, List, Union
+
+from ctm.messengers.messenger_base import BaseMessenger
+
+
+@BaseMessenger.register_messenger("gpt4_messenger")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4Messenger(BaseMessenger):
+    def __init__(self, role=None, content=None, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_messenger(role, content)
+
+    def init_messenger(  # type: ignore[no-untyped-def] # FIX ME
+        self, role: str = None, content: Union[str, Dict, List] = None  # type: ignore[assignment, type-arg] # FIX ME
+    ):
+        self.messages = []  # type: ignore[var-annotated] # FIX ME
+        if content and role:
+            self.update_messages(role, content)  # type: ignore[attr-defined] # FIX ME
+
+    def update_message(self, role: str, content: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
+        self.messages.append({"role": role, "content": content})
+
+    def check_iter_round_num(self):  # type: ignore[no-untyped-def] # FIX ME
+        return len(self.messages)

ctm/messengers/messenger_gpt4v.py ADDED
@@ -0,0 +1,22 @@
+from typing import Dict, List, Union
+
+from ctm.messengers.messenger_base import BaseMessenger
+
+
+@BaseMessenger.register_messenger("gpt4v_messenger")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4VMessenger(BaseMessenger):
+    def __init__(self, role=None, content=None, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_messenger(role, content)
+
+    def init_messenger(  # type: ignore[no-untyped-def] # FIX ME
+        self, role: str = None, content: Union[str, Dict, List] = None  # type: ignore[assignment, type-arg] # FIX ME
+    ):
+        self.messages = []  # type: ignore[var-annotated] # FIX ME
+        if content and role:
+            self.update_messages(role, content)  # type: ignore[attr-defined] # FIX ME
+
+    def update_message(self, role: str, content: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
+        self.messages.append({"role": role, "content": content})
+
+    def check_iter_round_num(self):  # type: ignore[no-untyped-def] # FIX ME
+        return len(self.messages)

ctm/messengers/messenger_roberta_text_sentiment.py ADDED
@@ -0,0 +1,23 @@
+from typing import Dict, List, Union
+
+from ctm.messengers.messenger_base import BaseMessenger
+
+
+@BaseMessenger.register_messenger("roberta_text_sentiment_messenger")  # type: ignore[no-untyped-call] # FIX ME
+class RobertaTextSentimentMessenger(BaseMessenger):
+    def __init__(self, role=None, content=None, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_messenger(role, content)
+
+    def init_messenger(  # type: ignore[no-untyped-def] # FIX ME
+        self, role: str = None, content: Union[str, Dict, List] = None  # type: ignore[assignment, type-arg] # FIX ME
+    ):
+        self.messages = ""
+        if content and role:
+            self.update_messages(role, content)  # type: ignore[attr-defined] # FIX ME
+
+    def update_message(self, role: str, content: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
+        # should replace with updated message
+        self.messages = content  # type: ignore[assignment] # FIX ME
+
+    def check_iter_round_num(self):  # type: ignore[no-untyped-def] # FIX ME
+        return 1 if len(self.messages) > 0 else 0

ctm/processors/__init__.py ADDED
@@ -0,0 +1,31 @@
+from .processor_bart_text_summary import BartTextSummaryProcessor
+from .processor_base import BaseProcessor
+from .processor_gpt4 import GPT4Processor
+from .processor_gpt4_speaker_intent import GPT4SpeakerIntentProcessor
+from .processor_gpt4_text_emotion import GPT4TextEmotionProcessor
+from .processor_gpt4_text_summary import GPT4TextSummaryProcessor
+from .processor_gpt4v import GPT4VProcessor
+from .processor_gpt4v_cloth_fashion import GPT4VClothFashionProcessor
+from .processor_gpt4v_face_emotion import GPT4VFaceEmotionProcessor
+from .processor_gpt4v_ocr import GPT4VOCRProcessor
+from .processor_gpt4v_posture import GPT4VPostureProcessor
+from .processor_gpt4v_scene_location import GPT4VSceneLocationProcessor
+from .processor_roberta_text_sentiment import (
+    RobertaTextSentimentProcessor,
+)
+
+__all__ = [
+    "BaseProcessor",
+    "GPT4VProcessor",
+    "GPT4VSceneLocationProcessor",
+    "GPT4VOCRProcessor",
+    "GPT4VClothFashionProcessor",
+    "GPT4VFaceEmotionProcessor",
+    "GPT4VPostureProcessor",
+    "RobertaTextSentimentProcessor",
+    "BartTextSummaryProcessor",
+    "GPT4SpeakerIntentProcessor",
+    "GPT4TextEmotionProcessor",
+    "GPT4TextSummaryProcessor",
+    "GPT4Processor",
+]

ctm/processors/processor_bart_text_summary.py ADDED
@@ -0,0 +1,55 @@
+import os
+
+from huggingface_hub.inference_api import (
+    InferenceApi,  # type: ignore[import] # FIX ME
+)
+
+from ctm.messengers.messenger_base import BaseMessenger
+from ctm.processors.processor_base import BaseProcessor
+
+
+@BaseProcessor.register_processor("bart_text_summary_processor")  # type: ignore[no-untyped-call] # FIX ME
+class BartTextSummaryProcessor(BaseProcessor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+
+    def init_processor(self):  # type: ignore[no-untyped-def] # FIX ME
+        self.model = InferenceApi(
+            token=os.environ["HF_TOKEN"], repo_id="facebook/bart-large-cnn"
+        )
+        self.messenger = BaseMessenger("bart_text_summ_messenger")  # type: ignore[no-untyped-call] # FIX ME
+        return
+
+    def update_info(self, feedback: str):  # type: ignore[no-untyped-def] # FIX ME
+        self.messenger.add_assistant_message(feedback)
+
+    def ask_info(  # type: ignore[override] # FIX ME
+        self,
+        query: str,
+        context: str = None,  # type: ignore[assignment] # FIX ME
+        image_path: str = None,  # type: ignore[assignment] # FIX ME
+        audio_path: str = None,  # type: ignore[assignment] # FIX ME
+        video_path: str = None,  # type: ignore[assignment] # FIX ME
+    ) -> str:
+        if self.messenger.check_iter_round_num() == 0:  # type: ignore[no-untyped-call] # FIX ME
+            self.messenger.add_user_message(context)
+
+        response = self.model(self.messenger.get_messages())  # type: ignore[no-untyped-call] # FIX ME
+        summary = response[0]["summary_text"]
+        return summary  # type: ignore[no-any-return] # FIX ME
+
+
+if __name__ == "__main__":
+    processor = BaseProcessor("bart_text_summ_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    text: str = (
+        "In a shocking turn of events, Hugging Face has released a new version of Transformers "
+        "that brings several enhancements and bug fixes. Users are thrilled with the improvements "
+        "and are finding the new version to be significantly better than the previous one. "
+        "The Hugging Face team is thankful for the community's support and continues to work "
+        "towards making the library the best it can be."
+    )
+    summary: str = processor.ask_info(  # type: ignore[no-untyped-call] # FIX ME
+        query=None, context=text, image_path=image_path
+    )
+    print(summary)

ctm/processors/processor_base.py ADDED
@@ -0,0 +1,117 @@
+import base64
+
+from ctm.utils.exponential_backoff import exponential_backoff
+
+
+class BaseProcessor(object):
+    _processor_registry = {}  # type: ignore[var-annotated] # FIX ME
+
+    @classmethod
+    def register_processor(cls, processor_name):  # type: ignore[no-untyped-def] # FIX ME
+        def decorator(subclass):  # type: ignore[no-untyped-def] # FIX ME
+            cls._processor_registry[processor_name] = subclass
+            return subclass
+
+        return decorator
+
+    def __new__(cls, processor_name, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        if processor_name not in cls._processor_registry:
+            raise ValueError(
+                f"No processor registered with name '{processor_name}'"
+            )
+        return super(BaseProcessor, cls).__new__(
+            cls._processor_registry[processor_name]
+        )
+
+    def set_model(self):  # type: ignore[no-untyped-def] # FIX ME
+        raise NotImplementedError(
+            "The 'set_model' method must be implemented in derived classes."
+        )
+
+    @staticmethod
+    def process_image(image_path):  # type: ignore[no-untyped-def] # FIX ME
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode("utf-8")
+
+    @staticmethod
+    def process_audio(audio_path):  # type: ignore[no-untyped-def] # FIX ME
+        return None
+
+    @staticmethod
+    def process_video(video_path):  # type: ignore[no-untyped-def] # FIX ME
+        return None
+
+    def ask(self, query, context, image_path, audio_path, video_path):  # type: ignore[no-untyped-def] # FIX ME
+        gist = self.ask_info(  # type: ignore[no-untyped-call] # FIX ME
+            query, context, image_path, audio_path, video_path
+        )
+        score = self.ask_score(query, gist, verbose=True)  # type: ignore[no-untyped-call] # FIX ME
+        return gist, score
+
+    @exponential_backoff(retries=5, base_wait_time=1)  # type: ignore[misc, no-untyped-call] # FIX ME
+    def ask_relevance(self, query: str, gist: str) -> float:
+        response = self.model.chat.completions.create(  # type: ignore[attr-defined] # FIX ME
+            model="gpt-4-0125-preview",
+            messages=[
+                {
+                    "role": "user",
+                    "content": "How related is the information ({}) with the query ({})? Answer with a number from 0 to 5 and do not add any other thing.".format(
+                        gist, query
+                    ),
+                },
+            ],
+            max_tokens=50,
+        )
+        score = int(response.choices[0].message.content.strip()) / 5
+        return score
+
+    @exponential_backoff(retries=5, base_wait_time=1)  # type: ignore[misc, no-untyped-call] # FIX ME
+    def ask_confidence(self, query: str, gist: str) -> float:
+        response = self.model.chat.completions.create(  # type: ignore[attr-defined] # FIX ME
+            model="gpt-4-0125-preview",
+            messages=[
+                {
+                    "role": "user",
+                    "content": "How confidence do you think the information ({}) is a mustk? Answer with a number from 0 to 5 and do not add any other thing.".format(  # type: ignore[str-format] # FIX ME
+                        gist, query
+                    ),
+                },
+            ],
+            max_tokens=50,
+        )
+        score = int(response.choices[0].message.content.strip()) / 5
+        return score
+
+    @exponential_backoff(retries=5, base_wait_time=1)  # type: ignore[misc, no-untyped-call] # FIX ME
+    def ask_surprise(
+        self, query: str, gist: str, history_gists: str = None  # type: ignore[assignment] # FIX ME
+    ) -> float:
+        response = self.model.chat.completions.create(  # type: ignore[attr-defined] # FIX ME
+            model="gpt-4-0125-preview",
+            messages=[
+                {
+                    "role": "user",
+                    "content": "How surprise do you think the information ({}) is as an output of the processor? Answer with a number from 0 to 5 and do not add any other thing.".format(  # type: ignore[str-format] # FIX ME
+                        gist, query
+                    ),
+                },
+            ],
+            max_tokens=50,
+        )
+        score = int(response.choices[0].message.content.strip()) / 5
+        return score
+
+    def ask_score(self, query, gist, verbose=False, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        relevance = self.ask_relevance(query, gist, *args, **kwargs)
+        confidence = self.ask_confidence(query, gist, *args, **kwargs)
+        surprise = self.ask_surprise(query, gist, *args, **kwargs)
+        if verbose:
+            print(
+                f"Relevance: {relevance}, Confidence: {confidence}, Surprise: {surprise}"
+            )
+        return relevance * confidence * surprise
+
+    def ask_info(self, query, image_path, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        raise NotImplementedError(
+            "The 'ask_information' method must be implemented in derived classes."
+        )

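For reference, each `ask_*` helper above parses an integer from 0 to 5 out of the GPT-4 reply and divides it by 5, and `ask_score` multiplies the three results, so a processor's score lands in [0, 1] before it feeds the up-tree competition in `ctm_base.py`. A worked example with hypothetical sub-scores:

```python
# Hypothetical sub-scores as returned by ask_relevance / ask_confidence / ask_surprise.
relevance, confidence, surprise = 4 / 5, 5 / 5, 3 / 5
score = relevance * confidence * surprise
print(score)  # 0.48 - the per-processor score ranked by uptree_competition()
```
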
ctm/processors/processor_gpt4.py ADDED
@@ -0,0 +1,59 @@
+from openai import OpenAI
+
+from ctm.messengers.messenger_base import BaseMessenger
+from ctm.processors.processor_base import BaseProcessor
+from ctm.utils.exponential_backoff import exponential_backoff
+
+
+@BaseProcessor.register_processor("gpt4_processor")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4Processor(BaseProcessor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+        self.task_instruction = None
+
+    def init_processor(self):  # type: ignore[no-untyped-def] # FIX ME
+        self.model = OpenAI()
+        self.messenger = BaseMessenger("gpt4_messenger")  # type: ignore[no-untyped-call] # FIX ME
+        return
+
+    def process(self, payload: dict) -> dict:  # type: ignore[type-arg] # FIX ME
+        return  # type: ignore[return-value] # FIX ME
+
+    def update_info(self, feedback: str):  # type: ignore[no-untyped-def] # FIX ME
+        self.messenger.add_assistant_message(feedback)
+
+    @exponential_backoff(retries=5, base_wait_time=1)  # type: ignore[no-untyped-call] # FIX ME
+    def gpt4_requst(self):  # type: ignore[no-untyped-def] # FIX ME
+        response = self.model.chat.completions.create(
+            model="gpt-4-turbo-preview",
+            messages=self.messenger.get_messages(),  # type: ignore[no-untyped-call] # FIX ME
+            max_tokens=300,
+        )
+        return response
+
+    def ask_info(  # type: ignore[override] # FIX ME
+        self,
+        query: str,
+        context: str = None,  # type: ignore[assignment] # FIX ME
+        image_path: str = None,  # type: ignore[assignment] # FIX ME
+        audio_path: str = None,  # type: ignore[assignment] # FIX ME
+        video_path: str = None,  # type: ignore[assignment] # FIX ME
+    ) -> str:
+        if self.messenger.check_iter_round_num() == 0:  # type: ignore[no-untyped-call] # FIX ME
+            self.messenger.add_user_message(
+                "The text information for the previously described task is as follows: "
+                + context
+                + "Here is what you should do: "
+                + self.task_instruction  # type: ignore[operator] # FIX ME
+            )
+
+        response = self.gpt4_requst()
+        description = response.choices[0].message.content
+        return description  # type: ignore[no-any-return] # FIX ME
+
+
+if __name__ == "__main__":
+    processor = BaseProcessor("ocr_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[no-untyped-call] # FIX ME
+    print(summary)

ctm/processors/processor_gpt4_speaker_intent.py ADDED
@@ -0,0 +1,15 @@
+from ctm.processors.processor_gpt4 import GPT4Processor
+
+
+@GPT4Processor.register_processor("gpt4_speaker_intent_processor")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4SpeakerIntentProcessor(GPT4Processor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+        self.task_instruction = "You are a speaker intent predictor. You can understand the intent of the speaker and describe what is the speaker's intent for saying that. If there is no speaker detected, please answer with None."
+
+
+if __name__ == "__main__":
+    processor = GPT4Processor("close_fashion_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
+    print(summary)

ctm/processors/processor_gpt4_text_emotion.py ADDED
@@ -0,0 +1,15 @@
+from ctm.processors.processor_gpt4 import GPT4Processor
+
+
+@GPT4Processor.register_processor("gpt4_text_emotion_processor")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4TextEmotionProcessor(GPT4Processor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+        self.task_instruction = "You are a text emotion classifier. You can understand the emotion within the text and generate the emotion label. If there is no text detected, please answer with None."
+
+
+if __name__ == "__main__":
+    processor = GPT4Processor("close_fashion_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
+    print(summary)

ctm/processors/processor_gpt4_text_summary.py ADDED
@@ -0,0 +1,15 @@
+from ctm.processors.processor_gpt4 import GPT4Processor
+
+
+@GPT4Processor.register_processor("gpt4_text_summary_processor")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4TextSummaryProcessor(GPT4Processor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+        self.task_instruction = "You are a text summarizer. You can understand the meaning of the text and generate the summary."
+
+
+if __name__ == "__main__":
+    processor = GPT4Processor("close_fashion_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
+    print(summary)

ctm/processors/processor_gpt4v.py ADDED
@@ -0,0 +1,64 @@
+from openai import OpenAI
+
+from ctm.messengers.messenger_base import BaseMessenger
+from ctm.processors.processor_base import BaseProcessor
+from ctm.utils.exponential_backoff import exponential_backoff
+
+
+@BaseProcessor.register_processor("gpt4v_processor")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4VProcessor(BaseProcessor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+        self.task_instruction = None
+
+    def init_processor(self):  # type: ignore[no-untyped-def] # FIX ME
+        self.model = OpenAI()
+        self.messenger = BaseMessenger("gpt4v_messenger")  # type: ignore[no-untyped-call] # FIX ME
+        return
+
+    def process(self, payload: dict) -> dict:  # type: ignore[type-arg] # FIX ME
+        return  # type: ignore[return-value] # FIX ME
+
+    def update_info(self, feedback: str):  # type: ignore[no-untyped-def] # FIX ME
+        self.messenger.add_assistant_message(feedback)
+
+    @exponential_backoff(retries=5, base_wait_time=1)  # type: ignore[no-untyped-call] # FIX ME
+    def gpt4v_requst(self):  # type: ignore[no-untyped-def] # FIX ME
+        response = self.model.chat.completions.create(
+            model="gpt-4-vision-preview",
+            messages=self.messenger.get_messages(),  # type: ignore[no-untyped-call] # FIX ME
+            max_tokens=300,
+        )
+        return response
+
+    def ask_info(  # type: ignore[override] # FIX ME
+        self,
+        query: str,
+        context: str = None,  # type: ignore[assignment] # FIX ME
+        image_path: str = None,  # type: ignore[assignment] # FIX ME
+        audio_path: str = None,  # type: ignore[assignment] # FIX ME
+        video_path: str = None,  # type: ignore[assignment] # FIX ME
+    ) -> str:
+        if self.messenger.check_iter_round_num() == 0:  # type: ignore[no-untyped-call] # FIX ME
+            image = self.process_image(image_path)  # type: ignore[no-untyped-call] # FIX ME
+            # image = '0'
+            self.messenger.add_user_message(
+                [
+                    {"type": "text", "text": self.task_instruction},
+                    {
+                        "type": "image_url",
+                        "image_url": f"data:image/jpeg;base64,{image}",
+                    },
+                ]
+            )
+
+        response = self.gpt4v_requst()
+        description = response.choices[0].message.content
+        return description  # type: ignore[no-any-return] # FIX ME
+
+
+if __name__ == "__main__":
+    processor = BaseProcessor("ocr_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[no-untyped-call] # FIX ME
+    print(summary)

ctm/processors/processor_gpt4v_cloth_fashion.py ADDED
@@ -0,0 +1,15 @@
+from ctm.processors.processor_gpt4v import GPT4VProcessor
+
+
+@GPT4VProcessor.register_processor("gpt4v_cloth_fashion_processor")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4VClothFashionProcessor(GPT4VProcessor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+        self.task_instruction = "Focus on the cloth of people in the image, describe the style of the cloth fashion. If there is no people detected, please answer with None."
+
+
+if __name__ == "__main__":
+    processor = GPT4VProcessor("close_fashion_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
+    print(summary)

ctm/processors/processor_gpt4v_face_emotion.py ADDED
@@ -0,0 +1,15 @@
+from ctm.processors.processor_gpt4v import GPT4VProcessor
+
+
+@GPT4VProcessor.register_processor("gpt4v_face_emotion_processor")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4VFaceEmotionProcessor(GPT4VProcessor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+        self.task_instruction = "Besides the main scene in the image, can you describe the face emotion that is on people's faces within this picture?"
+
+
+if __name__ == "__main__":
+    processor = GPT4VProcessor("face_emotion_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
+    print(summary)

ctm/processors/processor_gpt4v_ocr.py ADDED
@@ -0,0 +1,15 @@
+from ctm.processors.processor_gpt4v import GPT4VProcessor
+
+
+@GPT4VProcessor.register_processor("gpt4v_ocr_processor")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4VOCRProcessor(GPT4VProcessor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+        self.task_instruction = "You should act like an OCR model. Please extract the text from the image. If there is no text detected, please answer with None."
+
+
+if __name__ == "__main__":
+    processor = GPT4VProcessor("ocr_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
+    print(summary)

ctm/processors/processor_gpt4v_posture.py ADDED
@@ -0,0 +1,15 @@
+from ctm.processors.processor_gpt4v import GPT4VProcessor
+
+
+@GPT4VProcessor.register_processor("gpt4v_posture_processor")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4VPostureProcessor(GPT4VProcessor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+        self.task_instruction = "Besides the main scene in the image, can you describe the posture that is going on within this picture?"
+
+
+if __name__ == "__main__":
+    processor = GPT4VProcessor("posture_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
+    print(summary)

ctm/processors/processor_gpt4v_scene_location.py ADDED
@@ -0,0 +1,15 @@
+from ctm.processors.processor_gpt4v import GPT4VProcessor
+
+
+@GPT4VProcessor.register_processor("gpt4v_scene_location_processor")  # type: ignore[no-untyped-call] # FIX ME
+class GPT4VSceneLocationProcessor(GPT4VProcessor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+        self.task_instruction = "Besides the main activity in the image, can you describe the potential location or the event that is going on within this picture?"
+
+
+if __name__ == "__main__":
+    processor = GPT4VProcessor("scene_location_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
+    print(summary)

ctm/processors/processor_roberta_text_sentiment.py ADDED
@@ -0,0 +1,70 @@
+import os
+
+from huggingface_hub.inference_api import (
+    InferenceApi,  # type: ignore[import] # FIX ME
+)
+
+from ctm.messengers.messenger_base import BaseMessenger
+from ctm.processors.processor_base import BaseProcessor
+
+
+@BaseProcessor.register_processor("roberta_text_sentiment_processor")  # type: ignore[no-untyped-call] # FIX ME
+class RobertaTextSentimentProcessor(BaseProcessor):
+    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
+
+    def init_processor(self):  # type: ignore[no-untyped-def] # FIX ME
+        self.model = InferenceApi(
+            token=os.environ["HF_TOKEN"],
+            repo_id="cardiffnlp/twitter-roberta-base-sentiment-latest",
+        )
+        self.messenger = BaseMessenger("roberta_text_sentiment_messenger")  # type: ignore[no-untyped-call] # FIX ME
+        return
+
+    def update_info(self, feedback: str):  # type: ignore[no-untyped-def] # FIX ME
+        self.messenger.add_assistant_message(feedback)
+
+    def ask_info(  # type: ignore[override] # FIX ME
+        self,
+        query: str,
+        context: str = None,  # type: ignore[assignment] # FIX ME
+        image_path: str = None,  # type: ignore[assignment] # FIX ME
+        audio_path: str = None,  # type: ignore[assignment] # FIX ME
+        video_path: str = None,  # type: ignore[assignment] # FIX ME
+    ) -> str:
+        if self.messenger.check_iter_round_num() == 0:  # type: ignore[no-untyped-call] # FIX ME
+            self.messenger.add_user_message(context)
+
+        response = self.model(self.messenger.get_messages())  # type: ignore[no-untyped-call] # FIX ME
+        results = response[0]
+        # choose the label with the highest score
+        pos_score = 0
+        neg_score = 0
+        neutral_score = 0
+        for result in results:
+            if result["label"] == "POSITIVE":
+                pos_score = result["score"]
+            elif result["label"] == "NEGATIVE":
+                neg_score = result["score"]
+            else:
+                neutral_score = result["score"]
+        if max(pos_score, neg_score, neutral_score) == pos_score:
+            return "This text is positive."
+        elif max(pos_score, neg_score, neutral_score) == neg_score:
+            return "This text is negative."
+        else:
+            return "This text is neutral."
+
+
+if __name__ == "__main__":
+    processor = BaseProcessor("roberta_text_sentiment_processor")  # type: ignore[no-untyped-call] # FIX ME
+    image_path = "../ctmai-test1.png"
+    text: str = (
+        "In a shocking turn of events, Hugging Face has released a new version of Transformers "
+        "that brings several enhancements and bug fixes. Users are thrilled with the improvements "
+        "and are finding the new version to be significantly better than the previous one. "
+        "The Hugging Face team is thankful for the community's support and continues to work "
+        "towards making the library the best it can be."
+    )
+    label = processor.ask_info(query=None, context=text, image_path=image_path)  # type: ignore[no-untyped-call] # FIX ME
+    print(label)

ctm/supervisors/__init__.py ADDED
@@ -0,0 +1,5 @@
+from .supervisor_gpt4 import GPT4Supervisior
+
+__all__ = [
+    "GPT4Supervisior",
+]

ctm/supervisors/supervisor_base.py ADDED
@@ -0,0 +1,51 @@
+import base64
+
+
+class BaseSupervisor(object):
+    _supervisor_registry = {}
+
+    @classmethod
+    def register_supervisor(cls, supervisor_name):
+        def decorator(subclass):
+            cls._supervisor_registry[supervisor_name] = subclass
+            return subclass
+
+        return decorator
+
+    def __new__(cls, supervisor_name, *args, **kwargs):
+        if supervisor_name not in cls._supervisor_registry:
+            raise ValueError(
+                f"No supervisor registered with name '{supervisor_name}'"
+            )
+        return super(BaseSupervisor, cls).__new__(
+            cls._supervisor_registry[supervisor_name]
+        )
+
+    def set_model(self):
+        raise NotImplementedError(
+            "The 'set_model' method must be implemented in derived classes."
+        )
+
+    @staticmethod
+    def process_image(image_path):
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode("utf-8")
+
+    @staticmethod
+    def process_audio(audio_path):
+        return None
+
+    @staticmethod
+    def process_video(video_path):
+        return None
+
+    def ask(self, query, image_path):
+        gist = self.ask_info(query, image_path)
+        score = self.ask_score(query, gist, verbose=True)
+        return gist, score
+
+    def ask_info(self, query: str, context: str = None) -> str:
+        return None
+
+    def ask_score(self, query: str, gist: str, verbose: bool = False) -> float:
+        return None

ctm/supervisors/supervisor_gpt4.py ADDED
@@ -0,0 +1,60 @@
+from openai import OpenAI
+
+from ctm.supervisors.supervisor_base import BaseSupervisor
+from ctm.utils.exponential_backoff import exponential_backoff
+
+
+@BaseSupervisor.register_supervisor("gpt4_supervisor")
+class GPT4Supervisior(BaseSupervisor):
+    def __init__(self, *args, **kwargs):
+        self.init_supervisor()
+
+    def init_supervisor(self):
+        self.model = OpenAI()
+
+    @exponential_backoff(retries=5, base_wait_time=1)
+    def ask_info(self, query: str, context: str = None) -> str:
+        prompt = [
+            {
+                "role": "user",
+                "content": f"The following is detailed information on the topic: {context}. Based on this information, answer the question: {query}. Answer with a few words:",
+            }
+        ]
+        responses = self.model.chat.completions.create(
+            model="gpt-4-turbo-preview", messages=prompt, max_tokens=300, n=1
+        )
+        answer = responses.choices[0].message.content
+        return answer
+
+    def ask_score(self, query, gist, verbose=False, *args, **kwargs):
+        max_attempts = 5
+        for attempt in range(max_attempts):
+            try:
+                response = self.model.chat.completions.create(
+                    model="gpt-4-0125-preview",
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": "How related is the information ({}) with the query ({})? We want to make sure that the information includes a person's name as the answer. Answer with a number from 0 to 5 and do not add any other thing.".format(
+                                gist, query
+                            ),
+                        },
+                    ],
+                    max_tokens=50,
+                )
+                score = int(response.choices[0].message.content.strip()) / 5
+                return score
+            except Exception as e:
+                print(f"Attempt {attempt + 1} failed: {e}")
+                if attempt < max_attempts - 1:
+                    print("Retrying...")
+                else:
+                    print("Max attempts reached. Returning default score.")
+                    return 0
+
+
+if __name__ == "__main__":
+    supervisor = BaseSupervisor("cloth_fashion_supervisor")
+    image_path = "../ctmai-test1.png"
+    summary: str = supervisor.ask_info(query=None, image_path=image_path)
+    print(summary)

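A small usage sketch of the supervisor on its own, via the same registry mechanism as the processors; the question and gist strings below are made up for illustration:

```python
# Sketch: query the GPT-4 supervisor directly. Assumes OPENAI_API_KEY is set.
from ctm.supervisors.supervisor_base import BaseSupervisor

supervisor = BaseSupervisor("gpt4_supervisor")  # dispatches to GPT4Supervisior
answer = supervisor.ask_info(
    query="Who is wearing the red coat?",
    context="The OCR gist says the name tag reads 'Alice'.",
)
score = supervisor.ask_score(query="Who is wearing the red coat?", gist=answer)
print(answer, score)  # free-form answer plus a relatedness score in [0, 1]
```
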
ctm/utils/__init__.py ADDED
File without changes
ctm/utils/exponential_backoff.py ADDED
@@ -0,0 +1,33 @@
+import math
+import time
+from functools import wraps
+
+
+def exponential_backoff(retries=5, base_wait_time=1):  # type: ignore[no-untyped-def] # FIX ME
+    """
+    Decorator for applying exponential backoff to a function.
+    :param retries: Maximum number of retries.
+    :param base_wait_time: Base wait time in seconds for the exponential backoff.
+    """
+
+    def decorator(func):  # type: ignore[no-untyped-def] # FIX ME
+        @wraps(func)
+        def wrapper(*args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
+            attempts = 0
+            while attempts < retries:
+                try:
+                    return func(*args, **kwargs)
+                except Exception as e:
+                    wait_time = base_wait_time * (2**attempts)
+                    print(f"Attempt {attempts + 1} failed: {e}")
+                    print(f"Waiting {wait_time} seconds before retrying...")
+                    time.sleep(wait_time)
+                    attempts += 1
+            print(
+                f"Failed to execute '{func.__name__}' after {retries} retries."
+            )
+            return None
+
+        return wrapper
+
+    return decorator

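A minimal sketch of this decorator in use outside the CTM classes; the flaky function here is invented purely for illustration:

```python
# Sketch: wrap a flaky call with the decorator added above.
import random

from ctm.utils.exponential_backoff import exponential_backoff


@exponential_backoff(retries=3, base_wait_time=1)
def flaky_request():
    # Fails ~2/3 of the time; the decorator retries after 1s, 2s, 4s waits.
    if random.random() < 0.67:
        raise RuntimeError("transient error")
    return "ok"


print(flaky_request())  # "ok", or None if all 3 attempts fail
```
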