Haofei Yu
committed on
Commit
•
acb3380
1
Parent(s):
a917903
Feature/support ctm (#16)
Browse files* support inner loop ctm
* support pre-commit
* support running
* support ctm ai
- .gitignore +1 -2
- app.py +20 -42
- ctm +0 -1
- ctm/configs/__init__.py +0 -0
- ctm/configs/ctm_config_base.py +44 -0
- ctm/configs/sarcasm_ctm_config.json +18 -0
- ctm/ctms/__init__.py +5 -0
- ctm/ctms/ctm_base.py +227 -0
- ctm/messengers/__init__.py +15 -0
- ctm/messengers/messenger_bart_text_summ.py +22 -0
- ctm/messengers/messenger_base.py +63 -0
- ctm/messengers/messenger_gpt4.py +22 -0
- ctm/messengers/messenger_gpt4v.py +22 -0
- ctm/messengers/messenger_roberta_text_sentiment.py +23 -0
- ctm/processors/__init__.py +31 -0
- ctm/processors/processor_bart_text_summary.py +55 -0
- ctm/processors/processor_base.py +117 -0
- ctm/processors/processor_gpt4.py +59 -0
- ctm/processors/processor_gpt4_speaker_intent.py +15 -0
- ctm/processors/processor_gpt4_text_emotion.py +15 -0
- ctm/processors/processor_gpt4_text_summary.py +15 -0
- ctm/processors/processor_gpt4v.py +64 -0
- ctm/processors/processor_gpt4v_cloth_fashion.py +15 -0
- ctm/processors/processor_gpt4v_face_emotion.py +15 -0
- ctm/processors/processor_gpt4v_ocr.py +15 -0
- ctm/processors/processor_gpt4v_posture.py +15 -0
- ctm/processors/processor_gpt4v_scene_location.py +15 -0
- ctm/processors/processor_roberta_text_sentiment.py +70 -0
- ctm/supervisors/__init__.py +5 -0
- ctm/supervisors/supervisor_base.py +51 -0
- ctm/supervisors/supervisor_gpt4.py +60 -0
- ctm/utils/__init__.py +0 -0
- ctm/utils/exponential_backoff.py +33 -0
.gitignore
CHANGED
@@ -157,5 +157,4 @@ cython_debug/
|
|
157 |
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
-
#.idea/
|
161 |
-
**/ctm/*
|
|
|
157 |
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
|
app.py
CHANGED
@@ -2,11 +2,8 @@ import os
|
|
2 |
import sys
|
3 |
|
4 |
import gradio as gr
|
5 |
-
import
|
6 |
-
|
7 |
-
from PIL import Image
|
8 |
-
|
9 |
-
sys.path.append("../CTM/")
|
10 |
from ctm.ctms.ctm_base import BaseConsciousnessTuringMachine
|
11 |
|
12 |
ctm = BaseConsciousnessTuringMachine()
|
@@ -35,14 +32,12 @@ def introduction():
|
|
35 |
"""
|
36 |
)
|
37 |
|
38 |
-
|
39 |
def add_processor(processor_name, display_name, state):
|
40 |
-
print(
|
41 |
ctm.add_processor(processor_name)
|
42 |
print(ctm.processor_group_map)
|
43 |
print(len(ctm.processor_list))
|
44 |
-
return display_name +
|
45 |
-
|
46 |
|
47 |
def processor_tab():
|
48 |
# Categorized model names
|
@@ -50,14 +45,14 @@ def processor_tab():
|
|
50 |
"gpt4_text_emotion_processor",
|
51 |
"gpt4_text_summary_processor",
|
52 |
"gpt4_speaker_intent_processor",
|
53 |
-
"roberta_text_sentiment_processor"
|
54 |
]
|
55 |
vision_processors = [
|
56 |
"gpt4v_cloth_fashion_processor",
|
57 |
"gpt4v_face_emotion_processor",
|
58 |
"gpt4v_ocr_processor",
|
59 |
-
"
|
60 |
-
"gpt4v_scene_location_processor"
|
61 |
]
|
62 |
|
63 |
with gr.Blocks():
|
@@ -65,54 +60,37 @@ def processor_tab():
|
|
65 |
with gr.Column(scale=1):
|
66 |
gr.Markdown("### Text Processors")
|
67 |
for model_name in text_processors:
|
68 |
-
display_name = (
|
69 |
-
model_name.replace("processor", "")
|
70 |
-
.replace("_", " ")
|
71 |
-
.title()
|
72 |
-
)
|
73 |
|
74 |
button = gr.Button(display_name)
|
75 |
-
processor_name = gr.Textbox(
|
76 |
-
|
77 |
-
)
|
78 |
-
display_name = gr.Textbox(
|
79 |
-
value=display_name, visible=False
|
80 |
-
)
|
81 |
button.click(
|
82 |
fn=add_processor,
|
83 |
inputs=[processor_name, display_name, gr.State()],
|
84 |
-
outputs=[button]
|
85 |
)
|
86 |
|
87 |
with gr.Column(scale=1):
|
88 |
gr.Markdown("### Vision Processors")
|
89 |
for model_name in vision_processors:
|
90 |
-
display_name = (
|
91 |
-
model_name.replace("processor", "")
|
92 |
-
.replace("_", " ")
|
93 |
-
.title()
|
94 |
-
)
|
95 |
|
96 |
button = gr.Button(display_name)
|
97 |
-
processor_name = gr.Textbox(
|
98 |
-
|
99 |
-
)
|
100 |
-
display_name = gr.Textbox(
|
101 |
-
value=display_name, visible=False
|
102 |
-
)
|
103 |
button.click(
|
104 |
fn=add_processor,
|
105 |
inputs=[processor_name, display_name, gr.State()],
|
106 |
-
outputs=[button]
|
107 |
)
|
108 |
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
)
|
116 |
uptree_competition_output_info, state = uptree_competition(state)
|
117 |
ask_supervisor_output_info, state = ask_supervisor(state)
|
118 |
|
|
|
2 |
import sys
|
3 |
|
4 |
import gradio as gr
|
5 |
+
import sys
|
6 |
+
sys.path.append('./ctm')
|
|
|
|
|
|
|
7 |
from ctm.ctms.ctm_base import BaseConsciousnessTuringMachine
|
8 |
|
9 |
ctm = BaseConsciousnessTuringMachine()
|
|
|
32 |
"""
|
33 |
)
|
34 |
|
|
|
35 |
def add_processor(processor_name, display_name, state):
|
36 |
+
print('add processor ', processor_name)
|
37 |
ctm.add_processor(processor_name)
|
38 |
print(ctm.processor_group_map)
|
39 |
print(len(ctm.processor_list))
|
40 |
+
return display_name + ' (added)'
|
|
|
41 |
|
42 |
def processor_tab():
|
43 |
# Categorized model names
|
|
|
45 |
"gpt4_text_emotion_processor",
|
46 |
"gpt4_text_summary_processor",
|
47 |
"gpt4_speaker_intent_processor",
|
48 |
+
"roberta_text_sentiment_processor"
|
49 |
]
|
50 |
vision_processors = [
|
51 |
"gpt4v_cloth_fashion_processor",
|
52 |
"gpt4v_face_emotion_processor",
|
53 |
"gpt4v_ocr_processor",
|
54 |
+
"gpt4v_posture",
|
55 |
+
"gpt4v_scene_location_processor"
|
56 |
]
|
57 |
|
58 |
with gr.Blocks():
|
|
|
60 |
with gr.Column(scale=1):
|
61 |
gr.Markdown("### Text Processors")
|
62 |
for model_name in text_processors:
|
63 |
+
display_name = model_name.replace("processor", "").replace("_", " ").title()
|
|
|
|
|
|
|
|
|
64 |
|
65 |
button = gr.Button(display_name)
|
66 |
+
processor_name = gr.Textbox(value=model_name, visible=False)
|
67 |
+
display_name = gr.Textbox(value=display_name, visible=False)
|
|
|
|
|
|
|
|
|
68 |
button.click(
|
69 |
fn=add_processor,
|
70 |
inputs=[processor_name, display_name, gr.State()],
|
71 |
+
outputs=[button]
|
72 |
)
|
73 |
|
74 |
with gr.Column(scale=1):
|
75 |
gr.Markdown("### Vision Processors")
|
76 |
for model_name in vision_processors:
|
77 |
+
display_name = model_name.replace("processor", "").replace("_", " ").title()
|
|
|
|
|
|
|
|
|
78 |
|
79 |
button = gr.Button(display_name)
|
80 |
+
processor_name = gr.Textbox(value=model_name, visible=False)
|
81 |
+
display_name = gr.Textbox(value=display_name, visible=False)
|
|
|
|
|
|
|
|
|
82 |
button.click(
|
83 |
fn=add_processor,
|
84 |
inputs=[processor_name, display_name, gr.State()],
|
85 |
+
outputs=[button]
|
86 |
)
|
87 |
|
88 |
|
89 |
+
|
90 |
+
|
91 |
+
def forward(query, content, image, state):
|
92 |
+
state['question'] = query
|
93 |
+
ask_processors_output_info, state = ask_processors(query, content, image, state)
|
|
|
94 |
uptree_competition_output_info, state = uptree_competition(state)
|
95 |
ask_supervisor_output_info, state = ask_supervisor(state)
|
96 |
|
ctm
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
/Users/yuhaofei/github_repo/CTM-AI/ctm/
|
|
|
|
ctm/configs/__init__.py
ADDED
File without changes
|
ctm/configs/ctm_config_base.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
|
3 |
+
|
4 |
+
class BaseConsciousnessTuringMachineConfig(object):
    """Configuration container for a Consciousness Turing Machine (CTM).

    Holds the tunable hyperparameters of a CTM run and accepts arbitrary
    extra keyword arguments so that forward-compatible config files keep
    loading without code changes.
    """

    # Initialize with default values or those passed to the constructor
    def __init__(
        self,
        ctm_name=None,
        max_iter_num=3,
        output_threshold=0.5,
        groups_of_processors=None,
        supervisor="gpt4_supervisor",
        **kwargs,
    ):
        self.ctm_name = ctm_name
        self.max_iter_num = max_iter_num
        self.output_threshold = output_threshold
        # BUG FIX: the default used to be a mutable `{}` shared across all
        # instances; use None as the sentinel and build a fresh dict here.
        self.groups_of_processors = (
            groups_of_processors if groups_of_processors is not None else {}
        )
        self.supervisor = supervisor
        # This allows for handling additional, possibly unknown configuration parameters
        for key, value in kwargs.items():
            setattr(self, key, value)

    def to_json_string(self):
        """Serializes this instance to a JSON string."""
        return json.dumps(self.__dict__, indent=2) + "\n"

    @classmethod
    def from_json_file(cls, json_file):
        """Creates an instance from a JSON file."""
        with open(json_file, "r", encoding="utf-8") as reader:
            text = reader.read()
        return cls(**json.loads(text))

    @classmethod
    def from_ctm(cls, ctm_name):
        """
        Simulate fetching a model configuration from a ctm model repository.
        This example assumes the configuration is already downloaded and saved locally.
        """
        # NOTE(review): this relative path only resolves when the process is
        # started from a sibling directory of `ctm/` — confirm the intended CWD.
        config_file = f"../ctm/configs/{ctm_name}_config.json"
        return cls.from_json_file(config_file)
|
ctm/configs/sarcasm_ctm_config.json
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"ctm_name": "sarcasm_ctm",
|
3 |
+
"max_iter_num": 3,
|
4 |
+
"output_threshold": 0.5,
|
5 |
+
"groups_of_processors": {
|
6 |
+
"group_1": [
|
7 |
+
"gpt4v_scene_location_processor",
|
8 |
+
"gpt4v_cloth_fashion_processor"
|
9 |
+
],
|
10 |
+
"group_2": [
|
11 |
+
"gpt4v_posture_processor"
|
12 |
+
],
|
13 |
+
"group_3": [
|
14 |
+
"gpt4v_ocr_processor"
|
15 |
+
]
|
16 |
+
},
|
17 |
+
"supervisor": "gpt4_supervisor"
|
18 |
+
}
|
ctm/ctms/__init__.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .ctm_base import BaseConsciousnessTuringMachine
|
2 |
+
|
3 |
+
__all__ = [
|
4 |
+
"BaseConsciousnessTuringMachine",
|
5 |
+
]
|
ctm/ctms/ctm_base.py
ADDED
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import concurrent.futures
|
2 |
+
from collections import defaultdict
|
3 |
+
|
4 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
6 |
+
|
7 |
+
from ctm.configs.ctm_config_base import (
|
8 |
+
BaseConsciousnessTuringMachineConfig,
|
9 |
+
)
|
10 |
+
from ctm.processors.processor_base import BaseProcessor
|
11 |
+
from ctm.supervisors.supervisor_base import BaseSupervisor
|
12 |
+
|
13 |
+
|
14 |
+
class BaseConsciousnessTuringMachine(object):
    """Inner loop of a Consciousness Turing Machine (CTM).

    Processors answer a query in parallel, compete up-tree within their
    groups, the best gist is judged by a supervisor, and — when the answer
    is not yet confident enough — the winning gist is broadcast back down
    and similar processors are linked into shared groups.
    """

    def __call__(self, *args, **kwargs):
        return self.forward(*args, **kwargs)

    def __init__(self, ctm_name=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if ctm_name:
            self.config = BaseConsciousnessTuringMachineConfig.from_ctm(
                ctm_name
            )
        else:
            self.config = BaseConsciousnessTuringMachineConfig()
        self.processor_list = []
        # Maps processor_name -> group_name; drives up-tree competition.
        self.processor_group_map = defaultdict(list)
        self.load_ctm()

    def add_processor(self, processor_name, group_name=None):
        """Instantiate a processor by name and optionally assign its group."""
        processor_instance = BaseProcessor(processor_name)
        self.processor_list.append(
            {
                "processor_name": processor_name,
                "processor_instance": processor_instance,
            }
        )
        if group_name:
            self.processor_group_map[processor_name] = group_name

    def add_supervisor(self, supervisor_name):
        """Instantiate and attach the supervisor that scores final answers."""
        supervisor_instance = BaseSupervisor(supervisor_name)
        self.supervisor = {
            "supervisor_name": supervisor_name,
            "supervisor_instance": supervisor_instance,
        }

    @staticmethod
    def ask_processor(
        processor, question, context, image_path, audio_path, video_path
    ):
        """Query a single processor; return its name, gist and score."""
        processor_instance = processor["processor_instance"]
        processor_name = processor["processor_name"]
        gist, score = processor_instance.ask(
            question, context, image_path, audio_path, video_path
        )
        return {"name": processor_name, "gist": gist, "score": score}

    def ask_processors(
        self, question, context, image_path, audio_path, video_path
    ):
        """Query every processor concurrently; return name -> {gist, score}."""
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(
                    self.ask_processor,
                    processor,
                    question,
                    context,
                    image_path,
                    audio_path,
                    video_path,
                )
                for processor in self.processor_list
            ]
            results = [
                future.result()
                for future in concurrent.futures.as_completed(futures)
            ]

        output = {}
        for result in results:
            output[result["name"]] = {
                "gist": result["gist"],
                "score": result["score"],
            }

        assert len(output) == len(self.processor_list)
        return output

    def uptree_competition(self, processor_output):
        """Pick the winning gist: best score per group, then best overall."""
        # Unpack processor outputs into lists for easier processing
        gists, scores, names = [], [], []
        for name, info in processor_output.items():
            gists.append(info["gist"])
            scores.append(info["score"])
            names.append(name)

        # Determine the unique group for each processor
        unique_groups = set(self.processor_group_map.values())

        # Track the best (processor_name, score) pair per group
        best_processor_by_group = {
            group: (None, -1) for group in unique_groups
        }

        # Iterate through processors to find the best in each group
        for name, score in zip(names, scores):
            group = self.processor_group_map[name]
            if score > best_processor_by_group[group][1]:
                best_processor_by_group[group] = (name, score)

        # Select the overall best across groups
        best_overall = max(
            best_processor_by_group.values(), key=lambda x: x[1]
        )
        best_name = best_overall[0]
        index = names.index(best_name)

        winning_info = {
            "name": best_name,
            "gist": gists[index],
            "score": scores[index],
        }
        return winning_info

    def ask_supervisor(self, question, processor_info):
        """Ask the supervisor to answer from the winning gist; returns (answer, score)."""
        final_answer, score = self.supervisor["supervisor_instance"].ask(
            question, processor_info["gist"]
        )
        return final_answer, score

    def downtree_broadcast(self, winning_output):
        """Send the winning gist to every non-winning processor as feedback."""
        winning_processor_name = winning_output["name"]
        winning_processor_gist = winning_output["gist"]
        for processor in self.processor_list:
            if processor["processor_name"] != winning_processor_name:
                processor["processor_instance"].update_info(
                    winning_processor_gist
                )
        return

    def calc_processor_sim(self, processor_output):
        """Pairwise TF-IDF cosine similarity between all processor gists."""
        processor_gists = [info["gist"] for info in processor_output.values()]
        tfidf_vectorizer = TfidfVectorizer()
        tfidf_matrix = tfidf_vectorizer.fit_transform(processor_gists)
        cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
        return cosine_sim

    def link_form(self, processor_output):
        """Merge processors whose gists are similar into the same group."""
        sim = self.calc_processor_sim(processor_output)
        names = list(processor_output.keys())
        # If sim > threshold, link the two processors by combining them into
        # the same group (the larger of their two current groups wins).
        # BUG FIX: the comparison below was previously hard-coded to 0.5
        # instead of using this variable; a leftover debug print(sim) was
        # also removed.
        link_threshold = 0.5
        for i in range(len(sim)):
            for j in range(i + 1, len(sim)):
                if sim[i][j] > link_threshold:
                    processor1_name = names[i]
                    processor2_name = names[j]
                    # processor_group_map maps processor_name -> group_name
                    group1 = self.processor_group_map[processor1_name]
                    group2 = self.processor_group_map[processor2_name]
                    # Count processors currently in each group and keep the
                    # group that includes more processors.
                    groups = list(self.processor_group_map.values())
                    group_name = (
                        group1
                        if groups.count(group1) > groups.count(group2)
                        else group2
                    )
                    self.processor_group_map[processor1_name] = group_name
                    self.processor_group_map[processor2_name] = group_name
        return

    def processor_fuse(self, infos, scores):
        """Placeholder for fusing multiple gists; currently a pass-through."""
        return infos, scores

    def forward(
        self,
        question=None,
        context=None,
        image_path=None,
        audio_path=None,
        video_path=None,
    ):
        """Run the CTM loop until the supervisor is confident or budget ends.

        Returns (answer, score) from the supervisor; (None, None) if no
        iteration ran.
        """
        # BUG FIX: a leftover `import pdb; pdb.set_trace()` was removed, and
        # the thresholds now come from the config (same defaults as the
        # previous hard-coded 0.5 / 3, so behavior is unchanged by default).
        answer_threshold = self.config.output_threshold
        max_iter = self.config.max_iter_num

        answer, score = None, None
        for i in range(max_iter):
            print("start the {}-th iteration".format(i + 1))
            processor_output = self.ask_processors(
                question=question,
                context=context,
                image_path=image_path,
                audio_path=audio_path,
                video_path=video_path,
            )
            winning_output = self.uptree_competition(processor_output)
            answer, score = self.ask_supervisor(question, winning_output)
            if score > answer_threshold:
                break
            else:
                self.downtree_broadcast(winning_output)
                self.link_form(processor_output)
        return answer, score

    def load_ctm(self):
        """Build processors and the supervisor described by the config."""
        for (
            group_name,
            processor_list,
        ) in self.config.groups_of_processors.items():
            for processor_name in processor_list:
                self.add_processor(processor_name, group_name=group_name)
        self.add_supervisor(self.config.supervisor)
|
ctm/messengers/__init__.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .messenger_bart_text_summ import BartTextSummarizationMessenger
|
2 |
+
from .messenger_base import BaseMessenger
|
3 |
+
from .messenger_gpt4 import GPT4Messenger
|
4 |
+
from .messenger_gpt4v import GPT4VMessenger
|
5 |
+
from .messenger_roberta_text_sentiment import (
|
6 |
+
RobertaTextSentimentMessenger,
|
7 |
+
)
|
8 |
+
|
9 |
+
__all__ = [
|
10 |
+
"BaseMessenger",
|
11 |
+
"GPT4VMessenger",
|
12 |
+
"GPT4Messenger",
|
13 |
+
"BartTextSummarizationMessenger",
|
14 |
+
"RobertaTextSentimentMessenger",
|
15 |
+
]
|
ctm/messengers/messenger_bart_text_summ.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, List, Union
|
2 |
+
|
3 |
+
from ctm.messengers.messenger_base import BaseMessenger
|
4 |
+
|
5 |
+
|
6 |
+
@BaseMessenger.register_messenger("bart_text_summ_messenger")  # type: ignore[no-untyped-call] # FIX ME
class BartTextSummarizationMessenger(BaseMessenger):
    """Messenger that accumulates plain text for the BART summarizer."""

    def __init__(self, role=None, content=None, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_messenger(role, content)

    def init_messenger(  # type: ignore[no-untyped-def] # FIX ME
        self, role: str = None, content: Union[str, Dict, List] = None  # type: ignore[assignment, type-arg] # FIX ME
    ):
        # Messages are a single running string for this text-only model.
        self.messages = ""
        if content and role:
            # BUG FIX: was `update_messages` — a nonexistent method that
            # raised AttributeError whenever role and content were supplied.
            self.update_message(role, content)

    def update_message(self, role: str, content: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
        # Role is ignored; content is appended to the running text buffer.
        self.messages += content  # type: ignore[operator] # FIX ME

    def check_iter_round_num(self):  # type: ignore[no-untyped-def] # FIX ME
        """Return 1 if any content has been accumulated, else 0."""
        return 1 if len(self.messages) > 0 else 0
|
ctm/messengers/messenger_base.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, List, Union
|
2 |
+
|
3 |
+
|
4 |
+
class BaseMessenger(object):
    """Registry-based factory and common interface for messengers.

    Subclasses register themselves under a string name via
    ``register_messenger``; instantiating ``BaseMessenger(name)`` then
    returns an instance of the registered subclass.
    """

    _messenger_registry = {}  # type: ignore[var-annotated] # FIX ME

    @classmethod
    def register_messenger(cls, messenger_name):  # type: ignore[no-untyped-def] # FIX ME
        """Class decorator: register the decorated subclass under *messenger_name*."""

        def decorator(subclass):  # type: ignore[no-untyped-def] # FIX ME
            cls._messenger_registry[messenger_name] = subclass
            return subclass

        return decorator

    def __new__(cls, messenger_name, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        # Dispatch construction to the registered subclass.
        if messenger_name not in cls._messenger_registry:
            raise ValueError(
                f"No messenger registered with name '{messenger_name}'"
            )
        return super(BaseMessenger, cls).__new__(
            cls._messenger_registry[messenger_name]
        )

    def __init__(self, role=None, content=None, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_messenger(role, content)

    def init_messenger(  # type: ignore[no-untyped-def] # FIX ME
        self, role: str = None, content: Union[str, Dict, List] = None  # type: ignore[assignment, type-arg] # FIX ME
    ):
        # Subclasses set up their message storage here.
        pass

    def update_message(self, role: str, content: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
        # Subclasses record a (role, content) message here.
        pass

    def check_iter_round_num(self):  # type: ignore[no-untyped-def] # FIX ME
        # Subclasses report how many rounds of messages exist.
        pass

    def add_system_message(self, message: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
        self.update_message("system", message)

    def add_assistant_message(self, message: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
        self.update_message("assistant", message)

    def add_user_message(self, message: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
        self.update_message("user", message)

    def add_user_image(self, image_base64: str):  # type: ignore[no-untyped-def] # FIX ME
        # BUG FIX: previously called the nonexistent `self.add_message`.
        self.update_message(
            "user",
            {
                "type": "image_url",
                "image_url": f"data:image/jpeg;base64,{image_base64}",
            },
        )

    def add_feedback(self, feedback: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
        # BUG FIX: previously called the nonexistent `self.add_message`.
        self.update_message("system", feedback)

    def clear(self):  # type: ignore[no-untyped-def] # FIX ME
        self.messages.clear()  # type: ignore[attr-defined] # FIX ME

    def get_messages(self):  # type: ignore[no-untyped-def] # FIX ME
        return self.messages  # type: ignore[attr-defined] # FIX ME
|
ctm/messengers/messenger_gpt4.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, List, Union
|
2 |
+
|
3 |
+
from ctm.messengers.messenger_base import BaseMessenger
|
4 |
+
|
5 |
+
|
6 |
+
@BaseMessenger.register_messenger("gpt4_messenger")  # type: ignore[no-untyped-call] # FIX ME
class GPT4Messenger(BaseMessenger):
    """Messenger that keeps an OpenAI-style role/content message list for GPT-4."""

    def __init__(self, role=None, content=None, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_messenger(role, content)

    def init_messenger(  # type: ignore[no-untyped-def] # FIX ME
        self, role: str = None, content: Union[str, Dict, List] = None  # type: ignore[assignment, type-arg] # FIX ME
    ):
        self.messages = []  # type: ignore[var-annotated] # FIX ME
        if content and role:
            # BUG FIX: was `update_messages` — a nonexistent method that
            # raised AttributeError whenever role and content were supplied.
            self.update_message(role, content)

    def update_message(self, role: str, content: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
        self.messages.append({"role": role, "content": content})

    def check_iter_round_num(self):  # type: ignore[no-untyped-def] # FIX ME
        """Return the number of messages accumulated so far."""
        return len(self.messages)
|
ctm/messengers/messenger_gpt4v.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, List, Union
|
2 |
+
|
3 |
+
from ctm.messengers.messenger_base import BaseMessenger
|
4 |
+
|
5 |
+
|
6 |
+
@BaseMessenger.register_messenger("gpt4v_messenger")  # type: ignore[no-untyped-call] # FIX ME
class GPT4VMessenger(BaseMessenger):
    """Messenger that keeps an OpenAI-style role/content message list for GPT-4V."""

    def __init__(self, role=None, content=None, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_messenger(role, content)

    def init_messenger(  # type: ignore[no-untyped-def] # FIX ME
        self, role: str = None, content: Union[str, Dict, List] = None  # type: ignore[assignment, type-arg] # FIX ME
    ):
        self.messages = []  # type: ignore[var-annotated] # FIX ME
        if content and role:
            # BUG FIX: was `update_messages` — a nonexistent method that
            # raised AttributeError whenever role and content were supplied.
            self.update_message(role, content)

    def update_message(self, role: str, content: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
        self.messages.append({"role": role, "content": content})

    def check_iter_round_num(self):  # type: ignore[no-untyped-def] # FIX ME
        """Return the number of messages accumulated so far."""
        return len(self.messages)
|
ctm/messengers/messenger_roberta_text_sentiment.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, List, Union
|
2 |
+
|
3 |
+
from ctm.messengers.messenger_base import BaseMessenger
|
4 |
+
|
5 |
+
|
6 |
+
@BaseMessenger.register_messenger("roberta_text_sentiment_messenger")  # type: ignore[no-untyped-call] # FIX ME
class RobertaTextSentimentMessenger(BaseMessenger):
    """Messenger holding a single text payload for the RoBERTa sentiment model."""

    def __init__(self, role=None, content=None, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_messenger(role, content)

    def init_messenger(  # type: ignore[no-untyped-def] # FIX ME
        self, role: str = None, content: Union[str, Dict, List] = None  # type: ignore[assignment, type-arg] # FIX ME
    ):
        self.messages = ""
        if content and role:
            # BUG FIX: was `update_messages` — a nonexistent method that
            # raised AttributeError whenever role and content were supplied.
            self.update_message(role, content)

    def update_message(self, role: str, content: Union[str, Dict, List]):  # type: ignore[no-untyped-def, type-arg] # FIX ME
        # should replace with updated message (latest content wins)
        self.messages = content  # type: ignore[assignment] # FIX ME

    def check_iter_round_num(self):  # type: ignore[no-untyped-def] # FIX ME
        """Return 1 if any content has been stored, else 0."""
        return 1 if len(self.messages) > 0 else 0
|
ctm/processors/__init__.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .processor_bart_text_summary import BartTextSummaryProcessor
|
2 |
+
from .processor_base import BaseProcessor
|
3 |
+
from .processor_gpt4 import GPT4Processor
|
4 |
+
from .processor_gpt4_speaker_intent import GPT4SpeakerIntentProcessor
|
5 |
+
from .processor_gpt4_text_emotion import GPT4TextEmotionProcessor
|
6 |
+
from .processor_gpt4_text_summary import GPT4TextSummaryProcessor
|
7 |
+
from .processor_gpt4v import GPT4VProcessor
|
8 |
+
from .processor_gpt4v_cloth_fashion import GPT4VClothFashionProcessor
|
9 |
+
from .processor_gpt4v_face_emotion import GPT4VFaceEmotionProcessor
|
10 |
+
from .processor_gpt4v_ocr import GPT4VOCRProcessor
|
11 |
+
from .processor_gpt4v_posture import GPT4VPostureProcessor
|
12 |
+
from .processor_gpt4v_scene_location import GPT4VSceneLocationProcessor
|
13 |
+
from .processor_roberta_text_sentiment import (
|
14 |
+
RobertaTextSentimentProcessor,
|
15 |
+
)
|
16 |
+
|
17 |
+
__all__ = [
|
18 |
+
"BaseProcessor",
|
19 |
+
"GPT4VProcessor",
|
20 |
+
"GPT4VSceneLocationProcessor",
|
21 |
+
"GPT4VOCRProcessor",
|
22 |
+
"GPT4VClothFashionProcessor",
|
23 |
+
"GPT4VFaceEmotionProcessor",
|
24 |
+
"GPT4VPostureProcessor",
|
25 |
+
"RobertaTextSentimentProcessor",
|
26 |
+
"BartTextSummaryProcessor",
|
27 |
+
"GPT4SpeakerIntentProcessor",
|
28 |
+
"GPT4TextEmotionProcessor",
|
29 |
+
"GPT4TextSummaryProcessor",
|
30 |
+
"GPT4Processor",
|
31 |
+
]
|
ctm/processors/processor_bart_text_summary.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from huggingface_hub.inference_api import (
|
4 |
+
InferenceApi, # type: ignore[import] # FIX ME
|
5 |
+
)
|
6 |
+
|
7 |
+
from ctm.messengers.messenger_base import BaseMessenger
|
8 |
+
from ctm.processors.processor_base import BaseProcessor
|
9 |
+
|
10 |
+
|
11 |
+
@BaseProcessor.register_processor("bart_text_summary_processor")  # type: ignore[no-untyped-call] # FIX ME
class BartTextSummaryProcessor(BaseProcessor):
    """Processor that summarizes text via the hosted facebook/bart-large-cnn model."""

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME

    def init_processor(self):  # type: ignore[no-untyped-def] # FIX ME
        """Set up the HF Inference API client and the text messenger.

        Requires the ``HF_TOKEN`` environment variable to be set.
        """
        self.model = InferenceApi(
            token=os.environ["HF_TOKEN"], repo_id="facebook/bart-large-cnn"
        )
        self.messenger = BaseMessenger("bart_text_summ_messenger")  # type: ignore[no-untyped-call] # FIX ME
        return

    def update_info(self, feedback: str):  # type: ignore[no-untyped-def] # FIX ME
        """Record broadcast feedback as an assistant message."""
        self.messenger.add_assistant_message(feedback)

    def ask_info(  # type: ignore[override] # FIX ME
        self,
        query: str,
        context: str = None,  # type: ignore[assignment] # FIX ME
        image_path: str = None,  # type: ignore[assignment] # FIX ME
        audio_path: str = None,  # type: ignore[assignment] # FIX ME
        video_path: str = None,  # type: ignore[assignment] # FIX ME
    ) -> str:
        """Return a summary of *context*.

        NOTE(review): ``query`` and the media paths are accepted for
        interface compatibility but are not used by this processor.
        """
        # Seed the messenger with the context only on the first round.
        first_round = self.messenger.check_iter_round_num() == 0  # type: ignore[no-untyped-call] # FIX ME
        if first_round:
            self.messenger.add_user_message(context)

        response = self.model(self.messenger.get_messages())  # type: ignore[no-untyped-call] # FIX ME
        return response[0]["summary_text"]  # type: ignore[no-any-return] # FIX ME
|
40 |
+
|
41 |
+
|
42 |
+
if __name__ == "__main__":
    # Smoke-test entry point. The processor is registered as
    # "bart_text_summary_processor"; the previous demo asked for
    # "bart_text_summ_processor", which is not in the registry and would
    # raise ValueError before doing any work.
    processor = BaseProcessor("bart_text_summary_processor")  # type: ignore[no-untyped-call] # FIX ME
    text: str = (
        "In a shocking turn of events, Hugging Face has released a new version of Transformers "
        "that brings several enhancements and bug fixes. Users are thrilled with the improvements "
        "and are finding the new version to be significantly better than the previous one. "
        "The Hugging Face team is thankful for the community's support and continues to work "
        "towards making the library the best it can be."
    )
    # This processor only consumes text context; the unused image argument
    # from the old demo is dropped.
    summary: str = processor.ask_info(query=None, context=text)  # type: ignore[no-untyped-call] # FIX ME
    print(summary)
|
ctm/processors/processor_base.py
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
|
3 |
+
from ctm.utils.exponential_backoff import exponential_backoff
|
4 |
+
|
5 |
+
|
6 |
+
class BaseProcessor(object):
    """Registry-backed base class for perception processors.

    Subclasses register themselves with
    ``@BaseProcessor.register_processor(name)`` and are then created through
    ``BaseProcessor(name, ...)``, which dispatches ``__new__`` to the
    registered subclass.
    """

    # Shared across the whole hierarchy: registered name -> subclass.
    _processor_registry = {}  # type: ignore[var-annotated] # FIX ME

    @classmethod
    def register_processor(cls, processor_name):  # type: ignore[no-untyped-def] # FIX ME
        """Return a class decorator registering a subclass under *processor_name*."""

        def decorator(subclass):  # type: ignore[no-untyped-def] # FIX ME
            cls._processor_registry[processor_name] = subclass
            return subclass

        return decorator

    def __new__(cls, processor_name, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        """Dispatch instantiation to the subclass registered as *processor_name*.

        Raises ValueError when the name has never been registered.
        """
        if processor_name not in cls._processor_registry:
            raise ValueError(
                f"No processor registered with name '{processor_name}'"
            )
        return super(BaseProcessor, cls).__new__(
            cls._processor_registry[processor_name]
        )

    def set_model(self):  # type: ignore[no-untyped-def] # FIX ME
        raise NotImplementedError(
            "The 'set_model' method must be implemented in derived classes."
        )

    @staticmethod
    def process_image(image_path):  # type: ignore[no-untyped-def] # FIX ME
        """Read the file at *image_path* and return its bytes base64-encoded as text."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    @staticmethod
    def process_audio(audio_path):  # type: ignore[no-untyped-def] # FIX ME
        # Audio input is not supported yet.
        return None

    @staticmethod
    def process_video(video_path):  # type: ignore[no-untyped-def] # FIX ME
        # Video input is not supported yet.
        return None

    def ask(self, query, context, image_path, audio_path, video_path):  # type: ignore[no-untyped-def] # FIX ME
        """Produce a gist for the inputs and score it; returns (gist, score)."""
        gist = self.ask_info(  # type: ignore[no-untyped-call] # FIX ME
            query, context, image_path, audio_path, video_path
        )
        score = self.ask_score(query, gist, verbose=True)  # type: ignore[no-untyped-call] # FIX ME
        return gist, score

    @exponential_backoff(retries=5, base_wait_time=1)  # type: ignore[misc, no-untyped-call] # FIX ME
    def ask_relevance(self, query: str, gist: str) -> float:
        """Ask GPT-4 how related *gist* is to *query*; returns a score in [0, 1]."""
        response = self.model.chat.completions.create(  # type: ignore[attr-defined] # FIX ME
            model="gpt-4-0125-preview",
            messages=[
                {
                    "role": "user",
                    "content": "How related is the information ({}) with the query ({})? Answer with a number from 0 to 5 and do not add any other thing.".format(
                        gist, query
                    ),
                },
            ],
            max_tokens=50,
        )
        # If the model replies with anything but a bare integer, int() raises
        # and exponential_backoff retries (returning None after the final
        # attempt).
        score = int(response.choices[0].message.content.strip()) / 5
        return score

    @exponential_backoff(retries=5, base_wait_time=1)  # type: ignore[misc, no-untyped-call] # FIX ME
    def ask_confidence(self, query: str, gist: str) -> float:
        """Ask GPT-4 how confident it is in *gist* for *query*; returns [0, 1].

        The original prompt was garbled ("How confidence ... is a mustk?") and
        passed two format args into a single placeholder; both are fixed here.
        """
        response = self.model.chat.completions.create(  # type: ignore[attr-defined] # FIX ME
            model="gpt-4-0125-preview",
            messages=[
                {
                    "role": "user",
                    "content": "How confident are you that the information ({}) is correct as a response to the query ({})? Answer with a number from 0 to 5 and do not add any other thing.".format(
                        gist, query
                    ),
                },
            ],
            max_tokens=50,
        )
        score = int(response.choices[0].message.content.strip()) / 5
        return score

    @exponential_backoff(retries=5, base_wait_time=1)  # type: ignore[misc, no-untyped-call] # FIX ME
    def ask_surprise(
        self, query: str, gist: str, history_gists: str = None  # type: ignore[assignment] # FIX ME
    ) -> float:
        """Ask GPT-4 how surprising *gist* is; returns a score in [0, 1].

        The original format() received (gist, query) for a single placeholder;
        the query is now part of the prompt.
        """
        response = self.model.chat.completions.create(  # type: ignore[attr-defined] # FIX ME
            model="gpt-4-0125-preview",
            messages=[
                {
                    "role": "user",
                    "content": "How surprising do you think the information ({}) is as an output of the processor for the query ({})? Answer with a number from 0 to 5 and do not add any other thing.".format(
                        gist, query
                    ),
                },
            ],
            max_tokens=50,
        )
        score = int(response.choices[0].message.content.strip()) / 5
        return score

    def ask_score(self, query, gist, verbose=False, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        """Combine relevance, confidence and surprise into one score (their product)."""
        relevance = self.ask_relevance(query, gist, *args, **kwargs)
        confidence = self.ask_confidence(query, gist, *args, **kwargs)
        surprise = self.ask_surprise(query, gist, *args, **kwargs)
        if verbose:
            print(
                f"Relevance: {relevance}, Confidence: {confidence}, Surprise: {surprise}"
            )
        # NOTE(review): each factor can come back as None when all backoff
        # retries are exhausted, which would make this multiplication raise --
        # confirm desired failure behavior.
        return relevance * confidence * surprise

    def ask_info(self, query, image_path, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        # Error message previously referred to a nonexistent 'ask_information'.
        raise NotImplementedError(
            "The 'ask_info' method must be implemented in derived classes."
        )
|
ctm/processors/processor_gpt4.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
|
3 |
+
from ctm.messengers.messenger_base import BaseMessenger
|
4 |
+
from ctm.processors.processor_base import BaseProcessor
|
5 |
+
from ctm.utils.exponential_backoff import exponential_backoff
|
6 |
+
|
7 |
+
|
8 |
+
@BaseProcessor.register_processor("gpt4_processor")  # type: ignore[no-untyped-call] # FIX ME
class GPT4Processor(BaseProcessor):
    """Text processor that delegates to the OpenAI GPT-4 chat API.

    Subclasses set ``task_instruction``; the base class leaves it None, so
    ``ask_info`` only works on subclasses (or after the caller assigns an
    instruction).
    """

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        # *args/**kwargs are accepted for registry-dispatch compatibility but
        # are not used here.
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
        self.task_instruction = None

    def init_processor(self):  # type: ignore[no-untyped-def] # FIX ME
        """Create the OpenAI client (reads OPENAI_API_KEY from the env) and messenger."""
        self.model = OpenAI()
        self.messenger = BaseMessenger("gpt4_messenger")  # type: ignore[no-untyped-call] # FIX ME
        return

    def process(self, payload: dict) -> dict:  # type: ignore[type-arg] # FIX ME
        # Not implemented: returns None despite the dict annotation.
        return  # type: ignore[return-value] # FIX ME

    def update_info(self, feedback: str):  # type: ignore[no-untyped-def] # FIX ME
        """Record supervisor/broadcast feedback as an assistant message."""
        self.messenger.add_assistant_message(feedback)

    @exponential_backoff(retries=5, base_wait_time=1)  # type: ignore[no-untyped-call] # FIX ME
    def gpt4_requst(self):  # type: ignore[no-untyped-def] # FIX ME
        # NOTE(review): method name is misspelled ("requst"); it is public
        # surface now, so renaming would be a breaking change.
        response = self.model.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=self.messenger.get_messages(),  # type: ignore[no-untyped-call] # FIX ME
            max_tokens=300,
        )
        return response

    def ask_info(  # type: ignore[override] # FIX ME
        self,
        query: str,
        context: str = None,  # type: ignore[assignment] # FIX ME
        image_path: str = None,  # type: ignore[assignment] # FIX ME
        audio_path: str = None,  # type: ignore[assignment] # FIX ME
        video_path: str = None,  # type: ignore[assignment] # FIX ME
    ) -> str:
        """Return GPT-4's answer for *context*; only text input is used.

        On the first round, context and task instruction are concatenated into
        one user message; later rounds reuse the accumulated history. Raises
        TypeError on round 0 if context or task_instruction is None (string
        concatenation with None).
        """
        if self.messenger.check_iter_round_num() == 0:  # type: ignore[no-untyped-call] # FIX ME
            self.messenger.add_user_message(
                "The text information for the previously described task is as follows: "
                + context
                + "Here is what you should do: "
                + self.task_instruction  # type: ignore[operator] # FIX ME
            )

        response = self.gpt4_requst()
        description = response.choices[0].message.content
        return description  # type: ignore[no-any-return] # FIX ME
|
53 |
+
|
54 |
+
|
55 |
+
if __name__ == "__main__":
    # "ocr_processor" is not a registered name in this registry; use the name
    # this module registers ("gpt4_processor"). This base processor also
    # consumes text context (images are ignored) and needs task_instruction
    # set before the first call, otherwise ask_info raises TypeError.
    processor = BaseProcessor("gpt4_processor")  # type: ignore[no-untyped-call] # FIX ME
    processor.task_instruction = "Summarize the following text."
    sample_text = "Hugging Face released a new version of Transformers with several enhancements and bug fixes."
    summary: str = processor.ask_info(query=None, context=sample_text)  # type: ignore[no-untyped-call] # FIX ME
    print(summary)
|
ctm/processors/processor_gpt4_speaker_intent.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctm.processors.processor_gpt4 import GPT4Processor
|
2 |
+
|
3 |
+
|
4 |
+
@GPT4Processor.register_processor("gpt4_speaker_intent_processor")  # type: ignore[no-untyped-call] # FIX ME
class GPT4SpeakerIntentProcessor(GPT4Processor):
    """GPT-4 processor that infers the speaker's intent from text input."""

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
        # Prepended to the user context by GPT4Processor.ask_info().
        self.task_instruction = "You are a speaker intent predictor. You can understand the intent of the speaker and describe what is the speaker's intent for saying that. If there is no speaker detected, please answer with None."


if __name__ == "__main__":
    # Use the registered name (the previous demo passed
    # "close_fashion_processor", which is not in the registry and would raise
    # ValueError) and feed text context, which is what this processor consumes.
    processor = GPT4Processor("gpt4_speaker_intent_processor")  # type: ignore[no-untyped-call] # FIX ME
    sample_text = "I can't believe you did that again after everything we talked about."
    summary: str = processor.ask_info(query=None, context=sample_text)  # type: ignore[arg-type] # FIX ME
    print(summary)
|
ctm/processors/processor_gpt4_text_emotion.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctm.processors.processor_gpt4 import GPT4Processor
|
2 |
+
|
3 |
+
|
4 |
+
@GPT4Processor.register_processor("gpt4_text_emotion_processor")  # type: ignore[no-untyped-call] # FIX ME
class GPT4TextEmotionProcessor(GPT4Processor):
    """GPT-4 processor that labels the emotion expressed in a piece of text."""

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
        # Prepended to the user context by GPT4Processor.ask_info().
        self.task_instruction = "You are a text emotion classifier. You can understand the emotion within the text and generate the emotion label. If there is no text detected, please answer with None."


if __name__ == "__main__":
    # Use the registered name (the previous demo passed
    # "close_fashion_processor", which is not in the registry) and feed text
    # context instead of an unused image path.
    processor = GPT4Processor("gpt4_text_emotion_processor")  # type: ignore[no-untyped-call] # FIX ME
    sample_text = "I finally got the job offer today and I could not be happier."
    summary: str = processor.ask_info(query=None, context=sample_text)  # type: ignore[arg-type] # FIX ME
    print(summary)
|
ctm/processors/processor_gpt4_text_summary.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctm.processors.processor_gpt4 import GPT4Processor
|
2 |
+
|
3 |
+
|
4 |
+
@GPT4Processor.register_processor("gpt4_text_summary_processor")  # type: ignore[no-untyped-call] # FIX ME
class GPT4TextSummaryProcessor(GPT4Processor):
    """GPT-4 processor that produces a summary of the given text."""

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
        # Prepended to the user context by GPT4Processor.ask_info().
        self.task_instruction = "You are a text summarizer. You can understand the meaning of the text and generate the summary."


if __name__ == "__main__":
    # Use the registered name (the previous demo passed
    # "close_fashion_processor", which is not in the registry) and feed text
    # context instead of an unused image path.
    processor = GPT4Processor("gpt4_text_summary_processor")  # type: ignore[no-untyped-call] # FIX ME
    sample_text = "Hugging Face released a new version of Transformers that brings several enhancements and bug fixes, and the community response has been very positive."
    summary: str = processor.ask_info(query=None, context=sample_text)  # type: ignore[arg-type] # FIX ME
    print(summary)
|
ctm/processors/processor_gpt4v.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
|
3 |
+
from ctm.messengers.messenger_base import BaseMessenger
|
4 |
+
from ctm.processors.processor_base import BaseProcessor
|
5 |
+
from ctm.utils.exponential_backoff import exponential_backoff
|
6 |
+
|
7 |
+
|
8 |
+
@BaseProcessor.register_processor("gpt4v_processor")  # type: ignore[no-untyped-call] # FIX ME
class GPT4VProcessor(BaseProcessor):
    """Image processor that delegates to the OpenAI GPT-4 Vision chat API.

    Subclasses set ``task_instruction``; the base class leaves it None.
    """

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        # *args/**kwargs are accepted for registry-dispatch compatibility but
        # are not used here.
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
        self.task_instruction = None

    def init_processor(self):  # type: ignore[no-untyped-def] # FIX ME
        """Create the OpenAI client (reads OPENAI_API_KEY from the env) and messenger."""
        self.model = OpenAI()
        self.messenger = BaseMessenger("gpt4v_messenger")  # type: ignore[no-untyped-call] # FIX ME
        return

    def process(self, payload: dict) -> dict:  # type: ignore[type-arg] # FIX ME
        # Not implemented: returns None despite the dict annotation.
        return  # type: ignore[return-value] # FIX ME

    def update_info(self, feedback: str):  # type: ignore[no-untyped-def] # FIX ME
        """Record supervisor/broadcast feedback as an assistant message."""
        self.messenger.add_assistant_message(feedback)

    @exponential_backoff(retries=5, base_wait_time=1)  # type: ignore[no-untyped-call] # FIX ME
    def gpt4v_requst(self):  # type: ignore[no-untyped-def] # FIX ME
        # NOTE(review): method name is misspelled ("requst"); it is public
        # surface now, so renaming would be a breaking change.
        response = self.model.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=self.messenger.get_messages(),  # type: ignore[no-untyped-call] # FIX ME
            max_tokens=300,
        )
        return response

    def ask_info(  # type: ignore[override] # FIX ME
        self,
        query: str,
        context: str = None,  # type: ignore[assignment] # FIX ME
        image_path: str = None,  # type: ignore[assignment] # FIX ME
        audio_path: str = None,  # type: ignore[assignment] # FIX ME
        video_path: str = None,  # type: ignore[assignment] # FIX ME
    ) -> str:
        """Describe the image at *image_path* according to task_instruction.

        On round 0 the image is base64-encoded and sent as a data URL along
        with the instruction text; later rounds reuse the message history.
        """
        if self.messenger.check_iter_round_num() == 0:  # type: ignore[no-untyped-call] # FIX ME
            image = self.process_image(image_path)  # type: ignore[no-untyped-call] # FIX ME
            self.messenger.add_user_message(
                [
                    {"type": "text", "text": self.task_instruction},
                    {
                        # NOTE(review): newer OpenAI SDKs expect
                        # {"image_url": {"url": ...}}; a bare data-URL string
                        # was accepted by earlier versions -- confirm against
                        # the pinned openai package.
                        "type": "image_url",
                        "image_url": f"data:image/jpeg;base64,{image}",
                    },
                ]
            )

        response = self.gpt4v_requst()
        description = response.choices[0].message.content
        return description  # type: ignore[no-any-return] # FIX ME
|
58 |
+
|
59 |
+
|
60 |
+
if __name__ == "__main__":
    # "ocr_processor" is not a registered name; this module registers
    # "gpt4v_processor". The base class leaves task_instruction as None, so
    # the demo must set one before asking.
    processor = BaseProcessor("gpt4v_processor")  # type: ignore[no-untyped-call] # FIX ME
    processor.task_instruction = "Describe the content of this image."
    image_path = "../ctmai-test1.png"
    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[no-untyped-call] # FIX ME
    print(summary)
|
ctm/processors/processor_gpt4v_cloth_fashion.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctm.processors.processor_gpt4v import GPT4VProcessor
|
2 |
+
|
3 |
+
|
4 |
+
@GPT4VProcessor.register_processor("gpt4v_cloth_fashion_processor")  # type: ignore[no-untyped-call] # FIX ME
class GPT4VClothFashionProcessor(GPT4VProcessor):
    """GPT-4V processor that describes the clothing style of people in an image."""

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
        # Sent as the text part of the vision prompt by GPT4VProcessor.ask_info().
        self.task_instruction = "Focus on the cloth of people in the image, describe the style of the cloth fashion. If there is no people detected, please answer with None."


if __name__ == "__main__":
    # Instantiate via the registered name; the previous demo passed
    # "close_fashion_processor", which is not in the registry and would raise
    # ValueError.
    processor = GPT4VProcessor("gpt4v_cloth_fashion_processor")  # type: ignore[no-untyped-call] # FIX ME
    image_path = "../ctmai-test1.png"
    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
    print(summary)
|
ctm/processors/processor_gpt4v_face_emotion.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctm.processors.processor_gpt4v import GPT4VProcessor
|
2 |
+
|
3 |
+
|
4 |
+
@GPT4VProcessor.register_processor("gpt4v_face_emotion_processor")  # type: ignore[no-untyped-call] # FIX ME
class GPT4VFaceEmotionProcessor(GPT4VProcessor):
    """GPT-4V processor that describes the facial emotions of people in an image."""

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
        # Sent as the text part of the vision prompt by GPT4VProcessor.ask_info().
        self.task_instruction = "Besides the main scene in the image, can you describe the face emotion that is on people's faces within this picture?"


if __name__ == "__main__":
    # Instantiate via the registered name; the previous demo passed
    # "face_emotion_processor", which is not in the registry and would raise
    # ValueError.
    processor = GPT4VProcessor("gpt4v_face_emotion_processor")  # type: ignore[no-untyped-call] # FIX ME
    image_path = "../ctmai-test1.png"
    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
    print(summary)
|
ctm/processors/processor_gpt4v_ocr.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctm.processors.processor_gpt4v import GPT4VProcessor
|
2 |
+
|
3 |
+
|
4 |
+
@GPT4VProcessor.register_processor("gpt4v_ocr_processor")  # type: ignore[no-untyped-call] # FIX ME
class GPT4VOCRProcessor(GPT4VProcessor):
    """GPT-4V processor that extracts visible text from an image (OCR)."""

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
        # Sent as the text part of the vision prompt by GPT4VProcessor.ask_info().
        self.task_instruction = "You should act like an OCR model. Please extract the text from the image. If there is no text detected, please answer with None."


if __name__ == "__main__":
    # Instantiate via the registered name; the previous demo passed
    # "ocr_processor", which is not in the registry and would raise
    # ValueError.
    processor = GPT4VProcessor("gpt4v_ocr_processor")  # type: ignore[no-untyped-call] # FIX ME
    image_path = "../ctmai-test1.png"
    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
    print(summary)
|
ctm/processors/processor_gpt4v_posture.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctm.processors.processor_gpt4v import GPT4VProcessor
|
2 |
+
|
3 |
+
|
4 |
+
@GPT4VProcessor.register_processor("gpt4v_posture_processor")  # type: ignore[no-untyped-call] # FIX ME
class GPT4VPostureProcessor(GPT4VProcessor):
    """GPT-4V processor that describes the body posture of people in an image."""

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
        # Sent as the text part of the vision prompt by GPT4VProcessor.ask_info().
        self.task_instruction = "Besides the main scene in the image, can you describe the posture that is going on within this picture?"


if __name__ == "__main__":
    # Instantiate via the registered name; the previous demo passed
    # "posture_processor", which is not in the registry and would raise
    # ValueError.
    processor = GPT4VProcessor("gpt4v_posture_processor")  # type: ignore[no-untyped-call] # FIX ME
    image_path = "../ctmai-test1.png"
    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
    print(summary)
|
ctm/processors/processor_gpt4v_scene_location.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ctm.processors.processor_gpt4v import GPT4VProcessor
|
2 |
+
|
3 |
+
|
4 |
+
@GPT4VProcessor.register_processor("gpt4v_scene_location_processor")  # type: ignore[no-untyped-call] # FIX ME
class GPT4VSceneLocationProcessor(GPT4VProcessor):
    """GPT-4V processor that guesses the location or event depicted in an image."""

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME
        # Sent as the text part of the vision prompt by GPT4VProcessor.ask_info().
        self.task_instruction = "Besides the main activity in the image, can you describe the potential location or the event that is going on within this picture?"


if __name__ == "__main__":
    # Instantiate via the registered name; the previous demo passed
    # "scene_location_processor", which is not in the registry and would
    # raise ValueError.
    processor = GPT4VProcessor("gpt4v_scene_location_processor")  # type: ignore[no-untyped-call] # FIX ME
    image_path = "../ctmai-test1.png"
    summary: str = processor.ask_info(query=None, image_path=image_path)  # type: ignore[arg-type] # FIX ME
    print(summary)
|
ctm/processors/processor_roberta_text_sentiment.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from huggingface_hub.inference_api import (
|
4 |
+
InferenceApi, # type: ignore[import] # FIX ME
|
5 |
+
)
|
6 |
+
|
7 |
+
from ctm.messengers.messenger_base import BaseMessenger
|
8 |
+
from ctm.processors.processor_base import BaseProcessor
|
9 |
+
|
10 |
+
|
11 |
+
@BaseProcessor.register_processor("roberta_text_sentiment_processor")  # type: ignore[no-untyped-call] # FIX ME
class RobertaTextSentimentProcessor(BaseProcessor):
    """Sentiment processor backed by cardiffnlp/twitter-roberta-base-sentiment-latest
    through the Hugging Face Inference API."""

    def __init__(self, *args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
        self.init_processor()  # type: ignore[no-untyped-call] # FIX ME

    def init_processor(self):  # type: ignore[no-untyped-def] # FIX ME
        """Create the Inference API client (requires HF_TOKEN in the env) and messenger."""
        self.model = InferenceApi(
            token=os.environ["HF_TOKEN"],
            repo_id="cardiffnlp/twitter-roberta-base-sentiment-latest",
        )
        self.messenger = BaseMessenger("roberta_text_sentiment_messenger")  # type: ignore[no-untyped-call] # FIX ME
        return

    def update_info(self, feedback: str):  # type: ignore[no-untyped-def] # FIX ME
        """Record supervisor/broadcast feedback as an assistant message."""
        self.messenger.add_assistant_message(feedback)

    @staticmethod
    def _verbalize_sentiment(results) -> str:  # type: ignore[no-untyped-def] # FIX ME
        """Map the model's label/score list to a one-sentence verdict.

        Labels are compared case-insensitively: this model family emits
        lower-case labels ("positive", "negative", "neutral"), so the original
        exact match on "POSITIVE"/"NEGATIVE" classified everything as neutral.
        Unknown labels fall into the neutral bucket.
        """
        pos_score = 0
        neg_score = 0
        neutral_score = 0
        for result in results:
            label = result["label"].upper()
            if label == "POSITIVE":
                pos_score = result["score"]
            elif label == "NEGATIVE":
                neg_score = result["score"]
            else:
                neutral_score = result["score"]
        # Compute the winner once instead of re-evaluating max() per branch.
        best = max(pos_score, neg_score, neutral_score)
        if best == pos_score:
            return "This text is positive."
        elif best == neg_score:
            return "This text is negative."
        else:
            return "This text is neutral."

    def ask_info(  # type: ignore[override] # FIX ME
        self,
        query: str,
        context: str = None,  # type: ignore[assignment] # FIX ME
        image_path: str = None,  # type: ignore[assignment] # FIX ME
        audio_path: str = None,  # type: ignore[assignment] # FIX ME
        video_path: str = None,  # type: ignore[assignment] # FIX ME
    ) -> str:
        """Classify the sentiment of *context*; non-text arguments are ignored."""
        if self.messenger.check_iter_round_num() == 0:  # type: ignore[no-untyped-call] # FIX ME
            self.messenger.add_user_message(context)

        # NOTE(review): get_messages() presumably yields the raw text payload
        # the Inference API expects for this messenger -- confirm against
        # messenger_roberta_text_sentiment.
        response = self.model(self.messenger.get_messages())  # type: ignore[no-untyped-call] # FIX ME
        return self._verbalize_sentiment(response[0])
|
57 |
+
|
58 |
+
|
59 |
+
if __name__ == "__main__":
    # Manual smoke test: classify a sample paragraph through the registry.
    demo_text: str = (
        "In a shocking turn of events, Hugging Face has released a new version of Transformers "
        "that brings several enhancements and bug fixes. Users are thrilled with the improvements "
        "and are finding the new version to be significantly better than the previous one. "
        "The Hugging Face team is thankful for the community's support and continues to work "
        "towards making the library the best it can be."
    )
    sentiment_processor = BaseProcessor("roberta_text_sentiment_processor")  # type: ignore[no-untyped-call] # FIX ME
    verdict = sentiment_processor.ask_info(  # type: ignore[no-untyped-call] # FIX ME
        query=None,
        context=demo_text,
        image_path="../ctmai-test1.png",
    )
    print(verdict)
|
ctm/supervisors/__init__.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .supervisor_gpt4 import GPT4Supervisior
|
2 |
+
|
3 |
+
__all__ = [
|
4 |
+
"GPT4Supervisior",
|
5 |
+
]
|
ctm/supervisors/supervisor_base.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
|
3 |
+
|
4 |
+
class BaseSupervisor(object):
    """Registry-backed base class for supervisors.

    Subclasses register themselves with
    ``@BaseSupervisor.register_supervisor(name)`` and are then created
    through ``BaseSupervisor(name, ...)``, which dispatches ``__new__`` to
    the registered subclass.
    """

    # Shared across the whole hierarchy: registered name -> subclass.
    _supervisor_registry = {}

    @classmethod
    def register_supervisor(cls, supervisor_name):
        """Return a class decorator registering a subclass under *supervisor_name*."""

        def decorator(subclass):
            cls._supervisor_registry[supervisor_name] = subclass
            return subclass

        return decorator

    def __new__(cls, supervisor_name, *args, **kwargs):
        """Dispatch instantiation to the subclass registered as *supervisor_name*.

        Raises ValueError when the name has never been registered.
        """
        if supervisor_name not in cls._supervisor_registry:
            raise ValueError(
                f"No supervisor registered with name '{supervisor_name}'"
            )
        return super(BaseSupervisor, cls).__new__(
            cls._supervisor_registry[supervisor_name]
        )

    def set_model(self):
        raise NotImplementedError(
            "The 'set_model' method must be implemented in derived classes."
        )

    @staticmethod
    def process_image(image_path):
        """Read the file at *image_path* and return its bytes base64-encoded as text."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    @staticmethod
    def process_audio(audio_path):
        # Audio input is not supported yet.
        return None

    @staticmethod
    def process_video(video_path):
        # Video input is not supported yet.
        return None

    def ask(self, query, image_path):
        """Return (gist, score) produced by the derived supervisor.

        NOTE(review): image_path is forwarded into ask_info's *context*
        parameter positionally -- confirm that is intentional in subclasses.
        """
        gist = self.ask_info(query, image_path)
        score = self.ask_score(query, gist, verbose=True)
        return gist, score

    def ask_info(self, query: str, context: str = None) -> str:
        # Previously returned None despite the declared str return type;
        # raise instead, consistent with set_model and BaseProcessor.ask_info.
        raise NotImplementedError(
            "The 'ask_info' method must be implemented in derived classes."
        )

    def ask_score(self, query: str, gist: str, verbose: bool = False) -> float:
        # Previously returned None despite the declared float return type.
        raise NotImplementedError(
            "The 'ask_score' method must be implemented in derived classes."
        )
|
ctm/supervisors/supervisor_gpt4.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
|
3 |
+
from ctm.supervisors.supervisor_base import BaseSupervisor
|
4 |
+
from ctm.utils.exponential_backoff import exponential_backoff
|
5 |
+
|
6 |
+
|
7 |
+
@BaseSupervisor.register_supervisor("gpt4_supervisor")
class GPT4Supervisior(BaseSupervisor):
    """Supervisor that answers and scores gists with the OpenAI GPT-4 chat API.

    NOTE(review): the class name misspells "Supervisor"; it is exported under
    this spelling in ctm/supervisors/__init__.py, so renaming would break
    importers.
    """

    def __init__(self, *args, **kwargs):
        # *args/**kwargs are accepted for registry-dispatch compatibility but
        # are not used here.
        self.init_supervisor()

    def init_supervisor(self):
        """Create the OpenAI client (reads OPENAI_API_KEY from the environment)."""
        self.model = OpenAI()

    @exponential_backoff(retries=5, base_wait_time=1)
    def ask_info(self, query: str, context: str = None) -> str:
        """Answer *query* in a few words given *context*, via GPT-4."""
        prompt = [
            {
                "role": "user",
                "content": f"The following is detailed information on the topic: {context}. Based on this information, answer the question: {query}. Answer with a few words:",
            }
        ]
        responses = self.model.chat.completions.create(
            model="gpt-4-turbo-preview", messages=prompt, max_tokens=300, n=1
        )
        answer = responses.choices[0].message.content
        return answer

    def ask_score(self, query, gist, verbose=False, *args, **kwargs):
        """Score how related *gist* is to *query* on [0, 1].

        Retries up to max_attempts times (int() raises when the model does
        not reply with a bare integer) and falls back to 0 when exhausted.
        Unlike ask_info, this uses a manual retry loop without backoff delay.
        """
        max_attempts = 5
        for attempt in range(max_attempts):
            try:
                response = self.model.chat.completions.create(
                    model="gpt-4-0125-preview",
                    messages=[
                        {
                            "role": "user",
                            "content": "How related is the information ({}) with the query ({})? We want to make sure that the information includes a person's name as the answer. Answer with a number from 0 to 5 and do not add any other thing.".format(
                                gist, query
                            ),
                        },
                    ],
                    max_tokens=50,
                )
                score = int(response.choices[0].message.content.strip()) / 5
                return score
            except Exception as e:
                print(f"Attempt {attempt + 1} failed: {e}")
                if attempt < max_attempts - 1:
                    print("Retrying...")
                else:
                    print("Max attempts reached. Returning default score.")
                    return 0
|
54 |
+
|
55 |
+
|
56 |
+
if __name__ == "__main__":
    # The supervisor is registered as "gpt4_supervisor"; the previous demo
    # asked for "cloth_fashion_supervisor" (not in the registry -> ValueError)
    # and passed an image_path keyword that ask_info(query, context) does not
    # accept (-> TypeError).
    supervisor = BaseSupervisor("gpt4_supervisor")
    context = "The new library release brought several enhancements and was well received by the community."
    answer: str = supervisor.ask_info(
        query="How was the release received?", context=context
    )
    print(answer)
|
ctm/utils/__init__.py
ADDED
File without changes
|
ctm/utils/exponential_backoff.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
import time
|
3 |
+
from functools import wraps
|
4 |
+
|
5 |
+
|
6 |
+
def exponential_backoff(retries=5, base_wait_time=1):  # type: ignore[no-untyped-def] # FIX ME
    """
    Decorator factory applying exponential backoff to the wrapped function.

    The wrapped function is attempted up to *retries* times; after each
    failure it sleeps base_wait_time * 2**attempt seconds. When every
    attempt fails, a message is printed and None is returned.

    :param retries: Maximum number of retries.
    :param base_wait_time: Base wait time in seconds for the exponential backoff.
    """

    def decorator(func):  # type: ignore[no-untyped-def] # FIX ME
        @wraps(func)
        def wrapper(*args, **kwargs):  # type: ignore[no-untyped-def] # FIX ME
            for attempt in range(retries):
                try:
                    return func(*args, **kwargs)
                except Exception as exc:
                    delay = base_wait_time * (2**attempt)
                    print(f"Attempt {attempt + 1} failed: {exc}")
                    print(f"Waiting {delay} seconds before retrying...")
                    time.sleep(delay)
            print(
                f"Failed to execute '{func.__name__}' after {retries} retries."
            )
            return None

        return wrapper

    return decorator
|