File size: 2,636 Bytes
084fe8e
 
acb3380
 
 
 
084fe8e
acb3380
 
084fe8e
 
acb3380
084fe8e
 
 
 
 
 
 
 
acb3380
084fe8e
 
 
 
acb3380
084fe8e
 
acb3380
084fe8e
acb3380
 
084fe8e
 
 
acb3380
084fe8e
acb3380
 
084fe8e
 
acb3380
084fe8e
acb3380
 
084fe8e
 
 
 
 
acb3380
084fe8e
 
 
 
 
 
 
 
 
 
acb3380
 
 
084fe8e
 
 
acb3380
084fe8e
 
acb3380
 
 
084fe8e
acb3380
084fe8e
 
 
acb3380
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from typing import Any, Dict, List, Optional, Union

from openai import OpenAI

from ctm.messengers.messenger_base import BaseMessenger
from ctm.processors.processor_base import BaseProcessor
from ctm.utils.decorator import info_exponential_backoff


# Ensure that BaseProcessor has a properly typed register_processor method:
@BaseProcessor.register_processor("gpt4v_processor")
class GPT4VProcessor(BaseProcessor):
    """Processor that asks an OpenAI vision chat model about an image.

    The class is registered with ``BaseProcessor`` under the name
    ``"gpt4v_processor"``.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Forward all arguments unchanged to the base-class initializer."""
        super().__init__(*args, **kwargs)

    def init_executor(self) -> None:
        """Create the OpenAI client used to send chat-completion requests."""
        self.executor = OpenAI()

    def init_messenger(self) -> None:
        """Create the messenger used to accumulate the chat messages."""
        self.messenger = BaseMessenger("gpt4v_messenger")

    def init_task_info(self) -> None:
        """Set up task-specific information; must be overridden.

        Raises:
            NotImplementedError: Always, in this class.
        """
        raise NotImplementedError(
            "The 'init_task_info' method must be implemented in derived classes."
        )

    def process(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Return an empty dict; ``payload`` is currently ignored."""
        return {}

    def update_info(self, feedback: str) -> None:
        """Append ``feedback`` to the conversation as an assistant message."""
        self.messenger.add_assistant_message(feedback)

    @info_exponential_backoff(retries=5, base_wait_time=1)
    def gpt4v_request(self) -> str | Any:
        """Send the accumulated messages to the model and return its reply.

        The call is wrapped by ``info_exponential_backoff`` (5 retries,
        base wait time 1) -- presumably retrying on failure; confirm
        against the decorator's implementation.

        Returns:
            The ``content`` of the first choice in the response.
        """
        response = self.executor.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=self.messenger.get_messages(),
            max_tokens=300,
        )
        return response.choices[0].message.content

    def ask_info(
        self,
        query: str,
        text: Optional[str] = None,
        image: Optional[str] = None,
        video_frames: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ) -> str:
        """Ask the model to describe the supplied image.

        On the first round (the messenger reports round number 0) a user
        message is built from the task instruction and, if given, the
        image; afterwards the existing conversation is sent as-is.

        Args:
            query: Accepted for interface compatibility; not used here.
            text: Accepted for interface compatibility; not used here.
            image: Base64-encoded image data, embedded verbatim into a
                ``data:image/jpeg;base64,`` URL. Omitted when falsy.
            video_frames: Accepted for interface compatibility; not used.
            *args: Ignored.
            **kwargs: Ignored.

        Returns:
            The model's reply as returned by ``gpt4v_request``.
        """
        if self.messenger.check_iter_round_num() == 0:
            # The attribute may never have been set (nothing in this class
            # assigns it), so read it defensively and fall back to the
            # placeholder text instead of raising AttributeError.
            instruction = getattr(self, "task_instruction", None)
            content: List[Dict[str, Union[str, Dict[str, str]]]] = [
                {
                    "type": "text",
                    "text": instruction or "No instruction provided.",
                },
            ]
            if image:
                # The Chat Completions schema expects ``image_url`` to be
                # an object carrying the URL under the ``url`` key.
                content.append(
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image}",
                        },
                    }
                )
            self.messenger.add_user_message(content)

        return self.gpt4v_request()


if __name__ == "__main__":
    # Manual smoke test: ask the processor to describe a local image file.
    import base64
    from pathlib import Path

    processor = GPT4VProcessor()
    image_path = "../ctmai-test1.png"
    # ask_info embeds ``image`` verbatim into a base64 data URL, so the
    # file contents must be encoded here; passing the path itself would
    # produce an invalid data URL.
    image_bytes = Path(image_path).read_bytes()
    encoded_image = base64.b64encode(image_bytes).decode("ascii")
    summary: str = processor.ask_info(
        query="Describe the image.", image=encoded_image
    )
    print(summary)