File size: 1,933 Bytes
81c4f13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8396f74
 
81c4f13
 
 
 
8396f74
81c4f13
 
 
 
 
 
 
 
 
8396f74
81c4f13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import torch
from PIL import Image
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

from src.utils.singleton import Singleton
from src.modules.config import Config, config as ConfigObj
from src.modules.logger import logger as Logger


class ModelLoader(metaclass=Singleton):
    """Load a causal-LM vision model plus its tokenizer and query it about images.

    The model and tokenizer are loaded once in ``__init__`` from the settings
    in the given ``Config`` and kept on the instance for later calls.

    ``Singleton`` is applied through the Python 3 ``metaclass=`` keyword.  The
    former ``__metaclass__ = Singleton`` class attribute is Python 2 syntax and
    is silently ignored by Python 3, so it never took effect.
    NOTE(review): this presumes ``Singleton`` is a ``type`` subclass that
    caches the created instance -- confirm in ``src/utils/singleton.py``.
    """

    # Pinned model revision.  Notes carried over from earlier experiments:
    # "2024-03-04" was very slow; "main" and "2024-05-08" did not work.
    _REVISION = "2024-08-26"

    # Prompt used by image_describe().
    _DESCRIBE_PROMPT = "Describe this image."

    def __init__(self, conf: Config):
        """Load the model and tokenizer described by *conf*.

        Reads ``conf.model_name``, ``conf.gpu_mode`` and
        ``conf.models_cache_dir``.

        Args:
            conf: Configuration object providing the attributes listed above.
        """
        print(conf.model_name)

        if conf.gpu_mode:
            # GPU path: bfloat16 weights, flash-attention-2, everything mapped
            # to the "cuda" device.
            # NOTE(review): the trailing .to("cuda") looks redundant given
            # device_map={"": "cuda"}; kept to preserve existing behavior.
            self._model = AutoModelForCausalLM.from_pretrained(
                conf.model_name, trust_remote_code=True, revision=self._REVISION,
                torch_dtype=torch.bfloat16, cache_dir=conf.models_cache_dir,
                device_map={"": "cuda"}, attn_implementation="flash_attention_2",
            ).to("cuda")
        else:
            # CPU path: library defaults for dtype, placement and attention.
            self._model = AutoModelForCausalLM.from_pretrained(
                conf.model_name, trust_remote_code=True, revision=self._REVISION,
                cache_dir=conf.models_cache_dir,
            )
        self._tokenizer = AutoTokenizer.from_pretrained(
            conf.model_name, revision=self._REVISION, cache_dir=conf.models_cache_dir
        )
        # Switch the model to evaluation (inference) mode.
        self._model.eval()

    def image_describe(self, image_path):
        """Return the model's answer to "Describe this image." for an image.

        Args:
            image_path: Anything accepted by ``PIL.Image.open``.

        Returns:
            Whatever ``self._model.answer_question`` returns for the prompt.
        """
        return self.image_ask(image_path, self._DESCRIBE_PROMPT)

    def image_ask(self, image_path, question):
        """Ask the model *question* about the image at *image_path*.

        Args:
            image_path: Anything accepted by ``PIL.Image.open``.
            question: Prompt passed to the model together with the image.

        Returns:
            Whatever ``self._model.answer_question`` returns.
        """
        # The context manager closes the underlying file once the image has
        # been encoded; previously the handle was left open on every call.
        with Image.open(image_path) as image:
            enc_image = self._model.encode_image(image)
        return self._model.answer_question(enc_image, question, self._tokenizer)


# Module-level loader instance built from the shared config object.
# Constructing it runs ModelLoader.__init__, which loads the model and
# tokenizer, so importing this module triggers that load.
model_loader = ModelLoader(ConfigObj)