# File size: 1,933 Bytes
import os
import torch
from PIL import Image
from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
from src.utils.singleton import Singleton
from src.modules.config import Config, config as ConfigObj
from src.modules.logger import logger as Logger
class ModelLoader(metaclass=Singleton):
    """Load a causal-LM checkpoint plus tokenizer and answer questions about images.

    The class is created with ``Singleton`` as its metaclass. The previous
    ``__metaclass__ = Singleton`` class attribute is Python 2 syntax and is
    ignored by Python 3, so it had no effect.
    NOTE(review): presumably ``Singleton`` makes repeated construction return
    one shared instance -- confirm against ``src.utils.singleton``.

    The image methods rely on ``encode_image`` and ``answer_question`` being
    available on the loaded model object (the checkpoint is loaded with
    ``trust_remote_code=True``).
    """

    def __init__(self, conf: Config):
        """Load the model and tokenizer described by *conf*.

        Args:
            conf: Configuration object. This method reads ``model_name``,
                ``gpu_mode`` and ``models_cache_dir`` from it.
        """
        print(conf.model_name)

        # Pinned checkpoint revision. Notes from earlier attempts:
        # "2024-03-04" was very slow; "main" and "2024-05-08" did not work.
        revision = "2024-08-26"

        # Keyword arguments shared by the GPU and CPU loading paths.
        common_kwargs = {
            "trust_remote_code": True,
            "revision": revision,
            "cache_dir": conf.models_cache_dir,
        }

        if conf.gpu_mode:
            # NOTE(review): device_map already targets "cuda"; the trailing
            # .to("cuda") looks redundant but is kept to preserve behaviour.
            self._model = AutoModelForCausalLM.from_pretrained(
                conf.model_name,
                torch_dtype=torch.bfloat16,
                device_map={"": "cuda"},
                attn_implementation="flash_attention_2",
                **common_kwargs,
            ).to("cuda")
        else:
            self._model = AutoModelForCausalLM.from_pretrained(
                conf.model_name, **common_kwargs
            )

        self._tokenizer = AutoTokenizer.from_pretrained(
            conf.model_name, revision=revision, cache_dir=conf.models_cache_dir
        )
        self._model.eval()

    def image_describe(self, image_path):
        """Return the model's answer to "Describe this image." for *image_path*.

        Args:
            image_path: Path (or file object) accepted by ``PIL.Image.open``.

        Returns:
            Whatever the model's ``answer_question`` returns for the fixed prompt.
        """
        return self.image_ask(image_path, "Describe this image.")

    def image_ask(self, image_path, question):
        """Return the model's answer to *question* about the image at *image_path*.

        Args:
            image_path: Path (or file object) accepted by ``PIL.Image.open``.
            question: Prompt passed to the model alongside the encoded image.

        Returns:
            Whatever the model's ``answer_question`` returns.
        """
        # Encode inside the context manager so the underlying image file is
        # closed once the pixels have been consumed, instead of leaking the handle.
        with Image.open(image_path) as image:
            enc_image = self._model.encode_image(image)
        return self._model.answer_question(enc_image, question, self._tokenizer)
# Module-level loader instance, created when this module is imported;
# constructing it loads the model and tokenizer using the project's config object.
model_loader = ModelLoader(ConfigObj)
|