# Copyright (c) 2023-2024 DeepSeek.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import json
from typing import Dict, List

import PIL.Image
import torch
from transformers import AutoModelForCausalLM

from deepseek_vl.models import MultiModalityCausalLM, VLChatProcessor


def load_pretrained_model(model_path: str):
    """Load the chat processor, its tokenizer and the model from ``model_path``.

    The model is cast to ``torch.bfloat16``, moved to CUDA and put in eval mode
    before being returned.

    Args:
        model_path (str): location passed unchanged to both
            ``VLChatProcessor.from_pretrained`` and
            ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        A ``(tokenizer, vl_chat_processor, vl_gpt)`` tuple, where ``tokenizer``
        is ``vl_chat_processor.tokenizer``.
    """
    vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
    tokenizer = vl_chat_processor.tokenizer

    # NOTE(review): trust_remote_code=True lets Transformers run model code
    # shipped alongside the checkpoint -- only point this at trusted sources.
    vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
        model_path, trust_remote_code=True
    )
    vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()

    return tokenizer, vl_chat_processor, vl_gpt


def load_pil_images(conversations: List[Dict[str, str]]) -> List[PIL.Image.Image]:
    r"""Load every image referenced by the conversation as an RGB PIL image.

    Args:
        conversations (List[Dict[str, str]]): the conversation as a list of
            messages. A message may carry an "images" entry holding a list of
            image paths; messages without that key are skipped.
            An example is:

                [
                    {
                        "role": "User",
                        "content": "\nExtract all information from this image and convert them into markdown format.",
                        "images": ["./examples/table_datasets.png"]
                    },
                    {"role": "Assistant", "content": ""},
                ]

    Returns:
        pil_images (List[PIL.Image.Image]): the loaded images converted to
            "RGB", in the order their paths appear in the conversation.
    """
    pil_images = []

    for message in conversations:
        if "images" not in message:
            continue

        for image_path in message["images"]:
            # Image.open() is lazy and holds on to the file handle. convert()
            # reads the pixel data and returns a separate image, so the source
            # image (and its handle) can be closed as soon as the copy exists.
            with PIL.Image.open(image_path) as source_img:
                pil_images.append(source_img.convert("RGB"))

    return pil_images


def load_json(filepath):
    """Parse the JSON file at ``filepath`` and return the decoded object.

    Args:
        filepath: path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)