C98yhou079 committed on
Commit 51a5a00 · verified · 1 Parent(s): 46bd772

Create tinyllava/model.py

Files changed (1)
  1. tinyllava/model.py +35 -0
tinyllava/model.py ADDED
@@ -0,0 +1,35 @@
+ # Minimal loader that uses transformers to load a multimodal model if available.
+ # This is a thin adapter: it expects model checkpoints on the HF Hub that are loadable through transformers' Auto classes.
+ # For full TinyLLaVA upstream functionality, replace this module with the full repo.
+ from typing import Optional
+ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor
+ import torch
+
+ def load_pretrained_model(model_path: str, model_base=None, model_name: Optional[str] = None):
+     """
+     Minimal loader:
+     - tokenizer: AutoTokenizer.from_pretrained(model_path)
+     - model: AutoModelForCausalLM.from_pretrained(model_path, device_map="auto" if cuda else None)
+     - image_processor: AutoProcessor.from_pretrained(model_path), falling back to a known vision processor
+     Returns: (tokenizer, model, image_processor, context_len)
+     """
+     if model_name is None:
+         model_name = model_path.split("/")[-1]
+     # Load tokenizer
+     tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
+
+     # Try to load an image processor / processor; fall back to a BLIP processor if available
+     try:
+         image_processor = AutoProcessor.from_pretrained(model_path)
+     except Exception:
+         # Fallback: try a common image processor (BLIP)
+         try:
+             image_processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+         except Exception:
+             image_processor = None
+
+     # Load causal LM; device_map="auto" places weights on GPU(s) when CUDA is available (requires accelerate)
+     model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float32, low_cpu_mem_usage=True, device_map="auto" if torch.cuda.is_available() else None)
+     # Context length: tokenizer.model_max_length, unless it is the "unset" sentinel (a huge int)
+     context_len = tokenizer.model_max_length if tokenizer.model_max_length < 1_000_000 else 2048
+     return tokenizer, model, image_processor, context_len
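
A minimal usage sketch (not part of the commit), assuming this file is importable as tinyllava.model; the checkpoint id "your-org/your-checkpoint" is a hypothetical placeholder for any transformers-compatible causal-LM repo or local path:

    from tinyllava.model import load_pretrained_model

    # "your-org/your-checkpoint" is a placeholder; substitute a real Hub id or local path
    tokenizer, model, image_processor, context_len = load_pretrained_model("your-org/your-checkpoint")
    print(type(model).__name__, context_len)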