fix import errors
Browse files
- README.md +4 -3
- modeling_xgenmm.py +2 -2
- vlm.py +1 -1
README.md
CHANGED
@@ -60,9 +60,10 @@ import json
|
|
60 |
import PIL
|
61 |
import IPython.display as display
|
62 |
import torch
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
66 |
tokenizer = model.update_special_tokens(tokenizer)
|
67 |
|
68 |
model = model.to('cuda')
|
|
|
60 |
import PIL
|
61 |
import IPython.display as display
|
62 |
import torch
|
63 |
+
model_name_or_path = "Salesforce/xgen-mm-phi3-mini-base-r-v1"
|
64 |
+
model = AutoModelForVision2Seq.from_pretrained(model_name_or_path, trust_remote_code=True)
|
65 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True, use_fast=True, legacy=False)
|
66 |
+
image_processor = AutoImageProcessor.from_pretrained(model_name_or_path, trust_remote_code=True)
|
67 |
tokenizer = model.update_special_tokens(tokenizer)
|
68 |
|
69 |
model = model.to('cuda')
|
modeling_xgenmm.py
CHANGED
@@ -3,8 +3,8 @@ import torch
|
|
3 |
import open_clip
|
4 |
from typing import List, Optional, Tuple, Union
|
5 |
from utils import check_embedding_fns
|
6 |
-
from vlm import PerceiverResampler, Kosmos
|
7 |
-
from configuration_xgenmm import XGenMMVisionEncoderConfig, XGenMMVisionTokenizerConfig, XGenMMConfig
|
8 |
|
9 |
class XGenMMVisionEncoder(PreTrainedModel):
|
10 |
main_input_name = "pixel_values"
|
|
|
3 |
import open_clip
|
4 |
from typing import List, Optional, Tuple, Union
|
5 |
from utils import check_embedding_fns
|
6 |
+
from .vlm import PerceiverResampler, Kosmos
|
7 |
+
from .configuration_xgenmm import XGenMMVisionEncoderConfig, XGenMMVisionTokenizerConfig, XGenMMConfig
|
8 |
|
9 |
class XGenMMVisionEncoder(PreTrainedModel):
|
10 |
main_input_name = "pixel_values"
|
vlm.py
CHANGED
@@ -11,7 +11,7 @@ from dataclasses import dataclass
|
|
11 |
from transformers import CLIPVisionModel
|
12 |
import transformers
|
13 |
|
14 |
-
from utils import num_params, getattr_recursive, stack_with_padding, get_anyres_image_grid_shape, unpad_image
|
15 |
|
16 |
|
17 |
class VisionTokenizer(nn.Module):
|
|
|
11 |
from transformers import CLIPVisionModel
|
12 |
import transformers
|
13 |
|
14 |
+
from .utils import num_params, getattr_recursive, stack_with_padding, get_anyres_image_grid_shape, unpad_image
|
15 |
|
16 |
|
17 |
class VisionTokenizer(nn.Module):
|