czczup committed
Commit 594f7d1 (1 parent: 9a60327)

Upload folder using huggingface_hub

Files changed (2)
  1. README.md +10 -0
  2. modeling_internvl_chat.py +2 -1
README.md CHANGED
````diff
@@ -112,6 +112,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 ```

@@ -126,6 +127,7 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     load_in_8bit=True,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval()
 ```

@@ -172,6 +174,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True,
     device_map=device_map).eval()
 ```
@@ -189,6 +192,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -216,6 +220,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -242,6 +247,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -275,6 +281,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -313,6 +320,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -350,6 +358,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -420,6 +429,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

````
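Every README snippet now passes `use_flash_attn=True` alongside the existing loading arguments. Because the checkpoints are loaded with `trust_remote_code=True`, this extra keyword is forwarded to the repository's custom `InternVLChatModel.__init__` rather than consumed by the base `from_pretrained`, and after the model change below it degrades gracefully when flash-attn is not installed. A minimal sketch of the resulting loading call; the repo id is a placeholder for whichever InternVL chat checkpoint you are using:

```python
import torch
from transformers import AutoModel, AutoTokenizer

path = 'OpenGVLab/InternVL-Chat-V1-5'  # placeholder repo id (assumption)

# trust_remote_code=True routes loading through the repo's custom
# InternVLChatModel; extra kwargs such as use_flash_attn reach its
# __init__. After this commit the flag is silently downgraded to False
# on machines without flash-attn instead of raising at load time.
model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    use_flash_attn=True,
    trust_remote_code=True).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
```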
modeling_internvl_chat.py CHANGED
```diff
@@ -17,7 +17,7 @@ from transformers.utils import ModelOutput, logging

 from .configuration_internvl_chat import InternVLChatConfig
 from .conversation import get_conv_template
-from .modeling_intern_vit import InternVisionModel
+from .modeling_intern_vit import InternVisionModel, has_flash_attn

 logger = logging.get_logger(__name__)

@@ -48,6 +48,7 @@ class InternVLChatModel(PreTrainedModel):
         self.num_image_token = int((image_size // patch_size) ** 2 * (config.downsample_ratio ** 2))
         self.downsample_ratio = config.downsample_ratio
         self.ps_version = config.ps_version
+        use_flash_attn = use_flash_attn if has_flash_attn else False
         config.vision_config.use_flash_attn = True if use_flash_attn else False
         config.llm_config._attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager'
```
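The new line downgrades `use_flash_attn` to `False` whenever `modeling_intern_vit.py` reports flash-attn as unavailable, so `_attn_implementation` falls back to `'eager'` instead of the model failing to load. This assumes `modeling_intern_vit.py` exposes a module-level `has_flash_attn` flag; a sketch of the conventional probe behind such a flag (the exact flash-attn symbols imported there may differ):

```python
# Typical module-level probe: importing a flash-attn entry point
# succeeds only when the package is installed and usable on this system.
try:
    from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func  # noqa: F401
    has_flash_attn = True
except ImportError:
    has_flash_attn = False
```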