Visual Question Answering
Transformers
TensorBoard
Safetensors
internvl_chat
feature-extraction
custom_code
czczup committed on
Commit
1352871
1 Parent(s): 40b10d5

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. modeling_internvl_chat.py +6 -11
modeling_internvl_chat.py CHANGED
@@ -26,7 +26,7 @@ logger = logging.get_logger(__name__)
26
  class InternVLChatModel(PreTrainedModel):
27
  config_class = InternVLChatConfig
28
  main_input_name = 'pixel_values'
29
- _no_split_modules = ['InternVisionEncoderLayer', 'LlamaDecoderLayer']
30
 
31
  def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None):
32
  super().__init__(config)
@@ -237,10 +237,6 @@ class InternVLChatModel(PreTrainedModel):
237
  raise NotImplementedError
238
  img_context_token_id = tokenizer.convert_tokens_to_ids(IMG_CONTEXT_TOKEN)
239
  self.img_context_token_id = img_context_token_id
240
- if tokenizer.convert_tokens_to_ids('<|im_end|>') != 0:
241
- eos_token_id = tokenizer.convert_tokens_to_ids('<|im_end|>') # 92542, InternLM2
242
- else:
243
- eos_token_id = tokenizer.eos_token_id
244
 
245
  from .conversation import get_conv_template
246
 
@@ -259,6 +255,7 @@ class InternVLChatModel(PreTrainedModel):
259
  model_inputs = tokenizer(queries, return_tensors='pt', padding=True)
260
  input_ids = model_inputs['input_ids'].cuda()
261
  attention_mask = model_inputs['attention_mask'].cuda()
 
262
  generation_config['eos_token_id'] = eos_token_id
263
 
264
  generation_output = self.generate(
@@ -268,7 +265,7 @@ class InternVLChatModel(PreTrainedModel):
268
  **generation_config
269
  )
270
  responses = tokenizer.batch_decode(generation_output, skip_special_tokens=True)
271
- responses = [response.split('<|im_end|>')[0].strip() for response in responses] # for InternLM2
272
  return responses
273
 
274
  def chat(self, tokenizer, pixel_values, question, generation_config, history=None, return_history=False,
@@ -276,10 +273,6 @@ class InternVLChatModel(PreTrainedModel):
276
 
277
  img_context_token_id = tokenizer.convert_tokens_to_ids(IMG_CONTEXT_TOKEN)
278
  self.img_context_token_id = img_context_token_id
279
- if tokenizer.convert_tokens_to_ids('<|im_end|>') != 0:
280
- eos_token_id = tokenizer.convert_tokens_to_ids('<|im_end|>') # 92542, InternLM2
281
- else:
282
- eos_token_id = tokenizer.eos_token_id
283
 
284
  from .conversation import get_conv_template
285
 
@@ -300,7 +293,9 @@ class InternVLChatModel(PreTrainedModel):
300
  model_inputs = tokenizer(query, return_tensors='pt')
301
  input_ids = model_inputs['input_ids'].cuda()
302
  attention_mask = model_inputs['attention_mask'].cuda()
 
303
  generation_config['eos_token_id'] = eos_token_id
 
304
  generation_output = self.generate(
305
  pixel_values=pixel_values,
306
  input_ids=input_ids,
@@ -308,7 +303,7 @@ class InternVLChatModel(PreTrainedModel):
308
  **generation_config
309
  )
310
  response = tokenizer.batch_decode(generation_output, skip_special_tokens=True)[0]
311
- response = response.split('<|im_end|>')[0].strip() # for InternLM2
312
  history.append((question, response))
313
  if return_history:
314
  return response, history
 
26
  class InternVLChatModel(PreTrainedModel):
27
  config_class = InternVLChatConfig
28
  main_input_name = 'pixel_values'
29
+ _no_split_modules = ['InternVisionEncoderLayer', 'LlamaDecoderLayer', 'InternLM2DecoderLayer']
30
 
31
  def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None):
32
  super().__init__(config)
 
237
  raise NotImplementedError
238
  img_context_token_id = tokenizer.convert_tokens_to_ids(IMG_CONTEXT_TOKEN)
239
  self.img_context_token_id = img_context_token_id
 
 
 
 
240
 
241
  from .conversation import get_conv_template
242
 
 
255
  model_inputs = tokenizer(queries, return_tensors='pt', padding=True)
256
  input_ids = model_inputs['input_ids'].cuda()
257
  attention_mask = model_inputs['attention_mask'].cuda()
258
+ eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
259
  generation_config['eos_token_id'] = eos_token_id
260
 
261
  generation_output = self.generate(
 
265
  **generation_config
266
  )
267
  responses = tokenizer.batch_decode(generation_output, skip_special_tokens=True)
268
+ responses = [response.split(template.sep)[0].strip() for response in responses]
269
  return responses
270
 
271
  def chat(self, tokenizer, pixel_values, question, generation_config, history=None, return_history=False,
 
273
 
274
  img_context_token_id = tokenizer.convert_tokens_to_ids(IMG_CONTEXT_TOKEN)
275
  self.img_context_token_id = img_context_token_id
 
 
 
 
276
 
277
  from .conversation import get_conv_template
278
 
 
293
  model_inputs = tokenizer(query, return_tensors='pt')
294
  input_ids = model_inputs['input_ids'].cuda()
295
  attention_mask = model_inputs['attention_mask'].cuda()
296
+ eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
297
  generation_config['eos_token_id'] = eos_token_id
298
+
299
  generation_output = self.generate(
300
  pixel_values=pixel_values,
301
  input_ids=input_ids,
 
303
  **generation_config
304
  )
305
  response = tokenizer.batch_decode(generation_output, skip_special_tokens=True)[0]
306
+ response = response.split(template.sep)[0].strip()
307
  history.append((question, response))
308
  if return_history:
309
  return response, history