zhanghaoji committed on
Commit
0975142
1 Parent(s): c2cad82
flash_vstream/model/language_model/vstream_llama.py CHANGED
@@ -66,6 +66,7 @@ class VStreamLlamaForCausalLM(VStreamMetaForCausalLM, LlamaForCausalLM):
66
  images: Optional[torch.FloatTensor] = None,
67
  features: Optional[torch.FloatTensor] = None,
68
  return_dict: Optional[bool] = None,
 
69
  ) -> Union[Tuple, CausalLMOutputWithPast]:
70
  if inputs_embeds is None:
71
  if self.use_video_streaming_mode:
 
66
  images: Optional[torch.FloatTensor] = None,
67
  features: Optional[torch.FloatTensor] = None,
68
  return_dict: Optional[bool] = None,
69
+ cache_position=None,
70
  ) -> Union[Tuple, CausalLMOutputWithPast]:
71
  if inputs_embeds is None:
72
  if self.use_video_streaming_mode:
requirements.txt CHANGED
@@ -5,7 +5,7 @@ numpy
5
  torch
6
  torchvision
7
  tokenizers
8
- transformers==4.37.2
9
  accelerate
10
  bitsandbytes
11
  scikit-learn
 
5
  torch
6
  torchvision
7
  tokenizers
8
+ transformers==4.31.0
9
  accelerate
10
  bitsandbytes
11
  scikit-learn