Text Generation
Transformers
Safetensors
English
llava
multimodal
conversational
Eval Results
Inference Endpoints
sjrathi committed on
Commit
652a650
1 Parent(s): 226cce8

Fixed errors in generation code

Browse files

Changed `max_frames_num` from a string to an int so the `max_frames_num == 0` comparison works, and removed the stray `self` parameter from the `load_video` function definition.

Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -171,7 +171,7 @@ import warnings
171
  from decord import VideoReader, cpu
172
  import numpy as np
173
  warnings.filterwarnings("ignore")
174
- def load_video(self, video_path, max_frames_num,fps=1,force_sample=False):
175
  if max_frames_num == 0:
176
  return np.zeros((1, 336, 336, 3))
177
  vr = VideoReader(video_path, ctx=cpu(0),num_threads=1)
@@ -196,7 +196,7 @@ device_map = "auto"
196
  tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, torch_dtype="bfloat16", device_map=device_map) # Add any other thing you want to pass in llava_model_args
197
  model.eval()
198
  video_path = "XXXX"
199
- max_frames_num = "64"
200
  video,frame_time,video_time = load_video(video_path, max_frames_num, 1, force_sample=True)
201
  video = image_processor.preprocess(video, return_tensors="pt")["pixel_values"].cuda().bfloat16()
202
  video = [video]
 
171
  from decord import VideoReader, cpu
172
  import numpy as np
173
  warnings.filterwarnings("ignore")
174
+ def load_video(video_path, max_frames_num,fps=1,force_sample=False):
175
  if max_frames_num == 0:
176
  return np.zeros((1, 336, 336, 3))
177
  vr = VideoReader(video_path, ctx=cpu(0),num_threads=1)
 
196
  tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, torch_dtype="bfloat16", device_map=device_map) # Add any other thing you want to pass in llava_model_args
197
  model.eval()
198
  video_path = "XXXX"
199
+ max_frames_num = 64
200
  video,frame_time,video_time = load_video(video_path, max_frames_num, 1, force_sample=True)
201
  video = image_processor.preprocess(video, return_tensors="pt")["pixel_values"].cuda().bfloat16()
202
  video = [video]