ClownRat committed
Commit 12e9783 • 1 Parent(s): 41491b0

improve duration.

Files changed (1): app.py (+2 -2)
app.py CHANGED

@@ -80,7 +80,6 @@ class Chat:
         return state
 
     @torch.inference_mode()
-    @spaces.GPU
     def generate(self, tensor: list, modals: list, prompt: str, first_run: bool, state):
         # TODO: support multiple turns of conversation.
         assert len(tensor) == len(modals)
@@ -131,6 +130,7 @@ def save_video_to_local(video_path):
     return filename
 
 
+@spaces.GPU(duration=120)
 def generate(image, video, first_run, state, state_, textbox_in, tensor, modals, dtype=torch.float16):
     flag = 1
     if not textbox_in:
@@ -220,7 +220,7 @@ if __name__ == '__main__':
     conv_mode = "llama_2"
     model_path = 'DAMO-NLP-SG/VideoLLaMA2-7B'
 
-    handler = Chat(model_path, conv_mode=conv_mode, load_8bit=False, load_4bit=False, device='cuda')
+    handler = Chat(model_path, conv_mode=conv_mode, load_8bit=True, load_4bit=False, device='cuda')
     handler.model.to(dtype=torch.float16)
 
     if not os.path.exists("temp"):
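For context: the spaces.GPU decorator from the Hugging Face `spaces` package (ZeroGPU) attaches a GPU only while the decorated function runs, and its `duration` argument raises the per-call time budget (here to 120 seconds). This commit moves the decorator from the inner `Chat.generate` method to the top-level Gradio handler and switches the model to 8-bit loading. Below is a minimal, self-contained sketch of the decorator pattern; the placeholder model, prompt handling, and handler body are illustrative assumptions, not the Space's actual code.

import spaces            # Hugging Face ZeroGPU helper (pip install spaces)
import torch
import gradio as gr


# Placeholder "model": stands in for the VideoLLaMA2 handler loaded at startup.
def fake_model(prompt: str) -> str:
    return f"echo: {prompt}"


@spaces.GPU(duration=120)  # request a GPU for up to 120 s per call (default is 60 s)
def generate(prompt: str) -> str:
    # A GPU is attached only for the span of this call; outside a ZeroGPU
    # Space the decorator is effectively a pass-through and the function
    # runs unchanged.
    with torch.inference_mode():
        return fake_model(prompt)


demo = gr.Interface(fn=generate, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()

Decorating the outer handler keeps the entire request, preprocessing included, inside a single GPU allocation window, and duration=120 doubles the default 60-second budget, which appears to be what the commit message "improve duration." refers to.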