RaushanTurganbay committed
Commit 1259a00
Parent: 3f9b28a

Update README.md

Files changed (1): README.md (+4 -4)
README.md CHANGED
@@ -90,10 +90,10 @@ import requests
 from PIL import Image
 
 import torch
-from transformers import AutoProcessor, LlavaNextForConditionalGeneration
+from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration
 
 model_id = "llava-hf/llava-onevision-qwen2-72b-ov-hf"
-model = LlavaNextForConditionalGeneration.from_pretrained(
+model = LlavaOnevisionForConditionalGeneration.from_pretrained(
     model_id,
     torch_dtype=torch.float16,
     low_cpu_mem_usage=True,
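
For reference, a minimal sketch of what the full loading and generation snippet looks like after this rename, assuming the rest of the README follows the standard llava-hf usage pattern. The image URL, prompt, `device_map="auto"` placement, and `max_new_tokens` value are illustrative assumptions, not taken from the commit.

```python
import requests
from PIL import Image

import torch
from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration

model_id = "llava-hf/llava-onevision-qwen2-72b-ov-hf"
model = LlavaOnevisionForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",  # assumption: the README's own snippet may place the model differently
)
processor = AutoProcessor.from_pretrained(model_id)

# Illustrative image and prompt, not taken from the commit.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What is shown in this image?"},
        ],
    },
]
prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device, torch.float16)
output = model.generate(**inputs, max_new_tokens=100)
print(processor.decode(output[0][2:], skip_special_tokens=True))
```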
@@ -130,7 +130,7 @@ print(processor.decode(output[0][2:], skip_special_tokens=True))
 First make sure to install `bitsandbytes`, `pip install bitsandbytes` and make sure to have access to a CUDA compatible GPU device. Simply change the snippet above with:
 
 ```diff
-model = LlavaNextForConditionalGeneration.from_pretrained(
+model = LlavaOnevisionForConditionalGeneration.from_pretrained(
     model_id,
     torch_dtype=torch.float16,
     low_cpu_mem_usage=True,
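
The hunk above only shows the head of the README's `bitsandbytes` diff block; the lines that actually enable quantization are not visible in this view. Below is a hedged sketch of loading the renamed class in 4-bit with the current `transformers` API. The `BitsAndBytesConfig` route is an assumption; the README's own block may simply add a bare `load_in_4bit=True` argument instead.

```python
import torch
from transformers import BitsAndBytesConfig, LlavaOnevisionForConditionalGeneration

model_id = "llava-hf/llava-onevision-qwen2-72b-ov-hf"

# Assumption: quantization configured via BitsAndBytesConfig; the README's diff
# block (truncated above) may pass load_in_4bit=True directly instead.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = LlavaOnevisionForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    quantization_config=quantization_config,
)
```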
@@ -143,7 +143,7 @@ model = LlavaNextForConditionalGeneration.from_pretrained(
 First make sure to install `flash-attn`. Refer to the [original repository of Flash Attention](https://github.com/Dao-AILab/flash-attention) regarding that package installation. Simply change the snippet above with:
 
 ```diff
-model = LlavaNextForConditionalGeneration.from_pretrained(
+model = LlavaOnevisionForConditionalGeneration.from_pretrained(
     model_id,
     torch_dtype=torch.float16,
     low_cpu_mem_usage=True,
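
As with the previous hunk, only the head of the README's Flash Attention diff block is visible here. A minimal sketch of loading the renamed class with Flash Attention 2 enabled, assuming the current `attn_implementation` argument; the README's block may use an older flag for the same purpose.

```python
import torch
from transformers import LlavaOnevisionForConditionalGeneration

model_id = "llava-hf/llava-onevision-qwen2-72b-ov-hf"

# Assumption: Flash Attention 2 selected via attn_implementation; requires the
# flash-attn package and a compatible CUDA GPU.
model = LlavaOnevisionForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    attn_implementation="flash_attention_2",
    device_map="auto",
)
```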
 