Update README.md
Browse files
README.md
CHANGED
@@ -45,10 +45,15 @@ quantization_config = BitsAndBytesConfig(
|
|
45 |
bnb_4bit_quant_type="nf4"
|
46 |
)
|
47 |
|
|
|
|
|
48 |
model_id = "Ertugrul/Pixtral-12B-Captioner-Relaxed"
|
49 |
model = LlavaForConditionalGeneration.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
|
50 |
processor = AutoProcessor.from_pretrained(model_id)
|
51 |
|
|
|
|
|
|
|
52 |
conversation = [
|
53 |
{
|
54 |
"role": "user",
|
@@ -81,6 +86,10 @@ def resize_image(image, target_size=768):
|
|
81 |
# You can try a different target resolution, or skip resizing entirely.
|
82 |
image = resize_image(image, 768)
|
83 |
|
|
|
|
|
|
|
|
|
84 |
with torch.no_grad():
|
85 |
with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
|
86 |
generate_ids = model.generate(**inputs, max_new_tokens=384, do_sample=True, temperature=0.3, use_cache=True, top_k=20)
|
|
|
45 |
bnb_4bit_quant_type="nf4"
|
46 |
)
|
47 |
|
48 |
+
|
49 |
+
|
50 |
model_id = "Ertugrul/Pixtral-12B-Captioner-Relaxed"
|
51 |
model = LlavaForConditionalGeneration.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
|
52 |
processor = AutoProcessor.from_pretrained(model_id)
|
53 |
|
54 |
+
# To load the model quantized, use the following call (passing quantization_config) instead of the from_pretrained call above:
|
55 |
+
# model = LlavaForConditionalGeneration.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16, quantization_config=quantization_config)
|
56 |
+
|
57 |
conversation = [
|
58 |
{
|
59 |
"role": "user",
|
|
|
86 |
# You can try a different target resolution, or skip resizing entirely.
|
87 |
image = resize_image(image, 768)
|
88 |
|
89 |
+
|
90 |
+
inputs = processor(text=PROMPT, images=image, return_tensors="pt").to("cuda")
|
91 |
+
|
92 |
+
|
93 |
with torch.no_grad():
|
94 |
with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
|
95 |
generate_ids = model.generate(**inputs, max_new_tokens=384, do_sample=True, temperature=0.3, use_cache=True, top_k=20)
|