Ertugrul
/

Pixtral-12B-Captioner-Relaxed

image-text-to-text

Inference Endpoints

Model card Files Files and versions Community

Ertugrul committed on Oct 1

Commit

768ab0d

•

1 Parent(s): 843b9c7

Update README.md

Files changed (1) hide show

README.md +9 -0

README.md CHANGED Viewed

@@ -45,10 +45,15 @@ quantization_config = BitsAndBytesConfig(
     bnb_4bit_quant_type="nf4"
     )
 model_id = "Ertugrul/Pixtral-12B-Captioner-Relaxed"
 model = LlavaForConditionalGeneration.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
 processor = AutoProcessor.from_pretrained(model_id)
 conversation = [
     {
         "role": "user",
@@ -81,6 +86,10 @@ def resize_image(image, target_size=768):
 # you can try different resolutions or disable it completely
 image = resize_image(image, 768)
 with torch.no_grad():
     with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
         generate_ids = model.generate(**inputs, max_new_tokens=384, do_sample=True, temperature=0.3, use_cache=True, top_k=20)

     bnb_4bit_quant_type="nf4"
     )
 model_id = "Ertugrul/Pixtral-12B-Captioner-Relaxed"
 model = LlavaForConditionalGeneration.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
 processor = AutoProcessor.from_pretrained(model_id)
+# To load the model with 4-bit quantization, use this call instead of the load above
+# model = LlavaForConditionalGeneration.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16, quantization_config=quantization_config)
 conversation = [
     {
         "role": "user",
 # you can try different resolutions or disable it completely
 image = resize_image(image, 768)
+inputs = processor(text=PROMPT, images=image, return_tensors="pt").to("cuda")
 with torch.no_grad():
     with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
         generate_ids = model.generate(**inputs, max_new_tokens=384, do_sample=True, temperature=0.3, use_cache=True, top_k=20)