Update README.md
Browse files
README.md
CHANGED
@@ -39,36 +39,22 @@ from uform.gen_model import VLMForCausalLM, VLMProcessor
|
|
39 |
model = VLMForCausalLM.from_pretrained("unum-cloud/uform-gen-chat")
|
40 |
processor = VLMProcessor.from_pretrained("unum-cloud/uform-gen-chat")
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
"images": image,
|
59 |
-
}
|
60 |
-
|
61 |
-
outputs = model.generate(
|
62 |
-
**inputs,
|
63 |
-
do_sample=False,
|
64 |
-
use_cache=True,
|
65 |
-
max_new_tokens=1024,
|
66 |
-
eos_token_id=32001,
|
67 |
-
pad_token_id=processor.tokenizer.pad_token_id,
|
68 |
-
)
|
69 |
-
|
70 |
-
message = processor.batch_decode(outputs[:, inputs["input_ids"].shape[1]:-1])
|
71 |
-
|
72 |
```
|
73 |
|
74 |
|
|
|
39 |
model = VLMForCausalLM.from_pretrained("unum-cloud/uform-gen-chat")
|
40 |
processor = VLMProcessor.from_pretrained("unum-cloud/uform-gen-chat")
|
41 |
|
42 |
+
prompt = "What do you see?"
|
43 |
+
image = Image.open("zebra.jpg")
|
44 |
+
|
45 |
+
inputs = processor(texts=[prompt], images=[image], return_tensors="pt")
|
46 |
+
with torch.inference_mode():
|
47 |
+
output = model.generate(
|
48 |
+
**inputs,
|
49 |
+
do_sample=False,
|
50 |
+
use_cache=True,
|
51 |
+
max_new_tokens=128,
|
52 |
+
eos_token_id=32001,
|
53 |
+
pad_token_id=processor.tokenizer.pad_token_id
|
54 |
+
)
|
55 |
+
|
56 |
+
prompt_len = inputs["input_ids"].shape[1]
|
57 |
+
decoded_text = processor.batch_decode(output[:, prompt_len:])[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
```
|
59 |
|
60 |
|