RaushanTurganbay HF staff committed on
Commit
879d767
1 Parent(s): fd18380

Add chat template examples

Browse files
Files changed (1) hide show
  1. README.md +32 -9
README.md CHANGED
@@ -4,6 +4,9 @@ language:
4
  pipeline_tag: image-text-to-text
5
  inference: false
6
  arxiv: 2312.00784
 
 
 
7
  ---
8
  # VipLLaVA Model Card
9
 
@@ -55,10 +58,21 @@ import requests
55
  model_id = "llava-hf/vip-llava-7b-hf"
56
  pipe = pipeline("image-to-text", model=model_id)
57
  url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
58
-
59
  image = Image.open(requests.get(url, stream=True).raw)
60
- question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
61
- prompt = f"A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: <image>\n{question}###Assistant:"
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
64
  print(outputs)
@@ -76,12 +90,6 @@ import torch
76
  from transformers import AutoProcessor, VipLlavaForConditionalGeneration
77
 
78
  model_id = "llava-hf/vip-llava-7b-hf"
79
-
80
- question = "What are these?"
81
- prompt = f"A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.###Human: <image>\n{question}###Assistant:"
82
-
83
- image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
84
-
85
  model = VipLlavaForConditionalGeneration.from_pretrained(
86
  model_id,
87
  torch_dtype=torch.float16,
@@ -91,6 +99,21 @@ model = VipLlavaForConditionalGeneration.from_pretrained(
91
  processor = AutoProcessor.from_pretrained(model_id)
92
 
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  raw_image = Image.open(requests.get(image_file, stream=True).raw)
95
  inputs = processor(prompt, raw_image, return_tensors='pt').to(0, torch.float16)
96
 
 
4
  pipeline_tag: image-text-to-text
5
  inference: false
6
  arxiv: 2312.00784
7
+ tags:
8
+ - vision
9
+ - image-text-to-text
10
  ---
11
  # VipLLaVA Model Card
12
 
 
58
  model_id = "llava-hf/vip-llava-7b-hf"
59
  pipe = pipeline("image-to-text", model=model_id)
60
  url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
 
61
  image = Image.open(requests.get(url, stream=True).raw)
62
+
63
+ # Define a chat history and use `apply_chat_template` to get a correctly formatted prompt
64
+ # Each value in "content" has to be a list of dicts with types ("text", "image")
65
+ conversation = [
66
+ {
67
+
68
+ "role": "user",
69
+ "content": [
70
+ {"type": "text", "text": "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"},
71
+ {"type": "image"},
72
+ ],
73
+ },
74
+ ]
75
+ prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
76
 
77
  outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
78
  print(outputs)
 
90
  from transformers import AutoProcessor, VipLlavaForConditionalGeneration
91
 
92
  model_id = "llava-hf/vip-llava-7b-hf"
 
 
 
 
 
 
93
  model = VipLlavaForConditionalGeneration.from_pretrained(
94
  model_id,
95
  torch_dtype=torch.float16,
 
99
  processor = AutoProcessor.from_pretrained(model_id)
100
 
101
 
102
+ # Define a chat history and use `apply_chat_template` to get a correctly formatted prompt
103
+ # Each value in "content" has to be a list of dicts with types ("text", "image")
104
+ conversation = [
105
+ {
106
+
107
+ "role": "user",
108
+ "content": [
109
+ {"type": "text", "text": "What are these?"},
110
+ {"type": "image"},
111
+ ],
112
+ },
113
+ ]
114
+ prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
115
+
116
+ image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
117
  raw_image = Image.open(requests.get(image_file, stream=True).raw)
118
  inputs = processor(prompt, raw_image, return_tensors='pt').to(0, torch.float16)
119