chenkq committed on
Commit
53500db
·
1 Parent(s): caae4ce

Update README.md to match chat-v1.1 model

Browse files
Files changed (1) hide show
  1. README.md +30 -4
README.md CHANGED
@@ -29,9 +29,37 @@ model = AutoModelForCausalLM.from_pretrained(
29
  trust_remote_code=True
30
  ).to('cuda').eval()
31
 
 
 
 
32
  query = 'Describe this image'
33
  image = Image.open(requests.get('https://github.com/THUDM/CogVLM/blob/main/examples/1.png?raw=true', stream=True).raw).convert('RGB')
34
- inputs = model.build_conversation_input_ids(tokenizer, query=query, history=[], images=[image])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  inputs = {
36
  'input_ids': inputs['input_ids'].unsqueeze(0).to('cuda'),
37
  'token_type_ids': inputs['token_type_ids'].unsqueeze(0).to('cuda'),
@@ -45,9 +73,7 @@ with torch.no_grad():
45
  outputs = outputs[:, inputs['input_ids'].shape[1]:]
46
  print(tokenizer.decode(outputs[0]))
47
 
48
- # Two professional basketball players are playing against each other. On the left side, there is Kobe Bryant wearing a yellow jersey with the
49
- # number 24 on it. He is holding a brown basketball. On the right side, there is another player wearing a blue and red jersey, blocking Kobe's
50
- # movement. Behind them, there are many spectators watching the game.</s>
51
  ```
52
 
53
  # 方法(Method)
 
29
  trust_remote_code=True
30
  ).to('cuda').eval()
31
 
32
+
33
+ # chat example
34
+
35
  query = 'Describe this image'
36
  image = Image.open(requests.get('https://github.com/THUDM/CogVLM/blob/main/examples/1.png?raw=true', stream=True).raw).convert('RGB')
37
+ inputs = model.build_conversation_input_ids(tokenizer, query=query, history=[], images=[image]) # chat mode
38
+ inputs = {
39
+ 'input_ids': inputs['input_ids'].unsqueeze(0).to('cuda'),
40
+ 'token_type_ids': inputs['token_type_ids'].unsqueeze(0).to('cuda'),
41
+ 'attention_mask': inputs['attention_mask'].unsqueeze(0).to('cuda'),
42
+ 'images': [[inputs['images'][0].to('cuda').to(torch.bfloat16)]],
43
+ }
44
+ gen_kwargs = {"max_length": 2048, "do_sample": False}
45
+
46
+ with torch.no_grad():
47
+ outputs = model.generate(**inputs, **gen_kwargs)
48
+ outputs = outputs[:, inputs['input_ids'].shape[1]:]
49
+ print(tokenizer.decode(outputs[0]))
50
+
51
+ # This image captures a moment from a basketball game. Two players are prominently featured: one wearing a yellow jersey with the number
52
+ # 24 and the word 'Lakers' written on it, and the other wearing a navy blue jersey with the word 'Washington' and the number 34. The player
53
+ # in yellow is holding a basketball and appears to be dribbling it, while the player in navy blue is reaching out with his arm, possibly
54
+ # trying to block or defend. The background shows a filled stadium with spectators, indicating that this is a professional game.</s>
55
+
56
+
57
+
58
+ # vqa example
59
+
60
+ query = 'How many houses are there in this cartoon?'
61
+ image = Image.open(requests.get('https://github.com/THUDM/CogVLM/blob/main/examples/4.jpg?raw=true', stream=True).raw).convert('RGB')
62
+ inputs = model.build_conversation_input_ids(tokenizer, query=query, history=[], images=[image], template_version='vqa') # vqa mode
63
  inputs = {
64
  'input_ids': inputs['input_ids'].unsqueeze(0).to('cuda'),
65
  'token_type_ids': inputs['token_type_ids'].unsqueeze(0).to('cuda'),
 
73
  outputs = outputs[:, inputs['input_ids'].shape[1]:]
74
  print(tokenizer.decode(outputs[0]))
75
 
76
+ # 4
 
 
77
  ```
78
 
79
  # 方法(Method)