ybelkada committed
Commit 25d2acb
1 Parent(s): af1d88b

Update README.md

Files changed (1)
  1. README.md +24 -14
README.md CHANGED
@@ -32,11 +32,12 @@ You can use this model for conditional and un-conditional image captioning
 <summary> Click to expand </summary>
 
 ```python
+ import requests
+ from PIL import Image
+ from transformers import BlipProcessor, BlipForConditionalGeneration
 
- from transformers import BlipProcessor, BlipForImageCaptioning
-
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
- model = BlipForConditionalGeneration.from_pretrained("Salesfoce/blip-image-captioning-base")
+ processor = BlipProcessor.from_pretrained("ybelkada/blip-image-captioning-base")
+ model = BlipForConditionalGeneration.from_pretrained("ybelkada/blip-image-captioning-base")
 
 img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
 raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
@@ -46,13 +47,15 @@ text = "a photography of"
 inputs = processor(raw_image, text, return_tensors="pt")
 
 out = model.generate(**inputs)
- print(processor.decode(out[0], skip_special_tokens=True)
+ print(processor.decode(out[0], skip_special_tokens=True))
+ # >>> a photography of a woman and her dog
 
 # unconditional image captioning
 inputs = processor(raw_image, return_tensors="pt")
 
 out = model.generate(**inputs)
- print(processor.decode(out[0], skip_special_tokens=True)
+ print(processor.decode(out[0], skip_special_tokens=True))
+ >>> a woman sitting on the beach with her dog
 ```
 </details>
 
@@ -64,11 +67,12 @@ print(processor.decode(out[0], skip_special_tokens=True)
 <summary> Click to expand </summary>
 
 ```python
-
- from transformers import BlipProcessor, BlipForImageCaptioning
+ import requests
+ from PIL import Image
+ from transformers import BlipProcessor, BlipForConditionalGeneration
 
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to("cuda")
+ model = BlipForConditionalGeneration.from_pretrained("Salesfoce/blip-image-captioning-base").to("cuda")
 
 img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
 raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
@@ -78,13 +82,15 @@ text = "a photography of"
 inputs = processor(raw_image, text, return_tensors="pt").to("cuda")
 
 out = model.generate(**inputs)
- print(processor.decode(out[0], skip_special_tokens=True)
+ print(processor.decode(out[0], skip_special_tokens=True))
+ # >>> a photography of a woman and her dog
 
 # unconditional image captioning
 inputs = processor(raw_image, return_tensors="pt").to("cuda")
 
 out = model.generate(**inputs)
- print(processor.decode(out[0], skip_special_tokens=True)
+ print(processor.decode(out[0], skip_special_tokens=True))
+ >>> a woman sitting on the beach with her dog
 ```
 </details>
 
@@ -95,7 +101,9 @@ print(processor.decode(out[0], skip_special_tokens=True)
 
 ```python
 import torch
- from transformers import BlipProcessor, BlipForImageCaptioning
+ import requests
+ from PIL import Image
+ from transformers import BlipProcessor, BlipForConditionalGeneration
 
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", torch_dtype=torch.float16).to("cuda")
@@ -108,13 +116,15 @@ text = "a photography of"
 inputs = processor(raw_image, text, return_tensors="pt").to("cuda", torch.float16)
 
 out = model.generate(**inputs)
- print(processor.decode(out[0], skip_special_tokens=True)
+ print(processor.decode(out[0], skip_special_tokens=True))
+ # >>> a photography of a woman and her dog
 
 # unconditional image captioning
 inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
 
 out = model.generate(**inputs)
- print(processor.decode(out[0], skip_special_tokens=True)
+ print(processor.decode(out[0], skip_special_tokens=True))
+ >>> a woman sitting on the beach with her dog
 ```
 </details>