Update README.md

README.md (CHANGED)
@@ -3,7 +3,7 @@ license: bsd-3-clause
 tags:
 - image-captioning
 datasets:
-- unography/laion-
+- unography/laion-14k-GPT4V-LIVIS-Captions
 pipeline_tag: image-to-text
 languages:
 - en
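For orientation, the front matter after this change should read roughly as follows. This is reconstructed only from the context lines shown in the hunk above (the `license` field comes from the hunk header); the `---` delimiters and any fields outside the shown context are omitted:

```yaml
license: bsd-3-clause
tags:
- image-captioning
datasets:
- unography/laion-14k-GPT4V-LIVIS-Captions
pipeline_tag: image-to-text
languages:
- en
```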
@@ -50,7 +50,7 @@ inputs = processor(raw_image, return_tensors="pt")
 pixel_values = inputs.pixel_values
 out = model.generate(pixel_values=pixel_values, max_length=250, num_beams=3, repetition_penalty=2.5)
 print(processor.decode(out[0], skip_special_tokens=True))
->>> a woman sitting on
+>>> a woman sitting on the sand, interacting with a dog wearing a blue and white checkered collar. the dog is positioned to the left of the woman, who is holding something in their hand. the background features a serene beach setting with waves crashing onto the shore. there are no other animals or people visible in the image. the time of day appears to be either early morning or late afternoon, based on the lighting and shadows.

 ```
 </details>
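To see the changed line in context, here is the CPU snippet assembled end to end from this hunk's context lines and the load lines in the hunks below. This is a sketch: the checkpoint name is taken from the updated `+` lines, and the CPU variant is assumed to load the model without a device move (that line is not shown in this diff):

```python
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the processor and the long-caption checkpoint on CPU
# (checkpoint name per the updated README; no .to("cuda") here).
processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

inputs = processor(raw_image, return_tensors="pt")
pixel_values = inputs.pixel_values
# Beam search with a repetition penalty; max_length=250 allows long captions.
out = model.generate(pixel_values=pixel_values, max_length=250, num_beams=3, repetition_penalty=2.5)
print(processor.decode(out[0], skip_special_tokens=True))
```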
@@ -67,8 +67,8 @@ import requests
 from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration

-processor = BlipProcessor.from_pretrained("unography/blip-
-model = BlipForConditionalGeneration.from_pretrained("unography/blip-
+processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
+model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap").to("cuda")

 img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
 raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
@@ -77,7 +77,7 @@ inputs = processor(raw_image, return_tensors="pt").to("cuda")
 pixel_values = inputs.pixel_values
 out = model.generate(pixel_values=pixel_values, max_length=250, num_beams=3, repetition_penalty=2.5)
 print(processor.decode(out[0], skip_special_tokens=True))
->>> a woman sitting on
+>>> a woman sitting on the sand, interacting with a dog wearing a blue and white checkered collar. the dog is positioned to the left of the woman, who is holding something in their hand. the background features a serene beach setting with waves crashing onto the shore. there are no other animals or people visible in the image. the time of day appears to be either early morning or late afternoon, based on the lighting and shadows.
 ```
 </details>

@@ -92,8 +92,8 @@ import requests
 from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration

-processor = BlipProcessor.from_pretrained("unography/blip-
-model = BlipForConditionalGeneration.from_pretrained("unography/blip-
+processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
+model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap", torch_dtype=torch.float16).to("cuda")

 img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
 raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
@@ -102,6 +102,6 @@ inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
 pixel_values = inputs.pixel_values
 out = model.generate(pixel_values=pixel_values, max_length=250, num_beams=3, repetition_penalty=2.5)
 print(processor.decode(out[0], skip_special_tokens=True))
->>> a woman sitting on
+>>> a woman sitting on the sand, interacting with a dog wearing a blue and white checkered collar. the dog is positioned to the left of the woman, who is holding something in their hand. the background features a serene beach setting with waves crashing onto the shore. there are no other animals or people visible in the image. the time of day appears to be either early morning or late afternoon, based on the lighting and shadows.
 ```
 </details>
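Likewise, the half-precision GPU variant assembled end to end. This is a sketch based on the context lines above; `import torch` is assumed to appear in the unchanged part of the snippet, since the context references `torch.float16`:

```python
import torch
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
# Load the weights in float16 and move them to the GPU,
# matching the updated "+" lines in the hunk above.
model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap", torch_dtype=torch.float16).to("cuda")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

# Cast the image tensors to float16 on the GPU as well.
inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
pixel_values = inputs.pixel_values
out = model.generate(pixel_values=pixel_values, max_length=250, num_beams=3, repetition_penalty=2.5)
print(processor.decode(out[0], skip_special_tokens=True))
```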
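As a side note, the checkpoint's `pipeline_tag: image-to-text` metadata suggests it can also be driven through the high-level `pipeline` API. This is a minimal sketch, not part of the README being changed; `device=0` assumes a single CUDA GPU is available:

```python
from transformers import pipeline

# image-to-text pipeline around the same checkpoint.
captioner = pipeline("image-to-text", model="unography/blip-long-cap", device=0)

# Pass the generation settings from the README snippets through generate_kwargs.
result = captioner(
    "https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg",
    generate_kwargs={"max_length": 250, "num_beams": 3, "repetition_penalty": 2.5},
)
print(result[0]["generated_text"])
```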