unography committed on
Commit 53e2e6d
1 Parent(s): 9957875

Update README.md

Files changed (1)
  1. README.md +10 -10
README.md CHANGED
@@ -22,7 +22,7 @@ inference:
   max_length: 300
 ---
 
-# LongCap: Finetuned [BLIP](https://huggingface.co/Salesforce/blip-image-captioning-base) for generating long captions of images, suitable for prompts for text-to-image generation and captioning text-to-image datasets
+# LongCap: Finetuned [BLIP](https://huggingface.co/Salesforce/blip-image-captioning-large) for generating long captions of images, suitable for prompts for text-to-image generation and captioning text-to-image datasets
 
 
 ## Usage
@@ -41,8 +41,8 @@ import requests
 from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration
 
-processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
-model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap")
+processor = BlipProcessor.from_pretrained("unography/blip-large-long-cap")
+model = BlipForConditionalGeneration.from_pretrained("unography/blip-large-long-cap")
 
 img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
 raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
@@ -51,7 +51,7 @@ inputs = processor(raw_image, return_tensors="pt")
 pixel_values = inputs.pixel_values
 out = model.generate(pixel_values=pixel_values, max_length=250)
 print(processor.decode(out[0], skip_special_tokens=True))
->>> a beach setting with a woman kneeling down and interacting with a dog. the woman is wearing a collar and is standing near the dog. the dog is positioned on the sand, and the atmosphere is calm and relaxing. there are no other people or animals in the image.
+>>> a woman sitting on the beach, wearing a checkered shirt and a dog collar. the woman is interacting with the dog, which is positioned towards the left side of the image. the setting is a beachfront with a calm sea and a golden hue.
 
 ```
 </details>
@@ -68,8 +68,8 @@ import requests
 from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration
 
-processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
-model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap").to("cuda")
+processor = BlipProcessor.from_pretrained("unography/blip-large-long-cap")
+model = BlipForConditionalGeneration.from_pretrained("unography/blip-large-long-cap").to("cuda")
 
 img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
 raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
@@ -78,7 +78,7 @@ inputs = processor(raw_image, return_tensors="pt").to("cuda")
 pixel_values = inputs.pixel_values
 out = model.generate(pixel_values=pixel_values, max_length=250)
 print(processor.decode(out[0], skip_special_tokens=True))
->>> a beach setting with a woman kneeling down and interacting with a dog. the woman is wearing a collar and is standing near the dog. the dog is positioned on the sand, and the atmosphere is calm and relaxing. there are no other people or animals in the image.
+>>> a woman sitting on the beach, wearing a checkered shirt and a dog collar. the woman is interacting with the dog, which is positioned towards the left side of the image. the setting is a beachfront with a calm sea and a golden hue.
 ```
 </details>
 
@@ -93,8 +93,8 @@ import requests
 from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration
 
-processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
-model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap", torch_dtype=torch.float16).to("cuda")
+processor = BlipProcessor.from_pretrained("unography/blip-large-long-cap")
+model = BlipForConditionalGeneration.from_pretrained("unography/blip-large-long-cap", torch_dtype=torch.float16).to("cuda")
 
 img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
 raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
@@ -103,6 +103,6 @@ inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
 pixel_values = inputs.pixel_values
 out = model.generate(pixel_values=pixel_values, max_length=250)
 print(processor.decode(out[0], skip_special_tokens=True))
->>> a beach setting with a woman kneeling down and interacting with a dog. the woman is wearing a collar and is standing near the dog. the dog is positioned on the sand, and the atmosphere is calm and relaxing. there are no other people or animals in the image.
+>>> a woman sitting on the beach, wearing a checkered shirt and a dog collar. the woman is interacting with the dog, which is positioned towards the left side of the image. the setting is a beachfront with a calm sea and a golden hue.
 ```
 </details>
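
For reference, a minimal end-to-end sketch assembled from the hunks above, using the updated model id `unography/blip-large-long-cap`. It follows the half-precision GPU variant and assumes a CUDA device is available; the generation settings are taken from the card's examples, not a recommendation beyond them.

```python
# Minimal usage sketch assembled from the updated README hunks above.
# Assumes a CUDA device; drop .to("cuda") and torch_dtype for a CPU-only run.
import requests
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

model_id = "unography/blip-large-long-cap"  # new model id introduced by this commit
processor = BlipProcessor.from_pretrained(model_id)
model = BlipForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.float16
).to("cuda")

# Demo image used throughout the card's examples
img_url = "https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg"
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB")

inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
out = model.generate(pixel_values=inputs.pixel_values, max_length=250)
print(processor.decode(out[0], skip_special_tokens=True))
```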