krypticmouse committed on
Commit
576debb
1 Parent(s): 595277d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -3
README.md CHANGED
@@ -11,7 +11,8 @@ Here is how to use this model to caption an image of the Flickr8k dataset:
11
  import torch
12
  import requests
13
  from PIL import Image
14
- from transformers import ViTFeatureExtractor, AutoTokenizer, VisionEncoderDecoderModel
 
15
 
16
  if torch.cuda.is_available():
17
  device = 'cuda'
@@ -23,10 +24,10 @@ image = Image.open(requests.get(url, stream=True).raw)
23
 
24
  encoder_checkpoint = 'google/vit-base-patch16-224'
25
  decoder_checkpoint = 'surajp/gpt2-hindi'
26
-
27
  feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
28
  tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
29
- model = VisionEncoderDecoderModel.from_pretrained('team-indain-image-caption/hindi-image-captioning').to(device)
30
 
31
  #Inference
32
  sample = feature_extractor(image, return_tensors="pt").pixel_values.to(device)
 
11
  import torch
12
  import requests
13
  from PIL import Image
14
+ from transformers import (ViTFeatureExtractor, AutoTokenizer,
15
+ VisionEncoderDecoderModel)
16
 
17
  if torch.cuda.is_available():
18
  device = 'cuda'
 
24
 
25
  encoder_checkpoint = 'google/vit-base-patch16-224'
26
  decoder_checkpoint = 'surajp/gpt2-hindi'
27
+ model_checkpoint = 'team-indain-image-caption/hindi-image-captioning'
28
  feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
29
  tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
30
+ model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint).to(device)
31
 
32
  #Inference
33
  sample = feature_extractor(image, return_tensors="pt").pixel_values.to(device)