autonomous019 committed on
Commit
f08c73b
·
1 Parent(s): bf0f55d

experiment with self-caption

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -40,6 +40,7 @@ model.save_pretrained("./vit-bert")
40
  model = VisionEncoderDecoderModel.from_pretrained("./vit-bert")
41
 
42
 
 
43
 
44
  repo_name = "ydshieh/vit-gpt2-coco-en"
45
  test_image = "cats.jpg"
@@ -47,14 +48,16 @@ feature_extractor2 = ViTFeatureExtractor.from_pretrained(repo_name)
47
  tokenizer = AutoTokenizer.from_pretrained(repo_name)
48
  model2 = VisionEncoderDecoderModel.from_pretrained(repo_name)
49
  pixel_values = feature_extractor2(test_image, return_tensors="pt").pixel_values
50
-
 
51
  # autoregressively generate text (using beam search or other decoding strategy)
52
  generated_ids = model2.generate(pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True)
53
  # decode into text
54
  preds = tokenizer.batch_decode(generated_ids[0], skip_special_tokens=True)
55
  preds = [pred.strip() for pred in preds]
 
56
  print(preds)
57
- '''
58
 
59
 
60
  def classify_image(image):
 
40
  model = VisionEncoderDecoderModel.from_pretrained("./vit-bert")
41
 
42
 
43
+ '''
44
 
45
  repo_name = "ydshieh/vit-gpt2-coco-en"
46
  test_image = "cats.jpg"
 
48
  tokenizer = AutoTokenizer.from_pretrained(repo_name)
49
  model2 = VisionEncoderDecoderModel.from_pretrained(repo_name)
50
  pixel_values = feature_extractor2(test_image, return_tensors="pt").pixel_values
51
+ print("Pixel Values")
52
+ print(pixel_values)
53
  # autoregressively generate text (using beam search or other decoding strategy)
54
  generated_ids = model2.generate(pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True)
55
  # decode into text
56
  preds = tokenizer.batch_decode(generated_ids[0], skip_special_tokens=True)
57
  preds = [pred.strip() for pred in preds]
58
+ print("Predictions")
59
  print(preds)
60
+
61
 
62
 
63
  def classify_image(image):