autonomous019 committed
Commit 0ef83dc
1 Parent(s): 2979393

adding self_caption

Files changed (1)
  1. app.py +22 -20
app.py CHANGED
@@ -42,26 +42,28 @@ model = VisionEncoderDecoderModel.from_pretrained("./vit-bert")
 
 '''
 
- repo_name = "ydshieh/vit-gpt2-coco-en"
- #test_image = "cats.jpg"
- url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
- test_image = Image.open(requests.get(url, stream=True).raw)
- test_image.save("cats.png")
- feature_extractor2 = ViTFeatureExtractor.from_pretrained(repo_name)
- tokenizer = AutoTokenizer.from_pretrained(repo_name)
- model2 = VisionEncoderDecoderModel.from_pretrained(repo_name)
- pixel_values = feature_extractor2(test_image, return_tensors="pt").pixel_values
- print("Pixel Values")
- print(pixel_values)
- # autoregressively generate text (using beam search or other decoding strategy)
- generated_ids = model2.generate(pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True)
- # decode into text
- preds = tokenizer.batch_decode(generated_ids[0], skip_special_tokens=True)
- preds = [pred.strip() for pred in preds]
- print("Predictions")
- print(preds)
+ def self_caption(image):
+     repo_name = "ydshieh/vit-gpt2-coco-en"
+     #test_image = "cats.jpg"
+     url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
+     test_image = Image.open(requests.get(url, stream=True).raw)
+     test_image.save("cats.png")
+     feature_extractor2 = ViTFeatureExtractor.from_pretrained(repo_name)
+     tokenizer = AutoTokenizer.from_pretrained(repo_name)
+     model2 = VisionEncoderDecoderModel.from_pretrained(repo_name)
+     pixel_values = feature_extractor2(test_image, return_tensors="pt").pixel_values
+     print("Pixel Values")
+     print(pixel_values)
+     # autoregressively generate text (using beam search or other decoding strategy)
+     generated_ids = model2.generate(pixel_values, max_length=16, num_beams=4, return_dict_in_generate=True)
+     # decode into text
+     preds = tokenizer.batch_decode(generated_ids[0], skip_special_tokens=True)
+     preds = [pred.strip() for pred in preds]
+     print("Predictions")
+     print(preds)
+     return preds
 
 
 def classify_image(image):
     results = image_pipe(image)
@@ -87,8 +89,8 @@ title = "Generate a Story from an Image"
 description = "Demo for classifying images with Perceiver IO. To use it, simply upload an image and click 'submit' to let the model predict the 5 most probable ImageNet classes. Results will show up in a few seconds." + image_piped
 article = "<p style='text-align: center'></p>"
 
- gr.Interface(fn=classify_image, inputs=image, outputs=label, title=title, description=description, examples="", enable_queue=True).launch(debug=True)
-
+ #gr.Interface(fn=classify_image, inputs=image, outputs=label, title=title, description=description, examples="", enable_queue=True).launch(debug=True)
+ gr.Interface([classify_image,self_caption], inputs=image, outputs=label, title=title, description=description, examples="", enable_queue=True).launch(debug=True)
 
 
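Note on the new function: as committed, self_caption never uses its image argument; it always re-downloads the fixed COCO test image and captions that, so every upload yields the same caption. (With return_dict_in_generate=True, generate returns a model output object whose first field is sequences, which is what the generated_ids[0] indexing picks out.) Below is a minimal sketch, not the committed code, of a variant that captions the uploaded image and loads the caption model once at import time; it assumes the Gradio image input hands the function a PIL image.

from transformers import ViTFeatureExtractor, AutoTokenizer, VisionEncoderDecoderModel

repo_name = "ydshieh/vit-gpt2-coco-en"
feature_extractor2 = ViTFeatureExtractor.from_pretrained(repo_name)
tokenizer = AutoTokenizer.from_pretrained(repo_name)
model2 = VisionEncoderDecoderModel.from_pretrained(repo_name)

def self_caption(image):
    # caption the uploaded image instead of a hard-coded URL (sketch assumption)
    pixel_values = feature_extractor2(image, return_tensors="pt").pixel_values
    # autoregressively generate token ids with beam search
    generated_ids = model2.generate(pixel_values, max_length=16, num_beams=4)
    # decode ids back into text; without return_dict_in_generate the result is a plain id tensor
    preds = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return [pred.strip() for pred in preds]

Loading the extractor, tokenizer, and model at module level avoids re-downloading ydshieh/vit-gpt2-coco-en on every request, matching how app.py already loads its other models.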
 
 
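One caveat on the new launch call: gr.Interface documents fn as a single callable, so passing the list [classify_image, self_caption] may fail at launch depending on the installed Gradio version, and examples="" should be a list of example inputs or simply omitted. A version-agnostic sketch that wraps both steps in one function, reusing the app's existing image input and label output (the extra Textbox output for the caption is an assumption, not part of the committed app):

import gradio as gr

def classify_and_caption(img):
    # run the existing classifier and the new captioner on the same upload
    return classify_image(img), self_caption(img)

gr.Interface(fn=classify_and_caption,
             inputs=image,
             outputs=[label, gr.outputs.Textbox(label="caption")],
             title=title,
             description=description,
             enable_queue=True).launch(debug=True)

Wrapping both models in one function keeps a single submit button driving both outputs; the wrapper's argument is named img so it does not shadow the image input component.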