Shriharshan committed on
Commit
efb9c5f
1 Parent(s): e770a77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -5,6 +5,8 @@ import requests
5
  model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
6
  vit_feature_extactor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
7
  tokenizer = PreTrainedTokenizerFast.from_pretrained("distilgpt2")
 
 
8
  #url = 'https://d2gp644kobdlm6.cloudfront.net/wp-content/uploads/2016/06/bigstock-Shocked-and-surprised-boy-on-t-113798588-300x212.jpg'
9
  # with Image.open(requests.get(url, stream=True).raw) as img:
10
  # pixel_values = vit_feature_extactor(images=img, return_tensors="pt").pixel_values
@@ -12,12 +14,15 @@ tokenizer = PreTrainedTokenizerFast.from_pretrained("distilgpt2")
12
  # generated_senetences = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True,)
13
  # generated_senetences
14
  # generated_senetences[0].split(".")[0]
 
 
15
  def vit2distilgpt2(img):
16
  pixel_values = vit_feature_extactor(images=img, return_tensors="pt").pixel_values
17
  encoder_outputs = generated_ids = model.generate(pixel_values.to('cpu'),num_beams=5)
18
  generated_senetences = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True)
19
 
20
  return(generated_senetences[0].split('.')[0])
 
21
  import gradio as gr
22
  inputs = [
23
  gr.inputs.Image(type="pil",label="Original Images")
@@ -36,8 +41,6 @@ examples = [
36
  ]
37
 
38
 
39
-
40
-
41
  gr.Interface(
42
  vit2distilgpt2,
43
  inputs,
 
5
  model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
6
  vit_feature_extactor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
7
  tokenizer = PreTrainedTokenizerFast.from_pretrained("distilgpt2")
8
+
9
+
10
  #url = 'https://d2gp644kobdlm6.cloudfront.net/wp-content/uploads/2016/06/bigstock-Shocked-and-surprised-boy-on-t-113798588-300x212.jpg'
11
  # with Image.open(requests.get(url, stream=True).raw) as img:
12
  # pixel_values = vit_feature_extactor(images=img, return_tensors="pt").pixel_values
 
14
  # generated_senetences = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True,)
15
  # generated_senetences
16
  # generated_senetences[0].split(".")[0]
17
+
18
+
19
  def vit2distilgpt2(img):
20
  pixel_values = vit_feature_extactor(images=img, return_tensors="pt").pixel_values
21
  encoder_outputs = generated_ids = model.generate(pixel_values.to('cpu'),num_beams=5)
22
  generated_senetences = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True)
23
 
24
  return(generated_senetences[0].split('.')[0])
25
+
26
  import gradio as gr
27
  inputs = [
28
  gr.inputs.Image(type="pil",label="Original Images")
 
41
  ]
42
 
43
 
 
 
44
  gr.Interface(
45
  vit2distilgpt2,
46
  inputs,