Soumen committed
Commit 02635d7
1 Parent(s): d69593b

Update app.py

Files changed (1): app.py +14 -14
app.py CHANGED
@@ -28,6 +28,20 @@ uploaded_photo = c2.file_uploader("Upload Image",type=['jpg','png','jpeg'], on_c
 feature_extractor = ViTFeatureExtractor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 camera_photo = c2.camera_input("Take a photo", on_change=change_photo_state)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+max_length = 16
+num_beams = 4
+gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
+def predict_step(our_image):
+    if our_image.mode != "RGB":
+        our_image = our_image.convert(mode="RGB")
+    pixel_values = feature_extractor(images=our_image, return_tensors="pt").pixel_values
+    pixel_values = pixel_values.to(device)
+    output_ids = model.generate(pixel_values, **gen_kwargs)
+    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
+    preds = [pred.strip() for pred in preds]
+    return preds
 if choice == 'Caption':
     #st.subheader("Detection")
     if st.session_state["photo"]=="done":
@@ -37,20 +51,6 @@ if choice == 'Caption':
             our_image= load_image(camera_photo)
         elif uploaded_photo==None and camera_photo==None:
             our_image= load_image('image.jpg')
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        model.to(device)
-        max_length = 16
-        num_beams = 4
-        gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
-        def predict_step(our_image):
-            if our_image.mode != "RGB":
-                our_image = our_image.convert(mode="RGB")
-            pixel_values = feature_extractor(images=our_image, return_tensors="pt").pixel_values
-            pixel_values = pixel_values.to(device)
-            output_ids = model.generate(pixel_values, **gen_kwargs)
-            preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
-            preds = [pred.strip() for pred in preds]
-            return preds
         st.success(predict_step(our_image))
 elif choice == 'About':
     st.subheader("About Image Captioning App")
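
The net effect of the two hunks: the device setup and the predict_step helper move out of the Caption branch to module scope, so the captioning pipeline is defined once before any branch calls it. Below is a minimal standalone sketch of that same pipeline. Note that model is loaded outside both hunks, so the VisionEncoderDecoderModel line here is an assumption based on the checkpoint the diff already uses for the feature extractor and tokenizer; 'image.jpg' is the app's own fallback image.

import torch
from PIL import Image
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel

checkpoint = "nlpconnect/vit-gpt2-image-captioning"
# Assumption: app.py presumably loads `model` like this; its actual
# definition falls outside both hunks of the diff above.
model = VisionEncoderDecoderModel.from_pretrained(checkpoint)
feature_extractor = ViTFeatureExtractor.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
gen_kwargs = {"max_length": 16, "num_beams": 4}  # same values as the diff

def predict_step(our_image):
    # ViT expects 3-channel input; normalize grayscale/palette images first.
    if our_image.mode != "RGB":
        our_image = our_image.convert(mode="RGB")
    pixel_values = feature_extractor(images=our_image, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)
    output_ids = model.generate(pixel_values, **gen_kwargs)
    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return [pred.strip() for pred in preds]

if __name__ == "__main__":
    # 'image.jpg' is the same fallback file app.py loads when nothing is uploaded.
    print(predict_step(Image.open("image.jpg")))

One caveat on the design choice: Streamlit re-runs the whole script on every interaction, so hoisting predict_step to module scope tidies the control flow but does not by itself avoid reloading the model each run; a cache decorator such as st.cache_resource would be the usual fix for that.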