ydshieh committed
Commit 8f85ccf
Parent: d1befcb

fix closed image issue

Files changed (2)
  1. app.py +52 -48
  2. model.py +2 -1
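
For context on the commit message: once a PIL image has been closed, its pixel data is released, so any later read (for example by the feature extractor inside predict) fails. The old app.py could close the very image it was about to caption. A minimal sketch of that failure mode, assuming a recent Pillow release; the file name is only a placeholder and not from the repo:

from PIL import Image

image = Image.open("example.jpg")   # placeholder path
resized = image                     # plain assignment: no copy, same Image object
resized.close()                     # ... so this also closes `image`

# Any later pixel access now fails; Pillow typically raises
# "ValueError: Operation on closed image".
image.resize((384, 384))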
app.py CHANGED
@@ -39,55 +39,59 @@ with st.sidebar.form("file-uploader-form", clear_on_submit=True):
    submitted = st.form_submit_button("Upload")
    if submitted and uploaded_file is not None:
        bytes_data = io.BytesIO(uploaded_file.getvalue())
-       uploaded_file = None
-       submitted = None
-
- image_id = random_image_id
- if sample_image_id != "None":
-     assert type(sample_image_id) == int
-     image_id = sample_image_id
-
- sample_name = f"COCO_val2017_{str(image_id).zfill(12)}.jpg"
- sample_path = os.path.join(sample_dir, sample_name)
-
- if bytes_data is not None:
-     image = Image.open(bytes_data)
-     bytes_data = None
- elif os.path.isfile(sample_path):
-     image = Image.open(sample_path)
- else:
-     url = f"http://images.cocodataset.org/val2017/{str(image_id).zfill(12)}.jpg"
-     image = Image.open(requests.get(url, stream=True).raw)
-
- width, height = image.size
- resized = image
- if height > 384:
-     width = int(width / height * 384)
-     height = 384
-     resized = resized.resize(size=(width, height))
- if width > 512:
-     width = 512
-     height = int(height / width * 512)
-     resized = resized.resize(size=(width, height))
-
-
- st.markdown(f"[{str(image_id).zfill(12)}.jpg](http://images.cocodataset.org/val2017/{str(image_id).zfill(12)}.jpg)")
- show = st.image(resized)
- show.image(resized, '\n\nSelected Image')
- resized.close()

- # For newline
- st.sidebar.write('\n')

- with st.spinner('Generating image caption ...'):

-     caption = predict(image)
-
-     caption_en = caption
-     st.header(f'Predicted caption:\n\n')
-     st.subheader(caption_en)
-
-     st.sidebar.header("ViT-GPT2 predicts:")
-     st.sidebar.write(f"**English**: {caption}")

- image.close()
+
+ if (bytes_data is None) and submitted:

+     st.write("No file is selected to upload")

+ else:

+     image_id = random_image_id
+     if sample_image_id != "None":
+         assert type(sample_image_id) == int
+         image_id = sample_image_id
+
+     sample_name = f"COCO_val2017_{str(image_id).zfill(12)}.jpg"
+     sample_path = os.path.join(sample_dir, sample_name)
+
+     if bytes_data is not None:
+         image = Image.open(bytes_data)
+     elif os.path.isfile(sample_path):
+         image = Image.open(sample_path)
+     else:
+         url = f"http://images.cocodataset.org/val2017/{str(image_id).zfill(12)}.jpg"
+         image = Image.open(requests.get(url, stream=True).raw)
+
+     width, height = image.size
+     resized = image.resize(size=(width, height))
+     if height > 384:
+         width = int(width / height * 384)
+         height = 384
+         resized = resized.resize(size=(width, height))
+     width, height = resized.size
+     if width > 512:
+         width = 512
+         height = int(height / width * 512)
+         resized = resized.resize(size=(width, height))
+
+     if bytes_data is None:
+         st.markdown(f"[{str(image_id).zfill(12)}.jpg](http://images.cocodataset.org/val2017/{str(image_id).zfill(12)}.jpg)")
+     show = st.image(resized)
+     show.image(resized, '\n\nSelected Image')
+     resized.close()
+
+     # For newline
+     st.sidebar.write('\n')
+
+     with st.spinner('Generating image caption ...'):
+
+         caption = predict(image)
+
+         caption_en = caption
+         st.header(f'Predicted caption:\n\n')
+         st.subheader(caption_en)
+
+         st.sidebar.header("ViT-GPT2 predicts: ")
+         st.sidebar.write(f"{caption}")
+
+     image.close()
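
The key line in the new app.py is `resized = image.resize(size=(width, height))`: even when no downscaling is needed it produces a separate Image object, so the later `resized.close()` no longer closes the image that `predict(image)` still needs (the old `resized = image` was a plain alias). A hedged sketch of the same idea as a helper, with illustrative names that are not part of the repo; it uses `Image.copy()` instead of a same-size `resize` to make the copy explicit:

from PIL import Image

def make_preview(image, max_h=384, max_w=512):
    """Return a downscaled display copy; `image` itself stays open and untouched."""
    preview = image.copy()                  # a real copy, never an alias of `image`
    w, h = preview.size
    if h > max_h:
        w, h = int(w / h * max_h), max_h    # cap height, keep aspect ratio
        preview = preview.resize((w, h))
    w, h = preview.size
    if w > max_w:
        w, h = max_w, int(h / w * max_w)    # cap width, keep aspect ratio
        preview = preview.resize((w, h))
    return preview

Closing such a preview after `st.image(preview)` then releases only the display copy, and the original image stays valid for `predict`.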
model.py CHANGED
@@ -47,6 +47,7 @@ def generate(pixel_values):
 def predict(image):

     pixel_values = feature_extractor(images=image, return_tensors="np").pixel_values
+
     output_ids = generate(pixel_values)
     preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
     preds = [pred.strip() for pred in preds]
@@ -58,7 +59,7 @@ def _compile():

     image_path = 'samples/val_000000039769.jpg'
     image = Image.open(image_path)
-     caption = predict(image)
+     predict(image)
     image.close()

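
In model.py the unused `caption` binding is simply dropped: `_compile()` appears to exist only to run one throwaway prediction at startup, so that any compilation or tracing of the generation path happens before the first user request, and its return value is never used. An equivalent warm-up, sketched with a context manager so the close-after-predict ordering is enforced automatically (this refactor is not part of the commit):

from PIL import Image

def _compile():
    # Warm-up: run predict() once on a sample image; the caption itself is discarded.
    # The with-block closes the image only after predict() has read its pixels.
    with Image.open('samples/val_000000039769.jpg') as image:
        predict(image)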