Spaces:

clip-italian
/

clip-italian-demo

Running

App Files Community

vinid committed on Jul 19, 2021

Commit

d05a223

•

1 Parent(s): 997927f

fixing image2text

Browse files

Files changed (2) hide show

image2text.py +18 -8
requirements.txt +2 -1

image2text.py CHANGED Viewed

@@ -4,7 +4,8 @@ from utils import text_encoder, image_encoder
 from PIL import Image
 from jax import numpy as jnp
 import pandas as pd
 def app():
     st.title("From Image to Text")
@@ -17,23 +18,31 @@ def app():
         image classification task!
         🤌 Italian mode on! 🤌
         """
     )
-    filename = st.file_uploader(
-        "Choose an image from your computer", type=["jpg", "jpeg", "png"]
     )
     MAX_CAP = 4
     col1, col2 = st.beta_columns([3, 1])
     with col2:
         captions_count = st.selectbox(
-            "Number of labels", options=range(1, MAX_CAP + 1)
         )
-        compute = st.button("Compute")
     with col1:
         captions = list()
@@ -43,7 +52,7 @@ def app():
     if compute:
         captions = [c for c in captions if c != ""]
-        if not captions or not filename:
             st.error("Please choose one image and at least one label")
         else:
             with st.spinner("Computing..."):
@@ -55,13 +64,14 @@ def app():
                     text_embeds.extend(text_encoder(c, model, tokenizer))
                 text_embeds = jnp.array(text_embeds)
-                image = Image.open(filename).convert("RGB")
                 transform = get_image_transform(model.config.vision_config.image_size)
                 image_embed = image_encoder(transform(image), model)
                 # we could have a softmax here
-                cos_similarities = jnp.matmul(image_embed, text_embeds.T)
                 chart_data = pd.Series(cos_similarities[0], index=captions)

 from PIL import Image
 from jax import numpy as jnp
 import pandas as pd
+import requests
+import jax
 def app():
     st.title("From Image to Text")
         image classification task!
         🤌 Italian mode on! 🤌
+        For example, try to write "cat" in the space for label1 and "dog" in the space for label2 and the run
+        "classify"!
         """
     )
+    image_url = st.text_input(
+        "You can input the URL of an image",
+        value="https://upload.wikimedia.org/wikipedia/commons/thumb/5/5e/Domestic_Cat_Face_Shot.jpg/1280px-Domestic_Cat_Face_Shot.jpg",
     )
     MAX_CAP = 4
     col1, col2 = st.beta_columns([3, 1])
     with col2:
         captions_count = st.selectbox(
+            "Number of labels", options=range(1, MAX_CAP + 1), index=1
         )
+        compute = st.button("Classify")
     with col1:
         captions = list()
     if compute:
         captions = [c for c in captions if c != ""]
+        if not captions or not image_url:
             st.error("Please choose one image and at least one label")
         else:
             with st.spinner("Computing..."):
                     text_embeds.extend(text_encoder(c, model, tokenizer))
                 text_embeds = jnp.array(text_embeds)
+                image_raw = requests.get(image_url, stream=True).raw
+                image = Image.open(image_raw).convert("RGB")
                 transform = get_image_transform(model.config.vision_config.image_size)
                 image_embed = image_encoder(transform(image), model)
                 # we could have a softmax here
+                cos_similarities = jax.nn.softmax(jnp.matmul(image_embed, text_embeds.T))
                 chart_data = pd.Series(cos_similarities[0], index=captions)

requirements.txt CHANGED Viewed

@@ -5,4 +5,5 @@ torch
 torchvision
 natsort
 stqdm
-pandas

 torchvision
 natsort
 stqdm
+pandas
+requests