Better support for keep_shape
app.py CHANGED
@@ -25,18 +25,6 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
IMG_BITS = 13


-class ToBinary(torch.autograd.Function):
-
-    @staticmethod
-    def forward(ctx, x):
-        return torch.floor(
-            x + 0.5)  # no need for noise when we have plenty of data
-
-    @staticmethod
-    def backward(ctx, grad_output):
-        return grad_output.clone()  # pass-through
-
-
class ResBlock(nn.Module):

    def __init__(self, c_x, c_hidden):
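Note on this deletion: ToBinary is a straight-through estimator, i.e. it rounds to {0, 1} in the forward pass and passes gradients through unchanged in the backward pass, which only matters during training. Since the Space only runs inference, the same binarization reduces to a plain torch.floor(x + 0.5), which the updated encode() below now inlines. A minimal sketch of the equivalence (the shape is illustrative):

import torch

# Straight-through binarization, as in the removed class: hard round in
# forward, identity gradient in backward (a training-time concern only).
class ToBinary(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        return torch.floor(x + 0.5)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.clone()

x = torch.rand(1, 13, 28, 28)
# At inference both paths produce identical values, so the helper is redundant.
assert torch.equal(ToBinary.apply(x), torch.floor(x + 0.5))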
@@ -242,26 +230,46 @@ def prepare_model(model_prefix):
    return encoder, decoder


-def encode(model_prefix, img, keep_dims):
+def compute_padding(img_shape):
+    hsize, vsize = (img_shape[1] + 7) // 8 * 8, (img_shape[0] + 7) // 8 * 8
+    hpad, vpad = hsize - img_shape[1], vsize - img_shape[0]
+    left, top = hpad // 2, vpad // 2
+    right, bottom = hpad - left, vpad - top
+    return left, top, right, bottom
+
+
+def encode(model_prefix, img, keep_shape):
+    gc.collect()
    encoder, _ = prepare_model(model_prefix)
-    img_transform = transforms.Compose(
-        [transforms.PILToTensor(),
-         transforms.ConvertImageDtype(torch.float)] +
-        ([transforms.Resize((224, 224))] if not keep_dims else []))

    with torch.no_grad():
-        img =
-
-
+        img = VF.pil_to_tensor(img.convert("RGB"))
+        img = VF.convert_image_dtype(img)
+        img = img.unsqueeze(0).to(device)
+        img_shape = img.shape[2:]
+
+        if keep_shape:
+            left, top, right, bottom = compute_padding(img_shape)
+            img = VF.pad(img, [left, top, right, bottom], padding_mode='edge')
+        else:
+            img = VF.resize(img, [224, 224])
+
+        z = torch.floor(encoder(img) + 0.5)

    with io.BytesIO() as buffer:
        np.save(buffer, np.packbits(z.cpu().numpy().astype('bool')))
        z_b64 = base64.b64encode(buffer.getvalue()).decode()

-    return json.dumps({
+    return json.dumps({
+        "img_shape": img_shape,
+        "z_shape": z.shape[2:],
+        "keep_shape": keep_shape,
+        "data": z_b64,
+    })


def decode(model_prefix, z_str):
+    gc.collect()
    _, decoder = prepare_model(model_prefix)

    z_json = json.loads(z_str)
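The new compute_padding helper rounds each image dimension up to the next multiple of 8 (presumably the encoder's total downsampling factor) and splits the required padding as evenly as possible between opposite sides. A quick standalone check of the helper as committed, with a hypothetical 250x300 input:

# compute_padding as added above, exercised on its own.
def compute_padding(img_shape):
    hsize, vsize = (img_shape[1] + 7) // 8 * 8, (img_shape[0] + 7) // 8 * 8
    hpad, vpad = hsize - img_shape[1], vsize - img_shape[0]
    left, top = hpad // 2, vpad // 2
    right, bottom = hpad - left, vpad - top
    return left, top, right, bottom

# A 250x300 (HxW) image grows to 256x304: 3 rows top / 3 bottom,
# 2 columns left / 2 right. Return order is (left, top, right, bottom).
assert compute_padding((250, 300)) == (2, 3, 2, 3)
# A 224x224 input is already a multiple of 8 and needs no padding.
assert compute_padding((224, 224)) == (0, 0, 0, 0)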
@@ -269,10 +277,23 @@ def decode(model_prefix, z_str):
        buffer.write(base64.b64decode(z_json["data"]))
        buffer.seek(0)
        z = np.load(buffer)
-
+    img_shape = z_json["img_shape"]
+    z_shape = z_json["z_shape"]
+    keep_shape = z_json["keep_shape"]
+
+    z = np.unpackbits(z)[:IMG_BITS * z_shape[0] * z_shape[1]].astype('float')
+    z = z.reshape([1, IMG_BITS] + z_shape)
+
+    img = decoder(torch.Tensor(z).to(device))
+
+    if keep_shape:
+        left, top, right, bottom = compute_padding(img_shape)
+        img = img[0, :, top:img.shape[2] - bottom, left:img.shape[3] - right]
+    else:
+        img = img[0]

-
-    return VF.to_pil_image(
+    st.write(img.shape)
+    return VF.to_pil_image(img)


st.title("Clip Guided Binary Autoencoder")
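For reference, the latent serialization that decode() reverses here: encode() packs the binary tensor 8 bits to a byte with np.packbits, wraps it in the .npy format, and base64-encodes it into a JSON payload; decode() unpacks the bits and trims the byte-alignment padding. A self-contained round trip under assumed shapes (a 28x28 latent grid of IMG_BITS planes):

import base64
import io
import json

import numpy as np

IMG_BITS = 13
z = np.random.randint(0, 2, size=(1, IMG_BITS, 28, 28))

# Serialize: packbits -> .npy bytes -> base64 string inside JSON.
with io.BytesIO() as buffer:
    np.save(buffer, np.packbits(z.astype('bool')))
    payload = json.dumps({"z_shape": [28, 28],
                          "data": base64.b64encode(buffer.getvalue()).decode()})

# Deserialize: base64 -> .npy bytes -> unpackbits, slicing off pad bits.
meta = json.loads(payload)
packed = np.load(io.BytesIO(base64.b64decode(meta["data"])))
z_shape = meta["z_shape"]
bits = np.unpackbits(packed)[:IMG_BITS * z_shape[0] * z_shape[1]]
assert (bits.reshape([1, IMG_BITS] + z_shape) == z).all()  # lossless round trip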
@@ -288,12 +309,13 @@ encoder_tab, decoder_tab = st.tabs(["Encode", "Decode"])

with encoder_tab:
    col_in, col_out = st.columns(2)
-
+    keep_shape = col_in.checkbox(
+        'Use original size of input image instead of rescaling (Experimental)')
    uploaded_file = col_in.file_uploader('Choose an Image')
    if uploaded_file is not None:
        image = Image.open(uploaded_file)
        col_in.image(image, 'Input Image')
-        z_str = encode(model_prefix, image, keep_dims)
+        z_str = encode(model_prefix, image, keep_shape)
        col_out.write("Encoded to:")
        col_out.code(z_str, language=None)
        col_out.image(decode(model_prefix, z_str), 'Output Image preview')
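With the checkbox ticked, encode() pads the image to encoder-friendly dimensions and decode() crops the same margins back off, so the round trip preserves the original size. A small sanity check that the crop inverts the pad, reusing the 250x300 margins from the compute_padding example above:

import torch
import torchvision.transforms.functional as VF

img = torch.rand(1, 3, 250, 300)
left, top, right, bottom = 2, 3, 2, 3  # compute_padding((250, 300))
padded = VF.pad(img, [left, top, right, bottom], padding_mode='edge')
cropped = padded[:, :, top:padded.shape[2] - bottom, left:padded.shape[3] - right]
assert torch.equal(cropped, img)                               # crop undoes pad
assert padded.shape[2] % 8 == 0 and padded.shape[3] % 8 == 0   # 256 x 304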