Spaces:

nick-leland
/

ImageTransformationTool

Build error

File size: 17,000 Bytes

import numpy as np
import traceback
import gradio as gr
from PIL import Image
from scipy import ndimage, interpolate
import matplotlib.pyplot as plt
from bulk_bulge_generation import definitions, smooth
# from transformers import pipeline
import fastai
from fastcore.all import *
from fastai.vision.all import *
from ultralytics import ASSETS, YOLO
import cv2

# def apply_vector_field_transform(image, func, radius, center=(0.5, 0.5), strength=1, edge_smoothness=0.1, center_smoothness=0.20):
#     rows, cols = image.shape[:2]
#     max_dim = max(rows, cols)
#     print()
#     print(f"Max_dim is {max_dim}")
#     print()
#     
#     center_y = int(center[1] * rows)
#     center_x = int(center[0] * cols)
#     center_y = abs(rows - center_y)
# 
#     print(f"Image shape: {rows}x{cols}")
#     print(f"Center: ({center_x}, {center_y})")
#     print(f"Radius: {radius}, Strength: {strength}")
#     print(f"Edge smoothness: {edge_smoothness}, Center smoothness: {center_smoothness}")
#     
#     y, x = np.ogrid[:rows, :cols]
#     y = (y - center_y) / max_dim
#     x = (x - center_x) / max_dim
#     
#     dist_from_center = np.sqrt(x**2 + y**2)
#     
#     z = func(x, y)
#     print(f"Function output - min: {np.min(z)}, max: {np.max(z)}")
#     
#     gy, gx = np.gradient(z)
#     print(f"Initial gradient - gx min: {np.min(gx)}, max: {np.max(gx)}")
#     print(f"Initial gradient - gy min: {np.min(gy)}, max: {np.max(gy)}")
# 
#     # Avoid division by zero
#     edge_smoothness = np.maximum(edge_smoothness, 1e-6)
#     center_smoothness = np.maximum(center_smoothness, 1e-6)
# 
#     edge_mask = np.clip((radius - dist_from_center) / (radius * edge_smoothness), 0, 1)
#     center_mask = np.clip((dist_from_center - radius * center_smoothness) / (radius * center_smoothness), 0, 1)
#     mask = edge_mask * center_mask
#     
#     gx = gx * mask
#     gy = gy * mask
#     
#     magnitude = np.sqrt(gx**2 + gy**2)
#     magnitude[magnitude == 0] = 1  # Avoid division by zero
#     gx = gx / magnitude
#     gy = gy / magnitude
#     
#     scale_factor = strength * np.log(max_dim) / 100
#     gx = gx * scale_factor * mask
#     gy = gy * scale_factor * mask
#     
#     print(f"Final gradient - gx min: {np.min(gx)}, max: {np.max(gx)}")
#     print(f"Final gradient - gy min: {np.min(gy)}, max: {np.max(gy)}")
#     
#     # Forward transformation
#     x_new = x + gx
#     y_new = y + gy
#     
#     x_new = x_new * max_dim + center_x
#     y_new = y_new * max_dim + center_y
#     
#     x_new = np.clip(x_new, 0, cols - 1)
#     y_new = np.clip(y_new, 0, rows - 1)
#     
#     # Inverse transformation
#     x_inv = x - gx
#     y_inv = y - gy
#     
#     x_inv = x_inv * max_dim + center_x
#     y_inv = y_inv * max_dim + center_y
#     
#     x_inv = np.clip(x_inv, 0, cols - 1)
#     y_inv = np.clip(y_inv, 0, rows - 1)
#     
#     # Apply transformations
#     channels_forward = [ndimage.map_coordinates(image[..., i], [y_new, x_new], order=1, mode='reflect') 
#                         for i in range(image.shape[2])]
#     channels_inverse = [ndimage.map_coordinates(image[..., i], [y_inv, x_inv], order=1, mode='reflect') 
#                         for i in range(image.shape[2])]
#     
#     transformed_image = np.dstack(channels_forward).astype(image.dtype)
#     inverse_transformed_image = np.dstack(channels_inverse).astype(image.dtype)
#     
#     return transformed_image, inverse_transformed_image, (gx, gy)

def create_gradient_vector_field(gx, gy, image_shape, step=20, reverse=False):
    """
    Render a displacement field as a quiver (arrow) plot and return it as an image.

    :param gx: X-component of the gradient, indexed as gx[row, col]
    :param gy: Y-component of the gradient, indexed as gy[row, col]
    :param image_shape: Shape of the original image (height, width)
    :param step: Spacing between arrows, in pixels
    :param reverse: If True, reverse the direction of the arrows
    :return: Gradient vector field as a numpy array (RGB image, uint8, height x width x 3)
    """
    rows, cols = image_shape
    # One arrow per `step` pixels, starting half a step in from the top-left corner.
    y, x = np.mgrid[step/2:rows:step, step/2:cols:step].reshape(2, -1).astype(int)

    # Calculate the scale based on image size
    max_dim = max(rows, cols)
    scale = max_dim / 1000  # Adjusted for longer arrows

    # Reverse direction if specified
    direction = -1 if reverse else 1

    fig, ax = plt.subplots(figsize=(cols/50, rows/50), dpi=100)
    try:
        # gy is negated here while the y-axis is flipped below (set_ylim(rows, 0)).
        ax.quiver(x, y, direction * gx[y, x], direction * -gy[y, x],
                  scale=scale,
                  scale_units='width',
                  width=0.002 * max_dim / 500,
                  headwidth=8,
                  headlength=12,
                  headaxislength=0,
                  color='black',
                  minshaft=2,
                  minlength=0,
                  pivot='tail')
        ax.set_xlim(0, cols)
        ax.set_ylim(rows, 0)
        ax.set_aspect('equal')
        ax.axis('off')

        fig.tight_layout(pad=0)
        fig.canvas.draw()

        # tostring_rgb() was deprecated in Matplotlib 3.8 and later removed;
        # buffer_rgba() is the supported way to read the rendered pixels.
        # The buffer is (height, width, 4); drop alpha and copy so the result
        # does not reference canvas memory after the figure is closed.
        rgba = np.asarray(fig.canvas.buffer_rgba())
        vector_field = np.ascontiguousarray(rgba[..., :3])
    finally:
        # Always release the figure, even if plotting or rendering failed.
        plt.close(fig)

    return vector_field

def apply_gradient_transform(image, gx, gy):
    """
    Apply the gradient transformation to an image.

    Each output pixel (row, col) is sampled from the input at
    (row + gy, col + gx), clamped to the image bounds, using linear
    scattered-data interpolation over the pixel grid.

    :param image: Input image as a numpy array, either (rows, cols, channels)
        or a single-channel (rows, cols) array
    :param gx: X-component of the gradient, broadcastable to (rows, cols)
    :param gy: Y-component of the gradient, broadcastable to (rows, cols)
    :return: Transformed image with the same shape and dtype as ``image``
    """
    rows, cols = image.shape[:2]
    y, x = np.mgrid[0:rows, 0:cols]

    # Displace the sampling grid and keep it inside the image boundaries.
    x_new = np.clip(x + gx, 0, cols - 1)
    y_new = np.clip(y + gy, 0, rows - 1)

    grid_points = (y.ravel(), x.ravel())

    # Treat a grayscale image as a one-channel stack so one loop serves both layouts.
    is_grayscale = image.ndim == 2
    planes = image[..., np.newaxis] if is_grayscale else image

    # Interpolation yields floats; casting straight to an integer dtype would
    # truncate (e.g. 254.9999 -> 254), so round first for integer images.
    needs_rounding = np.issubdtype(image.dtype, np.integer)

    channels = []
    for i in range(planes.shape[2]):
        resampled = interpolate.griddata(
            grid_points,
            planes[:, :, i].ravel(),
            (y_new, x_new),
            method='linear',
            fill_value=0,
        )
        if needs_rounding:
            resampled = np.rint(resampled)
        channels.append(resampled)

    transformed_image = np.dstack(channels).astype(image.dtype)

    return transformed_image[..., 0] if is_grayscale else transformed_image

def generate_function_gradient(func, image_shape, radius, center=(0.5, 0.5), strength=1, edge_smoothness=0.1, center_smoothness=0.20):
    """
    Build a pixel displacement field from the gradient of a scalar surface.

    ``func`` is evaluated on coordinates centred on ``center`` and divided by
    the larger image dimension. Its numerical gradient is masked to a ring
    around the centre, normalised so the largest vector has length 1, and
    then scaled by a factor derived from ``radius``, ``strength`` and the
    image size.

    :param func: Callable ``func(x, y)`` returning an array shaped like its inputs
    :param image_shape: Image shape; only the first two entries (rows, cols) are used
    :param radius: Effect radius in normalised units (fraction of the larger dimension)
    :param center: ``(x, y)`` centre as fractions of width and height
    :param strength: Effect strength; ``strength ** 1.5`` is clipped to [0, 3]
    :param edge_smoothness: Relative width of the fall-off at the outer edge
    :param center_smoothness: Relative size of the suppressed zone at the centre
    :return: Tuple ``(gx, gy)`` of displacement components with shape (rows, cols)
    """
    rows, cols = image_shape[:2]
    max_dim = max(rows, cols)

    y, x = np.mgrid[0:rows, 0:cols].astype(np.float32)
    y = (y - center[1] * rows) / max_dim
    x = (x - center[0] * cols) / max_dim

    dist_from_center = np.sqrt(x**2 + y**2)

    z = func(x, y)

    gy, gx = np.gradient(z)

    # Avoid division by zero in the masks. A radius of 0 (allowed by the UI
    # slider) would otherwise produce 0/0 = NaN at the centre pixel and poison
    # the returned field; with the clamp the mask is simply empty.
    safe_radius = max(float(radius), 1e-6)
    edge_smoothness = np.maximum(edge_smoothness, 1e-6)
    center_smoothness = np.maximum(center_smoothness, 1e-6)

    # edge_mask fades to 0 at the outer radius; center_mask is 0 at the centre.
    edge_mask = np.clip((safe_radius - dist_from_center) / (safe_radius * edge_smoothness), 0, 1)
    center_mask = np.clip((dist_from_center - safe_radius * center_smoothness) / (safe_radius * center_smoothness), 0, 1)
    mask = edge_mask * center_mask

    gx *= mask
    gy *= mask

    # Normalise so the longest vector has unit length (skip for an all-zero field).
    magnitude = np.sqrt(gx**2 + gy**2)
    max_magnitude = np.max(magnitude)
    if max_magnitude > 0:
        gx /= max_magnitude
        gy /= max_magnitude

    # Increase the base scale factor
    base_scale = radius * max_dim * 0.2  # Increased from 0.1 to 0.2

    # Apply a non-linear scaling to the strength
    adjusted_strength = np.power(strength, 1.5)  # This will make the effect more pronounced at higher strengths

    # Increase the maximum strength multiplier
    scale_factor = base_scale * np.clip(adjusted_strength, 0, 3)  # Increased max from 2 to 3

    # Apply an additional scaling factor based on image size
    size_factor = np.log(max_dim) / np.log(1000)  # This is 1 when max_dim is 1000, larger for bigger images
    scale_factor *= size_factor

    gx *= scale_factor
    gy *= scale_factor

    print(f"Final scale factor: {scale_factor}")
    print(f"Final gradient ranges: gx [{np.min(gx)}, {np.max(gx)}], gy [{np.min(gy)}, {np.max(gy)}]")

    return gx, gy

#############################
#    MAIN FUNCTION HERE
#############################

# Version check: log library versions at startup to help diagnose environment problems.
# NOTE(review): `torch` is not imported explicitly in this file; presumably it
# comes in through the `fastai.vision.all` star import — confirm.
print(f"NumPy version: {np.__version__}")
print(f"PyTorch version: {torch.__version__}")
print(f"FastAI version: {fastai.__version__}")

# Load the two fastai learners from files in the working directory.
# NOTE(review): the only references to these learners in this file are in
# commented-out code marked "Outdated"; loading them still costs startup time
# and fails startup if the .pkl files are missing — consider removing.
# NOTE(review): fastai's load_learner is pickle-based as far as I know; only
# load model files from a trusted source — confirm.
learn_bias = load_learner('model_bias.pkl')
learn_fresh = load_learner('model_fresh.pkl')

# Loads the YOLO model (ONNX export) used for bulge localization.
model_bulge = YOLO("best.onnx")
# modelv8x = YOLO("yolov8x.pt")
# modelv8n = YOLO("yolov8n.pt")

def predict_image(img, model, conf_threshold, iou_threshold):
    """
    Predict objects in an image with a YOLO model and return the annotated image.

    :param img: Image source passed straight to ``model.predict``
    :param model: Model object exposing ``predict(source=..., conf=..., iou=..., ...)``
    :param conf_threshold: Confidence threshold forwarded as ``conf``
    :param iou_threshold: IOU threshold forwarded as ``iou``
    :return: PIL image of the last result with detections drawn on it
    :raises ValueError: If the model returns no results
    """
    results = model.predict(
        source=img,
        conf=conf_threshold,
        iou=iou_threshold,
        show_labels=True,
        show_conf=True,
        imgsz=640,
    )

    # Previously an empty result list surfaced as an UnboundLocalError on return.
    if not results:
        raise ValueError("Model returned no results for the given image")

    # Only the last result was ever returned, so only that one is plotted.
    # The channel axis of the plotted array is reversed before handing it to
    # PIL (Ultralytics documents plot() as returning BGR).
    annotated = results[-1].plot()
    return Image.fromarray(annotated[..., ::-1])

def transform_image(image, func_choice, randomization_check, radius, center_x, center_y, strength, reverse_gradient=True, spiral_frequency=1):
    """
    Warp an image with a gradient-driven distortion, run bulge detection on the
    warped image, and attempt to undo the warp.

    :param image: Path to the input image file (opened with PIL)
    :param func_choice: One of "Pinch", "Spiral", "Shift Up", "Bulge", "Volcano"
    :param randomization_check: If True, radius, center and strength are replaced
        by values returned from ``definitions(rng)``
    :param radius: Effect radius as a fraction of the larger image dimension
    :param center_x: Horizontal centre of the effect as a fraction of the width
    :param center_y: Vertical centre of the effect as a fraction of the height
    :param strength: Requested strength (currently overridden, see NOTE below)
    :param reverse_gradient: Forwarded as ``reverse`` when drawing the forward vector field
    :param spiral_frequency: Frequency used by the surface behind "Shift Up"
    :return: Tuple ``(transformed, YOLO_image, vector_field, inverse_transformed,
        inverted_vector_field, applied_transformed)``
    :raises ValueError: If ``func_choice`` is not a known function name
    """

    ##################################
    #    Transformation Functions    #
    ##################################

    def pinch(x, y):
        return np.sqrt(x**2 + y**2)

    def shift(x, y):
        return np.arctan2(y, x)

    def bulge(x, y):
        return -np.sqrt(x**2 + y**2)

    def spiral(x, y, frequency=1):
        r = np.sqrt(x**2 + y**2)
        theta = np.arctan2(y, x)
        return r * np.sin(theta - frequency * r)

    # NOTE(review): "Spiral" maps to `shift`, "Shift Up" maps to `spiral`, and
    # "Volcano" is identical to "Bulge". This mirrors the previous if/elif
    # chain exactly; confirm the mapping is intentional.
    surfaces = {
        "Pinch": pinch,
        "Spiral": shift,
        "Bulge": bulge,
        "Volcano": bulge,
        "Shift Up": lambda x, y: spiral(x, y, frequency=spiral_frequency),
    }
    if func_choice not in surfaces:
        # Previously this surfaced later as an unbound-name error.
        raise ValueError(f"Unknown function choice: {func_choice!r}")
    func = surfaces[func_choice]

    with Image.open(image) as img:
        img = img.convert('RGB')
        I = np.array(img)

    # Remember the full-resolution (width, height) so results can be upsampled
    # later without reopening the file.
    original_size = (I.shape[1], I.shape[0])

    # Downsample large images
    max_size = 640 # Increased from 512 to allow for more detail, decreased from 1024 to match YOLO model training.
    downsampled = max(I.shape[:2]) > max_size
    if downsampled:
        scale = max_size / max(I.shape[:2])
        # Keep both sides at least 1 px so extreme aspect ratios cannot collapse to 0.
        new_size = (max(1, int(I.shape[1] * scale)), max(1, int(I.shape[0] * scale)))
        I = cv2.resize(I, new_size, interpolation=cv2.INTER_AREA)
        print(f"Downsampled image to {I.shape}")

    rng = np.random.default_rng()
    if randomization_check:
        radius, location, strength, edge_smoothness = definitions(rng)
        center_x, center_y = location
    else:
        # Result intentionally unused (see below); the call is kept so the
        # helper still runs as it did before.
        smooth(rng, strength)

    # Every supported function choice has always reset both smoothness values
    # to 0, regardless of what `definitions` / `smooth` returned.
    edge_smoothness = 0
    center_smoothness = 0

    print(f"Function choice: {func_choice}")
    print(f"Input image shape: {I.shape}")
    print(f"Radius: {radius}, Center: ({center_x}, {center_y}), Strength: {strength}")

    try:
        # NOTE(review): this hard-coded value overrides both the UI slider and
        # the randomized strength, so the "Strength" input has no effect on the
        # warp. Kept to preserve current output — confirm whether it is
        # intentional or a debugging leftover.
        strength = 0.8

        # Generate gradients
        gx, gy = generate_function_gradient(func, I.shape, radius, (center_x, center_y), strength, edge_smoothness, center_smoothness)

        # Vectorized transformation
        rows, cols = I.shape[:2]
        y, x = np.mgrid[0:rows, 0:cols].astype(np.float32)

        # cv2.remap requires float32 maps; the gradient can be float64
        # depending on the dtype of the centre values, so cast explicitly.
        x_new = np.clip(x + gx, 0, cols - 1).astype(np.float32)
        y_new = np.clip(y + gy, 0, rows - 1).astype(np.float32)

        transformed = cv2.remap(I, x_new, y_new, cv2.INTER_LINEAR)

        inv_gx, inv_gy = -gx, -gy
        x_inv = np.clip(x + inv_gx, 0, cols - 1).astype(np.float32)
        y_inv = np.clip(y + inv_gy, 0, rows - 1).astype(np.float32)

        inverse_transformed = cv2.remap(I, x_inv, y_inv, cv2.INTER_LINEAR)

        # Ultralytics treats numpy input as BGR, while `transformed` is RGB
        # (it came from PIL). Hand the model a BGR copy so channels are not
        # swapped for the detector or in the annotated image it returns.
        YOLO_image = predict_image(np.ascontiguousarray(transformed[..., ::-1]), model_bulge, 0.5, 0.5)

        # Apply the inverse displacement to the warped image.
        applied_transformed = cv2.remap(transformed, x_inv, y_inv, cv2.INTER_LINEAR)

        vector_field = create_gradient_vector_field(gx, gy, I.shape[:2], reverse=reverse_gradient)
        inverted_vector_field = create_gradient_vector_field(inv_gx, inv_gy, I.shape[:2], reverse=False)

        # If we downsampled earlier, upsample the results back to original size
        if downsampled:
            transformed = cv2.resize(transformed, original_size, interpolation=cv2.INTER_LINEAR)
            inverse_transformed = cv2.resize(inverse_transformed, original_size, interpolation=cv2.INTER_LINEAR)
            applied_transformed = cv2.resize(applied_transformed, original_size, interpolation=cv2.INTER_LINEAR)

            vector_field = cv2.resize(vector_field, original_size, interpolation=cv2.INTER_LINEAR)
            inverted_vector_field = cv2.resize(inverted_vector_field, original_size, interpolation=cv2.INTER_LINEAR)

    except Exception as e:
        print(f"Error in transformation: {str(e)}")
        traceback.print_exc()
        # Fall back to blank images for every output. YOLO_image and
        # applied_transformed were previously left undefined here, which made
        # the return statement itself fail.
        transformed = np.zeros_like(I)
        inverse_transformed = np.zeros_like(I)
        vector_field = np.zeros_like(I)
        inverted_vector_field = np.zeros_like(I)
        YOLO_image = np.zeros_like(I)
        applied_transformed = np.zeros_like(I)

    # Second inference pass that saves the annotated prediction to disk and
    # logs the raw results. The array is converted to BGR for the same reason
    # as above.
    result_localization = model_bulge.predict(np.ascontiguousarray(transformed[..., ::-1]), save=True)
    print(result_localization, "bulge")

    return transformed, YOLO_image, vector_field, inverse_transformed, inverted_vector_field, applied_transformed


# Gradio UI: one image input plus the warp controls, six image outputs in the
# same order as the tuple returned by transform_image.
demo = gr.Interface(
    fn=transform_image,
    inputs=[
        gr.Image(type="filepath"),
        gr.Dropdown(["Pinch", "Spiral", "Shift Up", "Bulge", "Volcano"], value="Bulge", label="Function"), 
        gr.Checkbox(label="Randomize inputs?"),
        gr.Slider(0, 0.5, value=0.25, label="Radius (as fraction of image size)"),
        gr.Slider(0, 1, value=0.5, label="Center X"),
        gr.Slider(0, 1, value=0.5, label="Center Y"),
        gr.Slider(0, 1, value=0.5, label="Strength"),
        # gr.Slider(0, 1, value=0.5, label="Edge Smoothness"),
        # gr.Slider(0, 0.5, value=0.1, label="Center Smoothness")
        # gr.Checkbox(label="Reverse Gradient Direction"),
    ],
    # Examples are given as file paths: the image input is declared with
    # type="filepath", and this avoids decoding four large images (and leaving
    # their file handles open) every time the module is imported.
    examples=[
        ["examples/1500_maze.jpg", "Bulge", True, 0.25, 0.5, 0.5, 0.5],
        ["examples/2048_maze.jpg", "Bulge", True, 0.25, 0.5, 0.5, 0.5],
        ["examples/2300_fresh.jpg", "Bulge", True, 0.25, 0.5, 0.5, 0.5],
        ["examples/50_fresh.jpg", "Bulge", True, 0.25, 0.5, 0.5, 0.5]
    ],
    outputs=[
        gr.Image(label="Transformed Image"),
        gr.Image(label="bulge_model Model Classification"),
        # gr.Image(label="yolov8n Model Classification"),
        # gr.Image(label="yolov8x Model Classification"),
        # gr.Label(),
        # gr.Label(),
        gr.Image(label="Gradient Vector Field"),
        gr.Image(label="Inverse Gradient"),
        gr.Image(label="Inverted Vector Field"),
        gr.Image(label="Fixed Image")
    ],
    title="Image Transformation Demo!",
    article="If you like this demo, please star the github repository for the project! Located [here!](https://github.com/nick-leland/DistortionML)",
    description=""
)

demo.launch(share=True)