# NOTE: import order is intentionally preserved. The two star imports can
# shadow earlier names, so regrouping them could silently change which object
# a name refers to.
import numpy as np
import traceback
import gradio as gr
from PIL import Image
from scipy import ndimage, interpolate
import matplotlib.pyplot as plt
from bulk_bulge_generation import definitions, smooth
# from transformers import pipeline
import fastai
from fastcore.all import *
from fastai.vision.all import *
from ultralytics import ASSETS, YOLO
import cv2


# Legacy implementation, kept for reference only (superseded by
# generate_function_gradient + cv2.remap in transform_image).
#
# def apply_vector_field_transform(image, func, radius, center=(0.5, 0.5), strength=1, edge_smoothness=0.1, center_smoothness=0.20):
#     rows, cols = image.shape[:2]
#     max_dim = max(rows, cols)
#     print()
#     print(f"Max_dim is {max_dim}")
#     print()
#
#     center_y = int(center[1] * rows)
#     center_x = int(center[0] * cols)
#     center_y = abs(rows - center_y)
#
#     print(f"Image shape: {rows}x{cols}")
#     print(f"Center: ({center_x}, {center_y})")
#     print(f"Radius: {radius}, Strength: {strength}")
#     print(f"Edge smoothness: {edge_smoothness}, Center smoothness: {center_smoothness}")
#
#     y, x = np.ogrid[:rows, :cols]
#     y = (y - center_y) / max_dim
#     x = (x - center_x) / max_dim
#
#     dist_from_center = np.sqrt(x**2 + y**2)
#
#     z = func(x, y)
#     print(f"Function output - min: {np.min(z)}, max: {np.max(z)}")
#
#     gy, gx = np.gradient(z)
#     print(f"Initial gradient - gx min: {np.min(gx)}, max: {np.max(gx)}")
#     print(f"Initial gradient - gy min: {np.min(gy)}, max: {np.max(gy)}")
#
#     # Avoid division by zero
#     edge_smoothness = np.maximum(edge_smoothness, 1e-6)
#     center_smoothness = np.maximum(center_smoothness, 1e-6)
#
#     edge_mask = np.clip((radius - dist_from_center) / (radius * edge_smoothness), 0, 1)
#     center_mask = np.clip((dist_from_center - radius * center_smoothness) / (radius * center_smoothness), 0, 1)
#     mask = edge_mask * center_mask
#
#     gx = gx * mask
#     gy = gy * mask
#
#     magnitude = np.sqrt(gx**2 + gy**2)
#     magnitude[magnitude == 0] = 1  # Avoid division by zero
#     gx = gx / magnitude
#     gy = gy / magnitude
#
#     scale_factor = strength * np.log(max_dim) / 100
#     gx = gx * scale_factor * mask
#     gy = gy * scale_factor * mask
#
#     print(f"Final gradient - gx min: {np.min(gx)}, max: {np.max(gx)}")
#     print(f"Final gradient - gy min: {np.min(gy)}, max: {np.max(gy)}")
#
#     # Forward transformation
#     x_new = x + gx
#     y_new = y + gy
#
#     x_new = x_new * max_dim + center_x
#     y_new = y_new * max_dim + center_y
#
#     x_new = np.clip(x_new, 0, cols - 1)
#     y_new = np.clip(y_new, 0, rows - 1)
#
#     # Inverse transformation
#     x_inv = x - gx
#     y_inv = y - gy
#
#     x_inv = x_inv * max_dim + center_x
#     y_inv = y_inv * max_dim + center_y
#
#     x_inv = np.clip(x_inv, 0, cols - 1)
#     y_inv = np.clip(y_inv, 0, rows - 1)
#
#     # Apply transformations
#     channels_forward = [ndimage.map_coordinates(image[..., i], [y_new, x_new], order=1, mode='reflect')
#                         for i in range(image.shape[2])]
#     channels_inverse = [ndimage.map_coordinates(image[..., i], [y_inv, x_inv], order=1, mode='reflect')
#                         for i in range(image.shape[2])]
#
#     transformed_image = np.dstack(channels_forward).astype(image.dtype)
#     inverse_transformed_image = np.dstack(channels_inverse).astype(image.dtype)
#
#     return transformed_image, inverse_transformed_image, (gx, gy)


def create_gradient_vector_field(gx, gy, image_shape, step=20, reverse=False):
    """
    Create a gradient vector field visualization with option to reverse direction.

    The field is sampled on a regular grid (one arrow every ``step`` pixels),
    drawn with matplotlib's ``quiver`` and rasterised into an RGB array.

    :param gx: X-component of the gradient (2-D array indexed as ``[row, col]``)
    :param gy: Y-component of the gradient (2-D array indexed as ``[row, col]``)
    :param image_shape: Shape of the original image (height, width)
    :param step: Spacing between arrows
    :param reverse: If True, reverse the direction of the arrows
    :return: Gradient vector field as a numpy array (RGB image, uint8)
    """
    rows, cols = image_shape

    # Integer sample positions, starting half a step in from the border.
    y, x = np.mgrid[step/2:rows:step, step/2:cols:step].reshape(2, -1).astype(int)

    # Calculate the scale based on image size
    max_dim = max(rows, cols)
    scale = max_dim / 1000  # Adjusted for longer arrows

    # Reverse direction if specified
    direction = -1 if reverse else 1

    fig, ax = plt.subplots(figsize=(cols/50, rows/50), dpi=100)
    try:
        # gy is negated because the y axis is inverted below (set_ylim(rows, 0))
        # so that the plot uses image coordinates.
        ax.quiver(x, y, direction * gx[y, x], direction * -gy[y, x],
                  scale=scale,
                  scale_units='width',
                  width=0.002 * max_dim / 500,
                  headwidth=8,
                  headlength=12,
                  headaxislength=0,
                  color='black',
                  minshaft=2,
                  minlength=0,
                  pivot='tail')
        ax.set_xlim(0, cols)
        ax.set_ylim(rows, 0)
        ax.set_aspect('equal')
        ax.axis('off')

        fig.tight_layout(pad=0)
        fig.canvas.draw()

        # canvas.tostring_rgb() is deprecated (Matplotlib 3.8) and removed in
        # 3.10. buffer_rgba() exposes the rendered (height, width, 4) buffer;
        # drop alpha and copy so the result outlives the figure.
        rgba = np.asarray(fig.canvas.buffer_rgba())
        vector_field = rgba[..., :3].copy()
    finally:
        # Always release the figure, even if drawing fails, so repeated calls
        # from the web UI do not accumulate open figures.
        plt.close(fig)

    return vector_field


def apply_gradient_transform(image, gx, gy):
    """
    Apply the gradient transformation to an image.

    Each output pixel ``(row, col)`` is sampled from the input at
    ``(row + gy, col + gx)`` (clamped to the image bounds) using linear
    scattered-data interpolation.

    :param image: Input image as a numpy array, either ``(H, W, C)`` or ``(H, W)``
    :param gx: X-component of the gradient
    :param gy: Y-component of the gradient
    :return: Transformed image with the same dtype (and rank) as the input
    """
    rows, cols = image.shape[:2]
    y, x = np.mgrid[0:rows, 0:cols]

    # Ensure the new coordinates are within the image boundaries
    x_new = np.clip(x + gx, 0, cols - 1)
    y_new = np.clip(y + gy, 0, rows - 1)

    # The sample points are identical for every channel; build them once.
    grid_points = (y.ravel(), x.ravel())

    def _warp(channel):
        return interpolate.griddata(grid_points, channel.ravel(),
                                    (y_new, x_new), method='linear', fill_value=0)

    # Grayscale input has no channel axis to iterate over.
    if image.ndim == 2:
        return _warp(image).astype(image.dtype)

    channels = [_warp(image[:, :, i]) for i in range(image.shape[2])]
    return np.dstack(channels).astype(image.dtype)


def generate_function_gradient(func, image_shape, radius, center=(0.5, 0.5), strength=1, edge_smoothness=0.1, center_smoothness=0.20):
    """
    Build a per-pixel displacement field from the gradient of ``func``.

    ``func`` is evaluated on a coordinate grid centred on ``center`` and
    normalised by the larger image dimension. Its numerical gradient is masked
    to a ring around the centre, normalised by its maximum magnitude and then
    scaled to pixel units.

    :param func: Callable ``func(x, y)`` returning a 2-D array the same shape as ``x``/``y``
    :param image_shape: Shape of the image; only the first two entries (rows, cols) are used
    :param radius: Radius of the affected region, as a fraction of the larger image dimension
    :param center: ``(x, y)`` centre of the effect, each as a fraction of width/height
    :param strength: Effect strength; ``strength ** 1.5`` is clipped to ``[0, 3]``
    :param edge_smoothness: Relative width of the fall-off at the outer edge (floored at 1e-6)
    :param center_smoothness: Relative width of the fall-off around the centre (floored at 1e-6)
    :return: Tuple ``(gx, gy)`` of x/y displacements in pixels
    """
    rows, cols = image_shape[:2]
    max_dim = max(rows, cols)

    y, x = np.mgrid[0:rows, 0:cols].astype(np.float32)
    y = (y - center[1] * rows) / max_dim
    x = (x - center[0] * cols) / max_dim

    dist_from_center = np.sqrt(x**2 + y**2)

    z = func(x, y)

    gy, gx = np.gradient(z)

    edge_smoothness = np.maximum(edge_smoothness, 1e-6)
    center_smoothness = np.maximum(center_smoothness, 1e-6)

    # radius == 0 (reachable from the UI slider) would otherwise give 0/0 and
    # poison the field with NaNs; clamping the denominators makes the mask
    # zero everywhere in that case, i.e. no displacement.
    edge_width = np.maximum(radius * edge_smoothness, 1e-12)
    center_width = np.maximum(radius * center_smoothness, 1e-12)

    edge_mask = np.clip((radius - dist_from_center) / edge_width, 0, 1)
    center_mask = np.clip((dist_from_center - radius * center_smoothness) / center_width, 0, 1)
    mask = edge_mask * center_mask

    # In-place updates are deliberate: they keep the dtype produced by
    # np.gradient instead of promoting to the mask/scale dtype.
    gx *= mask
    gy *= mask

    magnitude = np.sqrt(gx**2 + gy**2)
    max_magnitude = np.max(magnitude)
    if max_magnitude > 0:
        gx /= max_magnitude
        gy /= max_magnitude

    # Increase the base scale factor
    base_scale = radius * max_dim * 0.2  # Increased from 0.1 to 0.2

    # Apply a non-linear scaling to the strength
    adjusted_strength = np.power(strength, 1.5)  # This will make the effect more pronounced at higher strengths

    # Increase the maximum strength multiplier
    scale_factor = base_scale * np.clip(adjusted_strength, 0, 3)  # Increased max from 2 to 3

    # Apply an additional scaling factor based on image size
    size_factor = np.log(max_dim) / np.log(1000)  # This will be 1 for 1000x1000 images, larger for bigger images
    scale_factor *= size_factor

    gx *= scale_factor
    gy *= scale_factor

    print(f"Final scale factor: {scale_factor}")
    print(f"Final gradient ranges: gx [{np.min(gx)}, {np.max(gx)}], gy [{np.min(gy)}, {np.max(gy)}]")

    return gx, gy


#############################
#    MAIN FUNCTION HERE
#############################

# Version Check
# `torch` is not imported explicitly; it is expected to come from the
# `fastai.vision.all` star import above.
print(f"NumPy version: {np.__version__}")
print(f"PyTorch version: {torch.__version__}")
print(f"FastAI version: {fastai.__version__}")

# NOTE(review): the classifier learners are loaded but no longer used by
# transform_image (their call sites are commented out). load_learner
# unpickles the file, so these must only ever point at trusted model files.
learn_bias = load_learner('model_bias.pkl')
learn_fresh = load_learner('model_fresh.pkl')

# Loads the YOLO Model
model_bulge = YOLO("best.onnx")
# modelv8x = YOLO("yolov8x.pt")
# modelv8n = YOLO("yolov8n.pt")


def predict_image(img, model, conf_threshold, iou_threshold):
    """Predicts objects in an image using a YOLOv8 model with adjustable confidence and IOU thresholds.

    :param img: Image source accepted by ``model.predict`` (the caller passes a numpy array)
    :param model: Loaded ultralytics ``YOLO`` model
    :param conf_threshold: Minimum confidence for a detection to be kept
    :param iou_threshold: IoU threshold used for non-maximum suppression
    :return: PIL image with the detections of the last result drawn on it
    :raises ValueError: If the model returns no results
    """
    results = model.predict(
        source=img,
        conf=conf_threshold,
        iou=iou_threshold,
        show_labels=True,
        show_conf=True,
        imgsz=640,
    )

    # Previously an empty result list left the return value unbound and
    # surfaced as an UnboundLocalError; fail with an explicit message instead.
    if not results:
        raise ValueError("model.predict returned no results")

    # Results.plot() returns a BGR array; reverse the channel axis for PIL.
    # NOTE(review): ultralytics also treats numpy *input* as BGR while the
    # caller passes RGB - confirm the model was trained/validated that way.
    annotated = results[-1].plot()
    return Image.fromarray(annotated[..., ::-1])


def _clamped_remap_grids(x, y, dx, dy, rows, cols):
    """Return float32 ``(map_x, map_y)`` for cv2.remap, clamped to the image bounds."""
    # cv2.remap only accepts float32 (or fixed-point) maps, so cast explicitly
    # in case the displacement field came back as float64.
    map_x = np.clip(x + dx, 0, cols - 1).astype(np.float32, copy=False)
    map_y = np.clip(y + dy, 0, rows - 1).astype(np.float32, copy=False)
    return map_x, map_y


def transform_image(image, func_choice, randomization_check, radius, center_x, center_y, strength, reverse_gradient=True, spiral_frequency=1):
    """
    Distort an image, run the bulge detector on it and visualise the warp.

    :param image: Path to the input image file
    :param func_choice: One of "Pinch", "Spiral", "Shift Up", "Bulge", "Volcano"
    :param randomization_check: If True, radius/centre/strength are drawn from ``definitions(rng)``
    :param radius: Radius of the effect as a fraction of the image size
    :param center_x: Horizontal centre of the effect as a fraction of the width
    :param center_y: Vertical centre of the effect as a fraction of the height
    :param strength: Requested strength (currently overridden, see NOTE below)
    :param reverse_gradient: Whether to flip the arrows of the forward vector field plot
    :param spiral_frequency: Frequency passed to the ``spiral`` function
    :return: Tuple ``(transformed, YOLO_image, vector_field, inverse_transformed,
             inverted_vector_field, applied_transformed)``
    :raises gr.Error: If no image was supplied
    :raises ValueError: If ``func_choice`` is not a known function name
    """
    if image is None:
        raise gr.Error("Please provide an input image.")

    with Image.open(image) as img:
        # Remember the on-disk size now so the file never has to be re-opened
        # (and left unclosed) just to learn its dimensions. PIL size is (w, h),
        # which is also the order cv2.resize expects.
        original_size = img.size
        I = np.array(img.convert('RGB'))

    # Downsample large images
    max_size = 640  # Increased from 512 to allow for more detail, decreased from 1024 to match YOLO model training.
    was_downsampled = max(I.shape[:2]) > max_size
    if was_downsampled:
        scale = max_size / max(I.shape[:2])
        # Guard against a zero-sized axis for extremely elongated images.
        new_size = (max(1, int(I.shape[1] * scale)), max(1, int(I.shape[0] * scale)))
        I = cv2.resize(I, new_size, interpolation=cv2.INTER_AREA)
        print(f"Downsampled image to {I.shape}")

    ##################################
    #    Transformation Functions    #
    ##################################

    def pinch(x, y):
        r = np.sqrt(x**2 + y**2)
        return r

    def zoom(x, y):
        return x**2 + y**2

    def shift(x, y):
        return np.arctan2(y, x)

    def bulge(x, y):
        r = -np.sqrt(x**2 + y**2)
        return r

    def spiral(x, y, frequency=1):
        r = np.sqrt(x**2 + y**2)
        theta = np.arctan2(y, x)
        return r * np.sin(theta - frequency * r)

    # NOTE(review): "Spiral" maps to `shift` and "Shift Up" maps to `spiral`,
    # and "Volcano" reuses `bulge`. The mapping is preserved as-is; confirm
    # whether the names or the mapping are what is intended.
    distortions = {
        "Pinch": pinch,
        "Spiral": shift,
        "Bulge": bulge,
        "Volcano": bulge,
        "Shift Up": lambda x, y: spiral(x, y, frequency=spiral_frequency),
    }

    rng = np.random.default_rng()
    if randomization_check:
        radius, location, strength, edge_smoothness = definitions(rng)
        center_x, center_y = location
        center_smoothness = edge_smoothness
    else:
        edge_smoothness, center_smoothness = smooth(rng, strength)

    if func_choice not in distortions:
        raise ValueError(f"Unknown function choice: {func_choice!r}")
    func = distortions[func_choice]

    # Every supported function currently disables smoothing, discarding the
    # values computed above (generate_function_gradient floors them at 1e-6).
    edge_smoothness = 0
    center_smoothness = 0

    print(f"Function choice: {func_choice}")
    print(f"Input image shape: {I.shape}")
    print(f"Radius: {radius}, Center: ({center_x}, {center_y}), Strength: {strength}")

    # strength = strength * 2  # This allows for stronger effects

    try:
        # NOTE(review): this overrides both the slider value and the randomised
        # strength, so the "Strength" input has no effect on the warp. Kept to
        # preserve current output; confirm whether it is intentional.
        strength = 0.8

        # Generate gradients
        gx, gy = generate_function_gradient(func, I.shape, radius, (center_x, center_y), strength, edge_smoothness, center_smoothness)

        # Vectorized transformation
        rows, cols = I.shape[:2]
        y, x = np.mgrid[0:rows, 0:cols].astype(np.float32)

        x_new, y_new = _clamped_remap_grids(x, y, gx, gy, rows, cols)
        transformed = cv2.remap(I, x_new, y_new, cv2.INTER_LINEAR)

        inv_gx, inv_gy = -gx, -gy
        x_inv, y_inv = _clamped_remap_grids(x, y, inv_gx, inv_gy, rows, cols)
        inverse_transformed = cv2.remap(I, x_inv, y_inv, cv2.INTER_LINEAR)

        # Apply Inverse to detected location
        YOLO_image = predict_image(transformed, model_bulge, 0.5, 0.5)
        applied_transformed = cv2.remap(transformed, x_inv, y_inv, cv2.INTER_LINEAR)

        # print(f"Transformed image shape: {transformed.shape}")
        # print(f"Inverse transformed image shape: {inverse_transformed.shape}")

        vector_field = create_gradient_vector_field(gx, gy, I.shape[:2], reverse=reverse_gradient)
        inverted_vector_field = create_gradient_vector_field(inv_gx, inv_gy, I.shape[:2], reverse=False)

        # print(f"Vector field shape: {vector_field.shape}")
        # print(f"Inverted vector field shape: {inverted_vector_field.shape}")

        # If we downsampled earlier, upsample the results back to original size
        if was_downsampled:
            transformed = cv2.resize(transformed, original_size, interpolation=cv2.INTER_LINEAR)
            inverse_transformed = cv2.resize(inverse_transformed, original_size, interpolation=cv2.INTER_LINEAR)
            applied_transformed = cv2.resize(applied_transformed, original_size, interpolation=cv2.INTER_LINEAR)
            vector_field = cv2.resize(vector_field, original_size, interpolation=cv2.INTER_LINEAR)
            inverted_vector_field = cv2.resize(inverted_vector_field, original_size, interpolation=cv2.INTER_LINEAR)

    except Exception as e:
        # UI boundary: report the failure and fall back to blank outputs so the
        # interface still renders. Every returned value needs a fallback here;
        # YOLO_image and applied_transformed used to be left undefined, which
        # turned any failure into an UnboundLocalError at the return below.
        print(f"Error in transformation: {str(e)}")
        traceback.print_exc()
        transformed = np.zeros_like(I)
        inverse_transformed = np.zeros_like(I)
        applied_transformed = np.zeros_like(I)
        vector_field = np.zeros_like(I)
        inverted_vector_field = np.zeros_like(I)
        YOLO_image = Image.fromarray(np.zeros_like(I))

    # Disabled classifier path (the unused `result` local it relied on was
    # removed together with it):
    # result = Image.fromarray(transformed.astype('uint8'), 'RGB')

    # categories = ['Distorted', 'Maze']

    # def clean_output(result_values):
    #     pred, idx, probs = result_values
    #     return dict(zip(categories, map(float, probs)))

    # Outdated, changing to a classification basis
    # result_bias = learn_bias.predict(result)
    # result_fresh = learn_fresh.predict(result)
    # result_bias_final = clean_output(result_bias)
    # result_fresh_final = clean_output(result_fresh)

    # Second inference pass, kept because save=True writes the annotated
    # prediction to disk and the results are logged.
    result_localization = model_bulge.predict(transformed, save=True)
    print(result_localization, "bulge")

    # result_localization1 = modelv8n.predict(transformed, save=True)
    # print(result_localization1, "modelv8n")
    # result_localization2 = modelv8x.predict(transformed, save=True)
    # print(result_localization2, "modelv8x")

    # YOLO_image1 = predict_image(transformed, modelv8n, 0.5, 0.5)
    # YOLO_image2 = predict_image(transformed, modelv8x, 0.5, 0.5)

    # return transformed, YOLO_image, YOLO_image1, YOLO_image2, result_bias_final, result_fresh_final, vector_field, inverse_transformed, inverted_vector_field
    # return transformed, YOLO_image, result_bias_final, result_fresh_final, vector_field, inverse_transformed, inverted_vector_field
    return transformed, YOLO_image, vector_field, inverse_transformed, inverted_vector_field, applied_transformed


demo = gr.Interface(
    fn=transform_image,
    inputs=[
        gr.Image(type="filepath"),
        gr.Dropdown(["Pinch", "Spiral", "Shift Up", "Bulge", "Volcano"], value="Bulge", label="Function"),
        gr.Checkbox(label="Randomize inputs?"),
        gr.Slider(0, 0.5, value=0.25, label="Radius (as fraction of image size)"),
        gr.Slider(0, 1, value=0.5, label="Center X"),
        gr.Slider(0, 1, value=0.5, label="Center Y"),
        gr.Slider(0, 1, value=0.5, label="Strength"),
        # gr.Slider(0, 1, value=0.5, label="Edge Smoothness"),
        # gr.Slider(0, 0.5, value=0.1, label="Center Smoothness")
        # gr.Checkbox(label="Reverse Gradient Direction"),
    ],
    # The image input is type="filepath", so examples are given as paths.
    # Decoding them into numpy arrays at import time was unnecessary work and
    # left four file handles open.
    examples=[
        ["examples/1500_maze.jpg", "Bulge", True, 0.25, 0.5, 0.5, 0.5],
        ["examples/2048_maze.jpg", "Bulge", True, 0.25, 0.5, 0.5, 0.5],
        ["examples/2300_fresh.jpg", "Bulge", True, 0.25, 0.5, 0.5, 0.5],
        ["examples/50_fresh.jpg", "Bulge", True, 0.25, 0.5, 0.5, 0.5],
    ],
    outputs=[
        gr.Image(label="Transformed Image"),
        gr.Image(label="bulge_model Model Classification"),
        # gr.Image(label="yolov8n Model Classification"),
        # gr.Image(label="yolov8x Model Classification"),
        # gr.Label(),
        # gr.Label(),
        gr.Image(label="Gradient Vector Field"),
        gr.Image(label="Inverse Gradient"),
        gr.Image(label="Inverted Vector Field"),
        gr.Image(label="Fixed Image"),
    ],
    title="Image Transformation Demo!",
    article="If you like this demo, please star the github repository for the project! Located [here!](https://github.com/nick-leland/DistortionML)",
    description=""
)

demo.launch(share=True)