Training code
- rct_diffusion_pipeline.py +198 -8
- test_pipeline.py +6 -5
- train_model.py +128 -0
rct_diffusion_pipeline.py
CHANGED
@@ -1,15 +1,205 @@

The previous 15-line skeleton (the two imports, the class declaration, and empty method stubs) is replaced with the full pipeline:
from diffusers import DiffusionPipeline
from diffusers import DDPMPipeline
from diffusers import DDPMScheduler, UNet2DModel
import torch
import torchvision.transforms as T
from PIL import Image
from transformers import AutoTokenizer
from datasets import load_dataset
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

class RCTDiffusionPipeline(DiffusionPipeline):
    def __init__(self):
        super().__init__()

        # dictionaries that map the classes of object_description, color1, color2 and color3 to token indices
        self.object_description_dict = {}
        self.color1_dict = {}
        self.color2_dict = {}
        self.color3_dict = {}
        self.load_dictionaries_from_dataset()

        self.scheduler = DDPMScheduler()

        # the number of hidden features is dependent on the loaded dictionaries!
        # 12 channels = 4 views x 3 RGB channels, stacked along the channel axis
        self.unet = UNet2DModel(sample_size=256, in_channels=12, out_channels=12,
                                down_block_types=('DownBlock2D', 'DownBlock2D', 'AttnDownBlock2D'),
                                up_block_types=('UpBlock2D', 'UpBlock2D', 'AttnUpBlock2D'),
                                block_out_channels=(16, 32, 64), norm_num_groups=16)

        self.unet.to('cuda')

    def load_dictionaries_from_dataset(self):
        dataset = load_dataset('frutiemax/rct_dataset')
        dataset = dataset['train']

        # assign each distinct label the next free index, in dataset order
        for row in dataset:
            if not row['object_description'] in self.object_description_dict:
                self.object_description_dict[row['object_description']] = len(self.object_description_dict)
            if not row['color1'] in self.color1_dict and row['color1'] != 'none':
                self.color1_dict[row['color1']] = len(self.color1_dict)
            if not row['color2'] in self.color2_dict and row['color2'] != 'none':
                self.color2_dict[row['color2']] = len(self.color2_dict)
            if not row['color3'] in self.color3_dict and row['color3'] != 'none':
                self.color3_dict[row['color3']] = len(self.color3_dict)
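The insertion-order indexing above is easiest to see on toy data (hypothetical rows, not the actual dataset contents):

    # each first occurrence of a label gets the next free index
    rows = [{'object_description': 'pine'},
            {'object_description': 'oak'},
            {'object_description': 'pine'}]
    tokens = {}
    for row in rows:
        if row['object_description'] not in tokens:
            tokens[row['object_description']] = len(tokens)
    assert tokens == {'pine': 0, 'oak': 1}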
    # helper function to dump the class tokens for inspection
    def print_class_tokens_to_csv(self):
        object_descriptions = pd.DataFrame(self.object_description_dict.items())
        object_descriptions.to_csv('object_descriptions_tokens.csv')

        color1 = pd.DataFrame(self.color1_dict.items())
        color1.to_csv('color1_tokens.csv')

        color2 = pd.DataFrame(self.color2_dict.items())
        color2.to_csv('color2_tokens.csv')

        color3 = pd.DataFrame(self.color3_dict.items())
        color3.to_csv('color3_tokens.csv')

    # helper functions to build weight tables
    def get_object_description_weights(self, classifiers: list[tuple[str, float]]) -> np.array:
        result = np.zeros(len(self.object_description_dict))

        for classifier in classifiers:
            id, weight = classifier
            if id in self.object_description_dict:
                weight_index = self.object_description_dict[id]
                result[weight_index] = weight
        return result

    def get_color1_weights(self, classifiers: list[tuple[str, float]]) -> np.array:
        result = np.zeros(len(self.color1_dict))

        for classifier in classifiers:
            id, weight = classifier
            if id in self.color1_dict:
                weight_index = self.color1_dict[id]
                result[weight_index] = weight
        return result

    def get_color2_weights(self, classifiers: list[tuple[str, float]]) -> np.array:
        result = np.zeros(len(self.color2_dict))

        for classifier in classifiers:
            id, weight = classifier
            if id in self.color2_dict:
                weight_index = self.color2_dict[id]
                result[weight_index] = weight
        return result

    def get_color3_weights(self, classifiers: list[tuple[str, float]]) -> np.array:
        result = np.zeros(len(self.color3_dict))

        for classifier in classifiers:
            id, weight = classifier
            if id in self.color3_dict:
                weight_index = self.color3_dict[id]
                result[weight_index] = weight
        return result

    def get_class_labels_size(self):
        return len(self.object_description_dict) + len(self.color1_dict) + len(self.color2_dict) + len(self.color3_dict)

    def pack_labels_to_tensor(self, num_images, object_descriptions : np.array, colors1 : np.array, colors2 : np.array, colors3 : np.array) -> torch.Tensor:
        num_labels = self.get_class_labels_size()
        class_labels = torch.Tensor(size=(num_images, num_labels))

        # concatenate the four weight tables into one label vector per image
        for batch_index in range(num_images):
            offset = 0
            class_labels[batch_index, offset:offset + len(self.object_description_dict)] = torch.from_numpy(object_descriptions[batch_index])

            offset += len(self.object_description_dict)
            class_labels[batch_index, offset:offset + len(self.color1_dict)] = torch.from_numpy(colors1[batch_index])

            offset += len(self.color1_dict)
            class_labels[batch_index, offset:offset + len(self.color2_dict)] = torch.from_numpy(colors2[batch_index])

            offset += len(self.color2_dict)
            class_labels[batch_index, offset:offset + len(self.color3_dict)] = torch.from_numpy(colors3[batch_index])
        return class_labels
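A minimal sketch of the resulting layout, assuming toy dictionaries of sizes 2 and 1 (hypothetical labels, not the real vocabulary):

    import numpy as np

    # object_description_dict = {'pine': 0, 'oak': 1}, color1_dict = {'green': 0}
    obj = np.array([0.0, 1.0])   # 'oak' with weight 1.0
    c1 = np.array([1.0])         # 'green' with weight 1.0
    packed = np.concatenate([obj, c1])   # -> [0.0, 1.0, 1.0]
    # pack_labels_to_tensor performs the same concatenation, one torch row per image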
    def __call__(self, object_description : list[list[tuple[str, float]]], color1 : list[list[tuple[str, float]]],
                 color2 : list[list[tuple[str, float]]] = None, color3 : list[list[tuple[str, float]]] = None,
                 batch_size=1, num_inference_steps=20, generator=torch.manual_seed(torch.random.seed())):

        # check that the labels match the batch size
        if len(object_description) != batch_size:
            return None

        if len(color1) != batch_size:
            return None

        if color2 is not None and len(color2) != batch_size:
            return None

        if color3 is not None and len(color3) != batch_size:
            return None

        # build the labels for each batch
        object_descriptions = []
        colors1 = []
        colors2 = []
        colors3 = []

        for batch_index in range(batch_size):
            obj_desc = self.get_object_description_weights(object_description[batch_index])
            c1 = self.get_color1_weights(color1[batch_index])

            if color2 is not None:
                c2 = self.get_color2_weights(color2[batch_index])
            else:
                c2 = self.get_color2_weights([])

            if color3 is not None:
                c3 = self.get_color3_weights(color3[batch_index])
            else:
                c3 = self.get_color3_weights([])

            object_descriptions.append(obj_desc)
            colors1.append(c1)
            colors2.append(c2)
            colors3.append(c3)

        # now put those weights into a tensor
        class_labels = self.pack_labels_to_tensor(batch_size, object_descriptions, colors1, colors2, colors3)
        class_labels = class_labels.to('cuda')

        # set the inference steps
        self.scheduler.set_timesteps(num_inference_steps)

        # start from pure noise: 4 views of 3 RGB channels per sample
        noise_batches = torch.Tensor(size=(batch_size, 4, 3, 256, 256)).to('cuda')
        for batch_index in range(batch_size):
            for view_index in range(4):
                noise = torch.randn(3, 256, 256).to('cuda')
                noise_batches[batch_index, view_index] = noise

        # reshape the data so it's (batch_size, 12, 256, 256)
        noise_batches = torch.reshape(noise_batches, (batch_size, 12, 256, 256)).to('cuda')

        # now call the model for the num_inference_steps iterations
        progress_bar = tqdm(total=num_inference_steps)
        step = 0
        for t in self.scheduler.timesteps:
            progress_bar.set_description(f'Inference step {step}')
            with torch.no_grad():
                noise_residual = self.unet(noise_batches, t, class_labels=class_labels).sample
                previous_noisy_sample = self.scheduler.step(noise_residual, t, noise_batches).prev_sample
                noise_batches = previous_noisy_sample
            progress_bar.update(1)
            step = step + 1

        # reshape the data so we get back 4 RGB images
        noise_batches = torch.reshape(noise_batches, (batch_size, 4, 3, 256, 256)).to('cpu')

        # convert those tensors to PIL images
        output_images = []
        tensor_to_pil = T.ToPILImage('RGB')

        for batch_index in range(batch_size):
            for image_index in range(4):
                output_images.append(tensor_to_pil(noise_batches[batch_index, image_index]))

        # for now just return the images
        return output_images
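T.ToPILImage expects float tensors in [0, 1], which matches the T.ToTensor preprocessing used in train_model.py; since the sampler can overshoot that range, a clamp before conversion is a cheap safeguard (a sketch, not part of the commit):

    images = noise_batches.clamp(0.0, 1.0)
    first_view = T.ToPILImage('RGB')(images[0, 0])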
test_pipeline.py
CHANGED
@@ -1,8 +1,9 @@

The manual DDPMScheduler/UNet2DModel setup is removed; the test now drives the pipeline directly:

from rct_diffusion_pipeline import RCTDiffusionPipeline

torch_device = "cuda"

pipeline = RCTDiffusionPipeline()
pipeline.print_class_tokens_to_csv()
output = pipeline([[('aleppo pine tree', 1.0)]], [[('dark green', 1.0)]])
print('test')
train_model.py
ADDED
@@ -0,0 +1,128 @@

The new training script:

from datasets import load_dataset
from PIL.Image import Image
import PIL
from PIL.Image import Resampling
import numpy as np
from rct_diffusion_pipeline import RCTDiffusionPipeline
import torch
import torchvision.transforms as T
import torch.nn.functional as F
from diffusers.optimization import get_cosine_schedule_with_warmup
from tqdm.auto import tqdm

def save_and_test(pipeline, epoch):
    # render a fixed test prompt and save the 4 generated views to disk
    outputs = pipeline([[('aleppo pine tree', 1.0)]], [[('dark green', 1.0)]])
    for image_index in range(len(outputs)):
        file_name = f'out{image_index}_{epoch}.png'
        outputs[image_index].save(file_name)

    model_file = f'rct_foliage_{epoch}.pth'
    pipeline.save_pretrained(model_file)
def train_model(batch_size=4, epochs=100, save_model_interval=10, start_learning_rate=1e-3, lr_warmup_steps=500):
    dataset = load_dataset('frutiemax/rct_dataset')
    dataset = dataset['train']

    # every object has 4 views, so the dataset holds 4 rows per image
    num_images = int(dataset.num_rows / 4)

    # get all the entries for the 4 views, split into four lists
    views = []

    for view_index in range(4):
        entries = [entry for entry in dataset if entry['view'] == view_index]
        views.append(entries)

    # convert those images to 256x256 by scaling them up and centering them on a blank canvas
    image_views = []
    for view_index in range(4):
        images = []
        for entry in views[view_index]:
            image = entry['image']

            # largest integer scale factor that keeps the sprite inside 256x256
            scale_factor = int(np.minimum(256 / image.width, 256 / image.height))
            image = image.resize((scale_factor * image.width, scale_factor * image.height), resample=Resampling.NEAREST)

            new_image = PIL.Image.new('RGB', (256, 256))
            new_image.paste(image, box=(int((256 - image.width) / 2), int((256 - image.height) / 2)))
            images.append(new_image)
        image_views.append(images)

    del views

    # convert those views to tensors
    targets = torch.Tensor(size=(num_images, 4, 3, 256, 256))
    pillow_to_tensor = T.ToTensor()

    for image_index in range(num_images):
        for view_index in range(4):
            targets[image_index, view_index] = pillow_to_tensor(image_views[view_index][image_index])
    del image_views
    del entries

    # stack the 4 RGB views along the channel axis: (num_images, 12, 256, 256)
    targets = torch.reshape(targets, (num_images, 12, 256, 256))
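The reshape relies on the view and channel dimensions being adjacent; a quick standalone sanity check of that layout (a sketch, not in the commit):

    import torch

    t = torch.arange(2 * 4 * 3 * 2 * 2).reshape(2, 4, 3, 2, 2)   # (N, views, C, H, W)
    flat = torch.reshape(t, (2, 12, 2, 2))
    # channel block [v*3 : v*3+3] of image n is exactly view v of image n
    assert torch.equal(flat[1, 3:6], t[1, 1])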
    # get the labels; the 4 views of an image share the same labels, so view 0 is enough
    view0_entries = [row for row in dataset if row['view'] == 0]
    obj_descriptions = [row['object_description'] for row in view0_entries]
    colors1 = [row['color1'] for row in view0_entries]
    colors2 = [row['color2'] for row in view0_entries]
    colors3 = [row['color3'] for row in view0_entries]

    del view0_entries

    # convert the descriptions, color1, color2 and color3 to lists of (label, weight=1.0) tuples
    obj_descriptions = [[(obj_desc, 1.0)] for obj_desc in obj_descriptions]
    colors1 = [[(color1, 1.0)] for color1 in colors1]
    colors2 = [[(color2, 1.0)] for color2 in colors2]
    colors3 = [[(color3, 1.0)] for color3 in colors3]

    # convert those tuples to numpy arrays using the helper functions of the model
    model = RCTDiffusionPipeline()
    obj_descriptions = [model.get_object_description_weights(obj_desc) for obj_desc in obj_descriptions]
    colors1 = [model.get_color1_weights(color1) for color1 in colors1]
    colors2 = [model.get_color2_weights(color2) for color2 in colors2]
    colors3 = [model.get_color3_weights(color3) for color3 in colors3]

    # finally, convert those numpy arrays to a tensor
    class_labels = model.pack_labels_to_tensor(num_images, obj_descriptions, colors1, colors2, colors3)
    del obj_descriptions
    del colors1
    del colors2
    del colors3
    del dataset
    optimizer = torch.optim.Adam(model.unet.parameters(), lr=start_learning_rate)
    lr_scheduler = get_cosine_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=lr_warmup_steps,
        num_training_steps=num_images * epochs
    )
    # note: the scheduler is stepped once per batch, so with batch_size=4 this
    # schedule only advances through about a quarter of its cosine decay

    # train for `epochs` epochs over the dataset with a random noise level per batch
    progress_bar = tqdm(total=epochs)
    for epoch in range(epochs):
        # create a noisy version of each sprite
        for batch_index in range(0, num_images, batch_size):
            progress_bar.set_description(f'epoch={epoch}, batch_index={batch_index}')
            batch_end = np.minimum(num_images, batch_index + batch_size)
            clean_images = targets[batch_index:batch_end].to('cuda')
            batch_labels = class_labels[batch_index:batch_end].to('cuda')

            noise = torch.randn(clean_images.shape).to('cuda')
            # use the actual slice size so the last, possibly partial, batch doesn't mismatch
            timesteps = torch.randint(0, model.scheduler.config.num_train_timesteps, (clean_images.shape[0],)).to('cuda')
            noisy_images = model.scheduler.add_noise(clean_images, noise, timesteps)
            noise_pred = model.unet(noisy_images, timesteps, batch_labels, return_dict=False)[0]
            loss = F.mse_loss(noise_pred, noise)
            loss.backward()

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

        if (epoch + 1) % save_model_interval == 0:
            save_and_test(model, epoch)
        progress_bar.update(1)


if __name__ == '__main__':
    train_model()
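The commit checkpoints through pipeline.save_pretrained; since only the UNet carries trained weights here, a plain state-dict round trip is a minimal alternative sketch (the file name is hypothetical):

    # save just the trained denoiser
    torch.save(model.unet.state_dict(), 'rct_foliage_unet.pth')

    # later: rebuild the pipeline and restore the weights before sampling
    pipeline = RCTDiffusionPipeline()
    pipeline.unet.load_state_dict(torch.load('rct_foliage_unet.pth'))
    images = pipeline([[('aleppo pine tree', 1.0)]], [[('dark green', 1.0)]])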