diff --git a/.gitignore b/.gitignore
index 5bbee1b..1d17dae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1 @@
 .venv
-data
-scripts/wandb
-models
-scripts/yolov8*
diff --git a/requirements.txt b/requirements.txt
index d1c8048..85f0bbc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,7 +9,13 @@ numpy
 rich
 tqdm
 transformers
-opencv-python-headless
 fastapi
 uvicorn
 matplotlib
+accelerate
+torchvision
+ftfy
+tensorboard
+Jinja2
+datasets
+peft
diff --git a/scripts/clear_memory.py b/scripts/clear_memory.py
deleted file mode 100644
index 7b6010e..0000000
--- a/scripts/clear_memory.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import gc
-import torch
-from logger import rich_logger as l
-
-def clear_memory():
-    """
-    Clears the memory by collecting garbage and emptying the CUDA cache.
-
-    This function is useful when dealing with memory-intensive operations in Python, especially when using libraries like PyTorch.
-
-    Note:
-        This function requires the `gc` and `torch` modules to be imported.
-
-    """
-    gc.collect()
-    torch.cuda.empty_cache()
-    l.info("Memory Cleared")
-    
\ No newline at end of file
diff --git a/scripts/config.py b/scripts/config.py
index b620197..10947d3 100644
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -1,13 +1,60 @@
-LOGS_DIR = '../logs'
-DATA_DIR = '../data'
-Project_Name = 'product_placement_api'
-entity = 'vikramxd'
-image_dir = '../sample_data'
-mask_dir = '../masks'
-segmentation_model = 'facebook/sam-vit-large'
-detection_model = 'yolov8l'
-kandinsky_model_name = 'kandinsky-community/kandinsky-2-2-decoder-inpaint'
-video_model_name = 'stabilityai/stable-video-diffusion-img2vid-xt'
-target_width = 2560
-target_height = 1440
-roi_scale = 0.6
+MODEL_NAME = "stabilityai/stable-diffusion-xl-base-1.0"
+VAE_NAME = "madebyollin/sdxl-vae-fp16-fix"
+DATASET_NAME = "hahminlew/kream-product-blip-captions"
+PROJECT_NAME = "Product Photography"
+
+class Config:
+    def __init__(self):
+        self.pretrained_model_name_or_path = MODEL_NAME
+        self.pretrained_vae_model_name_or_path = VAE_NAME
+        self.revision = None
+        self.variant = None
+        self.dataset_name = DATASET_NAME
+        self.dataset_config_name = None
+        self.train_data_dir = None
+        self.image_column = 'image'
+        self.caption_column = 'text'
+        self.validation_prompt = None
+        self.num_validation_images = 4
+        self.validation_epochs = 1
+        self.max_train_samples = None
+        self.output_dir = "output"
+        self.cache_dir = None
+        self.seed = None
+        self.resolution = 1024
+        self.center_crop = False
+        self.random_flip = False
+        self.train_text_encoder = False
+        self.train_batch_size = 16
+        self.num_train_epochs = 200
+        self.max_train_steps = None
+        self.checkpointing_steps = 500
+        self.checkpoints_total_limit = None
+        self.resume_from_checkpoint = None
+        self.gradient_accumulation_steps = 1
+        self.gradient_checkpointing = False
+        self.learning_rate = 1e-4
+        self.scale_lr = False
+        self.lr_scheduler = "constant"
+        self.lr_warmup_steps = 500
+        self.snr_gamma = None
+        self.allow_tf32 = False
+        self.dataloader_num_workers = 0
+        self.use_8bit_adam = True
+        self.adam_beta1 = 0.9
+        self.adam_beta2 = 0.999
+        self.adam_weight_decay = 1e-2
+        self.adam_epsilon = 1e-08
+        self.max_grad_norm = 1.0
+        self.push_to_hub = False
+        self.hub_token = None
+        self.prediction_type = None
+        self.hub_model_id = None
+        self.logging_dir = "logs"
+        self.report_to = "wandb"
+        self.mixed_precision = None
+        self.local_rank = -1
+        self.enable_xformers_memory_efficient_attention = False
+        self.noise_offset = 0
+        self.rank = 4
+        self.debug_loss = False
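Note: the rewritten scripts/config.py swaps the old module-level constants for a Config object whose fields mirror the argument list of the diffusers SDXL LoRA training scripts. The diff does not include the consumer, so the following is only a sketch of how a training entry point might use it; the override values are illustrative assumptions, not defaults from this repo.

    # Hypothetical consumer of the new Config; a train.py like this is not part of this diff.
    from config import Config, PROJECT_NAME

    config = Config()
    config.mixed_precision = "fp16"   # fields are plain attributes, so a run can override them in code
    config.max_train_steps = 1000     # illustrative value
    assert config.resolution == 1024  # SDXL's native resolution, per the defaults above
    print(f"{PROJECT_NAME}: fine-tuning {config.pretrained_model_name_or_path} "
          f"with LoRA rank {config.rank} on {config.dataset_name}")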
diff --git a/scripts/endpoint.py b/scripts/endpoint.py
deleted file mode 100644
index cbb9ebe..0000000
--- a/scripts/endpoint.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from fastapi import FastAPI,HTTPException
-from fastapi.responses import FileResponse
-from fastapi.middleware.cors import CORSMiddleware
-from models import kandinsky_inpainting_inference
-from segment_everything import extend_image, generate_mask_from_bbox, invert_mask
-from video_pipeline import fetch_video_pipeline
-from diffusers.utils import load_image
-from logger import rich_logger as l
-from fastapi import UploadFile, File
-from config import segmentation_model, detection_model,target_height, target_width, roi_scale
-from PIL import Image
-import io
-import tempfile
-
-
-
-
-
-app = FastAPI(title="Product Diffusion API",
-              description="API for Product Diffusion",
-              version="0.1.0",
-              openapi_url="/api/v1/openapi.json")
-
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_methods=["*"],
-    allow_headers=["*"],
-    allow_credentials=True
-
-)
-
-@app.post("/api/v1/image_outpainting")
-async def image_outpainting(image: UploadFile, prompt: str, negative_prompt: str,num_inference_steps:int=30):
-    """
-    Perform Outpainting on an image.
-
-    Args:
-        image (UploadFile): The input image file.
-        prompt (str): The prompt for the outpainting.
-        negative_prompt (str): The negative prompt for the outpainting.
-
-    Returns:
-        JSONResponse: The output image path.
-    """
-    image_data = await image.read()
-    image = Image.open(io.BytesIO(image_data))
-    image = load_image(image)
-    image = extend_image(image, target_width=target_width, target_height=target_height, roi_scale=roi_scale)
-    mask_image = generate_mask_from_bbox(image, segmentation_model, detection_model)
-    mask_image = Image.fromarray(mask_image)
-    mask_image = invert_mask(mask_image)
-    output_image = kandinsky_inpainting_inference(prompt, negative_prompt, image, mask_image,num_inference_steps=num_inference_steps)
-    with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp_file:
-        output_image.save(temp_file, format='JPEG')
-        temp_file_path = temp_file.name
-    return FileResponse(temp_file_path, media_type='image/jpeg', filename='output_image.jpg')
-
-
-
-
-
\ No newline at end of file
diff --git a/scripts/logger.py b/scripts/logger.py
index 2e0f42f..c493b93 100644
--- a/scripts/logger.py
+++ b/scripts/logger.py
@@ -25,5 +25,4 @@ for level in log_levels:
     file_handler = RotatingFileHandler(log_file, maxBytes=10 * 1024 * 1024, backupCount=5)
     file_handler.setLevel(level)
     file_handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] %(module)s - %(message)s'))
-    rich_logger.addHandler(file_handler)
-
+    rich_logger.addHandler(file_handler)
\ No newline at end of file
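Note: the deleted endpoint returned a FileResponse backed by a NamedTemporaryFile created with delete=False and never removed it, leaking one file per request. If the route is ever restored, Starlette's background-task hook is the usual cleanup; a sketch, assuming the same handler shape (not code from this diff):

    # Cleanup sketch for the removed handler pattern.
    import os
    import tempfile
    from fastapi.responses import FileResponse
    from starlette.background import BackgroundTask

    def image_response(output_image):
        with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp_file:
            output_image.save(temp_file, format='JPEG')
            temp_file_path = temp_file.name
        # Remove the temp file only after the response body has been sent.
        return FileResponse(temp_file_path, media_type='image/jpeg',
                            filename='output_image.jpg',
                            background=BackgroundTask(os.remove, temp_file_path))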
diff --git a/scripts/models.py b/scripts/models.py
deleted file mode 100644
index 2ca9eea..0000000
--- a/scripts/models.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from logger import rich_logger as l
-from wandb.integration.diffusers import autolog
-from config import Project_Name
-from clear_memory import clear_memory
-import numpy as np
-import torch
-from diffusers.utils import load_image
-from pipeline import fetch_kandinsky_pipeline
-from config import controlnet_adapter_model_name,controlnet_base_model_name,kandinsky_model_name
-from diffusers import StableDiffusionInpaintPipeline, DPMSolverMultistepScheduler
-from video_pipeline import fetch_video_pipeline
-from config import video_model_name
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-def kandinsky_inpainting_inference(prompt, negative_prompt, image, mask_image,num_inference_steps=800,strength=1.0,guidance_scale = 7.8):
-    """
-    Perform Kandinsky inpainting inference on the given image.
-
-    Args:
-        prompt (str): The prompt for the inpainting process.
-        negative_prompt (str): The negative prompt for the inpainting process.
-        image (PIL.Image.Image): The input image to be inpainted.
-        mask_image (PIL.Image.Image): The mask image indicating the areas to be inpainted.
-
-    Returns:
-        PIL.Image.Image: The output inpainted image.
-    """
-    clear_memory()
-    l.info("Kandinsky Inpainting Inference ->")
-    pipe = fetch_kandinsky_pipeline(controlnet_adapter_model_name, controlnet_base_model_name,kandinsky_model_name, image)
-    output_image = pipe(prompt=prompt,negative_prompt=negative_prompt,image=image,mask_image=mask_image,num_inference_steps=num_inference_steps,strength=strength,guidance_scale = guidance_scale,height = 1472, width = 2560).images[0]
-    return output_image
-
-
-
-
-
-
-
-
-def image_to_video_pipeline(image, video_model_name, decode_chunk_size, motion_bucket_id, generator=torch.manual_seed(42)):
-    """
-    Converts an image to a video using a specified video model.
-
-    Args:
-        image (Image): The input image to convert to video.
-        video_model_name (str): The name of the video model to use.
-        decode_chunk_size (int): The size of the chunks to decode.
-        motion_bucket_id (str): The ID of the motion bucket.
-        generator (torch.Generator, optional): The random number generator. Defaults to torch.manual_seed(42).
-
-    Returns:
-        list: The frames of the generated video.
-    """
-    clear_memory()
-    l.info("Stable Video Diffusion Image 2 Video pipeline Inference ->")
-    pipe = fetch_video_pipeline(video_model_name)
-    frames = pipe(image=image, decode_chunk_size=decode_chunk_size, motion_bucket_id=motion_bucket_id, generator=generator).frames[0]
-    return frames
-
-
-
-
-
-
-
-
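Note: the deleted module imported controlnet_adapter_model_name and controlnet_base_model_name from config, names the old scripts/config.py (above) never defined, so kandinsky_inpainting_inference could not have run as committed. Stripped of that indirection, it reduces to a stock diffusers inpainting call; a standalone sketch using the model id from the old config (paths, prompts, and step count below are placeholders):

    import torch
    from diffusers import AutoPipelineForInpainting
    from diffusers.utils import load_image

    # Roughly what the removed kandinsky_inpainting_inference() amounted to.
    pipe = AutoPipelineForInpainting.from_pretrained(
        "kandinsky-community/kandinsky-2-2-decoder-inpaint", torch_dtype=torch.float16
    ).to("cuda")
    image = load_image("product.jpg")  # placeholder input image
    mask = load_image("mask.png")      # placeholder mask; white marks the region to repaint
    result = pipe(
        prompt="product on a marble countertop, studio lighting",  # placeholder prompt
        negative_prompt="blurry, low quality",
        image=image,
        mask_image=mask,
        num_inference_steps=30,  # the deleted default of 800 steps was unusually high
        guidance_scale=7.8,
    ).images[0]
    result.save("output.jpg")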
diff --git a/scripts/pipeline.py b/scripts/pipeline.py
deleted file mode 100644
index af0e6bf..0000000
--- a/scripts/pipeline.py
+++ /dev/null
@@ -1,100 +0,0 @@
-from diffusers import ControlNetModel,StableDiffusionControlNetInpaintPipeline,AutoPipelineForInpainting
-import torch
-
-
-
-
-
-
-
-class PipelineFetcher:
-    """
-    A class that fetches different pipelines for image processing.
-
-    Args:
-        controlnet_adapter_model_name (str): The name of the controlnet adapter model.
-        controlnet_base_model_name (str): The name of the controlnet base model.
-        kandinsky_model_name (str): The name of the Kandinsky model.
-        image (str): The image to be processed.
-
-    """
-
-    def __init__(self, controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name, image: str):
-        self.controlnet_adapter_model_name = controlnet_adapter_model_name
-        self.controlnet_base_model_name = controlnet_base_model_name
-        self.kandinsky_model_name = kandinsky_model_name
-        self.image = image
-
-    def ControlNetInpaintPipeline(self):
-        """
-        Fetches the ControlNet inpainting pipeline.
-
-        Returns:
-            pipe (StableDiffusionControlNetInpaintPipeline): The ControlNet inpainting pipeline.
-
-        """
-        controlnet = ControlNetModel.from_pretrained(self.controlnet_adapter_model_name, torch_dtype=torch.float16)
-        pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
-            self.controlnet_base_model_name, controlnet=controlnet, torch_dtype=torch.float16
-        )
-        pipe.to('cuda')
-
-        return pipe
-
-    def KandinskyPipeline(self):
-        """
-        Fetches the Kandinsky pipeline.
-
-        Returns:
-            pipe (AutoPipelineForInpainting): The Kandinsky pipeline.
-
-        """
-        pipe = AutoPipelineForInpainting.from_pretrained(self.kandinsky_model_name, torch_dtype=torch.float16)
-        pipe = pipe.to('cuda')
-        pipe.unet = torch.compile(pipe.unet)
-
-        return pipe
-
-
-
-
-
-
-def fetch_control_pipeline(controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name, image):
-    """
-    Fetches the control pipeline for image processing.
-
-    Args:
-        controlnet_adapter_model_name (str): The name of the controlnet adapter model.
-        controlnet_base_model_name (str): The name of the controlnet base model.
-        kandinsky_model_name (str): The name of the Kandinsky model.
-        image: The input image for processing.
-
-    Returns:
-        pipe: The control pipeline for image processing.
-    """
-    pipe_fetcher = PipelineFetcher(controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name, image)
-    pipe = pipe_fetcher.ControlNetInpaintPipeline()
-    return pipe
-
-
-def fetch_kandinsky_pipeline(controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name, image):
-    """
-    Fetches the Kandinsky pipeline.
-
-    Args:
-        controlnet_adapter_model_name (str): The name of the controlnet adapter model.
-        controlnet_base_model_name (str): The name of the controlnet base model.
-        kandinsky_model_name (str): The name of the Kandinsky model.
-        image: The input image.
-
-    Returns:
-        pipe: The Kandinsky pipeline.
-    """
-    pipe_fetcher = PipelineFetcher(controlnet_adapter_model_name, controlnet_base_model_name, kandinsky_model_name, image)
-    pipe = pipe_fetcher.KandinskyPipeline()
-    pipe = pipe.to('cuda')
-
-    return pipe
-
diff --git a/scripts/run.py b/scripts/run.py
deleted file mode 100644
index cccc06a..0000000
--- a/scripts/run.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import argparse
-import os
-from segment_everything import generate_mask_from_bbox, extend_image, invert_mask
-from models import kandinsky_inpainting_inference, load_image
-from PIL import Image
-from config import segmentation_model, detection_model,target_height, target_width, roi_scale
-
-def main(args):
-    """
-    Main function that performs the product diffusion process.
-
-    Args:
-        args (Namespace): Command-line arguments.
-
-    Returns:
-        None
-    """
-    os.makedirs(args.output_dir, exist_ok=True)
-    os.makedirs(args.mask_dir, exist_ok=True)
-    output_image_path = os.path.join(args.output_dir, f'{args.uid}_output.jpg')
-    image = load_image(args.image_path)
-    extended_image = extend_image(image, target_width=target_width, target_height=target_height, roi_scale=roi_scale)
-    mask = generate_mask_from_bbox(extended_image, segmentation_model, detection_model)
-    mask_image = Image.fromarray(mask)
-    inverted_mask = invert_mask(mask_image)
-    #inverted_mask = Image.fromarray(inverted_mask)
-    output_image = kandinsky_inpainting_inference(args.prompt, args.negative_prompt, extended_image, inverted_mask)
-    output_image.save(output_image_path)
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Perform Outpainting on an image.')
-    parser.add_argument('--image_path', type=str, required=True, help='Path to the input image.')
-    parser.add_argument('--prompt', type=str, required=True, help='Prompt for the Kandinsky inpainting.')
-    parser.add_argument('--negative_prompt', type=str, required=True, help='Negative prompt for the Kandinsky inpainting.')
-    parser.add_argument('--output_dir', type=str, required=True, help='Directory to save the output image.')
-    parser.add_argument('--mask_dir', type=str, required=True, help='Directory to save the mask image.')
-    parser.add_argument('--uid', type=str, required=True, help='Unique identifier for the image and mask.')
-    args = parser.parse_args()
-    main(args)
\ No newline at end of file
diff --git a/scripts/segment_everything.py b/scripts/segment_everything.py
deleted file mode 100644
index c2e9532..0000000
--- a/scripts/segment_everything.py
+++ /dev/null
@@ -1,125 +0,0 @@
-from ultralytics import YOLO
-from transformers import SamModel, SamProcessor
-import torch
-from diffusers.utils import load_image
-from PIL import Image, ImageOps
-import numpy as np
-import torch
-from diffusers import StableVideoDiffusionPipeline
-
-
-
-
-
-
-
-
-
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-
-
-
-
-
-
-
-
-def extend_image(image, target_width, target_height, roi_scale=0.5):
-    """
-    Extends an image to fit within the specified target dimensions while maintaining the aspect ratio.
-
-    Args:
-        image (PIL.Image.Image): The image to be extended.
-        target_width (int): The desired width of the extended image.
-        target_height (int): The desired height of the extended image.
-        roi_scale (float, optional): The scale factor applied to the resized image. Defaults to 0.5.
-
-    Returns:
-        PIL.Image.Image: The extended image.
-    """
-    original_image = image
-    original_width, original_height = original_image.size
-    scale = min(target_width / original_width, target_height / original_height)
-    new_width = int(original_width * scale * roi_scale)
-    new_height = int(original_height * scale * roi_scale)
-    original_image_resized = original_image.resize((new_width, new_height))
-    extended_image = Image.new("RGB", (target_width, target_height), "white")
-    paste_x = (target_width - new_width) // 2
-    paste_y = (target_height - new_height) // 2
-    extended_image.paste(original_image_resized, (paste_x, paste_y))
-    return extended_image
-
-
-
-
-def generate_mask_from_bbox(image: Image, segmentation_model: str ,detection_model) -> Image:
-    """
-    Generates a mask from the bounding box of an image using YOLO and SAM-ViT models.
-
-    Args:
-        image_path (str): The path to the input image.
-
-    Returns:
-        numpy.ndarray: The generated mask as a NumPy array.
-    """
-
-    yolo = YOLO(detection_model)
-    processor = SamProcessor.from_pretrained(segmentation_model)
-    model = SamModel.from_pretrained(segmentation_model).to(device)
-    results = yolo(image)
-    bboxes = results[0].boxes.xyxy.tolist()
-    input_boxes = [[[bboxes[0]]]]
-    inputs = processor(load_image(image), input_boxes=input_boxes, return_tensors="pt").to("cuda")
-    with torch.no_grad():
-        outputs = model(**inputs)
-    mask = processor.image_processor.post_process_masks(
-        outputs.pred_masks.cpu(),
-        inputs["original_sizes"].cpu(),
-        inputs["reshaped_input_sizes"].cpu()
-    )[0][0][0].numpy()
-    return mask
-
-
-
-
-
-def invert_mask(mask_image: Image) -> np.ndarray:
-    """Method to invert mask
-    Args:
-        mask_image (np.ndarray): input mask image
-    Returns:
-        np.ndarray: inverted mask image
-    """
-    inverted_mask_image = ImageOps.invert(mask_image)
-    return inverted_mask_image
-
-
-
-
-
-
-
-def fetch_video_pipeline(video_model_name):
-    """
-    Fetches the video pipeline for image processing.
-
-    Args:
-        video_model_name (str): The name of the video model.
-
-    Returns:
-        pipe (StableVideoDiffusionPipeline): The video pipeline.
-
-    """
-    pipe = StableVideoDiffusionPipeline.from_pretrained(
-        video_model_name, torch_dtype=torch.float16,
-    )
-    pipe = pipe.to('cuda')
-    pipe.unet = torch.compile(pipe.unet)
-
-
-    return pipe
-
diff --git a/scripts/video_pipeline.py b/scripts/video_pipeline.py
deleted file mode 100644
index e69de29..0000000
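Taken together with the requirements.txt additions (accelerate, datasets, peft) and the Config rewrite, this change set points the repo at SDXL LoRA fine-tuning on hahminlew/kream-product-blip-captions, replacing the removed inpainting/video serving code. A sketch of the presumed data-and-adapter setup follows; nothing below is in the diff itself, and the target_modules list is an assumption that follows the attention-projection convention of the diffusers LoRA examples (add_adapter requires a recent diffusers with peft installed):

    import torch
    from datasets import load_dataset
    from diffusers import UNet2DConditionModel
    from peft import LoraConfig

    from config import Config  # the new scripts/config.py above

    config = Config()

    # Image/caption pairs; column names match Config.image_column / caption_column.
    dataset = load_dataset(config.dataset_name, split="train")

    unet = UNet2DConditionModel.from_pretrained(
        config.pretrained_model_name_or_path, subfolder="unet", torch_dtype=torch.float16
    )
    unet.requires_grad_(False)  # freeze base weights, as the diffusers examples do

    # Rank comes from Config.rank (4); the adapter is injected into the usual
    # SDXL attention projections.
    lora_config = LoraConfig(
        r=config.rank,
        lora_alpha=config.rank,
        init_lora_weights="gaussian",
        target_modules=["to_k", "to_q", "to_v", "to_out.0"],
    )
    unet.add_adapter(lora_config)  # injected LoRA layers are created trainable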