import re

import gradio as gr
import numpy as np
import torch
import gensim
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from gensim.parsing.preprocessing import STOPWORDS as stop_words
from keybert import KeyBERT
from torchvision import transforms


def sketch_transform(size):
    """Return a transform that resizes the sketch tensor to the given size."""
    return transforms.Compose([
        transforms.Resize(size)
    ])


def keep_english(text):
    """Strip everything except ASCII letters, digits, and whitespace."""
    return re.sub(r'[^a-zA-Z0-9\s]', '', text.strip())


def remove_stopwords(text):
    """Drop English stopwords after simple tokenization."""
    return " ".join(
        word for word in gensim.utils.simple_preprocess(text, deacc=True, min_len=0)
        if word not in stop_words
    )


def remove_unwanted_words(text, unwanted_words):
    """Remove a fixed list of city names and filler words from the prompt."""
    text_list = text.lower().split()
    resultwords = [word for word in text_list if word not in unwanted_words]
    return ' '.join(resultwords)


def generate_image(image, text_prompt, text_city):
    unwanted_words = ['seattle', 'chicago', 'sanfrancisco', 'newyork',
                      'image', 'shows', 'generally', 'sky', 'view']
    n = 5
    diversity = 0.3
    ngram = (3, 3)
    kw_model = KeyBERT()

    # Clean the user prompt, then extract diverse keyphrases with KeyBERT (MMR).
    prompt = keep_english(text_prompt)
    prompt = remove_unwanted_words(prompt, unwanted_words)
    keywords = kw_model.extract_keywords(prompt, top_n=n, use_mmr=True,
                                         diversity=diversity,
                                         keyphrase_ngram_range=ngram)
    keywords_substring = ", ".join(key[0] for key in keywords)
    prompt = (f"Realistic {text_city} aerial satellite top view image with high quality details "
              f"with buildings and roads in {text_city} that probably has the following objects "
              f"and characteristics: {keywords_substring}")
    print(prompt)

    generated_image = None
    if image is not None:
        # The ImageEditor value is a dict; the painted sketch lives in the first layer.
        sketch = image['layers'][0]
        sketch = transforms.ToTensor()(sketch).unsqueeze(0)
        sketch = sketch_transform(size=(256, 256))(sketch)
        with torch.no_grad():  # disable gradient calculation for inference
            model_output = pipe(prompt, num_inference_steps=20,
                                generator=torch.manual_seed(0), image=sketch)
            generated_image = model_output.images[0]
    return generated_image


# Load the fine-tuned ControlNet and attach it to Stable Diffusion v1.5.
controlnet_model_name_or_path = "./controlnet"
pretrained_model_name_or_path = "runwayml/stable-diffusion-v1-5"
controlnet = ControlNetModel.from_pretrained(controlnet_model_name_or_path,
                                             torch_dtype=torch.float16,
                                             conditioning_channels=3)
pipe = StableDiffusionControlNetPipeline.from_pretrained(pretrained_model_name_or_path,
                                                         controlnet=controlnet,
                                                         torch_dtype=torch.float16,
                                                         safety_checker=None)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload(device=device)

# Gradio UI: a fixed-palette sketch editor plus prompt and city text boxes.
iface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.ImageEditor(
            sources=(),
            image_mode='RGB',
            type='pil',
            brush=gr.Brush(colors=["#ffb266",    # building
                                   "#4059ff",    # parking
                                   "#66ff66",    # grass
                                   # "#009900",  # forest
                                   "#cce5ff",    # water
                                   # "#c0c0c0",  # path
                                   "#606060"     # road
                                   ],
                           color_mode="fixed")
        ),
        gr.Textbox(placeholder='residential area with a lot of trees', label='Prompt'),
        gr.Textbox(placeholder='Seattle, SanFrancisco, NewYork, Chicago', label='City')
    ],
    outputs="image"
)

iface.launch()