aclicheroux commited on
Commit
e0c66e4
1 Parent(s): d9f9915

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. CODEOWNERS +1 -0
  2. app.py +137 -0
  3. artists.csv +0 -0
  4. environment-wsl2.yaml +11 -0
  5. javascript/aspectRatioOverlay.js +119 -0
  6. javascript/contextMenus.js +177 -0
  7. javascript/dragdrop.js +86 -0
  8. javascript/edit-attention.js +45 -0
  9. javascript/hints.js +121 -0
  10. javascript/imageMaskFix.js +45 -0
  11. javascript/imageviewer.js +236 -0
  12. javascript/notification.js +49 -0
  13. javascript/progressbar.js +76 -0
  14. javascript/textualInversion.js +8 -0
  15. javascript/ui.js +234 -0
  16. launch.py +169 -0
  17. modules/artists.py +25 -0
  18. modules/bsrgan_model.py +76 -0
  19. modules/bsrgan_model_arch.py +102 -0
  20. modules/codeformer/codeformer_arch.py +278 -0
  21. modules/codeformer/vqgan_arch.py +437 -0
  22. modules/codeformer_model.py +140 -0
  23. modules/deepbooru.py +173 -0
  24. modules/devices.py +72 -0
  25. modules/errors.py +10 -0
  26. modules/esrgan_model.py +158 -0
  27. modules/esrgan_model_arch.py +80 -0
  28. modules/extras.py +222 -0
  29. modules/face_restoration.py +19 -0
  30. modules/generation_parameters_copypaste.py +101 -0
  31. modules/gfpgan_model.py +115 -0
  32. modules/hypernetworks/hypernetwork.py +314 -0
  33. modules/hypernetworks/ui.py +47 -0
  34. modules/images.py +465 -0
  35. modules/img2img.py +137 -0
  36. modules/interrogate.py +171 -0
  37. modules/ldsr_model.py +54 -0
  38. modules/ldsr_model_arch.py +222 -0
  39. modules/lowvram.py +82 -0
  40. modules/masking.py +99 -0
  41. modules/memmon.py +85 -0
  42. modules/modelloader.py +153 -0
  43. modules/ngrok.py +15 -0
  44. modules/paths.py +40 -0
  45. modules/processing.py +721 -0
  46. modules/prompt_parser.py +366 -0
  47. modules/realesrgan_model.py +133 -0
  48. modules/safe.py +110 -0
  49. modules/safety.py +42 -0
  50. modules/scripts.py +201 -0
CODEOWNERS ADDED
@@ -0,0 +1 @@
 
 
1
+ * @AUTOMATIC1111
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import threading
3
+ import time
4
+ import importlib
5
+ import signal
6
+ import threading
7
+
8
+ from fastapi.middleware.gzip import GZipMiddleware
9
+
10
+ from modules.paths import script_path
11
+
12
+ from modules import devices, sd_samplers
13
+ import modules.codeformer_model as codeformer
14
+ import modules.extras
15
+ import modules.face_restoration
16
+ import modules.gfpgan_model as gfpgan
17
+ import modules.img2img
18
+
19
+ import modules.lowvram
20
+ import modules.paths
21
+ import modules.scripts
22
+ import modules.sd_hijack
23
+ import modules.sd_models
24
+ import modules.shared as shared
25
+ import modules.txt2img
26
+
27
+ import modules.ui
28
+ from modules import devices
29
+ from modules import modelloader
30
+ from modules.paths import script_path
31
+ from modules.shared import cmd_opts
32
+ import modules.hypernetworks.hypernetwork
33
+
34
+
35
+ queue_lock = threading.Lock()
36
+
37
+
38
+ def wrap_queued_call(func):
39
+ def f(*args, **kwargs):
40
+ with queue_lock:
41
+ res = func(*args, **kwargs)
42
+
43
+ return res
44
+
45
+ return f
46
+
47
+
48
+ def wrap_gradio_gpu_call(func, extra_outputs=None):
49
+ def f(*args, **kwargs):
50
+ devices.torch_gc()
51
+
52
+ shared.state.sampling_step = 0
53
+ shared.state.job_count = -1
54
+ shared.state.job_no = 0
55
+ shared.state.job_timestamp = shared.state.get_job_timestamp()
56
+ shared.state.current_latent = None
57
+ shared.state.current_image = None
58
+ shared.state.current_image_sampling_step = 0
59
+ shared.state.skipped = False
60
+ shared.state.interrupted = False
61
+ shared.state.textinfo = None
62
+
63
+ with queue_lock:
64
+ res = func(*args, **kwargs)
65
+
66
+ shared.state.job = ""
67
+ shared.state.job_count = 0
68
+
69
+ devices.torch_gc()
70
+
71
+ return res
72
+
73
+ return modules.ui.wrap_gradio_call(f, extra_outputs=extra_outputs)
74
+
75
+ def initialize():
76
+ modelloader.cleanup_models()
77
+ modules.sd_models.setup_model()
78
+ codeformer.setup_model(cmd_opts.codeformer_models_path)
79
+ gfpgan.setup_model(cmd_opts.gfpgan_models_path)
80
+ shared.face_restorers.append(modules.face_restoration.FaceRestoration())
81
+ modelloader.load_upscalers()
82
+
83
+ modules.scripts.load_scripts(os.path.join(script_path, "scripts"))
84
+
85
+ shared.sd_model = modules.sd_models.load_model()
86
+ shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(shared.sd_model)))
87
+ shared.opts.onchange("sd_hypernetwork", wrap_queued_call(lambda: modules.hypernetworks.hypernetwork.load_hypernetwork(shared.opts.sd_hypernetwork)))
88
+ shared.opts.onchange("sd_hypernetwork_strength", modules.hypernetworks.hypernetwork.apply_strength)
89
+
90
+
91
+ def webui():
92
+ initialize()
93
+
94
+ # make the program just exit at ctrl+c without waiting for anything
95
+ def sigint_handler(sig, frame):
96
+ print(f'Interrupted with signal {sig} in {frame}')
97
+ os._exit(0)
98
+
99
+ signal.signal(signal.SIGINT, sigint_handler)
100
+
101
+ while 1:
102
+
103
+ demo = modules.ui.create_ui(wrap_gradio_gpu_call=wrap_gradio_gpu_call)
104
+
105
+ app, local_url, share_url = demo.launch(
106
+ share=cmd_opts.share,
107
+ server_name="0.0.0.0" if cmd_opts.listen else None,
108
+ server_port=cmd_opts.port,
109
+ debug=cmd_opts.gradio_debug,
110
+ auth=[tuple(cred.split(':')) for cred in cmd_opts.gradio_auth.strip('"').split(',')] if cmd_opts.gradio_auth else None,
111
+ inbrowser=cmd_opts.autolaunch,
112
+ prevent_thread_lock=True
113
+ )
114
+
115
+ app.add_middleware(GZipMiddleware, minimum_size=1000)
116
+
117
+ while 1:
118
+ time.sleep(0.5)
119
+ if getattr(demo, 'do_restart', False):
120
+ time.sleep(0.5)
121
+ demo.close()
122
+ time.sleep(0.5)
123
+ break
124
+
125
+ sd_samplers.set_samplers()
126
+
127
+ print('Reloading Custom Scripts')
128
+ modules.scripts.reload_scripts(os.path.join(script_path, "scripts"))
129
+ print('Reloading modules: modules.ui')
130
+ importlib.reload(modules.ui)
131
+ print('Refreshing Model List')
132
+ modules.sd_models.list_models()
133
+ print('Restarting Gradio')
134
+
135
+
136
+ if __name__ == "__main__":
137
+ webui()
artists.csv ADDED
The diff for this file is too large to render. See raw diff
 
environment-wsl2.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: automatic
2
+ channels:
3
+ - pytorch
4
+ - defaults
5
+ dependencies:
6
+ - python=3.10
7
+ - pip=22.2.2
8
+ - cudatoolkit=11.3
9
+ - pytorch=1.12.1
10
+ - torchvision=0.13.1
11
+ - numpy=1.23.1
javascript/aspectRatioOverlay.js ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ let currentWidth = null;
3
+ let currentHeight = null;
4
+ let arFrameTimeout = setTimeout(function(){},0);
5
+
6
+ function dimensionChange(e,dimname){
7
+
8
+ if(dimname == 'Width'){
9
+ currentWidth = e.target.value*1.0
10
+ }
11
+ if(dimname == 'Height'){
12
+ currentHeight = e.target.value*1.0
13
+ }
14
+
15
+ var inImg2img = Boolean(gradioApp().querySelector("button.rounded-t-lg.border-gray-200"))
16
+
17
+ if(!inImg2img){
18
+ return;
19
+ }
20
+
21
+ var img2imgMode = gradioApp().querySelector('#mode_img2img.tabs > div > button.rounded-t-lg.border-gray-200')
22
+ if(img2imgMode){
23
+ img2imgMode=img2imgMode.innerText
24
+ }else{
25
+ return;
26
+ }
27
+
28
+ var redrawImage = gradioApp().querySelector('div[data-testid=image] img');
29
+ var inpaintImage = gradioApp().querySelector('#img2maskimg div[data-testid=image] img')
30
+
31
+ var targetElement = null;
32
+
33
+ if(img2imgMode=='img2img' && redrawImage){
34
+ targetElement = redrawImage;
35
+ }else if(img2imgMode=='Inpaint' && inpaintImage){
36
+ targetElement = inpaintImage;
37
+ }
38
+
39
+ if(targetElement){
40
+
41
+ var arPreviewRect = gradioApp().querySelector('#imageARPreview');
42
+ if(!arPreviewRect){
43
+ arPreviewRect = document.createElement('div')
44
+ arPreviewRect.id = "imageARPreview";
45
+ gradioApp().getRootNode().appendChild(arPreviewRect)
46
+ }
47
+
48
+
49
+
50
+ var viewportOffset = targetElement.getBoundingClientRect();
51
+
52
+ viewportscale = Math.min( targetElement.clientWidth/targetElement.naturalWidth, targetElement.clientHeight/targetElement.naturalHeight )
53
+
54
+ scaledx = targetElement.naturalWidth*viewportscale
55
+ scaledy = targetElement.naturalHeight*viewportscale
56
+
57
+ cleintRectTop = (viewportOffset.top+window.scrollY)
58
+ cleintRectLeft = (viewportOffset.left+window.scrollX)
59
+ cleintRectCentreY = cleintRectTop + (targetElement.clientHeight/2)
60
+ cleintRectCentreX = cleintRectLeft + (targetElement.clientWidth/2)
61
+
62
+ viewRectTop = cleintRectCentreY-(scaledy/2)
63
+ viewRectLeft = cleintRectCentreX-(scaledx/2)
64
+ arRectWidth = scaledx
65
+ arRectHeight = scaledy
66
+
67
+ arscale = Math.min( arRectWidth/currentWidth, arRectHeight/currentHeight )
68
+ arscaledx = currentWidth*arscale
69
+ arscaledy = currentHeight*arscale
70
+
71
+ arRectTop = cleintRectCentreY-(arscaledy/2)
72
+ arRectLeft = cleintRectCentreX-(arscaledx/2)
73
+ arRectWidth = arscaledx
74
+ arRectHeight = arscaledy
75
+
76
+ arPreviewRect.style.top = arRectTop+'px';
77
+ arPreviewRect.style.left = arRectLeft+'px';
78
+ arPreviewRect.style.width = arRectWidth+'px';
79
+ arPreviewRect.style.height = arRectHeight+'px';
80
+
81
+ clearTimeout(arFrameTimeout);
82
+ arFrameTimeout = setTimeout(function(){
83
+ arPreviewRect.style.display = 'none';
84
+ },2000);
85
+
86
+ arPreviewRect.style.display = 'block';
87
+
88
+ }
89
+
90
+ }
91
+
92
+
93
+ onUiUpdate(function(){
94
+ var arPreviewRect = gradioApp().querySelector('#imageARPreview');
95
+ if(arPreviewRect){
96
+ arPreviewRect.style.display = 'none';
97
+ }
98
+ var inImg2img = Boolean(gradioApp().querySelector("button.rounded-t-lg.border-gray-200"))
99
+ if(inImg2img){
100
+ let inputs = gradioApp().querySelectorAll('input');
101
+ inputs.forEach(function(e){
102
+ let parentLabel = e.parentElement.querySelector('label')
103
+ if(parentLabel && parentLabel.innerText){
104
+ if(!e.classList.contains('scrollwatch')){
105
+ if(parentLabel.innerText == 'Width' || parentLabel.innerText == 'Height'){
106
+ e.addEventListener('input', function(e){dimensionChange(e,parentLabel.innerText)} )
107
+ e.classList.add('scrollwatch')
108
+ }
109
+ if(parentLabel.innerText == 'Width'){
110
+ currentWidth = e.value*1.0
111
+ }
112
+ if(parentLabel.innerText == 'Height'){
113
+ currentHeight = e.value*1.0
114
+ }
115
+ }
116
+ }
117
+ })
118
+ }
119
+ });
javascript/contextMenus.js ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ contextMenuInit = function(){
3
+ let eventListenerApplied=false;
4
+ let menuSpecs = new Map();
5
+
6
+ const uid = function(){
7
+ return Date.now().toString(36) + Math.random().toString(36).substr(2);
8
+ }
9
+
10
+ function showContextMenu(event,element,menuEntries){
11
+ let posx = event.clientX + document.body.scrollLeft + document.documentElement.scrollLeft;
12
+ let posy = event.clientY + document.body.scrollTop + document.documentElement.scrollTop;
13
+
14
+ let oldMenu = gradioApp().querySelector('#context-menu')
15
+ if(oldMenu){
16
+ oldMenu.remove()
17
+ }
18
+
19
+ let tabButton = uiCurrentTab
20
+ let baseStyle = window.getComputedStyle(tabButton)
21
+
22
+ const contextMenu = document.createElement('nav')
23
+ contextMenu.id = "context-menu"
24
+ contextMenu.style.background = baseStyle.background
25
+ contextMenu.style.color = baseStyle.color
26
+ contextMenu.style.fontFamily = baseStyle.fontFamily
27
+ contextMenu.style.top = posy+'px'
28
+ contextMenu.style.left = posx+'px'
29
+
30
+
31
+
32
+ const contextMenuList = document.createElement('ul')
33
+ contextMenuList.className = 'context-menu-items';
34
+ contextMenu.append(contextMenuList);
35
+
36
+ menuEntries.forEach(function(entry){
37
+ let contextMenuEntry = document.createElement('a')
38
+ contextMenuEntry.innerHTML = entry['name']
39
+ contextMenuEntry.addEventListener("click", function(e) {
40
+ entry['func']();
41
+ })
42
+ contextMenuList.append(contextMenuEntry);
43
+
44
+ })
45
+
46
+ gradioApp().getRootNode().appendChild(contextMenu)
47
+
48
+ let menuWidth = contextMenu.offsetWidth + 4;
49
+ let menuHeight = contextMenu.offsetHeight + 4;
50
+
51
+ let windowWidth = window.innerWidth;
52
+ let windowHeight = window.innerHeight;
53
+
54
+ if ( (windowWidth - posx) < menuWidth ) {
55
+ contextMenu.style.left = windowWidth - menuWidth + "px";
56
+ }
57
+
58
+ if ( (windowHeight - posy) < menuHeight ) {
59
+ contextMenu.style.top = windowHeight - menuHeight + "px";
60
+ }
61
+
62
+ }
63
+
64
+ function appendContextMenuOption(targetEmementSelector,entryName,entryFunction){
65
+
66
+ currentItems = menuSpecs.get(targetEmementSelector)
67
+
68
+ if(!currentItems){
69
+ currentItems = []
70
+ menuSpecs.set(targetEmementSelector,currentItems);
71
+ }
72
+ let newItem = {'id':targetEmementSelector+'_'+uid(),
73
+ 'name':entryName,
74
+ 'func':entryFunction,
75
+ 'isNew':true}
76
+
77
+ currentItems.push(newItem)
78
+ return newItem['id']
79
+ }
80
+
81
+ function removeContextMenuOption(uid){
82
+ menuSpecs.forEach(function(v,k) {
83
+ let index = -1
84
+ v.forEach(function(e,ei){if(e['id']==uid){index=ei}})
85
+ if(index>=0){
86
+ v.splice(index, 1);
87
+ }
88
+ })
89
+ }
90
+
91
+ function addContextMenuEventListener(){
92
+ if(eventListenerApplied){
93
+ return;
94
+ }
95
+ gradioApp().addEventListener("click", function(e) {
96
+ let source = e.composedPath()[0]
97
+ if(source.id && source.id.indexOf('check_progress')>-1){
98
+ return
99
+ }
100
+
101
+ let oldMenu = gradioApp().querySelector('#context-menu')
102
+ if(oldMenu){
103
+ oldMenu.remove()
104
+ }
105
+ });
106
+ gradioApp().addEventListener("contextmenu", function(e) {
107
+ let oldMenu = gradioApp().querySelector('#context-menu')
108
+ if(oldMenu){
109
+ oldMenu.remove()
110
+ }
111
+ menuSpecs.forEach(function(v,k) {
112
+ if(e.composedPath()[0].matches(k)){
113
+ showContextMenu(e,e.composedPath()[0],v)
114
+ e.preventDefault()
115
+ return
116
+ }
117
+ })
118
+ });
119
+ eventListenerApplied=true
120
+
121
+ }
122
+
123
+ return [appendContextMenuOption, removeContextMenuOption, addContextMenuEventListener]
124
+ }
125
+
126
+ initResponse = contextMenuInit();
127
+ appendContextMenuOption = initResponse[0];
128
+ removeContextMenuOption = initResponse[1];
129
+ addContextMenuEventListener = initResponse[2];
130
+
131
+ (function(){
132
+ //Start example Context Menu Items
133
+ let generateOnRepeat = function(genbuttonid,interruptbuttonid){
134
+ let genbutton = gradioApp().querySelector(genbuttonid);
135
+ let interruptbutton = gradioApp().querySelector(interruptbuttonid);
136
+ if(!interruptbutton.offsetParent){
137
+ genbutton.click();
138
+ }
139
+ clearInterval(window.generateOnRepeatInterval)
140
+ window.generateOnRepeatInterval = setInterval(function(){
141
+ if(!interruptbutton.offsetParent){
142
+ genbutton.click();
143
+ }
144
+ },
145
+ 500)
146
+ }
147
+
148
+ appendContextMenuOption('#txt2img_generate','Generate forever',function(){
149
+ generateOnRepeat('#txt2img_generate','#txt2img_interrupt');
150
+ })
151
+ appendContextMenuOption('#img2img_generate','Generate forever',function(){
152
+ generateOnRepeat('#img2img_generate','#img2img_interrupt');
153
+ })
154
+
155
+ let cancelGenerateForever = function(){
156
+ clearInterval(window.generateOnRepeatInterval)
157
+ }
158
+
159
+ appendContextMenuOption('#txt2img_interrupt','Cancel generate forever',cancelGenerateForever)
160
+ appendContextMenuOption('#txt2img_generate', 'Cancel generate forever',cancelGenerateForever)
161
+ appendContextMenuOption('#img2img_interrupt','Cancel generate forever',cancelGenerateForever)
162
+ appendContextMenuOption('#img2img_generate', 'Cancel generate forever',cancelGenerateForever)
163
+
164
+ appendContextMenuOption('#roll','Roll three',
165
+ function(){
166
+ let rollbutton = get_uiCurrentTabContent().querySelector('#roll');
167
+ setTimeout(function(){rollbutton.click()},100)
168
+ setTimeout(function(){rollbutton.click()},200)
169
+ setTimeout(function(){rollbutton.click()},300)
170
+ }
171
+ )
172
+ })();
173
+ //End example Context Menu Items
174
+
175
+ onUiUpdate(function(){
176
+ addContextMenuEventListener()
177
+ });
javascript/dragdrop.js ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // allows drag-dropping files into gradio image elements, and also pasting images from clipboard
2
+
3
+ function isValidImageList( files ) {
4
+ return files && files?.length === 1 && ['image/png', 'image/gif', 'image/jpeg'].includes(files[0].type);
5
+ }
6
+
7
+ function dropReplaceImage( imgWrap, files ) {
8
+ if ( ! isValidImageList( files ) ) {
9
+ return;
10
+ }
11
+
12
+ imgWrap.querySelector('.modify-upload button + button, .touch-none + div button + button')?.click();
13
+ const callback = () => {
14
+ const fileInput = imgWrap.querySelector('input[type="file"]');
15
+ if ( fileInput ) {
16
+ fileInput.files = files;
17
+ fileInput.dispatchEvent(new Event('change'));
18
+ }
19
+ };
20
+
21
+ if ( imgWrap.closest('#pnginfo_image') ) {
22
+ // special treatment for PNG Info tab, wait for fetch request to finish
23
+ const oldFetch = window.fetch;
24
+ window.fetch = async (input, options) => {
25
+ const response = await oldFetch(input, options);
26
+ if ( 'api/predict/' === input ) {
27
+ const content = await response.text();
28
+ window.fetch = oldFetch;
29
+ window.requestAnimationFrame( () => callback() );
30
+ return new Response(content, {
31
+ status: response.status,
32
+ statusText: response.statusText,
33
+ headers: response.headers
34
+ })
35
+ }
36
+ return response;
37
+ };
38
+ } else {
39
+ window.requestAnimationFrame( () => callback() );
40
+ }
41
+ }
42
+
43
+ window.document.addEventListener('dragover', e => {
44
+ const target = e.composedPath()[0];
45
+ const imgWrap = target.closest('[data-testid="image"]');
46
+ if ( !imgWrap ) {
47
+ return;
48
+ }
49
+ e.stopPropagation();
50
+ e.preventDefault();
51
+ e.dataTransfer.dropEffect = 'copy';
52
+ });
53
+
54
+ window.document.addEventListener('drop', e => {
55
+ const target = e.composedPath()[0];
56
+ const imgWrap = target.closest('[data-testid="image"]');
57
+ if ( !imgWrap ) {
58
+ return;
59
+ }
60
+ e.stopPropagation();
61
+ e.preventDefault();
62
+ const files = e.dataTransfer.files;
63
+ dropReplaceImage( imgWrap, files );
64
+ });
65
+
66
+ window.addEventListener('paste', e => {
67
+ const files = e.clipboardData.files;
68
+ if ( ! isValidImageList( files ) ) {
69
+ return;
70
+ }
71
+
72
+ const visibleImageFields = [...gradioApp().querySelectorAll('[data-testid="image"]')]
73
+ .filter(el => uiElementIsVisible(el));
74
+ if ( ! visibleImageFields.length ) {
75
+ return;
76
+ }
77
+
78
+ const firstFreeImageField = visibleImageFields
79
+ .filter(el => el.querySelector('input[type=file]'))?.[0];
80
+
81
+ dropReplaceImage(
82
+ firstFreeImageField ?
83
+ firstFreeImageField :
84
+ visibleImageFields[visibleImageFields.length - 1]
85
+ , files );
86
+ });
javascript/edit-attention.js ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ addEventListener('keydown', (event) => {
2
+ let target = event.originalTarget || event.composedPath()[0];
3
+ if (!target.hasAttribute("placeholder")) return;
4
+ if (!target.placeholder.toLowerCase().includes("prompt")) return;
5
+
6
+ let plus = "ArrowUp"
7
+ let minus = "ArrowDown"
8
+ if (event.key != plus && event.key != minus) return;
9
+
10
+ selectionStart = target.selectionStart;
11
+ selectionEnd = target.selectionEnd;
12
+ if(selectionStart == selectionEnd) return;
13
+
14
+ event.preventDefault();
15
+
16
+ if (selectionStart == 0 || target.value[selectionStart - 1] != "(") {
17
+ target.value = target.value.slice(0, selectionStart) +
18
+ "(" + target.value.slice(selectionStart, selectionEnd) + ":1.0)" +
19
+ target.value.slice(selectionEnd);
20
+
21
+ target.focus();
22
+ target.selectionStart = selectionStart + 1;
23
+ target.selectionEnd = selectionEnd + 1;
24
+
25
+ } else {
26
+ end = target.value.slice(selectionEnd + 1).indexOf(")") + 1;
27
+ weight = parseFloat(target.value.slice(selectionEnd + 1, selectionEnd + 1 + end));
28
+ if (isNaN(weight)) return;
29
+ if (event.key == minus) weight -= 0.1;
30
+ if (event.key == plus) weight += 0.1;
31
+
32
+ weight = parseFloat(weight.toPrecision(12));
33
+
34
+ target.value = target.value.slice(0, selectionEnd + 1) +
35
+ weight +
36
+ target.value.slice(selectionEnd + 1 + end - 1);
37
+
38
+ target.focus();
39
+ target.selectionStart = selectionStart;
40
+ target.selectionEnd = selectionEnd;
41
+ }
42
+ // Since we've modified a Gradio Textbox component manually, we need to simulate an `input` DOM event to ensure its
43
+ // internal Svelte data binding remains in sync.
44
+ target.dispatchEvent(new Event("input", { bubbles: true }));
45
+ });
javascript/hints.js ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // mouseover tooltips for various UI elements
2
+
3
+ titles = {
4
+ "Sampling steps": "How many times to improve the generated image iteratively; higher values take longer; very low values can produce bad results",
5
+ "Sampling method": "Which algorithm to use to produce the image",
6
+ "GFPGAN": "Restore low quality faces using GFPGAN neural network",
7
+ "Euler a": "Euler Ancestral - very creative, each can get a completely different picture depending on step count, setting steps to higher than 30-40 does not help",
8
+ "DDIM": "Denoising Diffusion Implicit Models - best at inpainting",
9
+
10
+ "Batch count": "How many batches of images to create",
11
+ "Batch size": "How many image to create in a single batch",
12
+ "CFG Scale": "Classifier Free Guidance Scale - how strongly the image should conform to prompt - lower values produce more creative results",
13
+ "Seed": "A value that determines the output of random number generator - if you create an image with same parameters and seed as another image, you'll get the same result",
14
+ "\u{1f3b2}\ufe0f": "Set seed to -1, which will cause a new random number to be used every time",
15
+ "\u267b\ufe0f": "Reuse seed from last generation, mostly useful if it was randomed",
16
+ "\u{1f3a8}": "Add a random artist to the prompt.",
17
+ "\u2199\ufe0f": "Read generation parameters from prompt or last generation if prompt is empty into user interface.",
18
+ "\u{1f4c2}": "Open images output directory",
19
+
20
+ "Inpaint a part of image": "Draw a mask over an image, and the script will regenerate the masked area with content according to prompt",
21
+ "SD upscale": "Upscale image normally, split result into tiles, improve each tile using img2img, merge whole image back",
22
+
23
+ "Just resize": "Resize image to target resolution. Unless height and width match, you will get incorrect aspect ratio.",
24
+ "Crop and resize": "Resize the image so that entirety of target resolution is filled with the image. Crop parts that stick out.",
25
+ "Resize and fill": "Resize the image so that entirety of image is inside target resolution. Fill empty space with image's colors.",
26
+
27
+ "Mask blur": "How much to blur the mask before processing, in pixels.",
28
+ "Masked content": "What to put inside the masked area before processing it with Stable Diffusion.",
29
+ "fill": "fill it with colors of the image",
30
+ "original": "keep whatever was there originally",
31
+ "latent noise": "fill it with latent space noise",
32
+ "latent nothing": "fill it with latent space zeroes",
33
+ "Inpaint at full resolution": "Upscale masked region to target resolution, do inpainting, downscale back and paste into original image",
34
+
35
+ "Denoising strength": "Determines how little respect the algorithm should have for image's content. At 0, nothing will change, and at 1 you'll get an unrelated image. With values below 1.0, processing will take less steps than the Sampling Steps slider specifies.",
36
+ "Denoising strength change factor": "In loopback mode, on each loop the denoising strength is multiplied by this value. <1 means decreasing variety so your sequence will converge on a fixed picture. >1 means increasing variety so your sequence will become more and more chaotic.",
37
+
38
+ "Skip": "Stop processing current image and continue processing.",
39
+ "Interrupt": "Stop processing images and return any results accumulated so far.",
40
+ "Save": "Write image to a directory (default - log/images) and generation parameters into csv file.",
41
+
42
+ "X values": "Separate values for X axis using commas.",
43
+ "Y values": "Separate values for Y axis using commas.",
44
+
45
+ "None": "Do not do anything special",
46
+ "Prompt matrix": "Separate prompts into parts using vertical pipe character (|) and the script will create a picture for every combination of them (except for the first part, which will be present in all combinations)",
47
+ "X/Y plot": "Create a grid where images will have different parameters. Use inputs below to specify which parameters will be shared by columns and rows",
48
+ "Custom code": "Run Python code. Advanced user only. Must run program with --allow-code for this to work",
49
+
50
+ "Prompt S/R": "Separate a list of words with commas, and the first word will be used as a keyword: script will search for this word in the prompt, and replace it with others",
51
+ "Prompt order": "Separate a list of words with commas, and the script will make a variation of prompt with those words for their every possible order",
52
+
53
+ "Tiling": "Produce an image that can be tiled.",
54
+ "Tile overlap": "For SD upscale, how much overlap in pixels should there be between tiles. Tiles overlap so that when they are merged back into one picture, there is no clearly visible seam.",
55
+
56
+ "Variation seed": "Seed of a different picture to be mixed into the generation.",
57
+ "Variation strength": "How strong of a variation to produce. At 0, there will be no effect. At 1, you will get the complete picture with variation seed (except for ancestral samplers, where you will just get something).",
58
+ "Resize seed from height": "Make an attempt to produce a picture similar to what would have been produced with same seed at specified resolution",
59
+ "Resize seed from width": "Make an attempt to produce a picture similar to what would have been produced with same seed at specified resolution",
60
+
61
+ "Interrogate": "Reconstruct prompt from existing image and put it into the prompt field.",
62
+
63
+ "Images filename pattern": "Use following tags to define how filenames for images are chosen: [steps], [cfg], [prompt], [prompt_no_styles], [prompt_spaces], [width], [height], [styles], [sampler], [seed], [model_hash], [prompt_words], [date], [datetime], [job_timestamp]; leave empty for default.",
64
+ "Directory name pattern": "Use following tags to define how subdirectories for images and grids are chosen: [steps], [cfg], [prompt], [prompt_no_styles], [prompt_spaces], [width], [height], [styles], [sampler], [seed], [model_hash], [prompt_words], [date], [datetime], [job_timestamp]; leave empty for default.",
65
+ "Max prompt words": "Set the maximum number of words to be used in the [prompt_words] option; ATTENTION: If the words are too long, they may exceed the maximum length of the file path that the system can handle",
66
+
67
+ "Loopback": "Process an image, use it as an input, repeat.",
68
+ "Loops": "How many times to repeat processing an image and using it as input for the next iteration",
69
+
70
+ "Style 1": "Style to apply; styles have components for both positive and negative prompts and apply to both",
71
+ "Style 2": "Style to apply; styles have components for both positive and negative prompts and apply to both",
72
+ "Apply style": "Insert selected styles into prompt fields",
73
+ "Create style": "Save current prompts as a style. If you add the token {prompt} to the text, the style use that as placeholder for your prompt when you use the style in the future.",
74
+
75
+ "Checkpoint name": "Loads weights from checkpoint before making images. You can either use hash or a part of filename (as seen in settings) for checkpoint name. Recommended to use with Y axis for less switching.",
76
+
77
+ "vram": "Torch active: Peak amount of VRAM used by Torch during generation, excluding cached data.\nTorch reserved: Peak amount of VRAM allocated by Torch, including all active and cached data.\nSys VRAM: Peak amount of VRAM allocation across all applications / total GPU VRAM (peak utilization%).",
78
+
79
+ "Highres. fix": "Use a two step process to partially create an image at smaller resolution, upscale, and then improve details in it without changing composition",
80
+ "Scale latent": "Uscale the image in latent space. Alternative is to produce the full image from latent representation, upscale that, and then move it back to latent space.",
81
+
82
+ "Eta noise seed delta": "If this values is non-zero, it will be added to seed and used to initialize RNG for noises when using samplers with Eta. You can use this to produce even more variation of images, or you can use this to match images of other software if you know what you are doing.",
83
+ "Do not add watermark to images": "If this option is enabled, watermark will not be added to created images. Warning: if you do not add watermark, you may be behaving in an unethical manner.",
84
+
85
+ "Filename word regex": "This regular expression will be used extract words from filename, and they will be joined using the option below into label text used for training. Leave empty to keep filename text as it is.",
86
+ "Filename join string": "This string will be used to hoin split words into a single line if the option above is enabled.",
87
+
88
+ "Quicksettings list": "List of setting names, separated by commas, for settings that should go to the quick access bar at the top, rather than the usual setting tab. See modules/shared.py for setting names. Requires restarting to apply."
89
+ }
90
+
91
+
92
+ onUiUpdate(function(){
93
+ gradioApp().querySelectorAll('span, button, select, p').forEach(function(span){
94
+ tooltip = titles[span.textContent];
95
+
96
+ if(!tooltip){
97
+ tooltip = titles[span.value];
98
+ }
99
+
100
+ if(!tooltip){
101
+ for (const c of span.classList) {
102
+ if (c in titles) {
103
+ tooltip = titles[c];
104
+ break;
105
+ }
106
+ }
107
+ }
108
+
109
+ if(tooltip){
110
+ span.title = tooltip;
111
+ }
112
+ })
113
+
114
+ gradioApp().querySelectorAll('select').forEach(function(select){
115
+ if (select.onchange != null) return;
116
+
117
+ select.onchange = function(){
118
+ select.title = titles[select.value] || "";
119
+ }
120
+ })
121
+ })
javascript/imageMaskFix.js ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * temporary fix for https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/668
3
+ * @see https://github.com/gradio-app/gradio/issues/1721
4
+ */
5
+ window.addEventListener( 'resize', () => imageMaskResize());
6
+ function imageMaskResize() {
7
+ const canvases = gradioApp().querySelectorAll('#img2maskimg .touch-none canvas');
8
+ if ( ! canvases.length ) {
9
+ canvases_fixed = false;
10
+ window.removeEventListener( 'resize', imageMaskResize );
11
+ return;
12
+ }
13
+
14
+ const wrapper = canvases[0].closest('.touch-none');
15
+ const previewImage = wrapper.previousElementSibling;
16
+
17
+ if ( ! previewImage.complete ) {
18
+ previewImage.addEventListener( 'load', () => imageMaskResize());
19
+ return;
20
+ }
21
+
22
+ const w = previewImage.width;
23
+ const h = previewImage.height;
24
+ const nw = previewImage.naturalWidth;
25
+ const nh = previewImage.naturalHeight;
26
+ const portrait = nh > nw;
27
+ const factor = portrait;
28
+
29
+ const wW = Math.min(w, portrait ? h/nh*nw : w/nw*nw);
30
+ const wH = Math.min(h, portrait ? h/nh*nh : w/nw*nh);
31
+
32
+ wrapper.style.width = `${wW}px`;
33
+ wrapper.style.height = `${wH}px`;
34
+ wrapper.style.left = `${(w-wW)/2}px`;
35
+ wrapper.style.top = `${(h-wH)/2}px`;
36
+
37
+ canvases.forEach( c => {
38
+ c.style.width = c.style.height = '';
39
+ c.style.maxWidth = '100%';
40
+ c.style.maxHeight = '100%';
41
+ c.style.objectFit = 'contain';
42
+ });
43
+ }
44
+
45
+ onUiUpdate(() => imageMaskResize());
javascript/imageviewer.js ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // A full size 'lightbox' preview modal shown when left clicking on gallery previews
2
+ function closeModal() {
3
+ gradioApp().getElementById("lightboxModal").style.display = "none";
4
+ }
5
+
6
+ function showModal(event) {
7
+ const source = event.target || event.srcElement;
8
+ const modalImage = gradioApp().getElementById("modalImage")
9
+ const lb = gradioApp().getElementById("lightboxModal")
10
+ modalImage.src = source.src
11
+ if (modalImage.style.display === 'none') {
12
+ lb.style.setProperty('background-image', 'url(' + source.src + ')');
13
+ }
14
+ lb.style.display = "block";
15
+ lb.focus()
16
+ event.stopPropagation()
17
+ }
18
+
19
+ function negmod(n, m) {
20
+ return ((n % m) + m) % m;
21
+ }
22
+
23
+ function updateOnBackgroundChange() {
24
+ const modalImage = gradioApp().getElementById("modalImage")
25
+ if (modalImage && modalImage.offsetParent) {
26
+ let allcurrentButtons = gradioApp().querySelectorAll(".gallery-item.transition-all.\\!ring-2")
27
+ let currentButton = null
28
+ allcurrentButtons.forEach(function(elem) {
29
+ if (elem.parentElement.offsetParent) {
30
+ currentButton = elem;
31
+ }
32
+ })
33
+
34
+ if (modalImage.src != currentButton.children[0].src) {
35
+ modalImage.src = currentButton.children[0].src;
36
+ if (modalImage.style.display === 'none') {
37
+ modal.style.setProperty('background-image', `url(${modalImage.src})`)
38
+ }
39
+ }
40
+ }
41
+ }
42
+
43
+ function modalImageSwitch(offset) {
44
+ var allgalleryButtons = gradioApp().querySelectorAll(".gallery-item.transition-all")
45
+ var galleryButtons = []
46
+ allgalleryButtons.forEach(function(elem) {
47
+ if (elem.parentElement.offsetParent) {
48
+ galleryButtons.push(elem);
49
+ }
50
+ })
51
+
52
+ if (galleryButtons.length > 1) {
53
+ var allcurrentButtons = gradioApp().querySelectorAll(".gallery-item.transition-all.\\!ring-2")
54
+ var currentButton = null
55
+ allcurrentButtons.forEach(function(elem) {
56
+ if (elem.parentElement.offsetParent) {
57
+ currentButton = elem;
58
+ }
59
+ })
60
+
61
+ var result = -1
62
+ galleryButtons.forEach(function(v, i) {
63
+ if (v == currentButton) {
64
+ result = i
65
+ }
66
+ })
67
+
68
+ if (result != -1) {
69
+ nextButton = galleryButtons[negmod((result + offset), galleryButtons.length)]
70
+ nextButton.click()
71
+ const modalImage = gradioApp().getElementById("modalImage");
72
+ const modal = gradioApp().getElementById("lightboxModal");
73
+ modalImage.src = nextButton.children[0].src;
74
+ if (modalImage.style.display === 'none') {
75
+ modal.style.setProperty('background-image', `url(${modalImage.src})`)
76
+ }
77
+ setTimeout(function() {
78
+ modal.focus()
79
+ }, 10)
80
+ }
81
+ }
82
+ }
83
+
84
+ function modalNextImage(event) {
85
+ modalImageSwitch(1)
86
+ event.stopPropagation()
87
+ }
88
+
89
+ function modalPrevImage(event) {
90
+ modalImageSwitch(-1)
91
+ event.stopPropagation()
92
+ }
93
+
94
+ function modalKeyHandler(event) {
95
+ switch (event.key) {
96
+ case "ArrowLeft":
97
+ modalPrevImage(event)
98
+ break;
99
+ case "ArrowRight":
100
+ modalNextImage(event)
101
+ break;
102
+ case "Escape":
103
+ closeModal();
104
+ break;
105
+ }
106
+ }
107
+
108
+ function showGalleryImage() {
109
+ setTimeout(function() {
110
+ fullImg_preview = gradioApp().querySelectorAll('img.w-full.object-contain')
111
+
112
+ if (fullImg_preview != null) {
113
+ fullImg_preview.forEach(function function_name(e) {
114
+ if (e.dataset.modded)
115
+ return;
116
+ e.dataset.modded = true;
117
+ if(e && e.parentElement.tagName == 'DIV'){
118
+ e.style.cursor='pointer'
119
+ e.addEventListener('click', function (evt) {
120
+ if(!opts.js_modal_lightbox) return;
121
+ modalZoomSet(gradioApp().getElementById('modalImage'), opts.js_modal_lightbox_initially_zoomed)
122
+ showModal(evt)
123
+ }, true);
124
+ }
125
+ });
126
+ }
127
+
128
+ }, 100);
129
+ }
130
+
131
+ function modalZoomSet(modalImage, enable) {
132
+ if (enable) {
133
+ modalImage.classList.add('modalImageFullscreen');
134
+ } else {
135
+ modalImage.classList.remove('modalImageFullscreen');
136
+ }
137
+ }
138
+
139
+ function modalZoomToggle(event) {
140
+ modalImage = gradioApp().getElementById("modalImage");
141
+ modalZoomSet(modalImage, !modalImage.classList.contains('modalImageFullscreen'))
142
+ event.stopPropagation()
143
+ }
144
+
145
+ function modalTileImageToggle(event) {
146
+ const modalImage = gradioApp().getElementById("modalImage");
147
+ const modal = gradioApp().getElementById("lightboxModal");
148
+ const isTiling = modalImage.style.display === 'none';
149
+ if (isTiling) {
150
+ modalImage.style.display = 'block';
151
+ modal.style.setProperty('background-image', 'none')
152
+ } else {
153
+ modalImage.style.display = 'none';
154
+ modal.style.setProperty('background-image', `url(${modalImage.src})`)
155
+ }
156
+
157
+ event.stopPropagation()
158
+ }
159
+
160
+ function galleryImageHandler(e) {
161
+ if (e && e.parentElement.tagName == 'BUTTON') {
162
+ e.onclick = showGalleryImage;
163
+ }
164
+ }
165
+
166
+ onUiUpdate(function() {
167
+ fullImg_preview = gradioApp().querySelectorAll('img.w-full')
168
+ if (fullImg_preview != null) {
169
+ fullImg_preview.forEach(galleryImageHandler);
170
+ }
171
+ updateOnBackgroundChange();
172
+ })
173
+
174
+ document.addEventListener("DOMContentLoaded", function() {
175
+ const modalFragment = document.createDocumentFragment();
176
+ const modal = document.createElement('div')
177
+ modal.onclick = closeModal;
178
+ modal.id = "lightboxModal";
179
+ modal.tabIndex = 0
180
+ modal.addEventListener('keydown', modalKeyHandler, true)
181
+
182
+ const modalControls = document.createElement('div')
183
+ modalControls.className = 'modalControls gradio-container';
184
+ modal.append(modalControls);
185
+
186
+ const modalZoom = document.createElement('span')
187
+ modalZoom.className = 'modalZoom cursor';
188
+ modalZoom.innerHTML = '&#10529;'
189
+ modalZoom.addEventListener('click', modalZoomToggle, true)
190
+ modalZoom.title = "Toggle zoomed view";
191
+ modalControls.appendChild(modalZoom)
192
+
193
+ const modalTileImage = document.createElement('span')
194
+ modalTileImage.className = 'modalTileImage cursor';
195
+ modalTileImage.innerHTML = '&#8862;'
196
+ modalTileImage.addEventListener('click', modalTileImageToggle, true)
197
+ modalTileImage.title = "Preview tiling";
198
+ modalControls.appendChild(modalTileImage)
199
+
200
+ const modalClose = document.createElement('span')
201
+ modalClose.className = 'modalClose cursor';
202
+ modalClose.innerHTML = '&times;'
203
+ modalClose.onclick = closeModal;
204
+ modalClose.title = "Close image viewer";
205
+ modalControls.appendChild(modalClose)
206
+
207
+ const modalImage = document.createElement('img')
208
+ modalImage.id = 'modalImage';
209
+ modalImage.onclick = closeModal;
210
+ modalImage.tabIndex = 0
211
+ modalImage.addEventListener('keydown', modalKeyHandler, true)
212
+ modal.appendChild(modalImage)
213
+
214
+ const modalPrev = document.createElement('a')
215
+ modalPrev.className = 'modalPrev';
216
+ modalPrev.innerHTML = '&#10094;'
217
+ modalPrev.tabIndex = 0
218
+ modalPrev.addEventListener('click', modalPrevImage, true);
219
+ modalPrev.addEventListener('keydown', modalKeyHandler, true)
220
+ modal.appendChild(modalPrev)
221
+
222
+ const modalNext = document.createElement('a')
223
+ modalNext.className = 'modalNext';
224
+ modalNext.innerHTML = '&#10095;'
225
+ modalNext.tabIndex = 0
226
+ modalNext.addEventListener('click', modalNextImage, true);
227
+ modalNext.addEventListener('keydown', modalKeyHandler, true)
228
+
229
+ modal.appendChild(modalNext)
230
+
231
+
232
+ gradioApp().getRootNode().appendChild(modal)
233
+
234
+ document.body.appendChild(modalFragment);
235
+
236
+ });
javascript/notification.js ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Monitors the gallery and sends a browser notification when the leading image is new.
2
+
3
+ let lastHeadImg = null;
4
+
5
+ notificationButton = null
6
+
7
+ onUiUpdate(function(){
8
+ if(notificationButton == null){
9
+ notificationButton = gradioApp().getElementById('request_notifications')
10
+
11
+ if(notificationButton != null){
12
+ notificationButton.addEventListener('click', function (evt) {
13
+ Notification.requestPermission();
14
+ },true);
15
+ }
16
+ }
17
+
18
+ const galleryPreviews = gradioApp().querySelectorAll('img.h-full.w-full.overflow-hidden');
19
+
20
+ if (galleryPreviews == null) return;
21
+
22
+ const headImg = galleryPreviews[0]?.src;
23
+
24
+ if (headImg == null || headImg == lastHeadImg) return;
25
+
26
+ lastHeadImg = headImg;
27
+
28
+ // play notification sound if available
29
+ gradioApp().querySelector('#audio_notification audio')?.play();
30
+
31
+ if (document.hasFocus()) return;
32
+
33
+ // Multiple copies of the images are in the DOM when one is selected. Dedup with a Set to get the real number generated.
34
+ const imgs = new Set(Array.from(galleryPreviews).map(img => img.src));
35
+
36
+ const notification = new Notification(
37
+ 'Stable Diffusion',
38
+ {
39
+ body: `Generated ${imgs.size > 1 ? imgs.size - opts.return_grid : 1} image${imgs.size > 1 ? 's' : ''}`,
40
+ icon: headImg,
41
+ image: headImg,
42
+ }
43
+ );
44
+
45
+ notification.onclick = function(_){
46
+ parent.focus();
47
+ this.close();
48
+ };
49
+ });
javascript/progressbar.js ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // code related to showing and updating progressbar shown as the image is being made
2
+ global_progressbars = {}
3
+
4
+ function check_progressbar(id_part, id_progressbar, id_progressbar_span, id_skip, id_interrupt, id_preview, id_gallery){
5
+ var progressbar = gradioApp().getElementById(id_progressbar)
6
+ var skip = id_skip ? gradioApp().getElementById(id_skip) : null
7
+ var interrupt = gradioApp().getElementById(id_interrupt)
8
+
9
+ if(opts.show_progress_in_title && progressbar && progressbar.offsetParent){
10
+ if(progressbar.innerText){
11
+ let newtitle = 'Stable Diffusion - ' + progressbar.innerText
12
+ if(document.title != newtitle){
13
+ document.title = newtitle;
14
+ }
15
+ }else{
16
+ let newtitle = 'Stable Diffusion'
17
+ if(document.title != newtitle){
18
+ document.title = newtitle;
19
+ }
20
+ }
21
+ }
22
+
23
+ if(progressbar!= null && progressbar != global_progressbars[id_progressbar]){
24
+ global_progressbars[id_progressbar] = progressbar
25
+
26
+ var mutationObserver = new MutationObserver(function(m){
27
+ preview = gradioApp().getElementById(id_preview)
28
+ gallery = gradioApp().getElementById(id_gallery)
29
+
30
+ if(preview != null && gallery != null){
31
+ preview.style.width = gallery.clientWidth + "px"
32
+ preview.style.height = gallery.clientHeight + "px"
33
+
34
+ var progressDiv = gradioApp().querySelectorAll('#' + id_progressbar_span).length > 0;
35
+ if(!progressDiv){
36
+ if (skip) {
37
+ skip.style.display = "none"
38
+ }
39
+ interrupt.style.display = "none"
40
+ }
41
+ }
42
+
43
+ window.setTimeout(function() { requestMoreProgress(id_part, id_progressbar_span, id_skip, id_interrupt) }, 500)
44
+ });
45
+ mutationObserver.observe( progressbar, { childList:true, subtree:true })
46
+ }
47
+ }
48
+
49
+ onUiUpdate(function(){
50
+ check_progressbar('txt2img', 'txt2img_progressbar', 'txt2img_progress_span', 'txt2img_skip', 'txt2img_interrupt', 'txt2img_preview', 'txt2img_gallery')
51
+ check_progressbar('img2img', 'img2img_progressbar', 'img2img_progress_span', 'img2img_skip', 'img2img_interrupt', 'img2img_preview', 'img2img_gallery')
52
+ check_progressbar('ti', 'ti_progressbar', 'ti_progress_span', '', 'ti_interrupt', 'ti_preview', 'ti_gallery')
53
+ })
54
+
55
+ function requestMoreProgress(id_part, id_progressbar_span, id_skip, id_interrupt){
56
+ btn = gradioApp().getElementById(id_part+"_check_progress");
57
+ if(btn==null) return;
58
+
59
+ btn.click();
60
+ var progressDiv = gradioApp().querySelectorAll('#' + id_progressbar_span).length > 0;
61
+ var skip = id_skip ? gradioApp().getElementById(id_skip) : null
62
+ var interrupt = gradioApp().getElementById(id_interrupt)
63
+ if(progressDiv && interrupt){
64
+ if (skip) {
65
+ skip.style.display = "block"
66
+ }
67
+ interrupt.style.display = "block"
68
+ }
69
+ }
70
+
71
+ function requestProgress(id_part){
72
+ btn = gradioApp().getElementById(id_part+"_check_progress_initial");
73
+ if(btn==null) return;
74
+
75
+ btn.click();
76
+ }
javascript/textualInversion.js ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ function start_training_textual_inversion(){
4
+ requestProgress('ti')
5
+ gradioApp().querySelector('#ti_error').innerHTML=''
6
+
7
+ return args_to_array(arguments)
8
+ }
javascript/ui.js ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // various functions for interation with ui.py not large enough to warrant putting them in separate files
2
+
3
+ function selected_gallery_index(){
4
+ var buttons = gradioApp().querySelectorAll('[style="display: block;"].tabitem .gallery-item')
5
+ var button = gradioApp().querySelector('[style="display: block;"].tabitem .gallery-item.\\!ring-2')
6
+
7
+ var result = -1
8
+ buttons.forEach(function(v, i){ if(v==button) { result = i } })
9
+
10
+ return result
11
+ }
12
+
13
+ function extract_image_from_gallery(gallery){
14
+ if(gallery.length == 1){
15
+ return gallery[0]
16
+ }
17
+
18
+ index = selected_gallery_index()
19
+
20
+ if (index < 0 || index >= gallery.length){
21
+ return [null]
22
+ }
23
+
24
+ return gallery[index];
25
+ }
26
+
27
+ function args_to_array(args){
28
+ res = []
29
+ for(var i=0;i<args.length;i++){
30
+ res.push(args[i])
31
+ }
32
+ return res
33
+ }
34
+
35
+ function switch_to_txt2img(){
36
+ gradioApp().querySelector('#tabs').querySelectorAll('button')[0].click();
37
+
38
+ return args_to_array(arguments);
39
+ }
40
+
41
+ function switch_to_img2img_img2img(){
42
+ gradioApp().querySelector('#tabs').querySelectorAll('button')[1].click();
43
+ gradioApp().getElementById('mode_img2img').querySelectorAll('button')[0].click();
44
+
45
+ return args_to_array(arguments);
46
+ }
47
+
48
+ function switch_to_img2img_inpaint(){
49
+ gradioApp().querySelector('#tabs').querySelectorAll('button')[1].click();
50
+ gradioApp().getElementById('mode_img2img').querySelectorAll('button')[1].click();
51
+
52
+ return args_to_array(arguments);
53
+ }
54
+
55
+ function switch_to_extras(){
56
+ gradioApp().querySelector('#tabs').querySelectorAll('button')[2].click();
57
+
58
+ return args_to_array(arguments);
59
+ }
60
+
61
+ function extract_image_from_gallery_txt2img(gallery){
62
+ switch_to_txt2img()
63
+ return extract_image_from_gallery(gallery);
64
+ }
65
+
66
+ function extract_image_from_gallery_img2img(gallery){
67
+ switch_to_img2img_img2img()
68
+ return extract_image_from_gallery(gallery);
69
+ }
70
+
71
+ function extract_image_from_gallery_inpaint(gallery){
72
+ switch_to_img2img_inpaint()
73
+ return extract_image_from_gallery(gallery);
74
+ }
75
+
76
+ function extract_image_from_gallery_extras(gallery){
77
+ switch_to_extras()
78
+ return extract_image_from_gallery(gallery);
79
+ }
80
+
81
+ function get_tab_index(tabId){
82
+ var res = 0
83
+
84
+ gradioApp().getElementById(tabId).querySelector('div').querySelectorAll('button').forEach(function(button, i){
85
+ if(button.className.indexOf('bg-white') != -1)
86
+ res = i
87
+ })
88
+
89
+ return res
90
+ }
91
+
92
+ function create_tab_index_args(tabId, args){
93
+ var res = []
94
+ for(var i=0; i<args.length; i++){
95
+ res.push(args[i])
96
+ }
97
+
98
+ res[0] = get_tab_index(tabId)
99
+
100
+ return res
101
+ }
102
+
103
+ function get_extras_tab_index(){
104
+ const [,,...args] = [...arguments]
105
+ return [get_tab_index('mode_extras'), get_tab_index('extras_resize_mode'), ...args]
106
+ }
107
+
108
+ function create_submit_args(args){
109
+ res = []
110
+ for(var i=0;i<args.length;i++){
111
+ res.push(args[i])
112
+ }
113
+
114
+ // As it is currently, txt2img and img2img send back the previous output args (txt2img_gallery, generation_info, html_info) whenever you generate a new image.
115
+ // This can lead to uploading a huge gallery of previously generated images, which leads to an unnecessary delay between submitting and beginning to generate.
116
+ // I don't know why gradio is seding outputs along with inputs, but we can prevent sending the image gallery here, which seems to be an issue for some.
117
+ // If gradio at some point stops sending outputs, this may break something
118
+ if(Array.isArray(res[res.length - 3])){
119
+ res[res.length - 3] = null
120
+ }
121
+
122
+ return res
123
+ }
124
+
125
+ function submit(){
126
+ requestProgress('txt2img')
127
+
128
+ return create_submit_args(arguments)
129
+ }
130
+
131
+ function submit_img2img(){
132
+ requestProgress('img2img')
133
+
134
+ res = create_submit_args(arguments)
135
+
136
+ res[0] = get_tab_index('mode_img2img')
137
+
138
+ return res
139
+ }
140
+
141
+
142
+ function ask_for_style_name(_, prompt_text, negative_prompt_text) {
143
+ name_ = prompt('Style name:')
144
+ return name_ === null ? [null, null, null]: [name_, prompt_text, negative_prompt_text]
145
+ }
146
+
147
+
148
+
149
+ opts = {}
150
+ function apply_settings(jsdata){
151
+ console.log(jsdata)
152
+
153
+ opts = JSON.parse(jsdata)
154
+
155
+ return jsdata
156
+ }
157
+
158
+ onUiUpdate(function(){
159
+ if(Object.keys(opts).length != 0) return;
160
+
161
+ json_elem = gradioApp().getElementById('settings_json')
162
+ if(json_elem == null) return;
163
+
164
+ textarea = json_elem.querySelector('textarea')
165
+ jsdata = textarea.value
166
+ opts = JSON.parse(jsdata)
167
+
168
+
169
+ Object.defineProperty(textarea, 'value', {
170
+ set: function(newValue) {
171
+ var valueProp = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value');
172
+ var oldValue = valueProp.get.call(textarea);
173
+ valueProp.set.call(textarea, newValue);
174
+
175
+ if (oldValue != newValue) {
176
+ opts = JSON.parse(textarea.value)
177
+ }
178
+ },
179
+ get: function() {
180
+ var valueProp = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value');
181
+ return valueProp.get.call(textarea);
182
+ }
183
+ });
184
+
185
+ json_elem.parentElement.style.display="none"
186
+
187
+ if (!txt2img_textarea) {
188
+ txt2img_textarea = gradioApp().querySelector("#txt2img_prompt > label > textarea");
189
+ txt2img_textarea?.addEventListener("input", () => update_token_counter("txt2img_token_button"));
190
+ txt2img_textarea?.addEventListener("keyup", (event) => submit_prompt(event, "txt2img_generate"));
191
+ }
192
+ if (!img2img_textarea) {
193
+ img2img_textarea = gradioApp().querySelector("#img2img_prompt > label > textarea");
194
+ img2img_textarea?.addEventListener("input", () => update_token_counter("img2img_token_button"));
195
+ img2img_textarea?.addEventListener("keyup", (event) => submit_prompt(event, "img2img_generate"));
196
+ }
197
+ })
198
+
199
+ let txt2img_textarea, img2img_textarea = undefined;
200
+ let wait_time = 800
201
+ let token_timeout;
202
+
203
+ function update_txt2img_tokens(...args) {
204
+ update_token_counter("txt2img_token_button")
205
+ if (args.length == 2)
206
+ return args[0]
207
+ return args;
208
+ }
209
+
210
+ function update_img2img_tokens(...args) {
211
+ update_token_counter("img2img_token_button")
212
+ if (args.length == 2)
213
+ return args[0]
214
+ return args;
215
+ }
216
+
217
+ function update_token_counter(button_id) {
218
+ if (token_timeout)
219
+ clearTimeout(token_timeout);
220
+ token_timeout = setTimeout(() => gradioApp().getElementById(button_id)?.click(), wait_time);
221
+ }
222
+
223
+ function submit_prompt(event, generate_button_id) {
224
+ if (event.altKey && event.keyCode === 13) {
225
+ event.preventDefault();
226
+ gradioApp().getElementById(generate_button_id).click();
227
+ return;
228
+ }
229
+ }
230
+
231
+ function restart_reload(){
232
+ document.body.innerHTML='<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>';
233
+ setTimeout(function(){location.reload()},2000)
234
+ }
launch.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # this scripts installs necessary requirements and launches main program in webui.py
2
+ import subprocess
3
+ import os
4
+ import sys
5
+ import importlib.util
6
+ import shlex
7
+ import platform
8
+
9
+ dir_repos = "repositories"
10
+ python = sys.executable
11
+ git = os.environ.get('GIT', "git")
12
+
13
+
14
+ def extract_arg(args, name):
15
+ return [x for x in args if x != name], name in args
16
+
17
+
18
+ def run(command, desc=None, errdesc=None):
19
+ if desc is not None:
20
+ print(desc)
21
+
22
+ result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
23
+
24
+ if result.returncode != 0:
25
+
26
+ message = f"""{errdesc or 'Error running command'}.
27
+ Command: {command}
28
+ Error code: {result.returncode}
29
+ stdout: {result.stdout.decode(encoding="utf8", errors="ignore") if len(result.stdout)>0 else '<empty>'}
30
+ stderr: {result.stderr.decode(encoding="utf8", errors="ignore") if len(result.stderr)>0 else '<empty>'}
31
+ """
32
+ raise RuntimeError(message)
33
+
34
+ return result.stdout.decode(encoding="utf8", errors="ignore")
35
+
36
+
37
+ def check_run(command):
38
+ result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
39
+ return result.returncode == 0
40
+
41
+
42
+ def is_installed(package):
43
+ try:
44
+ spec = importlib.util.find_spec(package)
45
+ except ModuleNotFoundError:
46
+ return False
47
+
48
+ return spec is not None
49
+
50
+
51
+ def repo_dir(name):
52
+ return os.path.join(dir_repos, name)
53
+
54
+
55
+ def run_python(code, desc=None, errdesc=None):
56
+ return run(f'"{python}" -c "{code}"', desc, errdesc)
57
+
58
+
59
+ def run_pip(args, desc=None):
60
+ return run(f'"{python}" -m pip {args} --prefer-binary', desc=f"Installing {desc}", errdesc=f"Couldn't install {desc}")
61
+
62
+
63
+ def check_run_python(code):
64
+ return check_run(f'"{python}" -c "{code}"')
65
+
66
+
67
+ def git_clone(url, dir, name, commithash=None):
68
+ # TODO clone into temporary dir and move if successful
69
+
70
+ if os.path.exists(dir):
71
+ if commithash is None:
72
+ return
73
+
74
+ current_hash = run(f'"{git}" -C {dir} rev-parse HEAD', None, f"Couldn't determine {name}'s hash: {commithash}").strip()
75
+ if current_hash == commithash:
76
+ return
77
+
78
+ run(f'"{git}" -C {dir} fetch', f"Fetching updates for {name}...", f"Couldn't fetch {name}")
79
+ run(f'"{git}" -C {dir} checkout {commithash}', f"Checking out commit for {name} with hash: {commithash}...", f"Couldn't checkout commit {commithash} for {name}")
80
+ return
81
+
82
+ run(f'"{git}" clone "{url}" "{dir}"', f"Cloning {name} into {dir}...", f"Couldn't clone {name}")
83
+
84
+ if commithash is not None:
85
+ run(f'"{git}" -C {dir} checkout {commithash}', None, f"Couldn't checkout {name}'s hash: {commithash}")
86
+
87
+
88
+ def prepare_enviroment():
89
+ torch_command = os.environ.get('TORCH_COMMAND', "pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113")
90
+ requirements_file = os.environ.get('REQS_FILE', "requirements_versions.txt")
91
+ commandline_args = os.environ.get('COMMANDLINE_ARGS', "")
92
+
93
+ gfpgan_package = os.environ.get('GFPGAN_PACKAGE', "git+https://github.com/TencentARC/GFPGAN.git@8d2447a2d918f8eba5a4a01463fd48e45126a379")
94
+ clip_package = os.environ.get('CLIP_PACKAGE', "git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1")
95
+
96
+ stable_diffusion_commit_hash = os.environ.get('STABLE_DIFFUSION_COMMIT_HASH', "69ae4b35e0a0f6ee1af8bb9a5d0016ccb27e36dc")
97
+ taming_transformers_commit_hash = os.environ.get('TAMING_TRANSFORMERS_COMMIT_HASH', "24268930bf1dce879235a7fddd0b2355b84d7ea6")
98
+ k_diffusion_commit_hash = os.environ.get('K_DIFFUSION_COMMIT_HASH', "f4e99857772fc3a126ba886aadf795a332774878")
99
+ codeformer_commit_hash = os.environ.get('CODEFORMER_COMMIT_HASH', "c5b4593074ba6214284d6acd5f1719b6c5d739af")
100
+ blip_commit_hash = os.environ.get('BLIP_COMMIT_HASH', "48211a1594f1321b00f14c9f7a5b4813144b2fb9")
101
+
102
+ args = shlex.split(commandline_args)
103
+
104
+ args, skip_torch_cuda_test = extract_arg(args, '--skip-torch-cuda-test')
105
+ xformers = '--xformers' in args
106
+ deepdanbooru = '--deepdanbooru' in args
107
+ ngrok = '--ngrok' in args
108
+
109
+ try:
110
+ commit = run(f"{git} rev-parse HEAD").strip()
111
+ except Exception:
112
+ commit = "<none>"
113
+
114
+ print(f"Python {sys.version}")
115
+ print(f"Commit hash: {commit}")
116
+
117
+ if not is_installed("torch") or not is_installed("torchvision"):
118
+ run(f'"{python}" -m {torch_command}', "Installing torch and torchvision", "Couldn't install torch")
119
+
120
+ if not skip_torch_cuda_test:
121
+ run_python("import torch; assert torch.cuda.is_available(), 'Torch is not able to use GPU; add --skip-torch-cuda-test to COMMANDLINE_ARGS variable to disable this check'")
122
+
123
+ if not is_installed("gfpgan"):
124
+ run_pip(f"install {gfpgan_package}", "gfpgan")
125
+
126
+ if not is_installed("clip"):
127
+ run_pip(f"install {clip_package}", "clip")
128
+
129
+ if not is_installed("xformers") and xformers and platform.python_version().startswith("3.10"):
130
+ if platform.system() == "Windows":
131
+ run_pip("install https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/c/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl", "xformers")
132
+ elif platform.system() == "Linux":
133
+ run_pip("install xformers", "xformers")
134
+
135
+ if not is_installed("deepdanbooru") and deepdanbooru:
136
+ run_pip("install git+https://github.com/KichangKim/DeepDanbooru.git@edf73df4cdaeea2cf00e9ac08bd8a9026b7a7b26#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru")
137
+
138
+ if not is_installed("pyngrok") and ngrok:
139
+ run_pip("install pyngrok", "ngrok")
140
+
141
+ os.makedirs(dir_repos, exist_ok=True)
142
+
143
+ git_clone("https://github.com/CompVis/stable-diffusion.git", repo_dir('stable-diffusion'), "Stable Diffusion", stable_diffusion_commit_hash)
144
+ git_clone("https://github.com/CompVis/taming-transformers.git", repo_dir('taming-transformers'), "Taming Transformers", taming_transformers_commit_hash)
145
+ git_clone("https://github.com/crowsonkb/k-diffusion.git", repo_dir('k-diffusion'), "K-diffusion", k_diffusion_commit_hash)
146
+ git_clone("https://github.com/sczhou/CodeFormer.git", repo_dir('CodeFormer'), "CodeFormer", codeformer_commit_hash)
147
+ git_clone("https://github.com/salesforce/BLIP.git", repo_dir('BLIP'), "BLIP", blip_commit_hash)
148
+
149
+ if not is_installed("lpips"):
150
+ run_pip(f"install -r {os.path.join(repo_dir('CodeFormer'), 'requirements.txt')}", "requirements for CodeFormer")
151
+
152
+ run_pip(f"install -r {requirements_file}", "requirements for Web UI")
153
+
154
+ sys.argv += args
155
+
156
+ if "--exit" in args:
157
+ print("Exiting because of --exit argument")
158
+ exit(0)
159
+
160
+
161
+ def start_webui():
162
+ print(f"Launching Web UI with arguments: {' '.join(sys.argv[1:])}")
163
+ import webui
164
+ webui.webui()
165
+
166
+
167
+ if __name__ == "__main__":
168
+ prepare_enviroment()
169
+ start_webui()
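A minimal sketch of the install-gating pattern launch.py uses: only call pip when importlib cannot find the package. "lpips" is just an example package name taken from the script above; this is an illustration, not part of the commit.

    import importlib.util
    import subprocess
    import sys

    def is_installed(package):
        # mirror launch.py: a package counts as installed if find_spec locates it
        try:
            return importlib.util.find_spec(package) is not None
        except ModuleNotFoundError:
            return False

    if not is_installed("lpips"):
        # equivalent of run_pip(), without the output capture
        subprocess.run([sys.executable, "-m", "pip", "install", "lpips", "--prefer-binary"], check=True)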
modules/artists.py ADDED
@@ -0,0 +1,25 @@
1
+ import os.path
2
+ import csv
3
+ from collections import namedtuple
4
+
5
+ Artist = namedtuple("Artist", ['name', 'weight', 'category'])
6
+
7
+
8
+ class ArtistsDatabase:
9
+ def __init__(self, filename):
10
+ self.cats = set()
11
+ self.artists = []
12
+
13
+ if not os.path.exists(filename):
14
+ return
15
+
16
+ with open(filename, "r", newline='', encoding="utf8") as file:
17
+ reader = csv.DictReader(file)
18
+
19
+ for row in reader:
20
+ artist = Artist(row["artist"], float(row["score"]), row["category"])
21
+ self.artists.append(artist)
22
+ self.cats.add(artist.category)
23
+
24
+ def categories(self):
25
+ return sorted(self.cats)
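A quick sanity check for ArtistsDatabase, assuming the file is importable as modules.artists. The CSV columns (artist, score, category) come from the reader above; the sample row is made up for illustration.

    import csv, os, tempfile
    from modules.artists import ArtistsDatabase

    path = os.path.join(tempfile.mkdtemp(), "artists.csv")
    with open(path, "w", newline="", encoding="utf8") as f:
        writer = csv.DictWriter(f, fieldnames=["artist", "score", "category"])
        writer.writeheader()
        writer.writerow({"artist": "Example Artist", "score": "0.9", "category": "digital"})

    db = ArtistsDatabase(path)
    print(db.categories())  # -> ['digital']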
modules/bsrgan_model.py ADDED
@@ -0,0 +1,76 @@
1
+ import os.path
2
+ import sys
3
+ import traceback
4
+
5
+ import PIL.Image
6
+ import numpy as np
7
+ import torch
8
+ from basicsr.utils.download_util import load_file_from_url
9
+
10
+ import modules.upscaler
11
+ from modules import devices, modelloader
12
+ from modules.bsrgan_model_arch import RRDBNet
13
+
14
+
15
+ class UpscalerBSRGAN(modules.upscaler.Upscaler):
16
+ def __init__(self, dirname):
17
+ self.name = "BSRGAN"
18
+ self.model_name = "BSRGAN 4x"
19
+ self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/BSRGAN.pth"
20
+ self.user_path = dirname
21
+ super().__init__()
22
+ model_paths = self.find_models(ext_filter=[".pt", ".pth"])
23
+ scalers = []
24
+ if len(model_paths) == 0:
25
+ scaler_data = modules.upscaler.UpscalerData(self.model_name, self.model_url, self, 4)
26
+ scalers.append(scaler_data)
27
+ for file in model_paths:
28
+ if "http" in file:
29
+ name = self.model_name
30
+ else:
31
+ name = modelloader.friendly_name(file)
32
+ try:
33
+ scaler_data = modules.upscaler.UpscalerData(name, file, self, 4)
34
+ scalers.append(scaler_data)
35
+ except Exception:
36
+ print(f"Error loading BSRGAN model: {file}", file=sys.stderr)
37
+ print(traceback.format_exc(), file=sys.stderr)
38
+ self.scalers = scalers
39
+
40
+ def do_upscale(self, img: PIL.Image.Image, selected_file):
41
+ torch.cuda.empty_cache()
42
+ model = self.load_model(selected_file)
43
+ if model is None:
44
+ return img
45
+ model.to(devices.device_bsrgan)
46
+ torch.cuda.empty_cache()
47
+ img = np.array(img)
48
+ img = img[:, :, ::-1]
49
+ img = np.moveaxis(img, 2, 0) / 255
50
+ img = torch.from_numpy(img).float()
51
+ img = img.unsqueeze(0).to(devices.device_bsrgan)
52
+ with torch.no_grad():
53
+ output = model(img)
54
+ output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
55
+ output = 255. * np.moveaxis(output, 0, 2)
56
+ output = output.astype(np.uint8)
57
+ output = output[:, :, ::-1]
58
+ torch.cuda.empty_cache()
59
+ return PIL.Image.fromarray(output, 'RGB')
60
+
61
+ def load_model(self, path: str):
62
+ if "http" in path:
63
+ filename = load_file_from_url(url=self.model_url, model_dir=self.model_path, file_name="%s.pth" % self.name,
64
+ progress=True)
65
+ else:
66
+ filename = path
67
+ if not os.path.exists(filename) or filename is None:
68
+ print(f"BSRGAN: Unable to load model from {filename}", file=sys.stderr)
69
+ return None
70
+ model = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=4) # define network
71
+ model.load_state_dict(torch.load(filename), strict=True)
72
+ model.eval()
73
+ for k, v in model.named_parameters():
74
+ v.requires_grad = False
75
+ return model
76
+
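The pre/post-processing in do_upscale is a fixed round trip: RGB to BGR, HWC to normalized NCHW float, through the model, then back. A standalone sketch of that round trip, with an identity step standing in for the BSRGAN model:

    import numpy as np
    import torch
    import PIL.Image

    img = PIL.Image.new("RGB", (64, 64), color=(128, 64, 32))
    x = np.array(img)[:, :, ::-1]                                            # RGB -> BGR
    x = torch.from_numpy(np.moveaxis(x, 2, 0) / 255.).float().unsqueeze(0)   # NCHW in [0, 1]

    out = x  # a real BSRGAN model would produce a 4x-upscaled tensor here
    out = out.squeeze().clamp_(0, 1).numpy()
    out = (255. * np.moveaxis(out, 0, 2)).astype(np.uint8)[:, :, ::-1]       # back to RGB HWC
    result = PIL.Image.fromarray(out, "RGB")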
modules/bsrgan_model_arch.py ADDED
@@ -0,0 +1,102 @@
1
+ import functools
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ import torch.nn.init as init
6
+
7
+
8
+ def initialize_weights(net_l, scale=1):
9
+ if not isinstance(net_l, list):
10
+ net_l = [net_l]
11
+ for net in net_l:
12
+ for m in net.modules():
13
+ if isinstance(m, nn.Conv2d):
14
+ init.kaiming_normal_(m.weight, a=0, mode='fan_in')
15
+ m.weight.data *= scale # for residual block
16
+ if m.bias is not None:
17
+ m.bias.data.zero_()
18
+ elif isinstance(m, nn.Linear):
19
+ init.kaiming_normal_(m.weight, a=0, mode='fan_in')
20
+ m.weight.data *= scale
21
+ if m.bias is not None:
22
+ m.bias.data.zero_()
23
+ elif isinstance(m, nn.BatchNorm2d):
24
+ init.constant_(m.weight, 1)
25
+ init.constant_(m.bias.data, 0.0)
26
+
27
+
28
+ def make_layer(block, n_layers):
29
+ layers = []
30
+ for _ in range(n_layers):
31
+ layers.append(block())
32
+ return nn.Sequential(*layers)
33
+
34
+
35
+ class ResidualDenseBlock_5C(nn.Module):
36
+ def __init__(self, nf=64, gc=32, bias=True):
37
+ super(ResidualDenseBlock_5C, self).__init__()
38
+ # gc: growth channel, i.e. intermediate channels
39
+ self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias)
40
+ self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias)
41
+ self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias)
42
+ self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias)
43
+ self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias)
44
+ self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
45
+
46
+ # initialization
47
+ initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)
48
+
49
+ def forward(self, x):
50
+ x1 = self.lrelu(self.conv1(x))
51
+ x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1)))
52
+ x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1)))
53
+ x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1)))
54
+ x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
55
+ return x5 * 0.2 + x
56
+
57
+
58
+ class RRDB(nn.Module):
59
+ '''Residual in Residual Dense Block'''
60
+
61
+ def __init__(self, nf, gc=32):
62
+ super(RRDB, self).__init__()
63
+ self.RDB1 = ResidualDenseBlock_5C(nf, gc)
64
+ self.RDB2 = ResidualDenseBlock_5C(nf, gc)
65
+ self.RDB3 = ResidualDenseBlock_5C(nf, gc)
66
+
67
+ def forward(self, x):
68
+ out = self.RDB1(x)
69
+ out = self.RDB2(out)
70
+ out = self.RDB3(out)
71
+ return out * 0.2 + x
72
+
73
+
74
+ class RRDBNet(nn.Module):
75
+ def __init__(self, in_nc=3, out_nc=3, nf=64, nb=23, gc=32, sf=4):
76
+ super(RRDBNet, self).__init__()
77
+ RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc)
78
+ self.sf = sf
79
+
80
+ self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True)
81
+ self.RRDB_trunk = make_layer(RRDB_block_f, nb)
82
+ self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
83
+ #### upsampling
84
+ self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
85
+ if self.sf==4:
86
+ self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
87
+ self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
88
+ self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True)
89
+
90
+ self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
91
+
92
+ def forward(self, x):
93
+ fea = self.conv_first(x)
94
+ trunk = self.trunk_conv(self.RRDB_trunk(fea))
95
+ fea = fea + trunk
96
+
97
+ fea = self.lrelu(self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest')))
98
+ if self.sf==4:
99
+ fea = self.lrelu(self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest')))
100
+ out = self.conv_last(self.lrelu(self.HRconv(fea)))
101
+
102
+ return out
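A shape check for the network above, assuming the file is importable as modules.bsrgan_model_arch: with sf=4 each input dimension is doubled twice, so a 64x64 input comes out 256x256 (the output size depends only on sf, not on the block count nb).

    import torch
    from modules.bsrgan_model_arch import RRDBNet

    net = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=4, gc=32, sf=4).eval()  # small nb to keep it quick
    with torch.no_grad():
        y = net(torch.randn(1, 3, 64, 64))
    print(y.shape)  # torch.Size([1, 3, 256, 256])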
modules/codeformer/codeformer_arch.py ADDED
@@ -0,0 +1,278 @@
1
+ # this file is copied from CodeFormer repository. Please see comment in modules/codeformer_model.py
2
+
3
+ import math
4
+ import numpy as np
5
+ import torch
6
+ from torch import nn, Tensor
7
+ import torch.nn.functional as F
8
+ from typing import Optional, List
9
+
10
+ from modules.codeformer.vqgan_arch import *
11
+ from basicsr.utils import get_root_logger
12
+ from basicsr.utils.registry import ARCH_REGISTRY
13
+
14
+ def calc_mean_std(feat, eps=1e-5):
15
+ """Calculate mean and std for adaptive_instance_normalization.
16
+
17
+ Args:
18
+ feat (Tensor): 4D tensor.
19
+ eps (float): A small value added to the variance to avoid
20
+ divide-by-zero. Default: 1e-5.
21
+ """
22
+ size = feat.size()
23
+ assert len(size) == 4, 'The input feature should be 4D tensor.'
24
+ b, c = size[:2]
25
+ feat_var = feat.view(b, c, -1).var(dim=2) + eps
26
+ feat_std = feat_var.sqrt().view(b, c, 1, 1)
27
+ feat_mean = feat.view(b, c, -1).mean(dim=2).view(b, c, 1, 1)
28
+ return feat_mean, feat_std
29
+
30
+
31
+ def adaptive_instance_normalization(content_feat, style_feat):
32
+ """Adaptive instance normalization.
33
+
34
+ Adjust the reference features to have a similar color and illumination
35
+ to those in the degraded features.
36
+
37
+ Args:
38
+ content_feat (Tensor): The reference feature.
39
+ style_feat (Tensor): The degraded features.
40
+ """
41
+ size = content_feat.size()
42
+ style_mean, style_std = calc_mean_std(style_feat)
43
+ content_mean, content_std = calc_mean_std(content_feat)
44
+ normalized_feat = (content_feat - content_mean.expand(size)) / content_std.expand(size)
45
+ return normalized_feat * style_std.expand(size) + style_mean.expand(size)
46
+
47
+
48
+ class PositionEmbeddingSine(nn.Module):
49
+ """
50
+ This is a more standard version of the position embedding, very similar to the one
51
+ used by the Attention is all you need paper, generalized to work on images.
52
+ """
53
+
54
+ def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
55
+ super().__init__()
56
+ self.num_pos_feats = num_pos_feats
57
+ self.temperature = temperature
58
+ self.normalize = normalize
59
+ if scale is not None and normalize is False:
60
+ raise ValueError("normalize should be True if scale is passed")
61
+ if scale is None:
62
+ scale = 2 * math.pi
63
+ self.scale = scale
64
+
65
+ def forward(self, x, mask=None):
66
+ if mask is None:
67
+ mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool)
68
+ not_mask = ~mask
69
+ y_embed = not_mask.cumsum(1, dtype=torch.float32)
70
+ x_embed = not_mask.cumsum(2, dtype=torch.float32)
71
+ if self.normalize:
72
+ eps = 1e-6
73
+ y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
74
+ x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
75
+
76
+ dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
77
+ dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
78
+
79
+ pos_x = x_embed[:, :, :, None] / dim_t
80
+ pos_y = y_embed[:, :, :, None] / dim_t
81
+ pos_x = torch.stack(
82
+ (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
83
+ ).flatten(3)
84
+ pos_y = torch.stack(
85
+ (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
86
+ ).flatten(3)
87
+ pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
88
+ return pos
89
+
90
+ def _get_activation_fn(activation):
91
+ """Return an activation function given a string"""
92
+ if activation == "relu":
93
+ return F.relu
94
+ if activation == "gelu":
95
+ return F.gelu
96
+ if activation == "glu":
97
+ return F.glu
98
+ raise RuntimeError(F"activation should be relu/gelu, not {activation}.")
99
+
100
+
101
+ class TransformerSALayer(nn.Module):
102
+ def __init__(self, embed_dim, nhead=8, dim_mlp=2048, dropout=0.0, activation="gelu"):
103
+ super().__init__()
104
+ self.self_attn = nn.MultiheadAttention(embed_dim, nhead, dropout=dropout)
105
+ # Implementation of Feedforward model - MLP
106
+ self.linear1 = nn.Linear(embed_dim, dim_mlp)
107
+ self.dropout = nn.Dropout(dropout)
108
+ self.linear2 = nn.Linear(dim_mlp, embed_dim)
109
+
110
+ self.norm1 = nn.LayerNorm(embed_dim)
111
+ self.norm2 = nn.LayerNorm(embed_dim)
112
+ self.dropout1 = nn.Dropout(dropout)
113
+ self.dropout2 = nn.Dropout(dropout)
114
+
115
+ self.activation = _get_activation_fn(activation)
116
+
117
+ def with_pos_embed(self, tensor, pos: Optional[Tensor]):
118
+ return tensor if pos is None else tensor + pos
119
+
120
+ def forward(self, tgt,
121
+ tgt_mask: Optional[Tensor] = None,
122
+ tgt_key_padding_mask: Optional[Tensor] = None,
123
+ query_pos: Optional[Tensor] = None):
124
+
125
+ # self attention
126
+ tgt2 = self.norm1(tgt)
127
+ q = k = self.with_pos_embed(tgt2, query_pos)
128
+ tgt2 = self.self_attn(q, k, value=tgt2, attn_mask=tgt_mask,
129
+ key_padding_mask=tgt_key_padding_mask)[0]
130
+ tgt = tgt + self.dropout1(tgt2)
131
+
132
+ # ffn
133
+ tgt2 = self.norm2(tgt)
134
+ tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
135
+ tgt = tgt + self.dropout2(tgt2)
136
+ return tgt
137
+
138
+ class Fuse_sft_block(nn.Module):
139
+ def __init__(self, in_ch, out_ch):
140
+ super().__init__()
141
+ self.encode_enc = ResBlock(2*in_ch, out_ch)
142
+
143
+ self.scale = nn.Sequential(
144
+ nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
145
+ nn.LeakyReLU(0.2, True),
146
+ nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1))
147
+
148
+ self.shift = nn.Sequential(
149
+ nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
150
+ nn.LeakyReLU(0.2, True),
151
+ nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1))
152
+
153
+ def forward(self, enc_feat, dec_feat, w=1):
154
+ enc_feat = self.encode_enc(torch.cat([enc_feat, dec_feat], dim=1))
155
+ scale = self.scale(enc_feat)
156
+ shift = self.shift(enc_feat)
157
+ residual = w * (dec_feat * scale + shift)
158
+ out = dec_feat + residual
159
+ return out
160
+
161
+
162
+ @ARCH_REGISTRY.register()
163
+ class CodeFormer(VQAutoEncoder):
164
+ def __init__(self, dim_embd=512, n_head=8, n_layers=9,
165
+ codebook_size=1024, latent_size=256,
166
+ connect_list=['32', '64', '128', '256'],
167
+ fix_modules=['quantize','generator']):
168
+ super(CodeFormer, self).__init__(512, 64, [1, 2, 2, 4, 4, 8], 'nearest',2, [16], codebook_size)
169
+
170
+ if fix_modules is not None:
171
+ for module in fix_modules:
172
+ for param in getattr(self, module).parameters():
173
+ param.requires_grad = False
174
+
175
+ self.connect_list = connect_list
176
+ self.n_layers = n_layers
177
+ self.dim_embd = dim_embd
178
+ self.dim_mlp = dim_embd*2
179
+
180
+ self.position_emb = nn.Parameter(torch.zeros(latent_size, self.dim_embd))
181
+ self.feat_emb = nn.Linear(256, self.dim_embd)
182
+
183
+ # transformer
184
+ self.ft_layers = nn.Sequential(*[TransformerSALayer(embed_dim=dim_embd, nhead=n_head, dim_mlp=self.dim_mlp, dropout=0.0)
185
+ for _ in range(self.n_layers)])
186
+
187
+ # logits_predict head
188
+ self.idx_pred_layer = nn.Sequential(
189
+ nn.LayerNorm(dim_embd),
190
+ nn.Linear(dim_embd, codebook_size, bias=False))
191
+
192
+ self.channels = {
193
+ '16': 512,
194
+ '32': 256,
195
+ '64': 256,
196
+ '128': 128,
197
+ '256': 128,
198
+ '512': 64,
199
+ }
200
+
201
+ # after second residual block for > 16, before attn layer for ==16
202
+ self.fuse_encoder_block = {'512':2, '256':5, '128':8, '64':11, '32':14, '16':18}
203
+ # after first residual block for > 16, before attn layer for ==16
204
+ self.fuse_generator_block = {'16':6, '32': 9, '64':12, '128':15, '256':18, '512':21}
205
+
206
+ # fuse_convs_dict
207
+ self.fuse_convs_dict = nn.ModuleDict()
208
+ for f_size in self.connect_list:
209
+ in_ch = self.channels[f_size]
210
+ self.fuse_convs_dict[f_size] = Fuse_sft_block(in_ch, in_ch)
211
+
212
+ def _init_weights(self, module):
213
+ if isinstance(module, (nn.Linear, nn.Embedding)):
214
+ module.weight.data.normal_(mean=0.0, std=0.02)
215
+ if isinstance(module, nn.Linear) and module.bias is not None:
216
+ module.bias.data.zero_()
217
+ elif isinstance(module, nn.LayerNorm):
218
+ module.bias.data.zero_()
219
+ module.weight.data.fill_(1.0)
220
+
221
+ def forward(self, x, w=0, detach_16=True, code_only=False, adain=False):
222
+ # ################### Encoder #####################
223
+ enc_feat_dict = {}
224
+ out_list = [self.fuse_encoder_block[f_size] for f_size in self.connect_list]
225
+ for i, block in enumerate(self.encoder.blocks):
226
+ x = block(x)
227
+ if i in out_list:
228
+ enc_feat_dict[str(x.shape[-1])] = x.clone()
229
+
230
+ lq_feat = x
231
+ # ################# Transformer ###################
232
+ # quant_feat, codebook_loss, quant_stats = self.quantize(lq_feat)
233
+ pos_emb = self.position_emb.unsqueeze(1).repeat(1,x.shape[0],1)
234
+ # BCHW -> BC(HW) -> (HW)BC
235
+ feat_emb = self.feat_emb(lq_feat.flatten(2).permute(2,0,1))
236
+ query_emb = feat_emb
237
+ # Transformer encoder
238
+ for layer in self.ft_layers:
239
+ query_emb = layer(query_emb, query_pos=pos_emb)
240
+
241
+ # output logits
242
+ logits = self.idx_pred_layer(query_emb) # (hw)bn
243
+ logits = logits.permute(1,0,2) # (hw)bn -> b(hw)n
244
+
245
+ if code_only: # for training stage II
246
+ # logits doesn't need softmax before cross_entropy loss
247
+ return logits, lq_feat
248
+
249
+ # ################# Quantization ###################
250
+ # if self.training:
251
+ # quant_feat = torch.einsum('btn,nc->btc', [soft_one_hot, self.quantize.embedding.weight])
252
+ # # b(hw)c -> bc(hw) -> bchw
253
+ # quant_feat = quant_feat.permute(0,2,1).view(lq_feat.shape)
254
+ # ------------
255
+ soft_one_hot = F.softmax(logits, dim=2)
256
+ _, top_idx = torch.topk(soft_one_hot, 1, dim=2)
257
+ quant_feat = self.quantize.get_codebook_feat(top_idx, shape=[x.shape[0],16,16,256])
258
+ # preserve gradients
259
+ # quant_feat = lq_feat + (quant_feat - lq_feat).detach()
260
+
261
+ if detach_16:
262
+ quant_feat = quant_feat.detach() # for training stage III
263
+ if adain:
264
+ quant_feat = adaptive_instance_normalization(quant_feat, lq_feat)
265
+
266
+ # ################## Generator ####################
267
+ x = quant_feat
268
+ fuse_list = [self.fuse_generator_block[f_size] for f_size in self.connect_list]
269
+
270
+ for i, block in enumerate(self.generator.blocks):
271
+ x = block(x)
272
+ if i in fuse_list: # fuse after i-th block
273
+ f_size = str(x.shape[-1])
274
+ if w>0:
275
+ x = self.fuse_convs_dict[f_size](enc_feat_dict[f_size].detach(), x, w)
276
+ out = x
277
+ # logits doesn't need softmax before cross_entropy loss
278
+ return out, logits, lq_feat
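A quick check of adaptive_instance_normalization defined above: the result keeps the content tensor's spatial structure but adopts the style tensor's per-channel mean and std. Assumes the file is importable as modules.codeformer.codeformer_arch (which in turn needs basicsr installed).

    import torch
    from modules.codeformer.codeformer_arch import adaptive_instance_normalization, calc_mean_std

    content = torch.randn(1, 8, 16, 16) * 3 + 5
    style = torch.randn(1, 8, 16, 16)

    out = adaptive_instance_normalization(content, style)
    out_mean, _ = calc_mean_std(out)
    style_mean, _ = calc_mean_std(style)
    print(torch.allclose(out_mean, style_mean, atol=1e-4))  # True: the style statistics carry over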
modules/codeformer/vqgan_arch.py ADDED
@@ -0,0 +1,437 @@
1
+ # this file is copied from CodeFormer repository. Please see comment in modules/codeformer_model.py
2
+
3
+ '''
4
+ VQGAN code, adapted from the original created by the Unleashing Transformers authors:
5
+ https://github.com/samb-t/unleashing-transformers/blob/master/models/vqgan.py
6
+
7
+ '''
8
+ import numpy as np
9
+ import torch
10
+ import torch.nn as nn
11
+ import torch.nn.functional as F
12
+ import copy
13
+ from basicsr.utils import get_root_logger
14
+ from basicsr.utils.registry import ARCH_REGISTRY
15
+
16
+ def normalize(in_channels):
17
+ return torch.nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)
18
+
19
+
20
+ @torch.jit.script
21
+ def swish(x):
22
+ return x*torch.sigmoid(x)
23
+
24
+
25
+ # Define VQVAE classes
26
+ class VectorQuantizer(nn.Module):
27
+ def __init__(self, codebook_size, emb_dim, beta):
28
+ super(VectorQuantizer, self).__init__()
29
+ self.codebook_size = codebook_size # number of embeddings
30
+ self.emb_dim = emb_dim # dimension of embedding
31
+ self.beta = beta # commitment cost used in loss term, beta * ||z_e(x)-sg[e]||^2
32
+ self.embedding = nn.Embedding(self.codebook_size, self.emb_dim)
33
+ self.embedding.weight.data.uniform_(-1.0 / self.codebook_size, 1.0 / self.codebook_size)
34
+
35
+ def forward(self, z):
36
+ # reshape z -> (batch, height, width, channel) and flatten
37
+ z = z.permute(0, 2, 3, 1).contiguous()
38
+ z_flattened = z.view(-1, self.emb_dim)
39
+
40
+ # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
41
+ d = (z_flattened ** 2).sum(dim=1, keepdim=True) + (self.embedding.weight**2).sum(1) - \
42
+ 2 * torch.matmul(z_flattened, self.embedding.weight.t())
43
+
44
+ mean_distance = torch.mean(d)
45
+ # find closest encodings
46
+ # min_encoding_indices = torch.argmin(d, dim=1).unsqueeze(1)
47
+ min_encoding_scores, min_encoding_indices = torch.topk(d, 1, dim=1, largest=False)
48
+ # [0-1], higher score, higher confidence
49
+ min_encoding_scores = torch.exp(-min_encoding_scores/10)
50
+
51
+ min_encodings = torch.zeros(min_encoding_indices.shape[0], self.codebook_size).to(z)
52
+ min_encodings.scatter_(1, min_encoding_indices, 1)
53
+
54
+ # get quantized latent vectors
55
+ z_q = torch.matmul(min_encodings, self.embedding.weight).view(z.shape)
56
+ # compute loss for embedding
57
+ loss = torch.mean((z_q.detach()-z)**2) + self.beta * torch.mean((z_q - z.detach()) ** 2)
58
+ # preserve gradients
59
+ z_q = z + (z_q - z).detach()
60
+
61
+ # perplexity
62
+ e_mean = torch.mean(min_encodings, dim=0)
63
+ perplexity = torch.exp(-torch.sum(e_mean * torch.log(e_mean + 1e-10)))
64
+ # reshape back to match original input shape
65
+ z_q = z_q.permute(0, 3, 1, 2).contiguous()
66
+
67
+ return z_q, loss, {
68
+ "perplexity": perplexity,
69
+ "min_encodings": min_encodings,
70
+ "min_encoding_indices": min_encoding_indices,
71
+ "min_encoding_scores": min_encoding_scores,
72
+ "mean_distance": mean_distance
73
+ }
74
+
75
+ def get_codebook_feat(self, indices, shape):
76
+ # input indices: batch*token_num -> (batch*token_num)*1
77
+ # shape: batch, height, width, channel
78
+ indices = indices.view(-1,1)
79
+ min_encodings = torch.zeros(indices.shape[0], self.codebook_size).to(indices)
80
+ min_encodings.scatter_(1, indices, 1)
81
+ # get quantized latent vectors
82
+ z_q = torch.matmul(min_encodings.float(), self.embedding.weight)
83
+
84
+ if shape is not None: # reshape back to match original input shape
85
+ z_q = z_q.view(shape).permute(0, 3, 1, 2).contiguous()
86
+
87
+ return z_q
88
+
89
+
90
+ class GumbelQuantizer(nn.Module):
91
+ def __init__(self, codebook_size, emb_dim, num_hiddens, straight_through=False, kl_weight=5e-4, temp_init=1.0):
92
+ super().__init__()
93
+ self.codebook_size = codebook_size # number of embeddings
94
+ self.emb_dim = emb_dim # dimension of embedding
95
+ self.straight_through = straight_through
96
+ self.temperature = temp_init
97
+ self.kl_weight = kl_weight
98
+ self.proj = nn.Conv2d(num_hiddens, codebook_size, 1) # projects last encoder layer to quantized logits
99
+ self.embed = nn.Embedding(codebook_size, emb_dim)
100
+
101
+ def forward(self, z):
102
+ hard = self.straight_through if self.training else True
103
+
104
+ logits = self.proj(z)
105
+
106
+ soft_one_hot = F.gumbel_softmax(logits, tau=self.temperature, dim=1, hard=hard)
107
+
108
+ z_q = torch.einsum("b n h w, n d -> b d h w", soft_one_hot, self.embed.weight)
109
+
110
+ # + kl divergence to the prior loss
111
+ qy = F.softmax(logits, dim=1)
112
+ diff = self.kl_weight * torch.sum(qy * torch.log(qy * self.codebook_size + 1e-10), dim=1).mean()
113
+ min_encoding_indices = soft_one_hot.argmax(dim=1)
114
+
115
+ return z_q, diff, {
116
+ "min_encoding_indices": min_encoding_indices
117
+ }
118
+
119
+
120
+ class Downsample(nn.Module):
121
+ def __init__(self, in_channels):
122
+ super().__init__()
123
+ self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0)
124
+
125
+ def forward(self, x):
126
+ pad = (0, 1, 0, 1)
127
+ x = torch.nn.functional.pad(x, pad, mode="constant", value=0)
128
+ x = self.conv(x)
129
+ return x
130
+
131
+
132
+ class Upsample(nn.Module):
133
+ def __init__(self, in_channels):
134
+ super().__init__()
135
+ self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
136
+
137
+ def forward(self, x):
138
+ x = F.interpolate(x, scale_factor=2.0, mode="nearest")
139
+ x = self.conv(x)
140
+
141
+ return x
142
+
143
+
144
+ class ResBlock(nn.Module):
145
+ def __init__(self, in_channels, out_channels=None):
146
+ super(ResBlock, self).__init__()
147
+ self.in_channels = in_channels
148
+ self.out_channels = in_channels if out_channels is None else out_channels
149
+ self.norm1 = normalize(in_channels)
150
+ self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
151
+ self.norm2 = normalize(out_channels)
152
+ self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
153
+ if self.in_channels != self.out_channels:
154
+ self.conv_out = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
155
+
156
+ def forward(self, x_in):
157
+ x = x_in
158
+ x = self.norm1(x)
159
+ x = swish(x)
160
+ x = self.conv1(x)
161
+ x = self.norm2(x)
162
+ x = swish(x)
163
+ x = self.conv2(x)
164
+ if self.in_channels != self.out_channels:
165
+ x_in = self.conv_out(x_in)
166
+
167
+ return x + x_in
168
+
169
+
170
+ class AttnBlock(nn.Module):
171
+ def __init__(self, in_channels):
172
+ super().__init__()
173
+ self.in_channels = in_channels
174
+
175
+ self.norm = normalize(in_channels)
176
+ self.q = torch.nn.Conv2d(
177
+ in_channels,
178
+ in_channels,
179
+ kernel_size=1,
180
+ stride=1,
181
+ padding=0
182
+ )
183
+ self.k = torch.nn.Conv2d(
184
+ in_channels,
185
+ in_channels,
186
+ kernel_size=1,
187
+ stride=1,
188
+ padding=0
189
+ )
190
+ self.v = torch.nn.Conv2d(
191
+ in_channels,
192
+ in_channels,
193
+ kernel_size=1,
194
+ stride=1,
195
+ padding=0
196
+ )
197
+ self.proj_out = torch.nn.Conv2d(
198
+ in_channels,
199
+ in_channels,
200
+ kernel_size=1,
201
+ stride=1,
202
+ padding=0
203
+ )
204
+
205
+ def forward(self, x):
206
+ h_ = x
207
+ h_ = self.norm(h_)
208
+ q = self.q(h_)
209
+ k = self.k(h_)
210
+ v = self.v(h_)
211
+
212
+ # compute attention
213
+ b, c, h, w = q.shape
214
+ q = q.reshape(b, c, h*w)
215
+ q = q.permute(0, 2, 1)
216
+ k = k.reshape(b, c, h*w)
217
+ w_ = torch.bmm(q, k)
218
+ w_ = w_ * (int(c)**(-0.5))
219
+ w_ = F.softmax(w_, dim=2)
220
+
221
+ # attend to values
222
+ v = v.reshape(b, c, h*w)
223
+ w_ = w_.permute(0, 2, 1)
224
+ h_ = torch.bmm(v, w_)
225
+ h_ = h_.reshape(b, c, h, w)
226
+
227
+ h_ = self.proj_out(h_)
228
+
229
+ return x+h_
230
+
231
+
232
+ class Encoder(nn.Module):
233
+ def __init__(self, in_channels, nf, emb_dim, ch_mult, num_res_blocks, resolution, attn_resolutions):
234
+ super().__init__()
235
+ self.nf = nf
236
+ self.num_resolutions = len(ch_mult)
237
+ self.num_res_blocks = num_res_blocks
238
+ self.resolution = resolution
239
+ self.attn_resolutions = attn_resolutions
240
+
241
+ curr_res = self.resolution
242
+ in_ch_mult = (1,)+tuple(ch_mult)
243
+
244
+ blocks = []
245
+ # initial convolution
246
+ blocks.append(nn.Conv2d(in_channels, nf, kernel_size=3, stride=1, padding=1))
247
+
248
+ # residual and downsampling blocks, with attention on smaller res (16x16)
249
+ for i in range(self.num_resolutions):
250
+ block_in_ch = nf * in_ch_mult[i]
251
+ block_out_ch = nf * ch_mult[i]
252
+ for _ in range(self.num_res_blocks):
253
+ blocks.append(ResBlock(block_in_ch, block_out_ch))
254
+ block_in_ch = block_out_ch
255
+ if curr_res in attn_resolutions:
256
+ blocks.append(AttnBlock(block_in_ch))
257
+
258
+ if i != self.num_resolutions - 1:
259
+ blocks.append(Downsample(block_in_ch))
260
+ curr_res = curr_res // 2
261
+
262
+ # non-local attention block
263
+ blocks.append(ResBlock(block_in_ch, block_in_ch))
264
+ blocks.append(AttnBlock(block_in_ch))
265
+ blocks.append(ResBlock(block_in_ch, block_in_ch))
266
+
267
+ # normalise and convert to latent size
268
+ blocks.append(normalize(block_in_ch))
269
+ blocks.append(nn.Conv2d(block_in_ch, emb_dim, kernel_size=3, stride=1, padding=1))
270
+ self.blocks = nn.ModuleList(blocks)
271
+
272
+ def forward(self, x):
273
+ for block in self.blocks:
274
+ x = block(x)
275
+
276
+ return x
277
+
278
+
279
+ class Generator(nn.Module):
280
+ def __init__(self, nf, emb_dim, ch_mult, res_blocks, img_size, attn_resolutions):
281
+ super().__init__()
282
+ self.nf = nf
283
+ self.ch_mult = ch_mult
284
+ self.num_resolutions = len(self.ch_mult)
285
+ self.num_res_blocks = res_blocks
286
+ self.resolution = img_size
287
+ self.attn_resolutions = attn_resolutions
288
+ self.in_channels = emb_dim
289
+ self.out_channels = 3
290
+ block_in_ch = self.nf * self.ch_mult[-1]
291
+ curr_res = self.resolution // 2 ** (self.num_resolutions-1)
292
+
293
+ blocks = []
294
+ # initial conv
295
+ blocks.append(nn.Conv2d(self.in_channels, block_in_ch, kernel_size=3, stride=1, padding=1))
296
+
297
+ # non-local attention block
298
+ blocks.append(ResBlock(block_in_ch, block_in_ch))
299
+ blocks.append(AttnBlock(block_in_ch))
300
+ blocks.append(ResBlock(block_in_ch, block_in_ch))
301
+
302
+ for i in reversed(range(self.num_resolutions)):
303
+ block_out_ch = self.nf * self.ch_mult[i]
304
+
305
+ for _ in range(self.num_res_blocks):
306
+ blocks.append(ResBlock(block_in_ch, block_out_ch))
307
+ block_in_ch = block_out_ch
308
+
309
+ if curr_res in self.attn_resolutions:
310
+ blocks.append(AttnBlock(block_in_ch))
311
+
312
+ if i != 0:
313
+ blocks.append(Upsample(block_in_ch))
314
+ curr_res = curr_res * 2
315
+
316
+ blocks.append(normalize(block_in_ch))
317
+ blocks.append(nn.Conv2d(block_in_ch, self.out_channels, kernel_size=3, stride=1, padding=1))
318
+
319
+ self.blocks = nn.ModuleList(blocks)
320
+
321
+
322
+ def forward(self, x):
323
+ for block in self.blocks:
324
+ x = block(x)
325
+
326
+ return x
327
+
328
+
329
+ @ARCH_REGISTRY.register()
330
+ class VQAutoEncoder(nn.Module):
331
+ def __init__(self, img_size, nf, ch_mult, quantizer="nearest", res_blocks=2, attn_resolutions=[16], codebook_size=1024, emb_dim=256,
332
+ beta=0.25, gumbel_straight_through=False, gumbel_kl_weight=1e-8, model_path=None):
333
+ super().__init__()
334
+ logger = get_root_logger()
335
+ self.in_channels = 3
336
+ self.nf = nf
337
+ self.n_blocks = res_blocks
338
+ self.codebook_size = codebook_size
339
+ self.embed_dim = emb_dim
340
+ self.ch_mult = ch_mult
341
+ self.resolution = img_size
342
+ self.attn_resolutions = attn_resolutions
343
+ self.quantizer_type = quantizer
344
+ self.encoder = Encoder(
345
+ self.in_channels,
346
+ self.nf,
347
+ self.embed_dim,
348
+ self.ch_mult,
349
+ self.n_blocks,
350
+ self.resolution,
351
+ self.attn_resolutions
352
+ )
353
+ if self.quantizer_type == "nearest":
354
+ self.beta = beta #0.25
355
+ self.quantize = VectorQuantizer(self.codebook_size, self.embed_dim, self.beta)
356
+ elif self.quantizer_type == "gumbel":
357
+ self.gumbel_num_hiddens = emb_dim
358
+ self.straight_through = gumbel_straight_through
359
+ self.kl_weight = gumbel_kl_weight
360
+ self.quantize = GumbelQuantizer(
361
+ self.codebook_size,
362
+ self.embed_dim,
363
+ self.gumbel_num_hiddens,
364
+ self.straight_through,
365
+ self.kl_weight
366
+ )
367
+ self.generator = Generator(
368
+ self.nf,
369
+ self.embed_dim,
370
+ self.ch_mult,
371
+ self.n_blocks,
372
+ self.resolution,
373
+ self.attn_resolutions
374
+ )
375
+
376
+ if model_path is not None:
377
+ chkpt = torch.load(model_path, map_location='cpu')
378
+ if 'params_ema' in chkpt:
379
+ self.load_state_dict(torch.load(model_path, map_location='cpu')['params_ema'])
380
+ logger.info(f'vqgan is loaded from: {model_path} [params_ema]')
381
+ elif 'params' in chkpt:
382
+ self.load_state_dict(torch.load(model_path, map_location='cpu')['params'])
383
+ logger.info(f'vqgan is loaded from: {model_path} [params]')
384
+ else:
385
+ raise ValueError(f'Wrong params!')
386
+
387
+
388
+ def forward(self, x):
389
+ x = self.encoder(x)
390
+ quant, codebook_loss, quant_stats = self.quantize(x)
391
+ x = self.generator(quant)
392
+ return x, codebook_loss, quant_stats
393
+
394
+
395
+
396
+ # patch based discriminator
397
+ @ARCH_REGISTRY.register()
398
+ class VQGANDiscriminator(nn.Module):
399
+ def __init__(self, nc=3, ndf=64, n_layers=4, model_path=None):
400
+ super().__init__()
401
+
402
+ layers = [nn.Conv2d(nc, ndf, kernel_size=4, stride=2, padding=1), nn.LeakyReLU(0.2, True)]
403
+ ndf_mult = 1
404
+ ndf_mult_prev = 1
405
+ for n in range(1, n_layers): # gradually increase the number of filters
406
+ ndf_mult_prev = ndf_mult
407
+ ndf_mult = min(2 ** n, 8)
408
+ layers += [
409
+ nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=2, padding=1, bias=False),
410
+ nn.BatchNorm2d(ndf * ndf_mult),
411
+ nn.LeakyReLU(0.2, True)
412
+ ]
413
+
414
+ ndf_mult_prev = ndf_mult
415
+ ndf_mult = min(2 ** n_layers, 8)
416
+
417
+ layers += [
418
+ nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=1, padding=1, bias=False),
419
+ nn.BatchNorm2d(ndf * ndf_mult),
420
+ nn.LeakyReLU(0.2, True)
421
+ ]
422
+
423
+ layers += [
424
+ nn.Conv2d(ndf * ndf_mult, 1, kernel_size=4, stride=1, padding=1)] # output 1 channel prediction map
425
+ self.main = nn.Sequential(*layers)
426
+
427
+ if model_path is not None:
428
+ chkpt = torch.load(model_path, map_location='cpu')
429
+ if 'params_d' in chkpt:
430
+ self.load_state_dict(torch.load(model_path, map_location='cpu')['params_d'])
431
+ elif 'params' in chkpt:
432
+ self.load_state_dict(torch.load(model_path, map_location='cpu')['params'])
433
+ else:
434
+ raise ValueError(f'Wrong params!')
435
+
436
+ def forward(self, x):
437
+ return self.main(x)
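The nearest-code lookup in VectorQuantizer.forward relies on the identity ||z - e||^2 = ||z||^2 + ||e||^2 - 2 z.e, which turns all pairwise distances into a single matmul. A standalone check of that identity with plain torch (random stand-ins for the latents and codebook):

    import torch

    z = torch.randn(10, 256)           # flattened latents
    codebook = torch.randn(1024, 256)  # embedding weights

    d = (z ** 2).sum(dim=1, keepdim=True) + (codebook ** 2).sum(1) - 2 * z @ codebook.t()
    d_ref = torch.cdist(z, codebook) ** 2
    print(torch.allclose(d, d_ref, atol=1e-2))  # True up to floating-point error
    print(d.argmin(dim=1)[:5])                  # indices of the nearest codebook entries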
modules/codeformer_model.py ADDED
@@ -0,0 +1,140 @@
1
+ import os
2
+ import sys
3
+ import traceback
4
+
5
+ import cv2
6
+ import torch
7
+
8
+ import modules.face_restoration
9
+ import modules.shared
10
+ from modules import shared, devices, modelloader
11
+ from modules.paths import script_path, models_path
12
+
13
+ # codeformer people made a choice to include a modified basicsr library in their project, which makes
14
+ # it utterly impossible to use it alongside other libraries that also use basicsr, like GFPGAN.
15
+ # I am making a choice to include some files from codeformer to work around this issue.
16
+ model_dir = "Codeformer"
17
+ model_path = os.path.join(models_path, model_dir)
18
+ model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth'
19
+
20
+ have_codeformer = False
21
+ codeformer = None
22
+
23
+
24
+ def setup_model(dirname):
25
+ global model_path
26
+ if not os.path.exists(model_path):
27
+ os.makedirs(model_path)
28
+
29
+ path = modules.paths.paths.get("CodeFormer", None)
30
+ if path is None:
31
+ return
32
+
33
+ try:
34
+ from torchvision.transforms.functional import normalize
35
+ from modules.codeformer.codeformer_arch import CodeFormer
36
+ from basicsr.utils.download_util import load_file_from_url
37
+ from basicsr.utils import imwrite, img2tensor, tensor2img
38
+ from facelib.utils.face_restoration_helper import FaceRestoreHelper
39
+ from modules.shared import cmd_opts
40
+
41
+ net_class = CodeFormer
42
+
43
+ class FaceRestorerCodeFormer(modules.face_restoration.FaceRestoration):
44
+ def name(self):
45
+ return "CodeFormer"
46
+
47
+ def __init__(self, dirname):
48
+ self.net = None
49
+ self.face_helper = None
50
+ self.cmd_dir = dirname
51
+
52
+ def create_models(self):
53
+
54
+ if self.net is not None and self.face_helper is not None:
55
+ self.net.to(devices.device_codeformer)
56
+ return self.net, self.face_helper
57
+ model_paths = modelloader.load_models(model_path, model_url, self.cmd_dir, download_name='codeformer-v0.1.0.pth')
58
+ if len(model_paths) != 0:
59
+ ckpt_path = model_paths[0]
60
+ else:
61
+ print("Unable to load codeformer model.")
62
+ return None, None
63
+ net = net_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(devices.device_codeformer)
64
+ checkpoint = torch.load(ckpt_path)['params_ema']
65
+ net.load_state_dict(checkpoint)
66
+ net.eval()
67
+
68
+ face_helper = FaceRestoreHelper(1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', use_parse=True, device=devices.device_codeformer)
69
+
70
+ self.net = net
71
+ self.face_helper = face_helper
72
+
73
+ return net, face_helper
74
+
75
+ def send_model_to(self, device):
76
+ self.net.to(device)
77
+ self.face_helper.face_det.to(device)
78
+ self.face_helper.face_parse.to(device)
79
+
80
+ def restore(self, np_image, w=None):
81
+ np_image = np_image[:, :, ::-1]
82
+
83
+ original_resolution = np_image.shape[0:2]
84
+
85
+ self.create_models()
86
+ if self.net is None or self.face_helper is None:
87
+ return np_image
88
+
89
+ self.send_model_to(devices.device_codeformer)
90
+
91
+ self.face_helper.clean_all()
92
+ self.face_helper.read_image(np_image)
93
+ self.face_helper.get_face_landmarks_5(only_center_face=False, resize=640, eye_dist_threshold=5)
94
+ self.face_helper.align_warp_face()
95
+
96
+ for idx, cropped_face in enumerate(self.face_helper.cropped_faces):
97
+ cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True)
98
+ normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
99
+ cropped_face_t = cropped_face_t.unsqueeze(0).to(devices.device_codeformer)
100
+
101
+ try:
102
+ with torch.no_grad():
103
+ output = self.net(cropped_face_t, w=w if w is not None else shared.opts.code_former_weight, adain=True)[0]
104
+ restored_face = tensor2img(output, rgb2bgr=True, min_max=(-1, 1))
105
+ del output
106
+ torch.cuda.empty_cache()
107
+ except Exception as error:
108
+ print(f'\tFailed inference for CodeFormer: {error}', file=sys.stderr)
109
+ restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1))
110
+
111
+ restored_face = restored_face.astype('uint8')
112
+ self.face_helper.add_restored_face(restored_face)
113
+
114
+ self.face_helper.get_inverse_affine(None)
115
+
116
+ restored_img = self.face_helper.paste_faces_to_input_image()
117
+ restored_img = restored_img[:, :, ::-1]
118
+
119
+ if original_resolution != restored_img.shape[0:2]:
120
+ restored_img = cv2.resize(restored_img, (0, 0), fx=original_resolution[1]/restored_img.shape[1], fy=original_resolution[0]/restored_img.shape[0], interpolation=cv2.INTER_LINEAR)
121
+
122
+ self.face_helper.clean_all()
123
+
124
+ if shared.opts.face_restoration_unload:
125
+ self.send_model_to(devices.cpu)
126
+
127
+ return restored_img
128
+
129
+ global have_codeformer
130
+ have_codeformer = True
131
+
132
+ global codeformer
133
+ codeformer = FaceRestorerCodeFormer(dirname)
134
+ shared.face_restorers.append(codeformer)
135
+
136
+ except Exception:
137
+ print("Error setting up CodeFormer:", file=sys.stderr)
138
+ print(traceback.format_exc(), file=sys.stderr)
139
+
140
+ # sys.path = stored_sys_path
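A hypothetical usage sketch of the restorer above, assuming a fully set-up webui environment (facelib and basicsr importable, the CodeFormer repo registered in modules.paths, model weights downloadable); the directory name and the blank image are placeholders.

    import numpy as np
    import modules.codeformer_model as codeformer_model

    codeformer_model.setup_model("models/Codeformer")    # illustrative path
    if codeformer_model.have_codeformer:
        image = np.zeros((512, 512, 3), dtype=np.uint8)  # stand-in RGB image
        restored = codeformer_model.codeformer.restore(image, w=0.5)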
modules/deepbooru.py ADDED
@@ -0,0 +1,173 @@
1
+ import os.path
2
+ from concurrent.futures import ProcessPoolExecutor
3
+ import multiprocessing
4
+ import time
5
+ import re
6
+
7
+ re_special = re.compile(r'([\\()])')
8
+
9
+ def get_deepbooru_tags(pil_image):
10
+ """
11
+ This method is for running only one image at a time for simple use. Used by the img2img interrogate.
12
+ """
13
+ from modules import shared # prevents circular reference
14
+
15
+ try:
16
+ create_deepbooru_process(shared.opts.interrogate_deepbooru_score_threshold, create_deepbooru_opts())
17
+ return get_tags_from_process(pil_image)
18
+ finally:
19
+ release_process()
20
+
21
+
22
+ OPT_INCLUDE_RANKS = "include_ranks"
23
+ def create_deepbooru_opts():
24
+ from modules import shared
25
+
26
+ return {
27
+ "use_spaces": shared.opts.deepbooru_use_spaces,
28
+ "use_escape": shared.opts.deepbooru_escape,
29
+ "alpha_sort": shared.opts.deepbooru_sort_alpha,
30
+ OPT_INCLUDE_RANKS: shared.opts.interrogate_return_ranks,
31
+ }
32
+
33
+
34
+ def deepbooru_process(queue, deepbooru_process_return, threshold, deepbooru_opts):
35
+ model, tags = get_deepbooru_tags_model()
36
+ while True: # while process is running, keep monitoring queue for new image
37
+ pil_image = queue.get()
38
+ if pil_image == "QUIT":
39
+ break
40
+ else:
41
+ deepbooru_process_return["value"] = get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts)
42
+
43
+
44
+ def create_deepbooru_process(threshold, deepbooru_opts):
45
+ """
46
+ Creates deepbooru process. A queue is created to send images into the process. This enables multiple images
47
+ to be processed in a row without reloading the model or creating a new process. To return the data, a shared
48
+ dictionary is created to hold the tags created. To wait for tags to be returned, a value of -1 is assigned
49
+ to the dictionary and the method adding the image to the queue should wait for this value to be updated with
50
+ the tags.
51
+ """
52
+ from modules import shared # prevents circular reference
53
+ shared.deepbooru_process_manager = multiprocessing.Manager()
54
+ shared.deepbooru_process_queue = shared.deepbooru_process_manager.Queue()
55
+ shared.deepbooru_process_return = shared.deepbooru_process_manager.dict()
56
+ shared.deepbooru_process_return["value"] = -1
57
+ shared.deepbooru_process = multiprocessing.Process(target=deepbooru_process, args=(shared.deepbooru_process_queue, shared.deepbooru_process_return, threshold, deepbooru_opts))
58
+ shared.deepbooru_process.start()
59
+
60
+
61
+ def get_tags_from_process(image):
62
+ from modules import shared
63
+
64
+ shared.deepbooru_process_return["value"] = -1
65
+ shared.deepbooru_process_queue.put(image)
66
+ while shared.deepbooru_process_return["value"] == -1:
67
+ time.sleep(0.2)
68
+ caption = shared.deepbooru_process_return["value"]
69
+ shared.deepbooru_process_return["value"] = -1
70
+
71
+ return caption
72
+
73
+
74
+ def release_process():
75
+ """
76
+ Stops the deepbooru process to return used memory
77
+ """
78
+ from modules import shared # prevents circular reference
79
+ shared.deepbooru_process_queue.put("QUIT")
80
+ shared.deepbooru_process.join()
81
+ shared.deepbooru_process_queue = None
82
+ shared.deepbooru_process = None
83
+ shared.deepbooru_process_return = None
84
+ shared.deepbooru_process_manager = None
85
+
86
+ def get_deepbooru_tags_model():
87
+ import deepdanbooru as dd
88
+ import tensorflow as tf
89
+ import numpy as np
90
+ this_folder = os.path.dirname(__file__)
91
+ model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru'))
92
+ if not os.path.exists(os.path.join(model_path, 'project.json')):
93
+ # there is no point importing these every time
94
+ import zipfile
95
+ from basicsr.utils.download_util import load_file_from_url
96
+ load_file_from_url(
97
+ r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
98
+ model_path)
99
+ with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref:
100
+ zip_ref.extractall(model_path)
101
+ os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"))
102
+
103
+ tags = dd.project.load_tags_from_project(model_path)
104
+ model = dd.project.load_model_from_project(
105
+ model_path, compile_model=True
106
+ )
107
+ return model, tags
108
+
109
+
110
+ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_opts):
111
+ import deepdanbooru as dd
112
+ import tensorflow as tf
113
+ import numpy as np
114
+
115
+ alpha_sort = deepbooru_opts['alpha_sort']
116
+ use_spaces = deepbooru_opts['use_spaces']
117
+ use_escape = deepbooru_opts['use_escape']
118
+ include_ranks = deepbooru_opts['include_ranks']
119
+
120
+ width = model.input_shape[2]
121
+ height = model.input_shape[1]
122
+ image = np.array(pil_image)
123
+ image = tf.image.resize(
124
+ image,
125
+ size=(height, width),
126
+ method=tf.image.ResizeMethod.AREA,
127
+ preserve_aspect_ratio=True,
128
+ )
129
+ image = image.numpy() # EagerTensor to np.array
130
+ image = dd.image.transform_and_pad_image(image, width, height)
131
+ image = image / 255.0
132
+ image_shape = image.shape
133
+ image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2]))
134
+
135
+ y = model.predict(image)[0]
136
+
137
+ result_dict = {}
138
+
139
+ for i, tag in enumerate(tags):
140
+ result_dict[tag] = y[i]
141
+
142
+ unsorted_tags_in_theshold = []
143
+ result_tags_print = []
144
+ for tag in tags:
145
+ if result_dict[tag] >= threshold:
146
+ if tag.startswith("rating:"):
147
+ continue
148
+ unsorted_tags_in_theshold.append((result_dict[tag], tag))
149
+ result_tags_print.append(f'{result_dict[tag]} {tag}')
150
+
151
+ # sort tags
152
+ result_tags_out = []
153
+ sort_ndx = 0
154
+ if alpha_sort:
155
+ sort_ndx = 1
156
+
157
+ # sort in reverse by likelihood, or ascending alphabetically when alpha_sort is set, and format tag text as requested
158
+ unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort))
159
+ for weight, tag in unsorted_tags_in_theshold:
160
+ # note: tag_outformat will still have a colon if include_ranks is True
161
+ tag_outformat = tag.replace(':', ' ')
162
+ if use_spaces:
163
+ tag_outformat = tag_outformat.replace('_', ' ')
164
+ if use_escape:
165
+ tag_outformat = re.sub(re_special, r'\\\1', tag_outformat)
166
+ if include_ranks:
167
+ tag_outformat = f"({tag_outformat}:{weight:.3f})"
168
+
169
+ result_tags_out.append(tag_outformat)
170
+
171
+ print('\n'.join(sorted(result_tags_print, reverse=True)))
172
+
173
+ return ', '.join(result_tags_out)
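A sketch of how the queue-based worker above is meant to be driven for a batch, assuming the webui environment (deepdanbooru and tensorflow installed); the threshold, option values, and file names are placeholders. For a single image, get_deepbooru_tags() wraps exactly this create/submit/release sequence.

    from PIL import Image
    from modules import deepbooru

    # options passed by hand instead of reading shared.opts; keys mirror create_deepbooru_opts()
    opts = {"use_spaces": False, "use_escape": True, "alpha_sort": False, "include_ranks": False}
    deepbooru.create_deepbooru_process(0.5, opts)
    try:
        for path in ["a.png", "b.png"]:  # illustrative file names
            print(path, deepbooru.get_tags_from_process(Image.open(path)))
    finally:
        deepbooru.release_process()      # shut the worker down and free its memory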
modules/devices.py ADDED
@@ -0,0 +1,72 @@
1
+ import contextlib
2
+
3
+ import torch
4
+
5
+ from modules import errors
6
+
7
+ # has_mps is only available in nightly pytorch (for now), `getattr` for compatibility
8
+ has_mps = getattr(torch, 'has_mps', False)
9
+
10
+ cpu = torch.device("cpu")
11
+
12
+
13
+ def get_optimal_device():
14
+ if torch.cuda.is_available():
15
+ return torch.device("cuda")
16
+
17
+ if has_mps:
18
+ return torch.device("mps")
19
+
20
+ return cpu
21
+
22
+
23
+ def torch_gc():
24
+ if torch.cuda.is_available():
25
+ torch.cuda.empty_cache()
26
+ torch.cuda.ipc_collect()
27
+
28
+
29
+ def enable_tf32():
30
+ if torch.cuda.is_available():
31
+ torch.backends.cuda.matmul.allow_tf32 = True
32
+ torch.backends.cudnn.allow_tf32 = True
33
+
34
+
35
+ errors.run(enable_tf32, "Enabling TF32")
36
+
37
+ device = device_gfpgan = device_bsrgan = device_esrgan = device_scunet = device_codeformer = get_optimal_device()
38
+ dtype = torch.float16
39
+ dtype_vae = torch.float16
40
+
41
+ def randn(seed, shape):
42
+ # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
43
+ if device.type == 'mps':
44
+ generator = torch.Generator(device=cpu)
45
+ generator.manual_seed(seed)
46
+ noise = torch.randn(shape, generator=generator, device=cpu).to(device)
47
+ return noise
48
+
49
+ torch.manual_seed(seed)
50
+ return torch.randn(shape, device=device)
51
+
52
+
53
+ def randn_without_seed(shape):
54
+ # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
55
+ if device.type == 'mps':
56
+ generator = torch.Generator(device=cpu)
57
+ noise = torch.randn(shape, generator=generator, device=cpu).to(device)
58
+ return noise
59
+
60
+ return torch.randn(shape, device=device)
61
+
62
+
63
+ def autocast(disable=False):
64
+ from modules import shared
65
+
66
+ if disable:
67
+ return contextlib.nullcontext()
68
+
69
+ if dtype == torch.float32 or shared.cmd_opts.precision == "full":
70
+ return contextlib.nullcontext()
71
+
72
+ return torch.autocast("cuda")
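A small illustration of the helpers above, assuming the modules package is importable: get_optimal_device picks CUDA, MPS, or CPU, and randn(seed, shape) returns reproducible noise for a given seed.

    from modules import devices

    print(devices.get_optimal_device())

    a = devices.randn(1234, (1, 4, 64, 64))
    b = devices.randn(1234, (1, 4, 64, 64))
    print((a == b).all())  # tensor(True): same seed, same noise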
modules/errors.py ADDED
@@ -0,0 +1,10 @@
1
+ import sys
2
+ import traceback
3
+
4
+
5
+ def run(code, task):
6
+ try:
7
+ code()
8
+ except Exception as e:
9
+ print(f"{task}: {type(e).__name__}", file=sys.stderr)
10
+ print(traceback.format_exc(), file=sys.stderr)
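errors.run is a tiny guard: it calls the given function and, instead of raising, prints the task name and traceback to stderr (devices.py above uses it to wrap enable_tf32). A short illustration with a deliberately failing callable:

    from modules import errors

    def flaky():
        raise RuntimeError("boom")

    errors.run(flaky, "Demonstrating errors.run")  # prints the error, does not raise
    print("still running")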
modules/esrgan_model.py ADDED
@@ -0,0 +1,158 @@
1
+ import os
2
+
3
+ import numpy as np
4
+ import torch
5
+ from PIL import Image
6
+ from basicsr.utils.download_util import load_file_from_url
7
+
8
+ import modules.esrgan_model_arch as arch
9
+ from modules import shared, modelloader, images, devices
10
+ from modules.upscaler import Upscaler, UpscalerData
11
+ from modules.shared import opts
12
+
13
+
14
+ def fix_model_layers(crt_model, pretrained_net):
15
+ # this code is adapted from https://github.com/xinntao/ESRGAN
16
+ if 'conv_first.weight' in pretrained_net:
17
+ return pretrained_net
18
+
19
+ if 'model.0.weight' not in pretrained_net:
20
+ is_realesrgan = "params_ema" in pretrained_net and 'body.0.rdb1.conv1.weight' in pretrained_net["params_ema"]
21
+ if is_realesrgan:
22
+ raise Exception("The file is a RealESRGAN model, it can't be used as an ESRGAN model.")
23
+ else:
24
+ raise Exception("The file is not an ESRGAN model.")
25
+
26
+ crt_net = crt_model.state_dict()
27
+ load_net_clean = {}
28
+ for k, v in pretrained_net.items():
29
+ if k.startswith('module.'):
30
+ load_net_clean[k[7:]] = v
31
+ else:
32
+ load_net_clean[k] = v
33
+ pretrained_net = load_net_clean
34
+
35
+ tbd = []
36
+ for k, v in crt_net.items():
37
+ tbd.append(k)
38
+
39
+ # directly copy
40
+ for k, v in crt_net.items():
41
+ if k in pretrained_net and pretrained_net[k].size() == v.size():
42
+ crt_net[k] = pretrained_net[k]
43
+ tbd.remove(k)
44
+
45
+ crt_net['conv_first.weight'] = pretrained_net['model.0.weight']
46
+ crt_net['conv_first.bias'] = pretrained_net['model.0.bias']
47
+
48
+ for k in tbd.copy():
49
+ if 'RDB' in k:
50
+ ori_k = k.replace('RRDB_trunk.', 'model.1.sub.')
51
+ if '.weight' in k:
52
+ ori_k = ori_k.replace('.weight', '.0.weight')
53
+ elif '.bias' in k:
54
+ ori_k = ori_k.replace('.bias', '.0.bias')
55
+ crt_net[k] = pretrained_net[ori_k]
56
+ tbd.remove(k)
57
+
58
+ crt_net['trunk_conv.weight'] = pretrained_net['model.1.sub.23.weight']
59
+ crt_net['trunk_conv.bias'] = pretrained_net['model.1.sub.23.bias']
60
+ crt_net['upconv1.weight'] = pretrained_net['model.3.weight']
61
+ crt_net['upconv1.bias'] = pretrained_net['model.3.bias']
62
+ crt_net['upconv2.weight'] = pretrained_net['model.6.weight']
63
+ crt_net['upconv2.bias'] = pretrained_net['model.6.bias']
64
+ crt_net['HRconv.weight'] = pretrained_net['model.8.weight']
65
+ crt_net['HRconv.bias'] = pretrained_net['model.8.bias']
66
+ crt_net['conv_last.weight'] = pretrained_net['model.10.weight']
67
+ crt_net['conv_last.bias'] = pretrained_net['model.10.bias']
68
+
69
+ return crt_net
70
+
71
+ class UpscalerESRGAN(Upscaler):
72
+ def __init__(self, dirname):
73
+ self.name = "ESRGAN"
74
+ self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/ESRGAN.pth"
75
+ self.model_name = "ESRGAN_4x"
76
+ self.scalers = []
77
+ self.user_path = dirname
78
+ super().__init__()
79
+ model_paths = self.find_models(ext_filter=[".pt", ".pth"])
80
+ scalers = []
81
+ if len(model_paths) == 0:
82
+ scaler_data = UpscalerData(self.model_name, self.model_url, self, 4)
83
+ scalers.append(scaler_data)
84
+ for file in model_paths:
85
+ if "http" in file:
86
+ name = self.model_name
87
+ else:
88
+ name = modelloader.friendly_name(file)
89
+
90
+ scaler_data = UpscalerData(name, file, self, 4)
91
+ self.scalers.append(scaler_data)
92
+
93
+ def do_upscale(self, img, selected_model):
94
+ model = self.load_model(selected_model)
95
+ if model is None:
96
+ return img
97
+ model.to(devices.device_esrgan)
98
+ img = esrgan_upscale(model, img)
99
+ return img
100
+
101
+ def load_model(self, path: str):
102
+ if "http" in path:
103
+ filename = load_file_from_url(url=self.model_url, model_dir=self.model_path,
104
+ file_name="%s.pth" % self.model_name,
105
+ progress=True)
106
+ else:
107
+ filename = path
108
+ if not os.path.exists(filename) or filename is None:
109
+ print("Unable to load %s from %s" % (self.model_path, filename))
110
+ return None
111
+
112
+ pretrained_net = torch.load(filename, map_location='cpu' if devices.device_esrgan.type == 'mps' else None)
113
+ crt_model = arch.RRDBNet(3, 3, 64, 23, gc=32)
114
+
115
+ pretrained_net = fix_model_layers(crt_model, pretrained_net)
116
+ crt_model.load_state_dict(pretrained_net)
117
+ crt_model.eval()
118
+
119
+ return crt_model
120
+
121
+
122
+ def upscale_without_tiling(model, img):
123
+ img = np.array(img)
124
+ img = img[:, :, ::-1]
125
+ img = np.moveaxis(img, 2, 0) / 255
126
+ img = torch.from_numpy(img).float()
127
+ img = img.unsqueeze(0).to(devices.device_esrgan)
128
+ with torch.no_grad():
129
+ output = model(img)
130
+ output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
131
+ output = 255. * np.moveaxis(output, 0, 2)
132
+ output = output.astype(np.uint8)
133
+ output = output[:, :, ::-1]
134
+ return Image.fromarray(output, 'RGB')
135
+
136
+
137
+ def esrgan_upscale(model, img):
138
+ if opts.ESRGAN_tile == 0:
139
+ return upscale_without_tiling(model, img)
140
+
141
+ grid = images.split_grid(img, opts.ESRGAN_tile, opts.ESRGAN_tile, opts.ESRGAN_tile_overlap)
142
+ newtiles = []
143
+ scale_factor = 1
144
+
145
+ for y, h, row in grid.tiles:
146
+ newrow = []
147
+ for tiledata in row:
148
+ x, w, tile = tiledata
149
+
150
+ output = upscale_without_tiling(model, tile)
151
+ scale_factor = output.width // tile.width
152
+
153
+ newrow.append([x * scale_factor, w * scale_factor, output])
154
+ newtiles.append([y * scale_factor, h * scale_factor, newrow])
155
+
156
+ newgrid = images.Grid(newtiles, grid.tile_w * scale_factor, grid.tile_h * scale_factor, grid.image_w * scale_factor, grid.image_h * scale_factor, grid.overlap * scale_factor)
157
+ output = images.combine_grid(newgrid)
158
+ return output
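A hypothetical end-to-end use of the upscaler above (not part of the commit): it assumes an ESRGAN_4x checkpoint already sits in a models/ESRGAN folder and that shared.opts has been initialised, since esrgan_upscale() reads opts.ESRGAN_tile to choose between whole-image and tiled upscaling.

```python
from PIL import Image
from modules import devices
from modules.esrgan_model import UpscalerESRGAN, esrgan_upscale

upscaler = UpscalerESRGAN("models/ESRGAN")                  # hypothetical models folder
model = upscaler.load_model("models/ESRGAN/ESRGAN_4x.pth")  # hypothetical checkpoint path
model.to(devices.device_esrgan)

img = Image.open("photo.png").convert("RGB")                # hypothetical input image
result = esrgan_upscale(model, img)                         # tiled if opts.ESRGAN_tile > 0, whole image otherwise
result.save("photo_4x.png")
```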
modules/esrgan_model_arch.py ADDED
@@ -0,0 +1,80 @@
+ # this file is taken from https://github.com/xinntao/ESRGAN
+
+ import functools
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+
+ def make_layer(block, n_layers):
+     layers = []
+     for _ in range(n_layers):
+         layers.append(block())
+     return nn.Sequential(*layers)
+
+
+ class ResidualDenseBlock_5C(nn.Module):
+     def __init__(self, nf=64, gc=32, bias=True):
+         super(ResidualDenseBlock_5C, self).__init__()
+         # gc: growth channel, i.e. intermediate channels
+         self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias)
+         self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias)
+         self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias)
+         self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias)
+         self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias)
+         self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
+
+         # initialization
+         # mutil.initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)
+
+     def forward(self, x):
+         x1 = self.lrelu(self.conv1(x))
+         x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1)))
+         x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1)))
+         x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1)))
+         x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
+         return x5 * 0.2 + x
+
+
+ class RRDB(nn.Module):
+     '''Residual in Residual Dense Block'''
+
+     def __init__(self, nf, gc=32):
+         super(RRDB, self).__init__()
+         self.RDB1 = ResidualDenseBlock_5C(nf, gc)
+         self.RDB2 = ResidualDenseBlock_5C(nf, gc)
+         self.RDB3 = ResidualDenseBlock_5C(nf, gc)
+
+     def forward(self, x):
+         out = self.RDB1(x)
+         out = self.RDB2(out)
+         out = self.RDB3(out)
+         return out * 0.2 + x
+
+
+ class RRDBNet(nn.Module):
+     def __init__(self, in_nc, out_nc, nf, nb, gc=32):
+         super(RRDBNet, self).__init__()
+         RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc)
+
+         self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True)
+         self.RRDB_trunk = make_layer(RRDB_block_f, nb)
+         self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
+         #### upsampling
+         self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
+         self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
+         self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
+         self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True)
+
+         self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
+
+     def forward(self, x):
+         fea = self.conv_first(x)
+         trunk = self.trunk_conv(self.RRDB_trunk(fea))
+         fea = fea + trunk
+
+         fea = self.lrelu(self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest')))
+         fea = self.lrelu(self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest')))
+         out = self.conv_last(self.lrelu(self.HRconv(fea)))
+
+         return out
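RRDBNet always upsamples by 4x: the RRDB trunk preserves spatial size and the head applies two nearest-neighbour 2x interpolations. A standalone shape check (illustrative; needs only torch):

```python
import torch
from modules.esrgan_model_arch import RRDBNet

net = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23, gc=32)  # the configuration used in esrgan_model.py
x = torch.randn(1, 3, 64, 64)
with torch.no_grad():
    y = net(x)
print(y.shape)  # torch.Size([1, 3, 256, 256]) -- two 2x nearest-neighbour upsampling steps
```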
modules/extras.py ADDED
@@ -0,0 +1,222 @@
1
+ import math
2
+ import os
3
+
4
+ import numpy as np
5
+ from PIL import Image
6
+
7
+ import torch
8
+ import tqdm
9
+
10
+ from modules import processing, shared, images, devices, sd_models
11
+ from modules.shared import opts
12
+ import modules.gfpgan_model
13
+ from modules.ui import plaintext_to_html
14
+ import modules.codeformer_model
15
+ import piexif
16
+ import piexif.helper
17
+ import gradio as gr
18
+
19
+
20
+ cached_images = {}
21
+
22
+
23
+ def run_extras(extras_mode, resize_mode, image, image_folder, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility):
24
+ devices.torch_gc()
25
+
26
+ imageArr = []
27
+ # Also keep track of original file names
28
+ imageNameArr = []
29
+
30
+ if extras_mode == 1:
31
+ #convert file to pillow image
32
+ for img in image_folder:
33
+ image = Image.open(img)
34
+ imageArr.append(image)
35
+ imageNameArr.append(os.path.splitext(img.orig_name)[0])
36
+ else:
37
+ imageArr.append(image)
38
+ imageNameArr.append(None)
39
+
40
+ outpath = opts.outdir_samples or opts.outdir_extras_samples
41
+
42
+ outputs = []
43
+ for image, image_name in zip(imageArr, imageNameArr):
44
+ if image is None:
45
+ return outputs, "Please select an input image.", ''
46
+ existing_pnginfo = image.info or {}
47
+
48
+ image = image.convert("RGB")
49
+ info = ""
50
+
51
+ if gfpgan_visibility > 0:
52
+ restored_img = modules.gfpgan_model.gfpgan_fix_faces(np.array(image, dtype=np.uint8))
53
+ res = Image.fromarray(restored_img)
54
+
55
+ if gfpgan_visibility < 1.0:
56
+ res = Image.blend(image, res, gfpgan_visibility)
57
+
58
+ info += f"GFPGAN visibility:{round(gfpgan_visibility, 2)}\n"
59
+ image = res
60
+
61
+ if codeformer_visibility > 0:
62
+ restored_img = modules.codeformer_model.codeformer.restore(np.array(image, dtype=np.uint8), w=codeformer_weight)
63
+ res = Image.fromarray(restored_img)
64
+
65
+ if codeformer_visibility < 1.0:
66
+ res = Image.blend(image, res, codeformer_visibility)
67
+
68
+ info += f"CodeFormer w: {round(codeformer_weight, 2)}, CodeFormer visibility:{round(codeformer_visibility, 2)}\n"
69
+ image = res
70
+
71
+ if resize_mode == 1:
72
+ upscaling_resize = max(upscaling_resize_w/image.width, upscaling_resize_h/image.height)
73
+ crop_info = " (crop)" if upscaling_crop else ""
74
+ info += f"Resize to: {upscaling_resize_w:g}x{upscaling_resize_h:g}{crop_info}\n"
75
+
76
+ if upscaling_resize != 1.0:
77
+ def upscale(image, scaler_index, resize, mode, resize_w, resize_h, crop):
78
+ small = image.crop((image.width // 2, image.height // 2, image.width // 2 + 10, image.height // 2 + 10))
79
+ pixels = tuple(np.array(small).flatten().tolist())
80
+ key = (resize, scaler_index, image.width, image.height, gfpgan_visibility, codeformer_visibility, codeformer_weight) + pixels
81
+
82
+ c = cached_images.get(key)
83
+ if c is None:
84
+ upscaler = shared.sd_upscalers[scaler_index]
85
+ c = upscaler.scaler.upscale(image, resize, upscaler.data_path)
86
+ if mode == 1 and crop:
87
+ cropped = Image.new("RGB", (resize_w, resize_h))
88
+ cropped.paste(c, box=(resize_w // 2 - c.width // 2, resize_h // 2 - c.height // 2))
89
+ c = cropped
90
+ cached_images[key] = c
91
+
92
+ return c
93
+
94
+ info += f"Upscale: {round(upscaling_resize, 3)}, model:{shared.sd_upscalers[extras_upscaler_1].name}\n"
95
+ res = upscale(image, extras_upscaler_1, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop)
96
+
97
+ if extras_upscaler_2 != 0 and extras_upscaler_2_visibility > 0:
98
+ res2 = upscale(image, extras_upscaler_2, upscaling_resize, resize_mode, upscaling_resize_w, upscaling_resize_h, upscaling_crop)
99
+ info += f"Upscale: {round(upscaling_resize, 3)}, visibility: {round(extras_upscaler_2_visibility, 3)}, model:{shared.sd_upscalers[extras_upscaler_2].name}\n"
100
+ res = Image.blend(res, res2, extras_upscaler_2_visibility)
101
+
102
+ image = res
103
+
104
+ while len(cached_images) > 2:
105
+ del cached_images[next(iter(cached_images.keys()))]
106
+
107
+ images.save_image(image, path=outpath, basename="", seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,
108
+ no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo,
109
+ forced_filename=image_name if opts.use_original_name_batch else None)
110
+
111
+ if opts.enable_pnginfo:
112
+ image.info = existing_pnginfo
113
+ image.info["extras"] = info
114
+
115
+ outputs.append(image)
116
+
117
+ devices.torch_gc()
118
+
119
+ return outputs, plaintext_to_html(info), ''
120
+
121
+
122
+ def run_pnginfo(image):
123
+ if image is None:
124
+ return '', '', ''
125
+
126
+ items = image.info
127
+ geninfo = ''
128
+
129
+ if "exif" in image.info:
130
+ exif = piexif.load(image.info["exif"])
131
+ exif_comment = (exif or {}).get("Exif", {}).get(piexif.ExifIFD.UserComment, b'')
132
+ try:
133
+ exif_comment = piexif.helper.UserComment.load(exif_comment)
134
+ except ValueError:
135
+ exif_comment = exif_comment.decode('utf8', errors="ignore")
136
+
137
+ items['exif comment'] = exif_comment
138
+ geninfo = exif_comment
139
+
140
+ for field in ['jfif', 'jfif_version', 'jfif_unit', 'jfif_density', 'dpi', 'exif',
141
+ 'loop', 'background', 'timestamp', 'duration']:
142
+ items.pop(field, None)
143
+
144
+ geninfo = items.get('parameters', geninfo)
145
+
146
+ info = ''
147
+ for key, text in items.items():
148
+ info += f"""
149
+ <div>
150
+ <p><b>{plaintext_to_html(str(key))}</b></p>
151
+ <p>{plaintext_to_html(str(text))}</p>
152
+ </div>
153
+ """.strip()+"\n"
154
+
155
+ if len(info) == 0:
156
+ message = "Nothing found in the image."
157
+ info = f"<div><p>{message}<p></div>"
158
+
159
+ return '', geninfo, info
160
+
161
+
162
+ def run_modelmerger(primary_model_name, secondary_model_name, interp_method, interp_amount, save_as_half, custom_name):
163
+ # Linear interpolation (https://en.wikipedia.org/wiki/Linear_interpolation)
164
+ def weighted_sum(theta0, theta1, alpha):
165
+ return ((1 - alpha) * theta0) + (alpha * theta1)
166
+
167
+ # Smoothstep (https://en.wikipedia.org/wiki/Smoothstep)
168
+ def sigmoid(theta0, theta1, alpha):
169
+ alpha = alpha * alpha * (3 - (2 * alpha))
170
+ return theta0 + ((theta1 - theta0) * alpha)
171
+
172
+ # Inverse Smoothstep (https://en.wikipedia.org/wiki/Smoothstep)
173
+ def inv_sigmoid(theta0, theta1, alpha):
174
+ import math
175
+ alpha = 0.5 - math.sin(math.asin(1.0 - 2.0 * alpha) / 3.0)
176
+ return theta0 + ((theta1 - theta0) * alpha)
177
+
178
+ primary_model_info = sd_models.checkpoints_list[primary_model_name]
179
+ secondary_model_info = sd_models.checkpoints_list[secondary_model_name]
180
+
181
+ print(f"Loading {primary_model_info.filename}...")
182
+ primary_model = torch.load(primary_model_info.filename, map_location='cpu')
183
+
184
+ print(f"Loading {secondary_model_info.filename}...")
185
+ secondary_model = torch.load(secondary_model_info.filename, map_location='cpu')
186
+
187
+ theta_0 = sd_models.get_state_dict_from_checkpoint(primary_model)
188
+ theta_1 = sd_models.get_state_dict_from_checkpoint(secondary_model)
189
+
190
+ theta_funcs = {
191
+ "Weighted Sum": weighted_sum,
192
+ "Sigmoid": sigmoid,
193
+ "Inverse Sigmoid": inv_sigmoid,
194
+ }
195
+ theta_func = theta_funcs[interp_method]
196
+
197
+ print(f"Merging...")
198
+ for key in tqdm.tqdm(theta_0.keys()):
199
+ if 'model' in key and key in theta_1:
200
+ theta_0[key] = theta_func(theta_0[key], theta_1[key], (float(1.0) - interp_amount)) # Need to reverse the interp_amount to match the desired mix ration in the merged checkpoint
201
+ if save_as_half:
202
+ theta_0[key] = theta_0[key].half()
203
+
204
+ for key in theta_1.keys():
205
+ if 'model' in key and key not in theta_0:
206
+ theta_0[key] = theta_1[key]
207
+ if save_as_half:
208
+ theta_0[key] = theta_0[key].half()
209
+
210
+ ckpt_dir = shared.cmd_opts.ckpt_dir or sd_models.model_path
211
+
212
+ filename = primary_model_info.model_name + '_' + str(round(interp_amount, 2)) + '-' + secondary_model_info.model_name + '_' + str(round((float(1.0) - interp_amount), 2)) + '-' + interp_method.replace(" ", "_") + '-merged.ckpt'
213
+ filename = filename if custom_name == '' else (custom_name + '.ckpt')
214
+ output_modelname = os.path.join(ckpt_dir, filename)
215
+
216
+ print(f"Saving to {output_modelname}...")
217
+ torch.save(primary_model, output_modelname)
218
+
219
+ sd_models.list_models()
220
+
221
+ print(f"Checkpoint saved.")
222
+ return ["Checkpoint saved to " + output_modelname] + [gr.Dropdown.update(choices=sd_models.checkpoint_tiles()) for _ in range(3)]
modules/face_restoration.py ADDED
@@ -0,0 +1,19 @@
+ from modules import shared
+
+
+ class FaceRestoration:
+     def name(self):
+         return "None"
+
+     def restore(self, np_image):
+         return np_image
+
+
+ def restore_faces(np_image):
+     face_restorers = [x for x in shared.face_restorers if x.name() == shared.opts.face_restoration_model or shared.opts.face_restoration_model is None]
+     if len(face_restorers) == 0:
+         return np_image
+
+     face_restorer = face_restorers[0]
+
+     return face_restorer.restore(np_image)
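restore_faces() simply dispatches to the first registered restorer whose name() matches opts.face_restoration_model, so adding a backend means subclassing FaceRestoration and appending an instance to shared.face_restorers, as gfpgan_model.py does below. A minimal sketch (the Identity restorer is made up):

```python
from modules import face_restoration, shared

class FaceRestorerIdentity(face_restoration.FaceRestoration):
    def name(self):
        return "Identity"              # the name shown in the settings dropdown

    def restore(self, np_image):
        return np_image                # a real backend would repair faces here

shared.face_restorers.append(FaceRestorerIdentity())
```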
modules/generation_parameters_copypaste.py ADDED
@@ -0,0 +1,101 @@
+ import os
+ import re
+ import gradio as gr
+ from modules.shared import script_path
+ from modules import shared
+
+ re_param_code = r"\s*([\w ]+):\s*([^,]+)(?:,|$)"
+ re_param = re.compile(re_param_code)
+ re_params = re.compile(r"^(?:" + re_param_code + "){3,}$")
+ re_imagesize = re.compile(r"^(\d+)x(\d+)$")
+ type_of_gr_update = type(gr.update())
+
+
+ def parse_generation_parameters(x: str):
+     """parses generation parameters string, the one you see in text field under the picture in UI:
+ ```
+ girl with an artist's beret, determined, blue eyes, desert scene, computer monitors, heavy makeup, by Alphonse Mucha and Charlie Bowater, ((eyeshadow)), (coquettish), detailed, intricate
+ Negative prompt: ugly, fat, obese, chubby, (((deformed))), [blurry], bad anatomy, disfigured, poorly drawn face, mutation, mutated, (extra_limb), (ugly), (poorly drawn hands), messy drawing
+ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model hash: 45dee52b
+ ```
+
+     returns a dict with field values
+     """
+
+     res = {}
+
+     prompt = ""
+     negative_prompt = ""
+
+     done_with_prompt = False
+
+     *lines, lastline = x.strip().split("\n")
+     if not re_params.match(lastline):
+         lines.append(lastline)
+         lastline = ''
+
+     for i, line in enumerate(lines):
+         line = line.strip()
+         if line.startswith("Negative prompt:"):
+             done_with_prompt = True
+             line = line[16:].strip()
+
+         if done_with_prompt:
+             negative_prompt += ("" if negative_prompt == "" else "\n") + line
+         else:
+             prompt += ("" if prompt == "" else "\n") + line
+
+     if len(prompt) > 0:
+         res["Prompt"] = prompt
+
+     if len(negative_prompt) > 0:
+         res["Negative prompt"] = negative_prompt
+
+     for k, v in re_param.findall(lastline):
+         m = re_imagesize.match(v)
+         if m is not None:
+             res[k+"-1"] = m.group(1)
+             res[k+"-2"] = m.group(2)
+         else:
+             res[k] = v
+
+     return res
+
+
+ def connect_paste(button, paste_fields, input_comp, js=None):
+     def paste_func(prompt):
+         if not prompt and not shared.cmd_opts.hide_ui_dir_config:
+             filename = os.path.join(script_path, "params.txt")
+             if os.path.exists(filename):
+                 with open(filename, "r", encoding="utf8") as file:
+                     prompt = file.read()
+
+         params = parse_generation_parameters(prompt)
+         res = []
+
+         for output, key in paste_fields:
+             if callable(key):
+                 v = key(params)
+             else:
+                 v = params.get(key, None)
+
+             if v is None:
+                 res.append(gr.update())
+             elif isinstance(v, type_of_gr_update):
+                 res.append(v)
+             else:
+                 try:
+                     valtype = type(output.value)
+                     val = valtype(v)
+                     res.append(gr.update(value=val))
+                 except Exception:
+                     res.append(gr.update())
+
+         return res
+
+     button.click(
+         fn=paste_func,
+         _js=js,
+         inputs=[input_comp],
+         outputs=[x[0] for x in paste_fields],
+     )
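parse_generation_parameters() returns a flat dict of strings: the prompt block under "Prompt"/"Negative prompt", each "key: value" pair from the last line under its own key, and WxH sizes split into "-1"/"-2" keys. An illustrative call with a shortened infotext:

```python
from modules.generation_parameters_copypaste import parse_generation_parameters

text = """a photo of a cat
Negative prompt: blurry
Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 42, Size: 512x768, Model hash: 45dee52b"""

params = parse_generation_parameters(text)
print(params["Prompt"])                      # 'a photo of a cat'
print(params["Negative prompt"])             # 'blurry'
print(params["Steps"], params["Sampler"])    # '20' 'Euler a'
print(params["Size-1"], params["Size-2"])    # '512' '768'
```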
modules/gfpgan_model.py ADDED
@@ -0,0 +1,115 @@
1
+ import os
2
+ import sys
3
+ import traceback
4
+
5
+ import facexlib
6
+ import gfpgan
7
+
8
+ import modules.face_restoration
9
+ from modules import shared, devices, modelloader
10
+ from modules.paths import models_path
11
+
12
+ model_dir = "GFPGAN"
13
+ user_path = None
14
+ model_path = os.path.join(models_path, model_dir)
15
+ model_url = "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth"
16
+ have_gfpgan = False
17
+ loaded_gfpgan_model = None
18
+
19
+
20
+ def gfpgann():
21
+ global loaded_gfpgan_model
22
+ global model_path
23
+ if loaded_gfpgan_model is not None:
24
+ loaded_gfpgan_model.gfpgan.to(devices.device_gfpgan)
25
+ return loaded_gfpgan_model
26
+
27
+ if gfpgan_constructor is None:
28
+ return None
29
+
30
+ models = modelloader.load_models(model_path, model_url, user_path, ext_filter="GFPGAN")
31
+ if len(models) == 1 and "http" in models[0]:
32
+ model_file = models[0]
33
+ elif len(models) != 0:
34
+ latest_file = max(models, key=os.path.getctime)
35
+ model_file = latest_file
36
+ else:
37
+ print("Unable to load gfpgan model!")
38
+ return None
39
+ model = gfpgan_constructor(model_path=model_file, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None)
40
+ loaded_gfpgan_model = model
41
+
42
+ return model
43
+
44
+
45
+ def send_model_to(model, device):
46
+ model.gfpgan.to(device)
47
+ model.face_helper.face_det.to(device)
48
+ model.face_helper.face_parse.to(device)
49
+
50
+
51
+ def gfpgan_fix_faces(np_image):
52
+ model = gfpgann()
53
+ if model is None:
54
+ return np_image
55
+
56
+ send_model_to(model, devices.device_gfpgan)
57
+
58
+ np_image_bgr = np_image[:, :, ::-1]
59
+ cropped_faces, restored_faces, gfpgan_output_bgr = model.enhance(np_image_bgr, has_aligned=False, only_center_face=False, paste_back=True)
60
+ np_image = gfpgan_output_bgr[:, :, ::-1]
61
+
62
+ model.face_helper.clean_all()
63
+
64
+ if shared.opts.face_restoration_unload:
65
+ send_model_to(model, devices.cpu)
66
+
67
+ return np_image
68
+
69
+
70
+ gfpgan_constructor = None
71
+
72
+
73
+ def setup_model(dirname):
74
+ global model_path
75
+ if not os.path.exists(model_path):
76
+ os.makedirs(model_path)
77
+
78
+ try:
79
+ from gfpgan import GFPGANer
80
+ from facexlib import detection, parsing
81
+ global user_path
82
+ global have_gfpgan
83
+ global gfpgan_constructor
84
+
85
+ load_file_from_url_orig = gfpgan.utils.load_file_from_url
86
+ facex_load_file_from_url_orig = facexlib.detection.load_file_from_url
87
+ facex_load_file_from_url_orig2 = facexlib.parsing.load_file_from_url
88
+
89
+ def my_load_file_from_url(**kwargs):
90
+ return load_file_from_url_orig(**dict(kwargs, model_dir=model_path))
91
+
92
+ def facex_load_file_from_url(**kwargs):
93
+ return facex_load_file_from_url_orig(**dict(kwargs, save_dir=model_path, model_dir=None))
94
+
95
+ def facex_load_file_from_url2(**kwargs):
96
+ return facex_load_file_from_url_orig2(**dict(kwargs, save_dir=model_path, model_dir=None))
97
+
98
+ gfpgan.utils.load_file_from_url = my_load_file_from_url
99
+ facexlib.detection.load_file_from_url = facex_load_file_from_url
100
+ facexlib.parsing.load_file_from_url = facex_load_file_from_url2
101
+ user_path = dirname
102
+ have_gfpgan = True
103
+ gfpgan_constructor = GFPGANer
104
+
105
+ class FaceRestorerGFPGAN(modules.face_restoration.FaceRestoration):
106
+ def name(self):
107
+ return "GFPGAN"
108
+
109
+ def restore(self, np_image):
110
+ return gfpgan_fix_faces(np_image)
111
+
112
+ shared.face_restorers.append(FaceRestorerGFPGAN())
113
+ except Exception:
114
+ print("Error setting up GFPGAN:", file=sys.stderr)
115
+ print(traceback.format_exc(), file=sys.stderr)
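A hypothetical call sequence for the wrapper above (not part of the commit; it requires the gfpgan and facexlib packages, and the GFPGAN checkpoint is downloaded on first use): setup_model() registers the restorer and patches the download helpers, then gfpgan_fix_faces() operates on RGB uint8 arrays.

```python
import numpy as np
from PIL import Image
import modules.gfpgan_model as gfpgan_model

gfpgan_model.setup_model("models/GFPGAN")              # hypothetical models folder
img = np.array(Image.open("face.png").convert("RGB"), dtype=np.uint8)
restored = gfpgan_model.gfpgan_fix_faces(img)          # returns an RGB uint8 array
Image.fromarray(restored).save("face_restored.png")
```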
modules/hypernetworks/hypernetwork.py ADDED
@@ -0,0 +1,314 @@
1
+ import datetime
2
+ import glob
3
+ import html
4
+ import os
5
+ import sys
6
+ import traceback
7
+ import tqdm
8
+
9
+ import torch
10
+
11
+ from ldm.util import default
12
+ from modules import devices, shared, processing, sd_models
13
+ import torch
14
+ from torch import einsum
15
+ from einops import rearrange, repeat
16
+ import modules.textual_inversion.dataset
17
+ from modules.textual_inversion.learn_schedule import LearnRateScheduler
18
+
19
+
20
+ class HypernetworkModule(torch.nn.Module):
21
+ multiplier = 1.0
22
+
23
+ def __init__(self, dim, state_dict=None):
24
+ super().__init__()
25
+
26
+ self.linear1 = torch.nn.Linear(dim, dim * 2)
27
+ self.linear2 = torch.nn.Linear(dim * 2, dim)
28
+
29
+ if state_dict is not None:
30
+ self.load_state_dict(state_dict, strict=True)
31
+ else:
32
+
33
+ self.linear1.weight.data.normal_(mean=0.0, std=0.01)
34
+ self.linear1.bias.data.zero_()
35
+ self.linear2.weight.data.normal_(mean=0.0, std=0.01)
36
+ self.linear2.bias.data.zero_()
37
+
38
+ self.to(devices.device)
39
+
40
+ def forward(self, x):
41
+ return x + (self.linear2(self.linear1(x))) * self.multiplier
42
+
43
+
44
+ def apply_strength(value=None):
45
+ HypernetworkModule.multiplier = value if value is not None else shared.opts.sd_hypernetwork_strength
46
+
47
+
48
+ class Hypernetwork:
49
+ filename = None
50
+ name = None
51
+
52
+ def __init__(self, name=None, enable_sizes=None):
53
+ self.filename = None
54
+ self.name = name
55
+ self.layers = {}
56
+ self.step = 0
57
+ self.sd_checkpoint = None
58
+ self.sd_checkpoint_name = None
59
+
60
+ for size in enable_sizes or []:
61
+ self.layers[size] = (HypernetworkModule(size), HypernetworkModule(size))
62
+
63
+ def weights(self):
64
+ res = []
65
+
66
+ for k, layers in self.layers.items():
67
+ for layer in layers:
68
+ layer.train()
69
+ res += [layer.linear1.weight, layer.linear1.bias, layer.linear2.weight, layer.linear2.bias]
70
+
71
+ return res
72
+
73
+ def save(self, filename):
74
+ state_dict = {}
75
+
76
+ for k, v in self.layers.items():
77
+ state_dict[k] = (v[0].state_dict(), v[1].state_dict())
78
+
79
+ state_dict['step'] = self.step
80
+ state_dict['name'] = self.name
81
+ state_dict['sd_checkpoint'] = self.sd_checkpoint
82
+ state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
83
+
84
+ torch.save(state_dict, filename)
85
+
86
+ def load(self, filename):
87
+ self.filename = filename
88
+ if self.name is None:
89
+ self.name = os.path.splitext(os.path.basename(filename))[0]
90
+
91
+ state_dict = torch.load(filename, map_location='cpu')
92
+
93
+ for size, sd in state_dict.items():
94
+ if type(size) == int:
95
+ self.layers[size] = (HypernetworkModule(size, sd[0]), HypernetworkModule(size, sd[1]))
96
+
97
+ self.name = state_dict.get('name', self.name)
98
+ self.step = state_dict.get('step', 0)
99
+ self.sd_checkpoint = state_dict.get('sd_checkpoint', None)
100
+ self.sd_checkpoint_name = state_dict.get('sd_checkpoint_name', None)
101
+
102
+
103
+ def list_hypernetworks(path):
104
+ res = {}
105
+ for filename in glob.iglob(os.path.join(path, '**/*.pt'), recursive=True):
106
+ name = os.path.splitext(os.path.basename(filename))[0]
107
+ res[name] = filename
108
+ return res
109
+
110
+
111
+ def load_hypernetwork(filename):
112
+ path = shared.hypernetworks.get(filename, None)
113
+ if path is not None:
114
+ print(f"Loading hypernetwork {filename}")
115
+ try:
116
+ shared.loaded_hypernetwork = Hypernetwork()
117
+ shared.loaded_hypernetwork.load(path)
118
+
119
+ except Exception:
120
+ print(f"Error loading hypernetwork {path}", file=sys.stderr)
121
+ print(traceback.format_exc(), file=sys.stderr)
122
+ else:
123
+ if shared.loaded_hypernetwork is not None:
124
+ print(f"Unloading hypernetwork")
125
+
126
+ shared.loaded_hypernetwork = None
127
+
128
+
129
+ def find_closest_hypernetwork_name(search: str):
130
+ if not search:
131
+ return None
132
+ search = search.lower()
133
+ applicable = [name for name in shared.hypernetworks if search in name.lower()]
134
+ if not applicable:
135
+ return None
136
+ applicable = sorted(applicable, key=lambda name: len(name))
137
+ return applicable[0]
138
+
139
+
140
+ def apply_hypernetwork(hypernetwork, context, layer=None):
141
+ hypernetwork_layers = (hypernetwork.layers if hypernetwork is not None else {}).get(context.shape[2], None)
142
+
143
+ if hypernetwork_layers is None:
144
+ return context, context
145
+
146
+ if layer is not None:
147
+ layer.hyper_k = hypernetwork_layers[0]
148
+ layer.hyper_v = hypernetwork_layers[1]
149
+
150
+ context_k = hypernetwork_layers[0](context)
151
+ context_v = hypernetwork_layers[1](context)
152
+ return context_k, context_v
153
+
154
+
155
+ def attention_CrossAttention_forward(self, x, context=None, mask=None):
156
+ h = self.heads
157
+
158
+ q = self.to_q(x)
159
+ context = default(context, x)
160
+
161
+ context_k, context_v = apply_hypernetwork(shared.loaded_hypernetwork, context, self)
162
+ k = self.to_k(context_k)
163
+ v = self.to_v(context_v)
164
+
165
+ q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v))
166
+
167
+ sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
168
+
169
+ if mask is not None:
170
+ mask = rearrange(mask, 'b ... -> b (...)')
171
+ max_neg_value = -torch.finfo(sim.dtype).max
172
+ mask = repeat(mask, 'b j -> (b h) () j', h=h)
173
+ sim.masked_fill_(~mask, max_neg_value)
174
+
175
+ # attention, what we cannot get enough of
176
+ attn = sim.softmax(dim=-1)
177
+
178
+ out = einsum('b i j, b j d -> b i d', attn, v)
179
+ out = rearrange(out, '(b h) n d -> b n (h d)', h=h)
180
+ return self.to_out(out)
181
+
182
+
183
+ def train_hypernetwork(hypernetwork_name, learn_rate, data_root, log_directory, steps, create_image_every, save_hypernetwork_every, template_file, preview_image_prompt):
184
+ assert hypernetwork_name, 'hypernetwork not selected'
185
+
186
+ path = shared.hypernetworks.get(hypernetwork_name, None)
187
+ shared.loaded_hypernetwork = Hypernetwork()
188
+ shared.loaded_hypernetwork.load(path)
189
+
190
+ shared.state.textinfo = "Initializing hypernetwork training..."
191
+ shared.state.job_count = steps
192
+
193
+ filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')
194
+
195
+ log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%m-%d"), hypernetwork_name)
196
+ unload = shared.opts.unload_models_when_training
197
+
198
+ if save_hypernetwork_every > 0:
199
+ hypernetwork_dir = os.path.join(log_directory, "hypernetworks")
200
+ os.makedirs(hypernetwork_dir, exist_ok=True)
201
+ else:
202
+ hypernetwork_dir = None
203
+
204
+ if create_image_every > 0:
205
+ images_dir = os.path.join(log_directory, "images")
206
+ os.makedirs(images_dir, exist_ok=True)
207
+ else:
208
+ images_dir = None
209
+
210
+ shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
211
+ with torch.autocast("cuda"):
212
+ ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=512, height=512, repeats=1, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True)
213
+
214
+ if unload:
215
+ shared.sd_model.cond_stage_model.to(devices.cpu)
216
+ shared.sd_model.first_stage_model.to(devices.cpu)
217
+
218
+ hypernetwork = shared.loaded_hypernetwork
219
+ weights = hypernetwork.weights()
220
+ for weight in weights:
221
+ weight.requires_grad = True
222
+
223
+ losses = torch.zeros((32,))
224
+
225
+ last_saved_file = "<none>"
226
+ last_saved_image = "<none>"
227
+
228
+ ititial_step = hypernetwork.step or 0
229
+ if ititial_step > steps:
230
+ return hypernetwork, filename
231
+
232
+ scheduler = LearnRateScheduler(learn_rate, steps, ititial_step)
233
+ optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate)
234
+
235
+ pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step)
236
+ for i, entry in pbar:
237
+ hypernetwork.step = i + ititial_step
238
+
239
+ scheduler.apply(optimizer, hypernetwork.step)
240
+ if scheduler.finished:
241
+ break
242
+
243
+ if shared.state.interrupted:
244
+ break
245
+
246
+ with torch.autocast("cuda"):
247
+ cond = entry.cond.to(devices.device)
248
+ x = entry.latent.to(devices.device)
249
+ loss = shared.sd_model(x.unsqueeze(0), cond)[0]
250
+ del x
251
+ del cond
252
+
253
+ losses[hypernetwork.step % losses.shape[0]] = loss.item()
254
+
255
+ optimizer.zero_grad()
256
+ loss.backward()
257
+ optimizer.step()
258
+
259
+ pbar.set_description(f"loss: {losses.mean():.7f}")
260
+
261
+ if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0:
262
+ last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name}-{hypernetwork.step}.pt')
263
+ hypernetwork.save(last_saved_file)
264
+
265
+ if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0:
266
+ last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png')
267
+
268
+ preview_text = entry.cond_text if preview_image_prompt == "" else preview_image_prompt
269
+
270
+ optimizer.zero_grad()
271
+ shared.sd_model.cond_stage_model.to(devices.device)
272
+ shared.sd_model.first_stage_model.to(devices.device)
273
+
274
+ p = processing.StableDiffusionProcessingTxt2Img(
275
+ sd_model=shared.sd_model,
276
+ prompt=preview_text,
277
+ steps=20,
278
+ do_not_save_grid=True,
279
+ do_not_save_samples=True,
280
+ )
281
+
282
+ processed = processing.process_images(p)
283
+ image = processed.images[0] if len(processed.images)>0 else None
284
+
285
+ if unload:
286
+ shared.sd_model.cond_stage_model.to(devices.cpu)
287
+ shared.sd_model.first_stage_model.to(devices.cpu)
288
+
289
+ if image is not None:
290
+ shared.state.current_image = image
291
+ image.save(last_saved_image)
292
+ last_saved_image += f", prompt: {preview_text}"
293
+
294
+ shared.state.job_no = hypernetwork.step
295
+
296
+ shared.state.textinfo = f"""
297
+ <p>
298
+ Loss: {losses.mean():.7f}<br/>
299
+ Step: {hypernetwork.step}<br/>
300
+ Last prompt: {html.escape(entry.cond_text)}<br/>
301
+ Last saved embedding: {html.escape(last_saved_file)}<br/>
302
+ Last saved image: {html.escape(last_saved_image)}<br/>
303
+ </p>
304
+ """
305
+
306
+ checkpoint = sd_models.select_checkpoint()
307
+
308
+ hypernetwork.sd_checkpoint = checkpoint.hash
309
+ hypernetwork.sd_checkpoint_name = checkpoint.model_name
310
+ hypernetwork.save(filename)
311
+
312
+ return hypernetwork, filename
313
+
314
+
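HypernetworkModule is a residual two-layer MLP (dim -> 2*dim -> dim) applied separately to the key and value context in cross-attention via apply_hypernetwork(); the transform preserves the context shape. A shape-level sketch (illustrative; assumes modules.devices resolves to a usable device):

```python
import torch
from modules import devices
from modules.hypernetworks.hypernetwork import HypernetworkModule

k_module = HypernetworkModule(768)                          # 768 = CLIP context width for SD 1.x
context = torch.randn(2, 77, 768, device=devices.device)    # (batch, tokens, width)
print(k_module(context).shape)                              # torch.Size([2, 77, 768])
```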
modules/hypernetworks/ui.py ADDED
@@ -0,0 +1,47 @@
+ import html
+ import os
+
+ import gradio as gr
+
+ import modules.textual_inversion.textual_inversion
+ import modules.textual_inversion.preprocess
+ from modules import sd_hijack, shared, devices
+ from modules.hypernetworks import hypernetwork
+
+
+ def create_hypernetwork(name, enable_sizes):
+     fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt")
+     assert not os.path.exists(fn), f"file {fn} already exists"
+
+     hypernet = modules.hypernetworks.hypernetwork.Hypernetwork(name=name, enable_sizes=[int(x) for x in enable_sizes])
+     hypernet.save(fn)
+
+     shared.reload_hypernetworks()
+
+     return gr.Dropdown.update(choices=sorted([x for x in shared.hypernetworks.keys()])), f"Created: {fn}", ""
+
+
+ def train_hypernetwork(*args):
+
+     initial_hypernetwork = shared.loaded_hypernetwork
+
+     assert not shared.cmd_opts.lowvram, 'Training models with lowvram is not possible'
+
+     try:
+         sd_hijack.undo_optimizations()
+
+         hypernetwork, filename = modules.hypernetworks.hypernetwork.train_hypernetwork(*args)
+
+         res = f"""
+ Training {'interrupted' if shared.state.interrupted else 'finished'} at {hypernetwork.step} steps.
+ Hypernetwork saved to {html.escape(filename)}
+ """
+         return res, ""
+     except Exception:
+         raise
+     finally:
+         shared.loaded_hypernetwork = initial_hypernetwork
+         shared.sd_model.cond_stage_model.to(devices.device)
+         shared.sd_model.first_stage_model.to(devices.device)
+         sd_hijack.apply_optimizations()
+
modules/images.py ADDED
@@ -0,0 +1,465 @@
1
+ import datetime
2
+ import math
3
+ import os
4
+ from collections import namedtuple
5
+ import re
6
+
7
+ import numpy as np
8
+ import piexif
9
+ import piexif.helper
10
+ from PIL import Image, ImageFont, ImageDraw, PngImagePlugin
11
+ from fonts.ttf import Roboto
12
+ import string
13
+
14
+ from modules import sd_samplers, shared
15
+ from modules.shared import opts, cmd_opts
16
+
17
+ LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS)
18
+
19
+
20
+ def image_grid(imgs, batch_size=1, rows=None):
21
+ if rows is None:
22
+ if opts.n_rows > 0:
23
+ rows = opts.n_rows
24
+ elif opts.n_rows == 0:
25
+ rows = batch_size
26
+ else:
27
+ rows = math.sqrt(len(imgs))
28
+ rows = round(rows)
29
+
30
+ cols = math.ceil(len(imgs) / rows)
31
+
32
+ w, h = imgs[0].size
33
+ grid = Image.new('RGB', size=(cols * w, rows * h), color='black')
34
+
35
+ for i, img in enumerate(imgs):
36
+ grid.paste(img, box=(i % cols * w, i // cols * h))
37
+
38
+ return grid
39
+
40
+
41
+ Grid = namedtuple("Grid", ["tiles", "tile_w", "tile_h", "image_w", "image_h", "overlap"])
42
+
43
+
44
+ def split_grid(image, tile_w=512, tile_h=512, overlap=64):
45
+ w = image.width
46
+ h = image.height
47
+
48
+ non_overlap_width = tile_w - overlap
49
+ non_overlap_height = tile_h - overlap
50
+
51
+ cols = math.ceil((w - overlap) / non_overlap_width)
52
+ rows = math.ceil((h - overlap) / non_overlap_height)
53
+
54
+ dx = (w - tile_w) / (cols - 1) if cols > 1 else 0
55
+ dy = (h - tile_h) / (rows - 1) if rows > 1 else 0
56
+
57
+ grid = Grid([], tile_w, tile_h, w, h, overlap)
58
+ for row in range(rows):
59
+ row_images = []
60
+
61
+ y = int(row * dy)
62
+
63
+ if y + tile_h >= h:
64
+ y = h - tile_h
65
+
66
+ for col in range(cols):
67
+ x = int(col * dx)
68
+
69
+ if x + tile_w >= w:
70
+ x = w - tile_w
71
+
72
+ tile = image.crop((x, y, x + tile_w, y + tile_h))
73
+
74
+ row_images.append([x, tile_w, tile])
75
+
76
+ grid.tiles.append([y, tile_h, row_images])
77
+
78
+ return grid
79
+
80
+
81
+ def combine_grid(grid):
82
+ def make_mask_image(r):
83
+ r = r * 255 / grid.overlap
84
+ r = r.astype(np.uint8)
85
+ return Image.fromarray(r, 'L')
86
+
87
+ mask_w = make_mask_image(np.arange(grid.overlap, dtype=np.float32).reshape((1, grid.overlap)).repeat(grid.tile_h, axis=0))
88
+ mask_h = make_mask_image(np.arange(grid.overlap, dtype=np.float32).reshape((grid.overlap, 1)).repeat(grid.image_w, axis=1))
89
+
90
+ combined_image = Image.new("RGB", (grid.image_w, grid.image_h))
91
+ for y, h, row in grid.tiles:
92
+ combined_row = Image.new("RGB", (grid.image_w, h))
93
+ for x, w, tile in row:
94
+ if x == 0:
95
+ combined_row.paste(tile, (0, 0))
96
+ continue
97
+
98
+ combined_row.paste(tile.crop((0, 0, grid.overlap, h)), (x, 0), mask=mask_w)
99
+ combined_row.paste(tile.crop((grid.overlap, 0, w, h)), (x + grid.overlap, 0))
100
+
101
+ if y == 0:
102
+ combined_image.paste(combined_row, (0, 0))
103
+ continue
104
+
105
+ combined_image.paste(combined_row.crop((0, 0, combined_row.width, grid.overlap)), (0, y), mask=mask_h)
106
+ combined_image.paste(combined_row.crop((0, grid.overlap, combined_row.width, h)), (0, y + grid.overlap))
107
+
108
+ return combined_image
109
+
110
+
111
+ class GridAnnotation:
112
+ def __init__(self, text='', is_active=True):
113
+ self.text = text
114
+ self.is_active = is_active
115
+ self.size = None
116
+
117
+
118
+ def draw_grid_annotations(im, width, height, hor_texts, ver_texts):
119
+ def wrap(drawing, text, font, line_length):
120
+ lines = ['']
121
+ for word in text.split():
122
+ line = f'{lines[-1]} {word}'.strip()
123
+ if drawing.textlength(line, font=font) <= line_length:
124
+ lines[-1] = line
125
+ else:
126
+ lines.append(word)
127
+ return lines
128
+
129
+ def draw_texts(drawing, draw_x, draw_y, lines):
130
+ for i, line in enumerate(lines):
131
+ drawing.multiline_text((draw_x, draw_y + line.size[1] / 2), line.text, font=fnt, fill=color_active if line.is_active else color_inactive, anchor="mm", align="center")
132
+
133
+ if not line.is_active:
134
+ drawing.line((draw_x - line.size[0] // 2, draw_y + line.size[1] // 2, draw_x + line.size[0] // 2, draw_y + line.size[1] // 2), fill=color_inactive, width=4)
135
+
136
+ draw_y += line.size[1] + line_spacing
137
+
138
+ fontsize = (width + height) // 25
139
+ line_spacing = fontsize // 2
140
+
141
+ try:
142
+ fnt = ImageFont.truetype(opts.font or Roboto, fontsize)
143
+ except Exception:
144
+ fnt = ImageFont.truetype(Roboto, fontsize)
145
+
146
+ color_active = (0, 0, 0)
147
+ color_inactive = (153, 153, 153)
148
+
149
+ pad_left = 0 if sum([sum([len(line.text) for line in lines]) for lines in ver_texts]) == 0 else width * 3 // 4
150
+
151
+ cols = im.width // width
152
+ rows = im.height // height
153
+
154
+ assert cols == len(hor_texts), f'bad number of horizontal texts: {len(hor_texts)}; must be {cols}'
155
+ assert rows == len(ver_texts), f'bad number of vertical texts: {len(ver_texts)}; must be {rows}'
156
+
157
+ calc_img = Image.new("RGB", (1, 1), "white")
158
+ calc_d = ImageDraw.Draw(calc_img)
159
+
160
+ for texts, allowed_width in zip(hor_texts + ver_texts, [width] * len(hor_texts) + [pad_left] * len(ver_texts)):
161
+ items = [] + texts
162
+ texts.clear()
163
+
164
+ for line in items:
165
+ wrapped = wrap(calc_d, line.text, fnt, allowed_width)
166
+ texts += [GridAnnotation(x, line.is_active) for x in wrapped]
167
+
168
+ for line in texts:
169
+ bbox = calc_d.multiline_textbbox((0, 0), line.text, font=fnt)
170
+ line.size = (bbox[2] - bbox[0], bbox[3] - bbox[1])
171
+
172
+ hor_text_heights = [sum([line.size[1] + line_spacing for line in lines]) - line_spacing for lines in hor_texts]
173
+ ver_text_heights = [sum([line.size[1] + line_spacing for line in lines]) - line_spacing * len(lines) for lines in
174
+ ver_texts]
175
+
176
+ pad_top = max(hor_text_heights) + line_spacing * 2
177
+
178
+ result = Image.new("RGB", (im.width + pad_left, im.height + pad_top), "white")
179
+ result.paste(im, (pad_left, pad_top))
180
+
181
+ d = ImageDraw.Draw(result)
182
+
183
+ for col in range(cols):
184
+ x = pad_left + width * col + width / 2
185
+ y = pad_top / 2 - hor_text_heights[col] / 2
186
+
187
+ draw_texts(d, x, y, hor_texts[col])
188
+
189
+ for row in range(rows):
190
+ x = pad_left / 2
191
+ y = pad_top + height * row + height / 2 - ver_text_heights[row] / 2
192
+
193
+ draw_texts(d, x, y, ver_texts[row])
194
+
195
+ return result
196
+
197
+
198
+ def draw_prompt_matrix(im, width, height, all_prompts):
199
+ prompts = all_prompts[1:]
200
+ boundary = math.ceil(len(prompts) / 2)
201
+
202
+ prompts_horiz = prompts[:boundary]
203
+ prompts_vert = prompts[boundary:]
204
+
205
+ hor_texts = [[GridAnnotation(x, is_active=pos & (1 << i) != 0) for i, x in enumerate(prompts_horiz)] for pos in range(1 << len(prompts_horiz))]
206
+ ver_texts = [[GridAnnotation(x, is_active=pos & (1 << i) != 0) for i, x in enumerate(prompts_vert)] for pos in range(1 << len(prompts_vert))]
207
+
208
+ return draw_grid_annotations(im, width, height, hor_texts, ver_texts)
209
+
210
+
211
+ def resize_image(resize_mode, im, width, height):
212
+ def resize(im, w, h):
213
+ if opts.upscaler_for_img2img is None or opts.upscaler_for_img2img == "None" or im.mode == 'L':
214
+ return im.resize((w, h), resample=LANCZOS)
215
+
216
+ scale = max(w / im.width, h / im.height)
217
+
218
+ if scale > 1.0:
219
+ upscalers = [x for x in shared.sd_upscalers if x.name == opts.upscaler_for_img2img]
220
+ assert len(upscalers) > 0, f"could not find upscaler named {opts.upscaler_for_img2img}"
221
+
222
+ upscaler = upscalers[0]
223
+ im = upscaler.scaler.upscale(im, scale, upscaler.data_path)
224
+
225
+ if im.width != w or im.height != h:
226
+ im = im.resize((w, h), resample=LANCZOS)
227
+
228
+ return im
229
+
230
+ if resize_mode == 0:
231
+ res = resize(im, width, height)
232
+
233
+ elif resize_mode == 1:
234
+ ratio = width / height
235
+ src_ratio = im.width / im.height
236
+
237
+ src_w = width if ratio > src_ratio else im.width * height // im.height
238
+ src_h = height if ratio <= src_ratio else im.height * width // im.width
239
+
240
+ resized = resize(im, src_w, src_h)
241
+ res = Image.new("RGB", (width, height))
242
+ res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2))
243
+
244
+ else:
245
+ ratio = width / height
246
+ src_ratio = im.width / im.height
247
+
248
+ src_w = width if ratio < src_ratio else im.width * height // im.height
249
+ src_h = height if ratio >= src_ratio else im.height * width // im.width
250
+
251
+ resized = resize(im, src_w, src_h)
252
+ res = Image.new("RGB", (width, height))
253
+ res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2))
254
+
255
+ if ratio < src_ratio:
256
+ fill_height = height // 2 - src_h // 2
257
+ res.paste(resized.resize((width, fill_height), box=(0, 0, width, 0)), box=(0, 0))
258
+ res.paste(resized.resize((width, fill_height), box=(0, resized.height, width, resized.height)), box=(0, fill_height + src_h))
259
+ elif ratio > src_ratio:
260
+ fill_width = width // 2 - src_w // 2
261
+ res.paste(resized.resize((fill_width, height), box=(0, 0, 0, height)), box=(0, 0))
262
+ res.paste(resized.resize((fill_width, height), box=(resized.width, 0, resized.width, height)), box=(fill_width + src_w, 0))
263
+
264
+ return res
265
+
266
+
267
+ invalid_filename_chars = '<>:"/\\|?*\n'
268
+ invalid_filename_prefix = ' '
269
+ invalid_filename_postfix = ' .'
270
+ re_nonletters = re.compile(r'[\s' + string.punctuation + ']+')
271
+ max_filename_part_length = 128
272
+
273
+
274
+ def sanitize_filename_part(text, replace_spaces=True):
275
+ if replace_spaces:
276
+ text = text.replace(' ', '_')
277
+
278
+ text = text.translate({ord(x): '_' for x in invalid_filename_chars})
279
+ text = text.lstrip(invalid_filename_prefix)[:max_filename_part_length]
280
+ text = text.rstrip(invalid_filename_postfix)
281
+ return text
282
+
283
+
284
+ def apply_filename_pattern(x, p, seed, prompt):
285
+ max_prompt_words = opts.directories_max_prompt_words
286
+
287
+ if seed is not None:
288
+ x = x.replace("[seed]", str(seed))
289
+
290
+ if p is not None:
291
+ x = x.replace("[steps]", str(p.steps))
292
+ x = x.replace("[cfg]", str(p.cfg_scale))
293
+ x = x.replace("[width]", str(p.width))
294
+ x = x.replace("[height]", str(p.height))
295
+ x = x.replace("[styles]", sanitize_filename_part(", ".join([x for x in p.styles if not x == "None"]) or "None", replace_spaces=False))
296
+ x = x.replace("[sampler]", sanitize_filename_part(sd_samplers.samplers[p.sampler_index].name, replace_spaces=False))
297
+
298
+ x = x.replace("[model_hash]", getattr(p, "sd_model_hash", shared.sd_model.sd_model_hash))
299
+ x = x.replace("[date]", datetime.date.today().isoformat())
300
+ x = x.replace("[datetime]", datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
301
+ x = x.replace("[job_timestamp]", getattr(p, "job_timestamp", shared.state.job_timestamp))
302
+
303
+ # Apply [prompt] at last. Because it may contain any replacement word.^M
304
+ if prompt is not None:
305
+ x = x.replace("[prompt]", sanitize_filename_part(prompt))
306
+ if "[prompt_no_styles]" in x:
307
+ prompt_no_style = prompt
308
+ for style in shared.prompt_styles.get_style_prompts(p.styles):
309
+ if len(style) > 0:
310
+ style_parts = [y for y in style.split("{prompt}")]
311
+ for part in style_parts:
312
+ prompt_no_style = prompt_no_style.replace(part, "").replace(", ,", ",").strip().strip(',')
313
+ prompt_no_style = prompt_no_style.replace(style, "").strip().strip(',').strip()
314
+ x = x.replace("[prompt_no_styles]", sanitize_filename_part(prompt_no_style, replace_spaces=False))
315
+
316
+ x = x.replace("[prompt_spaces]", sanitize_filename_part(prompt, replace_spaces=False))
317
+ if "[prompt_words]" in x:
318
+ words = [x for x in re_nonletters.split(prompt or "") if len(x) > 0]
319
+ if len(words) == 0:
320
+ words = ["empty"]
321
+ x = x.replace("[prompt_words]", sanitize_filename_part(" ".join(words[0:max_prompt_words]), replace_spaces=False))
322
+
323
+ if cmd_opts.hide_ui_dir_config:
324
+ x = re.sub(r'^[\\/]+|\.{2,}[\\/]+|[\\/]+\.{2,}', '', x)
325
+
326
+ return x
327
+
328
+
329
+ def get_next_sequence_number(path, basename):
330
+ """
331
+ Determines and returns the next sequence number to use when saving an image in the specified directory.
332
+
333
+ The sequence starts at 0.
334
+ """
335
+ result = -1
336
+ if basename != '':
337
+ basename = basename + "-"
338
+
339
+ prefix_length = len(basename)
340
+ for p in os.listdir(path):
341
+ if p.startswith(basename):
342
+ l = os.path.splitext(p[prefix_length:])[0].split('-') # splits the filename (removing the basename first if one is defined, so the sequence number is always the first element)
343
+ try:
344
+ result = max(int(l[0]), result)
345
+ except ValueError:
346
+ pass
347
+
348
+ return result + 1
349
+
350
+
351
+ def save_image(image, path, basename, seed=None, prompt=None, extension='png', info=None, short_filename=False, no_prompt=False, grid=False, pnginfo_section_name='parameters', p=None, existing_info=None, forced_filename=None, suffix="", save_to_dirs=None):
352
+ '''Save an image.
353
+
354
+ Args:
355
+ image (`PIL.Image`):
356
+ The image to be saved.
357
+ path (`str`):
358
+ The directory to save the image. Note, the option `save_to_dirs` will make the image to be saved into a sub directory.
359
+ basename (`str`):
360
+ The base filename which will be applied to `filename pattern`.
361
+ seed, prompt, short_filename,
362
+ extension (`str`):
363
+ Image file extension, default is `png`.
364
+ pngsectionname (`str`):
365
+ Specify the name of the section which `info` will be saved in.
366
+ info (`str` or `PngImagePlugin.iTXt`):
367
+ PNG info chunks.
368
+ existing_info (`dict`):
369
+ Additional PNG info. `existing_info == {pngsectionname: info, ...}`
370
+ no_prompt:
371
+ TODO I don't know its meaning.
372
+ p (`StableDiffusionProcessing`)
373
+ forced_filename (`str`):
374
+ If specified, `basename` and filename pattern will be ignored.
375
+ save_to_dirs (bool):
376
+ If true, the image will be saved into a subdirectory of `path`.
377
+
378
+ Returns: (fullfn, txt_fullfn)
379
+ fullfn (`str`):
380
+ The full path of the saved imaged.
381
+ txt_fullfn (`str` or None):
382
+ If a text file is saved for this image, this will be its full path. Otherwise None.
383
+ '''
384
+ if short_filename or prompt is None or seed is None:
385
+ file_decoration = ""
386
+ elif opts.save_to_dirs:
387
+ file_decoration = opts.samples_filename_pattern or "[seed]"
388
+ else:
389
+ file_decoration = opts.samples_filename_pattern or "[seed]-[prompt_spaces]"
390
+
391
+ if file_decoration != "":
392
+ file_decoration = "-" + file_decoration.lower()
393
+
394
+ file_decoration = apply_filename_pattern(file_decoration, p, seed, prompt) + suffix
395
+
396
+ if extension == 'png' and opts.enable_pnginfo and info is not None:
397
+ pnginfo = PngImagePlugin.PngInfo()
398
+
399
+ if existing_info is not None:
400
+ for k, v in existing_info.items():
401
+ pnginfo.add_text(k, str(v))
402
+
403
+ pnginfo.add_text(pnginfo_section_name, info)
404
+ else:
405
+ pnginfo = None
406
+
407
+ if save_to_dirs is None:
408
+ save_to_dirs = (grid and opts.grid_save_to_dirs) or (not grid and opts.save_to_dirs and not no_prompt)
409
+
410
+ if save_to_dirs:
411
+ dirname = apply_filename_pattern(opts.directories_filename_pattern or "[prompt_words]", p, seed, prompt).strip('\\ /')
412
+ path = os.path.join(path, dirname)
413
+
414
+ os.makedirs(path, exist_ok=True)
415
+
416
+ if forced_filename is None:
417
+ basecount = get_next_sequence_number(path, basename)
418
+ fullfn = "a.png"
419
+ fullfn_without_extension = "a"
420
+ for i in range(500):
421
+ fn = f"{basecount + i:05}" if basename == '' else f"{basename}-{basecount + i:04}"
422
+ fullfn = os.path.join(path, f"{fn}{file_decoration}.{extension}")
423
+ fullfn_without_extension = os.path.join(path, f"{fn}{file_decoration}")
424
+ if not os.path.exists(fullfn):
425
+ break
426
+ else:
427
+ fullfn = os.path.join(path, f"{forced_filename}.{extension}")
428
+ fullfn_without_extension = os.path.join(path, forced_filename)
429
+
430
+ def exif_bytes():
431
+ return piexif.dump({
432
+ "Exif": {
433
+ piexif.ExifIFD.UserComment: piexif.helper.UserComment.dump(info or "", encoding="unicode")
434
+ },
435
+ })
436
+
437
+ if extension.lower() in ("jpg", "jpeg", "webp"):
438
+ image.save(fullfn, quality=opts.jpeg_quality)
439
+ if opts.enable_pnginfo and info is not None:
440
+ piexif.insert(exif_bytes(), fullfn)
441
+ else:
442
+ image.save(fullfn, quality=opts.jpeg_quality, pnginfo=pnginfo)
443
+
444
+ target_side_length = 4000
445
+ oversize = image.width > target_side_length or image.height > target_side_length
446
+ if opts.export_for_4chan and (oversize or os.stat(fullfn).st_size > 4 * 1024 * 1024):
447
+ ratio = image.width / image.height
448
+
449
+ if oversize and ratio > 1:
450
+ image = image.resize((target_side_length, image.height * target_side_length // image.width), LANCZOS)
451
+ elif oversize:
452
+ image = image.resize((image.width * target_side_length // image.height, target_side_length), LANCZOS)
453
+
454
+ image.save(fullfn_without_extension + ".jpg", quality=opts.jpeg_quality)
455
+ if opts.enable_pnginfo and info is not None:
456
+ piexif.insert(exif_bytes(), fullfn_without_extension + ".jpg")
457
+
458
+ if opts.save_txt and info is not None:
459
+ txt_fullfn = f"{fullfn_without_extension}.txt"
460
+ with open(txt_fullfn, "w", encoding="utf8") as file:
461
+ file.write(info + "\n")
462
+ else:
463
+ txt_fullfn = None
464
+
465
+ return fullfn, txt_fullfn
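A small sketch of image_grid() above (illustrative only; assumes the webui modules are importable): with rows given explicitly it lays the images out row-major on a black canvas.

```python
from PIL import Image
from modules.images import image_grid

tiles = [Image.new("RGB", (64, 64), c) for c in ("red", "green", "blue", "white")]
grid = image_grid(tiles, rows=2)    # 2x2 layout -> a 128x128 image
grid.save("grid.png")
```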
modules/img2img.py ADDED
@@ -0,0 +1,137 @@
1
+ import math
2
+ import os
3
+ import sys
4
+ import traceback
5
+
6
+ import numpy as np
7
+ from PIL import Image, ImageOps, ImageChops
8
+
9
+ from modules import devices
10
+ from modules.processing import Processed, StableDiffusionProcessingImg2Img, process_images
11
+ from modules.shared import opts, state
12
+ import modules.shared as shared
13
+ import modules.processing as processing
14
+ from modules.ui import plaintext_to_html
15
+ import modules.images as images
16
+ import modules.scripts
17
+
18
+
19
+ def process_batch(p, input_dir, output_dir, args):
20
+ processing.fix_seed(p)
21
+
22
+ images = [file for file in [os.path.join(input_dir, x) for x in os.listdir(input_dir)] if os.path.isfile(file)]
23
+
24
+ print(f"Will process {len(images)} images, creating {p.n_iter * p.batch_size} new images for each.")
25
+
26
+ save_normally = output_dir == ''
27
+
28
+ p.do_not_save_grid = True
29
+ p.do_not_save_samples = not save_normally
30
+
31
+ state.job_count = len(images) * p.n_iter
32
+
33
+ for i, image in enumerate(images):
34
+ state.job = f"{i+1} out of {len(images)}"
35
+ if state.skipped:
36
+ state.skipped = False
37
+
38
+ if state.interrupted:
39
+ break
40
+
41
+ img = Image.open(image)
42
+ p.init_images = [img] * p.batch_size
43
+
44
+ proc = modules.scripts.scripts_img2img.run(p, *args)
45
+ if proc is None:
46
+ proc = process_images(p)
47
+
48
+ for n, processed_image in enumerate(proc.images):
49
+ filename = os.path.basename(image)
50
+
51
+ if n > 0:
52
+ left, right = os.path.splitext(filename)
53
+ filename = f"{left}-{n}{right}"
54
+
55
+ if not save_normally:
56
+ processed_image.save(os.path.join(output_dir, filename))
57
+
58
+
59
+ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, init_img, init_img_with_mask, init_img_inpaint, init_mask_inpaint, mask_mode, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, *args):
60
+ is_inpaint = mode == 1
61
+ is_batch = mode == 2
62
+
63
+ if is_inpaint:
64
+ if mask_mode == 0:
65
+ image = init_img_with_mask['image']
66
+ mask = init_img_with_mask['mask']
67
+ alpha_mask = ImageOps.invert(image.split()[-1]).convert('L').point(lambda x: 255 if x > 0 else 0, mode='1')
68
+ mask = ImageChops.lighter(alpha_mask, mask.convert('L')).convert('L')
69
+ image = image.convert('RGB')
70
+ else:
71
+ image = init_img_inpaint
72
+ mask = init_mask_inpaint
73
+ else:
74
+ image = init_img
75
+ mask = None
76
+
77
+ assert 0. <= denoising_strength <= 1., 'can only work with strength in [0.0, 1.0]'
78
+
79
+ p = StableDiffusionProcessingImg2Img(
80
+ sd_model=shared.sd_model,
81
+ outpath_samples=opts.outdir_samples or opts.outdir_img2img_samples,
82
+ outpath_grids=opts.outdir_grids or opts.outdir_img2img_grids,
83
+ prompt=prompt,
84
+ negative_prompt=negative_prompt,
85
+ styles=[prompt_style, prompt_style2],
86
+ seed=seed,
87
+ subseed=subseed,
88
+ subseed_strength=subseed_strength,
89
+ seed_resize_from_h=seed_resize_from_h,
90
+ seed_resize_from_w=seed_resize_from_w,
91
+ seed_enable_extras=seed_enable_extras,
92
+ sampler_index=sampler_index,
93
+ batch_size=batch_size,
94
+ n_iter=n_iter,
95
+ steps=steps,
96
+ cfg_scale=cfg_scale,
97
+ width=width,
98
+ height=height,
99
+ restore_faces=restore_faces,
100
+ tiling=tiling,
101
+ init_images=[image],
102
+ mask=mask,
103
+ mask_blur=mask_blur,
104
+ inpainting_fill=inpainting_fill,
105
+ resize_mode=resize_mode,
106
+ denoising_strength=denoising_strength,
107
+ inpaint_full_res=inpaint_full_res,
108
+ inpaint_full_res_padding=inpaint_full_res_padding,
109
+ inpainting_mask_invert=inpainting_mask_invert,
110
+ )
111
+
112
+ if shared.cmd_opts.enable_console_prompts:
113
+ print(f"\nimg2img: {prompt}", file=shared.progress_print_out)
114
+
115
+ p.extra_generation_params["Mask blur"] = mask_blur
116
+
117
+ if is_batch:
118
+ assert not shared.cmd_opts.hide_ui_dir_config, "Launched with --hide-ui-dir-config, batch img2img disabled"
119
+
120
+ process_batch(p, img2img_batch_input_dir, img2img_batch_output_dir, args)
121
+
122
+ processed = Processed(p, [], p.seed, "")
123
+ else:
124
+ processed = modules.scripts.scripts_img2img.run(p, *args)
125
+ if processed is None:
126
+ processed = process_images(p)
127
+
128
+ shared.total_tqdm.clear()
129
+
130
+ generation_info_js = processed.js()
131
+ if opts.samples_log_stdout:
132
+ print(generation_info_js)
133
+
134
+ if opts.do_not_show_images:
135
+ processed.images = []
136
+
137
+ return processed.images, generation_info_js, plaintext_to_html(processed.info)
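In inpaint mode (mask_mode == 0) the code above unions the transparent pixels of the uploaded RGBA image with the user-drawn mask before processing. A small self-contained sketch of that merge; the file names in the usage comment are placeholders:

from PIL import Image, ImageChops, ImageOps

def combine_inpaint_mask(image_rgba, drawn_mask):
    """Union of 'erased to transparency' and 'painted by the user', as in img2img() above."""
    # any pixel with partial transparency becomes white in the alpha-derived mask
    alpha_mask = ImageOps.invert(image_rgba.split()[-1]).convert('L').point(
        lambda x: 255 if x > 0 else 0, mode='1')
    # lighter() keeps a pixel masked if either mask marks it
    return ImageChops.lighter(alpha_mask.convert('L'), drawn_mask.convert('L')).convert('L')

# hypothetical usage (both images must share the same size):
# image = Image.open("inpaint_source.png").convert("RGBA")
# mask = combine_inpaint_mask(image, Image.open("drawn_mask.png"))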
modules/interrogate.py ADDED
@@ -0,0 +1,171 @@
1
+ import contextlib
2
+ import os
3
+ import sys
4
+ import traceback
5
+ from collections import namedtuple
6
+ import re
7
+
8
+ import torch
9
+
10
+ from torchvision import transforms
11
+ from torchvision.transforms.functional import InterpolationMode
12
+
13
+ import modules.shared as shared
14
+ from modules import devices, paths, lowvram
15
+
16
+ blip_image_eval_size = 384
17
+ blip_model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
18
+ clip_model_name = 'ViT-L/14'
19
+
20
+ Category = namedtuple("Category", ["name", "topn", "items"])
21
+
22
+ re_topn = re.compile(r"\.top(\d+)\.")
23
+
24
+
25
+ class InterrogateModels:
26
+ blip_model = None
27
+ clip_model = None
28
+ clip_preprocess = None
29
+ categories = None
30
+ dtype = None
31
+
32
+ def __init__(self, content_dir):
33
+ self.categories = []
34
+
35
+ if os.path.exists(content_dir):
36
+ for filename in os.listdir(content_dir):
37
+ m = re_topn.search(filename)
38
+ topn = 1 if m is None else int(m.group(1))
39
+
40
+ with open(os.path.join(content_dir, filename), "r", encoding="utf8") as file:
41
+ lines = [x.strip() for x in file.readlines()]
42
+
43
+ self.categories.append(Category(name=filename, topn=topn, items=lines))
44
+
45
+ def load_blip_model(self):
46
+ import models.blip
47
+
48
+ blip_model = models.blip.blip_decoder(pretrained=blip_model_url, image_size=blip_image_eval_size, vit='base', med_config=os.path.join(paths.paths["BLIP"], "configs", "med_config.json"))
49
+ blip_model.eval()
50
+
51
+ return blip_model
52
+
53
+ def load_clip_model(self):
54
+ import clip
55
+
56
+ model, preprocess = clip.load(clip_model_name)
57
+ model.eval()
58
+ model = model.to(shared.device)
59
+
60
+ return model, preprocess
61
+
62
+ def load(self):
63
+ if self.blip_model is None:
64
+ self.blip_model = self.load_blip_model()
65
+ if not shared.cmd_opts.no_half:
66
+ self.blip_model = self.blip_model.half()
67
+
68
+ self.blip_model = self.blip_model.to(shared.device)
69
+
70
+ if self.clip_model is None:
71
+ self.clip_model, self.clip_preprocess = self.load_clip_model()
72
+ if not shared.cmd_opts.no_half:
73
+ self.clip_model = self.clip_model.half()
74
+
75
+ self.clip_model = self.clip_model.to(shared.device)
76
+
77
+ self.dtype = next(self.clip_model.parameters()).dtype
78
+
79
+ def send_clip_to_ram(self):
80
+ if not shared.opts.interrogate_keep_models_in_memory:
81
+ if self.clip_model is not None:
82
+ self.clip_model = self.clip_model.to(devices.cpu)
83
+
84
+ def send_blip_to_ram(self):
85
+ if not shared.opts.interrogate_keep_models_in_memory:
86
+ if self.blip_model is not None:
87
+ self.blip_model = self.blip_model.to(devices.cpu)
88
+
89
+ def unload(self):
90
+ self.send_clip_to_ram()
91
+ self.send_blip_to_ram()
92
+
93
+ devices.torch_gc()
94
+
95
+ def rank(self, image_features, text_array, top_count=1):
96
+ import clip
97
+
98
+ if shared.opts.interrogate_clip_dict_limit != 0:
99
+ text_array = text_array[0:int(shared.opts.interrogate_clip_dict_limit)]
100
+
101
+ top_count = min(top_count, len(text_array))
102
+ text_tokens = clip.tokenize([text for text in text_array], truncate=True).to(shared.device)
103
+ text_features = self.clip_model.encode_text(text_tokens).type(self.dtype)
104
+ text_features /= text_features.norm(dim=-1, keepdim=True)
105
+
106
+ similarity = torch.zeros((1, len(text_array))).to(shared.device)
107
+ for i in range(image_features.shape[0]):
108
+ similarity += (100.0 * image_features[i].unsqueeze(0) @ text_features.T).softmax(dim=-1)
109
+ similarity /= image_features.shape[0]
110
+
111
+ top_probs, top_labels = similarity.cpu().topk(top_count, dim=-1)
112
+ return [(text_array[top_labels[0][i].numpy()], (top_probs[0][i].numpy()*100)) for i in range(top_count)]
113
+
114
+ def generate_caption(self, pil_image):
115
+ gpu_image = transforms.Compose([
116
+ transforms.Resize((blip_image_eval_size, blip_image_eval_size), interpolation=InterpolationMode.BICUBIC),
117
+ transforms.ToTensor(),
118
+ transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))
119
+ ])(pil_image).unsqueeze(0).type(self.dtype).to(shared.device)
120
+
121
+ with torch.no_grad():
122
+ caption = self.blip_model.generate(gpu_image, sample=False, num_beams=shared.opts.interrogate_clip_num_beams, min_length=shared.opts.interrogate_clip_min_length, max_length=shared.opts.interrogate_clip_max_length)
123
+
124
+ return caption[0]
125
+
126
+ def interrogate(self, pil_image, include_ranks=False):
127
+ res = None
128
+
129
+ try:
130
+
131
+ if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
132
+ lowvram.send_everything_to_cpu()
133
+ devices.torch_gc()
134
+
135
+ self.load()
136
+
137
+ caption = self.generate_caption(pil_image)
138
+ self.send_blip_to_ram()
139
+ devices.torch_gc()
140
+
141
+ res = caption
142
+
143
+ clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(shared.device)
144
+
145
+ precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" else contextlib.nullcontext
146
+ with torch.no_grad(), precision_scope("cuda"):
147
+ image_features = self.clip_model.encode_image(clip_image).type(self.dtype)
148
+
149
+ image_features /= image_features.norm(dim=-1, keepdim=True)
150
+
151
+ if shared.opts.interrogate_use_builtin_artists:
152
+ artist = self.rank(image_features, ["by " + artist.name for artist in shared.artist_db.artists])[0]
153
+
154
+ res += ", " + artist[0]
155
+
156
+ for name, topn, items in self.categories:
157
+ matches = self.rank(image_features, items, top_count=topn)
158
+ for match, score in matches:
159
+ if include_ranks:
160
+ res += ", " + match
161
+ else:
162
+ res += f", ({match}:{score})"
163
+
164
+ except Exception:
165
+ print(f"Error interrogating", file=sys.stderr)
166
+ print(traceback.format_exc(), file=sys.stderr)
167
+ res += "<error>"
168
+
169
+ self.unload()
170
+
171
+ return res
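rank() above is zero-shot CLIP scoring: normalized image features are compared against tokenized candidate strings and the softmax of the scaled cosine similarities is ranked. A simplified single-image sketch of the same scoring with the openai clip package; the image path and candidate list are examples, and any CLIP variant can replace the ViT-L/14 name used by clip_model_name above:

import clip
import torch
from PIL import Image

@torch.no_grad()
def rank_texts(image_path, candidates, top_count=3, device="cpu"):
    """Return the best-matching candidate strings for an image, mirroring InterrogateModels.rank above."""
    model, preprocess = clip.load("ViT-L/14", device=device)
    image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)

    image_features = model.encode_image(image)
    image_features /= image_features.norm(dim=-1, keepdim=True)

    text_tokens = clip.tokenize(candidates, truncate=True).to(device)
    text_features = model.encode_text(text_tokens)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # scaled cosine similarity -> probability distribution over the candidates
    similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
    probs, labels = similarity[0].topk(min(top_count, len(candidates)))
    return [(candidates[int(i)], float(p) * 100) for p, i in zip(probs, labels)]

# hypothetical usage:
# print(rank_texts("photo.png", ["a photo of a cat", "a photo of a dog", "a landscape"]))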
modules/ldsr_model.py ADDED
@@ -0,0 +1,54 @@
1
+ import os
2
+ import sys
3
+ import traceback
4
+
5
+ from basicsr.utils.download_util import load_file_from_url
6
+
7
+ from modules.upscaler import Upscaler, UpscalerData
8
+ from modules.ldsr_model_arch import LDSR
9
+ from modules import shared
10
+
11
+
12
+ class UpscalerLDSR(Upscaler):
13
+ def __init__(self, user_path):
14
+ self.name = "LDSR"
15
+ self.user_path = user_path
16
+ self.model_url = "https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1"
17
+ self.yaml_url = "https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1"
18
+ super().__init__()
19
+ scaler_data = UpscalerData("LDSR", None, self)
20
+ self.scalers = [scaler_data]
21
+
22
+ def load_model(self, path: str):
23
+ # Remove incorrect project.yaml file if too big
24
+ yaml_path = os.path.join(self.model_path, "project.yaml")
25
+ old_model_path = os.path.join(self.model_path, "model.pth")
26
+ new_model_path = os.path.join(self.model_path, "model.ckpt")
27
+ if os.path.exists(yaml_path):
28
+ statinfo = os.stat(yaml_path)
29
+ if statinfo.st_size >= 10485760:
30
+ print("Removing invalid LDSR YAML file.")
31
+ os.remove(yaml_path)
32
+ if os.path.exists(old_model_path):
33
+ print("Renaming model from model.pth to model.ckpt")
34
+ os.rename(old_model_path, new_model_path)
35
+ model = load_file_from_url(url=self.model_url, model_dir=self.model_path,
36
+ file_name="model.ckpt", progress=True)
37
+ yaml = load_file_from_url(url=self.yaml_url, model_dir=self.model_path,
38
+ file_name="project.yaml", progress=True)
39
+
40
+ try:
41
+ return LDSR(model, yaml)
42
+
43
+ except Exception:
44
+ print("Error importing LDSR:", file=sys.stderr)
45
+ print(traceback.format_exc(), file=sys.stderr)
46
+ return None
47
+
48
+ def do_upscale(self, img, path):
49
+ ldsr = self.load_model(path)
50
+ if ldsr is None:
51
+ print("NO LDSR!")
52
+ return img
53
+ ddim_steps = shared.opts.ldsr_steps
54
+ return ldsr.super_resolution(img, ddim_steps, self.scale)
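load_model() above cleans up the LDSR model directory before downloading: a project.yaml over 10 MB is assumed to be a mis-saved file and removed, and a legacy model.pth is renamed to model.ckpt. A standalone sketch of just that sanity check; the directory path is a placeholder:

import os

MAX_YAML_BYTES = 10485760  # 10 MB; a real project.yaml is only a few kilobytes

def sanitize_ldsr_dir(model_dir):
    """Drop a bogus project.yaml and rename a legacy checkpoint, as in UpscalerLDSR.load_model above."""
    yaml_path = os.path.join(model_dir, "project.yaml")
    old_model_path = os.path.join(model_dir, "model.pth")
    new_model_path = os.path.join(model_dir, "model.ckpt")

    if os.path.exists(yaml_path) and os.stat(yaml_path).st_size >= MAX_YAML_BYTES:
        # anything this large is not a YAML config, so discard it and let a fresh copy download
        os.remove(yaml_path)

    if os.path.exists(old_model_path) and not os.path.exists(new_model_path):
        os.rename(old_model_path, new_model_path)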
modules/ldsr_model_arch.py ADDED
@@ -0,0 +1,222 @@
1
+ import gc
2
+ import time
3
+ import warnings
4
+
5
+ import numpy as np
6
+ import torch
7
+ import torchvision
8
+ from PIL import Image
9
+ from einops import rearrange, repeat
10
+ from omegaconf import OmegaConf
11
+
12
+ from ldm.models.diffusion.ddim import DDIMSampler
13
+ from ldm.util import instantiate_from_config, ismap
14
+
15
+ warnings.filterwarnings("ignore", category=UserWarning)
16
+
17
+
18
+ # Create LDSR Class
19
+ class LDSR:
20
+ def load_model_from_config(self, half_attention):
21
+ print(f"Loading model from {self.modelPath}")
22
+ pl_sd = torch.load(self.modelPath, map_location="cpu")
23
+ sd = pl_sd["state_dict"]
24
+ config = OmegaConf.load(self.yamlPath)
25
+ model = instantiate_from_config(config.model)
26
+ model.load_state_dict(sd, strict=False)
27
+ model.cuda()
28
+ if half_attention:
29
+ model = model.half()
30
+
31
+ model.eval()
32
+ return {"model": model}
33
+
34
+ def __init__(self, model_path, yaml_path):
35
+ self.modelPath = model_path
36
+ self.yamlPath = yaml_path
37
+
38
+ @staticmethod
39
+ def run(model, selected_path, custom_steps, eta):
40
+ example = get_cond(selected_path)
41
+
42
+ n_runs = 1
43
+ guider = None
44
+ ckwargs = None
45
+ ddim_use_x0_pred = False
46
+ temperature = 1.
47
+ eta = eta
48
+ custom_shape = None
49
+
50
+ height, width = example["image"].shape[1:3]
51
+ split_input = height >= 128 and width >= 128
52
+
53
+ if split_input:
54
+ ks = 128
55
+ stride = 64
56
+ vqf = 4 #
57
+ model.split_input_params = {"ks": (ks, ks), "stride": (stride, stride),
58
+ "vqf": vqf,
59
+ "patch_distributed_vq": True,
60
+ "tie_braker": False,
61
+ "clip_max_weight": 0.5,
62
+ "clip_min_weight": 0.01,
63
+ "clip_max_tie_weight": 0.5,
64
+ "clip_min_tie_weight": 0.01}
65
+ else:
66
+ if hasattr(model, "split_input_params"):
67
+ delattr(model, "split_input_params")
68
+
69
+ x_t = None
70
+ logs = None
71
+ for n in range(n_runs):
72
+ if custom_shape is not None:
73
+ x_t = torch.randn(1, custom_shape[1], custom_shape[2], custom_shape[3]).to(model.device)
74
+ x_t = repeat(x_t, '1 c h w -> b c h w', b=custom_shape[0])
75
+
76
+ logs = make_convolutional_sample(example, model,
77
+ custom_steps=custom_steps,
78
+ eta=eta, quantize_x0=False,
79
+ custom_shape=custom_shape,
80
+ temperature=temperature, noise_dropout=0.,
81
+ corrector=guider, corrector_kwargs=ckwargs, x_T=x_t,
82
+ ddim_use_x0_pred=ddim_use_x0_pred
83
+ )
84
+ return logs
85
+
86
+ def super_resolution(self, image, steps=100, target_scale=2, half_attention=False):
87
+ model = self.load_model_from_config(half_attention)
88
+
89
+ # Run settings
90
+ diffusion_steps = int(steps)
91
+ eta = 1.0
92
+
93
+ down_sample_method = 'Lanczos'
94
+
95
+ gc.collect()
96
+ torch.cuda.empty_cache()
97
+
98
+ im_og = image
99
+ width_og, height_og = im_og.size
100
+ # If we can adjust the max upscale size, then the 4 below should be our variable
101
+ down_sample_rate = target_scale / 4
102
+ wd = width_og * down_sample_rate
103
+ hd = height_og * down_sample_rate
104
+ width_downsampled_pre = int(wd)
105
+ height_downsampled_pre = int(hd)
106
+
107
+ if down_sample_rate != 1:
108
+ print(
109
+ f'Downsampling from [{width_og}, {height_og}] to [{width_downsampled_pre}, {height_downsampled_pre}]')
110
+ im_og = im_og.resize((width_downsampled_pre, height_downsampled_pre), Image.LANCZOS)
111
+ else:
112
+ print(f"Down sample rate is 1 from {target_scale} / 4 (Not downsampling)")
113
+ logs = self.run(model["model"], im_og, diffusion_steps, eta)
114
+
115
+ sample = logs["sample"]
116
+ sample = sample.detach().cpu()
117
+ sample = torch.clamp(sample, -1., 1.)
118
+ sample = (sample + 1.) / 2. * 255
119
+ sample = sample.numpy().astype(np.uint8)
120
+ sample = np.transpose(sample, (0, 2, 3, 1))
121
+ a = Image.fromarray(sample[0])
122
+
123
+ del model
124
+ gc.collect()
125
+ torch.cuda.empty_cache()
126
+ return a
127
+
128
+
129
+ def get_cond(selected_path):
130
+ example = dict()
131
+ up_f = 4
132
+ c = selected_path.convert('RGB')
133
+ c = torch.unsqueeze(torchvision.transforms.ToTensor()(c), 0)
134
+ c_up = torchvision.transforms.functional.resize(c, size=[up_f * c.shape[2], up_f * c.shape[3]],
135
+ antialias=True)
136
+ c_up = rearrange(c_up, '1 c h w -> 1 h w c')
137
+ c = rearrange(c, '1 c h w -> 1 h w c')
138
+ c = 2. * c - 1.
139
+
140
+ c = c.to(torch.device("cuda"))
141
+ example["LR_image"] = c
142
+ example["image"] = c_up
143
+
144
+ return example
145
+
146
+
147
+ @torch.no_grad()
148
+ def convsample_ddim(model, cond, steps, shape, eta=1.0, callback=None, normals_sequence=None,
149
+ mask=None, x0=None, quantize_x0=False, temperature=1., score_corrector=None,
150
+ corrector_kwargs=None, x_t=None
151
+ ):
152
+ ddim = DDIMSampler(model)
153
+ bs = shape[0]
154
+ shape = shape[1:]
155
+ print(f"Sampling with eta = {eta}; steps: {steps}")
156
+ samples, intermediates = ddim.sample(steps, batch_size=bs, shape=shape, conditioning=cond, callback=callback,
157
+ normals_sequence=normals_sequence, quantize_x0=quantize_x0, eta=eta,
158
+ mask=mask, x0=x0, temperature=temperature, verbose=False,
159
+ score_corrector=score_corrector,
160
+ corrector_kwargs=corrector_kwargs, x_t=x_t)
161
+
162
+ return samples, intermediates
163
+
164
+
165
+ @torch.no_grad()
166
+ def make_convolutional_sample(batch, model, custom_steps=None, eta=1.0, quantize_x0=False, custom_shape=None, temperature=1., noise_dropout=0., corrector=None,
167
+ corrector_kwargs=None, x_T=None, ddim_use_x0_pred=False):
168
+ log = dict()
169
+
170
+ z, c, x, xrec, xc = model.get_input(batch, model.first_stage_key,
171
+ return_first_stage_outputs=True,
172
+ force_c_encode=not (hasattr(model, 'split_input_params')
173
+ and model.cond_stage_key == 'coordinates_bbox'),
174
+ return_original_cond=True)
175
+
176
+ if custom_shape is not None:
177
+ z = torch.randn(custom_shape)
178
+ print(f"Generating {custom_shape[0]} samples of shape {custom_shape[1:]}")
179
+
180
+ z0 = None
181
+
182
+ log["input"] = x
183
+ log["reconstruction"] = xrec
184
+
185
+ if ismap(xc):
186
+ log["original_conditioning"] = model.to_rgb(xc)
187
+ if hasattr(model, 'cond_stage_key'):
188
+ log[model.cond_stage_key] = model.to_rgb(xc)
189
+
190
+ else:
191
+ log["original_conditioning"] = xc if xc is not None else torch.zeros_like(x)
192
+ if model.cond_stage_model:
193
+ log[model.cond_stage_key] = xc if xc is not None else torch.zeros_like(x)
194
+ if model.cond_stage_key == 'class_label':
195
+ log[model.cond_stage_key] = xc[model.cond_stage_key]
196
+
197
+ with model.ema_scope("Plotting"):
198
+ t0 = time.time()
199
+
200
+ sample, intermediates = convsample_ddim(model, c, steps=custom_steps, shape=z.shape,
201
+ eta=eta,
202
+ quantize_x0=quantize_x0, mask=None, x0=z0,
203
+ temperature=temperature, score_corrector=corrector, corrector_kwargs=corrector_kwargs,
204
+ x_t=x_T)
205
+ t1 = time.time()
206
+
207
+ if ddim_use_x0_pred:
208
+ sample = intermediates['pred_x0'][-1]
209
+
210
+ x_sample = model.decode_first_stage(sample)
211
+
212
+ try:
213
+ x_sample_noquant = model.decode_first_stage(sample, force_not_quantize=True)
214
+ log["sample_noquant"] = x_sample_noquant
215
+ log["sample_diff"] = torch.abs(x_sample_noquant - x_sample)
216
+ except:
217
+ pass
218
+
219
+ log["sample"] = x_sample
220
+ log["time"] = t1 - t0
221
+
222
+ return log
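super_resolution() above turns the decoded sample back into an image by clamping to [-1, 1], rescaling to [0, 255] and reordering NCHW to NHWC for PIL. A small sketch of just that conversion; the random tensor in the usage comment stands in for a decoded sample:

import numpy as np
import torch
from PIL import Image

def tensor_to_pil(sample):
    """Convert a (1, C, H, W) tensor in [-1, 1] to a PIL image, as in LDSR.super_resolution above."""
    sample = sample.detach().cpu()
    sample = torch.clamp(sample, -1.0, 1.0)
    sample = (sample + 1.0) / 2.0 * 255          # [-1, 1] -> [0, 255]
    sample = sample.numpy().astype(np.uint8)
    sample = np.transpose(sample, (0, 2, 3, 1))  # NCHW -> NHWC
    return Image.fromarray(sample[0])

# hypothetical usage with a dummy "decoded sample":
# img = tensor_to_pil(torch.rand(1, 3, 64, 64) * 2 - 1)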
modules/lowvram.py ADDED
@@ -0,0 +1,82 @@
1
+ import torch
2
+ from modules.devices import get_optimal_device
3
+
4
+ module_in_gpu = None
5
+ cpu = torch.device("cpu")
6
+ device = gpu = get_optimal_device()
7
+
8
+
9
+ def send_everything_to_cpu():
10
+ global module_in_gpu
11
+
12
+ if module_in_gpu is not None:
13
+ module_in_gpu.to(cpu)
14
+
15
+ module_in_gpu = None
16
+
17
+
18
+ def setup_for_low_vram(sd_model, use_medvram):
19
+ parents = {}
20
+
21
+ def send_me_to_gpu(module, _):
22
+ """send this module to GPU; send whatever tracked module was previously in GPU to CPU;
23
+ we add this as forward_pre_hook to a lot of modules, and this way all but one of them will
24
+ stay in CPU
25
+ """
26
+ global module_in_gpu
27
+
28
+ module = parents.get(module, module)
29
+
30
+ if module_in_gpu == module:
31
+ return
32
+
33
+ if module_in_gpu is not None:
34
+ module_in_gpu.to(cpu)
35
+
36
+ module.to(gpu)
37
+ module_in_gpu = module
38
+
39
+ # see below for register_forward_pre_hook;
40
+ # first_stage_model does not use forward(), it uses encode/decode, so register_forward_pre_hook is
41
+ # useless here, and we just replace those methods
42
+ def first_stage_model_encode_wrap(self, encoder, x):
43
+ send_me_to_gpu(self, None)
44
+ return encoder(x)
45
+
46
+ def first_stage_model_decode_wrap(self, decoder, z):
47
+ send_me_to_gpu(self, None)
48
+ return decoder(z)
49
+
50
+ # remove the three big modules, cond, first_stage, and unet, from the model and then
51
+ # send the model to GPU. Then put the modules back. The modules will remain in CPU.
52
+ stored = sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model
53
+ sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model = None, None, None
54
+ sd_model.to(device)
55
+ sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model = stored
56
+
57
+ # register hooks for the first two models
58
+ sd_model.cond_stage_model.transformer.register_forward_pre_hook(send_me_to_gpu)
59
+ sd_model.first_stage_model.register_forward_pre_hook(send_me_to_gpu)
60
+ sd_model.first_stage_model.encode = lambda x, en=sd_model.first_stage_model.encode: first_stage_model_encode_wrap(sd_model.first_stage_model, en, x)
61
+ sd_model.first_stage_model.decode = lambda z, de=sd_model.first_stage_model.decode: first_stage_model_decode_wrap(sd_model.first_stage_model, de, z)
62
+ parents[sd_model.cond_stage_model.transformer] = sd_model.cond_stage_model
63
+
64
+ if use_medvram:
65
+ sd_model.model.register_forward_pre_hook(send_me_to_gpu)
66
+ else:
67
+ diff_model = sd_model.model.diffusion_model
68
+
69
+ # the third remaining model is still too big for 4 GB, so we also do the same for its submodules
70
+ # so that only one of them is in GPU at a time
71
+ stored = diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed
72
+ diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = None, None, None, None
73
+ sd_model.model.to(device)
74
+ diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = stored
75
+
76
+ # install hooks for bits of third model
77
+ diff_model.time_embed.register_forward_pre_hook(send_me_to_gpu)
78
+ for block in diff_model.input_blocks:
79
+ block.register_forward_pre_hook(send_me_to_gpu)
80
+ diff_model.middle_block.register_forward_pre_hook(send_me_to_gpu)
81
+ for block in diff_model.output_blocks:
82
+ block.register_forward_pre_hook(send_me_to_gpu)
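The low-VRAM strategy above keeps only one large sub-module on the GPU at a time: a forward_pre_hook moves the module about to run onto the GPU and evicts whichever tracked module was resident before it. A toy sketch of the same mechanism, with two small linear layers standing in for the Stable Diffusion components:

import torch
import torch.nn as nn

cpu = torch.device("cpu")
gpu = torch.device("cuda") if torch.cuda.is_available() else cpu

module_in_gpu = None

def send_me_to_gpu(module, _inputs):
    """forward_pre_hook: move this module to the GPU, evicting the previously resident one."""
    global module_in_gpu
    if module_in_gpu is module:
        return
    if module_in_gpu is not None:
        module_in_gpu.to(cpu)
    module.to(gpu)
    module_in_gpu = module

a = nn.Linear(8, 8)
b = nn.Linear(8, 8)
a.register_forward_pre_hook(send_me_to_gpu)
b.register_forward_pre_hook(send_me_to_gpu)

x = torch.randn(1, 8).to(gpu)
y = a(x)   # hook moves `a` onto the GPU before its forward runs
z = b(y)   # hook moves `b` in and sends `a` back to the CPU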
modules/masking.py ADDED
@@ -0,0 +1,99 @@
1
+ from PIL import Image, ImageFilter, ImageOps
2
+
3
+
4
+ def get_crop_region(mask, pad=0):
5
+ """finds a rectangular region that contains all masked areas in an image. Returns (x1, y1, x2, y2) coordinates of the rectangle.
6
+ For example, if a user has painted the top-right part of a 512x512 image, the result may be (256, 0, 512, 256)"""
7
+
8
+ h, w = mask.shape
9
+
10
+ crop_left = 0
11
+ for i in range(w):
12
+ if not (mask[:, i] == 0).all():
13
+ break
14
+ crop_left += 1
15
+
16
+ crop_right = 0
17
+ for i in reversed(range(w)):
18
+ if not (mask[:, i] == 0).all():
19
+ break
20
+ crop_right += 1
21
+
22
+ crop_top = 0
23
+ for i in range(h):
24
+ if not (mask[i] == 0).all():
25
+ break
26
+ crop_top += 1
27
+
28
+ crop_bottom = 0
29
+ for i in reversed(range(h)):
30
+ if not (mask[i] == 0).all():
31
+ break
32
+ crop_bottom += 1
33
+
34
+ return (
35
+ int(max(crop_left-pad, 0)),
36
+ int(max(crop_top-pad, 0)),
37
+ int(min(w - crop_right + pad, w)),
38
+ int(min(h - crop_bottom + pad, h))
39
+ )
40
+
41
+
42
+ def expand_crop_region(crop_region, processing_width, processing_height, image_width, image_height):
43
+ """expands the crop region returned by get_crop_region() to match the aspect ratio of the image the region will be processed in; returns the expanded region
44
+ for example, if the user drew a mask in a 128x32 region, and the dimensions for processing are 512x512, the region will be expanded to 128x128."""
45
+
46
+ x1, y1, x2, y2 = crop_region
47
+
48
+ ratio_crop_region = (x2 - x1) / (y2 - y1)
49
+ ratio_processing = processing_width / processing_height
50
+
51
+ if ratio_crop_region > ratio_processing:
52
+ desired_height = (x2 - x1) * ratio_processing
53
+ desired_height_diff = int(desired_height - (y2-y1))
54
+ y1 -= desired_height_diff//2
55
+ y2 += desired_height_diff - desired_height_diff//2
56
+ if y2 >= image_height:
57
+ diff = y2 - image_height
58
+ y2 -= diff
59
+ y1 -= diff
60
+ if y1 < 0:
61
+ y2 -= y1
62
+ y1 -= y1
63
+ if y2 >= image_height:
64
+ y2 = image_height
65
+ else:
66
+ desired_width = (y2 - y1) * ratio_processing
67
+ desired_width_diff = int(desired_width - (x2-x1))
68
+ x1 -= desired_width_diff//2
69
+ x2 += desired_width_diff - desired_width_diff//2
70
+ if x2 >= image_width:
71
+ diff = x2 - image_width
72
+ x2 -= diff
73
+ x1 -= diff
74
+ if x1 < 0:
75
+ x2 -= x1
76
+ x1 -= x1
77
+ if x2 >= image_width:
78
+ x2 = image_width
79
+
80
+ return x1, y1, x2, y2
81
+
82
+
83
+ def fill(image, mask):
84
+ """fills masked regions with colors from image using blur. Not extremely effective."""
85
+
86
+ image_mod = Image.new('RGBA', (image.width, image.height))
87
+
88
+ image_masked = Image.new('RGBa', (image.width, image.height))
89
+ image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(mask.convert('L')))
90
+
91
+ image_masked = image_masked.convert('RGBa')
92
+
93
+ for radius, repeats in [(256, 1), (64, 1), (16, 2), (4, 4), (2, 2), (0, 1)]:
94
+ blurred = image_masked.filter(ImageFilter.GaussianBlur(radius)).convert('RGBA')
95
+ for _ in range(repeats):
96
+ image_mod.alpha_composite(blurred)
97
+
98
+ return image_mod.convert("RGB")
99
+
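get_crop_region() above scans inward from each edge of the mask until it hits a non-zero column or row, then pads the result; expand_crop_region() then grows that box to the aspect ratio used for processing. A quick check on a synthetic numpy mask, assuming the script runs from the repository root so that modules.masking is importable; the 64x64 size and painted rectangle are arbitrary:

import numpy as np
from modules import masking

mask = np.zeros((64, 64), dtype=np.uint8)
mask[10:20, 30:50] = 255  # a painted rectangle

# scan in from each edge, then pad by 4 px
crop = masking.get_crop_region(mask, pad=4)
print(crop)  # -> (26, 6, 54, 24)

# grow the crop to the 1:1 aspect ratio used for 512x512 processing
print(masking.expand_crop_region(crop, 512, 512, 64, 64))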
modules/memmon.py ADDED
@@ -0,0 +1,85 @@
1
+ import threading
2
+ import time
3
+ from collections import defaultdict
4
+
5
+ import torch
6
+
7
+
8
+ class MemUsageMonitor(threading.Thread):
9
+ run_flag = None
10
+ device = None
11
+ disabled = False
12
+ opts = None
13
+ data = None
14
+
15
+ def __init__(self, name, device, opts):
16
+ threading.Thread.__init__(self)
17
+ self.name = name
18
+ self.device = device
19
+ self.opts = opts
20
+
21
+ self.daemon = True
22
+ self.run_flag = threading.Event()
23
+ self.data = defaultdict(int)
24
+
25
+ try:
26
+ torch.cuda.mem_get_info()
27
+ torch.cuda.memory_stats(self.device)
28
+ except Exception as e: # AMD or whatever
29
+ print(f"Warning: caught exception '{e}', memory monitor disabled")
30
+ self.disabled = True
31
+
32
+ def run(self):
33
+ if self.disabled:
34
+ return
35
+
36
+ while True:
37
+ self.run_flag.wait()
38
+
39
+ torch.cuda.reset_peak_memory_stats()
40
+ self.data.clear()
41
+
42
+ if self.opts.memmon_poll_rate <= 0:
43
+ self.run_flag.clear()
44
+ continue
45
+
46
+ self.data["min_free"] = torch.cuda.mem_get_info()[0]
47
+
48
+ while self.run_flag.is_set():
49
+ free, total = torch.cuda.mem_get_info() # calling with self.device errors, torch bug?
50
+ self.data["min_free"] = min(self.data["min_free"], free)
51
+
52
+ time.sleep(1 / self.opts.memmon_poll_rate)
53
+
54
+ def dump_debug(self):
55
+ print(self, 'recorded data:')
56
+ for k, v in self.read().items():
57
+ print(k, -(v // -(1024 ** 2)))
58
+
59
+ print(self, 'raw torch memory stats:')
60
+ tm = torch.cuda.memory_stats(self.device)
61
+ for k, v in tm.items():
62
+ if 'bytes' not in k:
63
+ continue
64
+ print('\t' if 'peak' in k else '', k, -(v // -(1024 ** 2)))
65
+
66
+ print(torch.cuda.memory_summary())
67
+
68
+ def monitor(self):
69
+ self.run_flag.set()
70
+
71
+ def read(self):
72
+ if not self.disabled:
73
+ free, total = torch.cuda.mem_get_info()
74
+ self.data["total"] = total
75
+
76
+ torch_stats = torch.cuda.memory_stats(self.device)
77
+ self.data["active_peak"] = torch_stats["active_bytes.all.peak"]
78
+ self.data["reserved_peak"] = torch_stats["reserved_bytes.all.peak"]
79
+ self.data["system_peak"] = total - self.data["min_free"]
80
+
81
+ return self.data
82
+
83
+ def stop(self):
84
+ self.run_flag.clear()
85
+ return self.read()
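MemUsageMonitor above samples torch.cuda.mem_get_info() on a daemon thread between monitor() and stop(), and read() merges in the peak statistics from torch. A hedged usage sketch, assuming it runs from the repository root; the SimpleNamespace only supplies the memmon_poll_rate field the class actually reads:

from types import SimpleNamespace

import torch

from modules.memmon import MemUsageMonitor

opts = SimpleNamespace(memmon_poll_rate=8)  # samples per second

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
mon = MemUsageMonitor("demo", device, opts)
mon.start()         # the thread idles until monitor() sets the run flag

mon.monitor()
# ... run a generation or any other CUDA workload here ...
stats = mon.stop()  # clears the run flag and returns read()

print({k: v // (1024 ** 2) for k, v in stats.items()})  # values in MiB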
modules/modelloader.py ADDED
@@ -0,0 +1,153 @@
1
+ import glob
2
+ import os
3
+ import shutil
4
+ import importlib
5
+ from urllib.parse import urlparse
6
+
7
+ from basicsr.utils.download_util import load_file_from_url
8
+ from modules import shared
9
+ from modules.upscaler import Upscaler
10
+ from modules.paths import script_path, models_path
11
+
12
+
13
+ def load_models(model_path: str, model_url: str = None, command_path: str = None, ext_filter=None, download_name=None) -> list:
14
+ """
15
+ A one-and-done loader that tries to find the desired models in the specified directories.
16
+
17
+ @param download_name: Specify to download from model_url immediately.
18
+ @param model_url: If no other models are found, this will be downloaded on upscale.
19
+ @param model_path: The location to store/find models in.
20
+ @param command_path: A command-line argument to search for models in first.
21
+ @param ext_filter: An optional list of filename extensions to filter by
22
+ @return: A list of paths containing the desired model(s)
23
+ """
24
+ output = []
25
+
26
+ if ext_filter is None:
27
+ ext_filter = []
28
+
29
+ try:
30
+ places = []
31
+
32
+ if command_path is not None and command_path != model_path:
33
+ pretrained_path = os.path.join(command_path, 'experiments/pretrained_models')
34
+ if os.path.exists(pretrained_path):
35
+ print(f"Appending path: {pretrained_path}")
36
+ places.append(pretrained_path)
37
+ elif os.path.exists(command_path):
38
+ places.append(command_path)
39
+
40
+ places.append(model_path)
41
+
42
+ for place in places:
43
+ if os.path.exists(place):
44
+ for file in glob.iglob(place + '**/**', recursive=True):
45
+ full_path = file
46
+ if os.path.isdir(full_path):
47
+ continue
48
+ if len(ext_filter) != 0:
49
+ model_name, extension = os.path.splitext(file)
50
+ if extension not in ext_filter:
51
+ continue
52
+ if file not in output:
53
+ output.append(full_path)
54
+
55
+ if model_url is not None and len(output) == 0:
56
+ if download_name is not None:
57
+ dl = load_file_from_url(model_url, model_path, True, download_name)
58
+ output.append(dl)
59
+ else:
60
+ output.append(model_url)
61
+
62
+ except Exception:
63
+ pass
64
+
65
+ return output
66
+
67
+
68
+ def friendly_name(file: str):
69
+ if "http" in file:
70
+ file = urlparse(file).path
71
+
72
+ file = os.path.basename(file)
73
+ model_name, extension = os.path.splitext(file)
74
+ return model_name
75
+
76
+
77
+ def cleanup_models():
78
+ # This code could probably be more efficient if we used a tuple list or something to store the src/destinations
79
+ # and then enumerate that, but this works for now. In the future, it'd be nice to just have every "model" scaler
80
+ # somehow auto-register and just do these things...
81
+ root_path = script_path
82
+ src_path = models_path
83
+ dest_path = os.path.join(models_path, "Stable-diffusion")
84
+ move_files(src_path, dest_path, ".ckpt")
85
+ src_path = os.path.join(root_path, "ESRGAN")
86
+ dest_path = os.path.join(models_path, "ESRGAN")
87
+ move_files(src_path, dest_path)
88
+ src_path = os.path.join(root_path, "gfpgan")
89
+ dest_path = os.path.join(models_path, "GFPGAN")
90
+ move_files(src_path, dest_path)
91
+ src_path = os.path.join(root_path, "SwinIR")
92
+ dest_path = os.path.join(models_path, "SwinIR")
93
+ move_files(src_path, dest_path)
94
+ src_path = os.path.join(root_path, "repositories/latent-diffusion/experiments/pretrained_models/")
95
+ dest_path = os.path.join(models_path, "LDSR")
96
+ move_files(src_path, dest_path)
97
+
98
+
99
+ def move_files(src_path: str, dest_path: str, ext_filter: str = None):
100
+ try:
101
+ if not os.path.exists(dest_path):
102
+ os.makedirs(dest_path)
103
+ if os.path.exists(src_path):
104
+ for file in os.listdir(src_path):
105
+ fullpath = os.path.join(src_path, file)
106
+ if os.path.isfile(fullpath):
107
+ if ext_filter is not None:
108
+ if ext_filter not in file:
109
+ continue
110
+ print(f"Moving {file} from {src_path} to {dest_path}.")
111
+ try:
112
+ shutil.move(fullpath, dest_path)
113
+ except:
114
+ pass
115
+ if len(os.listdir(src_path)) == 0:
116
+ print(f"Removing empty folder: {src_path}")
117
+ shutil.rmtree(src_path, True)
118
+ except:
119
+ pass
120
+
121
+
122
+ def load_upscalers():
123
+ sd = shared.script_path
124
+ # We can only do this 'magic' method to dynamically load upscalers if they are referenced,
125
+ # so we'll try to import any _model.py files before looking in __subclasses__
126
+ modules_dir = os.path.join(sd, "modules")
127
+ for file in os.listdir(modules_dir):
128
+ if "_model.py" in file:
129
+ model_name = file.replace("_model.py", "")
130
+ full_model = f"modules.{model_name}_model"
131
+ try:
132
+ importlib.import_module(full_model)
133
+ except:
134
+ pass
135
+ datas = []
136
+ c_o = vars(shared.cmd_opts)
137
+ for cls in Upscaler.__subclasses__():
138
+ name = cls.__name__
139
+ module_name = cls.__module__
140
+ module = importlib.import_module(module_name)
141
+ class_ = getattr(module, name)
142
+ cmd_name = f"{name.lower().replace('upscaler', '')}_models_path"
143
+ opt_string = None
144
+ try:
145
+ if cmd_name in c_o:
146
+ opt_string = c_o[cmd_name]
147
+ except:
148
+ pass
149
+ scaler = class_(opt_string)
150
+ for child in scaler.scalers:
151
+ datas.append(child)
152
+
153
+ shared.sd_upscalers = datas
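load_models() above gathers checkpoint files from an optional command-line directory plus the default model folder, filters them by extension, and can fall back to downloading model_url. A hedged usage sketch, run from the repository root; the ESRGAN folder and .pth filter are illustrative, and no download URL is supplied:

import os

from modules import modelloader
from modules.paths import models_path

found = modelloader.load_models(
    model_path=os.path.join(models_path, "ESRGAN"),
    model_url=None,      # no fallback download in this sketch
    command_path=None,   # or an --esrgan-models-path style override
    ext_filter=[".pth"],
    download_name=None,
)

for path in found:
    print(modelloader.friendly_name(path), "->", path)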
modules/ngrok.py ADDED
@@ -0,0 +1,15 @@
1
+ from pyngrok import ngrok, conf, exception
2
+
3
+
4
+ def connect(token, port):
5
+ if token is None:
6
+ token = 'None'
7
+ conf.get_default().auth_token = token
8
+ try:
9
+ public_url = ngrok.connect(port).public_url
10
+ except exception.PyngrokNgrokError:
11
+ print(f'Invalid ngrok authtoken, ngrok connection aborted.\n'
12
+ f'Your token: {token}, get the right one on https://dashboard.ngrok.com/get-started/your-authtoken')
13
+ else:
14
+ print(f'ngrok connected to localhost:{port}! URL: {public_url}\n'
15
+ 'You can use this link after the launch is complete.')
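connect() above just stores the authtoken with pyngrok and opens a tunnel to the given local port, printing either the public URL or a hint that the token was rejected. A minimal usage sketch; 7860 is the usual Gradio port, and the token placeholder must be replaced with a real one from the ngrok dashboard (passing None makes the module store the literal string 'None', which ngrok will normally reject):

from modules import ngrok

ngrok.connect("YOUR_NGROK_AUTHTOKEN", 7860)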
modules/paths.py ADDED
@@ -0,0 +1,40 @@
1
+ import argparse
2
+ import os
3
+ import sys
4
+ import modules.safe
5
+
6
+ script_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
7
+ models_path = os.path.join(script_path, "models")
8
+ sys.path.insert(0, script_path)
9
+
10
+ # search for directory of stable diffusion in following places
11
+ sd_path = None
12
+ possible_sd_paths = [os.path.join(script_path, 'repositories/stable-diffusion'), '.', os.path.dirname(script_path)]
13
+ for possible_sd_path in possible_sd_paths:
14
+ if os.path.exists(os.path.join(possible_sd_path, 'ldm/models/diffusion/ddpm.py')):
15
+ sd_path = os.path.abspath(possible_sd_path)
16
+ break
17
+
18
+ assert sd_path is not None, "Couldn't find Stable Diffusion in any of: " + str(possible_sd_paths)
19
+
20
+ path_dirs = [
21
+ (sd_path, 'ldm', 'Stable Diffusion', []),
22
+ (os.path.join(sd_path, '../taming-transformers'), 'taming', 'Taming Transformers', []),
23
+ (os.path.join(sd_path, '../CodeFormer'), 'inference_codeformer.py', 'CodeFormer', []),
24
+ (os.path.join(sd_path, '../BLIP'), 'models/blip.py', 'BLIP', []),
25
+ (os.path.join(sd_path, '../k-diffusion'), 'k_diffusion/sampling.py', 'k_diffusion', ["atstart"]),
26
+ ]
27
+
28
+ paths = {}
29
+
30
+ for d, must_exist, what, options in path_dirs:
31
+ must_exist_path = os.path.abspath(os.path.join(script_path, d, must_exist))
32
+ if not os.path.exists(must_exist_path):
33
+ print(f"Warning: {what} not found at path {must_exist_path}", file=sys.stderr)
34
+ else:
35
+ d = os.path.abspath(d)
36
+ if "atstart" in options:
37
+ sys.path.insert(0, d)
38
+ else:
39
+ sys.path.append(d)
40
+ paths[what] = d
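paths.py registers each repository it finds on sys.path and records it in the paths dict, which other modules use to locate bundled files (interrogate.py above does this for BLIP's med_config.json). A small sketch of consuming that dict, assuming it runs from the repository root:

import os

from modules import paths

# every repository found at startup is keyed by its display name
for name, directory in paths.paths.items():
    print(f"{name:20} -> {directory}")

# e.g. the BLIP config used by modules/interrogate.py, if that repo was found
blip_dir = paths.paths.get("BLIP")
if blip_dir is not None:
    med_config = os.path.join(blip_dir, "configs", "med_config.json")
    print("BLIP med_config:", med_config, os.path.exists(med_config))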
modules/processing.py ADDED
@@ -0,0 +1,721 @@
1
+ import json
2
+ import math
3
+ import os
4
+ import sys
5
+
6
+ import torch
7
+ import numpy as np
8
+ from PIL import Image, ImageFilter, ImageOps
9
+ import random
10
+ import cv2
11
+ from skimage import exposure
12
+
13
+ import modules.sd_hijack
14
+ from modules import devices, prompt_parser, masking, sd_samplers, lowvram
15
+ from modules.sd_hijack import model_hijack
16
+ from modules.shared import opts, cmd_opts, state
17
+ import modules.shared as shared
18
+ import modules.face_restoration
19
+ import modules.images as images
20
+ import modules.styles
21
+ import logging
22
+
23
+
24
+ # some of those options should not be changed at all because they would break the model, so I removed them from options.
25
+ opt_C = 4
26
+ opt_f = 8
27
+
28
+
29
+ def setup_color_correction(image):
30
+ logging.info("Calibrating color correction.")
31
+ correction_target = cv2.cvtColor(np.asarray(image.copy()), cv2.COLOR_RGB2LAB)
32
+ return correction_target
33
+
34
+
35
+ def apply_color_correction(correction, image):
36
+ logging.info("Applying color correction.")
37
+ image = Image.fromarray(cv2.cvtColor(exposure.match_histograms(
38
+ cv2.cvtColor(
39
+ np.asarray(image),
40
+ cv2.COLOR_RGB2LAB
41
+ ),
42
+ correction,
43
+ channel_axis=2
44
+ ), cv2.COLOR_LAB2RGB).astype("uint8"))
45
+
46
+ return image
47
+
48
+
49
+ def get_correct_sampler(p):
50
+ if isinstance(p, modules.processing.StableDiffusionProcessingTxt2Img):
51
+ return sd_samplers.samplers
52
+ elif isinstance(p, modules.processing.StableDiffusionProcessingImg2Img):
53
+ return sd_samplers.samplers_for_img2img
54
+
55
+ class StableDiffusionProcessing:
56
+ def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt="", styles=None, seed=-1, subseed=-1, subseed_strength=0, seed_resize_from_h=-1, seed_resize_from_w=-1, seed_enable_extras=True, sampler_index=0, batch_size=1, n_iter=1, steps=50, cfg_scale=7.0, width=512, height=512, restore_faces=False, tiling=False, do_not_save_samples=False, do_not_save_grid=False, extra_generation_params=None, overlay_images=None, negative_prompt=None, eta=None):
57
+ self.sd_model = sd_model
58
+ self.outpath_samples: str = outpath_samples
59
+ self.outpath_grids: str = outpath_grids
60
+ self.prompt: str = prompt
61
+ self.prompt_for_display: str = None
62
+ self.negative_prompt: str = (negative_prompt or "")
63
+ self.styles: list = styles or []
64
+ self.seed: int = seed
65
+ self.subseed: int = subseed
66
+ self.subseed_strength: float = subseed_strength
67
+ self.seed_resize_from_h: int = seed_resize_from_h
68
+ self.seed_resize_from_w: int = seed_resize_from_w
69
+ self.sampler_index: int = sampler_index
70
+ self.batch_size: int = batch_size
71
+ self.n_iter: int = n_iter
72
+ self.steps: int = steps
73
+ self.cfg_scale: float = cfg_scale
74
+ self.width: int = width
75
+ self.height: int = height
76
+ self.restore_faces: bool = restore_faces
77
+ self.tiling: bool = tiling
78
+ self.do_not_save_samples: bool = do_not_save_samples
79
+ self.do_not_save_grid: bool = do_not_save_grid
80
+ self.extra_generation_params: dict = extra_generation_params or {}
81
+ self.overlay_images = overlay_images
82
+ self.eta = eta
83
+ self.paste_to = None
84
+ self.color_corrections = None
85
+ self.denoising_strength: float = 0
86
+ self.sampler_noise_scheduler_override = None
87
+ self.ddim_discretize = opts.ddim_discretize
88
+ self.s_churn = opts.s_churn
89
+ self.s_tmin = opts.s_tmin
90
+ self.s_tmax = float('inf') # not representable as a standard ui option
91
+ self.s_noise = opts.s_noise
92
+
93
+ if not seed_enable_extras:
94
+ self.subseed = -1
95
+ self.subseed_strength = 0
96
+ self.seed_resize_from_h = 0
97
+ self.seed_resize_from_w = 0
98
+
99
+ def init(self, all_prompts, all_seeds, all_subseeds):
100
+ pass
101
+
102
+ def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
103
+ raise NotImplementedError()
104
+
105
+
106
+ class Processed:
107
+ def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None):
108
+ self.images = images_list
109
+ self.prompt = p.prompt
110
+ self.negative_prompt = p.negative_prompt
111
+ self.seed = seed
112
+ self.subseed = subseed
113
+ self.subseed_strength = p.subseed_strength
114
+ self.info = info
115
+ self.width = p.width
116
+ self.height = p.height
117
+ self.sampler_index = p.sampler_index
118
+ self.sampler = sd_samplers.samplers[p.sampler_index].name
119
+ self.cfg_scale = p.cfg_scale
120
+ self.steps = p.steps
121
+ self.batch_size = p.batch_size
122
+ self.restore_faces = p.restore_faces
123
+ self.face_restoration_model = opts.face_restoration_model if p.restore_faces else None
124
+ self.sd_model_hash = shared.sd_model.sd_model_hash
125
+ self.seed_resize_from_w = p.seed_resize_from_w
126
+ self.seed_resize_from_h = p.seed_resize_from_h
127
+ self.denoising_strength = getattr(p, 'denoising_strength', None)
128
+ self.extra_generation_params = p.extra_generation_params
129
+ self.index_of_first_image = index_of_first_image
130
+ self.styles = p.styles
131
+ self.job_timestamp = state.job_timestamp
132
+ self.clip_skip = opts.CLIP_stop_at_last_layers
133
+
134
+ self.eta = p.eta
135
+ self.ddim_discretize = p.ddim_discretize
136
+ self.s_churn = p.s_churn
137
+ self.s_tmin = p.s_tmin
138
+ self.s_tmax = p.s_tmax
139
+ self.s_noise = p.s_noise
140
+ self.sampler_noise_scheduler_override = p.sampler_noise_scheduler_override
141
+ self.prompt = self.prompt if type(self.prompt) != list else self.prompt[0]
142
+ self.negative_prompt = self.negative_prompt if type(self.negative_prompt) != list else self.negative_prompt[0]
143
+ self.seed = int(self.seed if type(self.seed) != list else self.seed[0])
144
+ self.subseed = int(self.subseed if type(self.subseed) != list else self.subseed[0]) if self.subseed is not None else -1
145
+
146
+ self.all_prompts = all_prompts or [self.prompt]
147
+ self.all_seeds = all_seeds or [self.seed]
148
+ self.all_subseeds = all_subseeds or [self.subseed]
149
+ self.infotexts = infotexts or [info]
150
+
151
+ def js(self):
152
+ obj = {
153
+ "prompt": self.prompt,
154
+ "all_prompts": self.all_prompts,
155
+ "negative_prompt": self.negative_prompt,
156
+ "seed": self.seed,
157
+ "all_seeds": self.all_seeds,
158
+ "subseed": self.subseed,
159
+ "all_subseeds": self.all_subseeds,
160
+ "subseed_strength": self.subseed_strength,
161
+ "width": self.width,
162
+ "height": self.height,
163
+ "sampler_index": self.sampler_index,
164
+ "sampler": self.sampler,
165
+ "cfg_scale": self.cfg_scale,
166
+ "steps": self.steps,
167
+ "batch_size": self.batch_size,
168
+ "restore_faces": self.restore_faces,
169
+ "face_restoration_model": self.face_restoration_model,
170
+ "sd_model_hash": self.sd_model_hash,
171
+ "seed_resize_from_w": self.seed_resize_from_w,
172
+ "seed_resize_from_h": self.seed_resize_from_h,
173
+ "denoising_strength": self.denoising_strength,
174
+ "extra_generation_params": self.extra_generation_params,
175
+ "index_of_first_image": self.index_of_first_image,
176
+ "infotexts": self.infotexts,
177
+ "styles": self.styles,
178
+ "job_timestamp": self.job_timestamp,
179
+ "clip_skip": self.clip_skip,
180
+ }
181
+
182
+ return json.dumps(obj)
183
+
184
+ def infotext(self, p: StableDiffusionProcessing, index):
185
+ return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], position_in_batch=index % self.batch_size, iteration=index // self.batch_size)
186
+
187
+
188
+ # from https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/3
189
+ def slerp(val, low, high):
190
+ low_norm = low/torch.norm(low, dim=1, keepdim=True)
191
+ high_norm = high/torch.norm(high, dim=1, keepdim=True)
192
+ dot = (low_norm*high_norm).sum(1)
193
+
194
+ if dot.mean() > 0.9995:
195
+ return low * val + high * (1 - val)
196
+
197
+ omega = torch.acos(dot)
198
+ so = torch.sin(omega)
199
+ res = (torch.sin((1.0-val)*omega)/so).unsqueeze(1)*low + (torch.sin(val*omega)/so).unsqueeze(1) * high
200
+ return res
201
+
202
+
203
+ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, seed_resize_from_h=0, seed_resize_from_w=0, p=None):
204
+ xs = []
205
+
206
+ # if we have multiple seeds, this means we are working with batch size>1; this then
207
+ # enables the generation of additional tensors with noise that the sampler will use during its processing.
208
+ # Using those pre-generated tensors instead of simple torch.randn allows a batch with seeds [100, 101] to
209
+ # produce the same images as with two batches [100], [101].
210
+ if p is not None and p.sampler is not None and (len(seeds) > 1 and opts.enable_batch_seeds or opts.eta_noise_seed_delta > 0):
211
+ sampler_noises = [[] for _ in range(p.sampler.number_of_needed_noises(p))]
212
+ else:
213
+ sampler_noises = None
214
+
215
+ for i, seed in enumerate(seeds):
216
+ noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8)
217
+
218
+ subnoise = None
219
+ if subseeds is not None:
220
+ subseed = 0 if i >= len(subseeds) else subseeds[i]
221
+
222
+ subnoise = devices.randn(subseed, noise_shape)
223
+
224
+ # randn results depend on device; gpu and cpu get different results for same seed;
225
+ # the way I see it, it's better to do this on CPU, so that everyone gets same result;
226
+ # but the original script had it like this, so I do not dare change it for now because
227
+ # it will break everyone's seeds.
228
+ noise = devices.randn(seed, noise_shape)
229
+
230
+ if subnoise is not None:
231
+ noise = slerp(subseed_strength, noise, subnoise)
232
+
233
+ if noise_shape != shape:
234
+ x = devices.randn(seed, shape)
235
+ dx = (shape[2] - noise_shape[2]) // 2
236
+ dy = (shape[1] - noise_shape[1]) // 2
237
+ w = noise_shape[2] if dx >= 0 else noise_shape[2] + 2 * dx
238
+ h = noise_shape[1] if dy >= 0 else noise_shape[1] + 2 * dy
239
+ tx = 0 if dx < 0 else dx
240
+ ty = 0 if dy < 0 else dy
241
+ dx = max(-dx, 0)
242
+ dy = max(-dy, 0)
243
+
244
+ x[:, ty:ty+h, tx:tx+w] = noise[:, dy:dy+h, dx:dx+w]
245
+ noise = x
246
+
247
+ if sampler_noises is not None:
248
+ cnt = p.sampler.number_of_needed_noises(p)
249
+
250
+ if opts.eta_noise_seed_delta > 0:
251
+ torch.manual_seed(seed + opts.eta_noise_seed_delta)
252
+
253
+ for j in range(cnt):
254
+ sampler_noises[j].append(devices.randn_without_seed(tuple(noise_shape)))
255
+
256
+ xs.append(noise)
257
+
258
+ if sampler_noises is not None:
259
+ p.sampler.sampler_noises = [torch.stack(n).to(shared.device) for n in sampler_noises]
260
+
261
+ x = torch.stack(xs).to(shared.device)
262
+ return x
263
+
264
+
265
+ def decode_first_stage(model, x):
266
+ with devices.autocast(disable=x.dtype == devices.dtype_vae):
267
+ x = model.decode_first_stage(x)
268
+
269
+ return x
270
+
271
+
272
+ def get_fixed_seed(seed):
273
+ if seed is None or seed == '' or seed == -1:
274
+ return int(random.randrange(4294967294))
275
+
276
+ return seed
277
+
278
+
279
+ def fix_seed(p):
280
+ p.seed = get_fixed_seed(p.seed)
281
+ p.subseed = get_fixed_seed(p.subseed)
282
+
283
+
284
+ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration=0, position_in_batch=0):
285
+ index = position_in_batch + iteration * p.batch_size
286
+
287
+ clip_skip = getattr(p, 'clip_skip', opts.CLIP_stop_at_last_layers)
288
+
289
+ generation_params = {
290
+ "Steps": p.steps,
291
+ "Sampler": get_correct_sampler(p)[p.sampler_index].name,
292
+ "CFG scale": p.cfg_scale,
293
+ "Seed": all_seeds[index],
294
+ "Face restoration": (opts.face_restoration_model if p.restore_faces else None),
295
+ "Size": f"{p.width}x{p.height}",
296
+ "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash),
297
+ "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')),
298
+ "Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name.replace(',', '').replace(':', '')),
299
+ "Batch size": (None if p.batch_size < 2 else p.batch_size),
300
+ "Batch pos": (None if p.batch_size < 2 else position_in_batch),
301
+ "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]),
302
+ "Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength),
303
+ "Seed resize from": (None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"),
304
+ "Denoising strength": getattr(p, 'denoising_strength', None),
305
+ "Eta": (None if p.sampler is None or p.sampler.eta == p.sampler.default_eta else p.sampler.eta),
306
+ "Clip skip": None if clip_skip <= 1 else clip_skip,
307
+ "ENSD": None if opts.eta_noise_seed_delta == 0 else opts.eta_noise_seed_delta,
308
+ }
309
+
310
+ generation_params.update(p.extra_generation_params)
311
+
312
+ generation_params_text = ", ".join([k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None])
313
+
314
+ negative_prompt_text = "\nNegative prompt: " + p.negative_prompt if p.negative_prompt else ""
315
+
316
+ return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip()
317
+
318
+
319
+ def process_images(p: StableDiffusionProcessing) -> Processed:
320
+ """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch"""
321
+
322
+ if type(p.prompt) == list:
323
+ assert(len(p.prompt) > 0)
324
+ else:
325
+ assert p.prompt is not None
326
+
327
+ with open(os.path.join(shared.script_path, "params.txt"), "w", encoding="utf8") as file:
328
+ processed = Processed(p, [], p.seed, "")
329
+ file.write(processed.infotext(p, 0))
330
+
331
+ devices.torch_gc()
332
+
333
+ seed = get_fixed_seed(p.seed)
334
+ subseed = get_fixed_seed(p.subseed)
335
+
336
+ if p.outpath_samples is not None:
337
+ os.makedirs(p.outpath_samples, exist_ok=True)
338
+
339
+ if p.outpath_grids is not None:
340
+ os.makedirs(p.outpath_grids, exist_ok=True)
341
+
342
+ modules.sd_hijack.model_hijack.apply_circular(p.tiling)
343
+ modules.sd_hijack.model_hijack.clear_comments()
344
+
345
+ comments = {}
346
+
347
+ shared.prompt_styles.apply_styles(p)
348
+
349
+ if type(p.prompt) == list:
350
+ all_prompts = p.prompt
351
+ else:
352
+ all_prompts = p.batch_size * p.n_iter * [p.prompt]
353
+
354
+ if type(seed) == list:
355
+ all_seeds = seed
356
+ else:
357
+ all_seeds = [int(seed) + (x if p.subseed_strength == 0 else 0) for x in range(len(all_prompts))]
358
+
359
+ if type(subseed) == list:
360
+ all_subseeds = subseed
361
+ else:
362
+ all_subseeds = [int(subseed) + x for x in range(len(all_prompts))]
363
+
364
+ def infotext(iteration=0, position_in_batch=0):
365
+ return create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration, position_in_batch)
366
+
367
+ if os.path.exists(cmd_opts.embeddings_dir):
368
+ model_hijack.embedding_db.load_textual_inversion_embeddings()
369
+
370
+ infotexts = []
371
+ output_images = []
372
+
373
+ with torch.no_grad(), p.sd_model.ema_scope():
374
+ with devices.autocast():
375
+ p.init(all_prompts, all_seeds, all_subseeds)
376
+
377
+ if state.job_count == -1:
378
+ state.job_count = p.n_iter
379
+
380
+ for n in range(p.n_iter):
381
+ if state.skipped:
382
+ state.skipped = False
383
+
384
+ if state.interrupted:
385
+ break
386
+
387
+ prompts = all_prompts[n * p.batch_size:(n + 1) * p.batch_size]
388
+ seeds = all_seeds[n * p.batch_size:(n + 1) * p.batch_size]
389
+ subseeds = all_subseeds[n * p.batch_size:(n + 1) * p.batch_size]
390
+
391
+ if (len(prompts) == 0):
392
+ break
393
+
394
+ #uc = p.sd_model.get_learned_conditioning(len(prompts) * [p.negative_prompt])
395
+ #c = p.sd_model.get_learned_conditioning(prompts)
396
+ with devices.autocast():
397
+ uc = prompt_parser.get_learned_conditioning(shared.sd_model, len(prompts) * [p.negative_prompt], p.steps)
398
+ c = prompt_parser.get_multicond_learned_conditioning(shared.sd_model, prompts, p.steps)
399
+
400
+ if len(model_hijack.comments) > 0:
401
+ for comment in model_hijack.comments:
402
+ comments[comment] = 1
403
+
404
+ if p.n_iter > 1:
405
+ shared.state.job = f"Batch {n+1} out of {p.n_iter}"
406
+
407
+ with devices.autocast():
408
+ samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength)
409
+
410
+ if state.interrupted or state.skipped:
411
+
412
+ # if we are interrupted, sample returns just noise
413
+ # use the image collected previously in the sampler loop
414
+ samples_ddim = shared.state.current_latent
415
+
416
+ samples_ddim = samples_ddim.to(devices.dtype_vae)
417
+ x_samples_ddim = decode_first_stage(p.sd_model, samples_ddim)
418
+ x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
419
+
420
+ del samples_ddim
421
+
422
+ if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
423
+ lowvram.send_everything_to_cpu()
424
+
425
+ devices.torch_gc()
426
+
427
+ if opts.filter_nsfw:
428
+ import modules.safety as safety
429
+ x_samples_ddim = modules.safety.censor_batch(x_samples_ddim)
430
+
431
+ for i, x_sample in enumerate(x_samples_ddim):
432
+ x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
433
+ x_sample = x_sample.astype(np.uint8)
434
+
435
+ if p.restore_faces:
436
+ if opts.save and not p.do_not_save_samples and opts.save_images_before_face_restoration:
437
+ images.save_image(Image.fromarray(x_sample), p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-face-restoration")
438
+
439
+ devices.torch_gc()
440
+
441
+ x_sample = modules.face_restoration.restore_faces(x_sample)
442
+ devices.torch_gc()
443
+
444
+ image = Image.fromarray(x_sample)
445
+
446
+ if p.color_corrections is not None and i < len(p.color_corrections):
447
+ if opts.save and not p.do_not_save_samples and opts.save_images_before_color_correction:
448
+ images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-color-correction")
449
+ image = apply_color_correction(p.color_corrections[i], image)
450
+
451
+ if p.overlay_images is not None and i < len(p.overlay_images):
452
+ overlay = p.overlay_images[i]
453
+
454
+ if p.paste_to is not None:
455
+ x, y, w, h = p.paste_to
456
+ base_image = Image.new('RGBA', (overlay.width, overlay.height))
457
+ image = images.resize_image(1, image, w, h)
458
+ base_image.paste(image, (x, y))
459
+ image = base_image
460
+
461
+ image = image.convert('RGBA')
462
+ image.alpha_composite(overlay)
463
+ image = image.convert('RGB')
464
+
465
+ if opts.samples_save and not p.do_not_save_samples:
466
+ images.save_image(image, p.outpath_samples, "", seeds[i], prompts[i], opts.samples_format, info=infotext(n, i), p=p)
467
+
468
+ text = infotext(n, i)
469
+ infotexts.append(text)
470
+ if opts.enable_pnginfo:
471
+ image.info["parameters"] = text
472
+ output_images.append(image)
473
+
474
+ del x_samples_ddim
475
+
476
+ devices.torch_gc()
477
+
478
+ state.nextjob()
479
+
480
+ p.color_corrections = None
481
+
482
+ index_of_first_image = 0
483
+ unwanted_grid_because_of_img_count = len(output_images) < 2 and opts.grid_only_if_multiple
484
+ if (opts.return_grid or opts.grid_save) and not p.do_not_save_grid and not unwanted_grid_because_of_img_count:
485
+ grid = images.image_grid(output_images, p.batch_size)
486
+
487
+ if opts.return_grid:
488
+ text = infotext()
489
+ infotexts.insert(0, text)
490
+ if opts.enable_pnginfo:
491
+ grid.info["parameters"] = text
492
+ output_images.insert(0, grid)
493
+ index_of_first_image = 1
494
+
495
+ if opts.grid_save:
496
+ images.save_image(grid, p.outpath_grids, "grid", all_seeds[0], all_prompts[0], opts.grid_format, info=infotext(), short_filename=not opts.grid_extended_filename, p=p, grid=True)
497
+
498
+ devices.torch_gc()
499
+ return Processed(p, output_images, all_seeds[0], infotext() + "".join(["\n\n" + x for x in comments]), subseed=all_subseeds[0], all_prompts=all_prompts, all_seeds=all_seeds, all_subseeds=all_subseeds, index_of_first_image=index_of_first_image, infotexts=infotexts)
500
+
501
+
502
+ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
503
+ sampler = None
504
+ firstphase_width = 0
505
+ firstphase_height = 0
506
+ firstphase_width_truncated = 0
507
+ firstphase_height_truncated = 0
508
+
509
+ def __init__(self, enable_hr=False, scale_latent=True, denoising_strength=0.75, **kwargs):
510
+ super().__init__(**kwargs)
511
+ self.enable_hr = enable_hr
512
+ self.scale_latent = scale_latent
513
+ self.denoising_strength = denoising_strength
514
+
515
+ def init(self, all_prompts, all_seeds, all_subseeds):
516
+ if self.enable_hr:
517
+ if state.job_count == -1:
518
+ state.job_count = self.n_iter * 2
519
+ else:
520
+ state.job_count = state.job_count * 2
521
+
522
+ desired_pixel_count = 512 * 512
523
+ actual_pixel_count = self.width * self.height
524
+ scale = math.sqrt(desired_pixel_count / actual_pixel_count)
525
+
526
+ self.firstphase_width = math.ceil(scale * self.width / 64) * 64
527
+ self.firstphase_height = math.ceil(scale * self.height / 64) * 64
528
+ self.firstphase_width_truncated = int(scale * self.width)
529
+ self.firstphase_height_truncated = int(scale * self.height)
530
+
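To make the first-phase sizing concrete, here is a worked example that follows the formulas above; the latent downscale factor of 8 used for the crop at the end is an assumption (it corresponds to opt_f in sample() below).

import math

# target resolution 1024x768, first-phase budget of 512*512 pixels
scale = math.sqrt(512 * 512 / (1024 * 768))            # ~0.5774
firstphase_width = math.ceil(scale * 1024 / 64) * 64   # 640
firstphase_height = math.ceil(scale * 768 / 64) * 64   # 448
width_truncated = int(scale * 1024)                    # 591
height_truncated = int(scale * 768)                    # 443
# sample() below then crops (640 - 591) // 8 = 6 latent columns and
# (448 - 443) // 8 = 0 latent rows of padding before upscaling to 1024x768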
531
+ def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
532
+ self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model)
533
+
534
+ if not self.enable_hr:
535
+ x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
536
+ samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning)
537
+ return samples
538
+
539
+ x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
540
+ samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning)
541
+
542
+ truncate_x = (self.firstphase_width - self.firstphase_width_truncated) // opt_f
543
+ truncate_y = (self.firstphase_height - self.firstphase_height_truncated) // opt_f
544
+
545
+ samples = samples[:, :, truncate_y//2:samples.shape[2]-truncate_y//2, truncate_x//2:samples.shape[3]-truncate_x//2]
546
+
547
+ if self.scale_latent:
548
+ samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
549
+ else:
550
+ decoded_samples = decode_first_stage(self.sd_model, samples)
551
+
552
+ if opts.upscaler_for_img2img is None or opts.upscaler_for_img2img == "None":
553
+ decoded_samples = torch.nn.functional.interpolate(decoded_samples, size=(self.height, self.width), mode="bilinear")
554
+ else:
555
+ lowres_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0)
556
+
557
+ batch_images = []
558
+ for i, x_sample in enumerate(lowres_samples):
559
+ x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
560
+ x_sample = x_sample.astype(np.uint8)
561
+ image = Image.fromarray(x_sample)
562
+ image = images.resize_image(0, image, self.width, self.height)
563
+ image = np.array(image).astype(np.float32) / 255.0
564
+ image = np.moveaxis(image, 2, 0)
565
+ batch_images.append(image)
566
+
567
+ decoded_samples = torch.from_numpy(np.array(batch_images))
568
+ decoded_samples = decoded_samples.to(shared.device)
569
+ decoded_samples = 2. * decoded_samples - 1.
570
+
571
+ samples = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(decoded_samples))
572
+
573
+ shared.state.nextjob()
574
+
575
+ self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model)
576
+
577
+ noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
578
+
579
+ # GC now before running the next img2img to prevent running out of memory
580
+ x = None
581
+ devices.torch_gc()
582
+
583
+ samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps)
584
+
585
+ return samples
586
+
587
+
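As a rough illustration of how a caller such as txt2img drives the loop above, here is a minimal sketch. The constructor keyword arguments and the outdir option name are assumptions for the sketch (the full parameter list lives in StableDiffusionProcessing.__init__, which is not shown here), not an authoritative API.

import modules.shared as shared
from modules.processing import StableDiffusionProcessingTxt2Img, process_images

# minimal sketch; unspecified fields fall back to the defaults of StableDiffusionProcessing
p = StableDiffusionProcessingTxt2Img(
    sd_model=shared.sd_model,
    outpath_samples=shared.opts.outdir_txt2img_samples,  # assumed option name
    prompt="a fantasy landscape with a [mountain:lake:0.25]",
    negative_prompt="blurry",
    seed=-1,
    sampler_index=0,
    steps=20,
    width=512,
    height=512,
)
processed = process_images(p)   # returns a Processed object
print(processed.infotexts[0])   # assumed attribute mirroring the infotexts argument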
588
+ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
589
+ sampler = None
590
+
591
+ def __init__(self, init_images=None, resize_mode=0, denoising_strength=0.75, mask=None, mask_blur=4, inpainting_fill=0, inpaint_full_res=True, inpaint_full_res_padding=0, inpainting_mask_invert=0, **kwargs):
592
+ super().__init__(**kwargs)
593
+
594
+ self.init_images = init_images
595
+ self.resize_mode: int = resize_mode
596
+ self.denoising_strength: float = denoising_strength
597
+ self.init_latent = None
598
+ self.image_mask = mask
599
+ #self.image_unblurred_mask = None
600
+ self.latent_mask = None
601
+ self.mask_for_overlay = None
602
+ self.mask_blur = mask_blur
603
+ self.inpainting_fill = inpainting_fill
604
+ self.inpaint_full_res = inpaint_full_res
605
+ self.inpaint_full_res_padding = inpaint_full_res_padding
606
+ self.inpainting_mask_invert = inpainting_mask_invert
607
+ self.mask = None
608
+ self.nmask = None
609
+
610
+ def init(self, all_prompts, all_seeds, all_subseeds):
611
+ self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers_for_img2img, self.sampler_index, self.sd_model)
612
+ crop_region = None
613
+
614
+ if self.image_mask is not None:
615
+ self.image_mask = self.image_mask.convert('L')
616
+
617
+ if self.inpainting_mask_invert:
618
+ self.image_mask = ImageOps.invert(self.image_mask)
619
+
620
+ #self.image_unblurred_mask = self.image_mask
621
+
622
+ if self.mask_blur > 0:
623
+ self.image_mask = self.image_mask.filter(ImageFilter.GaussianBlur(self.mask_blur))
624
+
625
+ if self.inpaint_full_res:
626
+ self.mask_for_overlay = self.image_mask
627
+ mask = self.image_mask.convert('L')
628
+ crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding)
629
+ crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height)
630
+ x1, y1, x2, y2 = crop_region
631
+
632
+ mask = mask.crop(crop_region)
633
+ self.image_mask = images.resize_image(2, mask, self.width, self.height)
634
+ self.paste_to = (x1, y1, x2-x1, y2-y1)
635
+ else:
636
+ self.image_mask = images.resize_image(self.resize_mode, self.image_mask, self.width, self.height)
637
+ np_mask = np.array(self.image_mask)
638
+ np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8)
639
+ self.mask_for_overlay = Image.fromarray(np_mask)
640
+
641
+ self.overlay_images = []
642
+
643
+ latent_mask = self.latent_mask if self.latent_mask is not None else self.image_mask
644
+
645
+ add_color_corrections = opts.img2img_color_correction and self.color_corrections is None
646
+ if add_color_corrections:
647
+ self.color_corrections = []
648
+ imgs = []
649
+ for img in self.init_images:
650
+ image = img.convert("RGB")
651
+
652
+ if crop_region is None:
653
+ image = images.resize_image(self.resize_mode, image, self.width, self.height)
654
+
655
+ if self.image_mask is not None:
656
+ image_masked = Image.new('RGBa', (image.width, image.height))
657
+ image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L')))
658
+
659
+ self.overlay_images.append(image_masked.convert('RGBA'))
660
+
661
+ if crop_region is not None:
662
+ image = image.crop(crop_region)
663
+ image = images.resize_image(2, image, self.width, self.height)
664
+
665
+ if self.image_mask is not None:
666
+ if self.inpainting_fill != 1:
667
+ image = masking.fill(image, latent_mask)
668
+
669
+ if add_color_corrections:
670
+ self.color_corrections.append(setup_color_correction(image))
671
+
672
+ image = np.array(image).astype(np.float32) / 255.0
673
+ image = np.moveaxis(image, 2, 0)
674
+
675
+ imgs.append(image)
676
+
677
+ if len(imgs) == 1:
678
+ batch_images = np.expand_dims(imgs[0], axis=0).repeat(self.batch_size, axis=0)
679
+ if self.overlay_images is not None:
680
+ self.overlay_images = self.overlay_images * self.batch_size
681
+ elif len(imgs) <= self.batch_size:
682
+ self.batch_size = len(imgs)
683
+ batch_images = np.array(imgs)
684
+ else:
685
+ raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
686
+
687
+ image = torch.from_numpy(batch_images)
688
+ image = 2. * image - 1.
689
+ image = image.to(shared.device)
690
+
691
+ self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image))
692
+
693
+ if self.image_mask is not None:
694
+ init_mask = latent_mask
695
+ latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
696
+ latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
697
+ latmask = latmask[0]
698
+ latmask = np.around(latmask)
699
+ latmask = np.tile(latmask[None], (4, 1, 1))
700
+
701
+ self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(self.sd_model.dtype)
702
+ self.nmask = torch.asarray(latmask).to(shared.device).type(self.sd_model.dtype)
703
+
704
+ # this needs to be fixed to be done in sample() using actual seeds for batches
705
+ if self.inpainting_fill == 2:
706
+ self.init_latent = self.init_latent * self.mask + create_random_tensors(self.init_latent.shape[1:], all_seeds[0:self.init_latent.shape[0]]) * self.nmask
707
+ elif self.inpainting_fill == 3:
708
+ self.init_latent = self.init_latent * self.mask
709
+
710
+ def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
711
+ x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
712
+
713
+ samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning)
714
+
715
+ if self.mask is not None:
716
+ samples = samples * self.nmask + self.init_latent * self.mask
717
+
718
+ del x
719
+ devices.torch_gc()
720
+
721
+ return samples
modules/prompt_parser.py ADDED
@@ -0,0 +1,366 @@
1
+ import re
2
+ from collections import namedtuple
3
+ from typing import List
4
+ import lark
5
+
6
+ # a prompt like this: "fantasy landscape with a [mountain:lake:0.25] and [an oak:a christmas tree:0.75][ in foreground::0.6][ in background:0.25] [shoddy:masterful:0.5]"
7
+ # will be represented with prompt_schedule like this (assuming steps=100):
8
+ # [25, 'fantasy landscape with a mountain and an oak in foreground shoddy']
9
+ # [50, 'fantasy landscape with a lake and an oak in foreground in background shoddy']
10
+ # [60, 'fantasy landscape with a lake and an oak in foreground in background masterful']
11
+ # [75, 'fantasy landscape with a lake and an oak in background masterful']
12
+ # [100, 'fantasy landscape with a lake and a christmas tree in background masterful']
13
+
14
+ schedule_parser = lark.Lark(r"""
15
+ !start: (prompt | /[][():]/+)*
16
+ prompt: (emphasized | scheduled | alternate | plain | WHITESPACE)*
17
+ !emphasized: "(" prompt ")"
18
+ | "(" prompt ":" prompt ")"
19
+ | "[" prompt "]"
20
+ scheduled: "[" [prompt ":"] prompt ":" [WHITESPACE] NUMBER "]"
21
+ alternate: "[" prompt ("|" prompt)+ "]"
22
+ WHITESPACE: /\s+/
23
+ plain: /([^\\\[\]():|]|\\.)+/
24
+ %import common.SIGNED_NUMBER -> NUMBER
25
+ """)
26
+
27
+ def get_learned_conditioning_prompt_schedules(prompts, steps):
28
+ """
29
+ >>> g = lambda p: get_learned_conditioning_prompt_schedules([p], 10)[0]
30
+ >>> g("test")
31
+ [[10, 'test']]
32
+ >>> g("a [b:3]")
33
+ [[3, 'a '], [10, 'a b']]
34
+ >>> g("a [b: 3]")
35
+ [[3, 'a '], [10, 'a b']]
36
+ >>> g("a [[[b]]:2]")
37
+ [[2, 'a '], [10, 'a [[b]]']]
38
+ >>> g("[(a:2):3]")
39
+ [[3, ''], [10, '(a:2)']]
40
+ >>> g("a [b : c : 1] d")
41
+ [[1, 'a b d'], [10, 'a c d']]
42
+ >>> g("a[b:[c:d:2]:1]e")
43
+ [[1, 'abe'], [2, 'ace'], [10, 'ade']]
44
+ >>> g("a [unbalanced")
45
+ [[10, 'a [unbalanced']]
46
+ >>> g("a [b:.5] c")
47
+ [[5, 'a c'], [10, 'a b c']]
48
+ >>> g("a [{b|d{:.5] c") # not handling this right now
49
+ [[5, 'a c'], [10, 'a {b|d{ c']]
50
+ >>> g("((a][:b:c [d:3]")
51
+ [[3, '((a][:b:c '], [10, '((a][:b:c d']]
52
+ """
53
+
54
+ def collect_steps(steps, tree):
55
+ l = [steps]
56
+ class CollectSteps(lark.Visitor):
57
+ def scheduled(self, tree):
58
+ tree.children[-1] = float(tree.children[-1])
59
+ if tree.children[-1] < 1:
60
+ tree.children[-1] *= steps
61
+ tree.children[-1] = min(steps, int(tree.children[-1]))
62
+ l.append(tree.children[-1])
63
+ def alternate(self, tree):
64
+ l.extend(range(1, steps+1))
65
+ CollectSteps().visit(tree)
66
+ return sorted(set(l))
67
+
68
+ def at_step(step, tree):
69
+ class AtStep(lark.Transformer):
70
+ def scheduled(self, args):
71
+ before, after, _, when = args
72
+ yield before or () if step <= when else after
73
+ def alternate(self, args):
74
+ yield next(args[(step - 1)%len(args)])
75
+ def start(self, args):
76
+ def flatten(x):
77
+ if type(x) == str:
78
+ yield x
79
+ else:
80
+ for gen in x:
81
+ yield from flatten(gen)
82
+ return ''.join(flatten(args))
83
+ def plain(self, args):
84
+ yield args[0].value
85
+ def __default__(self, data, children, meta):
86
+ for child in children:
87
+ yield from child
88
+ return AtStep().transform(tree)
89
+
90
+ def get_schedule(prompt):
91
+ try:
92
+ tree = schedule_parser.parse(prompt)
93
+ except lark.exceptions.LarkError as e:
94
+ if 0:
95
+ import traceback
96
+ traceback.print_exc()
97
+ return [[steps, prompt]]
98
+ return [[t, at_step(t, tree)] for t in collect_steps(steps, tree)]
99
+
100
+ promptdict = {prompt: get_schedule(prompt) for prompt in set(prompts)}
101
+ return [promptdict[prompt] for prompt in prompts]
102
+
103
+
104
+ ScheduledPromptConditioning = namedtuple("ScheduledPromptConditioning", ["end_at_step", "cond"])
105
+
106
+
107
+ def get_learned_conditioning(model, prompts, steps):
108
+ """converts a list of prompts into a list of prompt schedules - each schedule is a list of ScheduledPromptConditioning, specifying the comdition (cond),
109
+ and the sampling step at which this condition is to be replaced by the next one.
110
+
111
+ Input:
112
+ (model, ['a red crown', 'a [blue:green:5] jeweled crown'], 20)
113
+
114
+ Output:
115
+ [
116
+ [
117
+ ScheduledPromptConditioning(end_at_step=20, cond=tensor([[-0.3886, 0.0229, -0.0523, ..., -0.4901, -0.3066, 0.0674], ..., [ 0.3317, -0.5102, -0.4066, ..., 0.4119, -0.7647, -1.0160]], device='cuda:0'))
118
+ ],
119
+ [
120
+ ScheduledPromptConditioning(end_at_step=5, cond=tensor([[-0.3886, 0.0229, -0.0522, ..., -0.4901, -0.3067, 0.0673], ..., [-0.0192, 0.3867, -0.4644, ..., 0.1135, -0.3696, -0.4625]], device='cuda:0')),
121
+ ScheduledPromptConditioning(end_at_step=20, cond=tensor([[-0.3886, 0.0229, -0.0522, ..., -0.4901, -0.3067, 0.0673], ..., [-0.7352, -0.4356, -0.7888, ..., 0.6994, -0.4312, -1.2593]], device='cuda:0'))
122
+ ]
123
+ ]
124
+ """
125
+ res = []
126
+
127
+ prompt_schedules = get_learned_conditioning_prompt_schedules(prompts, steps)
128
+ cache = {}
129
+
130
+ for prompt, prompt_schedule in zip(prompts, prompt_schedules):
131
+
132
+ cached = cache.get(prompt, None)
133
+ if cached is not None:
134
+ res.append(cached)
135
+ continue
136
+
137
+ texts = [x[1] for x in prompt_schedule]
138
+ conds = model.get_learned_conditioning(texts)
139
+
140
+ cond_schedule = []
141
+ for i, (end_at_step, text) in enumerate(prompt_schedule):
142
+ cond_schedule.append(ScheduledPromptConditioning(end_at_step, conds[i]))
143
+
144
+ cache[prompt] = cond_schedule
145
+ res.append(cond_schedule)
146
+
147
+ return res
148
+
149
+
150
+ re_AND = re.compile(r"\bAND\b")
151
+ re_weight = re.compile(r"^(.*?)(?:\s*:\s*([-+]?(?:\d+\.?|\d*\.\d+)))?\s*$")
152
+
153
+ def get_multicond_prompt_list(prompts):
154
+ res_indexes = []
155
+
156
+ prompt_flat_list = []
157
+ prompt_indexes = {}
158
+
159
+ for prompt in prompts:
160
+ subprompts = re_AND.split(prompt)
161
+
162
+ indexes = []
163
+ for subprompt in subprompts:
164
+ match = re_weight.search(subprompt)
165
+
166
+ text, weight = match.groups() if match is not None else (subprompt, 1.0)
167
+
168
+ weight = float(weight) if weight is not None else 1.0
169
+
170
+ index = prompt_indexes.get(text, None)
171
+ if index is None:
172
+ index = len(prompt_flat_list)
173
+ prompt_flat_list.append(text)
174
+ prompt_indexes[text] = index
175
+
176
+ indexes.append((index, weight))
177
+
178
+ res_indexes.append(indexes)
179
+
180
+ return res_indexes, prompt_flat_list, prompt_indexes
181
+
182
+
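For example, a single prompt with one AND separator splits like this (the exact whitespace preserved inside the subprompt texts is not important for the sketch):

res_indexes, prompt_flat_list, prompt_indexes = get_multicond_prompt_list(
    ["a forest AND a castle :0.3"]
)
# res_indexes      -> [[(0, 1.0), (1, 0.3)]]   (index into prompt_flat_list, weight)
# prompt_flat_list -> the two deduplicated subprompt texts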
183
+ class ComposableScheduledPromptConditioning:
184
+ def __init__(self, schedules, weight=1.0):
185
+ self.schedules: List[ScheduledPromptConditioning] = schedules
186
+ self.weight: float = weight
187
+
188
+
189
+ class MulticondLearnedConditioning:
190
+ def __init__(self, shape, batch):
191
+ self.shape: tuple = shape # the shape field is needed to send this object to DDIM/PLMS
192
+ self.batch: List[List[ComposableScheduledPromptConditioning]] = batch
193
+
194
+ def get_multicond_learned_conditioning(model, prompts, steps) -> MulticondLearnedConditioning:
195
+ """same as get_learned_conditioning, but returns a list of ScheduledPromptConditioning along with the weight objects for each prompt.
196
+ For each prompt, the list is obtained by splitting the prompt using the AND separator.
197
+
198
+ https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/
199
+ """
200
+
201
+ res_indexes, prompt_flat_list, prompt_indexes = get_multicond_prompt_list(prompts)
202
+
203
+ learned_conditioning = get_learned_conditioning(model, prompt_flat_list, steps)
204
+
205
+ res = []
206
+ for indexes in res_indexes:
207
+ res.append([ComposableScheduledPromptConditioning(learned_conditioning[i], weight) for i, weight in indexes])
208
+
209
+ return MulticondLearnedConditioning(shape=(len(prompts),), batch=res)
210
+
211
+
212
+ def reconstruct_cond_batch(c: List[List[ScheduledPromptConditioning]], current_step):
213
+ param = c[0][0].cond
214
+ res = torch.zeros((len(c),) + param.shape, device=param.device, dtype=param.dtype)
215
+ for i, cond_schedule in enumerate(c):
216
+ target_index = 0
217
+ for current, (end_at, cond) in enumerate(cond_schedule):
218
+ if current_step <= end_at:
219
+ target_index = current
220
+ break
221
+ res[i] = cond_schedule[target_index].cond
222
+
223
+ return res
224
+
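Tying this back to the schedule shown in the get_learned_conditioning docstring, the inner loop picks the first entry whose end_at_step has not yet been passed; blue_cond and green_cond below are just placeholder names for the two tensors.

# schedule for 'a [blue:green:5] jeweled crown' at 20 steps:
#   [ScheduledPromptConditioning(end_at_step=5,  cond=blue_cond),
#    ScheduledPromptConditioning(end_at_step=20, cond=green_cond)]
# reconstruct_cond_batch(c, current_step=3)  -> this prompt's row uses blue_cond
# reconstruct_cond_batch(c, current_step=12) -> this prompt's row uses green_cond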
225
+
226
+ def reconstruct_multicond_batch(c: MulticondLearnedConditioning, current_step):
227
+ param = c.batch[0][0].schedules[0].cond
228
+
229
+ tensors = []
230
+ conds_list = []
231
+
232
+ for batch_no, composable_prompts in enumerate(c.batch):
233
+ conds_for_batch = []
234
+
235
+ for cond_index, composable_prompt in enumerate(composable_prompts):
236
+ target_index = 0
237
+ for current, (end_at, cond) in enumerate(composable_prompt.schedules):
238
+ if current_step <= end_at:
239
+ target_index = current
240
+ break
241
+
242
+ conds_for_batch.append((len(tensors), composable_prompt.weight))
243
+ tensors.append(composable_prompt.schedules[target_index].cond)
244
+
245
+ conds_list.append(conds_for_batch)
246
+
247
+ # if prompts have wildly different lengths above the limit we'll get tensors of different shapes
248
+ # and won't be able to torch.stack them. So this fixes that.
249
+ token_count = max([x.shape[0] for x in tensors])
250
+ for i in range(len(tensors)):
251
+ if tensors[i].shape[0] != token_count:
252
+ last_vector = tensors[i][-1:]
253
+ last_vector_repeated = last_vector.repeat([token_count - tensors[i].shape[0], 1])
254
+ tensors[i] = torch.vstack([tensors[i], last_vector_repeated])
255
+
256
+ return conds_list, torch.stack(tensors).to(device=param.device, dtype=param.dtype)
257
+
258
+
259
+ re_attention = re.compile(r"""
260
+ \\\(|
261
+ \\\)|
262
+ \\\[|
263
+ \\]|
264
+ \\\\|
265
+ \\|
266
+ \(|
267
+ \[|
268
+ :([+-]?[.\d]+)\)|
269
+ \)|
270
+ ]|
271
+ [^\\()\[\]:]+|
272
+ :
273
+ """, re.X)
274
+
275
+
276
+ def parse_prompt_attention(text):
277
+ """
278
+ Parses a string with attention tokens and returns a list of pairs: text and its associated weight.
279
+ Accepted tokens are:
280
+ (abc) - increases attention to abc by a multiplier of 1.1
281
+ (abc:3.12) - increases attention to abc by a multiplier of 3.12
282
+ [abc] - decreases attention to abc by a multiplier of 1.1
283
+ \( - literal character '('
284
+ \[ - literal character '['
285
+ \) - literal character ')'
286
+ \] - literal character ']'
287
+ \\ - literal character '\'
288
+ anything else - just text
289
+
290
+ >>> parse_prompt_attention('normal text')
291
+ [['normal text', 1.0]]
292
+ >>> parse_prompt_attention('an (important) word')
293
+ [['an ', 1.0], ['important', 1.1], [' word', 1.0]]
294
+ >>> parse_prompt_attention('(unbalanced')
295
+ [['unbalanced', 1.1]]
296
+ >>> parse_prompt_attention('\(literal\]')
297
+ [['(literal]', 1.0]]
298
+ >>> parse_prompt_attention('(unnecessary)(parens)')
299
+ [['unnecessaryparens', 1.1]]
300
+ >>> parse_prompt_attention('a (((house:1.3)) [on] a (hill:0.5), sun, (((sky))).')
301
+ [['a ', 1.0],
302
+ ['house', 1.5730000000000004],
303
+ [' ', 1.1],
304
+ ['on', 1.0],
305
+ [' a ', 1.1],
306
+ ['hill', 0.55],
307
+ [', sun, ', 1.1],
308
+ ['sky', 1.4641000000000006],
309
+ ['.', 1.1]]
310
+ """
311
+
312
+ res = []
313
+ round_brackets = []
314
+ square_brackets = []
315
+
316
+ round_bracket_multiplier = 1.1
317
+ square_bracket_multiplier = 1 / 1.1
318
+
319
+ def multiply_range(start_position, multiplier):
320
+ for p in range(start_position, len(res)):
321
+ res[p][1] *= multiplier
322
+
323
+ for m in re_attention.finditer(text):
324
+ text = m.group(0)
325
+ weight = m.group(1)
326
+
327
+ if text.startswith('\\'):
328
+ res.append([text[1:], 1.0])
329
+ elif text == '(':
330
+ round_brackets.append(len(res))
331
+ elif text == '[':
332
+ square_brackets.append(len(res))
333
+ elif weight is not None and len(round_brackets) > 0:
334
+ multiply_range(round_brackets.pop(), float(weight))
335
+ elif text == ')' and len(round_brackets) > 0:
336
+ multiply_range(round_brackets.pop(), round_bracket_multiplier)
337
+ elif text == ']' and len(square_brackets) > 0:
338
+ multiply_range(square_brackets.pop(), square_bracket_multiplier)
339
+ else:
340
+ res.append([text, 1.0])
341
+
342
+ for pos in round_brackets:
343
+ multiply_range(pos, round_bracket_multiplier)
344
+
345
+ for pos in square_brackets:
346
+ multiply_range(pos, square_bracket_multiplier)
347
+
348
+ if len(res) == 0:
349
+ res = [["", 1.0]]
350
+
351
+ # merge runs of identical weights
352
+ i = 0
353
+ while i + 1 < len(res):
354
+ if res[i][1] == res[i + 1][1]:
355
+ res[i][0] += res[i + 1][0]
356
+ res.pop(i + 1)
357
+ else:
358
+ i += 1
359
+
360
+ return res
361
+
362
+ if __name__ == "__main__":
363
+ import doctest
364
+ doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
365
+ else:
366
+ import torch # doctest faster
modules/realesrgan_model.py ADDED
@@ -0,0 +1,133 @@
1
+ import os
2
+ import sys
3
+ import traceback
4
+
5
+ import numpy as np
6
+ from PIL import Image
7
+ from basicsr.utils.download_util import load_file_from_url
8
+ from realesrgan import RealESRGANer
9
+
10
+ from modules.upscaler import Upscaler, UpscalerData
11
+ from modules.shared import cmd_opts, opts
12
+
13
+
14
+ class UpscalerRealESRGAN(Upscaler):
15
+ def __init__(self, path):
16
+ self.name = "RealESRGAN"
17
+ self.user_path = path
18
+ super().__init__()
19
+ try:
20
+ from basicsr.archs.rrdbnet_arch import RRDBNet
21
+ from realesrgan import RealESRGANer
22
+ from realesrgan.archs.srvgg_arch import SRVGGNetCompact
23
+ self.enable = True
24
+ self.scalers = []
25
+ scalers = self.load_models(path)
26
+ for scaler in scalers:
27
+ if scaler.name in opts.realesrgan_enabled_models:
28
+ self.scalers.append(scaler)
29
+
30
+ except Exception:
31
+ print("Error importing Real-ESRGAN:", file=sys.stderr)
32
+ print(traceback.format_exc(), file=sys.stderr)
33
+ self.enable = False
34
+ self.scalers = []
35
+
36
+ def do_upscale(self, img, path):
37
+ if not self.enable:
38
+ return img
39
+
40
+ info = self.load_model(path)
41
+ if not os.path.exists(info.data_path):
42
+ print("Unable to load RealESRGAN model: %s" % info.name)
43
+ return img
44
+
45
+ upsampler = RealESRGANer(
46
+ scale=info.scale,
47
+ model_path=info.data_path,
48
+ model=info.model(),
49
+ half=not cmd_opts.no_half,
50
+ tile=opts.ESRGAN_tile,
51
+ tile_pad=opts.ESRGAN_tile_overlap,
52
+ )
53
+
54
+ upsampled = upsampler.enhance(np.array(img), outscale=info.scale)[0]
55
+
56
+ image = Image.fromarray(upsampled)
57
+ return image
58
+
59
+ def load_model(self, path):
60
+ try:
61
+ info = None
62
+ for scaler in self.scalers:
63
+ if scaler.data_path == path:
64
+ info = scaler
65
+
66
+ if info is None:
67
+ print(f"Unable to find model info: {path}")
68
+ return None
69
+
70
+ model_file = load_file_from_url(url=info.data_path, model_dir=self.model_path, progress=True)
71
+ info.data_path = model_file
72
+ return info
73
+ except Exception as e:
74
+ print(f"Error making Real-ESRGAN models list: {e}", file=sys.stderr)
75
+ print(traceback.format_exc(), file=sys.stderr)
76
+ return None
77
+
78
+ def load_models(self, _):
79
+ return get_realesrgan_models(self)
80
+
81
+
82
+ def get_realesrgan_models(scaler):
83
+ try:
84
+ from basicsr.archs.rrdbnet_arch import RRDBNet
85
+ from realesrgan.archs.srvgg_arch import SRVGGNetCompact
86
+ models = [
87
+ UpscalerData(
88
+ name="R-ESRGAN General 4xV3",
89
+ path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth",
90
+ scale=4,
91
+ upscaler=scaler,
92
+ model=lambda: SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
93
+ ),
94
+ UpscalerData(
95
+ name="R-ESRGAN General WDN 4xV3",
96
+ path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth",
97
+ scale=4,
98
+ upscaler=scaler,
99
+ model=lambda: SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
100
+ ),
101
+ UpscalerData(
102
+ name="R-ESRGAN AnimeVideo",
103
+ path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth",
104
+ scale=4,
105
+ upscaler=scaler,
106
+ model=lambda: SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
107
+ ),
108
+ UpscalerData(
109
+ name="R-ESRGAN 4x+",
110
+ path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
111
+ scale=4,
112
+ upscaler=scaler,
113
+ model=lambda: RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
114
+ ),
115
+ UpscalerData(
116
+ name="R-ESRGAN 4x+ Anime6B",
117
+ path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth",
118
+ scale=4,
119
+ upscaler=scaler,
120
+ model=lambda: RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
121
+ ),
122
+ UpscalerData(
123
+ name="R-ESRGAN 2x+",
124
+ path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth",
125
+ scale=2,
126
+ upscaler=scaler,
127
+ model=lambda: RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
128
+ ),
129
+ ]
130
+ return models
131
+ except Exception as e:
132
+ print("Error making Real-ESRGAN models list:", file=sys.stderr)
133
+ print(traceback.format_exc(), file=sys.stderr)
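A rough usage sketch for the upscaler above; the Upscaler base class (modules/upscaler.py) is not shown here, so the constructor argument and the file names are assumptions, and only models enabled in the settings end up in self.scalers.

from PIL import Image

upscaler = UpscalerRealESRGAN("models/RealESRGAN")   # assumed model directory
if upscaler.enable and upscaler.scalers:
    data = upscaler.scalers[0]                       # an UpscalerData entry from the list above
    result = upscaler.do_upscale(Image.open("input.png"), data.data_path)
    result.save("output.png")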
modules/safe.py ADDED
@@ -0,0 +1,110 @@
1
+ # this code is adapted from the script contributed by anon from /h/
2
+
3
+ import io
4
+ import pickle
5
+ import collections
6
+ import sys
7
+ import traceback
8
+
9
+ import torch
10
+ import numpy
11
+ import _codecs
12
+ import zipfile
13
+ import re
14
+
15
+
16
+ # PyTorch 1.13 and later have _TypedStorage renamed to TypedStorage
17
+ TypedStorage = torch.storage.TypedStorage if hasattr(torch.storage, 'TypedStorage') else torch.storage._TypedStorage
18
+
19
+
20
+ def encode(*args):
21
+ out = _codecs.encode(*args)
22
+ return out
23
+
24
+
25
+ class RestrictedUnpickler(pickle.Unpickler):
26
+ def persistent_load(self, saved_id):
27
+ assert saved_id[0] == 'storage'
28
+ return TypedStorage()
29
+
30
+ def find_class(self, module, name):
31
+ if module == 'collections' and name == 'OrderedDict':
32
+ return getattr(collections, name)
33
+ if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter']:
34
+ return getattr(torch._utils, name)
35
+ if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage', 'DoubleStorage']:
36
+ return getattr(torch, name)
37
+ if module == 'torch.nn.modules.container' and name in ['ParameterDict']:
38
+ return getattr(torch.nn.modules.container, name)
39
+ if module == 'numpy.core.multiarray' and name == 'scalar':
40
+ return numpy.core.multiarray.scalar
41
+ if module == 'numpy' and name == 'dtype':
42
+ return numpy.dtype
43
+ if module == '_codecs' and name == 'encode':
44
+ return encode
45
+ if module == "pytorch_lightning.callbacks" and name == 'model_checkpoint':
46
+ import pytorch_lightning.callbacks
47
+ return pytorch_lightning.callbacks.model_checkpoint
48
+ if module == "pytorch_lightning.callbacks.model_checkpoint" and name == 'ModelCheckpoint':
49
+ import pytorch_lightning.callbacks.model_checkpoint
50
+ return pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint
51
+ if module == "__builtin__" and name == 'set':
52
+ return set
53
+
54
+ # Forbid everything else.
55
+ raise pickle.UnpicklingError(f"global '{module}/{name}' is forbidden")
56
+
57
+
58
+ allowed_zip_names = ["archive/data.pkl", "archive/version"]
59
+ allowed_zip_names_re = re.compile(r"^archive/data/\d+$")
60
+
61
+
62
+ def check_zip_filenames(filename, names):
63
+ for name in names:
64
+ if name in allowed_zip_names:
65
+ continue
66
+ if allowed_zip_names_re.match(name):
67
+ continue
68
+
69
+ raise Exception(f"bad file inside {filename}: {name}")
70
+
71
+
72
+ def check_pt(filename):
73
+ try:
74
+
75
+ # new pytorch format is a zip file
76
+ with zipfile.ZipFile(filename) as z:
77
+ check_zip_filenames(filename, z.namelist())
78
+
79
+ with z.open('archive/data.pkl') as file:
80
+ unpickler = RestrictedUnpickler(file)
81
+ unpickler.load()
82
+
83
+ except zipfile.BadZipfile:
84
+
85
+ # if it's not a zip file, it's an old pytorch format, with five objects written to pickle
86
+ with open(filename, "rb") as file:
87
+ unpickler = RestrictedUnpickler(file)
88
+ for i in range(5):
89
+ unpickler.load()
90
+
91
+
92
+ def load(filename, *args, **kwargs):
93
+ from modules import shared
94
+
95
+ try:
96
+ if not shared.cmd_opts.disable_safe_unpickle:
97
+ check_pt(filename)
98
+
99
+ except Exception:
100
+ print(f"Error verifying pickled file from {filename}:", file=sys.stderr)
101
+ print(traceback.format_exc(), file=sys.stderr)
102
+ print(f"\nThe file may be malicious, so the program is not going to read it.", file=sys.stderr)
103
+ print(f"You can skip this check with --disable-safe-unpickle commandline argument.", file=sys.stderr)
104
+ return None
105
+
106
+ return unsafe_torch_load(filename, *args, **kwargs)
107
+
108
+
109
+ unsafe_torch_load = torch.load
110
+ torch.load = load
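Because importing this module replaces torch.load globally, any later checkpoint load goes through the restricted unpickler. A minimal sketch (the checkpoint path is made up):

import modules.safe  # noqa: F401  (side effect: torch.load is now the checked loader)
import torch

state_dict = torch.load("models/Stable-diffusion/model.ckpt", map_location="cpu")
if state_dict is None:
    print("checkpoint failed the safety check and was not read")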
modules/safety.py ADDED
@@ -0,0 +1,42 @@
1
+ import torch
2
+ from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
3
+ from transformers import AutoFeatureExtractor
4
+ from PIL import Image
5
+
6
+ import modules.shared as shared
7
+
8
+ safety_model_id = "CompVis/stable-diffusion-safety-checker"
9
+ safety_feature_extractor = None
10
+ safety_checker = None
11
+
12
+ def numpy_to_pil(images):
13
+ """
14
+ Convert a numpy image or a batch of images to a PIL image.
15
+ """
16
+ if images.ndim == 3:
17
+ images = images[None, ...]
18
+ images = (images * 255).round().astype("uint8")
19
+ pil_images = [Image.fromarray(image) for image in images]
20
+
21
+ return pil_images
22
+
23
+ # check and replace nsfw content
24
+ def check_safety(x_image):
25
+ global safety_feature_extractor, safety_checker
26
+
27
+ if safety_feature_extractor is None:
28
+ safety_feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id)
29
+ safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id)
30
+
31
+ safety_checker_input = safety_feature_extractor(numpy_to_pil(x_image), return_tensors="pt")
32
+ x_checked_image, has_nsfw_concept = safety_checker(images=x_image, clip_input=safety_checker_input.pixel_values)
33
+
34
+ return x_checked_image, has_nsfw_concept
35
+
36
+
37
+ def censor_batch(x):
38
+ x_samples_ddim_numpy = x.cpu().permute(0, 2, 3, 1).numpy()
39
+ x_checked_image, has_nsfw_concept = check_safety(x_samples_ddim_numpy)
40
+ x = torch.from_numpy(x_checked_image).permute(0, 3, 1, 2)
41
+
42
+ return x
modules/scripts.py ADDED
@@ -0,0 +1,201 @@
1
+ import os
2
+ import sys
3
+ import traceback
4
+
5
+ import modules.ui as ui
6
+ import gradio as gr
7
+
8
+ from modules.processing import StableDiffusionProcessing
9
+ from modules import shared
10
+
11
+ class Script:
12
+ filename = None
13
+ args_from = None
14
+ args_to = None
15
+
16
+ # The title of the script. This is what will be displayed in the dropdown menu.
17
+ def title(self):
18
+ raise NotImplementedError()
19
+
20
+ # How the script is displayed in the UI. See https://gradio.app/docs/#components
21
+ # for the different UI components you can use and how to create them.
22
+ # Most UI components can return a value, such as a boolean for a checkbox.
23
+ # The returned values are passed to the run method as parameters.
24
+ def ui(self, is_img2img):
25
+ pass
26
+
27
+ # Determines when the script should be shown in the dropdown menu via the
28
+ # returned value. As an example:
29
+ # is_img2img is True if the current tab is img2img, and False if it is txt2img.
30
+ # Thus, return is_img2img to only show the script on the img2img tab.
31
+ def show(self, is_img2img):
32
+ return True
33
+
34
+ # This is where the additional processing is implemented. The parameters include
35
+ # self, the model object "p" (a StableDiffusionProcessing class, see
36
+ # processing.py), and the parameters returned by the ui method.
37
+ # Custom functions can be defined here, and additional libraries can be imported
38
+ # to be used in processing. The return value should be a Processed object, which is
39
+ # what is returned by the process_images method.
40
+ def run(self, *args):
41
+ raise NotImplementedError()
42
+
43
+ # The description method is currently unused.
44
+ # To add a description that appears when hovering over the title, amend the "titles"
45
+ # dict in script.js to include the script title (returned by title) as a key, and
46
+ # your description as the value.
47
+ def describe(self):
48
+ return ""
49
+
50
+
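Putting the comments above together, a custom script is a file dropped into the folder that load_scripts() scans, containing a subclass of Script; the Gradio textbox and the prompt tweak below are purely illustrative.

import gradio as gr

import modules.scripts as scripts
from modules.processing import process_images


class ExampleScript(scripts.Script):
    def title(self):
        return "Append text to prompt"

    def show(self, is_img2img):
        return True  # appear on both txt2img and img2img tabs

    def ui(self, is_img2img):
        suffix = gr.Textbox(label="Text to append")
        return [suffix]  # returned controls feed their values into run()

    def run(self, p, suffix):
        if suffix:
            p.prompt = f"{p.prompt}, {suffix}"
        return process_images(p)  # a Processed object, as the UI expects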
51
+ scripts_data = []
52
+
53
+
54
+ def load_scripts(basedir):
55
+ if not os.path.exists(basedir):
56
+ return
57
+
58
+ for filename in sorted(os.listdir(basedir)):
59
+ path = os.path.join(basedir, filename)
60
+
61
+ if not os.path.isfile(path):
62
+ continue
63
+
64
+ try:
65
+ with open(path, "r", encoding="utf8") as file:
66
+ text = file.read()
67
+
68
+ from types import ModuleType
69
+ compiled = compile(text, path, 'exec')
70
+ module = ModuleType(filename)
71
+ exec(compiled, module.__dict__)
72
+
73
+ for key, script_class in module.__dict__.items():
74
+ if type(script_class) == type and issubclass(script_class, Script):
75
+ scripts_data.append((script_class, path))
76
+
77
+ except Exception:
78
+ print(f"Error loading script: {filename}", file=sys.stderr)
79
+ print(traceback.format_exc(), file=sys.stderr)
80
+
81
+
82
+ def wrap_call(func, filename, funcname, *args, default=None, **kwargs):
83
+ try:
84
+ res = func(*args, **kwargs)
85
+ return res
86
+ except Exception:
87
+ print(f"Error calling: {filename}/{funcname}", file=sys.stderr)
88
+ print(traceback.format_exc(), file=sys.stderr)
89
+
90
+ return default
91
+
92
+
93
+ class ScriptRunner:
94
+ def __init__(self):
95
+ self.scripts = []
96
+
97
+ def setup_ui(self, is_img2img):
98
+ for script_class, path in scripts_data:
99
+ script = script_class()
100
+ script.filename = path
101
+
102
+ if not script.show(is_img2img):
103
+ continue
104
+
105
+ self.scripts.append(script)
106
+
107
+ titles = [wrap_call(script.title, script.filename, "title") or f"{script.filename} [error]" for script in self.scripts]
108
+
109
+ dropdown = gr.Dropdown(label="Script", choices=["None"] + titles, value="None", type="index")
110
+ inputs = [dropdown]
111
+
112
+ for script in self.scripts:
113
+ script.args_from = len(inputs)
114
+ script.args_to = len(inputs)
115
+
116
+ controls = wrap_call(script.ui, script.filename, "ui", is_img2img)
117
+
118
+ if controls is None:
119
+ continue
120
+
121
+ for control in controls:
122
+ control.custom_script_source = os.path.basename(script.filename)
123
+ control.visible = False
124
+
125
+ inputs += controls
126
+ script.args_to = len(inputs)
127
+
128
+ def select_script(script_index):
129
+ if 0 < script_index <= len(self.scripts):
130
+ script = self.scripts[script_index-1]
131
+ args_from = script.args_from
132
+ args_to = script.args_to
133
+ else:
134
+ args_from = 0
135
+ args_to = 0
136
+
137
+ return [ui.gr_show(True if i == 0 else args_from <= i < args_to) for i in range(len(inputs))]
138
+
139
+ dropdown.change(
140
+ fn=select_script,
141
+ inputs=[dropdown],
142
+ outputs=inputs
143
+ )
144
+
145
+ return inputs
146
+
147
+ def run(self, p: StableDiffusionProcessing, *args):
148
+ script_index = args[0]
149
+
150
+ if script_index == 0:
151
+ return None
152
+
153
+ script = self.scripts[script_index-1]
154
+
155
+ if script is None:
156
+ return None
157
+
158
+ script_args = args[script.args_from:script.args_to]
159
+ processed = script.run(p, *script_args)
160
+
161
+ shared.total_tqdm.clear()
162
+
163
+ return processed
164
+
165
+ def reload_sources(self):
166
+ for si, script in list(enumerate(self.scripts)):
167
+ with open(script.filename, "r", encoding="utf8") as file:
168
+ args_from = script.args_from
169
+ args_to = script.args_to
170
+ filename = script.filename
171
+ text = file.read()
172
+
173
+ from types import ModuleType
174
+
175
+ compiled = compile(text, filename, 'exec')
176
+ module = ModuleType(script.filename)
177
+ exec(compiled, module.__dict__)
178
+
179
+ for key, script_class in module.__dict__.items():
180
+ if type(script_class) == type and issubclass(script_class, Script):
181
+ self.scripts[si] = script_class()
182
+ self.scripts[si].filename = filename
183
+ self.scripts[si].args_from = args_from
184
+ self.scripts[si].args_to = args_to
185
+
186
+ scripts_txt2img = ScriptRunner()
187
+ scripts_img2img = ScriptRunner()
188
+
189
+ def reload_script_body_only():
190
+ scripts_txt2img.reload_sources()
191
+ scripts_img2img.reload_sources()
192
+
193
+
194
+ def reload_scripts(basedir):
195
+ global scripts_txt2img, scripts_img2img
196
+
197
+ scripts_data.clear()
198
+ load_scripts(basedir)
199
+
200
+ scripts_txt2img = ScriptRunner()
201
+ scripts_img2img = ScriptRunner()