3 files changed, 579 insertions, 1 deletions
diff --git a/extensions-builtin/LDSR/sd_hijack_autoencoder.py b/extensions-builtin/LDSR/sd_hijack_autoencoder.py
index 81c5101b..27a86e13 100644
--- a/extensions-builtin/LDSR/sd_hijack_autoencoder.py
+++ b/extensions-builtin/LDSR/sd_hijack_autoencoder.py
@@ -10,7 +10,7 @@ from contextlib import contextmanager
 from torch.optim.lr_scheduler import LambdaLR
 
 from ldm.modules.ema import LitEma
-from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer
+from vqvae_quantize import VectorQuantizer2 as VectorQuantizer
 from ldm.modules.diffusionmodules.model import Encoder, Decoder
 from ldm.util import instantiate_from_config
 
diff --git a/extensions-builtin/LDSR/vqvae_quantize.py b/extensions-builtin/LDSR/vqvae_quantize.py
new file mode 100644
index 00000000..dd14b8fd
--- /dev/null
+++ b/extensions-builtin/LDSR/vqvae_quantize.py
@@ -0,0 +1,147 @@
+# Vendored from https://raw.githubusercontent.com/CompVis/taming-transformers/24268930bf1dce879235a7fddd0b2355b84d7ea6/taming/modules/vqvae/quantize.py,
+# where the license is as follows:
+#
+# Copyright (c) 2020 Patrick Esser and Robin Rombach and Björn Ommer
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+# OR OTHER DEALINGS IN THE SOFTWARE.
+
+import torch
+import torch.nn as nn
+import numpy as np
+from einops import rearrange
+
+
+class VectorQuantizer2(nn.Module):
+    """
+    Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly
+    avoids costly matrix multiplications and allows for post-hoc remapping of indices.
+    """
+
+    # NOTE: due to a bug, beta was applied to the wrong loss term. For backwards
+    # compatibility the buggy form (legacy=True: beta scales the (z_q - z.detach())
+    # term) stays the default; pass legacy=False to scale (z_q.detach() - z) instead.
+    def __init__(self, n_e, e_dim, beta, remap=None, unknown_index="random",
+                 sane_index_shape=False, legacy=True):
+        super().__init__()
+        self.n_e = n_e  # number of codebook entries (rows of self.embedding)
+        self.e_dim = e_dim  # length of each codebook vector
+        self.beta = beta
+        self.legacy = legacy
+
+        self.embedding = nn.Embedding(self.n_e, self.e_dim)
+        self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e)
+
+        self.remap = remap  # path handed to np.load below; None disables index remapping
+        if self.remap is not None:
+            self.register_buffer("used", torch.tensor(np.load(self.remap)))
+            self.re_embed = self.used.shape[0]
+            self.unknown_index = unknown_index  # "random" or "extra" or integer
+            if self.unknown_index == "extra":
+                self.unknown_index = self.re_embed
+                self.re_embed = self.re_embed + 1
+            print(f"Remapping {self.n_e} indices to {self.re_embed} indices. "
+                  f"Using {self.unknown_index} for unknown indices.")
+        else:
+            self.re_embed = n_e
+
+        self.sane_index_shape = sane_index_shape  # True: forward() returns indices as (batch, height, width)
+
+    def remap_to_used(self, inds):  # codebook indices -> positions within self.used
+        ishape = inds.shape
+        assert len(ishape) > 1
+        inds = inds.reshape(ishape[0], -1)
+        used = self.used.to(inds)
+        match = (inds[:, :, None] == used[None, None, ...]).long()
+        new = match.argmax(-1)
+        unknown = match.sum(2) < 1  # True where an index matched no entry of self.used
+        if self.unknown_index == "random":
+            new[unknown] = torch.randint(0, self.re_embed, size=new[unknown].shape).to(device=new.device)
+        else:
+            new[unknown] = self.unknown_index
+        return new.reshape(ishape)
+
+    def unmap_to_all(self, inds):  # inverse lookup: positions within self.used -> codebook indices
+        ishape = inds.shape
+        assert len(ishape) > 1
+        inds = inds.reshape(ishape[0], -1)
+        used = self.used.to(inds)
+        if self.re_embed > self.used.shape[0]:  # extra token
+            inds[inds >= self.used.shape[0]] = 0  # simply set to zero (in place; NOTE(review): may alias the caller's tensor)
+        back = torch.gather(used[None, :][inds.shape[0] * [0], :], 1, inds)
+        return back.reshape(ishape)
+
+    def forward(self, z, temp=None, rescale_logits=False, return_logits=False):
+        assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel"
+        assert rescale_logits is False, "Only for interface compatible with Gumbel"
+        assert return_logits is False, "Only for interface compatible with Gumbel"
+        # reshape z -> (batch, height, width, channel) and flatten
+        z = rearrange(z, 'b c h w -> b h w c').contiguous()
+        z_flattened = z.view(-1, self.e_dim)
+        # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
+
+        d = torch.sum(z_flattened ** 2, dim=1, keepdim=True) + \
+            torch.sum(self.embedding.weight ** 2, dim=1) - 2 * \
+            torch.einsum('bd,dn->bn', z_flattened, rearrange(self.embedding.weight, 'n d -> d n'))
+
+        min_encoding_indices = torch.argmin(d, dim=1)
+        z_q = self.embedding(min_encoding_indices).view(z.shape)
+        perplexity = None  # not computed here; returned as None
+        min_encodings = None  # not computed here; returned as None
+
+        # compute loss for embedding
+        if not self.legacy:
+            loss = self.beta * torch.mean((z_q.detach() - z) ** 2) + \
+                   torch.mean((z_q - z.detach()) ** 2)
+        else:
+            loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * \
+                   torch.mean((z_q - z.detach()) ** 2)
+
+        # straight-through estimator: the forward value is z_q, gradients flow to z
+        z_q = z + (z_q - z).detach()
+
+        # reshape back to match original input shape
+        z_q = rearrange(z_q, 'b h w c -> b c h w').contiguous()
+
+        if self.remap is not None:
+            min_encoding_indices = min_encoding_indices.reshape(z.shape[0], -1)  # add batch axis
+            min_encoding_indices = self.remap_to_used(min_encoding_indices)
+            min_encoding_indices = min_encoding_indices.reshape(-1, 1)  # flatten
+
+        if self.sane_index_shape:
+            min_encoding_indices = min_encoding_indices.reshape(
+                z_q.shape[0], z_q.shape[2], z_q.shape[3])
+
+        return z_q, loss, (perplexity, min_encodings, min_encoding_indices)
+
+    def get_codebook_entry(self, indices, shape):
+        # shape specifying (batch, height, width, channel); must not be None when remap is set (shape[0] is read)
+        if self.remap is not None:
+            indices = indices.reshape(shape[0], -1)  # add batch axis
+            indices = self.unmap_to_all(indices)
+            indices = indices.reshape(-1)  # flatten again
+
+        # get quantized latent vectors
+        z_q = self.embedding(indices)
+
+        if shape is not None:
+            z_q = z_q.view(shape)
+            # reshape back to match original input shape
+            z_q = z_q.permute(0, 3, 1, 2).contiguous()
+
+        return z_q
diff --git a/extensions-builtin/canvas-zoom-and-pan/javascript/zoom.js b/extensions-builtin/canvas-zoom-and-pan/javascript/zoom.js
new file mode 100644
index 00000000..f555960d
--- /dev/null
+++ b/extensions-builtin/canvas-zoom-and-pan/javascript/zoom.js
@@ -0,0 +1,431 @@
+// Main
+
+// Helper functions
+// Get active tab
+function getActiveTab(elements, all = false) {
+    // Every tab button inside the img2img mode tab bar.
+    const buttons = elements.img2imgTabs.querySelectorAll("button");
+
+    // With `all` set, hand back the whole collection instead of a single tab.
+    if (all) {
+        return buttons;
+    }
+    // Otherwise the first button marked "selected", or undefined if none is.
+    return Array.from(buttons).find(btn => btn.classList.contains("selected"));
+}
+
+onUiLoaded(async() => {
+    const hotkeysConfig = {
+        resetZoom: "KeyR",
+        fitToScreen: "KeyS",
+        moveKey: "KeyF",
+        overlap: "KeyO"
+    };
+
+    let isMoving = false;
+    let mouseX, mouseY;
+
+    const elementIDs = {
+        sketch: "#img2img_sketch",
+        inpaint: "#img2maskimg",
+        inpaintSketch: "#inpaint_sketch",
+        img2imgTabs: "#mode_img2img .tab-nav"
+    };
+
+    async function getElements() {
+        // Resolve every configured selector to its DOM node (null when absent).
+        const found = {};
+        for (const [key, selector] of Object.entries(elementIDs)) {
+            found[key] = document.querySelector(selector);
+        }
+        return found;
+    }
+
+    const elements = await getElements();
+
+    function applyZoomAndPan(targetElement, elemId) {
+        targetElement.style.transformOrigin = "0 0";
+        let [zoomLevel, panX, panY] = [1, 0, 0];
+        let fullScreenMode = false;
+
+        // The <img> tag causes blank white canvases when zooming, so it is hidden on every tab except "img2img". This hack has no effect on the rest of the webui.
+        function fixCanvas() {
+            // Nothing is hidden while the "img2img" tab is the selected one.
+            if (getActiveTab(elements).textContent.trim() === "img2img") {
+                return;
+            }
+            const preview = targetElement.querySelector(`${elemId} img`);
+            // Skip the style writes when no <img> exists or it is already hidden.
+            if (preview && preview.style.display !== "none") {
+                preview.style.display = "none";
+                preview.style.visibility = "hidden";
+            }
+        }
+
+        // Reset the zoom level and pan position of the target element to their initial values
+        function resetZoom() {
+            [zoomLevel, panX, panY] = [1, 0, 0];
+
+            fixCanvas();
+            targetElement.style.transform = `scale(${zoomLevel}) translate(${panX}px, ${panY}px)`;
+
+            const drawing = gradioApp().querySelector(
+                `${elemId} canvas[key="interface"]`
+            );
+
+            toggleOverlap("off");
+            fullScreenMode = false;
+
+            // Inline widths above 865px on both canvas and wrapper: refit instead of clearing the size.
+            const isWide =
+                Boolean(drawing) &&
+                parseFloat(drawing.style.width) > 865 &&
+                parseFloat(targetElement.style.width) > 865;
+            if (isWide) {
+                fitToElement();
+                return;
+            }
+
+            // Drop the inline width and mirror the canvas height when a canvas exists.
+            targetElement.style.width = "";
+            if (drawing) {
+                targetElement.style.height = drawing.style.height;
+            }
+        }
+
+        // Toggle the zIndex of the target element between two values, allowing it to overlap or be overlapped by other elements
+        function toggleOverlap(forced = "") {
+            const behind = "0";
+            const onTop = "998";
+            const style = targetElement.style;
+            // "on"/"off" pin the z-index; any other argument flips the current one.
+            if (forced === "off") {
+                style.zIndex = behind;
+            } else if (forced === "on") {
+                style.zIndex = onTop;
+            } else {
+                style.zIndex = style.zIndex !== onTop ? onTop : behind;
+            }
+        }
+
+        // Adjust the brush size based on the deltaY value from a mouse wheel event
+        function adjustBrushSize(
+            elemId,
+            deltaY,
+            withoutValue = false,
+            percentage = 5
+        ) {
+            // Prefer the radius input; otherwise fall back to the "Use brush"
+            // button (presumably clicking it reveals the brush controls).
+            const input =
+                gradioApp().querySelector(
+                    `${elemId} input[aria-label='Brush radius']`
+                ) ||
+                gradioApp().querySelector(
+                    `${elemId} button[aria-label="Use brush"]`
+                );
+            if (!input) return;
+
+            input.click();
+            const current = parseFloat(input.value);
+            // An element without a numeric value (e.g. the button fallback) would
+            // get NaN written into it, so stop after the click in that case.
+            if (withoutValue || Number.isNaN(current)) return;
+            const maxValue = parseFloat(input.getAttribute("max")) || 100;
+            const step = maxValue * (percentage / 100);
+            const newValue = current + (deltaY > 0 ? -step : step);
+            input.value = Math.min(Math.max(newValue, 0), maxValue);
+            input.dispatchEvent(new Event("change"));
+        }
+
+        // Reset zoom when uploading a new image
+        const fileInput = gradioApp().querySelector(
+            `${elemId} input[type="file"][accept="image/*"].svelte-116rqfv`
+        );
+        if (fileInput) fileInput.addEventListener("click", resetZoom); // the selector may match nothing
+
+        // Update the zoom level and pan position of the target element based on the values of the zoomLevel, panX and panY variables
+        function updateZoom(newZoomLevel, mouseX, mouseY) {
+            // Clamp the requested zoom into the [0.5, 15] range.
+            const clamped = Math.min(Math.max(newZoomLevel, 0.5), 15);
+            // Compensate the pan for the scale change around (mouseX, mouseY).
+            panX += mouseX - (mouseX * clamped) / zoomLevel;
+            panY += mouseY - (mouseY * clamped) / zoomLevel;
+            targetElement.style.transformOrigin = "0 0";
+            targetElement.style.transform = `translate(${panX}px, ${panY}px) scale(${clamped})`;
+            toggleOverlap("on");
+            return clamped;
+        }
+
+        // Change the zoom level based on user interaction
+        function changeZoomLevel(operation, e) {
+            // Zooming only happens while Shift is held.
+            if (!e.shiftKey) {
+                return;
+            }
+            e.preventDefault();
+
+            // Larger steps at higher zoom levels.
+            let step;
+            if (zoomLevel > 7) {
+                step = 0.9;
+            } else if (zoomLevel > 2) {
+                step = 0.6;
+            } else {
+                step = 0.2;
+            }
+
+            fullScreenMode = false;
+            // Cursor position relative to the element's current bounding box.
+            const box = targetElement.getBoundingClientRect();
+            const target = zoomLevel + (operation === "+" ? step : -step);
+            zoomLevel = updateZoom(target, e.clientX - box.left, e.clientY - box.top);
+        }
+
+        /**
+         * Fit the target element into its parent element. The transform is
+         * reset first, then the element is scaled by the smaller of the
+         * width and height ratios. The leftover width is split evenly; the
+         * leftover height is divided by 2.5 rather than 2.
+         * Updates the enclosing zoomLevel, panX and panY, clears
+         * fullScreenMode and switches the overlap off.
+         */
+
+        function fitToElement() {
+            //Reset Zoom
+            targetElement.style.transform = `translate(${0}px, ${0}px) scale(${1})`;
+
+            // Get element and parent dimensions
+            // (screenWidth/screenHeight are the parent's client size, not the window's)
+            const elementWidth = targetElement.offsetWidth;
+            const elementHeight = targetElement.offsetHeight;
+            const parentElement = targetElement.parentElement;
+            const screenWidth = parentElement.clientWidth;
+            const screenHeight = parentElement.clientHeight;
+
+            // Calculate scale and offsets
+            const scaleX = screenWidth / elementWidth;
+            const scaleY = screenHeight / elementHeight;
+            const scale = Math.min(scaleX, scaleY);
+
+            // The computed transform origin is subtracted out of the offsets below
+            const transformOrigin =
+                window.getComputedStyle(targetElement).transformOrigin;
+            const [originX, originY] = transformOrigin.split(" ");
+            const originXValue = parseFloat(originX);
+            const originYValue = parseFloat(originY);
+
+            const offsetX =
+                (screenWidth - elementWidth * scale) / 2 -
+                originXValue * (1 - scale);
+            const offsetY =
+                (screenHeight - elementHeight * scale) / 2.5 -
+                originYValue * (1 - scale);
+
+            // Apply scale and offsets to the element
+            targetElement.style.transform = `translate(${offsetX}px, ${offsetY}px) scale(${scale})`;
+
+            // Update the enclosing zoom/pan state
+            zoomLevel = scale;
+            panX = offsetX;
+            panY = offsetY;
+
+            fullScreenMode = false;
+            toggleOverlap("off");
+        }
+
+        /**
+         * Toggle the full-screen fit: when already active, resetZoom() restores
+         * the default view; otherwise the element is scaled to the browser viewport
+         * and zoomLevel, panX, panY and fullScreenMode are updated accordingly.
+         */
+
+        // Fullscreen mode
+        function fitToScreen() {
+            const canvas = gradioApp().querySelector(
+                `${elemId} canvas[key="interface"]`
+            );
+
+            if (!canvas) return;
+
+            if (canvas.offsetWidth > 862) {
+                targetElement.style.width = canvas.offsetWidth + "px";
+            }
+
+            if (fullScreenMode) {
+                resetZoom();
+                fullScreenMode = false;
+                return;
+            }
+
+            //Reset Zoom
+            targetElement.style.transform = `translate(${0}px, ${0}px) scale(${1})`;
+
+            // Get scrollbar width to right-align the image
+            const scrollbarWidth = window.innerWidth - document.documentElement.clientWidth;
+
+            // Get element and screen dimensions
+            const elementWidth = targetElement.offsetWidth;
+            const elementHeight = targetElement.offsetHeight;
+            const screenWidth = window.innerWidth - scrollbarWidth;
+            const screenHeight = window.innerHeight;
+
+            // Get element's coordinates relative to the viewport (getBoundingClientRect)
+            const elementRect = targetElement.getBoundingClientRect();
+            const elementY = elementRect.y;
+            const elementX = elementRect.x;
+
+            // Calculate scale and offsets
+            const scaleX = screenWidth / elementWidth;
+            const scaleY = screenHeight / elementHeight;
+            const scale = Math.min(scaleX, scaleY);
+
+            // Get the current transformOrigin
+            const computedStyle = window.getComputedStyle(targetElement);
+            const transformOrigin = computedStyle.transformOrigin;
+            const [originX, originY] = transformOrigin.split(" ");
+            const originXValue = parseFloat(originX);
+            const originYValue = parseFloat(originY);
+
+            // Calculate offsets with respect to the transformOrigin
+            const offsetX =
+                (screenWidth - elementWidth * scale) / 2 -
+                elementX -
+                originXValue * (1 - scale);
+            const offsetY =
+                (screenHeight - elementHeight * scale) / 2 -
+                elementY -
+                originYValue * (1 - scale);
+
+            // Apply scale and offsets to the element
+            targetElement.style.transform = `translate(${offsetX}px, ${offsetY}px) scale(${scale})`;
+
+            // Update the enclosing zoom/pan state
+            zoomLevel = scale;
+            panX = offsetX;
+            panY = offsetY;
+
+            fullScreenMode = true;
+            toggleOverlap("on");
+        }
+
+        // Handle keydown events
+        function handleKeyDown(event) {
+            // Hotkey handlers keyed by KeyboardEvent.code.
+            const handlers = {
+                [hotkeysConfig.resetZoom]: resetZoom,
+                [hotkeysConfig.overlap]: toggleOverlap,
+                [hotkeysConfig.fitToScreen]: fitToScreen
+            };
+
+            const handler = handlers[event.code];
+            if (!handler) return;
+            // The event object is forwarded as the handler's first argument.
+            event.preventDefault();
+            handler(event);
+        }
+
+        // Get Mouse position
+        function getMousePosition(e) {
+            // Store the offset coordinates carried by the event.
+            [mouseX, mouseY] = [e.offsetX, e.offsetY];
+        }
+
+        targetElement.addEventListener("mousemove", getMousePosition);
+
+        // Handle events only inside the targetElement
+        let isKeyDownHandlerAttached = false;
+
+        function handleMouseMove() {
+            // Attach the hotkey listener once; later moves are no-ops.
+            if (isKeyDownHandlerAttached) return;
+            document.addEventListener("keydown", handleKeyDown);
+            isKeyDownHandlerAttached = true;
+        }
+
+        function handleMouseLeave() {
+            // Detach the hotkey listener again if it is currently attached.
+            if (!isKeyDownHandlerAttached) return;
+            document.removeEventListener("keydown", handleKeyDown);
+            isKeyDownHandlerAttached = false;
+        }
+
+        // Hotkeys are live only between a mousemove over the element and its mouseleave
+        targetElement.addEventListener("mousemove", handleMouseMove);
+        targetElement.addEventListener("mouseleave", handleMouseLeave);
+
+        // Reset zoom when click on another tab
+        const tabBar = elements.img2imgTabs;
+        tabBar.addEventListener("click", resetZoom);
+        tabBar.addEventListener("click", () => {
+            // Wrappers with an inline width above 865px are refitted via a zero-delay timeout.
+            const wide = parseInt(targetElement.style.width) > 865;
+            if (wide) setTimeout(fitToElement, 0);
+        });
+
+        targetElement.addEventListener("wheel", wheelEvent => {
+            // Positive deltaY requests zoom-out, anything else zoom-in;
+            // changeZoomLevel itself ignores the event unless Shift is held.
+            changeZoomLevel(wheelEvent.deltaY > 0 ? "-" : "+", wheelEvent);
+
+            // Ctrl/Cmd + wheel adjusts the brush size and suppresses the default action.
+            const resizeBrush = wheelEvent.ctrlKey || wheelEvent.metaKey;
+            if (!resizeBrush) {
+                return;
+            }
+            wheelEvent.preventDefault();
+            adjustBrushSize(elemId, wheelEvent.deltaY);
+        });
+
+        /**
+         * Enter panning mode while the move hotkey is pressed without Ctrl/Cmd.
+         * Only sets the shared isMoving flag; the pan itself is applied by the mousemove handler.
+         * @param {KeyboardEvent} e - The keydown event.
+         */
+        function handleMoveKeyDown(e) {
+            const isMoveKey = e.code === hotkeysConfig.moveKey;
+            if (isMoveKey && !(e.ctrlKey || e.metaKey)) {
+                isMoving = true;
+            }
+        }
+
+        // Releasing the move hotkey always leaves panning mode.
+        function handleMoveKeyUp(e) {
+            if (e.code !== hotkeysConfig.moveKey) return;
+            isMoving = false;
+        }
+
+        document.addEventListener("keydown", handleMoveKeyDown);
+        document.addEventListener("keyup", handleMoveKeyUp);
+
+        // Detect zoom level and update the pan speed.
+        function updatePanPosition(movementX, movementY) {
+            // Pan faster once the zoom level exceeds 8.
+            const panSpeed = zoomLevel > 8 ? 2.5 : 1.5;
+
+            panX += movementX * panSpeed;
+            panY += movementY * panSpeed;
+
+            const transform = `translate(${panX}px, ${panY}px) scale(${zoomLevel})`;
+            targetElement.style.transform = transform;
+
+            // Force the raised z-index while the element is being moved.
+            toggleOverlap("on");
+        }
+
+        function handleMoveByKey(e) {
+            // Apply the mouse movement as a pan only while panning mode is on.
+            if (isMoving) {
+                updatePanPosition(e.movementX, e.movementY);
+            }
+            // Pointer events on the element are off while panning, "auto" otherwise.
+            targetElement.style.pointerEvents = isMoving ? "none" : "auto";
+        }
+
+        gradioApp().addEventListener("mousemove", handleMoveByKey);
+    }
+
+    for (const key of ["sketch", "inpaint", "inpaintSketch"]) {
+        if (elements[key]) applyZoomAndPan(elements[key], elementIDs[key]); // skip canvases missing from the DOM
+    }
+});