path: root/extensions-builtin
diff options
Diffstat (limited to 'extensions-builtin')
10 files changed, 693 insertions, 2 deletions
diff --git a/extensions-builtin/Lora/lora_logger.py b/extensions-builtin/Lora/lora_logger.py
new file mode 100644
index 00000000..d51de297
--- /dev/null
+++ b/extensions-builtin/Lora/lora_logger.py
@@ -0,0 +1,33 @@
+import sys
+import copy
+import logging
class ColoredFormatter(logging.Formatter):
    """Formatter that wraps the record's level name in ANSI colour codes."""

    # ANSI escape sequences keyed by level name. "RESET" restores the default
    # colours and is also the fallback for level names that have no entry.
    COLORS = {
        "DEBUG": "\033[0;36m",  # CYAN
        "INFO": "\033[0;32m",  # GREEN
        "WARNING": "\033[0;33m",  # YELLOW
        "ERROR": "\033[0;31m",  # RED
        "CRITICAL": "\033[0;37;41m",  # WHITE ON RED
        "RESET": "\033[0m",  # RESET COLOR
    }

    def format(self, record):
        """Return *record* formatted with a colourised ``levelname``.

        A shallow copy of the record is formatted, so the record passed in
        keeps its plain level name.
        """
        reset = self.COLORS["RESET"]
        tinted = copy.copy(record)
        plain_name = tinted.levelname
        start = self.COLORS.get(plain_name, reset)
        tinted.levelname = f"{start}{plain_name}{reset}"
        return super().format(tinted)
# Module-level "lora" logger. Propagation is switched off so records are only
# handled by the handler attached below.
logger = logging.getLogger("lora")
logger.propagate = False
# Attach the stdout handler only when the logger has none yet, so running this
# module body again does not add a second handler.
if not logger.handlers:
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(
        ColoredFormatter("[%(name)s]-%(levelname)s: %(message)s")
    )
    logger.addHandler(handler)
diff --git a/extensions-builtin/Lora/lyco_helpers.py b/extensions-builtin/Lora/lyco_helpers.py
index 279b34bc..1679a0ce 100644
--- a/extensions-builtin/Lora/lyco_helpers.py
+++ b/extensions-builtin/Lora/lyco_helpers.py
@@ -19,3 +19,50 @@ def rebuild_cp_decomposition(up, down, mid):
up = up.reshape(up.size(0), -1)
down = down.reshape(down.size(0), -1)
return torch.einsum('n m k l, i n, m j -> i j k l', mid, up, down)
+# copied from https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/lokr.py
def factorization(dimension: int, factor:int=-1) -> tuple[int, int]:
    '''
    Split ``dimension`` into two integer factors ``(m, n)`` with ``m <= n``
    and ``m * n == dimension``.

    * If ``factor`` is positive and divides ``dimension``, the pair
      ``(factor, dimension // factor)`` is returned (sorted ascending).
    * Otherwise divisors of ``dimension`` are walked upwards from 1 and the
      last one that does not exceed ``factor`` is used. A negative ``factor``
      means "no limit", which yields the most balanced pair.
    * ``factor == 0`` returns ``(1, dimension)``.

    In LoRA with Kronecker product, the first value is a value for weight scale
    and the second value is a value for weight.
    Because of the non-commutative property, A⊗B ≠ B⊗A, the meaning of the two
    matrices is slightly different.

    examples)
    factor
            -1               2                4               8               16               ...
    127  ->  1, 127   127 -> 1, 127    127 -> 1, 127   127 -> 1, 127   127 -> 1, 127
    128  ->  8, 16    128 -> 2, 64     128 -> 4, 32    128 -> 8, 16    128 -> 8, 16
    250  -> 10, 25    250 -> 2, 125    250 -> 2, 125   250 -> 5, 50    250 -> 10, 25
    360  -> 18, 20    360 -> 2, 180    360 -> 4, 90    360 -> 8, 45    360 -> 15, 24
    512  -> 16, 32    512 -> 2, 256    512 -> 4, 128   512 -> 8, 64    512 -> 16, 32
    1024 -> 32, 32    1024 -> 2, 512   1024 -> 4, 256  1024 -> 8, 128  1024 -> 16, 64
    '''
    # Fast path: the requested factor divides the dimension exactly.
    if factor > 0 and (dimension % factor) == 0:
        m = factor
        n = dimension // factor
        if m > n:
            n, m = m, n
        return m, n

    # A negative factor places no upper bound on the smaller value.
    if factor < 0:
        factor = dimension

    m, n = 1, dimension
    while m < n:
        # Advance to the next divisor of ``dimension`` above the current ``m``.
        new_m = m + 1
        while dimension % new_m != 0:
            new_m += 1
        # The former ``new_m + new_n > length`` test was removed: ``length`` was
        # fixed at ``1 + dimension`` and no divisor pair can sum to more than that.
        if new_m > factor:
            break
        m, n = new_m, dimension // new_m

    # The walk may step one divisor past the balance point; restore m <= n.
    if m > n:
        n, m = m, n
    return m, n
diff --git a/extensions-builtin/Lora/network.py b/extensions-builtin/Lora/network.py
index d8e8dfb7..6021fd8d 100644
--- a/extensions-builtin/Lora/network.py
+++ b/extensions-builtin/Lora/network.py
@@ -93,6 +93,7 @@ class Network: # LoraModule
self.unet_multiplier = 1.0
self.dyn_dim = None
self.modules = {}
+ self.bundle_embeddings = {}
self.mtime = None
self.mentioned_name = None
diff --git a/extensions-builtin/Lora/network_glora.py b/extensions-builtin/Lora/network_glora.py
new file mode 100644
index 00000000..492d4870
--- /dev/null
+++ b/extensions-builtin/Lora/network_glora.py
@@ -0,0 +1,33 @@
+import network
class ModuleTypeGLora(network.ModuleType):
    """Module type that recognises GLoRA weight sets."""

    def create_module(self, net: network.Network, weights: network.NetworkWeights):
        """Return a ``NetworkModuleGLora`` when every GLoRA key is present, else ``None``."""
        required_keys = ("a1.weight", "a2.weight", "alpha", "b1.weight", "b2.weight")
        for key in required_keys:
            if key not in weights.w:
                return None
        return NetworkModuleGLora(net, weights)
+# adapted from https://github.com/KohakuBlueleaf/LyCORIS
class NetworkModuleGLora(network.NetworkModule):
    """Network module holding the four GLoRA matrices (a1, b1, a2, b2)."""

    def __init__(self, net: network.Network, weights: network.NetworkWeights):
        super().__init__(net, weights)

        # Only record a shape when the wrapped module exposes a weight tensor.
        if hasattr(self.sd_module, 'weight'):
            self.shape = self.sd_module.weight.shape

        tensors = weights.w
        self.w1a = tensors["a1.weight"]
        self.w1b = tensors["b1.weight"]
        self.w2a = tensors["a2.weight"]
        self.w2b = tensors["b2.weight"]

    def calc_updown(self, orig_weight):
        """Compute the weight delta ``w2b @ w1b + (orig_weight @ w2a) @ w1a``.

        All four matrices are first moved to ``orig_weight``'s device and dtype;
        the result is handed to ``finalize_updown``.
        """
        def on_target(tensor):
            return tensor.to(orig_weight.device, dtype=orig_weight.dtype)

        w1a = on_target(self.w1a)
        w1b = on_target(self.w1b)
        w2a = on_target(self.w2a)
        w2b = on_target(self.w2b)

        output_shape = [w1a.size(0), w1b.size(1)]

        fixed_term = w2b @ w1b
        weight_term = (orig_weight @ w2a) @ w1a
        updown = fixed_term + weight_term

        return self.finalize_updown(updown, orig_weight, output_shape)
diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py
new file mode 100644
index 00000000..05c37811
--- /dev/null
+++ b/extensions-builtin/Lora/network_oft.py
@@ -0,0 +1,97 @@
+import torch
+import network
+from lyco_helpers import factorization
+from einops import rearrange
class ModuleTypeOFT(network.ModuleType):
    """Module type that recognises OFT weight sets (``oft_blocks`` or ``oft_diag``)."""

    def create_module(self, net: network.Network, weights: network.NetworkWeights):
        """Return a ``NetworkModuleOFT`` when either OFT key is present, else ``None``."""
        has_oft_weights = "oft_blocks" in weights.w or "oft_diag" in weights.w
        if not has_oft_weights:
            return None
        return NetworkModuleOFT(net, weights)
+# Supports both kohya-ss' implementation of COFT https://github.com/kohya-ss/sd-scripts/blob/main/networks/oft.py
+# and KohakuBlueleaf's implementation of OFT/COFT https://github.com/KohakuBlueleaf/LyCORIS/blob/dev/lycoris/modules/diag_oft.py
class NetworkModuleOFT(network.NetworkModule):
    """Network module applying block-wise OFT rotations to a layer's weight.

    Two weight layouts are accepted: ``oft_blocks`` (kohya-ss) and ``oft_diag``
    (KohakuBlueleaf). The weight delta is computed in ``calc_updown_kb``.
    """

    def __init__(self, net: network.Network, weights: network.NetworkWeights):
        super().__init__(net, weights)

        self.lin_module = None
        # Annotation fixed: ``torch.Module`` does not exist, the class is ``torch.nn.Module``.
        self.org_module: list[torch.nn.Module] = [self.sd_module]

        # kohya-ss
        if "oft_blocks" in weights.w:
            self.is_kohya = True
            self.oft_blocks = weights.w["oft_blocks"]  # (num_blocks, block_size, block_size)
            self.alpha = weights.w["alpha"]  # alpha is constraint
            self.dim = self.oft_blocks.shape[0]  # lora dim
        # KohakuBlueleaf
        elif "oft_diag" in weights.w:
            self.is_kohya = False
            self.oft_blocks = weights.w["oft_diag"]
            # self.alpha is unused
            self.dim = self.oft_blocks.shape[1]  # (num_blocks, block_size, block_size)

        # Exact type matches (not isinstance), as in the original.
        is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear]
        is_conv = type(self.sd_module) in [torch.nn.Conv2d]
        is_other_linear = type(self.sd_module) in [torch.nn.MultiheadAttention]  # unsupported

        # NOTE(review): for any other module type ``out_dim`` is never set and
        # the code below raises AttributeError.
        if is_linear:
            self.out_dim = self.sd_module.out_features
        elif is_conv:
            self.out_dim = self.sd_module.out_channels
        elif is_other_linear:
            self.out_dim = self.sd_module.embed_dim

        if self.is_kohya:
            self.constraint = self.alpha * self.out_dim
            self.num_blocks = self.dim
            self.block_size = self.out_dim // self.dim
        else:
            self.constraint = None
            self.block_size, self.num_blocks = factorization(self.out_dim, self.dim)

    def calc_updown_kb(self, orig_weight, multiplier):
        """Return the delta that turns ``orig_weight`` into its block-rotated form.

        ``orig_weight`` is split along its first axis into ``num_blocks`` groups
        of ``block_size`` rows, each group is combined with its matrix from
        ``R = (B - B^T) * multiplier + I``, and ``orig_weight`` is subtracted.
        """
        oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
        oft_blocks = oft_blocks - oft_blocks.transpose(1, 2)  # make every block skew-symmetric

        # The blocks are already on the target device/dtype, so the second
        # ``.to()`` call the original made here was a no-op and is dropped.
        # NOTE(review): torch.eye uses the default dtype; confirm that mixing it
        # with half-precision weights is intended.
        R = oft_blocks * multiplier + torch.eye(self.block_size, device=orig_weight.device)

        # This errors out for MultiheadAttention, might need to be handled up-stream
        merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size)
        merged_weight = torch.einsum(
            'k n m, k n ... -> k m ...',
            R,
            merged_weight
        )
        merged_weight = rearrange(merged_weight, 'k m ... -> (k m) ...')

        updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight
        output_shape = orig_weight.shape
        return self.finalize_updown(updown, orig_weight, output_shape)

    def calc_updown(self, orig_weight):
        """Compute the weight delta for ``orig_weight`` using this module's multiplier."""
        # if alpha is a very small number as in coft, calc_scale() will return a almost zero number so we ignore it
        multiplier = self.multiplier()
        return self.calc_updown_kb(orig_weight, multiplier)

    # override to remove the multiplier/scale factor; the multiplier is already
    # applied to R in calc_updown_kb
    def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
        """Add the optional bias, reshape ``updown`` and scale ``ex_bias``.

        Returns the tuple ``(updown, ex_bias)``. ``updown`` itself is not
        multiplied by anything here.
        """
        if self.bias is not None:
            updown = updown.reshape(self.bias.shape)
            updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype)
            updown = updown.reshape(output_shape)

        if len(output_shape) == 4:
            updown = updown.reshape(output_shape)

        if orig_weight.size().numel() == updown.size().numel():
            updown = updown.reshape(orig_weight.shape)

        if ex_bias is not None:
            ex_bias = ex_bias * self.multiplier()

        return updown, ex_bias
diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py
index 96f935b2..7f814706 100644
--- a/extensions-builtin/Lora/networks.py
+++ b/extensions-builtin/Lora/networks.py
@@ -5,16 +5,21 @@ import re
import lora_patches
import network
import network_lora
+import network_glora
import network_hada
import network_ia3
import network_lokr
import network_full
import network_norm
+import network_oft
import torch
from typing import Union
from modules import shared, devices, sd_models, errors, scripts, sd_hijack
+import modules.textual_inversion.textual_inversion as textual_inversion
+from lora_logger import logger
module_types = [
@@ -23,6 +28,8 @@ module_types = [
+ network_glora.ModuleTypeGLora(),
+ network_oft.ModuleTypeOFT(),
@@ -149,9 +156,19 @@ def load_network(name, network_on_disk):
is_sd2 = 'model_transformer_resblocks' in shared.sd_model.network_layer_mapping
matched_networks = {}
+ bundle_embeddings = {}
for key_network, weight in sd.items():
key_network_without_network_parts, network_part = key_network.split(".", 1)
+ if key_network_without_network_parts == "bundle_emb":
+ emb_name, vec_name = network_part.split(".", 1)
+ emb_dict = bundle_embeddings.get(emb_name, {})
+ if vec_name.split('.')[0] == 'string_to_param':
+ _, k2 = vec_name.split('.', 1)
+ emb_dict['string_to_param'] = {k2: weight}
+ else:
+ emb_dict[vec_name] = weight
+ bundle_embeddings[emb_name] = emb_dict
key = convert_diffusers_name_to_compvis(key_network_without_network_parts, is_sd2)
sd_module = shared.sd_model.network_layer_mapping.get(key, None)
@@ -174,6 +191,17 @@ def load_network(name, network_on_disk):
key = key_network_without_network_parts.replace("lora_te1_text_model", "transformer_text_model")
sd_module = shared.sd_model.network_layer_mapping.get(key, None)
+ # kohya_ss OFT module
+ elif sd_module is None and "oft_unet" in key_network_without_network_parts:
+ key = key_network_without_network_parts.replace("oft_unet", "diffusion_model")
+ sd_module = shared.sd_model.network_layer_mapping.get(key, None)
+ # KohakuBlueLeaf OFT module
+ if sd_module is None and "oft_diag" in key:
+ key = key_network_without_network_parts.replace("lora_unet", "diffusion_model")
+ key = key_network_without_network_parts.replace("lora_te1_text_model", "0_transformer_text_model")
+ sd_module = shared.sd_model.network_layer_mapping.get(key, None)
if sd_module is None:
keys_failed_to_match[key_network] = key
@@ -195,6 +223,14 @@ def load_network(name, network_on_disk):
net.modules[key] = net_module
+ embeddings = {}
+ for emb_name, data in bundle_embeddings.items():
+ embedding = textual_inversion.create_embedding_from_data(data, emb_name, filename=network_on_disk.filename + "/" + emb_name)
+ embedding.loaded = None
+ embeddings[emb_name] = embedding
+ net.bundle_embeddings = embeddings
if keys_failed_to_match:
logging.debug(f"Network {network_on_disk.filename} didn't match keys: {keys_failed_to_match}")
@@ -210,11 +246,15 @@ def purge_networks_from_memory():
def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
+ emb_db = sd_hijack.model_hijack.embedding_db
already_loaded = {}
for net in loaded_networks:
if net.name in names:
already_loaded[net.name] = net
+ for emb_name, embedding in net.bundle_embeddings.items():
+ if embedding.loaded:
+ emb_db.register_embedding_by_name(None, shared.sd_model, emb_name)
@@ -257,6 +297,21 @@ def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=No
net.dyn_dim = dyn_dims[i] if dyn_dims else 1.0
+ for emb_name, embedding in net.bundle_embeddings.items():
+ if embedding.loaded is None and emb_name in emb_db.word_embeddings:
+ logger.warning(
+ f'Skip bundle embedding: "{emb_name}"'
+ ' as it was already loaded from embeddings folder'
+ )
+ continue
+ embedding.loaded = False
+ if emb_db.expected_shape == -1 or emb_db.expected_shape == embedding.shape:
+ embedding.loaded = True
+ emb_db.register_embedding(embedding, shared.sd_model)
+ else:
+ emb_db.skipped_embeddings[name] = embedding
if failed_to_load_networks:
sd_hijack.model_hijack.comments.append("Networks not found: " + ", ".join(failed_to_load_networks))
@@ -418,6 +473,7 @@ def network_forward(module, input, original_forward):
def network_reset_cached_weight(self: Union[torch.nn.Conv2d, torch.nn.Linear]):
self.network_current_names = ()
self.network_weights_backup = None
+ self.network_bias_backup = None
def network_Linear_forward(self, input):
@@ -564,6 +620,7 @@ extra_network_lora = None
available_networks = {}
available_network_aliases = {}
loaded_networks = []
+loaded_bundle_embeddings = {}
networks_in_memory = {}
available_network_hash_lookup = {}
forbidden_network_aliases = {}
diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py
index 55409a78..df02c663 100644
--- a/extensions-builtin/Lora/ui_extra_networks_lora.py
+++ b/extensions-builtin/Lora/ui_extra_networks_lora.py
@@ -17,6 +17,8 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage):
def create_item(self, name, index=None, enable_filter=True):
lora_on_disk = networks.available_networks.get(name)
+ if lora_on_disk is None:
+ return
path, ext = os.path.splitext(lora_on_disk.filename)
@@ -66,9 +68,10 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage):
return item
def list_items(self):
- for index, name in enumerate(networks.available_networks):
+ # instantiate a list to protect against concurrent modification
+ names = list(networks.available_networks)
+ for index, name in enumerate(names):
item = self.create_item(name, index)
if item is not None:
yield item
diff --git a/extensions-builtin/hypertile/hypertile.py b/extensions-builtin/hypertile/hypertile.py
new file mode 100644
index 00000000..feb02fd2
--- /dev/null
+++ b/extensions-builtin/hypertile/hypertile.py
@@ -0,0 +1,345 @@
+Hypertile module for splitting attention layers in SD-1.5 U-Net and SD-1.5 VAE
+Warn: The patch works well only if the input image has a width and height that are multiples of 128
+Original author: @tfernd Github: https://github.com/tfernd/HyperTile
+from __future__ import annotations
+import functools
+from dataclasses import dataclass
+from typing import Callable
+from functools import wraps, cache
+import math
+import torch.nn as nn
+import random
+from einops import rearrange
class HypertileParams:
    """Per-layer Hypertile settings shared by a hooked module and its forward wrapper."""
    depth = 0  # depth index of the layer table in which this layer's name was found
    layer_name = ""  # name under which model.named_modules() reported the module
    tile_size: int = 0  # tile size from which the wrapper derives its latent tile size
    swap_size: int = 0  # handed to random_divisor as max_options
    aspect_ratio: float = 1.0  # width / height, as assigned by hypertile_hook_model
    forward = None  # the module's original forward callable
    enabled = False  # while False the wrapper calls the original forward unchanged
# Self-attention layers to patch for SD 1.5, grouped by depth (0-3).
# hypertile_hook_model matches module names against these entries by suffix.
DEPTH_LAYERS = {
    0: [
        # SD 1.5 U-Net (diffusers)
        "down_blocks.0.attentions.0.transformer_blocks.0.attn1",
        "down_blocks.0.attentions.1.transformer_blocks.0.attn1",
        "up_blocks.3.attentions.0.transformer_blocks.0.attn1",
        "up_blocks.3.attentions.1.transformer_blocks.0.attn1",
        "up_blocks.3.attentions.2.transformer_blocks.0.attn1",
        # SD 1.5 U-Net (ldm)
        "input_blocks.1.1.transformer_blocks.0.attn1",
        "input_blocks.2.1.transformer_blocks.0.attn1",
        "output_blocks.9.1.transformer_blocks.0.attn1",
        "output_blocks.10.1.transformer_blocks.0.attn1",
        "output_blocks.11.1.transformer_blocks.0.attn1",
        # SD 1.5 VAE
        "decoder.mid_block.attentions.0",
        "decoder.mid.attn_1",
    ],
    1: [
        # SD 1.5 U-Net (diffusers)
        "down_blocks.1.attentions.0.transformer_blocks.0.attn1",
        "down_blocks.1.attentions.1.transformer_blocks.0.attn1",
        "up_blocks.2.attentions.0.transformer_blocks.0.attn1",
        "up_blocks.2.attentions.1.transformer_blocks.0.attn1",
        "up_blocks.2.attentions.2.transformer_blocks.0.attn1",
        # SD 1.5 U-Net (ldm)
        "input_blocks.4.1.transformer_blocks.0.attn1",
        "input_blocks.5.1.transformer_blocks.0.attn1",
        "output_blocks.6.1.transformer_blocks.0.attn1",
        "output_blocks.7.1.transformer_blocks.0.attn1",
        "output_blocks.8.1.transformer_blocks.0.attn1",
    ],
    2: [
        # SD 1.5 U-Net (diffusers)
        "down_blocks.2.attentions.0.transformer_blocks.0.attn1",
        "down_blocks.2.attentions.1.transformer_blocks.0.attn1",
        "up_blocks.1.attentions.0.transformer_blocks.0.attn1",
        "up_blocks.1.attentions.1.transformer_blocks.0.attn1",
        "up_blocks.1.attentions.2.transformer_blocks.0.attn1",
        # SD 1.5 U-Net (ldm)
        "input_blocks.7.1.transformer_blocks.0.attn1",
        "input_blocks.8.1.transformer_blocks.0.attn1",
        "output_blocks.3.1.transformer_blocks.0.attn1",
        "output_blocks.4.1.transformer_blocks.0.attn1",
        "output_blocks.5.1.transformer_blocks.0.attn1",
    ],
    3: [
        # SD 1.5 U-Net (diffusers)
        "mid_block.attentions.0.transformer_blocks.0.attn1",
        # SD 1.5 U-Net (ldm)
        "middle_block.1.transformer_blocks.0.attn1",
    ],
}
# XL layers, thanks for GitHub@gel-crabs for the help
# Same layout as the SD 1.5 table: depth -> list of layer-name suffixes.
DEPTH_LAYERS_XL = {
    0: [
        # U-Net (diffusers)
        "down_blocks.0.attentions.0.transformer_blocks.0.attn1",
        "down_blocks.0.attentions.1.transformer_blocks.0.attn1",
        "up_blocks.3.attentions.0.transformer_blocks.0.attn1",
        "up_blocks.3.attentions.1.transformer_blocks.0.attn1",
        "up_blocks.3.attentions.2.transformer_blocks.0.attn1",
        # U-Net (ldm)
        "input_blocks.4.1.transformer_blocks.0.attn1",
        "input_blocks.5.1.transformer_blocks.0.attn1",
        "output_blocks.3.1.transformer_blocks.0.attn1",
        "output_blocks.4.1.transformer_blocks.0.attn1",
        "output_blocks.5.1.transformer_blocks.0.attn1",
        # VAE
        "decoder.mid_block.attentions.0",
        "decoder.mid.attn_1",
    ],
    1: [
        # U-Net (diffusers) - disabled in the original table
        #"down_blocks.1.attentions.0.transformer_blocks.0.attn1",
        #"down_blocks.1.attentions.1.transformer_blocks.0.attn1",
        #"up_blocks.2.attentions.0.transformer_blocks.0.attn1",
        #"up_blocks.2.attentions.1.transformer_blocks.0.attn1",
        #"up_blocks.2.attentions.2.transformer_blocks.0.attn1",
        # U-Net (ldm)
        "input_blocks.4.1.transformer_blocks.1.attn1",
        "input_blocks.5.1.transformer_blocks.1.attn1",
        "output_blocks.3.1.transformer_blocks.1.attn1",
        "output_blocks.4.1.transformer_blocks.1.attn1",
        "output_blocks.5.1.transformer_blocks.1.attn1",
    ] + [
        # transformer_blocks.0 .. transformer_blocks.9 of five blocks, emitted
        # in the same order as the hand-written list this replaces
        f"{block}.1.transformer_blocks.{index}.attn1"
        for index in range(10)
        for block in (
            "input_blocks.7",
            "input_blocks.8",
            "output_blocks.0",
            "output_blocks.1",
            "output_blocks.2",
        )
    ],
    2: [
        # U-Net (diffusers)
        "mid_block.attentions.0.transformer_blocks.0.attn1",
    ] + [
        # U-Net (ldm): transformer_blocks.0 .. transformer_blocks.9
        f"middle_block.1.transformer_blocks.{index}.attn1"
        for index in range(10)
    ],
    3: [],  # TODO - separate layers for SD-XL
}
# Dedicated generator so tile choices are reproducible via set_hypertile_seed().
RNG_INSTANCE = random.Random()


def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int:
    """
    Return ``value // d`` for a randomly chosen divisor ``d`` of ``value`` with
    ``d >= min_value``, i.e. a tile count ``x`` such that x * min_value <= value.

    Only the ``max_options`` smallest such divisors are candidates; if
    max_options is 1 the behavior is deterministic.

    ``max_options`` below 1 is treated as 1: an empty candidate list used to
    make ``randint`` raise ValueError. ``min_value`` is clamped to the range
    1..value, so a non-positive ``min_value`` no longer divides by zero.
    """
    max_options = max(1, max_options)  # at least one option must remain
    min_value = max(1, min(min_value, value))

    # All big divisors of value (inclusive)
    divisors = [i for i in range(min_value, value + 1) if value % i == 0]  # divisors in small -> big order
    ns = [value // i for i in divisors[:max_options]]  # has at least 1 element # big -> small order

    # randint is called even for a single candidate, as in the original.
    idx = RNG_INSTANCE.randint(0, len(ns) - 1)
    return ns[idx]


def set_hypertile_seed(seed: int) -> None:
    """Seed the generator used by random_divisor()."""
    RNG_INSTANCE.seed(seed)
def largest_tile_size_available(width: int, height: int) -> int:
    """
    Calculates the largest tile size available for a given width and height
    Tile size is always a power of 2

    The result is the largest power of two dividing both ``width`` and
    ``height``. If both are 0 the function returns 1; the previous doubling
    loop never terminated in that case.
    """
    gcd = math.gcd(width, height)
    if gcd == 0:
        return 1
    # The lowest set bit of gcd is the largest power of two that divides it.
    return gcd & -gcd
def iterative_closest_divisors(hw:int, aspect_ratio:float) -> tuple[int, int]:
    """
    Finds h and w such that h*w = hw, choosing the pair whose w / h is closest
    to aspect_ratio.

    Every divisor h of hw from 2 upwards is tried (h = 1 is not considered);
    on a tie the pair with the smaller h is returned.
    NOTE(review): the earlier docstring described the target as h/w, while the
    comparison uses w/h - confirm which is intended.
    """
    candidates = [(h, hw // h) for h in range(2, hw + 1) if hw % h == 0]

    def distance(pair):
        h, w = pair
        return abs(w / h - aspect_ratio)

    return min(candidates, key=distance)
def find_hw_candidates(hw:int, aspect_ratio:float) -> tuple[int, int]:
    """
    Finds h and w such that h*w = hw and h/w = aspect_ratio

    The rounded square-root estimates are tried first. If their product is not
    hw, w is recomputed from h, then h from w, and finally the search falls
    back to iterative_closest_divisors.
    """
    h = round(math.sqrt(hw * aspect_ratio))
    w = round(math.sqrt(hw / aspect_ratio))

    # The rounded estimates already multiply to hw.
    if h * w == hw:
        return h, w

    # Keep h and take the matching w when it is a whole number.
    w_candidate = hw / h
    if w_candidate.is_integer():
        return h, int(w_candidate)

    # Otherwise keep w and take the matching h when it is a whole number.
    h_candidate = hw / w
    if h_candidate.is_integer():
        return int(h_candidate), w

    return iterative_closest_divisors(hw, aspect_ratio)
def self_attn_forward(params: HypertileParams, scale_depth=True) -> Callable:
    """Build a replacement for ``params.forward`` that runs attention on tiles.

    While ``params.enabled`` is false the wrapper simply calls the original
    forward. Otherwise the first positional argument is split into nh * nw
    tiles folded into the batch axis, the original forward runs on the tiles,
    and the output is reassembled. When ``scale_depth`` is true the minimum
    tile size for 3-D inputs is multiplied by ``2 ** params.depth``.
    """

    @wraps(params.forward)
    def wrapper(*args, **kwargs):
        if not params.enabled:
            return params.forward(*args, **kwargs)

        latent_tile_size = max(128, params.tile_size) // 8
        x = args[0]
        remaining = args[1:]

        # VAE
        if x.ndim == 4:
            _, _, h, w = x.shape

            nh = random_divisor(h, latent_tile_size, params.swap_size)
            nw = random_divisor(w, latent_tile_size, params.swap_size)
            tiled = nh * nw > 1

            if tiled:
                x = rearrange(x, "b c (nh h) (nw w) -> (b nh nw) c h w", nh=nh, nw=nw)  # split into nh * nw tiles

            out = params.forward(x, *remaining, **kwargs)

            if tiled:
                out = rearrange(out, "(b nh nw) c h w -> b c (nh h) (nw w)", nh=nh, nw=nw)
            return out

        # U-Net
        hw: int = x.size(1)
        h, w = find_hw_candidates(hw, params.aspect_ratio)
        assert h * w == hw, f"Invalid aspect ratio {params.aspect_ratio} for input of shape {x.shape}, hw={hw}, h={h}, w={w}"

        factor = 2 ** params.depth if scale_depth else 1
        min_tile = latent_tile_size * factor
        nh = random_divisor(h, min_tile, params.swap_size)
        nw = random_divisor(w, min_tile, params.swap_size)
        tiled = nh * nw > 1

        if tiled:
            x = rearrange(x, "b (nh h nw w) c -> (b nh nw) (h w) c", h=h // nh, w=w // nw, nh=nh, nw=nw)

        out = params.forward(x, *remaining, **kwargs)

        if tiled:
            out = rearrange(out, "(b nh nw) hw c -> b nh nw hw c", nh=nh, nw=nw)
            out = rearrange(out, "b nh nw (h w) c -> b (nh h nw w) c", h=h // nh, w=w // nw)
        return out

    return wrapper
def hypertile_hook_model(model: nn.Module, width, height, *, enable=False, tile_size_max=128, swap_size=1, max_depth=3, is_sdxl=False):
    """Install (once) and configure Hypertile forward wrappers on ``model``.

    On the first call with ``enable`` true, every module whose name ends with
    an entry of the selected depth table gets a HypertileParams object and a
    wrapped ``forward``; the hooked names are stored on the model. On every
    call the per-layer parameters are refreshed from the arguments. If nothing
    is hooked yet and ``enable`` is false, the model is left untouched.
    """
    hooked = getattr(model, "__webui_hypertile_layers", None)

    if hooked is None:
        if not enable:
            return

        hooked = {}
        table = DEPTH_LAYERS_XL if is_sdxl else DEPTH_LAYERS

        for depth in range(4):
            suffixes = tuple(table[depth])
            for layer_name, module in model.named_modules():
                if not layer_name.endswith(suffixes):
                    continue

                params = HypertileParams()
                module.__webui_hypertile_params = params
                params.forward = module.forward
                params.depth = depth
                params.layer_name = layer_name
                module.forward = self_attn_forward(params)

                hooked[layer_name] = 1

        model.__webui_hypertile_layers = hooked

    aspect_ratio = width / height
    tile_size = min(largest_tile_size_available(width, height), tile_size_max)

    for layer_name, module in model.named_modules():
        if layer_name not in hooked:
            continue

        params = module.__webui_hypertile_params
        params.tile_size = tile_size
        params.swap_size = swap_size
        params.aspect_ratio = aspect_ratio
        # Layers deeper than max_depth stay hooked but pass straight through.
        params.enabled = enable and params.depth <= max_depth
diff --git a/extensions-builtin/hypertile/scripts/hypertile_script.py b/extensions-builtin/hypertile/scripts/hypertile_script.py
new file mode 100644
index 00000000..3cc29cd1
--- /dev/null
+++ b/extensions-builtin/hypertile/scripts/hypertile_script.py
@@ -0,0 +1,73 @@
+import hypertile
+from modules import scripts, script_callbacks, shared
class ScriptHypertile(scripts.Script):
    """Script that configures Hypertile before generation and before the hires pass."""

    name = "Hypertile"

    def title(self):
        return self.name

    def show(self, is_img2img):
        return scripts.AlwaysVisible

    def process(self, p, *args):
        """Seed the Hypertile RNG from the first seed and configure for ``p``'s size."""
        first_seed = p.all_seeds[0]
        hypertile.set_hypertile_seed(first_seed)

        configure_hypertile(
            p.width,
            p.height,
            enable_unet=shared.opts.hypertile_enable_unet,
        )

    def before_hr(self, p, *args):
        """Reconfigure for the hires target size; either U-Net option enables the U-Net hook."""
        enable_unet = shared.opts.hypertile_enable_unet_secondpass or shared.opts.hypertile_enable_unet

        configure_hypertile(p.hr_upscale_to_x, p.hr_upscale_to_y, enable_unet=enable_unet)
def configure_hypertile(width, height, enable_unet=True):
    """Apply the current Hypertile options to the VAE and then the U-Net.

    The VAE hook is switched by the ``hypertile_enable_vae`` option; the U-Net
    hook is switched by the ``enable_unet`` argument.
    """
    opts = shared.opts

    # VAE
    hypertile.hypertile_hook_model(
        shared.sd_model.first_stage_model,
        width,
        height,
        swap_size=opts.hypertile_swap_size_vae,
        max_depth=opts.hypertile_max_depth_vae,
        tile_size_max=opts.hypertile_max_tile_vae,
        enable=opts.hypertile_enable_vae,
    )

    # U-Net
    hypertile.hypertile_hook_model(
        shared.sd_model.model,
        width,
        height,
        swap_size=opts.hypertile_swap_size_unet,
        max_depth=opts.hypertile_max_depth_unet,
        tile_size_max=opts.hypertile_max_tile_unet,
        enable=enable_unet,
        is_sdxl=shared.sd_model.is_sdxl,
    )
def on_ui_settings():
    """Register the Hypertile options in the 'hypertile' settings section."""
    import gradio as gr

    section = ('hypertile', "Hypertile")

    # Each call returns a fresh dict so no two options share slider arguments.
    def depth_slider():
        return {"minimum": 0, "maximum": 3, "step": 1}

    def tile_slider():
        return {"minimum": 0, "maximum": 512, "step": 16}

    def swap_slider():
        return {"minimum": 0, "maximum": 6, "step": 1}

    options = {
        "hypertile_explanation": shared.OptionHTML("""
    <a href='https://github.com/tfernd/HyperTile'>Hypertile</a> optimizes the self-attention layer within U-Net and VAE models,
    resulting in a reduction in computation time ranging from 1 to 4 times. The larger the generated image is, the greater the
    benefit.
    """),

        "hypertile_enable_unet": shared.OptionInfo(False, "Enable Hypertile U-Net").info("noticeable change in details of the generated picture; if enabled, overrides the setting below"),
        "hypertile_enable_unet_secondpass": shared.OptionInfo(False, "Enable Hypertile U-Net for hires fix second pass"),
        "hypertile_max_depth_unet": shared.OptionInfo(3, "Hypertile U-Net max depth", gr.Slider, depth_slider()),
        "hypertile_max_tile_unet": shared.OptionInfo(256, "Hypertile U-net max tile size", gr.Slider, tile_slider()),
        "hypertile_swap_size_unet": shared.OptionInfo(3, "Hypertile U-net swap size", gr.Slider, swap_slider()),

        "hypertile_enable_vae": shared.OptionInfo(False, "Enable Hypertile VAE").info("minimal change in the generated picture"),
        "hypertile_max_depth_vae": shared.OptionInfo(3, "Hypertile VAE max depth", gr.Slider, depth_slider()),
        "hypertile_max_tile_vae": shared.OptionInfo(128, "Hypertile VAE max tile size", gr.Slider, tile_slider()),
        "hypertile_swap_size_vae": shared.OptionInfo(3, "Hypertile VAE swap size ", gr.Slider, swap_slider()),
    }

    for option_name, option in options.items():
        option.section = section
        shared.opts.add_option(option_name, option)
diff --git a/extensions-builtin/mobile/javascript/mobile.js b/extensions-builtin/mobile/javascript/mobile.js
index 652f07ac..bff1aced 100644
--- a/extensions-builtin/mobile/javascript/mobile.js
+++ b/extensions-builtin/mobile/javascript/mobile.js
@@ -12,6 +12,8 @@ function isMobile() {
function reportWindowSize() {
+ if (gradioApp().querySelector('.toprow-compact-tools')) return; // not applicable for compact prompt layout
var currentlyMobile = isMobile();
if (currentlyMobile == isSetupForMobile) return;
isSetupForMobile = currentlyMobile;