Diffstat (limited to 'extensions-builtin')
-rw-r--r--  extensions-builtin/LDSR/ldsr_model_arch.py | 13
-rw-r--r--  extensions-builtin/LDSR/scripts/ldsr_model.py | 23
-rw-r--r--  extensions-builtin/LDSR/sd_hijack_autoencoder.py | 28
-rw-r--r--  extensions-builtin/LDSR/sd_hijack_ddpm_v1.py | 66
-rw-r--r--  extensions-builtin/Lora/extra_networks_lora.py | 3
-rw-r--r--  extensions-builtin/Lora/lora.py | 321
-rw-r--r--  extensions-builtin/Lora/scripts/lora_script.py | 50
-rw-r--r--  extensions-builtin/Lora/ui_extra_networks_lora.py | 8
-rw-r--r--  extensions-builtin/ScuNET/scripts/scunet_model.py | 95
-rw-r--r--  extensions-builtin/ScuNET/scunet_model_arch.py | 11
-rw-r--r--  extensions-builtin/SwinIR/scripts/swinir_model.py | 7
-rw-r--r--  extensions-builtin/SwinIR/swinir_model_arch.py | 6
-rw-r--r--  extensions-builtin/SwinIR/swinir_model_arch_v2.py | 58
-rw-r--r--  extensions-builtin/prompt-bracket-checker/javascript/prompt-bracket-checker.js | 128
14 files changed, 552 insertions, 265 deletions
diff --git a/extensions-builtin/LDSR/ldsr_model_arch.py b/extensions-builtin/LDSR/ldsr_model_arch.py
index bc11cc6e..7f450086 100644
--- a/extensions-builtin/LDSR/ldsr_model_arch.py
+++ b/extensions-builtin/LDSR/ldsr_model_arch.py
@@ -88,7 +88,7 @@ class LDSR:
x_t = None
logs = None
- for n in range(n_runs):
+ for _ in range(n_runs):
if custom_shape is not None:
x_t = torch.randn(1, custom_shape[1], custom_shape[2], custom_shape[3]).to(model.device)
x_t = repeat(x_t, '1 c h w -> b c h w', b=custom_shape[0])
@@ -110,7 +110,6 @@ class LDSR:
diffusion_steps = int(steps)
eta = 1.0
- down_sample_method = 'Lanczos'
gc.collect()
if torch.cuda.is_available:
@@ -131,11 +130,11 @@ class LDSR:
im_og = im_og.resize((width_downsampled_pre, height_downsampled_pre), Image.LANCZOS)
else:
print(f"Down sample rate is 1 from {target_scale} / 4 (Not downsampling)")
-
+
# pad width and height to multiples of 64, pads with the edge values of image to avoid artifacts
pad_w, pad_h = np.max(((2, 2), np.ceil(np.array(im_og.size) / 64).astype(int)), axis=0) * 64 - im_og.size
im_padded = Image.fromarray(np.pad(np.array(im_og), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge'))
-
+
logs = self.run(model["model"], im_padded, diffusion_steps, eta)
sample = logs["sample"]
@@ -158,7 +157,7 @@ class LDSR:
def get_cond(selected_path):
- example = dict()
+ example = {}
up_f = 4
c = selected_path.convert('RGB')
c = torch.unsqueeze(torchvision.transforms.ToTensor()(c), 0)
@@ -196,7 +195,7 @@ def convsample_ddim(model, cond, steps, shape, eta=1.0, callback=None, normals_s
@torch.no_grad()
def make_convolutional_sample(batch, model, custom_steps=None, eta=1.0, quantize_x0=False, custom_shape=None, temperature=1., noise_dropout=0., corrector=None,
corrector_kwargs=None, x_T=None, ddim_use_x0_pred=False):
- log = dict()
+ log = {}
z, c, x, xrec, xc = model.get_input(batch, model.first_stage_key,
return_first_stage_outputs=True,
@@ -244,7 +243,7 @@ def make_convolutional_sample(batch, model, custom_steps=None, eta=1.0, quantize
x_sample_noquant = model.decode_first_stage(sample, force_not_quantize=True)
log["sample_noquant"] = x_sample_noquant
log["sample_diff"] = torch.abs(x_sample_noquant - x_sample)
- except:
+ except Exception:
pass
log["sample"] = x_sample
diff --git a/extensions-builtin/LDSR/scripts/ldsr_model.py b/extensions-builtin/LDSR/scripts/ldsr_model.py
index b8cff29b..fbbe9005 100644
--- a/extensions-builtin/LDSR/scripts/ldsr_model.py
+++ b/extensions-builtin/LDSR/scripts/ldsr_model.py
@@ -7,7 +7,8 @@ from basicsr.utils.download_util import load_file_from_url
from modules.upscaler import Upscaler, UpscalerData
from ldsr_model_arch import LDSR
from modules import shared, script_callbacks
-import sd_hijack_autoencoder, sd_hijack_ddpm_v1
+import sd_hijack_autoencoder # noqa: F401
+import sd_hijack_ddpm_v1 # noqa: F401
class UpscalerLDSR(Upscaler):
@@ -25,22 +26,28 @@ class UpscalerLDSR(Upscaler):
yaml_path = os.path.join(self.model_path, "project.yaml")
old_model_path = os.path.join(self.model_path, "model.pth")
new_model_path = os.path.join(self.model_path, "model.ckpt")
- safetensors_model_path = os.path.join(self.model_path, "model.safetensors")
+
+ local_model_paths = self.find_models(ext_filter=[".ckpt", ".safetensors"])
+ local_ckpt_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("model.ckpt")]), None)
+ local_safetensors_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("model.safetensors")]), None)
+ local_yaml_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("project.yaml")]), None)
+
if os.path.exists(yaml_path):
statinfo = os.stat(yaml_path)
if statinfo.st_size >= 10485760:
print("Removing invalid LDSR YAML file.")
os.remove(yaml_path)
+
if os.path.exists(old_model_path):
print("Renaming model from model.pth to model.ckpt")
os.rename(old_model_path, new_model_path)
- if os.path.exists(safetensors_model_path):
- model = safetensors_model_path
+
+ if local_safetensors_path is not None and os.path.exists(local_safetensors_path):
+ model = local_safetensors_path
else:
- model = load_file_from_url(url=self.model_url, model_dir=self.model_path,
- file_name="model.ckpt", progress=True)
- yaml = load_file_from_url(url=self.yaml_url, model_dir=self.model_path,
- file_name="project.yaml", progress=True)
+ model = local_ckpt_path if local_ckpt_path is not None else load_file_from_url(url=self.model_url, model_dir=self.model_path, file_name="model.ckpt", progress=True)
+
+ yaml = local_yaml_path if local_yaml_path is not None else load_file_from_url(url=self.yaml_url, model_dir=self.model_path, file_name="project.yaml", progress=True)
try:
return LDSR(model, yaml)
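
The rewritten loader above prefers a local model.safetensors, then a local model.ckpt, and only downloads as a last resort. A hedged sketch of the next(iter(...), None) idiom it uses ("first match or None"), with made-up paths standing in for self.find_models():

# Made-up file list; the real one comes from self.find_models().
paths = ["models/LDSR/project.yaml", "models/LDSR/model.ckpt"]

local_ckpt = next(iter([p for p in paths if p.endswith("model.ckpt")]), None)
local_safetensors = next(iter([p for p in paths if p.endswith("model.safetensors")]), None)

print(local_ckpt)         # models/LDSR/model.ckpt
print(local_safetensors)  # None -> fall back to model.ckpt, else download
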
diff --git a/extensions-builtin/LDSR/sd_hijack_autoencoder.py b/extensions-builtin/LDSR/sd_hijack_autoencoder.py
index 8e03c7f8..81c5101b 100644
--- a/extensions-builtin/LDSR/sd_hijack_autoencoder.py
+++ b/extensions-builtin/LDSR/sd_hijack_autoencoder.py
@@ -1,16 +1,21 @@
# The content of this file comes from the ldm/models/autoencoder.py file of the compvis/stable-diffusion repo
# The VQModel & VQModelInterface were subsequently removed from ldm/models/autoencoder.py when we moved to the stability-ai/stablediffusion repo
# As the LDSR upscaler relies on VQModel & VQModelInterface, the hijack aims to put them back into the ldm.models.autoencoder
-
+import numpy as np
import torch
import pytorch_lightning as pl
import torch.nn.functional as F
from contextlib import contextmanager
+
+from torch.optim.lr_scheduler import LambdaLR
+
+from ldm.modules.ema import LitEma
from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer
from ldm.modules.diffusionmodules.model import Encoder, Decoder
from ldm.util import instantiate_from_config
import ldm.models.autoencoder
+from packaging import version
class VQModel(pl.LightningModule):
def __init__(self,
@@ -19,7 +24,7 @@ class VQModel(pl.LightningModule):
n_embed,
embed_dim,
ckpt_path=None,
- ignore_keys=[],
+ ignore_keys=None,
image_key="image",
colorize_nlabels=None,
monitor=None,
@@ -57,7 +62,7 @@ class VQModel(pl.LightningModule):
print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
if ckpt_path is not None:
- self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
+ self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys or [])
self.scheduler_config = scheduler_config
self.lr_g_factor = lr_g_factor
@@ -76,11 +81,11 @@ class VQModel(pl.LightningModule):
if context is not None:
print(f"{context}: Restored training weights")
- def init_from_ckpt(self, path, ignore_keys=list()):
+ def init_from_ckpt(self, path, ignore_keys=None):
sd = torch.load(path, map_location="cpu")["state_dict"]
keys = list(sd.keys())
for k in keys:
- for ik in ignore_keys:
+ for ik in ignore_keys or []:
if k.startswith(ik):
print("Deleting key {} from state_dict.".format(k))
del sd[k]
@@ -165,7 +170,7 @@ class VQModel(pl.LightningModule):
def validation_step(self, batch, batch_idx):
log_dict = self._validation_step(batch, batch_idx)
with self.ema_scope():
- log_dict_ema = self._validation_step(batch, batch_idx, suffix="_ema")
+ self._validation_step(batch, batch_idx, suffix="_ema")
return log_dict
def _validation_step(self, batch, batch_idx, suffix=""):
@@ -232,7 +237,7 @@ class VQModel(pl.LightningModule):
return self.decoder.conv_out.weight
def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs):
- log = dict()
+ log = {}
x = self.get_input(batch, self.image_key)
x = x.to(self.device)
if only_inputs:
@@ -249,7 +254,8 @@ class VQModel(pl.LightningModule):
if plot_ema:
with self.ema_scope():
xrec_ema, _ = self(x)
- if x.shape[1] > 3: xrec_ema = self.to_rgb(xrec_ema)
+ if x.shape[1] > 3:
+ xrec_ema = self.to_rgb(xrec_ema)
log["reconstructions_ema"] = xrec_ema
return log
@@ -264,7 +270,7 @@ class VQModel(pl.LightningModule):
class VQModelInterface(VQModel):
def __init__(self, embed_dim, *args, **kwargs):
- super().__init__(embed_dim=embed_dim, *args, **kwargs)
+ super().__init__(*args, embed_dim=embed_dim, **kwargs)
self.embed_dim = embed_dim
def encode(self, x):
@@ -282,5 +288,5 @@ class VQModelInterface(VQModel):
dec = self.decoder(quant)
return dec
-setattr(ldm.models.autoencoder, "VQModel", VQModel)
-setattr(ldm.models.autoencoder, "VQModelInterface", VQModelInterface)
+ldm.models.autoencoder.VQModel = VQModel
+ldm.models.autoencoder.VQModelInterface = VQModelInterface
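
Several hunks above replace ignore_keys=[] with ignore_keys=None plus "ignore_keys or []". A self-contained illustration of the mutable-default pitfall that change avoids (function names are made up):

def buggy(item, seen=[]):      # the default list is created once and shared
    seen.append(item)
    return seen

def fixed(item, seen=None):
    seen = seen or []          # fresh list per call, as in the patch
    seen.append(item)
    return seen

print(buggy(1), buggy(2))      # [1, 2] [1, 2] - state leaks between calls
print(fixed(1), fixed(2))      # [1] [2]
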
diff --git a/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py b/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py
index 5c0488e5..631a08ef 100644
--- a/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py
+++ b/extensions-builtin/LDSR/sd_hijack_ddpm_v1.py
@@ -48,7 +48,7 @@ class DDPMV1(pl.LightningModule):
beta_schedule="linear",
loss_type="l2",
ckpt_path=None,
- ignore_keys=[],
+ ignore_keys=None,
load_only_unet=False,
monitor="val/loss",
use_ema=True,
@@ -100,7 +100,7 @@ class DDPMV1(pl.LightningModule):
if monitor is not None:
self.monitor = monitor
if ckpt_path is not None:
- self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys, only_model=load_only_unet)
+ self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys or [], only_model=load_only_unet)
self.register_schedule(given_betas=given_betas, beta_schedule=beta_schedule, timesteps=timesteps,
linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s)
@@ -182,13 +182,13 @@ class DDPMV1(pl.LightningModule):
if context is not None:
print(f"{context}: Restored training weights")
- def init_from_ckpt(self, path, ignore_keys=list(), only_model=False):
+ def init_from_ckpt(self, path, ignore_keys=None, only_model=False):
sd = torch.load(path, map_location="cpu")
if "state_dict" in list(sd.keys()):
sd = sd["state_dict"]
keys = list(sd.keys())
for k in keys:
- for ik in ignore_keys:
+ for ik in ignore_keys or []:
if k.startswith(ik):
print("Deleting key {} from state_dict.".format(k))
del sd[k]
@@ -375,7 +375,7 @@ class DDPMV1(pl.LightningModule):
@torch.no_grad()
def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=None, **kwargs):
- log = dict()
+ log = {}
x = self.get_input(batch, self.first_stage_key)
N = min(x.shape[0], N)
n_row = min(x.shape[0], n_row)
@@ -383,7 +383,7 @@ class DDPMV1(pl.LightningModule):
log["inputs"] = x
# get diffusion row
- diffusion_row = list()
+ diffusion_row = []
x_start = x[:n_row]
for t in range(self.num_timesteps):
@@ -444,13 +444,13 @@ class LatentDiffusionV1(DDPMV1):
conditioning_key = None
ckpt_path = kwargs.pop("ckpt_path", None)
ignore_keys = kwargs.pop("ignore_keys", [])
- super().__init__(conditioning_key=conditioning_key, *args, **kwargs)
+ super().__init__(*args, conditioning_key=conditioning_key, **kwargs)
self.concat_mode = concat_mode
self.cond_stage_trainable = cond_stage_trainable
self.cond_stage_key = cond_stage_key
try:
self.num_downs = len(first_stage_config.params.ddconfig.ch_mult) - 1
- except:
+ except Exception:
self.num_downs = 0
if not scale_by_std:
self.scale_factor = scale_factor
@@ -460,7 +460,7 @@ class LatentDiffusionV1(DDPMV1):
self.instantiate_cond_stage(cond_stage_config)
self.cond_stage_forward = cond_stage_forward
self.clip_denoised = False
- self.bbox_tokenizer = None
+ self.bbox_tokenizer = None
self.restarted_from_ckpt = False
if ckpt_path is not None:
@@ -792,7 +792,7 @@ class LatentDiffusionV1(DDPMV1):
z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1])) # (bn, nc, ks[0], ks[1], L )
# 2. apply model loop over last dim
- if isinstance(self.first_stage_model, VQModelInterface):
+ if isinstance(self.first_stage_model, VQModelInterface):
output_list = [self.first_stage_model.decode(z[:, :, :, :, i],
force_not_quantize=predict_cids or force_not_quantize)
for i in range(z.shape[-1])]
@@ -877,16 +877,6 @@ class LatentDiffusionV1(DDPMV1):
c = self.q_sample(x_start=c, t=tc, noise=torch.randn_like(c.float()))
return self.p_losses(x, c, t, *args, **kwargs)
- def _rescale_annotations(self, bboxes, crop_coordinates): # TODO: move to dataset
- def rescale_bbox(bbox):
- x0 = clamp((bbox[0] - crop_coordinates[0]) / crop_coordinates[2])
- y0 = clamp((bbox[1] - crop_coordinates[1]) / crop_coordinates[3])
- w = min(bbox[2] / crop_coordinates[2], 1 - x0)
- h = min(bbox[3] / crop_coordinates[3], 1 - y0)
- return x0, y0, w, h
-
- return [rescale_bbox(b) for b in bboxes]
-
def apply_model(self, x_noisy, t, cond, return_ids=False):
if isinstance(cond, dict):
@@ -900,7 +890,7 @@ class LatentDiffusionV1(DDPMV1):
if hasattr(self, "split_input_params"):
assert len(cond) == 1 # todo can only deal with one conditioning atm
- assert not return_ids
+ assert not return_ids
ks = self.split_input_params["ks"] # eg. (128, 128)
stride = self.split_input_params["stride"] # eg. (64, 64)
@@ -1126,7 +1116,7 @@ class LatentDiffusionV1(DDPMV1):
if cond is not None:
if isinstance(cond, dict):
cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else
- list(map(lambda x: x[:batch_size], cond[key])) for key in cond}
+ [x[:batch_size] for x in cond[key]] for key in cond}
else:
cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size]
@@ -1157,8 +1147,10 @@ class LatentDiffusionV1(DDPMV1):
if i % log_every_t == 0 or i == timesteps - 1:
intermediates.append(x0_partial)
- if callback: callback(i)
- if img_callback: img_callback(img, i)
+ if callback:
+ callback(i)
+ if img_callback:
+ img_callback(img, i)
return img, intermediates
@torch.no_grad()
@@ -1205,8 +1197,10 @@ class LatentDiffusionV1(DDPMV1):
if i % log_every_t == 0 or i == timesteps - 1:
intermediates.append(img)
- if callback: callback(i)
- if img_callback: img_callback(img, i)
+ if callback:
+ callback(i)
+ if img_callback:
+ img_callback(img, i)
if return_intermediates:
return img, intermediates
@@ -1221,7 +1215,7 @@ class LatentDiffusionV1(DDPMV1):
if cond is not None:
if isinstance(cond, dict):
cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else
- list(map(lambda x: x[:batch_size], cond[key])) for key in cond}
+ [x[:batch_size] for x in cond[key]] for key in cond}
else:
cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size]
return self.p_sample_loop(cond,
@@ -1253,7 +1247,7 @@ class LatentDiffusionV1(DDPMV1):
use_ddim = ddim_steps is not None
- log = dict()
+ log = {}
z, c, x, xrec, xc = self.get_input(batch, self.first_stage_key,
return_first_stage_outputs=True,
force_c_encode=True,
@@ -1280,7 +1274,7 @@ class LatentDiffusionV1(DDPMV1):
if plot_diffusion_rows:
# get diffusion row
- diffusion_row = list()
+ diffusion_row = []
z_start = z[:n_row]
for t in range(self.num_timesteps):
if t % self.log_every_t == 0 or t == self.num_timesteps - 1:
@@ -1322,7 +1316,7 @@ class LatentDiffusionV1(DDPMV1):
if inpaint:
# make a simple center square
- b, h, w = z.shape[0], z.shape[2], z.shape[3]
+ h, w = z.shape[2], z.shape[3]
mask = torch.ones(N, h, w).to(self.device)
# zeros will be filled in
mask[:, h // 4:3 * h // 4, w // 4:3 * w // 4] = 0.
@@ -1424,10 +1418,10 @@ class Layout2ImgDiffusionV1(LatentDiffusionV1):
# TODO: move all layout-specific hacks to this class
def __init__(self, cond_stage_key, *args, **kwargs):
assert cond_stage_key == 'coordinates_bbox', 'Layout2ImgDiffusion only for cond_stage_key="coordinates_bbox"'
- super().__init__(cond_stage_key=cond_stage_key, *args, **kwargs)
+ super().__init__(*args, cond_stage_key=cond_stage_key, **kwargs)
def log_images(self, batch, N=8, *args, **kwargs):
- logs = super().log_images(batch=batch, N=N, *args, **kwargs)
+ logs = super().log_images(*args, batch=batch, N=N, **kwargs)
key = 'train' if self.training else 'validation'
dset = self.trainer.datamodule.datasets[key]
@@ -1443,7 +1437,7 @@ class Layout2ImgDiffusionV1(LatentDiffusionV1):
logs['bbox_image'] = cond_img
return logs
-setattr(ldm.models.diffusion.ddpm, "DDPMV1", DDPMV1)
-setattr(ldm.models.diffusion.ddpm, "LatentDiffusionV1", LatentDiffusionV1)
-setattr(ldm.models.diffusion.ddpm, "DiffusionWrapperV1", DiffusionWrapperV1)
-setattr(ldm.models.diffusion.ddpm, "Layout2ImgDiffusionV1", Layout2ImgDiffusionV1)
+ldm.models.diffusion.ddpm.DDPMV1 = DDPMV1
+ldm.models.diffusion.ddpm.LatentDiffusionV1 = LatentDiffusionV1
+ldm.models.diffusion.ddpm.DiffusionWrapperV1 = DiffusionWrapperV1
+ldm.models.diffusion.ddpm.Layout2ImgDiffusionV1 = Layout2ImgDiffusionV1
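
Two hunks above swap list(map(lambda x: x[:batch_size], ...)) for a list comprehension; the result is identical. A quick check with made-up conditioning data:

batch_size = 2
cond = {"c_crossattn": [[1, 2, 3, 4], [5, 6, 7, 8]]}

via_map = {k: list(map(lambda x: x[:batch_size], v)) for k, v in cond.items()}
via_comprehension = {k: [x[:batch_size] for x in v] for k, v in cond.items()}

assert via_map == via_comprehension
print(via_comprehension)  # {'c_crossattn': [[1, 2], [5, 6]]}
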
diff --git a/extensions-builtin/Lora/extra_networks_lora.py b/extensions-builtin/Lora/extra_networks_lora.py
index 6be6ef73..ccb249ac 100644
--- a/extensions-builtin/Lora/extra_networks_lora.py
+++ b/extensions-builtin/Lora/extra_networks_lora.py
@@ -1,6 +1,7 @@
from modules import extra_networks, shared
import lora
+
class ExtraNetworkLora(extra_networks.ExtraNetwork):
def __init__(self):
super().__init__('lora')
@@ -8,7 +9,7 @@ class ExtraNetworkLora(extra_networks.ExtraNetwork):
def activate(self, p, params_list):
additional = shared.opts.sd_lora
- if additional != "" and additional in lora.available_loras and len([x for x in params_list if x.items[0] == additional]) == 0:
+ if additional != "None" and additional in lora.available_loras and len([x for x in params_list if x.items[0] == additional]) == 0:
p.all_prompts = [x + f"<lora:{additional}:{shared.opts.extra_networks_default_multiplier}>" for x in p.all_prompts]
params_list.append(extra_networks.ExtraNetworkParams(items=[additional, shared.opts.extra_networks_default_multiplier]))
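
With the change above, the automatically added Lora is skipped when the option is "None" rather than an empty string. A sketch of the tag that activate() appends, with illustrative values in place of the real options:

additional = "myLora"  # hypothetical shared.opts.sd_lora value
multiplier = 0.8       # hypothetical shared.opts.extra_networks_default_multiplier
prompt = "a photo of a cat"

prompt += f"<lora:{additional}:{multiplier}>"
print(prompt)  # a photo of a cat<lora:myLora:0.8>
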
diff --git a/extensions-builtin/Lora/lora.py b/extensions-builtin/Lora/lora.py
index 8937b585..1308c48b 100644
--- a/extensions-builtin/Lora/lora.py
+++ b/extensions-builtin/Lora/lora.py
@@ -1,21 +1,34 @@
-import glob
import os
import re
import torch
+from typing import Union
-from modules import shared, devices, sd_models, errors
+from modules import shared, devices, sd_models, errors, scripts
metadata_tags_order = {"ss_sd_model_name": 1, "ss_resolution": 2, "ss_clip_skip": 3, "ss_num_train_images": 10, "ss_tag_frequency": 20}
re_digits = re.compile(r"\d+")
-re_unet_down_blocks = re.compile(r"lora_unet_down_blocks_(\d+)_attentions_(\d+)_(.+)")
-re_unet_mid_blocks = re.compile(r"lora_unet_mid_block_attentions_(\d+)_(.+)")
-re_unet_up_blocks = re.compile(r"lora_unet_up_blocks_(\d+)_attentions_(\d+)_(.+)")
-re_text_block = re.compile(r"lora_te_text_model_encoder_layers_(\d+)_(.+)")
+re_x_proj = re.compile(r"(.*)_([qkv]_proj)$")
+re_compiled = {}
+
+suffix_conversion = {
+ "attentions": {},
+ "resnets": {
+ "conv1": "in_layers_2",
+ "conv2": "out_layers_3",
+ "time_emb_proj": "emb_layers_1",
+ "conv_shortcut": "skip_connection",
+ }
+}
+
+
+def convert_diffusers_name_to_compvis(key, is_sd2):
+ def match(match_list, regex_text):
+ regex = re_compiled.get(regex_text)
+ if regex is None:
+ regex = re.compile(regex_text)
+ re_compiled[regex_text] = regex
-
-def convert_diffusers_name_to_compvis(key):
- def match(match_list, regex):
r = re.match(regex, key)
if not r:
return False
@@ -26,16 +39,33 @@ def convert_diffusers_name_to_compvis(key):
m = []
- if match(m, re_unet_down_blocks):
- return f"diffusion_model_input_blocks_{1 + m[0] * 3 + m[1]}_1_{m[2]}"
+ if match(m, r"lora_unet_down_blocks_(\d+)_(attentions|resnets)_(\d+)_(.+)"):
+ suffix = suffix_conversion.get(m[1], {}).get(m[3], m[3])
+ return f"diffusion_model_input_blocks_{1 + m[0] * 3 + m[2]}_{1 if m[1] == 'attentions' else 0}_{suffix}"
+
+ if match(m, r"lora_unet_mid_block_(attentions|resnets)_(\d+)_(.+)"):
+ suffix = suffix_conversion.get(m[0], {}).get(m[2], m[2])
+ return f"diffusion_model_middle_block_{1 if m[0] == 'attentions' else m[1] * 2}_{suffix}"
+
+ if match(m, r"lora_unet_up_blocks_(\d+)_(attentions|resnets)_(\d+)_(.+)"):
+ suffix = suffix_conversion.get(m[1], {}).get(m[3], m[3])
+ return f"diffusion_model_output_blocks_{m[0] * 3 + m[2]}_{1 if m[1] == 'attentions' else 0}_{suffix}"
- if match(m, re_unet_mid_blocks):
- return f"diffusion_model_middle_block_1_{m[1]}"
+ if match(m, r"lora_unet_down_blocks_(\d+)_downsamplers_0_conv"):
+ return f"diffusion_model_input_blocks_{3 + m[0] * 3}_0_op"
- if match(m, re_unet_up_blocks):
- return f"diffusion_model_output_blocks_{m[0] * 3 + m[1]}_1_{m[2]}"
+ if match(m, r"lora_unet_up_blocks_(\d+)_upsamplers_0_conv"):
+ return f"diffusion_model_output_blocks_{2 + m[0] * 3}_{2 if m[0]>0 else 1}_conv"
+
+ if match(m, r"lora_te_text_model_encoder_layers_(\d+)_(.+)"):
+ if is_sd2:
+ if 'mlp_fc1' in m[1]:
+ return f"model_transformer_resblocks_{m[0]}_{m[1].replace('mlp_fc1', 'mlp_c_fc')}"
+ elif 'mlp_fc2' in m[1]:
+ return f"model_transformer_resblocks_{m[0]}_{m[1].replace('mlp_fc2', 'mlp_c_proj')}"
+ else:
+ return f"model_transformer_resblocks_{m[0]}_{m[1].replace('self_attn', 'attn')}"
- if match(m, re_text_block):
return f"transformer_text_model_encoder_layers_{m[0]}_{m[1]}"
return key
@@ -62,6 +92,7 @@ class LoraOnDisk:
self.metadata = m
self.ssmd_cover_images = self.metadata.pop('ssmd_cover_images', None) # those are cover images and they are too big to display in UI as text
+ self.alias = self.metadata.get('ss_output_name', self.name)
class LoraModule:
@@ -101,15 +132,26 @@ def load_lora(name, filename):
sd = sd_models.read_state_dict(filename)
- keys_failed_to_match = []
+ # this should not be needed but is here as an emergency fix for an unknown error people are experiencing in 1.2.0
+ if not hasattr(shared.sd_model, 'lora_layer_mapping'):
+ assign_lora_names_to_compvis_modules(shared.sd_model)
+
+ keys_failed_to_match = {}
+ is_sd2 = 'model_transformer_resblocks' in shared.sd_model.lora_layer_mapping
for key_diffusers, weight in sd.items():
- fullkey = convert_diffusers_name_to_compvis(key_diffusers)
- key, lora_key = fullkey.split(".", 1)
+ key_diffusers_without_lora_parts, lora_key = key_diffusers.split(".", 1)
+ key = convert_diffusers_name_to_compvis(key_diffusers_without_lora_parts, is_sd2)
sd_module = shared.sd_model.lora_layer_mapping.get(key, None)
+
+ if sd_module is None:
+ m = re_x_proj.match(key)
+ if m:
+ sd_module = shared.sd_model.lora_layer_mapping.get(m.group(1), None)
+
if sd_module is None:
- keys_failed_to_match.append(key_diffusers)
+ keys_failed_to_match[key_diffusers] = key
continue
lora_module = lora.modules.get(key, None)
@@ -123,22 +165,30 @@ def load_lora(name, filename):
if type(sd_module) == torch.nn.Linear:
module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False)
- elif type(sd_module) == torch.nn.Conv2d:
+ elif type(sd_module) == torch.nn.modules.linear.NonDynamicallyQuantizableLinear:
+ module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False)
+ elif type(sd_module) == torch.nn.MultiheadAttention:
+ module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False)
+ elif type(sd_module) == torch.nn.Conv2d and weight.shape[2:] == (1, 1):
module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False)
+ elif type(sd_module) == torch.nn.Conv2d and weight.shape[2:] == (3, 3):
+ module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (3, 3), bias=False)
else:
- assert False, f'Lora layer {key_diffusers} matched a layer with unsupported type: {type(sd_module).__name__}'
+ print(f'Lora layer {key_diffusers} matched a layer with unsupported type: {type(sd_module).__name__}')
+ continue
+ raise AssertionError(f"Lora layer {key_diffusers} matched a layer with unsupported type: {type(sd_module).__name__}")
with torch.no_grad():
module.weight.copy_(weight)
- module.to(device=devices.device, dtype=devices.dtype)
+ module.to(device=devices.cpu, dtype=devices.dtype)
if lora_key == "lora_up.weight":
lora_module.up = module
elif lora_key == "lora_down.weight":
lora_module.down = module
else:
- assert False, f'Bad Lora layer name: {key_diffusers} - must end in lora_up.weight, lora_down.weight or alpha'
+ raise AssertionError(f"Bad Lora layer name: {key_diffusers} - must end in lora_up.weight, lora_down.weight or alpha")
if len(keys_failed_to_match) > 0:
print(f"Failed to match keys when loading Lora {filename}: {keys_failed_to_match}")
@@ -155,11 +205,11 @@ def load_loras(names, multipliers=None):
loaded_loras.clear()
- loras_on_disk = [available_loras.get(name, None) for name in names]
- if any([x is None for x in loras_on_disk]):
+ loras_on_disk = [available_lora_aliases.get(name, None) for name in names]
+ if any(x is None for x in loras_on_disk):
list_available_loras()
- loras_on_disk = [available_loras.get(name, None) for name in names]
+ loras_on_disk = [available_lora_aliases.get(name, None) for name in names]
for i, name in enumerate(names):
lora = already_loaded.get(name, None)
@@ -167,7 +217,11 @@ def load_loras(names, multipliers=None):
lora_on_disk = loras_on_disk[i]
if lora_on_disk is not None:
if lora is None or os.path.getmtime(lora_on_disk.filename) > lora.mtime:
- lora = load_lora(name, lora_on_disk.filename)
+ try:
+ lora = load_lora(name, lora_on_disk.filename)
+ except Exception as e:
+ errors.display(e, f"loading Lora {lora_on_disk.filename}")
+ continue
if lora is None:
print(f"Couldn't find Lora with name {name}")
@@ -177,50 +231,229 @@ def load_loras(names, multipliers=None):
loaded_loras.append(lora)
-def lora_forward(module, input, res):
+def lora_calc_updown(lora, module, target):
+ with torch.no_grad():
+ up = module.up.weight.to(target.device, dtype=target.dtype)
+ down = module.down.weight.to(target.device, dtype=target.dtype)
+
+ if up.shape[2:] == (1, 1) and down.shape[2:] == (1, 1):
+ updown = (up.squeeze(2).squeeze(2) @ down.squeeze(2).squeeze(2)).unsqueeze(2).unsqueeze(3)
+ elif up.shape[2:] == (3, 3) or down.shape[2:] == (3, 3):
+ updown = torch.nn.functional.conv2d(down.permute(1, 0, 2, 3), up).permute(1, 0, 2, 3)
+ else:
+ updown = up @ down
+
+ updown = updown * lora.multiplier * (module.alpha / module.up.weight.shape[1] if module.alpha else 1.0)
+
+ return updown
+
+
+def lora_restore_weights_from_backup(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.MultiheadAttention]):
+ weights_backup = getattr(self, "lora_weights_backup", None)
+
+ if weights_backup is None:
+ return
+
+ if isinstance(self, torch.nn.MultiheadAttention):
+ self.in_proj_weight.copy_(weights_backup[0])
+ self.out_proj.weight.copy_(weights_backup[1])
+ else:
+ self.weight.copy_(weights_backup)
+
+
+def lora_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.MultiheadAttention]):
+ """
+ Applies the currently selected set of Loras to the weights of torch layer self.
+ If weights already have this particular set of loras applied, does nothing.
+    If not, restores original weights from backup and alters weights according to loras.
+ """
+
+ lora_layer_name = getattr(self, 'lora_layer_name', None)
+ if lora_layer_name is None:
+ return
+
+ current_names = getattr(self, "lora_current_names", ())
+ wanted_names = tuple((x.name, x.multiplier) for x in loaded_loras)
+
+ weights_backup = getattr(self, "lora_weights_backup", None)
+ if weights_backup is None:
+ if isinstance(self, torch.nn.MultiheadAttention):
+ weights_backup = (self.in_proj_weight.to(devices.cpu, copy=True), self.out_proj.weight.to(devices.cpu, copy=True))
+ else:
+ weights_backup = self.weight.to(devices.cpu, copy=True)
+
+ self.lora_weights_backup = weights_backup
+
+ if current_names != wanted_names:
+ lora_restore_weights_from_backup(self)
+
+ for lora in loaded_loras:
+ module = lora.modules.get(lora_layer_name, None)
+ if module is not None and hasattr(self, 'weight'):
+ self.weight += lora_calc_updown(lora, module, self.weight)
+ continue
+
+ module_q = lora.modules.get(lora_layer_name + "_q_proj", None)
+ module_k = lora.modules.get(lora_layer_name + "_k_proj", None)
+ module_v = lora.modules.get(lora_layer_name + "_v_proj", None)
+ module_out = lora.modules.get(lora_layer_name + "_out_proj", None)
+
+ if isinstance(self, torch.nn.MultiheadAttention) and module_q and module_k and module_v and module_out:
+ updown_q = lora_calc_updown(lora, module_q, self.in_proj_weight)
+ updown_k = lora_calc_updown(lora, module_k, self.in_proj_weight)
+ updown_v = lora_calc_updown(lora, module_v, self.in_proj_weight)
+ updown_qkv = torch.vstack([updown_q, updown_k, updown_v])
+
+ self.in_proj_weight += updown_qkv
+ self.out_proj.weight += lora_calc_updown(lora, module_out, self.out_proj.weight)
+ continue
+
+ if module is None:
+ continue
+
+ print(f'failed to calculate lora weights for layer {lora_layer_name}')
+
+ self.lora_current_names = wanted_names
+
+
+def lora_forward(module, input, original_forward):
+ """
+ Old way of applying Lora by executing operations during layer's forward.
+ Stacking many loras this way results in big performance degradation.
+ """
+
if len(loaded_loras) == 0:
- return res
+ return original_forward(module, input)
+
+ input = devices.cond_cast_unet(input)
+
+ lora_restore_weights_from_backup(module)
+ lora_reset_cached_weight(module)
+
+ res = original_forward(module, input)
lora_layer_name = getattr(module, 'lora_layer_name', None)
for lora in loaded_loras:
module = lora.modules.get(lora_layer_name, None)
- if module is not None:
- if shared.opts.lora_apply_to_outputs and res.shape == input.shape:
- res = res + module.up(module.down(res)) * lora.multiplier * (module.alpha / module.up.weight.shape[1] if module.alpha else 1.0)
- else:
- res = res + module.up(module.down(input)) * lora.multiplier * (module.alpha / module.up.weight.shape[1] if module.alpha else 1.0)
+ if module is None:
+ continue
+
+ module.up.to(device=devices.device)
+ module.down.to(device=devices.device)
+
+ res = res + module.up(module.down(input)) * lora.multiplier * (module.alpha / module.up.weight.shape[1] if module.alpha else 1.0)
return res
+def lora_reset_cached_weight(self: Union[torch.nn.Conv2d, torch.nn.Linear]):
+ self.lora_current_names = ()
+ self.lora_weights_backup = None
+
+
def lora_Linear_forward(self, input):
- return lora_forward(self, input, torch.nn.Linear_forward_before_lora(self, input))
+ if shared.opts.lora_functional:
+ return lora_forward(self, input, torch.nn.Linear_forward_before_lora)
+
+ lora_apply_weights(self)
+
+ return torch.nn.Linear_forward_before_lora(self, input)
+
+
+def lora_Linear_load_state_dict(self, *args, **kwargs):
+ lora_reset_cached_weight(self)
+
+ return torch.nn.Linear_load_state_dict_before_lora(self, *args, **kwargs)
def lora_Conv2d_forward(self, input):
- return lora_forward(self, input, torch.nn.Conv2d_forward_before_lora(self, input))
+ if shared.opts.lora_functional:
+ return lora_forward(self, input, torch.nn.Conv2d_forward_before_lora)
+
+ lora_apply_weights(self)
+
+ return torch.nn.Conv2d_forward_before_lora(self, input)
+
+
+def lora_Conv2d_load_state_dict(self, *args, **kwargs):
+ lora_reset_cached_weight(self)
+
+ return torch.nn.Conv2d_load_state_dict_before_lora(self, *args, **kwargs)
+
+
+def lora_MultiheadAttention_forward(self, *args, **kwargs):
+ lora_apply_weights(self)
+
+ return torch.nn.MultiheadAttention_forward_before_lora(self, *args, **kwargs)
+
+
+def lora_MultiheadAttention_load_state_dict(self, *args, **kwargs):
+ lora_reset_cached_weight(self)
+
+ return torch.nn.MultiheadAttention_load_state_dict_before_lora(self, *args, **kwargs)
def list_available_loras():
available_loras.clear()
+ available_lora_aliases.clear()
+ forbidden_lora_aliases.clear()
+ forbidden_lora_aliases.update({"none": 1})
os.makedirs(shared.cmd_opts.lora_dir, exist_ok=True)
- candidates = \
- glob.glob(os.path.join(shared.cmd_opts.lora_dir, '**/*.pt'), recursive=True) + \
- glob.glob(os.path.join(shared.cmd_opts.lora_dir, '**/*.safetensors'), recursive=True) + \
- glob.glob(os.path.join(shared.cmd_opts.lora_dir, '**/*.ckpt'), recursive=True)
-
- for filename in sorted(candidates):
+ candidates = list(shared.walk_files(shared.cmd_opts.lora_dir, allowed_extensions=[".pt", ".ckpt", ".safetensors"]))
+ for filename in sorted(candidates, key=str.lower):
if os.path.isdir(filename):
continue
name = os.path.splitext(os.path.basename(filename))[0]
+ entry = LoraOnDisk(name, filename)
+
+ available_loras[name] = entry
+
+ if entry.alias in available_lora_aliases:
+ forbidden_lora_aliases[entry.alias.lower()] = 1
+
+ available_lora_aliases[name] = entry
+ available_lora_aliases[entry.alias] = entry
+
+
+re_lora_name = re.compile(r"(.*)\s*\([0-9a-fA-F]+\)")
+
+
+def infotext_pasted(infotext, params):
+ if "AddNet Module 1" in [x[1] for x in scripts.scripts_txt2img.infotext_fields]:
+ return # if the other extension is active, it will handle those fields, no need to do anything
+
+ added = []
+
+ for k in params:
+ if not k.startswith("AddNet Model "):
+ continue
+
+ num = k[13:]
+
+ if params.get("AddNet Module " + num) != "LoRA":
+ continue
+
+ name = params.get("AddNet Model " + num)
+ if name is None:
+ continue
+
+ m = re_lora_name.match(name)
+ if m:
+ name = m.group(1)
+
+ multiplier = params.get("AddNet Weight A " + num, "1.0")
- available_loras[name] = LoraOnDisk(name, filename)
+ added.append(f"<lora:{name}:{multiplier}>")
+ if added:
+ params["Prompt"] += "\n" + "".join(added)
available_loras = {}
+available_lora_aliases = {}
+forbidden_lora_aliases = {}
loaded_loras = []
list_available_loras()
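
The core of the new weight-merging path is lora_calc_updown(); for a plain Linear layer it reduces to up @ down scaled by multiplier * (alpha / rank). A self-contained sketch with made-up shapes and values:

import torch

out_features, in_features, rank = 8, 16, 4
up = torch.randn(out_features, rank)     # lora_up.weight
down = torch.randn(rank, in_features)    # lora_down.weight
alpha, multiplier = 4.0, 0.7

# Same scaling as the patch: multiplier * (alpha / rank), where rank is up.shape[1].
updown = (up @ down) * multiplier * (alpha / up.shape[1])

weight = torch.randn(out_features, in_features)
weight += updown                         # what lora_apply_weights() adds, once per layer
print(weight.shape)                      # torch.Size([8, 16])
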
diff --git a/extensions-builtin/Lora/scripts/lora_script.py b/extensions-builtin/Lora/scripts/lora_script.py
index 2e860160..728e0b86 100644
--- a/extensions-builtin/Lora/scripts/lora_script.py
+++ b/extensions-builtin/Lora/scripts/lora_script.py
@@ -1,15 +1,19 @@
import torch
import gradio as gr
+from fastapi import FastAPI
import lora
import extra_networks_lora
import ui_extra_networks_lora
from modules import script_callbacks, ui_extra_networks, extra_networks, shared
-
def unload():
torch.nn.Linear.forward = torch.nn.Linear_forward_before_lora
+ torch.nn.Linear._load_from_state_dict = torch.nn.Linear_load_state_dict_before_lora
torch.nn.Conv2d.forward = torch.nn.Conv2d_forward_before_lora
+ torch.nn.Conv2d._load_from_state_dict = torch.nn.Conv2d_load_state_dict_before_lora
+ torch.nn.MultiheadAttention.forward = torch.nn.MultiheadAttention_forward_before_lora
+ torch.nn.MultiheadAttention._load_from_state_dict = torch.nn.MultiheadAttention_load_state_dict_before_lora
def before_ui():
@@ -20,19 +24,59 @@ def before_ui():
if not hasattr(torch.nn, 'Linear_forward_before_lora'):
torch.nn.Linear_forward_before_lora = torch.nn.Linear.forward
+if not hasattr(torch.nn, 'Linear_load_state_dict_before_lora'):
+ torch.nn.Linear_load_state_dict_before_lora = torch.nn.Linear._load_from_state_dict
+
if not hasattr(torch.nn, 'Conv2d_forward_before_lora'):
torch.nn.Conv2d_forward_before_lora = torch.nn.Conv2d.forward
+if not hasattr(torch.nn, 'Conv2d_load_state_dict_before_lora'):
+ torch.nn.Conv2d_load_state_dict_before_lora = torch.nn.Conv2d._load_from_state_dict
+
+if not hasattr(torch.nn, 'MultiheadAttention_forward_before_lora'):
+ torch.nn.MultiheadAttention_forward_before_lora = torch.nn.MultiheadAttention.forward
+
+if not hasattr(torch.nn, 'MultiheadAttention_load_state_dict_before_lora'):
+ torch.nn.MultiheadAttention_load_state_dict_before_lora = torch.nn.MultiheadAttention._load_from_state_dict
+
torch.nn.Linear.forward = lora.lora_Linear_forward
+torch.nn.Linear._load_from_state_dict = lora.lora_Linear_load_state_dict
torch.nn.Conv2d.forward = lora.lora_Conv2d_forward
+torch.nn.Conv2d._load_from_state_dict = lora.lora_Conv2d_load_state_dict
+torch.nn.MultiheadAttention.forward = lora.lora_MultiheadAttention_forward
+torch.nn.MultiheadAttention._load_from_state_dict = lora.lora_MultiheadAttention_load_state_dict
script_callbacks.on_model_loaded(lora.assign_lora_names_to_compvis_modules)
script_callbacks.on_script_unloaded(unload)
script_callbacks.on_before_ui(before_ui)
+script_callbacks.on_infotext_pasted(lora.infotext_pasted)
shared.options_templates.update(shared.options_section(('extra_networks', "Extra Networks"), {
- "sd_lora": shared.OptionInfo("None", "Add Lora to prompt", gr.Dropdown, lambda: {"choices": [""] + [x for x in lora.available_loras]}, refresh=lora.list_available_loras),
- "lora_apply_to_outputs": shared.OptionInfo(False, "Apply Lora to outputs rather than inputs when possible (experimental)"),
+ "sd_lora": shared.OptionInfo("None", "Add Lora to prompt", gr.Dropdown, lambda: {"choices": ["None", *lora.available_loras]}, refresh=lora.list_available_loras),
+ "lora_preferred_name": shared.OptionInfo("Alias from file", "When adding to prompt, refer to lora by", gr.Radio, {"choices": ["Alias from file", "Filename"]}),
+}))
+
+shared.options_templates.update(shared.options_section(('compatibility', "Compatibility"), {
+ "lora_functional": shared.OptionInfo(False, "Lora: use old method that takes longer when you have multiple Loras active and produces same results as kohya-ss/sd-webui-additional-networks extension"),
}))
+
+
+def create_lora_json(obj: lora.LoraOnDisk):
+ return {
+ "name": obj.name,
+ "alias": obj.alias,
+ "path": obj.filename,
+ "metadata": obj.metadata,
+ }
+
+
+def api_loras(_: gr.Blocks, app: FastAPI):
+ @app.get("/sdapi/v1/loras")
+ async def get_loras():
+ return [create_lora_json(obj) for obj in lora.available_loras.values()]
+
+
+script_callbacks.on_app_started(api_loras)
+
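
A hedged usage sketch for the /sdapi/v1/loras endpoint registered above, assuming the webui is running locally with its API enabled on the default port:

import requests

resp = requests.get("http://127.0.0.1:7860/sdapi/v1/loras", timeout=10)
for entry in resp.json():
    # Fields come from create_lora_json(): name, alias, path, metadata.
    print(entry["name"], entry["alias"], entry["path"])
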
diff --git a/extensions-builtin/Lora/ui_extra_networks_lora.py b/extensions-builtin/Lora/ui_extra_networks_lora.py
index 68b11332..2050e3fa 100644
--- a/extensions-builtin/Lora/ui_extra_networks_lora.py
+++ b/extensions-builtin/Lora/ui_extra_networks_lora.py
@@ -15,13 +15,19 @@ class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage):
def list_items(self):
for name, lora_on_disk in lora.available_loras.items():
path, ext = os.path.splitext(lora_on_disk.filename)
+
+ if shared.opts.lora_preferred_name == "Filename" or lora_on_disk.alias.lower() in lora.forbidden_lora_aliases:
+ alias = name
+ else:
+ alias = lora_on_disk.alias
+
yield {
"name": name,
"filename": path,
"preview": self.find_preview(path),
"description": self.find_description(path),
"search_term": self.search_terms_from_path(lora_on_disk.filename),
- "prompt": json.dumps(f"<lora:{name}:") + " + opts.extra_networks_default_multiplier + " + json.dumps(">"),
+ "prompt": json.dumps(f"<lora:{alias}:") + " + opts.extra_networks_default_multiplier + " + json.dumps(">"),
"local_preview": f"{path}.{shared.opts.samples_format}",
"metadata": json.dumps(lora_on_disk.metadata, indent=4) if lora_on_disk.metadata else None,
}
diff --git a/extensions-builtin/ScuNET/scripts/scunet_model.py b/extensions-builtin/ScuNET/scripts/scunet_model.py
index e0fbf3a3..cc2cbc6a 100644
--- a/extensions-builtin/ScuNET/scripts/scunet_model.py
+++ b/extensions-builtin/ScuNET/scripts/scunet_model.py
@@ -5,11 +5,14 @@ import traceback
import PIL.Image
import numpy as np
import torch
+from tqdm import tqdm
+
from basicsr.utils.download_util import load_file_from_url
import modules.upscaler
-from modules import devices, modelloader
+from modules import devices, modelloader, script_callbacks
from scunet_model_arch import SCUNet as net
+from modules.shared import opts
class UpscalerScuNET(modules.upscaler.Upscaler):
@@ -42,28 +45,78 @@ class UpscalerScuNET(modules.upscaler.Upscaler):
scalers.append(scaler_data2)
self.scalers = scalers
- def do_upscale(self, img: PIL.Image, selected_file):
+ @staticmethod
+ @torch.no_grad()
+ def tiled_inference(img, model):
+ # test the image tile by tile
+ h, w = img.shape[2:]
+ tile = opts.SCUNET_tile
+ tile_overlap = opts.SCUNET_tile_overlap
+ if tile == 0:
+ return model(img)
+
+ device = devices.get_device_for('scunet')
+ assert tile % 8 == 0, "tile size should be a multiple of window_size"
+ sf = 1
+
+ stride = tile - tile_overlap
+ h_idx_list = list(range(0, h - tile, stride)) + [h - tile]
+ w_idx_list = list(range(0, w - tile, stride)) + [w - tile]
+ E = torch.zeros(1, 3, h * sf, w * sf, dtype=img.dtype, device=device)
+ W = torch.zeros_like(E, dtype=devices.dtype, device=device)
+
+ with tqdm(total=len(h_idx_list) * len(w_idx_list), desc="ScuNET tiles") as pbar:
+ for h_idx in h_idx_list:
+
+ for w_idx in w_idx_list:
+
+ in_patch = img[..., h_idx: h_idx + tile, w_idx: w_idx + tile]
+
+ out_patch = model(in_patch)
+ out_patch_mask = torch.ones_like(out_patch)
+
+ E[
+ ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf
+ ].add_(out_patch)
+ W[
+ ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf
+ ].add_(out_patch_mask)
+ pbar.update(1)
+ output = E.div_(W)
+
+ return output
+
+ def do_upscale(self, img: PIL.Image.Image, selected_file):
+
torch.cuda.empty_cache()
model = self.load_model(selected_file)
if model is None:
+ print(f"ScuNET: Unable to load model from {selected_file}", file=sys.stderr)
return img
device = devices.get_device_for('scunet')
- img = np.array(img)
- img = img[:, :, ::-1]
- img = np.moveaxis(img, 2, 0) / 255
- img = torch.from_numpy(img).float()
- img = img.unsqueeze(0).to(device)
-
- with torch.no_grad():
- output = model(img)
- output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
- output = 255. * np.moveaxis(output, 0, 2)
- output = output.astype(np.uint8)
- output = output[:, :, ::-1]
+ tile = opts.SCUNET_tile
+ h, w = img.height, img.width
+ np_img = np.array(img)
+ np_img = np_img[:, :, ::-1] # RGB to BGR
+ np_img = np_img.transpose((2, 0, 1)) / 255 # HWC to CHW
+ torch_img = torch.from_numpy(np_img).float().unsqueeze(0).to(device) # type: ignore
+
+ if tile > h or tile > w:
+ _img = torch.zeros(1, 3, max(h, tile), max(w, tile), dtype=torch_img.dtype, device=torch_img.device)
+ _img[:, :, :h, :w] = torch_img # pad image
+ torch_img = _img
+
+ torch_output = self.tiled_inference(torch_img, model).squeeze(0)
+ torch_output = torch_output[:, :h * 1, :w * 1] # remove padding, if any
+ np_output: np.ndarray = torch_output.float().cpu().clamp_(0, 1).numpy()
+ del torch_img, torch_output
torch.cuda.empty_cache()
- return PIL.Image.fromarray(output, 'RGB')
+
+ output = np_output.transpose((1, 2, 0)) # CHW to HWC
+ output = output[:, :, ::-1] # BGR to RGB
+ return PIL.Image.fromarray((output * 255).astype(np.uint8))
def load_model(self, path: str):
device = devices.get_device_for('scunet')
@@ -79,9 +132,19 @@ class UpscalerScuNET(modules.upscaler.Upscaler):
model = net(in_nc=3, config=[4, 4, 4, 4, 4, 4, 4], dim=64)
model.load_state_dict(torch.load(filename), strict=True)
model.eval()
- for k, v in model.named_parameters():
+ for _, v in model.named_parameters():
v.requires_grad = False
model = model.to(device)
return model
+
+def on_ui_settings():
+ import gradio as gr
+ from modules import shared
+
+ shared.opts.add_option("SCUNET_tile", shared.OptionInfo(256, "Tile size for SCUNET upscalers.", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}, section=('upscaling', "Upscaling")).info("0 = no tiling"))
+ shared.opts.add_option("SCUNET_tile_overlap", shared.OptionInfo(8, "Tile overlap for SCUNET upscalers.", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}, section=('upscaling', "Upscaling")).info("Low values = visible seam"))
+
+
+script_callbacks.on_ui_settings(on_ui_settings)
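
The tiled_inference() added above walks the image with stride tile - tile_overlap, clamps the last index to the border, and averages the overlaps via E / W. A sketch of the index computation with made-up dimensions:

h, w = 600, 500
tile, tile_overlap = 256, 8
stride = tile - tile_overlap

h_idx_list = list(range(0, h - tile, stride)) + [h - tile]
w_idx_list = list(range(0, w - tile, stride)) + [w - tile]

print(h_idx_list)  # [0, 248, 344] -> row tiles cover 0-255, 248-503, 344-599
print(w_idx_list)  # [0, 244]      -> column tiles cover 0-255, 244-499
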
diff --git a/extensions-builtin/ScuNET/scunet_model_arch.py b/extensions-builtin/ScuNET/scunet_model_arch.py
index 43ca8d36..b51a8806 100644
--- a/extensions-builtin/ScuNET/scunet_model_arch.py
+++ b/extensions-builtin/ScuNET/scunet_model_arch.py
@@ -61,7 +61,9 @@ class WMSA(nn.Module):
Returns:
output: tensor shape [b h w c]
"""
- if self.type != 'W': x = torch.roll(x, shifts=(-(self.window_size // 2), -(self.window_size // 2)), dims=(1, 2))
+ if self.type != 'W':
+ x = torch.roll(x, shifts=(-(self.window_size // 2), -(self.window_size // 2)), dims=(1, 2))
+
x = rearrange(x, 'b (w1 p1) (w2 p2) c -> b w1 w2 p1 p2 c', p1=self.window_size, p2=self.window_size)
h_windows = x.size(1)
w_windows = x.size(2)
@@ -85,8 +87,9 @@ class WMSA(nn.Module):
output = self.linear(output)
output = rearrange(output, 'b (w1 w2) (p1 p2) c -> b (w1 p1) (w2 p2) c', w1=h_windows, p1=self.window_size)
- if self.type != 'W': output = torch.roll(output, shifts=(self.window_size // 2, self.window_size // 2),
- dims=(1, 2))
+ if self.type != 'W':
+ output = torch.roll(output, shifts=(self.window_size // 2, self.window_size // 2), dims=(1, 2))
+
return output
def relative_embedding(self):
@@ -262,4 +265,4 @@ class SCUNet(nn.Module):
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.LayerNorm):
nn.init.constant_(m.bias, 0)
- nn.init.constant_(m.weight, 1.0) \ No newline at end of file
+ nn.init.constant_(m.weight, 1.0)
diff --git a/extensions-builtin/SwinIR/scripts/swinir_model.py b/extensions-builtin/SwinIR/scripts/swinir_model.py
index e8783bca..0ba50487 100644
--- a/extensions-builtin/SwinIR/scripts/swinir_model.py
+++ b/extensions-builtin/SwinIR/scripts/swinir_model.py
@@ -1,4 +1,3 @@
-import contextlib
import os
import numpy as np
@@ -8,7 +7,7 @@ from basicsr.utils.download_util import load_file_from_url
from tqdm import tqdm
from modules import modelloader, devices, script_callbacks, shared
-from modules.shared import cmd_opts, opts, state
+from modules.shared import opts, state
from swinir_model_arch import SwinIR as net
from swinir_model_arch_v2 import Swin2SR as net2
from modules.upscaler import Upscaler, UpscalerData
@@ -45,7 +44,7 @@ class UpscalerSwinIR(Upscaler):
img = upscale(img, model)
try:
torch.cuda.empty_cache()
- except:
+ except Exception:
pass
return img
@@ -151,7 +150,7 @@ def inference(img, model, tile, tile_overlap, window_size, scale):
for w_idx in w_idx_list:
if state.interrupted or state.skipped:
break
-
+
in_patch = img[..., h_idx: h_idx + tile, w_idx: w_idx + tile]
out_patch = model(in_patch)
out_patch_mask = torch.ones_like(out_patch)
diff --git a/extensions-builtin/SwinIR/swinir_model_arch.py b/extensions-builtin/SwinIR/swinir_model_arch.py
index 863f42db..93b93274 100644
--- a/extensions-builtin/SwinIR/swinir_model_arch.py
+++ b/extensions-builtin/SwinIR/swinir_model_arch.py
@@ -644,7 +644,7 @@ class SwinIR(nn.Module):
"""
def __init__(self, img_size=64, patch_size=1, in_chans=3,
- embed_dim=96, depths=[6, 6, 6, 6], num_heads=[6, 6, 6, 6],
+ embed_dim=96, depths=(6, 6, 6, 6), num_heads=(6, 6, 6, 6),
window_size=7, mlp_ratio=4., qkv_bias=True, qk_scale=None,
drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
norm_layer=nn.LayerNorm, ape=False, patch_norm=True,
@@ -805,7 +805,7 @@ class SwinIR(nn.Module):
def forward(self, x):
H, W = x.shape[2:]
x = self.check_image_size(x)
-
+
self.mean = self.mean.type_as(x)
x = (x - self.mean) * self.img_range
@@ -844,7 +844,7 @@ class SwinIR(nn.Module):
H, W = self.patches_resolution
flops += H * W * 3 * self.embed_dim * 9
flops += self.patch_embed.flops()
- for i, layer in enumerate(self.layers):
+ for layer in self.layers:
flops += layer.flops()
flops += H * W * 3 * self.embed_dim * self.embed_dim
flops += self.upsample.flops()
diff --git a/extensions-builtin/SwinIR/swinir_model_arch_v2.py b/extensions-builtin/SwinIR/swinir_model_arch_v2.py
index 0e28ae6e..dad22cca 100644
--- a/extensions-builtin/SwinIR/swinir_model_arch_v2.py
+++ b/extensions-builtin/SwinIR/swinir_model_arch_v2.py
@@ -74,7 +74,7 @@ class WindowAttention(nn.Module):
"""
def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0.,
- pretrained_window_size=[0, 0]):
+ pretrained_window_size=(0, 0)):
super().__init__()
self.dim = dim
@@ -241,7 +241,7 @@ class SwinTransformerBlock(nn.Module):
attn_mask = None
self.register_buffer("attn_mask", attn_mask)
-
+
def calculate_mask(self, x_size):
# calculate attention mask for SW-MSA
H, W = x_size
@@ -263,7 +263,7 @@ class SwinTransformerBlock(nn.Module):
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
- return attn_mask
+ return attn_mask
def forward(self, x, x_size):
H, W = x_size
@@ -288,7 +288,7 @@ class SwinTransformerBlock(nn.Module):
attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C
else:
attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size).to(x.device))
-
+
# merge windows
attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C
@@ -369,7 +369,7 @@ class PatchMerging(nn.Module):
H, W = self.input_resolution
flops = (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim
flops += H * W * self.dim // 2
- return flops
+ return flops
class BasicLayer(nn.Module):
""" A basic Swin Transformer layer for one stage.
@@ -447,7 +447,7 @@ class BasicLayer(nn.Module):
nn.init.constant_(blk.norm1.weight, 0)
nn.init.constant_(blk.norm2.bias, 0)
nn.init.constant_(blk.norm2.weight, 0)
-
+
class PatchEmbed(nn.Module):
r""" Image to Patch Embedding
Args:
@@ -492,7 +492,7 @@ class PatchEmbed(nn.Module):
flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1])
if self.norm is not None:
flops += Ho * Wo * self.embed_dim
- return flops
+ return flops
class RSTB(nn.Module):
"""Residual Swin Transformer Block (RSTB).
@@ -531,7 +531,7 @@ class RSTB(nn.Module):
num_heads=num_heads,
window_size=window_size,
mlp_ratio=mlp_ratio,
- qkv_bias=qkv_bias,
+ qkv_bias=qkv_bias,
drop=drop, attn_drop=attn_drop,
drop_path=drop_path,
norm_layer=norm_layer,
@@ -622,7 +622,7 @@ class Upsample(nn.Sequential):
else:
raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.')
super(Upsample, self).__init__(*m)
-
+
class Upsample_hf(nn.Sequential):
"""Upsample module.
@@ -642,7 +642,7 @@ class Upsample_hf(nn.Sequential):
m.append(nn.PixelShuffle(3))
else:
raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.')
- super(Upsample_hf, self).__init__(*m)
+ super(Upsample_hf, self).__init__(*m)
class UpsampleOneStep(nn.Sequential):
@@ -667,8 +667,8 @@ class UpsampleOneStep(nn.Sequential):
H, W = self.input_resolution
flops = H * W * self.num_feat * 3 * 9
return flops
-
-
+
+
class Swin2SR(nn.Module):
r""" Swin2SR
@@ -698,8 +698,8 @@ class Swin2SR(nn.Module):
"""
def __init__(self, img_size=64, patch_size=1, in_chans=3,
- embed_dim=96, depths=[6, 6, 6, 6], num_heads=[6, 6, 6, 6],
- window_size=7, mlp_ratio=4., qkv_bias=True,
+ embed_dim=96, depths=(6, 6, 6, 6), num_heads=(6, 6, 6, 6),
+ window_size=7, mlp_ratio=4., qkv_bias=True,
drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
norm_layer=nn.LayerNorm, ape=False, patch_norm=True,
use_checkpoint=False, upscale=2, img_range=1., upsampler='', resi_connection='1conv',
@@ -764,7 +764,7 @@ class Swin2SR(nn.Module):
num_heads=num_heads[i_layer],
window_size=window_size,
mlp_ratio=self.mlp_ratio,
- qkv_bias=qkv_bias,
+ qkv_bias=qkv_bias,
drop=drop_rate, attn_drop=attn_drop_rate,
drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results
norm_layer=norm_layer,
@@ -776,7 +776,7 @@ class Swin2SR(nn.Module):
)
self.layers.append(layer)
-
+
if self.upsampler == 'pixelshuffle_hf':
self.layers_hf = nn.ModuleList()
for i_layer in range(self.num_layers):
@@ -787,7 +787,7 @@ class Swin2SR(nn.Module):
num_heads=num_heads[i_layer],
window_size=window_size,
mlp_ratio=self.mlp_ratio,
- qkv_bias=qkv_bias,
+ qkv_bias=qkv_bias,
drop=drop_rate, attn_drop=attn_drop_rate,
drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results
norm_layer=norm_layer,
@@ -799,7 +799,7 @@ class Swin2SR(nn.Module):
)
self.layers_hf.append(layer)
-
+
self.norm = norm_layer(self.num_features)
# build the last conv layer in deep feature extraction
@@ -829,10 +829,10 @@ class Swin2SR(nn.Module):
self.conv_aux = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
self.conv_after_aux = nn.Sequential(
nn.Conv2d(3, num_feat, 3, 1, 1),
- nn.LeakyReLU(inplace=True))
+ nn.LeakyReLU(inplace=True))
self.upsample = Upsample(upscale, num_feat)
self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
-
+
elif self.upsampler == 'pixelshuffle_hf':
self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
nn.LeakyReLU(inplace=True))
@@ -846,7 +846,7 @@ class Swin2SR(nn.Module):
nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
nn.LeakyReLU(inplace=True))
self.conv_last_hf = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
-
+
elif self.upsampler == 'pixelshuffledirect':
# for lightweight SR (to save parameters)
self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch,
@@ -905,7 +905,7 @@ class Swin2SR(nn.Module):
x = self.patch_unembed(x, x_size)
return x
-
+
def forward_features_hf(self, x):
x_size = (x.shape[2], x.shape[3])
x = self.patch_embed(x)
@@ -919,7 +919,7 @@ class Swin2SR(nn.Module):
x = self.norm(x) # B L C
x = self.patch_unembed(x, x_size)
- return x
+ return x
def forward(self, x):
H, W = x.shape[2:]
@@ -951,7 +951,7 @@ class Swin2SR(nn.Module):
x = self.conv_after_body(self.forward_features(x)) + x
x_before = self.conv_before_upsample(x)
x_out = self.conv_last(self.upsample(x_before))
-
+
x_hf = self.conv_first_hf(x_before)
x_hf = self.conv_after_body_hf(self.forward_features_hf(x_hf)) + x_hf
x_hf = self.conv_before_upsample_hf(x_hf)
@@ -977,15 +977,15 @@ class Swin2SR(nn.Module):
x_first = self.conv_first(x)
res = self.conv_after_body(self.forward_features(x_first)) + x_first
x = x + self.conv_last(res)
-
+
x = x / self.img_range + self.mean
if self.upsampler == "pixelshuffle_aux":
return x[:, :, :H*self.upscale, :W*self.upscale], aux
-
+
elif self.upsampler == "pixelshuffle_hf":
x_out = x_out / self.img_range + self.mean
return x_out[:, :, :H*self.upscale, :W*self.upscale], x[:, :, :H*self.upscale, :W*self.upscale], x_hf[:, :, :H*self.upscale, :W*self.upscale]
-
+
else:
return x[:, :, :H*self.upscale, :W*self.upscale]
@@ -994,7 +994,7 @@ class Swin2SR(nn.Module):
H, W = self.patches_resolution
flops += H * W * 3 * self.embed_dim * 9
flops += self.patch_embed.flops()
- for i, layer in enumerate(self.layers):
+ for layer in self.layers:
flops += layer.flops()
flops += H * W * 3 * self.embed_dim * self.embed_dim
flops += self.upsample.flops()
@@ -1014,4 +1014,4 @@ if __name__ == '__main__':
x = torch.randn((1, 3, height, width))
x = model(x)
- print(x.shape) \ No newline at end of file
+ print(x.shape)
diff --git a/extensions-builtin/prompt-bracket-checker/javascript/prompt-bracket-checker.js b/extensions-builtin/prompt-bracket-checker/javascript/prompt-bracket-checker.js
index 4a85c8eb..5c7a836a 100644
--- a/extensions-builtin/prompt-bracket-checker/javascript/prompt-bracket-checker.js
+++ b/extensions-builtin/prompt-bracket-checker/javascript/prompt-bracket-checker.js
@@ -1,110 +1,42 @@
// Stable Diffusion WebUI - Bracket checker
-// Version 1.0
-// By Hingashi no Florin/Bwin4L
+// By Hingashi no Florin/Bwin4L & @akx
// Counts open and closed brackets (round, square, curly) in the prompt and negative prompt text boxes in the txt2img and img2img tabs.
// If there's a mismatch, the keyword counter turns red and if you hover on it, a tooltip tells you what's wrong.
-function checkBrackets(evt, textArea, counterElt) {
- errorStringParen = '(...) - Different number of opening and closing parentheses detected.\n';
- errorStringSquare = '[...] - Different number of opening and closing square brackets detected.\n';
- errorStringCurly = '{...} - Different number of opening and closing curly brackets detected.\n';
-
- openBracketRegExp = /\(/g;
- closeBracketRegExp = /\)/g;
-
- openSquareBracketRegExp = /\[/g;
- closeSquareBracketRegExp = /\]/g;
-
- openCurlyBracketRegExp = /\{/g;
- closeCurlyBracketRegExp = /\}/g;
-
- totalOpenBracketMatches = 0;
- totalCloseBracketMatches = 0;
- totalOpenSquareBracketMatches = 0;
- totalCloseSquareBracketMatches = 0;
- totalOpenCurlyBracketMatches = 0;
- totalCloseCurlyBracketMatches = 0;
-
- openBracketMatches = textArea.value.match(openBracketRegExp);
- if(openBracketMatches) {
- totalOpenBracketMatches = openBracketMatches.length;
- }
-
- closeBracketMatches = textArea.value.match(closeBracketRegExp);
- if(closeBracketMatches) {
- totalCloseBracketMatches = closeBracketMatches.length;
- }
-
- openSquareBracketMatches = textArea.value.match(openSquareBracketRegExp);
- if(openSquareBracketMatches) {
- totalOpenSquareBracketMatches = openSquareBracketMatches.length;
- }
-
- closeSquareBracketMatches = textArea.value.match(closeSquareBracketRegExp);
- if(closeSquareBracketMatches) {
- totalCloseSquareBracketMatches = closeSquareBracketMatches.length;
- }
-
- openCurlyBracketMatches = textArea.value.match(openCurlyBracketRegExp);
- if(openCurlyBracketMatches) {
- totalOpenCurlyBracketMatches = openCurlyBracketMatches.length;
- }
-
- closeCurlyBracketMatches = textArea.value.match(closeCurlyBracketRegExp);
- if(closeCurlyBracketMatches) {
- totalCloseCurlyBracketMatches = closeCurlyBracketMatches.length;
- }
-
- if(totalOpenBracketMatches != totalCloseBracketMatches) {
- if(!counterElt.title.includes(errorStringParen)) {
- counterElt.title += errorStringParen;
+function checkBrackets(textArea, counterElt) {
+ var counts = {};
+ (textArea.value.match(/[(){}\[\]]/g) || []).forEach(bracket => {
+ counts[bracket] = (counts[bracket] || 0) + 1;
+ });
+ var errors = [];
+
+ function checkPair(open, close, kind) {
+ if (counts[open] !== counts[close]) {
+ errors.push(
+ `${open}...${close} - Detected ${counts[open] || 0} opening and ${counts[close] || 0} closing ${kind}.`
+ );
}
- } else {
- counterElt.title = counterElt.title.replace(errorStringParen, '');
}
- if(totalOpenSquareBracketMatches != totalCloseSquareBracketMatches) {
- if(!counterElt.title.includes(errorStringSquare)) {
- counterElt.title += errorStringSquare;
- }
- } else {
- counterElt.title = counterElt.title.replace(errorStringSquare, '');
- }
+ checkPair('(', ')', 'round brackets');
+ checkPair('[', ']', 'square brackets');
+ checkPair('{', '}', 'curly brackets');
+ counterElt.title = errors.join('\n');
+ counterElt.classList.toggle('error', errors.length !== 0);
+}
- if(totalOpenCurlyBracketMatches != totalCloseCurlyBracketMatches) {
- if(!counterElt.title.includes(errorStringCurly)) {
- counterElt.title += errorStringCurly;
- }
- } else {
- counterElt.title = counterElt.title.replace(errorStringCurly, '');
- }
+function setupBracketChecking(id_prompt, id_counter) {
+ var textarea = gradioApp().querySelector("#" + id_prompt + " > label > textarea");
+ var counter = gradioApp().getElementById(id_counter)
- if(counterElt.title != '') {
- counterElt.classList.add('error');
- } else {
- counterElt.classList.remove('error');
+ if (textarea && counter) {
+ textarea.addEventListener("input", () => checkBrackets(textarea, counter));
}
}
-function setupBracketChecking(id_prompt, id_counter){
- var textarea = gradioApp().querySelector("#" + id_prompt + " > label > textarea");
- var counter = gradioApp().getElementById(id_counter)
- textarea.addEventListener("input", function(evt){
- checkBrackets(evt, textarea, counter)
- });
-}
-
-var shadowRootLoaded = setInterval(function() {
- var shadowRoot = document.querySelector('gradio-app').shadowRoot;
- if(! shadowRoot) return false;
-
- var shadowTextArea = shadowRoot.querySelectorAll('#txt2img_prompt > label > textarea');
- if(shadowTextArea.length < 1) return false;
-
- clearInterval(shadowRootLoaded);
-
- setupBracketChecking('txt2img_prompt', 'txt2img_token_counter')
- setupBracketChecking('txt2img_neg_prompt', 'txt2img_negative_token_counter')
- setupBracketChecking('img2img_prompt', 'imgimg_token_counter')
- setupBracketChecking('img2img_neg_prompt', 'img2img_negative_token_counter')
-}, 1000);
+onUiLoaded(function () {
+ setupBracketChecking('txt2img_prompt', 'txt2img_token_counter');
+ setupBracketChecking('txt2img_neg_prompt', 'txt2img_negative_token_counter');
+ setupBracketChecking('img2img_prompt', 'img2img_token_counter');
+ setupBracketChecking('img2img_neg_prompt', 'img2img_negative_token_counter');
+});
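
For readers skimming the JavaScript, a Python transliteration of the new checkBrackets() counting logic; the shipped implementation stays in JS, and the sample prompt is made up:

import re
from collections import Counter

text = "a (red) [cat in a {hat"
counts = Counter(re.findall(r"[(){}\[\]]", text))

errors = []
for open_b, close_b, kind in [("(", ")", "round"), ("[", "]", "square"), ("{", "}", "curly")]:
    if counts[open_b] != counts[close_b]:
        errors.append(f"{open_b}...{close_b} - Detected {counts[open_b]} opening and {counts[close_b]} closing {kind} brackets.")

print("\n".join(errors))  # reports the unmatched [ and {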