Date: Wed, 23 Nov 2022 02:49:01 +0900
Subject: small fixes
---
modules/hypernetworks/hypernetwork.py | 6 +++---
modules/textual_inversion/textual_inversion.py | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)
(limited to 'modules')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 0128419b..4541af18 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -435,8 +435,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
optimizer_name = hypernetwork.optimizer_name
else:
print(f"Optimizer type {hypernetwork.optimizer_name} is not defined!")
- optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate)
- optimizer_name = 'AdamW'
+ optimizer = torch.optim.AdamW(params=weights, lr=scheduler.learn_rate)
+ optimizer_name = 'AdamW'
if hypernetwork.optimizer_state_dict: # This line must be changed if Optimizer type can be different from saved optimizer.
try:
@@ -582,7 +582,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
shared.state.textinfo = f"""
Loss: {loss_step:.7f}
-Step: {hypernetwork.step}
+Step: {steps_done}
Last prompt: {html.escape(batch.cond_text[0])}
Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 3036e48a..fee08e33 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -436,7 +436,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
shared.state.textinfo = f"""
Loss: {loss_step:.7f}
-Step: {embedding.step}
+Step: {steps_done}
Last prompt: {html.escape(batch.cond_text[0])}
Last saved embedding: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}
--
cgit v1.2.1
From 75b67eebf21f72f5b693926476d9c3b12471f0d6 Mon Sep 17 00:00:00 2001
From: Sena <34237511+sena-nana@users.noreply.github.com>
Date: Wed, 23 Nov 2022 17:43:58 +0800
Subject: Fix bare base64 not accept
---
modules/api/api.py | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 7a567be3..648bd6a8 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -3,6 +3,7 @@ import io
import time
import uvicorn
from threading import Lock
+from io import BytesIO
from gradio.processing_utils import encode_pil_to_base64, decode_base64_to_file, decode_base64_to_image
from fastapi import APIRouter, Depends, FastAPI, HTTPException
from fastapi.security import HTTPBasic, HTTPBasicCredentials
@@ -13,7 +14,7 @@ from modules import sd_samplers, deepbooru
from modules.api.models import *
from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
from modules.extras import run_extras, run_pnginfo
-from PIL import PngImagePlugin
+from PIL import PngImagePlugin,Image
from modules.sd_models import checkpoints_list
from modules.realesrgan_model import get_realesrgan_models
from typing import List
@@ -133,7 +134,10 @@ class Api:
mask = img2imgreq.mask
if mask:
- mask = decode_base64_to_image(mask)
+ if mask.startswith("data:image/"):
+ mask = decode_base64_to_image(mask)
+ else:
+ mask = Image.open(BytesIO(base64.b64decode(mask)))
populate = img2imgreq.copy(update={ # Override __init__ params
"sd_model": shared.sd_model,
@@ -147,7 +151,10 @@ class Api:
imgs = []
for img in init_images:
- img = decode_base64_to_image(img)
+ if img.startswith("data:image/"):
+ img = decode_base64_to_image(img)
+ else:
+ img = Image.open(BytesIO(base64.b64decode(img)))
imgs = [img] * p.batch_size
p.init_images = imgs
--
cgit v1.2.1
From adb6cb7619989cbc7a271cc6c2ae27bb936c43d9 Mon Sep 17 00:00:00 2001
From: Billy Cao
Date: Wed, 23 Nov 2022 18:11:24 +0800
Subject: Patch UNet Forward to support resolutions that are not multiples of
64 Also modifed the UI to no longer step in 64
---
modules/sd_hijack.py | 2 ++
modules/sd_hijack_optimizations.py | 31 +++++++++++++++++++++++++++++++
modules/ui.py | 24 ++++++++++++------------
3 files changed, 45 insertions(+), 12 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index eaedac13..6141f705 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -16,6 +16,7 @@ import ldm.modules.attention
import ldm.modules.diffusionmodules.model
import ldm.models.diffusion.ddim
import ldm.models.diffusion.plms
+import ldm.modules.diffusionmodules.openaimodel
attention_CrossAttention_forward = ldm.modules.attention.CrossAttention.forward
diffusionmodules_model_nonlinearity = ldm.modules.diffusionmodules.model.nonlinearity
@@ -26,6 +27,7 @@ def apply_optimizations():
undo_optimizations()
ldm.modules.diffusionmodules.model.nonlinearity = silu
+ ldm.modules.diffusionmodules.openaimodel.UNetModel.forward = sd_hijack_optimizations.patched_unet_forward
if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and (6, 0) <= torch.cuda.get_device_capability(shared.device) <= (9, 0)):
print("Applying xformers cross attention optimization.")
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 98123fbf..8cd4c954 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -5,6 +5,7 @@ import importlib
import torch
from torch import einsum
+import torch.nn.functional as F
from ldm.util import default
from einops import rearrange
@@ -12,6 +13,8 @@ from einops import rearrange
from modules import shared
from modules.hypernetworks import hypernetwork
+from ldm.modules.diffusionmodules.util import timestep_embedding
+
if shared.cmd_opts.xformers or shared.cmd_opts.force_enable_xformers:
try:
@@ -310,3 +313,31 @@ def xformers_attnblock_forward(self, x):
return x + out
except NotImplementedError:
return cross_attention_attnblock_forward(self, x)
+
+def patched_unet_forward(self, x, timesteps=None, context=None, y=None,**kwargs):
+ assert (y is not None) == (
+ self.num_classes is not None
+ ), "must specify y if and only if the model is class-conditional"
+ hs = []
+ t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False)
+ emb = self.time_embed(t_emb)
+
+ if self.num_classes is not None:
+ assert y.shape == (x.shape[0],)
+ emb = emb + self.label_emb(y)
+
+ h = x.type(self.dtype)
+ for module in self.input_blocks:
+ h = module(h, emb, context)
+ hs.append(h)
+ h = self.middle_block(h, emb, context)
+ for module in self.output_blocks:
+ if h.shape[-2:] != hs[-1].shape[-2:]:
+ h = F.interpolate(h, hs[-1].shape[-2:], mode="nearest")
+ h = torch.cat([h, hs.pop()], dim=1)
+ h = module(h, emb, context)
+ h = h.type(x.dtype)
+ if self.predict_codebook_ids:
+ return self.id_predictor(h)
+ else:
+ return self.out(h)
diff --git a/modules/ui.py b/modules/ui.py
index e6da1b2a..85e531af 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -380,8 +380,8 @@ def create_seed_inputs():
with gr.Row(visible=False) as seed_extra_row_2:
seed_extras.append(seed_extra_row_2)
- seed_resize_from_w = gr.Slider(minimum=0, maximum=2048, step=64, label="Resize seed from width", value=0)
- seed_resize_from_h = gr.Slider(minimum=0, maximum=2048, step=64, label="Resize seed from height", value=0)
+ seed_resize_from_w = gr.Slider(minimum=0, maximum=2048, step=1, label="Resize seed from width", value=0)
+ seed_resize_from_h = gr.Slider(minimum=0, maximum=2048, step=1, label="Resize seed from height", value=0)
random_seed.click(fn=lambda: -1, show_progress=False, inputs=[], outputs=[seed])
random_subseed.click(fn=lambda: -1, show_progress=False, inputs=[], outputs=[subseed])
@@ -715,8 +715,8 @@ def create_ui(wrap_gradio_gpu_call):
sampler_index = gr.Radio(label='Sampling method', elem_id="txt2img_sampling", choices=[x.name for x in samplers], value=samplers[0].name, type="index")
with gr.Group():
- width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512)
- height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512)
+ width = gr.Slider(minimum=64, maximum=2048, step=1, label="Width", value=512)
+ height = gr.Slider(minimum=64, maximum=2048, step=1, label="Height", value=512)
with gr.Row():
restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1)
@@ -724,8 +724,8 @@ def create_ui(wrap_gradio_gpu_call):
enable_hr = gr.Checkbox(label='Highres. fix', value=False)
with gr.Row(visible=False) as hr_options:
- firstphase_width = gr.Slider(minimum=0, maximum=1024, step=64, label="Firstpass width", value=0)
- firstphase_height = gr.Slider(minimum=0, maximum=1024, step=64, label="Firstpass height", value=0)
+ firstphase_width = gr.Slider(minimum=0, maximum=1024, step=1, label="Firstpass width", value=0)
+ firstphase_height = gr.Slider(minimum=0, maximum=1024, step=1, label="Firstpass height", value=0)
denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.7)
with gr.Row(equal_height=True):
@@ -901,8 +901,8 @@ def create_ui(wrap_gradio_gpu_call):
sampler_index = gr.Radio(label='Sampling method', choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index")
with gr.Group():
- width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512, elem_id="img2img_width")
- height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512, elem_id="img2img_height")
+ width = gr.Slider(minimum=64, maximum=2048, step=1, label="Width", value=512, elem_id="img2img_width")
+ height = gr.Slider(minimum=64, maximum=2048, step=1, label="Height", value=512, elem_id="img2img_height")
with gr.Row():
restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1)
@@ -1231,8 +1231,8 @@ def create_ui(wrap_gradio_gpu_call):
with gr.Tab(label="Preprocess images"):
process_src = gr.Textbox(label='Source directory')
process_dst = gr.Textbox(label='Destination directory')
- process_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512)
- process_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512)
+ process_width = gr.Slider(minimum=64, maximum=2048, step=1, label="Width", value=512)
+ process_height = gr.Slider(minimum=64, maximum=2048, step=1, label="Height", value=512)
preprocess_txt_action = gr.Dropdown(label='Existing Caption txt Action', value="ignore", choices=["ignore", "copy", "prepend", "append"])
with gr.Row():
@@ -1289,8 +1289,8 @@ def create_ui(wrap_gradio_gpu_call):
dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images")
log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion")
template_file = gr.Textbox(label='Prompt template file', value=os.path.join(script_path, "textual_inversion_templates", "style_filewords.txt"))
- training_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512)
- training_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512)
+ training_width = gr.Slider(minimum=64, maximum=2048, step=1, label="Width", value=512)
+ training_height = gr.Slider(minimum=64, maximum=2048, step=1, label="Height", value=512)
steps = gr.Number(label='Max steps', value=100000, precision=0)
create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0)
save_embedding_every = gr.Number(label='Save a copy of embedding to log directory every N steps, 0 to disable', value=500, precision=0)
--
cgit v1.2.1
From d2c97fc3fe5857d6fba9ad1695ed3ac6ec455ca9 Mon Sep 17 00:00:00 2001
From: flamelaw
Date: Wed, 23 Nov 2022 20:00:00 +0900
Subject: fix dropout, implement train/eval mode
---
modules/hypernetworks/hypernetwork.py | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
(limited to 'modules')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 4541af18..9388959f 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -154,16 +154,28 @@ class Hypernetwork:
HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init,
self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout),
)
+ self.eval_mode()
def weights(self):
res = []
+ for k, layers in self.layers.items():
+ for layer in layers:
+ res += layer.parameters()
+ return res
+ def train_mode(self):
for k, layers in self.layers.items():
for layer in layers:
layer.train()
- res += layer.trainables()
+ for param in layer.parameters():
+ param.requires_grad = True
- return res
+ def eval_mode(self):
+ for k, layers in self.layers.items():
+ for layer in layers:
+ layer.eval()
+ for param in layer.parameters():
+ param.requires_grad = False
def save(self, filename):
state_dict = {}
@@ -426,8 +438,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
shared.sd_model.first_stage_model.to(devices.cpu)
weights = hypernetwork.weights()
- for weight in weights:
- weight.requires_grad = True
+ hypernetwork.train_mode()
# Here we use optimizer from saved HN, or we can specify as UI option.
if hypernetwork.optimizer_name in optimizer_dict:
@@ -538,7 +549,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
if images_dir is not None and steps_done % create_image_every == 0:
forced_filename = f'{hypernetwork_name}-{steps_done}'
last_saved_image = os.path.join(images_dir, forced_filename)
-
+ hypernetwork.eval_mode()
shared.sd_model.cond_stage_model.to(devices.device)
shared.sd_model.first_stage_model.to(devices.device)
@@ -571,7 +582,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
if unload:
shared.sd_model.cond_stage_model.to(devices.cpu)
shared.sd_model.first_stage_model.to(devices.cpu)
-
+ hypernetwork.train_mode()
if image is not None:
shared.state.current_image = image
last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename, save_to_dirs=False)
@@ -593,6 +604,7 @@ Last saved image: {html.escape(last_saved_image)}
finally:
pbar.leave = False
pbar.close()
+ hypernetwork.eval_mode()
#report_statistics(loss_dict)
filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork_name}.pt')
--
cgit v1.2.1
From 1bd57cc9791e2e742f72a3d74d589f2c289e8e92 Mon Sep 17 00:00:00 2001
From: flamelaw
Date: Wed, 23 Nov 2022 20:21:52 +0900
Subject: last_layer_dropout default to False
---
modules/hypernetworks/hypernetwork.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 9388959f..8466887f 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -38,7 +38,7 @@ class HypernetworkModule(torch.nn.Module):
activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'})
def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal',
- add_layer_norm=False, use_dropout=False, activate_output=False, last_layer_dropout=True):
+ add_layer_norm=False, use_dropout=False, activate_output=False, last_layer_dropout=False):
super().__init__()
assert layer_structure is not None, "layer_structure must not be None"
--
cgit v1.2.1
From 6001684be3e7b023346326b9dfc771219b8fe47e Mon Sep 17 00:00:00 2001
From: "Alex \"mcmonkey\" Goodwin"
Date: Wed, 23 Nov 2022 06:35:44 -0800
Subject: add model_name pattern for saving
---
modules/images.py | 1 +
1 file changed, 1 insertion(+)
(limited to 'modules')
diff --git a/modules/images.py b/modules/images.py
index 26d5b7a9..420828b0 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -305,6 +305,7 @@ class FilenameGenerator:
'styles': lambda self: self.p and sanitize_filename_part(", ".join([style for style in self.p.styles if not style == "None"]) or "None", replace_spaces=False),
'sampler': lambda self: self.p and sanitize_filename_part(self.p.sampler_name, replace_spaces=False),
'model_hash': lambda self: getattr(self.p, "sd_model_hash", shared.sd_model.sd_model_hash),
+ 'model_name': lambda self: shared.sd_model.sd_checkpoint_info.model_name,
'date': lambda self: datetime.datetime.now().strftime('%Y-%m-%d'),
'datetime': lambda self, *args: self.datetime(*args), # accepts formats: [datetime], [datetime], [datetime]
'job_timestamp': lambda self: getattr(self.p, "job_timestamp", shared.state.job_timestamp),
--
cgit v1.2.1
From ffcbbcf385eb847ced957510ab726291a8b20207 Mon Sep 17 00:00:00 2001
From: "Alex \"mcmonkey\" Goodwin"
Date: Wed, 23 Nov 2022 06:44:20 -0800
Subject: add filename santization
Probably redundant, considering the model name *is* a filename, but I suppose better safe than sorry.
---
modules/images.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/images.py b/modules/images.py
index 420828b0..8fa96b16 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -305,7 +305,7 @@ class FilenameGenerator:
'styles': lambda self: self.p and sanitize_filename_part(", ".join([style for style in self.p.styles if not style == "None"]) or "None", replace_spaces=False),
'sampler': lambda self: self.p and sanitize_filename_part(self.p.sampler_name, replace_spaces=False),
'model_hash': lambda self: getattr(self.p, "sd_model_hash", shared.sd_model.sd_model_hash),
- 'model_name': lambda self: shared.sd_model.sd_checkpoint_info.model_name,
+ 'model_name': lambda self: sanitize_filename_part(shared.sd_model.sd_checkpoint_info.model_name, replace_spaces=False),
'date': lambda self: datetime.datetime.now().strftime('%Y-%m-%d'),
'datetime': lambda self, *args: self.datetime(*args), # accepts formats: [datetime], [datetime], [datetime]
'job_timestamp': lambda self: getattr(self.p, "job_timestamp", shared.state.job_timestamp),
--
cgit v1.2.1
From 904121fecc0a1f11db76a73ca8649fb21e05ac5b Mon Sep 17 00:00:00 2001
From: Nandaka
Date: Thu, 24 Nov 2022 02:39:09 +0000
Subject: Support NAI exif for PNG Info
---
modules/extras.py | 14 ++++++++++++++
1 file changed, 14 insertions(+)
(limited to 'modules')
diff --git a/modules/extras.py b/modules/extras.py
index 71b93a06..af4cd97d 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -233,6 +233,20 @@ def run_pnginfo(image):
geninfo = items.get('parameters', geninfo)
+ # nai prompt
+ if "Software" in items.keys() and items["Software"] == "NovelAI":
+ import json
+ json_info = json.loads(items["Comment"])
+ geninfo = f'{items["Description"]}\r\nNegative prompt: {json_info["uc"]}\r\n'
+ sampler = "Euler a"
+ if json_info["sampler"] == "k_euler_ancestral":
+ sampler = "Euler a"
+ elif json_info["sampler"] == "k_euler":
+ sampler = "Euler"
+ model_hash = '925997e9' # assuming this is the correct model hash
+ # not sure with noise and strength parameter
+ geninfo += f'Steps: {json_info["steps"]}, Sampler: {sampler}, CFG scale: {json_info["scale"]}, Seed: {json_info["seed"]}, Size: {image.width}x{image.height}, Model hash: {model_hash}' # , Denoising strength: {json_info["noise"]}'
+
info = ''
for key, text in items.items():
info += f"""
--
cgit v1.2.1
From fcd75bd8740855e0c7bc80c0e8a4e1033b76d007 Mon Sep 17 00:00:00 2001
From: Sena <34237511+sena-nana@users.noreply.github.com>
Date: Thu, 24 Nov 2022 13:10:40 +0800
Subject: Fix other apis
---
modules/api/api.py | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 648bd6a8..efcedbba 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -4,7 +4,7 @@ import time
import uvicorn
from threading import Lock
from io import BytesIO
-from gradio.processing_utils import encode_pil_to_base64, decode_base64_to_file, decode_base64_to_image
+from gradio.processing_utils import decode_base64_to_file
from fastapi import APIRouter, Depends, FastAPI, HTTPException
from fastapi.security import HTTPBasic, HTTPBasicCredentials
from secrets import compare_digest
@@ -41,6 +41,10 @@ def setUpscalers(req: dict):
reqDict.pop('upscaler_2')
return reqDict
+def decode_base64_to_image(encoding):
+ if encoding.startswith("data:image/"):
+ encoding = encoding.split(";")[1].split(",")[1]
+ return Image.open(BytesIO(base64.b64decode(encoding)))
def encode_pil_to_base64(image):
with io.BytesIO() as output_bytes:
@@ -134,10 +138,7 @@ class Api:
mask = img2imgreq.mask
if mask:
- if mask.startswith("data:image/"):
- mask = decode_base64_to_image(mask)
- else:
- mask = Image.open(BytesIO(base64.b64decode(mask)))
+ mask = decode_base64_to_image(mask)
populate = img2imgreq.copy(update={ # Override __init__ params
"sd_model": shared.sd_model,
@@ -151,10 +152,7 @@ class Api:
imgs = []
for img in init_images:
- if img.startswith("data:image/"):
- img = decode_base64_to_image(img)
- else:
- img = Image.open(BytesIO(base64.b64decode(img)))
+ img = decode_base64_to_image(img)
imgs = [img] * p.batch_size
p.init_images = imgs
--
cgit v1.2.1
From a2ae5a655518b150a34b95d7afecc87a43280406 Mon Sep 17 00:00:00 2001
From: "Tiago F. Santos"
Date: Thu, 24 Nov 2022 13:04:45 +0000
Subject: [interrogator] mkdir check
---
modules/interrogate.py | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
(limited to 'modules')
diff --git a/modules/interrogate.py b/modules/interrogate.py
index 1a9c758e..f177a5a8 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -14,7 +14,8 @@ import modules.shared as shared
from modules import devices, paths, lowvram
blip_image_eval_size = 384
-blip_model_local = os.path.join('models', 'Interrogator', 'BLIP_model.pth')
+blip_local_dir = os.path.join('models', 'Interrogator')
+blip_local_file = os.path.join(blip_local_dir, 'model_base_caption_capfilt_large.pth')
blip_model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
clip_model_name = 'ViT-L/14'
@@ -48,13 +49,16 @@ class InterrogateModels:
def load_blip_model(self):
import models.blip
- if not os.path.isfile(blip_model_local):
+ if not os.path.isfile(blip_local_file):
+ if not os.path.isdir(blip_local_dir):
+ os.mkdir(blip_local_dir)
+
print("Downloading BLIP...")
- import requests as req
- open(blip_model_local, 'wb').write(req.get(blip_model_url, allow_redirects=True).content)
- print("BLIP downloaded to", blip_model_local + '.')
+ from requests import get as reqget
+ open(blip_local_file, 'wb').write(reqget(blip_model_url, allow_redirects=True).content)
+ print("BLIP downloaded to", blip_local_file + '.')
- blip_model = models.blip.blip_decoder(pretrained=blip_model_local, image_size=blip_image_eval_size, vit='base', med_config=os.path.join(paths.paths["BLIP"], "configs", "med_config.json"))
+ blip_model = models.blip.blip_decoder(pretrained=blip_local_file, image_size=blip_image_eval_size, vit='base', med_config=os.path.join(paths.paths["BLIP"], "configs", "med_config.json"))
blip_model.eval()
return blip_model
--
cgit v1.2.1
From c833d5bfaae05de41d8e795aba5b15822673ef04 Mon Sep 17 00:00:00 2001
From: Jay Smith
Date: Fri, 25 Nov 2022 20:12:23 -0600
Subject: fixes #3449 - VRAM leak when switching to/from inpainting model
---
modules/sd_samplers.py | 33 +++++++++++++++------------------
1 file changed, 15 insertions(+), 18 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 4fe67854..44112f99 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -1,4 +1,4 @@
-from collections import namedtuple
+from collections import namedtuple, deque
import numpy as np
from math import floor
import torch
@@ -335,18 +335,28 @@ class CFGDenoiser(torch.nn.Module):
class TorchHijack:
- def __init__(self, kdiff_sampler):
- self.kdiff_sampler = kdiff_sampler
+ def __init__(self, sampler_noises):
+ # Using a deque to efficiently receive the sampler_noises in the same order as the previous index-based
+ # implementation.
+ self.sampler_noises = deque(sampler_noises)
def __getattr__(self, item):
if item == 'randn_like':
- return self.kdiff_sampler.randn_like
+ return self.randn_like
if hasattr(torch, item):
return getattr(torch, item)
raise AttributeError("'{}' object has no attribute '{}'".format(type(self).__name__, item))
+ def randn_like(self, x):
+ if self.sampler_noises:
+ noise = self.sampler_noises.popleft()
+ if noise.shape == x.shape:
+ return noise
+
+ return torch.randn_like(x)
+
class KDiffusionSampler:
def __init__(self, funcname, sd_model):
@@ -356,7 +366,6 @@ class KDiffusionSampler:
self.extra_params = sampler_extra_params.get(funcname, [])
self.model_wrap_cfg = CFGDenoiser(self.model_wrap)
self.sampler_noises = None
- self.sampler_noise_index = 0
self.stop_at = None
self.eta = None
self.default_eta = 1.0
@@ -389,26 +398,14 @@ class KDiffusionSampler:
def number_of_needed_noises(self, p):
return p.steps
- def randn_like(self, x):
- noise = self.sampler_noises[self.sampler_noise_index] if self.sampler_noises is not None and self.sampler_noise_index < len(self.sampler_noises) else None
-
- if noise is not None and x.shape == noise.shape:
- res = noise
- else:
- res = torch.randn_like(x)
-
- self.sampler_noise_index += 1
- return res
-
def initialize(self, p):
self.model_wrap_cfg.mask = p.mask if hasattr(p, 'mask') else None
self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None
self.model_wrap.step = 0
- self.sampler_noise_index = 0
self.eta = p.eta or opts.eta_ancestral
if self.sampler_noises is not None:
- k_diffusion.sampling.torch = TorchHijack(self)
+ k_diffusion.sampling.torch = TorchHijack(self.sampler_noises)
extra_params_kwargs = {}
for param_name in self.extra_params:
--
cgit v1.2.1
From ce6911158b5b2f9cf79b405a1f368f875492044d Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 26 Nov 2022 16:10:46 +0300
Subject: Add support Stable Diffusion 2.0
---
modules/paths.py | 2 +-
modules/sd_hijack.py | 297 +++---------------------
modules/sd_hijack_clip.py | 301 +++++++++++++++++++++++++
modules/sd_hijack_inpainting.py | 20 +-
modules/sd_hijack_open_clip.py | 37 +++
modules/sd_samplers.py | 14 +-
modules/shared.py | 34 ++-
modules/textual_inversion/textual_inversion.py | 7 +-
modules/ui.py | 13 +-
9 files changed, 421 insertions(+), 304 deletions(-)
create mode 100644 modules/sd_hijack_clip.py
create mode 100644 modules/sd_hijack_open_clip.py
(limited to 'modules')
diff --git a/modules/paths.py b/modules/paths.py
index 1e7a2fbc..4dd03a35 100644
--- a/modules/paths.py
+++ b/modules/paths.py
@@ -9,7 +9,7 @@ sys.path.insert(0, script_path)
# search for directory of stable diffusion in following places
sd_path = None
-possible_sd_paths = [os.path.join(script_path, 'repositories/stable-diffusion'), '.', os.path.dirname(script_path)]
+possible_sd_paths = [os.path.join(script_path, 'repositories/stable-diffusion-stability-ai'), '.', os.path.dirname(script_path)]
for possible_sd_path in possible_sd_paths:
if os.path.exists(os.path.join(possible_sd_path, 'ldm/models/diffusion/ddpm.py')):
sd_path = os.path.abspath(possible_sd_path)
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index eaedac13..d5243fd3 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -9,18 +9,29 @@ from torch.nn.functional import silu
import modules.textual_inversion.textual_inversion
from modules import prompt_parser, devices, sd_hijack_optimizations, shared
-from modules.shared import opts, device, cmd_opts
+from modules.shared import cmd_opts
+from modules import sd_hijack_clip, sd_hijack_open_clip
+
from modules.sd_hijack_optimizations import invokeAI_mps_available
import ldm.modules.attention
import ldm.modules.diffusionmodules.model
import ldm.models.diffusion.ddim
import ldm.models.diffusion.plms
+import ldm.modules.encoders.modules
attention_CrossAttention_forward = ldm.modules.attention.CrossAttention.forward
diffusionmodules_model_nonlinearity = ldm.modules.diffusionmodules.model.nonlinearity
diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.AttnBlock.forward
+# new memory efficient cross attention blocks do not support hypernets and we already
+# have memory efficient cross attention anyway, so this disables SD2.0's memory efficient cross attention
+ldm.modules.attention.MemoryEfficientCrossAttention = ldm.modules.attention.CrossAttention
+ldm.modules.attention.BasicTransformerBlock.ATTENTION_MODES["softmax-xformers"] = ldm.modules.attention.CrossAttention
+
+# silence new console spam from SD2
+ldm.modules.attention.print = lambda *args: None
+ldm.modules.diffusionmodules.model.print = lambda *args: None
def apply_optimizations():
undo_optimizations()
@@ -49,16 +60,11 @@ def apply_optimizations():
def undo_optimizations():
- from modules.hypernetworks import hypernetwork
-
- ldm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward
+ ldm.modules.attention.CrossAttention.forward = attention_CrossAttention_forward # this stops hypernets from working
ldm.modules.diffusionmodules.model.nonlinearity = diffusionmodules_model_nonlinearity
ldm.modules.diffusionmodules.model.AttnBlock.forward = diffusionmodules_model_AttnBlock_forward
-def get_target_prompt_token_count(token_count):
- return math.ceil(max(token_count, 1) / 75) * 75
-
class StableDiffusionModelHijack:
fixes = None
@@ -70,10 +76,13 @@ class StableDiffusionModelHijack:
embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase(cmd_opts.embeddings_dir)
def hijack(self, m):
- model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
-
- model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self)
- m.cond_stage_model = FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
+ if type(m.cond_stage_model) == ldm.modules.encoders.modules.FrozenCLIPEmbedder:
+ model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
+ model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self)
+ m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
+ elif type(m.cond_stage_model) == ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder:
+ m.cond_stage_model.model.token_embedding = EmbeddingsWithFixes(m.cond_stage_model.model.token_embedding, self)
+ m.cond_stage_model = sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
self.clip = m.cond_stage_model
@@ -89,12 +98,15 @@ class StableDiffusionModelHijack:
self.layers = flatten(m)
def undo_hijack(self, m):
- if type(m.cond_stage_model) == FrozenCLIPEmbedderWithCustomWords:
+ if type(m.cond_stage_model) == sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords:
m.cond_stage_model = m.cond_stage_model.wrapped
- model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
- if type(model_embeddings.token_embedding) == EmbeddingsWithFixes:
- model_embeddings.token_embedding = model_embeddings.token_embedding.wrapped
+ model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
+ if type(model_embeddings.token_embedding) == EmbeddingsWithFixes:
+ model_embeddings.token_embedding = model_embeddings.token_embedding.wrapped
+ elif type(m.cond_stage_model) == sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords:
+ m.cond_stage_model.wrapped.model.token_embedding = m.cond_stage_model.wrapped.model.token_embedding.wrapped
+ m.cond_stage_model = m.cond_stage_model.wrapped
self.apply_circular(False)
self.layers = None
@@ -114,261 +126,8 @@ class StableDiffusionModelHijack:
def tokenize(self, text):
_, remade_batch_tokens, _, _, _, token_count = self.clip.process_text([text])
- return remade_batch_tokens[0], token_count, get_target_prompt_token_count(token_count)
-
-
-class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
- def __init__(self, wrapped, hijack):
- super().__init__()
- self.wrapped = wrapped
- self.hijack: StableDiffusionModelHijack = hijack
- self.tokenizer = wrapped.tokenizer
- self.token_mults = {}
-
- self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0]
-
- tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if '(' in k or ')' in k or '[' in k or ']' in k]
- for text, ident in tokens_with_parens:
- mult = 1.0
- for c in text:
- if c == '[':
- mult /= 1.1
- if c == ']':
- mult *= 1.1
- if c == '(':
- mult *= 1.1
- if c == ')':
- mult /= 1.1
-
- if mult != 1.0:
- self.token_mults[ident] = mult
-
- def tokenize_line(self, line, used_custom_terms, hijack_comments):
- id_end = self.wrapped.tokenizer.eos_token_id
-
- if opts.enable_emphasis:
- parsed = prompt_parser.parse_prompt_attention(line)
- else:
- parsed = [[line, 1.0]]
-
- tokenized = self.wrapped.tokenizer([text for text, _ in parsed], truncation=False, add_special_tokens=False)["input_ids"]
-
- fixes = []
- remade_tokens = []
- multipliers = []
- last_comma = -1
-
- for tokens, (text, weight) in zip(tokenized, parsed):
- i = 0
- while i < len(tokens):
- token = tokens[i]
-
- embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i)
-
- if token == self.comma_token:
- last_comma = len(remade_tokens)
- elif opts.comma_padding_backtrack != 0 and max(len(remade_tokens), 1) % 75 == 0 and last_comma != -1 and len(remade_tokens) - last_comma <= opts.comma_padding_backtrack:
- last_comma += 1
- reloc_tokens = remade_tokens[last_comma:]
- reloc_mults = multipliers[last_comma:]
-
- remade_tokens = remade_tokens[:last_comma]
- length = len(remade_tokens)
-
- rem = int(math.ceil(length / 75)) * 75 - length
- remade_tokens += [id_end] * rem + reloc_tokens
- multipliers = multipliers[:last_comma] + [1.0] * rem + reloc_mults
-
- if embedding is None:
- remade_tokens.append(token)
- multipliers.append(weight)
- i += 1
- else:
- emb_len = int(embedding.vec.shape[0])
- iteration = len(remade_tokens) // 75
- if (len(remade_tokens) + emb_len) // 75 != iteration:
- rem = (75 * (iteration + 1) - len(remade_tokens))
- remade_tokens += [id_end] * rem
- multipliers += [1.0] * rem
- iteration += 1
- fixes.append((iteration, (len(remade_tokens) % 75, embedding)))
- remade_tokens += [0] * emb_len
- multipliers += [weight] * emb_len
- used_custom_terms.append((embedding.name, embedding.checksum()))
- i += embedding_length_in_tokens
-
- token_count = len(remade_tokens)
- prompt_target_length = get_target_prompt_token_count(token_count)
- tokens_to_add = prompt_target_length - len(remade_tokens)
-
- remade_tokens = remade_tokens + [id_end] * tokens_to_add
- multipliers = multipliers + [1.0] * tokens_to_add
-
- return remade_tokens, fixes, multipliers, token_count
-
- def process_text(self, texts):
- used_custom_terms = []
- remade_batch_tokens = []
- hijack_comments = []
- hijack_fixes = []
- token_count = 0
-
- cache = {}
- batch_multipliers = []
- for line in texts:
- if line in cache:
- remade_tokens, fixes, multipliers = cache[line]
- else:
- remade_tokens, fixes, multipliers, current_token_count = self.tokenize_line(line, used_custom_terms, hijack_comments)
- token_count = max(current_token_count, token_count)
-
- cache[line] = (remade_tokens, fixes, multipliers)
-
- remade_batch_tokens.append(remade_tokens)
- hijack_fixes.append(fixes)
- batch_multipliers.append(multipliers)
-
- return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
-
- def process_text_old(self, text):
- id_start = self.wrapped.tokenizer.bos_token_id
- id_end = self.wrapped.tokenizer.eos_token_id
- maxlen = self.wrapped.max_length # you get to stay at 77
- used_custom_terms = []
- remade_batch_tokens = []
- overflowing_words = []
- hijack_comments = []
- hijack_fixes = []
- token_count = 0
-
- cache = {}
- batch_tokens = self.wrapped.tokenizer(text, truncation=False, add_special_tokens=False)["input_ids"]
- batch_multipliers = []
- for tokens in batch_tokens:
- tuple_tokens = tuple(tokens)
-
- if tuple_tokens in cache:
- remade_tokens, fixes, multipliers = cache[tuple_tokens]
- else:
- fixes = []
- remade_tokens = []
- multipliers = []
- mult = 1.0
-
- i = 0
- while i < len(tokens):
- token = tokens[i]
-
- embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i)
-
- mult_change = self.token_mults.get(token) if opts.enable_emphasis else None
- if mult_change is not None:
- mult *= mult_change
- i += 1
- elif embedding is None:
- remade_tokens.append(token)
- multipliers.append(mult)
- i += 1
- else:
- emb_len = int(embedding.vec.shape[0])
- fixes.append((len(remade_tokens), embedding))
- remade_tokens += [0] * emb_len
- multipliers += [mult] * emb_len
- used_custom_terms.append((embedding.name, embedding.checksum()))
- i += embedding_length_in_tokens
-
- if len(remade_tokens) > maxlen - 2:
- vocab = {v: k for k, v in self.wrapped.tokenizer.get_vocab().items()}
- ovf = remade_tokens[maxlen - 2:]
- overflowing_words = [vocab.get(int(x), "") for x in ovf]
- overflowing_text = self.wrapped.tokenizer.convert_tokens_to_string(''.join(overflowing_words))
- hijack_comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n")
-
- token_count = len(remade_tokens)
- remade_tokens = remade_tokens + [id_end] * (maxlen - 2 - len(remade_tokens))
- remade_tokens = [id_start] + remade_tokens[0:maxlen - 2] + [id_end]
- cache[tuple_tokens] = (remade_tokens, fixes, multipliers)
-
- multipliers = multipliers + [1.0] * (maxlen - 2 - len(multipliers))
- multipliers = [1.0] + multipliers[0:maxlen - 2] + [1.0]
-
- remade_batch_tokens.append(remade_tokens)
- hijack_fixes.append(fixes)
- batch_multipliers.append(multipliers)
- return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
-
- def forward(self, text):
- use_old = opts.use_old_emphasis_implementation
- if use_old:
- batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text_old(text)
- else:
- batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text(text)
-
- self.hijack.comments += hijack_comments
-
- if len(used_custom_terms) > 0:
- self.hijack.comments.append("Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms]))
-
- if use_old:
- self.hijack.fixes = hijack_fixes
- return self.process_tokens(remade_batch_tokens, batch_multipliers)
-
- z = None
- i = 0
- while max(map(len, remade_batch_tokens)) != 0:
- rem_tokens = [x[75:] for x in remade_batch_tokens]
- rem_multipliers = [x[75:] for x in batch_multipliers]
-
- self.hijack.fixes = []
- for unfiltered in hijack_fixes:
- fixes = []
- for fix in unfiltered:
- if fix[0] == i:
- fixes.append(fix[1])
- self.hijack.fixes.append(fixes)
-
- tokens = []
- multipliers = []
- for j in range(len(remade_batch_tokens)):
- if len(remade_batch_tokens[j]) > 0:
- tokens.append(remade_batch_tokens[j][:75])
- multipliers.append(batch_multipliers[j][:75])
- else:
- tokens.append([self.wrapped.tokenizer.eos_token_id] * 75)
- multipliers.append([1.0] * 75)
-
- z1 = self.process_tokens(tokens, multipliers)
- z = z1 if z is None else torch.cat((z, z1), axis=-2)
-
- remade_batch_tokens = rem_tokens
- batch_multipliers = rem_multipliers
- i += 1
-
- return z
-
- def process_tokens(self, remade_batch_tokens, batch_multipliers):
- if not opts.use_old_emphasis_implementation:
- remade_batch_tokens = [[self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in remade_batch_tokens]
- batch_multipliers = [[1.0] + x[:75] + [1.0] for x in batch_multipliers]
-
- tokens = torch.asarray(remade_batch_tokens).to(device)
- outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers)
-
- if opts.CLIP_stop_at_last_layers > 1:
- z = outputs.hidden_states[-opts.CLIP_stop_at_last_layers]
- z = self.wrapped.transformer.text_model.final_layer_norm(z)
- else:
- z = outputs.last_hidden_state
-
- # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
- batch_multipliers_of_same_length = [x + [1.0] * (75 - len(x)) for x in batch_multipliers]
- batch_multipliers = torch.asarray(batch_multipliers_of_same_length).to(device)
- original_mean = z.mean()
- z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
- new_mean = z.mean()
- z *= original_mean / new_mean
+ return remade_batch_tokens[0], token_count, sd_hijack_clip.get_target_prompt_token_count(token_count)
- return z
class EmbeddingsWithFixes(torch.nn.Module):
diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
new file mode 100644
index 00000000..b451d1cf
--- /dev/null
+++ b/modules/sd_hijack_clip.py
@@ -0,0 +1,301 @@
+import math
+
+import torch
+
+from modules import prompt_parser, devices
+from modules.shared import opts
+
+
+def get_target_prompt_token_count(token_count):
+ return math.ceil(max(token_count, 1) / 75) * 75
+
+
+class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
+ def __init__(self, wrapped, hijack):
+ super().__init__()
+ self.wrapped = wrapped
+ self.hijack = hijack
+
+ def tokenize(self, texts):
+ raise NotImplementedError
+
+ def encode_with_transformers(self, tokens):
+ raise NotImplementedError
+
+ def encode_embedding_init_text(self, init_text, nvpt):
+ raise NotImplementedError
+
+ def tokenize_line(self, line, used_custom_terms, hijack_comments):
+ if opts.enable_emphasis:
+ parsed = prompt_parser.parse_prompt_attention(line)
+ else:
+ parsed = [[line, 1.0]]
+
+ tokenized = self.tokenize([text for text, _ in parsed])
+
+ fixes = []
+ remade_tokens = []
+ multipliers = []
+ last_comma = -1
+
+ for tokens, (text, weight) in zip(tokenized, parsed):
+ i = 0
+ while i < len(tokens):
+ token = tokens[i]
+
+ embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i)
+
+ if token == self.comma_token:
+ last_comma = len(remade_tokens)
+ elif opts.comma_padding_backtrack != 0 and max(len(remade_tokens), 1) % 75 == 0 and last_comma != -1 and len(remade_tokens) - last_comma <= opts.comma_padding_backtrack:
+ last_comma += 1
+ reloc_tokens = remade_tokens[last_comma:]
+ reloc_mults = multipliers[last_comma:]
+
+ remade_tokens = remade_tokens[:last_comma]
+ length = len(remade_tokens)
+
+ rem = int(math.ceil(length / 75)) * 75 - length
+ remade_tokens += [self.id_end] * rem + reloc_tokens
+ multipliers = multipliers[:last_comma] + [1.0] * rem + reloc_mults
+
+ if embedding is None:
+ remade_tokens.append(token)
+ multipliers.append(weight)
+ i += 1
+ else:
+ emb_len = int(embedding.vec.shape[0])
+ iteration = len(remade_tokens) // 75
+ if (len(remade_tokens) + emb_len) // 75 != iteration:
+ rem = (75 * (iteration + 1) - len(remade_tokens))
+ remade_tokens += [self.id_end] * rem
+ multipliers += [1.0] * rem
+ iteration += 1
+ fixes.append((iteration, (len(remade_tokens) % 75, embedding)))
+ remade_tokens += [0] * emb_len
+ multipliers += [weight] * emb_len
+ used_custom_terms.append((embedding.name, embedding.checksum()))
+ i += embedding_length_in_tokens
+
+ token_count = len(remade_tokens)
+ prompt_target_length = get_target_prompt_token_count(token_count)
+ tokens_to_add = prompt_target_length - len(remade_tokens)
+
+ remade_tokens = remade_tokens + [self.id_end] * tokens_to_add
+ multipliers = multipliers + [1.0] * tokens_to_add
+
+ return remade_tokens, fixes, multipliers, token_count
+
+ def process_text(self, texts):
+ used_custom_terms = []
+ remade_batch_tokens = []
+ hijack_comments = []
+ hijack_fixes = []
+ token_count = 0
+
+ cache = {}
+ batch_multipliers = []
+ for line in texts:
+ if line in cache:
+ remade_tokens, fixes, multipliers = cache[line]
+ else:
+ remade_tokens, fixes, multipliers, current_token_count = self.tokenize_line(line, used_custom_terms, hijack_comments)
+ token_count = max(current_token_count, token_count)
+
+ cache[line] = (remade_tokens, fixes, multipliers)
+
+ remade_batch_tokens.append(remade_tokens)
+ hijack_fixes.append(fixes)
+ batch_multipliers.append(multipliers)
+
+ return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
+
+ def process_text_old(self, texts):
+ id_start = self.id_start
+ id_end = self.id_end
+ maxlen = self.wrapped.max_length # you get to stay at 77
+ used_custom_terms = []
+ remade_batch_tokens = []
+ hijack_comments = []
+ hijack_fixes = []
+ token_count = 0
+
+ cache = {}
+ batch_tokens = self.tokenize(texts)
+ batch_multipliers = []
+ for tokens in batch_tokens:
+ tuple_tokens = tuple(tokens)
+
+ if tuple_tokens in cache:
+ remade_tokens, fixes, multipliers = cache[tuple_tokens]
+ else:
+ fixes = []
+ remade_tokens = []
+ multipliers = []
+ mult = 1.0
+
+ i = 0
+ while i < len(tokens):
+ token = tokens[i]
+
+ embedding, embedding_length_in_tokens = self.hijack.embedding_db.find_embedding_at_position(tokens, i)
+
+ mult_change = self.token_mults.get(token) if opts.enable_emphasis else None
+ if mult_change is not None:
+ mult *= mult_change
+ i += 1
+ elif embedding is None:
+ remade_tokens.append(token)
+ multipliers.append(mult)
+ i += 1
+ else:
+ emb_len = int(embedding.vec.shape[0])
+ fixes.append((len(remade_tokens), embedding))
+ remade_tokens += [0] * emb_len
+ multipliers += [mult] * emb_len
+ used_custom_terms.append((embedding.name, embedding.checksum()))
+ i += embedding_length_in_tokens
+
+ if len(remade_tokens) > maxlen - 2:
+ vocab = {v: k for k, v in self.wrapped.tokenizer.get_vocab().items()}
+ ovf = remade_tokens[maxlen - 2:]
+ overflowing_words = [vocab.get(int(x), "") for x in ovf]
+ overflowing_text = self.wrapped.tokenizer.convert_tokens_to_string(''.join(overflowing_words))
+ hijack_comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n")
+
+ token_count = len(remade_tokens)
+ remade_tokens = remade_tokens + [id_end] * (maxlen - 2 - len(remade_tokens))
+ remade_tokens = [id_start] + remade_tokens[0:maxlen - 2] + [id_end]
+ cache[tuple_tokens] = (remade_tokens, fixes, multipliers)
+
+ multipliers = multipliers + [1.0] * (maxlen - 2 - len(multipliers))
+ multipliers = [1.0] + multipliers[0:maxlen - 2] + [1.0]
+
+ remade_batch_tokens.append(remade_tokens)
+ hijack_fixes.append(fixes)
+ batch_multipliers.append(multipliers)
+ return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
+
+ def forward(self, text):
+ use_old = opts.use_old_emphasis_implementation
+ if use_old:
+ batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text_old(text)
+ else:
+ batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text(text)
+
+ self.hijack.comments += hijack_comments
+
+ if len(used_custom_terms) > 0:
+ self.hijack.comments.append("Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms]))
+
+ if use_old:
+ self.hijack.fixes = hijack_fixes
+ return self.process_tokens(remade_batch_tokens, batch_multipliers)
+
+ z = None
+ i = 0
+ while max(map(len, remade_batch_tokens)) != 0:
+ rem_tokens = [x[75:] for x in remade_batch_tokens]
+ rem_multipliers = [x[75:] for x in batch_multipliers]
+
+ self.hijack.fixes = []
+ for unfiltered in hijack_fixes:
+ fixes = []
+ for fix in unfiltered:
+ if fix[0] == i:
+ fixes.append(fix[1])
+ self.hijack.fixes.append(fixes)
+
+ tokens = []
+ multipliers = []
+ for j in range(len(remade_batch_tokens)):
+ if len(remade_batch_tokens[j]) > 0:
+ tokens.append(remade_batch_tokens[j][:75])
+ multipliers.append(batch_multipliers[j][:75])
+ else:
+ tokens.append([self.id_end] * 75)
+ multipliers.append([1.0] * 75)
+
+ z1 = self.process_tokens(tokens, multipliers)
+ z = z1 if z is None else torch.cat((z, z1), axis=-2)
+
+ remade_batch_tokens = rem_tokens
+ batch_multipliers = rem_multipliers
+ i += 1
+
+ return z
+
+ def process_tokens(self, remade_batch_tokens, batch_multipliers):
+ if not opts.use_old_emphasis_implementation:
+ remade_batch_tokens = [[self.id_start] + x[:75] + [self.id_end] for x in remade_batch_tokens]
+ batch_multipliers = [[1.0] + x[:75] + [1.0] for x in batch_multipliers]
+
+ tokens = torch.asarray(remade_batch_tokens).to(devices.device)
+
+ if self.id_end != self.id_pad:
+ for batch_pos in range(len(remade_batch_tokens)):
+ index = remade_batch_tokens[batch_pos].index(self.id_end)
+ tokens[batch_pos, index+1:tokens.shape[1]] = self.id_pad
+
+ z = self.encode_with_transformers(tokens)
+
+ # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
+ batch_multipliers_of_same_length = [x + [1.0] * (75 - len(x)) for x in batch_multipliers]
+ batch_multipliers = torch.asarray(batch_multipliers_of_same_length).to(devices.device)
+ original_mean = z.mean()
+ z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
+ new_mean = z.mean()
+ z *= original_mean / new_mean
+
+ return z
+
+
+class FrozenCLIPEmbedderWithCustomWords(FrozenCLIPEmbedderWithCustomWordsBase):
+ def __init__(self, wrapped, hijack):
+ super().__init__(wrapped, hijack)
+ self.tokenizer = wrapped.tokenizer
+ self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0]
+
+ self.token_mults = {}
+ tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if '(' in k or ')' in k or '[' in k or ']' in k]
+ for text, ident in tokens_with_parens:
+ mult = 1.0
+ for c in text:
+ if c == '[':
+ mult /= 1.1
+ if c == ']':
+ mult *= 1.1
+ if c == '(':
+ mult *= 1.1
+ if c == ')':
+ mult /= 1.1
+
+ if mult != 1.0:
+ self.token_mults[ident] = mult
+
+ self.id_start = self.wrapped.tokenizer.bos_token_id
+ self.id_end = self.wrapped.tokenizer.eos_token_id
+ self.id_pad = self.id_end
+
+ def tokenize(self, texts):
+ tokenized = self.wrapped.tokenizer(texts, truncation=False, add_special_tokens=False)["input_ids"]
+
+ return tokenized
+
+ def encode_with_transformers(self, tokens):
+ outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers)
+
+ if opts.CLIP_stop_at_last_layers > 1:
+ z = outputs.hidden_states[-opts.CLIP_stop_at_last_layers]
+ z = self.wrapped.transformer.text_model.final_layer_norm(z)
+ else:
+ z = outputs.last_hidden_state
+
+ return z
+
+ def encode_embedding_init_text(self, init_text, nvpt):
+ embedding_layer = self.wrapped.transformer.text_model.embeddings
+ ids = self.wrapped.tokenizer(init_text, max_length=nvpt, return_tensors="pt", add_special_tokens=False)["input_ids"]
+ embedded = embedding_layer.token_embedding.wrapped(ids.to(devices.device)).squeeze(0)
+
+ return embedded
diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py
index 46714a4f..938f9a58 100644
--- a/modules/sd_hijack_inpainting.py
+++ b/modules/sd_hijack_inpainting.py
@@ -199,8 +199,8 @@ def sample_plms(self,
@torch.no_grad()
def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
- temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
- unconditional_guidance_scale=1., unconditional_conditioning=None, old_eps=None, t_next=None):
+ temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
+ unconditional_guidance_scale=1., unconditional_conditioning=None, old_eps=None, t_next=None, dynamic_threshold=None):
b, *_, device = *x.shape, x.device
def get_model_output(x, t):
@@ -249,6 +249,8 @@ def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=F
pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
if quantize_denoised:
pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0)
+ if dynamic_threshold is not None:
+ pred_x0 = norm_thresholding(pred_x0, dynamic_threshold)
# direction pointing to x_t
dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t
noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature
@@ -321,12 +323,16 @@ def should_hijack_inpainting(checkpoint_info):
def do_inpainting_hijack():
- ldm.models.diffusion.ddpm.get_unconditional_conditioning = get_unconditional_conditioning
+ # most of this stuff seems to no longer be needed because it is already included into SD2.0
+ # LatentInpaintDiffusion remains because SD2.0's LatentInpaintDiffusion can't be loaded without specifying a checkpoint
+ # p_sample_plms is needed because PLMS can't work with dicts as conditionings
+ # this file should be cleaned up later if weverything tuens out to work fine
+
+ # ldm.models.diffusion.ddpm.get_unconditional_conditioning = get_unconditional_conditioning
ldm.models.diffusion.ddpm.LatentInpaintDiffusion = LatentInpaintDiffusion
- ldm.models.diffusion.ddim.DDIMSampler.p_sample_ddim = p_sample_ddim
- ldm.models.diffusion.ddim.DDIMSampler.sample = sample_ddim
+ # ldm.models.diffusion.ddim.DDIMSampler.p_sample_ddim = p_sample_ddim
+ # ldm.models.diffusion.ddim.DDIMSampler.sample = sample_ddim
ldm.models.diffusion.plms.PLMSSampler.p_sample_plms = p_sample_plms
- ldm.models.diffusion.plms.PLMSSampler.sample = sample_plms
-
+ # ldm.models.diffusion.plms.PLMSSampler.sample = sample_plms
diff --git a/modules/sd_hijack_open_clip.py b/modules/sd_hijack_open_clip.py
new file mode 100644
index 00000000..f733e852
--- /dev/null
+++ b/modules/sd_hijack_open_clip.py
@@ -0,0 +1,37 @@
+import open_clip.tokenizer
+import torch
+
+from modules import sd_hijack_clip, devices
+from modules.shared import opts
+
+tokenizer = open_clip.tokenizer._tokenizer
+
+
+class FrozenOpenCLIPEmbedderWithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWithCustomWordsBase):
+ def __init__(self, wrapped, hijack):
+ super().__init__(wrapped, hijack)
+
+ self.comma_token = [v for k, v in tokenizer.encoder.items() if k == ','][0]
+ self.id_start = tokenizer.encoder[""]
+ self.id_end = tokenizer.encoder[""]
+ self.id_pad = 0
+
+ def tokenize(self, texts):
+ assert not opts.use_old_emphasis_implementation, 'Old emphasis implementation not supported for Open Clip'
+
+ tokenized = [tokenizer.encode(text) for text in texts]
+
+ return tokenized
+
+ def encode_with_transformers(self, tokens):
+ # set self.wrapped.layer_idx here according to opts.CLIP_stop_at_last_layers
+ z = self.wrapped.encode_with_transformer(tokens)
+
+ return z
+
+ def encode_embedding_init_text(self, init_text, nvpt):
+ ids = tokenizer.encode(init_text)
+ ids = torch.asarray([ids], device=devices.device, dtype=torch.int)
+ embedded = self.wrapped.model.token_embedding.wrapped(ids).squeeze(0)
+
+ return embedded
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 4fe67854..4edd8c60 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -127,7 +127,8 @@ class InterruptedException(BaseException):
class VanillaStableDiffusionSampler:
def __init__(self, constructor, sd_model):
self.sampler = constructor(sd_model)
- self.orig_p_sample_ddim = self.sampler.p_sample_ddim if hasattr(self.sampler, 'p_sample_ddim') else self.sampler.p_sample_plms
+ self.is_plms = hasattr(self.sampler, 'p_sample_plms')
+ self.orig_p_sample_ddim = self.sampler.p_sample_plms if self.is_plms else self.sampler.p_sample_ddim
self.mask = None
self.nmask = None
self.init_latent = None
@@ -218,7 +219,6 @@ class VanillaStableDiffusionSampler:
self.mask = p.mask if hasattr(p, 'mask') else None
self.nmask = p.nmask if hasattr(p, 'nmask') else None
-
def adjust_steps_if_invalid(self, p, num_steps):
if (self.config.name == 'DDIM' and p.ddim_discretize == 'uniform') or (self.config.name == 'PLMS'):
valid_step = 999 / (1000 // num_steps)
@@ -227,7 +227,6 @@ class VanillaStableDiffusionSampler:
return num_steps
-
def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
steps, t_enc = setup_img2img_steps(p, steps)
steps = self.adjust_steps_if_invalid(p, steps)
@@ -260,9 +259,10 @@ class VanillaStableDiffusionSampler:
steps = self.adjust_steps_if_invalid(p, steps or p.steps)
# Wrap the conditioning models with additional image conditioning for inpainting model
+ # dummy_for_plms is needed because PLMS code checks the first item in the dict to have the right shape
if image_conditioning is not None:
- conditioning = {"c_concat": [image_conditioning], "c_crossattn": [conditioning]}
- unconditional_conditioning = {"c_concat": [image_conditioning], "c_crossattn": [unconditional_conditioning]}
+ conditioning = {"dummy_for_plms": np.zeros((conditioning.shape[0],)), "c_crossattn": [conditioning], "c_concat": [image_conditioning]}
+ unconditional_conditioning = {"c_crossattn": [unconditional_conditioning], "c_concat": [image_conditioning]}
samples_ddim = self.launch_sampling(steps, lambda: self.sampler.sample(S=steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta)[0])
@@ -350,7 +350,9 @@ class TorchHijack:
class KDiffusionSampler:
def __init__(self, funcname, sd_model):
- self.model_wrap = k_diffusion.external.CompVisDenoiser(sd_model, quantize=shared.opts.enable_quantization)
+ denoiser = k_diffusion.external.CompVisVDenoiser if sd_model.parameterization == "v" else k_diffusion.external.CompVisDenoiser
+
+ self.model_wrap = denoiser(sd_model, quantize=shared.opts.enable_quantization)
self.funcname = funcname
self.func = getattr(k_diffusion.sampling, self.funcname)
self.extra_params = sampler_extra_params.get(funcname, [])
diff --git a/modules/shared.py b/modules/shared.py
index c93ae2a3..8fb1387a 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -11,17 +11,15 @@ import tqdm
import modules.artists
import modules.interrogate
import modules.memmon
-import modules.sd_models
import modules.styles
import modules.devices as devices
-from modules import sd_samplers, sd_models, localization, sd_vae, extensions, script_loading
-from modules.hypernetworks import hypernetwork
+from modules import localization, sd_vae, extensions, script_loading
from modules.paths import models_path, script_path, sd_path
sd_model_file = os.path.join(script_path, 'model.ckpt')
default_sd_model_file = sd_model_file
parser = argparse.ArgumentParser()
-parser.add_argument("--config", type=str, default=os.path.join(sd_path, "configs/stable-diffusion/v1-inference.yaml"), help="path to config which constructs model",)
+parser.add_argument("--config", type=str, default=os.path.join(script_path, "v1-inference.yaml"), help="path to config which constructs model",)
parser.add_argument("--ckpt", type=str, default=sd_model_file, help="path to checkpoint of stable diffusion model; if specified, this checkpoint will be added to the list of checkpoints and loaded",)
parser.add_argument("--ckpt-dir", type=str, default=None, help="Path to directory with stable diffusion checkpoints")
parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN'))
@@ -121,10 +119,12 @@ xformers_available = False
config_filename = cmd_opts.ui_settings_file
os.makedirs(cmd_opts.hypernetwork_dir, exist_ok=True)
-hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir)
+hypernetworks = {}
loaded_hypernetwork = None
+
def reload_hypernetworks():
+ from modules.hypernetworks import hypernetwork
global hypernetworks
hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir)
@@ -206,10 +206,11 @@ class State:
if self.current_latent is None:
return
+ import modules.sd_samplers
if opts.show_progress_grid:
- self.current_image = sd_samplers.samples_to_image_grid(self.current_latent)
+ self.current_image = modules.sd_samplers.samples_to_image_grid(self.current_latent)
else:
- self.current_image = sd_samplers.sample_to_image(self.current_latent)
+ self.current_image = modules.sd_samplers.sample_to_image(self.current_latent)
self.current_image_sampling_step = self.sampling_step
@@ -248,6 +249,21 @@ def options_section(section_identifier, options_dict):
return options_dict
+def list_checkpoint_tiles():
+ import modules.sd_models
+ return modules.sd_models.checkpoint_tiles()
+
+
+def refresh_checkpoints():
+ import modules.sd_models
+ return modules.sd_models.list_models()
+
+
+def list_samplers():
+ import modules.sd_samplers
+ return modules.sd_samplers.all_samplers
+
+
hide_dirs = {"visible": not cmd_opts.hide_ui_dir_config}
options_templates = {}
@@ -333,7 +349,7 @@ options_templates.update(options_section(('training', "Training"), {
}))
options_templates.update(options_section(('sd', "Stable Diffusion"), {
- "sd_model_checkpoint": OptionInfo(None, "Stable Diffusion checkpoint", gr.Dropdown, lambda: {"choices": modules.sd_models.checkpoint_tiles()}, refresh=sd_models.list_models),
+ "sd_model_checkpoint": OptionInfo(None, "Stable Diffusion checkpoint", gr.Dropdown, lambda: {"choices": list_checkpoint_tiles()}, refresh=refresh_checkpoints),
"sd_checkpoint_cache": OptionInfo(0, "Checkpoints to cache in RAM", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1}),
"sd_vae": OptionInfo("auto", "SD VAE", gr.Dropdown, lambda: {"choices": sd_vae.vae_list}, refresh=sd_vae.refresh_vae_list),
"sd_vae_as_default": OptionInfo(False, "Ignore selected VAE for stable diffusion checkpoints that have their own .vae.pt next to them"),
@@ -385,7 +401,7 @@ options_templates.update(options_section(('ui', "User interface"), {
}))
options_templates.update(options_section(('sampler-params', "Sampler parameters"), {
- "hide_samplers": OptionInfo([], "Hide samplers in user interface (requires restart)", gr.CheckboxGroup, lambda: {"choices": [x.name for x in sd_samplers.all_samplers]}),
+ "hide_samplers": OptionInfo([], "Hide samplers in user interface (requires restart)", gr.CheckboxGroup, lambda: {"choices": [x.name for x in list_samplers()]}),
"eta_ddim": OptionInfo(0.0, "eta (noise multiplier) for DDIM", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
"eta_ancestral": OptionInfo(1.0, "eta (noise multiplier) for ancestral samplers", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
"ddim_discretize": OptionInfo('uniform', "img2img DDIM discretize", gr.Radio, {"choices": ['uniform', 'quad']}),
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 5e4d8688..a273e663 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -64,7 +64,8 @@ class EmbeddingDatabase:
self.word_embeddings[embedding.name] = embedding
- ids = model.cond_stage_model.tokenizer([embedding.name], add_special_tokens=False)['input_ids'][0]
+ # TODO changing between clip and open clip changes tokenization, which will cause embeddings to stop working
+ ids = model.cond_stage_model.tokenize([embedding.name])[0]
first_id = ids[0]
if first_id not in self.ids_lookup:
@@ -155,13 +156,11 @@ class EmbeddingDatabase:
def create_embedding(name, num_vectors_per_token, overwrite_old, init_text='*'):
cond_model = shared.sd_model.cond_stage_model
- embedding_layer = cond_model.wrapped.transformer.text_model.embeddings
with devices.autocast():
cond_model([""]) # will send cond model to GPU if lowvram/medvram is active
- ids = cond_model.tokenizer(init_text, max_length=num_vectors_per_token, return_tensors="pt", add_special_tokens=False)["input_ids"]
- embedded = embedding_layer.token_embedding.wrapped(ids.to(devices.device)).squeeze(0)
+ embedded = cond_model.encode_embedding_init_text(init_text, num_vectors_per_token)
vec = torch.zeros((num_vectors_per_token, embedded.shape[1]), device=devices.device)
for i in range(num_vectors_per_token):
diff --git a/modules/ui.py b/modules/ui.py
index e6da1b2a..e5cb69d0 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -478,9 +478,7 @@ def create_toprow(is_img2img):
if is_img2img:
with gr.Column(scale=1, elem_id="interrogate_col"):
button_interrogate = gr.Button('Interrogate\nCLIP', elem_id="interrogate")
-
- if cmd_opts.deepdanbooru:
- button_deepbooru = gr.Button('Interrogate\nDeepBooru', elem_id="deepbooru")
+ button_deepbooru = gr.Button('Interrogate\nDeepBooru', elem_id="deepbooru")
with gr.Column(scale=1):
with gr.Row():
@@ -1004,11 +1002,10 @@ def create_ui(wrap_gradio_gpu_call):
outputs=[img2img_prompt],
)
- if cmd_opts.deepdanbooru:
- img2img_deepbooru.click(
- fn=interrogate_deepbooru,
- inputs=[init_img],
- outputs=[img2img_prompt],
+ img2img_deepbooru.click(
+ fn=interrogate_deepbooru,
+ inputs=[init_img],
+ outputs=[img2img_prompt],
)
--
cgit v1.2.1
From 1123f52cadf8d86c006177791b3191e5b8388b5a Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 26 Nov 2022 16:37:37 +0300
Subject: add 1024 module for hypernets for the new open clip
---
modules/ui.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index e5cb69d0..16f262c4 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1210,7 +1210,7 @@ def create_ui(wrap_gradio_gpu_call):
with gr.Tab(label="Create hypernetwork"):
new_hypernetwork_name = gr.Textbox(label="Name")
- new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"])
+ new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "1024", "320", "640", "1280"], choices=["768", "320", "640", "1280"])
new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure", placeholder="1st and last digit must be 1. ex:'1, 2, 1'")
new_hypernetwork_activation_func = gr.Dropdown(value="linear", label="Select activation function of hypernetwork. Recommended : Swish / Linear(none)", choices=modules.hypernetworks.ui.keys)
new_hypernetwork_initialization_option = gr.Dropdown(value = "Normal", label="Select Layer weights initialization. Recommended: Kaiming for relu-like, Xavier for sigmoid-like, Normal otherwise", choices=["Normal", "KaimingUniform", "KaimingNormal", "XavierUniform", "XavierNormal"])
--
cgit v1.2.1
From 64c7b7975cedeb2aaa1a9c8eb4a479fc575843f8 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 26 Nov 2022 16:45:57 +0300
Subject: restore hypernetworks to seemingly working state
---
modules/sd_hijack.py | 3 ++-
modules/ui.py | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index d5243fd3..64655eb1 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -9,6 +9,7 @@ from torch.nn.functional import silu
import modules.textual_inversion.textual_inversion
from modules import prompt_parser, devices, sd_hijack_optimizations, shared
+from modules.hypernetworks import hypernetwork
from modules.shared import cmd_opts
from modules import sd_hijack_clip, sd_hijack_open_clip
@@ -60,7 +61,7 @@ def apply_optimizations():
def undo_optimizations():
- ldm.modules.attention.CrossAttention.forward = attention_CrossAttention_forward # this stops hypernets from working
+ ldm.modules.attention.CrossAttention.forward = hypernetwork.attention_CrossAttention_forward
ldm.modules.diffusionmodules.model.nonlinearity = diffusionmodules_model_nonlinearity
ldm.modules.diffusionmodules.model.AttnBlock.forward = diffusionmodules_model_AttnBlock_forward
diff --git a/modules/ui.py b/modules/ui.py
index 16f262c4..c8b8fecd 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1210,7 +1210,7 @@ def create_ui(wrap_gradio_gpu_call):
with gr.Tab(label="Create hypernetwork"):
new_hypernetwork_name = gr.Textbox(label="Name")
- new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "1024", "320", "640", "1280"], choices=["768", "320", "640", "1280"])
+ new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "1024", "320", "640", "1280"])
new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure", placeholder="1st and last digit must be 1. ex:'1, 2, 1'")
new_hypernetwork_activation_func = gr.Dropdown(value="linear", label="Select activation function of hypernetwork. Recommended : Swish / Linear(none)", choices=modules.hypernetworks.ui.keys)
new_hypernetwork_initialization_option = gr.Dropdown(value = "Normal", label="Select Layer weights initialization. Recommended: Kaiming for relu-like, Xavier for sigmoid-like, Normal otherwise", choices=["Normal", "KaimingUniform", "KaimingNormal", "XavierUniform", "XavierNormal"])
--
cgit v1.2.1
From 755df94b2aa62eabd96f900e0dd7ddc83c2f692c Mon Sep 17 00:00:00 2001
From: flamelaw
Date: Sun, 27 Nov 2022 00:35:44 +0900
Subject: set TI AdamW default weight decay to 0
---
modules/textual_inversion/textual_inversion.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index fee08e33..b9b1394f 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -283,7 +283,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
shared.sd_model.first_stage_model.to(devices.cpu)
embedding.vec.requires_grad = True
- optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate)
+ optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate, weight_decay=0.0)
scaler = torch.cuda.amp.GradScaler()
batch_size = ds.batch_size
--
cgit v1.2.1
From b5050ad2071644f7b4c99660dc66a8a95136102f Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 26 Nov 2022 20:52:16 +0300
Subject: make SD2 compatible with --medvram setting
---
modules/lowvram.py | 8 ++++++++
1 file changed, 8 insertions(+)
(limited to 'modules')
diff --git a/modules/lowvram.py b/modules/lowvram.py
index a4652cb1..aa464a95 100644
--- a/modules/lowvram.py
+++ b/modules/lowvram.py
@@ -51,6 +51,10 @@ def setup_for_low_vram(sd_model, use_medvram):
send_me_to_gpu(first_stage_model, None)
return first_stage_model_decode(z)
+ # for SD1, cond_stage_model is CLIP and its NN is in the tranformer frield, but for SD2, it's open clip, and it's in model field
+ if hasattr(sd_model.cond_stage_model, 'model'):
+ sd_model.cond_stage_model.transformer = sd_model.cond_stage_model.model
+
# remove three big modules, cond, first_stage, and unet from the model and then
# send the model to GPU. Then put modules back. the modules will be in CPU.
stored = sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model
@@ -65,6 +69,10 @@ def setup_for_low_vram(sd_model, use_medvram):
sd_model.first_stage_model.decode = first_stage_model_decode_wrap
parents[sd_model.cond_stage_model.transformer] = sd_model.cond_stage_model
+ if hasattr(sd_model.cond_stage_model, 'model'):
+ sd_model.cond_stage_model.model = sd_model.cond_stage_model.transformer
+ del sd_model.cond_stage_model.transformer
+
if use_medvram:
sd_model.model.register_forward_pre_hook(send_me_to_gpu)
else:
--
cgit v1.2.1
From 1e506657e1cb732a5f0e567ba2585fba2bbb1327 Mon Sep 17 00:00:00 2001
From: MrCheeze
Date: Sat, 26 Nov 2022 13:28:44 -0500
Subject: no-half support for SD 2.0
---
modules/sd_models.py | 3 +++
1 file changed, 3 insertions(+)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index c59151e0..0e0bd79e 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -244,6 +244,9 @@ def load_model(checkpoint_info=None):
do_inpainting_hijack()
+ if shared.cmd_opts.no_half:
+ sd_config.model.params.unet_config.params.use_fp16 = False
+
sd_model = instantiate_from_config(sd_config.model)
load_model_weights(sd_model, checkpoint_info)
--
cgit v1.2.1
From c67c40f983997594f76b2312f92c3761e8d83715 Mon Sep 17 00:00:00 2001
From: Matthew McGoogan
Date: Sat, 26 Nov 2022 23:25:16 +0000
Subject: torch.cuda.empty_cache() defaults to cuda:0 device unless explicitly
set otherwise first. Updating torch_gc() to use the device set by --device-id
if specified to avoid OOM edge cases on multi-GPU systems.
---
modules/devices.py | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/devices.py b/modules/devices.py
index 67165bf6..93d82bbc 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -44,8 +44,18 @@ def get_optimal_device():
def torch_gc():
if torch.cuda.is_available():
- torch.cuda.empty_cache()
- torch.cuda.ipc_collect()
+ from modules import shared
+
+ device_id = shared.cmd_opts.device_id
+
+ if device_id is not None:
+ cuda_device = f"cuda:{device_id}"
+ else:
+ cuda_device = "cuda"
+
+ with torch.cuda.device(cuda_device):
+ torch.cuda.empty_cache()
+ torch.cuda.ipc_collect()
def enable_tf32():
--
cgit v1.2.1
From b006382784a2f0887317bb60ea49d19b50a5dc7e Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 27 Nov 2022 11:52:53 +0300
Subject: serve images from where they are saved instead of a temporary
directory add an option to choose a different temporary directory in the UI
add an option to cleanup the selected temporary directory at startup
---
modules/images.py | 2 ++
modules/shared.py | 7 ++++++
modules/ui.py | 16 -------------
modules/ui_tempdir.py | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 71 insertions(+), 16 deletions(-)
create mode 100644 modules/ui_tempdir.py
(limited to 'modules')
diff --git a/modules/images.py b/modules/images.py
index 26d5b7a9..8737ccff 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -524,6 +524,8 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i
else:
image.save(fullfn, quality=opts.jpeg_quality)
+ image.already_saved_as = fullfn
+
target_side_length = 4000
oversize = image.width > target_side_length or image.height > target_side_length
if opts.export_for_4chan and (oversize or os.stat(fullfn).st_size > 4 * 1024 * 1024):
diff --git a/modules/shared.py b/modules/shared.py
index 8fb1387a..af975f54 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -16,6 +16,9 @@ import modules.devices as devices
from modules import localization, sd_vae, extensions, script_loading
from modules.paths import models_path, script_path, sd_path
+
+demo = None
+
sd_model_file = os.path.join(script_path, 'model.ckpt')
default_sd_model_file = sd_model_file
parser = argparse.ArgumentParser()
@@ -292,6 +295,10 @@ options_templates.update(options_section(('saving-images', "Saving images/grids"
"use_original_name_batch": OptionInfo(False, "Use original name for output filename during batch process in extras tab"),
"save_selected_only": OptionInfo(True, "When using 'Save' button, only save a single selected image"),
"do_not_add_watermark": OptionInfo(False, "Do not add watermark to images"),
+
+ "temp_dir": OptionInfo("", "Directory for temporary images; leave empty for default"),
+ "clean_temp_dir_at_start": OptionInfo(False, "Cleanup non-default temporary directory when starting webui"),
+
}))
options_templates.update(options_section(('saving-paths', "Paths for saving"), {
diff --git a/modules/ui.py b/modules/ui.py
index c8b8fecd..ea925c40 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -157,22 +157,6 @@ def save_files(js_data, images, do_make_zip, index):
return gr.File.update(value=fullfns, visible=True), '', '', plaintext_to_html(f"Saved: {filenames[0]}")
-def save_pil_to_file(pil_image, dir=None):
- use_metadata = False
- metadata = PngImagePlugin.PngInfo()
- for key, value in pil_image.info.items():
- if isinstance(key, str) and isinstance(value, str):
- metadata.add_text(key, value)
- use_metadata = True
-
- file_obj = tempfile.NamedTemporaryFile(delete=False, suffix=".png", dir=dir)
- pil_image.save(file_obj, pnginfo=(metadata if use_metadata else None))
- return file_obj
-
-
-# override save to file function so that it also writes PNG info
-gr.processing_utils.save_pil_to_file = save_pil_to_file
-
def wrap_gradio_call(func, extra_outputs=None, add_stats=False):
def f(*args, extra_outputs_array=extra_outputs, **kwargs):
diff --git a/modules/ui_tempdir.py b/modules/ui_tempdir.py
new file mode 100644
index 00000000..9c6d3a9d
--- /dev/null
+++ b/modules/ui_tempdir.py
@@ -0,0 +1,62 @@
+import os
+import tempfile
+from collections import namedtuple
+
+import gradio as gr
+
+from PIL import PngImagePlugin
+
+from modules import shared
+
+
+Savedfile = namedtuple("Savedfile", ["name"])
+
+
+def save_pil_to_file(pil_image, dir=None):
+ already_saved_as = getattr(pil_image, 'already_saved_as', None)
+ if already_saved_as:
+ shared.demo.temp_dirs = shared.demo.temp_dirs | {os.path.abspath(os.path.dirname(already_saved_as))}
+ file_obj = Savedfile(already_saved_as)
+ return file_obj
+
+ if shared.opts.temp_dir != "":
+ dir = shared.opts.temp_dir
+
+ use_metadata = False
+ metadata = PngImagePlugin.PngInfo()
+ for key, value in pil_image.info.items():
+ if isinstance(key, str) and isinstance(value, str):
+ metadata.add_text(key, value)
+ use_metadata = True
+
+ file_obj = tempfile.NamedTemporaryFile(delete=False, suffix=".png", dir=dir)
+ pil_image.save(file_obj, pnginfo=(metadata if use_metadata else None))
+ return file_obj
+
+
+# override save to file function so that it also writes PNG info
+gr.processing_utils.save_pil_to_file = save_pil_to_file
+
+
+def on_tmpdir_changed():
+ if shared.opts.temp_dir == "" or shared.demo is None:
+ return
+
+ os.makedirs(shared.opts.temp_dir, exist_ok=True)
+
+ shared.demo.temp_dirs = shared.demo.temp_dirs | {os.path.abspath(shared.opts.temp_dir)}
+
+
+def cleanup_tmpdr():
+ temp_dir = shared.opts.temp_dir
+ if temp_dir == "" or not os.path.isdir(temp_dir):
+ return
+
+ for root, dirs, files in os.walk(temp_dir, topdown=False):
+ for name in files:
+ _, extension = os.path.splitext(name)
+ if extension != ".png":
+ continue
+
+ filename = os.path.join(root, name)
+ os.remove(filename)
--
cgit v1.2.1
From 5b2c316890b7b8af95f0d0334d1fd34b9a687b99 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 27 Nov 2022 13:08:54 +0300
Subject: eliminate duplicated code from #5095
---
modules/devices.py | 30 +++++++++++-------------------
1 file changed, 11 insertions(+), 19 deletions(-)
(limited to 'modules')
diff --git a/modules/devices.py b/modules/devices.py
index 93d82bbc..dd50fe24 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -24,17 +24,18 @@ def extract_device_id(args, name):
return None
-def get_optimal_device():
- if torch.cuda.is_available():
- from modules import shared
+def get_cuda_device_string():
+ from modules import shared
+
+ if shared.cmd_opts.device_id is not None:
+ return f"cuda:{shared.cmd_opts.device_id}"
- device_id = shared.cmd_opts.device_id
+ return "cuda"
- if device_id is not None:
- cuda_device = f"cuda:{device_id}"
- return torch.device(cuda_device)
- else:
- return torch.device("cuda")
+
+def get_optimal_device():
+ if torch.cuda.is_available():
+ return torch.device(get_cuda_device_string())
if has_mps():
return torch.device("mps")
@@ -44,16 +45,7 @@ def get_optimal_device():
def torch_gc():
if torch.cuda.is_available():
- from modules import shared
-
- device_id = shared.cmd_opts.device_id
-
- if device_id is not None:
- cuda_device = f"cuda:{device_id}"
- else:
- cuda_device = "cuda"
-
- with torch.cuda.device(cuda_device):
+ with torch.cuda.device(get_cuda_device_string()):
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
--
cgit v1.2.1
From 40ca34b837b5068ec35b8d5681bae32cf28f5816 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 27 Nov 2022 13:17:39 +0300
Subject: fix for broken sampler selection in img2img and xy plot #4860 #4909
---
modules/img2img.py | 2 +-
modules/processing.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/img2img.py b/modules/img2img.py
index 9fc5b693..7e58994a 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -99,7 +99,7 @@ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, pro
seed_resize_from_h=seed_resize_from_h,
seed_resize_from_w=seed_resize_from_w,
seed_enable_extras=seed_enable_extras,
- sampler_index=sd_samplers.samplers_for_img2img[sampler_index].name,
+ sampler_name=sd_samplers.samplers_for_img2img[sampler_index].name,
batch_size=batch_size,
n_iter=n_iter,
steps=steps,
diff --git a/modules/processing.py b/modules/processing.py
index c310df6a..edceb532 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -74,7 +74,7 @@ class StableDiffusionProcessing():
"""
def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, sampler_index: int = None):
if sampler_index is not None:
- warnings.warn("sampler_index argument for StableDiffusionProcessing does not do anything; use sampler_name")
+ print("sampler_index argument for StableDiffusionProcessing does not do anything; use sampler_name", file=sys.stderr)
self.sd_model = sd_model
self.outpath_samples: str = outpath_samples
--
cgit v1.2.1
From 10923f9b3a10a9af20429e51242614e259fbd434 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 27 Nov 2022 13:43:10 +0300
Subject: calculate dictionary for sampler names only once
---
modules/sd_samplers.py | 7 +++++++
1 file changed, 7 insertions(+)
(limited to 'modules')
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 43ce34eb..6f8ccf1d 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -52,6 +52,7 @@ all_samplers_map = {x.name: x for x in all_samplers}
samplers = []
samplers_for_img2img = []
+samplers_map = {}
def create_sampler(name, model):
@@ -77,6 +78,12 @@ def set_samplers():
samplers = [x for x in all_samplers if x.name not in hidden]
samplers_for_img2img = [x for x in all_samplers if x.name not in hidden_img2img]
+ samplers_map.clear()
+ for sampler in all_samplers:
+ samplers_map[sampler.name.lower()] = sampler.name
+ for alias in sampler.aliases:
+ samplers_map[alias.lower()] = sampler.name
+
set_samplers()
--
cgit v1.2.1
From 6074175faa751dde933aa8e15cd687ca4e4b4a23 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 27 Nov 2022 14:46:40 +0300
Subject: add safetensors to requirements
---
modules/sd_models.py | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index ae36841a..77236480 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -5,6 +5,7 @@ import gc
from collections import namedtuple
import torch
import re
+import safetensors.torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config
@@ -173,14 +174,12 @@ def load_model_weights(model, checkpoint_info, vae_file="auto"):
# load from file
print(f"Loading weights [{sd_model_hash}] from {checkpoint_file}")
- if checkpoint_file.endswith(".safetensors"):
- try:
- from safetensors.torch import load_file
- except ImportError as e:
- raise ImportError(f"The model is in safetensors format and it is not installed, use `pip install safetensors`: {e}")
- pl_sd = load_file(checkpoint_file, device=shared.weight_load_location)
+ _, extension = os.path.splitext(checkpoint_file)
+ if extension.lower() == ".safetensors":
+ pl_sd = safetensors.torch.load_file(checkpoint_file, device=shared.weight_load_location)
else:
pl_sd = torch.load(checkpoint_file, map_location=shared.weight_load_location)
+
if "global_step" in pl_sd:
print(f"Global Step: {pl_sd['global_step']}")
--
cgit v1.2.1
From dac9b6f15de5e675053d9490a20e0457dcd1a23e Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 27 Nov 2022 15:51:29 +0300
Subject: add safetensors support for model merging #4869
---
modules/extras.py | 26 ++++++++++++++------------
modules/sd_models.py | 26 +++++++++++++++-----------
modules/ui.py | 7 ++++++-
3 files changed, 35 insertions(+), 24 deletions(-)
(limited to 'modules')
diff --git a/modules/extras.py b/modules/extras.py
index 71b93a06..3d65d90a 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -20,6 +20,7 @@ import modules.codeformer_model
import piexif
import piexif.helper
import gradio as gr
+import safetensors.torch
class LruCache(OrderedDict):
@@ -249,7 +250,7 @@ def run_pnginfo(image):
return '', geninfo, info
-def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_name, interp_method, multiplier, save_as_half, custom_name):
+def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_name, interp_method, multiplier, save_as_half, custom_name, checkpoint_format):
def weighted_sum(theta0, theta1, alpha):
return ((1 - alpha) * theta0) + (alpha * theta1)
@@ -264,19 +265,15 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
teritary_model_info = sd_models.checkpoints_list.get(teritary_model_name, None)
print(f"Loading {primary_model_info.filename}...")
- primary_model = torch.load(primary_model_info.filename, map_location='cpu')
- theta_0 = sd_models.get_state_dict_from_checkpoint(primary_model)
+ theta_0 = sd_models.read_state_dict(primary_model_info.filename, map_location='cpu')
print(f"Loading {secondary_model_info.filename}...")
- secondary_model = torch.load(secondary_model_info.filename, map_location='cpu')
- theta_1 = sd_models.get_state_dict_from_checkpoint(secondary_model)
+ theta_1 = sd_models.read_state_dict(secondary_model_info.filename, map_location='cpu')
if teritary_model_info is not None:
print(f"Loading {teritary_model_info.filename}...")
- teritary_model = torch.load(teritary_model_info.filename, map_location='cpu')
- theta_2 = sd_models.get_state_dict_from_checkpoint(teritary_model)
+ theta_2 = sd_models.read_state_dict(teritary_model_info.filename, map_location='cpu')
else:
- teritary_model = None
theta_2 = None
theta_funcs = {
@@ -295,7 +292,7 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
theta_1[key] = theta_func1(theta_1[key], t2)
else:
theta_1[key] = torch.zeros_like(theta_1[key])
- del theta_2, teritary_model
+ del theta_2
for key in tqdm.tqdm(theta_0.keys()):
if 'model' in key and key in theta_1:
@@ -314,12 +311,17 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
ckpt_dir = shared.cmd_opts.ckpt_dir or sd_models.model_path
- filename = primary_model_info.model_name + '_' + str(round(1-multiplier, 2)) + '-' + secondary_model_info.model_name + '_' + str(round(multiplier, 2)) + '-' + interp_method.replace(" ", "_") + '-merged.ckpt'
- filename = filename if custom_name == '' else (custom_name + '.ckpt')
+ filename = primary_model_info.model_name + '_' + str(round(1-multiplier, 2)) + '-' + secondary_model_info.model_name + '_' + str(round(multiplier, 2)) + '-' + interp_method.replace(" ", "_") + '-merged.' + checkpoint_format
+ filename = filename if custom_name == '' else (custom_name + '.' + checkpoint_format)
output_modelname = os.path.join(ckpt_dir, filename)
print(f"Saving to {output_modelname}...")
- torch.save(primary_model, output_modelname)
+
+ _, extension = os.path.splitext(output_modelname)
+ if extension.lower() == ".safetensors":
+ safetensors.torch.save_file(theta_0, output_modelname, metadata={"format": "pt"})
+ else:
+ torch.save(theta_0, output_modelname)
sd_models.list_models()
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 77236480..a1ea5611 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -160,6 +160,20 @@ def get_state_dict_from_checkpoint(pl_sd):
return pl_sd
+def read_state_dict(checkpoint_file, print_global_state=False, map_location=None):
+ _, extension = os.path.splitext(checkpoint_file)
+ if extension.lower() == ".safetensors":
+ pl_sd = safetensors.torch.load_file(checkpoint_file, device=map_location or shared.weight_load_location)
+ else:
+ pl_sd = torch.load(checkpoint_file, map_location=map_location or shared.weight_load_location)
+
+ if print_global_state and "global_step" in pl_sd:
+ print(f"Global Step: {pl_sd['global_step']}")
+
+ sd = get_state_dict_from_checkpoint(pl_sd)
+ return sd
+
+
def load_model_weights(model, checkpoint_info, vae_file="auto"):
checkpoint_file = checkpoint_info.filename
sd_model_hash = checkpoint_info.hash
@@ -174,17 +188,7 @@ def load_model_weights(model, checkpoint_info, vae_file="auto"):
# load from file
print(f"Loading weights [{sd_model_hash}] from {checkpoint_file}")
- _, extension = os.path.splitext(checkpoint_file)
- if extension.lower() == ".safetensors":
- pl_sd = safetensors.torch.load_file(checkpoint_file, device=shared.weight_load_location)
- else:
- pl_sd = torch.load(checkpoint_file, map_location=shared.weight_load_location)
-
- if "global_step" in pl_sd:
- print(f"Global Step: {pl_sd['global_step']}")
-
- sd = get_state_dict_from_checkpoint(pl_sd)
- del pl_sd
+ sd = read_state_dict(checkpoint_file)
model.load_state_dict(sd, strict=False)
del sd
diff --git a/modules/ui.py b/modules/ui.py
index de2b5544..aa13978d 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1164,7 +1164,11 @@ def create_ui(wrap_gradio_gpu_call):
custom_name = gr.Textbox(label="Custom Name (Optional)")
interp_amount = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Multiplier (M) - set to 0 to get model A', value=0.3)
interp_method = gr.Radio(choices=["Weighted sum", "Add difference"], value="Weighted sum", label="Interpolation Method")
- save_as_half = gr.Checkbox(value=False, label="Save as float16")
+
+ with gr.Row():
+ checkpoint_format = gr.Radio(choices=["ckpt", "safetensors"], value="ckpt", label="Checkpoint format")
+ save_as_half = gr.Checkbox(value=False, label="Save as float16")
+
modelmerger_merge = gr.Button(elem_id="modelmerger_merge", label="Merge", variant='primary')
with gr.Column(variant='panel'):
@@ -1692,6 +1696,7 @@ def create_ui(wrap_gradio_gpu_call):
interp_amount,
save_as_half,
custom_name,
+ checkpoint_format,
],
outputs=[
submit_result,
--
cgit v1.2.1
From 3cf93de24f90247af33ab9cf743a6eb45308d668 Mon Sep 17 00:00:00 2001
From: Billy Cao
Date: Sun, 27 Nov 2022 21:12:37 +0800
Subject: Fix sampler_name for API requests are being ignored
---
modules/api/api.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index efcedbba..53980551 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -112,7 +112,7 @@ class Api:
def text2imgapi(self, txt2imgreq: StableDiffusionTxt2ImgProcessingAPI):
populate = txt2imgreq.copy(update={ # Override __init__ params
"sd_model": shared.sd_model,
- "sampler_name": validate_sampler_name(txt2imgreq.sampler_index),
+ "sampler_name": validate_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index),
"do_not_save_samples": True,
"do_not_save_grid": True
}
@@ -142,7 +142,7 @@ class Api:
populate = img2imgreq.copy(update={ # Override __init__ params
"sd_model": shared.sd_model,
- "sampler_name": validate_sampler_name(img2imgreq.sampler_index),
+ "sampler_name": validate_sampler_name(img2imgreq.sampler_name or img2imgreq.sampler_index),
"do_not_save_samples": True,
"do_not_save_grid": True,
"mask": mask
--
cgit v1.2.1
From 06ada734c7f85e5e6e2e6ae78fb873be0222bfd5 Mon Sep 17 00:00:00 2001
From: Billy Cao
Date: Sun, 27 Nov 2022 21:19:47 +0800
Subject: Prevent warning on sampler_index if sampler_name is being used
---
modules/api/api.py | 4 ++++
1 file changed, 4 insertions(+)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 53980551..2f450fc4 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -117,6 +117,8 @@ class Api:
"do_not_save_grid": True
}
)
+ if populate.sampler_name:
+ populate.sampler_index = None # prevent a warning later on
p = StableDiffusionProcessingTxt2Img(**vars(populate))
# Override object param
@@ -148,6 +150,8 @@ class Api:
"mask": mask
}
)
+ if populate.sampler_name:
+ populate.sampler_index = None # prevent a warning later on
p = StableDiffusionProcessingImg2Img(**vars(populate))
imgs = []
--
cgit v1.2.1
From 185ab3cbd1330f6dbc87e5dd72cf115b3edfd153 Mon Sep 17 00:00:00 2001
From: cat
Date: Sat, 26 Nov 2022 20:07:51 +0500
Subject: Atomically rename saved image to avoid race condition with other
processes.
---
modules/images.py | 51 +++++++++++++++++++++++++++++----------------------
1 file changed, 29 insertions(+), 22 deletions(-)
(limited to 'modules')
diff --git a/modules/images.py b/modules/images.py
index b968d6a6..51b1ff9c 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -500,30 +500,39 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i
image = params.image
fullfn = params.filename
info = params.pnginfo.get(pnginfo_section_name, None)
- fullfn_without_extension, extension = os.path.splitext(params.filename)
- def exif_bytes():
- return piexif.dump({
- "Exif": {
- piexif.ExifIFD.UserComment: piexif.helper.UserComment.dump(info or "", encoding="unicode")
- },
- })
+ def _atomically_save_image(image_to_save, filename_without_extension, extension):
+ # save image with .tmp extension to avoid race condition when another process detects new image in the directory
+ temp_file_path = filename_without_extension + ".tmp"
+ image_format = Image.registered_extensions()[extension]
- if extension.lower() == '.png':
- pnginfo_data = PngImagePlugin.PngInfo()
- if opts.enable_pnginfo:
- for k, v in params.pnginfo.items():
- pnginfo_data.add_text(k, str(v))
+ if extension.lower() == '.png':
+ pnginfo_data = PngImagePlugin.PngInfo()
+ if opts.enable_pnginfo:
+ for k, v in params.pnginfo.items():
+ pnginfo_data.add_text(k, str(v))
- image.save(fullfn, quality=opts.jpeg_quality, pnginfo=pnginfo_data)
+ image_to_save.save(temp_file_path, format=image_format, quality=opts.jpeg_quality, pnginfo=pnginfo_data)
- elif extension.lower() in (".jpg", ".jpeg", ".webp"):
- image.save(fullfn, quality=opts.jpeg_quality)
+ elif extension.lower() in (".jpg", ".jpeg", ".webp"):
+ image_to_save.save(temp_file_path, format=image_format, quality=opts.jpeg_quality)
- if opts.enable_pnginfo and info is not None:
- piexif.insert(exif_bytes(), fullfn)
- else:
- image.save(fullfn, quality=opts.jpeg_quality)
+ if opts.enable_pnginfo and info is not None:
+ exif_bytes = piexif.dump({
+ "Exif": {
+ piexif.ExifIFD.UserComment: piexif.helper.UserComment.dump(info or "", encoding="unicode")
+ },
+ })
+
+ piexif.insert(exif_bytes, temp_file_path)
+ else:
+ image_to_save.save(temp_file_path, format=image_format, quality=opts.jpeg_quality)
+
+ # atomically rename the file with correct extension
+ os.replace(temp_file_path, filename_without_extension + extension)
+
+ fullfn_without_extension, extension = os.path.splitext(params.filename)
+ _atomically_save_image(image, fullfn_without_extension, extension)
image.already_saved_as = fullfn
@@ -537,9 +546,7 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i
elif oversize:
image = image.resize((image.width * target_side_length // image.height, target_side_length), LANCZOS)
- image.save(fullfn_without_extension + ".jpg", quality=opts.jpeg_quality)
- if opts.enable_pnginfo and info is not None:
- piexif.insert(exif_bytes(), fullfn_without_extension + ".jpg")
+ _atomically_save_image(image, fullfn_without_extension, ".jpg")
if opts.save_txt and info is not None:
txt_fullfn = f"{fullfn_without_extension}.txt"
--
cgit v1.2.1
From 506d529d19f135f57e142371271f84d4971b456f Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 27 Nov 2022 16:28:32 +0300
Subject: rework #5012 to also work for pictures dragged into the prompt and
also add Clip skip + ENSD to parameters
---
modules/extras.py | 40 ++++--------------------------
modules/generation_parameters_copypaste.py | 1 +
modules/images.py | 38 +++++++++++++++++++++++++++-
modules/sd_samplers.py | 2 +-
4 files changed, 44 insertions(+), 37 deletions(-)
(limited to 'modules')
diff --git a/modules/extras.py b/modules/extras.py
index 0057bf9c..6021a024 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -1,6 +1,8 @@
from __future__ import annotations
import math
import os
+import sys
+import traceback
import numpy as np
from PIL import Image
@@ -12,7 +14,7 @@ from typing import Callable, List, OrderedDict, Tuple
from functools import partial
from dataclasses import dataclass
-from modules import processing, shared, images, devices, sd_models
+from modules import processing, shared, images, devices, sd_models, sd_samplers
from modules.shared import opts
import modules.gfpgan_model
from modules.ui import plaintext_to_html
@@ -22,7 +24,6 @@ import piexif.helper
import gradio as gr
import safetensors.torch
-
class LruCache(OrderedDict):
@dataclass(frozen=True)
class Key:
@@ -214,39 +215,8 @@ def run_pnginfo(image):
if image is None:
return '', '', ''
- items = image.info
- geninfo = ''
-
- if "exif" in image.info:
- exif = piexif.load(image.info["exif"])
- exif_comment = (exif or {}).get("Exif", {}).get(piexif.ExifIFD.UserComment, b'')
- try:
- exif_comment = piexif.helper.UserComment.load(exif_comment)
- except ValueError:
- exif_comment = exif_comment.decode('utf8', errors="ignore")
-
- items['exif comment'] = exif_comment
- geninfo = exif_comment
-
- for field in ['jfif', 'jfif_version', 'jfif_unit', 'jfif_density', 'dpi', 'exif',
- 'loop', 'background', 'timestamp', 'duration']:
- items.pop(field, None)
-
- geninfo = items.get('parameters', geninfo)
-
- # nai prompt
- if "Software" in items.keys() and items["Software"] == "NovelAI":
- import json
- json_info = json.loads(items["Comment"])
- geninfo = f'{items["Description"]}\r\nNegative prompt: {json_info["uc"]}\r\n'
- sampler = "Euler a"
- if json_info["sampler"] == "k_euler_ancestral":
- sampler = "Euler a"
- elif json_info["sampler"] == "k_euler":
- sampler = "Euler"
- model_hash = '925997e9' # assuming this is the correct model hash
- # not sure with noise and strength parameter
- geninfo += f'Steps: {json_info["steps"]}, Sampler: {sampler}, CFG scale: {json_info["scale"]}, Seed: {json_info["seed"]}, Size: {image.width}x{image.height}, Model hash: {model_hash}' # , Denoising strength: {json_info["noise"]}'
+ geninfo, items = images.read_info_from_image(image)
+ items = {**{'parameters': geninfo}, **items}
info = ''
for key, text in items.items():
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index 1408ea05..0973c695 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -75,6 +75,7 @@ def integrate_settings_paste_fields(component_dict):
'CLIP_stop_at_last_layers': 'Clip skip',
'inpainting_mask_weight': 'Conditional mask weight',
'sd_model_checkpoint': 'Model hash',
+ 'eta_noise_seed_delta': 'ENSD',
}
settings_paste_fields = [
(component_dict[k], lambda d, k=k, v=v: ui.apply_setting(k, d.get(v, None)))
diff --git a/modules/images.py b/modules/images.py
index b968d6a6..08a72e67 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -15,6 +15,7 @@ import piexif.helper
from PIL import Image, ImageFont, ImageDraw, PngImagePlugin
from fonts.ttf import Roboto
import string
+import json
from modules import sd_samplers, shared, script_callbacks
from modules.shared import opts, cmd_opts
@@ -553,10 +554,45 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i
return fullfn, txt_fullfn
+def read_info_from_image(image):
+ items = image.info or {}
+
+ geninfo = items.pop('parameters', None)
+
+ if "exif" in items:
+ exif = piexif.load(items["exif"])
+ exif_comment = (exif or {}).get("Exif", {}).get(piexif.ExifIFD.UserComment, b'')
+ try:
+ exif_comment = piexif.helper.UserComment.load(exif_comment)
+ except ValueError:
+ exif_comment = exif_comment.decode('utf8', errors="ignore")
+
+ items['exif comment'] = exif_comment
+ geninfo = exif_comment
+
+ for field in ['jfif', 'jfif_version', 'jfif_unit', 'jfif_density', 'dpi', 'exif',
+ 'loop', 'background', 'timestamp', 'duration']:
+ items.pop(field, None)
+
+ if items.get("Software", None) == "NovelAI":
+ try:
+ json_info = json.loads(items["Comment"])
+ sampler = sd_samplers.samplers_map.get(json_info["sampler"], "Euler a")
+
+ geninfo = f"""{items["Description"]}
+Negative prompt: {json_info["uc"]}
+Steps: {json_info["steps"]}, Sampler: {sampler}, CFG scale: {json_info["scale"]}, Seed: {json_info["seed"]}, Size: {image.width}x{image.height}, Clip skip: 2, ENSD: 31337"""
+ except Exception:
+ print(f"Error parsing NovelAI iamge generation parameters:", file=sys.stderr)
+ print(traceback.format_exc(), file=sys.stderr)
+
+ return geninfo, items
+
+
def image_data(data):
try:
image = Image.open(io.BytesIO(data))
- textinfo = image.text["parameters"]
+ textinfo, _ = read_info_from_image(image)
return textinfo, None
except Exception:
pass
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 2ca17d8b..5fefb227 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -18,7 +18,7 @@ from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback
SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases', 'options'])
samplers_k_diffusion = [
- ('Euler a', 'sample_euler_ancestral', ['k_euler_a'], {}),
+ ('Euler a', 'sample_euler_ancestral', ['k_euler_a', 'k_euler_ancestral'], {}),
('Euler', 'sample_euler', ['k_euler'], {}),
('LMS', 'sample_lms', ['k_lms'], {}),
('Heun', 'sample_heun', ['k_heun'], {}),
--
cgit v1.2.1
From 8c13f3a2a56b2ab8d6ff0bc5b2ffe0313e74f323 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 27 Nov 2022 16:35:35 +0300
Subject: cherrypick from #4971
---
modules/ui.py | 1 +
1 file changed, 1 insertion(+)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index aa13978d..446bee40 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1044,6 +1044,7 @@ def create_ui(wrap_gradio_gpu_call):
(seed_resize_from_w, "Seed resize from-1"),
(seed_resize_from_h, "Seed resize from-2"),
(denoising_strength, "Denoising strength"),
+ (mask_blur, "Mask blur"),
*modules.scripts.scripts_img2img.infotext_fields
]
parameters_copypaste.add_paste_fields("img2img", init_img, img2img_paste_fields)
--
cgit v1.2.1
From 9146a5884cbdf67c019685950f7ad0b3f7bd9230 Mon Sep 17 00:00:00 2001
From: uservar <63248296+uservar@users.noreply.github.com>
Date: Sun, 27 Nov 2022 19:11:50 +0000
Subject: Better should hijack inpainting detection
---
modules/sd_hijack_inpainting.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py
index 938f9a58..5dcbbed9 100644
--- a/modules/sd_hijack_inpainting.py
+++ b/modules/sd_hijack_inpainting.py
@@ -1,3 +1,4 @@
+import os
import torch
from einops import repeat
@@ -319,7 +320,9 @@ class LatentInpaintDiffusion(LatentDiffusion):
def should_hijack_inpainting(checkpoint_info):
- return str(checkpoint_info.filename).endswith("inpainting.ckpt") and not checkpoint_info.config.endswith("inpainting.yaml")
+ ckpt_basename = os.path.basename(checkpoint_info.filename).lower()
+ cfg_basename = os.path.basename(checkpoint_info.config).lower()
+ return "inpainting" in ckpt_basename and not "inpainting" in cfg_basename
def do_inpainting_hijack():
--
cgit v1.2.1
From aa12dfada05a1f5bef558f24f3a318a1c293a01f Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 27 Nov 2022 23:04:42 +0300
Subject: fix the bug that makes it impossible to send images to other tabs
---
modules/generation_parameters_copypaste.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index 0973c695..01980dca 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -2,6 +2,8 @@ import base64
import io
import os
import re
+from pathlib import Path
+
import gradio as gr
from modules.shared import script_path
from modules import shared
@@ -35,9 +37,8 @@ def quote(text):
def image_from_url_text(filedata):
if type(filedata) == dict and filedata["is_file"]:
filename = filedata["name"]
- tempdir = os.path.normpath(tempfile.gettempdir())
- normfn = os.path.normpath(filename)
- assert normfn.startswith(tempdir), 'trying to open image file not in temporary directory'
+ is_in_right_dir = any(Path(temp_dir).resolve() in Path(filename).resolve().parents for temp_dir in shared.demo.temp_dirs)
+ assert is_in_right_dir, 'trying to open image file outside of allowed directories'
return Image.open(filename)
--
cgit v1.2.1
From bb11bee22ab02aa2fb5b96baa9be8103fff19e6a Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 27 Nov 2022 23:14:13 +0300
Subject: if image on disk was deleted between being generated and request
being completed, do use temporary dir to store it for the browser
---
modules/ui_tempdir.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/ui_tempdir.py b/modules/ui_tempdir.py
index 9c6d3a9d..07210d14 100644
--- a/modules/ui_tempdir.py
+++ b/modules/ui_tempdir.py
@@ -14,7 +14,7 @@ Savedfile = namedtuple("Savedfile", ["name"])
def save_pil_to_file(pil_image, dir=None):
already_saved_as = getattr(pil_image, 'already_saved_as', None)
- if already_saved_as:
+ if already_saved_as and os.path.isfile(already_saved_as):
shared.demo.temp_dirs = shared.demo.temp_dirs | {os.path.abspath(os.path.dirname(already_saved_as))}
file_obj = Savedfile(already_saved_as)
return file_obj
--
cgit v1.2.1
From 0376da180c81a11880a2587903d69d85541051e7 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Mon, 28 Nov 2022 08:39:59 +0300
Subject: make it possible to save nai model using safetensors
---
modules/sd_models.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index a1ea5611..283cf1cd 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -144,8 +144,8 @@ def transform_checkpoint_dict_key(k):
def get_state_dict_from_checkpoint(pl_sd):
- if "state_dict" in pl_sd:
- pl_sd = pl_sd["state_dict"]
+ pl_sd = pl_sd.pop("state_dict", pl_sd)
+ pl_sd.pop("state_dict", None)
sd = {}
for k, v in pl_sd.items():
--
cgit v1.2.1
From 0b5dcb3d7ce397ad38312dbfc70febe7bb42dcc3 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Mon, 28 Nov 2022 09:00:10 +0300
Subject: fix an error that happens when you type into prompt while switching
model, put queue stuff into separate file
---
modules/call_queue.py | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++
modules/ui.py | 67 ++---------------------------------
2 files changed, 101 insertions(+), 64 deletions(-)
create mode 100644 modules/call_queue.py
(limited to 'modules')
diff --git a/modules/call_queue.py b/modules/call_queue.py
new file mode 100644
index 00000000..4cd49533
--- /dev/null
+++ b/modules/call_queue.py
@@ -0,0 +1,98 @@
+import html
+import sys
+import threading
+import traceback
+import time
+
+from modules import shared
+
+queue_lock = threading.Lock()
+
+
+def wrap_queued_call(func):
+ def f(*args, **kwargs):
+ with queue_lock:
+ res = func(*args, **kwargs)
+
+ return res
+
+ return f
+
+
+def wrap_gradio_gpu_call(func, extra_outputs=None):
+ def f(*args, **kwargs):
+
+ shared.state.begin()
+
+ with queue_lock:
+ res = func(*args, **kwargs)
+
+ shared.state.end()
+
+ return res
+
+ return wrap_gradio_call(f, extra_outputs=extra_outputs, add_stats=True)
+
+
+def wrap_gradio_call(func, extra_outputs=None, add_stats=False):
+ def f(*args, extra_outputs_array=extra_outputs, **kwargs):
+ run_memmon = shared.opts.memmon_poll_rate > 0 and not shared.mem_mon.disabled and add_stats
+ if run_memmon:
+ shared.mem_mon.monitor()
+ t = time.perf_counter()
+
+ try:
+ res = list(func(*args, **kwargs))
+ except Exception as e:
+ # When printing out our debug argument list, do not print out more than a MB of text
+ max_debug_str_len = 131072 # (1024*1024)/8
+
+ print("Error completing request", file=sys.stderr)
+ argStr = f"Arguments: {str(args)} {str(kwargs)}"
+ print(argStr[:max_debug_str_len], file=sys.stderr)
+ if len(argStr) > max_debug_str_len:
+ print(f"(Argument list truncated at {max_debug_str_len}/{len(argStr)} characters)", file=sys.stderr)
+
+ print(traceback.format_exc(), file=sys.stderr)
+
+ shared.state.job = ""
+ shared.state.job_count = 0
+
+ if extra_outputs_array is None:
+ extra_outputs_array = [None, '']
+
+ res = extra_outputs_array + [f"{html.escape(type(e).__name__+': '+str(e))}
"]
+
+ shared.state.skipped = False
+ shared.state.interrupted = False
+ shared.state.job_count = 0
+
+ if not add_stats:
+ return tuple(res)
+
+ elapsed = time.perf_counter() - t
+ elapsed_m = int(elapsed // 60)
+ elapsed_s = elapsed % 60
+ elapsed_text = f"{elapsed_s:.2f}s"
+ if elapsed_m > 0:
+ elapsed_text = f"{elapsed_m}m "+elapsed_text
+
+ if run_memmon:
+ mem_stats = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.stop().items()}
+ active_peak = mem_stats['active_peak']
+ reserved_peak = mem_stats['reserved_peak']
+ sys_peak = mem_stats['system_peak']
+ sys_total = mem_stats['total']
+ sys_pct = round(sys_peak/max(sys_total, 1) * 100, 2)
+
+ vram_html = f"Torch active/reserved: {active_peak}/{reserved_peak} MiB, Sys VRAM: {sys_peak}/{sys_total} MiB ({sys_pct}%)
"
+ else:
+ vram_html = ''
+
+ # last item is always HTML
+ res[-1] += f""
+
+ return tuple(res)
+
+ return f
+
diff --git a/modules/ui.py b/modules/ui.py
index 446bee40..00809361 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -17,7 +17,7 @@ import gradio.routes
import gradio.utils
import numpy as np
from PIL import Image, PngImagePlugin
-
+from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call, wrap_gradio_call
from modules import sd_hijack, sd_models, localization, script_callbacks, ui_extensions, deepbooru
from modules.paths import script_path
@@ -158,67 +158,6 @@ def save_files(js_data, images, do_make_zip, index):
return gr.File.update(value=fullfns, visible=True), '', '', plaintext_to_html(f"Saved: {filenames[0]}")
-def wrap_gradio_call(func, extra_outputs=None, add_stats=False):
- def f(*args, extra_outputs_array=extra_outputs, **kwargs):
- run_memmon = opts.memmon_poll_rate > 0 and not shared.mem_mon.disabled and add_stats
- if run_memmon:
- shared.mem_mon.monitor()
- t = time.perf_counter()
-
- try:
- res = list(func(*args, **kwargs))
- except Exception as e:
- # When printing out our debug argument list, do not print out more than a MB of text
- max_debug_str_len = 131072 # (1024*1024)/8
-
- print("Error completing request", file=sys.stderr)
- argStr = f"Arguments: {str(args)} {str(kwargs)}"
- print(argStr[:max_debug_str_len], file=sys.stderr)
- if len(argStr) > max_debug_str_len:
- print(f"(Argument list truncated at {max_debug_str_len}/{len(argStr)} characters)", file=sys.stderr)
-
- print(traceback.format_exc(), file=sys.stderr)
-
- shared.state.job = ""
- shared.state.job_count = 0
-
- if extra_outputs_array is None:
- extra_outputs_array = [None, '']
-
- res = extra_outputs_array + [f"{plaintext_to_html(type(e).__name__+': '+str(e))}
"]
-
- shared.state.skipped = False
- shared.state.interrupted = False
- shared.state.job_count = 0
-
- if not add_stats:
- return tuple(res)
-
- elapsed = time.perf_counter() - t
- elapsed_m = int(elapsed // 60)
- elapsed_s = elapsed % 60
- elapsed_text = f"{elapsed_s:.2f}s"
- if elapsed_m > 0:
- elapsed_text = f"{elapsed_m}m "+elapsed_text
-
- if run_memmon:
- mem_stats = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.stop().items()}
- active_peak = mem_stats['active_peak']
- reserved_peak = mem_stats['reserved_peak']
- sys_peak = mem_stats['system_peak']
- sys_total = mem_stats['total']
- sys_pct = round(sys_peak/max(sys_total, 1) * 100, 2)
-
- vram_html = f"Torch active/reserved: {active_peak}/{reserved_peak} MiB, Sys VRAM: {sys_peak}/{sys_total} MiB ({sys_pct}%)
"
- else:
- vram_html = ''
-
- # last item is always HTML
- res[-1] += f""
-
- return tuple(res)
-
- return f
def calc_time_left(progress, threshold, label, force_display):
@@ -666,7 +605,7 @@ Requested path was: {f}
return result_gallery, generation_info if tabname != "extras" else html_info_x, html_info
-def create_ui(wrap_gradio_gpu_call):
+def create_ui():
import modules.img2img
import modules.txt2img
@@ -826,7 +765,7 @@ def create_ui(wrap_gradio_gpu_call):
height,
]
- token_button.click(fn=update_token_counter, inputs=[txt2img_prompt, steps], outputs=[token_counter])
+ token_button.click(fn=wrap_queued_call(update_token_counter), inputs=[txt2img_prompt, steps], outputs=[token_counter])
modules.scripts.scripts_current = modules.scripts.scripts_img2img
modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True)
--
cgit v1.2.1
From 67efee33a6c65e58b3f6c788993d0e68a33e4fd0 Mon Sep 17 00:00:00 2001
From: klimaleksus
Date: Mon, 28 Nov 2022 16:29:43 +0500
Subject: Make VAE step sequential to prevent VRAM spikes
---
modules/processing.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index edceb532..fd995b8a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -530,8 +530,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
with devices.autocast():
samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength, prompts=prompts)
- samples_ddim = samples_ddim.to(devices.dtype_vae)
- x_samples_ddim = decode_first_stage(p.sd_model, samples_ddim)
+ x_samples_ddim = [decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae))[0].cpu() for i in range(samples_ddim.size(0))]
+ x_samples_ddim = torch.stack(x_samples_ddim).float()
x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
del samples_ddim
--
cgit v1.2.1
From 98ca437edfbf71dd956d67d37f2136b12d13be0d Mon Sep 17 00:00:00 2001
From: brkirch
Date: Sat, 12 Nov 2022 02:17:55 -0500
Subject: Refactor and instead check if mps is being used, not availability
---
modules/sd_hijack.py | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index b824b5bf..ce583950 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -182,11 +182,7 @@ def register_buffer(self, name, attr):
if type(attr) == torch.Tensor:
if attr.device != devices.device:
-
- if devices.has_mps():
- attr = attr.to(device="mps", dtype=torch.float32)
- else:
- attr = attr.to(devices.device)
+ attr = attr.to(device=devices.device, dtype=(torch.float32 if devices.device.type == 'mps' else None))
setattr(self, name, attr)
--
cgit v1.2.1
From 75c4511e6b81ae8fb0dbd932043e8eb35cd09f72 Mon Sep 17 00:00:00 2001
From: zhaohu xing <920232796@qq.com>
Date: Tue, 29 Nov 2022 10:28:41 +0800
Subject: add AltDiffusion to webui
Signed-off-by: zhaohu xing <920232796@qq.com>
---
modules/devices.py | 4 ++--
modules/sd_hijack.py | 23 +++++++++++++++++------
modules/shared.py | 6 +++++-
3 files changed, 24 insertions(+), 9 deletions(-)
(limited to 'modules')
diff --git a/modules/devices.py b/modules/devices.py
index 67165bf6..f30b6ebc 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -36,8 +36,8 @@ def get_optimal_device():
else:
return torch.device("cuda")
- if has_mps():
- return torch.device("mps")
+ # if has_mps():
+ # return torch.device("mps")
return cpu
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index eaedac13..26280fe4 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -70,14 +70,19 @@ class StableDiffusionModelHijack:
embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase(cmd_opts.embeddings_dir)
def hijack(self, m):
- model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
+
+ if shared.text_model_name == "XLMR-Large":
+ model_embeddings = m.cond_stage_model.roberta.embeddings
+ model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.word_embeddings, self)
+ else :
+ model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
+ model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embeddings, self)
- model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self)
m.cond_stage_model = FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
self.clip = m.cond_stage_model
- apply_optimizations()
+ # apply_optimizations()
def flatten(el):
flattened = [flatten(children) for children in el.children()]
@@ -125,8 +130,11 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
self.tokenizer = wrapped.tokenizer
self.token_mults = {}
- self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0]
-
+ try:
+ self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0]
+ except:
+ self.comma_token = None
+
tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if '(' in k or ')' in k or '[' in k or ']' in k]
for text, ident in tokens_with_parens:
mult = 1.0
@@ -298,6 +306,9 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
def forward(self, text):
+ if shared.text_model_name == "XLMR-Large":
+ return self.wrapped.encode(text)
+
use_old = opts.use_old_emphasis_implementation
if use_old:
batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text_old(text)
@@ -359,7 +370,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
z = self.wrapped.transformer.text_model.final_layer_norm(z)
else:
z = outputs.last_hidden_state
-
+
# restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
batch_multipliers_of_same_length = [x + [1.0] * (75 - len(x)) for x in batch_multipliers]
batch_multipliers = torch.asarray(batch_multipliers_of_same_length).to(device)
diff --git a/modules/shared.py b/modules/shared.py
index c93ae2a3..9941d2f4 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -21,7 +21,7 @@ from modules.paths import models_path, script_path, sd_path
sd_model_file = os.path.join(script_path, 'model.ckpt')
default_sd_model_file = sd_model_file
parser = argparse.ArgumentParser()
-parser.add_argument("--config", type=str, default=os.path.join(sd_path, "configs/stable-diffusion/v1-inference.yaml"), help="path to config which constructs model",)
+parser.add_argument("--config", type=str, default="configs/altdiffusion/ad-inference.yaml", help="path to config which constructs model",)
parser.add_argument("--ckpt", type=str, default=sd_model_file, help="path to checkpoint of stable diffusion model; if specified, this checkpoint will be added to the list of checkpoints and loaded",)
parser.add_argument("--ckpt-dir", type=str, default=None, help="Path to directory with stable diffusion checkpoints")
parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN'))
@@ -106,6 +106,10 @@ restricted_opts = {
"outdir_txt2img_grids",
"outdir_save",
}
+from omegaconf import OmegaConf
+config = OmegaConf.load(f"{cmd_opts.config}")
+# XLMR-Large
+text_model_name = config.model.params.cond_stage_config.params.name
cmd_opts.disable_extension_access = (cmd_opts.share or cmd_opts.listen or cmd_opts.server_name) and not cmd_opts.enable_insecure_extension_access
--
cgit v1.2.1
From 9a8678f61eff172811498a682c171399b7216e12 Mon Sep 17 00:00:00 2001
From: Billy Cao
Date: Tue, 29 Nov 2022 11:11:29 +0800
Subject: Support changing checkpoint and vae through override_settings
---
modules/processing.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index edceb532..a5c72e3d 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -20,6 +20,8 @@ import modules.shared as shared
import modules.face_restoration
import modules.images as images
import modules.styles
+import modules.sd_models as sd_models
+import modules.sd_vae as sd_vae
import logging
@@ -424,8 +426,10 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
try:
for k, v in p.override_settings.items():
- setattr(opts, k, v) # we don't call onchange for simplicity which makes changing model impossible
- if k == 'sd_hypernetwork': shared.reload_hypernetworks() # make onchange call for changing hypernet since it is relatively fast to load on-change, while SD models are not
+ setattr(opts, k, v)
+ if k == 'sd_hypernetwork': shared.reload_hypernetworks() # make onchange call for changing hypernet
+ if k == 'sd_model_checkpoint': sd_models.reload_model_weights() # make onchange call for changing SD model
+ if k == 'sd_vae': sd_vae.reload_vae_weights() # make onchange call for changing VAE
res = process_images_inner(p)
@@ -433,6 +437,8 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
for k, v in stored_opts.items():
setattr(opts, k, v)
if k == 'sd_hypernetwork': shared.reload_hypernetworks()
+ if k == 'sd_model_checkpoint': sd_models.reload_model_weights()
+ if k == 'sd_vae': sd_vae.reload_vae_weights()
return res
--
cgit v1.2.1
From 241cbc4d2fed71d33e5dc62c3d61b63a54e9d790 Mon Sep 17 00:00:00 2001
From: wywywywy
Date: Tue, 29 Nov 2022 17:38:16 +0000
Subject: Hijack VQModelInterface back to AutoEncoder
---
modules/sd_hijack_autoencoder.py | 282 +++++++++++++++++++++++++++++++++++++++
1 file changed, 282 insertions(+)
create mode 100644 modules/sd_hijack_autoencoder.py
(limited to 'modules')
diff --git a/modules/sd_hijack_autoencoder.py b/modules/sd_hijack_autoencoder.py
new file mode 100644
index 00000000..ffa72f90
--- /dev/null
+++ b/modules/sd_hijack_autoencoder.py
@@ -0,0 +1,282 @@
+import torch
+import pytorch_lightning as pl
+import torch.nn.functional as F
+from contextlib import contextmanager
+from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer
+from ldm.modules.diffusionmodules.model import Encoder, Decoder
+from ldm.util import instantiate_from_config
+
+import ldm.models.autoencoder
+
+class VQModel(pl.LightningModule):
+ def __init__(self,
+ ddconfig,
+ lossconfig,
+ n_embed,
+ embed_dim,
+ ckpt_path=None,
+ ignore_keys=[],
+ image_key="image",
+ colorize_nlabels=None,
+ monitor=None,
+ batch_resize_range=None,
+ scheduler_config=None,
+ lr_g_factor=1.0,
+ remap=None,
+ sane_index_shape=False, # tell vector quantizer to return indices as bhw
+ use_ema=False
+ ):
+ super().__init__()
+ self.embed_dim = embed_dim
+ self.n_embed = n_embed
+ self.image_key = image_key
+ self.encoder = Encoder(**ddconfig)
+ self.decoder = Decoder(**ddconfig)
+ self.loss = instantiate_from_config(lossconfig)
+ self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25,
+ remap=remap,
+ sane_index_shape=sane_index_shape)
+ self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1)
+ self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
+ if colorize_nlabels is not None:
+ assert type(colorize_nlabels)==int
+ self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1))
+ if monitor is not None:
+ self.monitor = monitor
+ self.batch_resize_range = batch_resize_range
+ if self.batch_resize_range is not None:
+ print(f"{self.__class__.__name__}: Using per-batch resizing in range {batch_resize_range}.")
+
+ self.use_ema = use_ema
+ if self.use_ema:
+ self.model_ema = LitEma(self)
+ print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
+
+ if ckpt_path is not None:
+ self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
+ self.scheduler_config = scheduler_config
+ self.lr_g_factor = lr_g_factor
+
+ @contextmanager
+ def ema_scope(self, context=None):
+ if self.use_ema:
+ self.model_ema.store(self.parameters())
+ self.model_ema.copy_to(self)
+ if context is not None:
+ print(f"{context}: Switched to EMA weights")
+ try:
+ yield None
+ finally:
+ if self.use_ema:
+ self.model_ema.restore(self.parameters())
+ if context is not None:
+ print(f"{context}: Restored training weights")
+
+ def init_from_ckpt(self, path, ignore_keys=list()):
+ sd = torch.load(path, map_location="cpu")["state_dict"]
+ keys = list(sd.keys())
+ for k in keys:
+ for ik in ignore_keys:
+ if k.startswith(ik):
+ print("Deleting key {} from state_dict.".format(k))
+ del sd[k]
+ missing, unexpected = self.load_state_dict(sd, strict=False)
+ print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
+ if len(missing) > 0:
+ print(f"Missing Keys: {missing}")
+ print(f"Unexpected Keys: {unexpected}")
+
+ def on_train_batch_end(self, *args, **kwargs):
+ if self.use_ema:
+ self.model_ema(self)
+
+ def encode(self, x):
+ h = self.encoder(x)
+ h = self.quant_conv(h)
+ quant, emb_loss, info = self.quantize(h)
+ return quant, emb_loss, info
+
+ def encode_to_prequant(self, x):
+ h = self.encoder(x)
+ h = self.quant_conv(h)
+ return h
+
+ def decode(self, quant):
+ quant = self.post_quant_conv(quant)
+ dec = self.decoder(quant)
+ return dec
+
+ def decode_code(self, code_b):
+ quant_b = self.quantize.embed_code(code_b)
+ dec = self.decode(quant_b)
+ return dec
+
+ def forward(self, input, return_pred_indices=False):
+ quant, diff, (_,_,ind) = self.encode(input)
+ dec = self.decode(quant)
+ if return_pred_indices:
+ return dec, diff, ind
+ return dec, diff
+
+ def get_input(self, batch, k):
+ x = batch[k]
+ if len(x.shape) == 3:
+ x = x[..., None]
+ x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float()
+ if self.batch_resize_range is not None:
+ lower_size = self.batch_resize_range[0]
+ upper_size = self.batch_resize_range[1]
+ if self.global_step <= 4:
+ # do the first few batches with max size to avoid later oom
+ new_resize = upper_size
+ else:
+ new_resize = np.random.choice(np.arange(lower_size, upper_size+16, 16))
+ if new_resize != x.shape[2]:
+ x = F.interpolate(x, size=new_resize, mode="bicubic")
+ x = x.detach()
+ return x
+
+ def training_step(self, batch, batch_idx, optimizer_idx):
+ # https://github.com/pytorch/pytorch/issues/37142
+ # try not to fool the heuristics
+ x = self.get_input(batch, self.image_key)
+ xrec, qloss, ind = self(x, return_pred_indices=True)
+
+ if optimizer_idx == 0:
+ # autoencode
+ aeloss, log_dict_ae = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
+ last_layer=self.get_last_layer(), split="train",
+ predicted_indices=ind)
+
+ self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True)
+ return aeloss
+
+ if optimizer_idx == 1:
+ # discriminator
+ discloss, log_dict_disc = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
+ last_layer=self.get_last_layer(), split="train")
+ self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True)
+ return discloss
+
+ def validation_step(self, batch, batch_idx):
+ log_dict = self._validation_step(batch, batch_idx)
+ with self.ema_scope():
+ log_dict_ema = self._validation_step(batch, batch_idx, suffix="_ema")
+ return log_dict
+
+ def _validation_step(self, batch, batch_idx, suffix=""):
+ x = self.get_input(batch, self.image_key)
+ xrec, qloss, ind = self(x, return_pred_indices=True)
+ aeloss, log_dict_ae = self.loss(qloss, x, xrec, 0,
+ self.global_step,
+ last_layer=self.get_last_layer(),
+ split="val"+suffix,
+ predicted_indices=ind
+ )
+
+ discloss, log_dict_disc = self.loss(qloss, x, xrec, 1,
+ self.global_step,
+ last_layer=self.get_last_layer(),
+ split="val"+suffix,
+ predicted_indices=ind
+ )
+ rec_loss = log_dict_ae[f"val{suffix}/rec_loss"]
+ self.log(f"val{suffix}/rec_loss", rec_loss,
+ prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
+ self.log(f"val{suffix}/aeloss", aeloss,
+ prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
+ if version.parse(pl.__version__) >= version.parse('1.4.0'):
+ del log_dict_ae[f"val{suffix}/rec_loss"]
+ self.log_dict(log_dict_ae)
+ self.log_dict(log_dict_disc)
+ return self.log_dict
+
+ def configure_optimizers(self):
+ lr_d = self.learning_rate
+ lr_g = self.lr_g_factor*self.learning_rate
+ print("lr_d", lr_d)
+ print("lr_g", lr_g)
+ opt_ae = torch.optim.Adam(list(self.encoder.parameters())+
+ list(self.decoder.parameters())+
+ list(self.quantize.parameters())+
+ list(self.quant_conv.parameters())+
+ list(self.post_quant_conv.parameters()),
+ lr=lr_g, betas=(0.5, 0.9))
+ opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(),
+ lr=lr_d, betas=(0.5, 0.9))
+
+ if self.scheduler_config is not None:
+ scheduler = instantiate_from_config(self.scheduler_config)
+
+ print("Setting up LambdaLR scheduler...")
+ scheduler = [
+ {
+ 'scheduler': LambdaLR(opt_ae, lr_lambda=scheduler.schedule),
+ 'interval': 'step',
+ 'frequency': 1
+ },
+ {
+ 'scheduler': LambdaLR(opt_disc, lr_lambda=scheduler.schedule),
+ 'interval': 'step',
+ 'frequency': 1
+ },
+ ]
+ return [opt_ae, opt_disc], scheduler
+ return [opt_ae, opt_disc], []
+
+ def get_last_layer(self):
+ return self.decoder.conv_out.weight
+
+ def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs):
+ log = dict()
+ x = self.get_input(batch, self.image_key)
+ x = x.to(self.device)
+ if only_inputs:
+ log["inputs"] = x
+ return log
+ xrec, _ = self(x)
+ if x.shape[1] > 3:
+ # colorize with random projection
+ assert xrec.shape[1] > 3
+ x = self.to_rgb(x)
+ xrec = self.to_rgb(xrec)
+ log["inputs"] = x
+ log["reconstructions"] = xrec
+ if plot_ema:
+ with self.ema_scope():
+ xrec_ema, _ = self(x)
+ if x.shape[1] > 3: xrec_ema = self.to_rgb(xrec_ema)
+ log["reconstructions_ema"] = xrec_ema
+ return log
+
+ def to_rgb(self, x):
+ assert self.image_key == "segmentation"
+ if not hasattr(self, "colorize"):
+ self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x))
+ x = F.conv2d(x, weight=self.colorize)
+ x = 2.*(x-x.min())/(x.max()-x.min()) - 1.
+ return x
+
+
+class VQModelInterface(VQModel):
+ def __init__(self, embed_dim, *args, **kwargs):
+ super().__init__(embed_dim=embed_dim, *args, **kwargs)
+ self.embed_dim = embed_dim
+
+ def encode(self, x):
+ h = self.encoder(x)
+ h = self.quant_conv(h)
+ return h
+
+ def decode(self, h, force_not_quantize=False):
+ # also go through quantization layer
+ if not force_not_quantize:
+ quant, emb_loss, info = self.quantize(h)
+ else:
+ quant = h
+ quant = self.post_quant_conv(quant)
+ dec = self.decoder(quant)
+ return dec
+
+setattr(ldm.models.autoencoder, "VQModel", VQModel)
+setattr(ldm.models.autoencoder, "VQModelInterface", VQModelInterface)
--
cgit v1.2.1
From 36c3613d16c523e43ec4dedbcbe9a3b93ad7d139 Mon Sep 17 00:00:00 2001
From: wywywywy
Date: Tue, 29 Nov 2022 17:40:02 +0000
Subject: Add autoencoder to sd_hijack
---
modules/sd_hijack.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index b824b5bf..26f9b951 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -11,7 +11,7 @@ import modules.textual_inversion.textual_inversion
from modules import prompt_parser, devices, sd_hijack_optimizations, shared, sd_hijack_checkpoint
from modules.hypernetworks import hypernetwork
from modules.shared import opts, device, cmd_opts
-from modules import sd_hijack_clip, sd_hijack_open_clip
+from modules import sd_hijack_clip, sd_hijack_open_clip, sd_hijack_autoencoder
from modules.sd_hijack_optimizations import invokeAI_mps_available
--
cgit v1.2.1
From 7193814cf7b052fe52596cab89c5f0fd95823950 Mon Sep 17 00:00:00 2001
From: wywywywy
Date: Tue, 29 Nov 2022 19:22:53 +0000
Subject: Added purpose of this hijack to comments
---
modules/sd_hijack_autoencoder.py | 4 ++++
1 file changed, 4 insertions(+)
(limited to 'modules')
diff --git a/modules/sd_hijack_autoencoder.py b/modules/sd_hijack_autoencoder.py
index ffa72f90..8e03c7f8 100644
--- a/modules/sd_hijack_autoencoder.py
+++ b/modules/sd_hijack_autoencoder.py
@@ -1,3 +1,7 @@
+# The content of this file comes from the ldm/models/autoencoder.py file of the compvis/stable-diffusion repo
+# The VQModel & VQModelInterface were subsequently removed from ldm/models/autoencoder.py when we moved to the stability-ai/stablediffusion repo
+# As the LDSR upscaler relies on VQModel & VQModelInterface, the hijack aims to put them back into the ldm.models.autoencoder
+
import torch
import pytorch_lightning as pl
import torch.nn.functional as F
--
cgit v1.2.1
From 52cc83d36b7663a77b79fd2258d2ca871af73e55 Mon Sep 17 00:00:00 2001
From: zhaohu xing <920232796@qq.com>
Date: Wed, 30 Nov 2022 14:56:12 +0800
Subject: fix bugs
Signed-off-by: zhaohu xing <920232796@qq.com>
---
modules/sd_hijack.py | 15 ++---
modules/sd_hijack_clip.py | 10 +++-
modules/xlmr.py | 137 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 153 insertions(+), 9 deletions(-)
create mode 100644 modules/xlmr.py
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 3ec3f98a..edb8b420 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -28,7 +28,7 @@ diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.At
# new memory efficient cross attention blocks do not support hypernets and we already
# have memory efficient cross attention anyway, so this disables SD2.0's memory efficient cross attention
ldm.modules.attention.MemoryEfficientCrossAttention = ldm.modules.attention.CrossAttention
-ldm.modules.attention.BasicTransformerBlock.ATTENTION_MODES["softmax-xformers"] = ldm.modules.attention.CrossAttention
+# ldm.modules.attention.BasicTransformerBlock.ATTENTION_MODES["softmax-xformers"] = ldm.modules.attention.CrossAttention
# silence new console spam from SD2
ldm.modules.attention.print = lambda *args: None
@@ -82,7 +82,12 @@ class StableDiffusionModelHijack:
def hijack(self, m):
- if type(m.cond_stage_model) == ldm.modules.encoders.modules.FrozenCLIPEmbedder:
+ if shared.text_model_name == "XLMR-Large":
+ model_embeddings = m.cond_stage_model.roberta.embeddings
+ model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.word_embeddings, self)
+ m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
+
+ elif type(m.cond_stage_model) == ldm.modules.encoders.modules.FrozenCLIPEmbedder:
model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self)
m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
@@ -91,11 +96,7 @@ class StableDiffusionModelHijack:
m.cond_stage_model.model.token_embedding = EmbeddingsWithFixes(m.cond_stage_model.model.token_embedding, self)
m.cond_stage_model = sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
apply_optimizations()
- elif shared.text_model_name == "XLMR-Large":
- model_embeddings = m.cond_stage_model.roberta.embeddings
- model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.word_embeddings, self)
- m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
-
+
self.clip = m.cond_stage_model
fix_checkpoint()
diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index b451d1cf..9ea6e1ce 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -4,7 +4,7 @@ import torch
from modules import prompt_parser, devices
from modules.shared import opts
-
+import modules.shared as shared
def get_target_prompt_token_count(token_count):
return math.ceil(max(token_count, 1) / 75) * 75
@@ -177,6 +177,9 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
def forward(self, text):
+ if shared.text_model_name == "XLMR-Large":
+ return self.wrapped.encode(text)
+
use_old = opts.use_old_emphasis_implementation
if use_old:
batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text_old(text)
@@ -254,7 +257,10 @@ class FrozenCLIPEmbedderWithCustomWords(FrozenCLIPEmbedderWithCustomWordsBase):
def __init__(self, wrapped, hijack):
super().__init__(wrapped, hijack)
self.tokenizer = wrapped.tokenizer
- self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0]
+ if shared.text_model_name == "XLMR-Large":
+ self.comma_token = None
+ else :
+ self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0]
self.token_mults = {}
tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if '(' in k or ')' in k or '[' in k or ']' in k]
diff --git a/modules/xlmr.py b/modules/xlmr.py
new file mode 100644
index 00000000..beab3fdf
--- /dev/null
+++ b/modules/xlmr.py
@@ -0,0 +1,137 @@
+from transformers import BertPreTrainedModel,BertModel,BertConfig
+import torch.nn as nn
+import torch
+from transformers.models.xlm_roberta.configuration_xlm_roberta import XLMRobertaConfig
+from transformers import XLMRobertaModel,XLMRobertaTokenizer
+from typing import Optional
+
+class BertSeriesConfig(BertConfig):
+ def __init__(self, vocab_size=30522, hidden_size=768, num_hidden_layers=12, num_attention_heads=12, intermediate_size=3072, hidden_act="gelu", hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, max_position_embeddings=512, type_vocab_size=2, initializer_range=0.02, layer_norm_eps=1e-12, pad_token_id=0, position_embedding_type="absolute", use_cache=True, classifier_dropout=None,project_dim=512, pooler_fn="average",learn_encoder=False,model_type='bert',**kwargs):
+
+ super().__init__(vocab_size, hidden_size, num_hidden_layers, num_attention_heads, intermediate_size, hidden_act, hidden_dropout_prob, attention_probs_dropout_prob, max_position_embeddings, type_vocab_size, initializer_range, layer_norm_eps, pad_token_id, position_embedding_type, use_cache, classifier_dropout, **kwargs)
+ self.project_dim = project_dim
+ self.pooler_fn = pooler_fn
+ self.learn_encoder = learn_encoder
+
+class RobertaSeriesConfig(XLMRobertaConfig):
+ def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2,project_dim=512,pooler_fn='cls',learn_encoder=False, **kwargs):
+ super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
+ self.project_dim = project_dim
+ self.pooler_fn = pooler_fn
+ self.learn_encoder = learn_encoder
+
+
+class BertSeriesModelWithTransformation(BertPreTrainedModel):
+
+ _keys_to_ignore_on_load_unexpected = [r"pooler"]
+ _keys_to_ignore_on_load_missing = [r"position_ids", r"predictions.decoder.bias"]
+ config_class = BertSeriesConfig
+
+ def __init__(self, config=None, **kargs):
+ # modify initialization for autoloading
+ if config is None:
+ config = XLMRobertaConfig()
+ config.attention_probs_dropout_prob= 0.1
+ config.bos_token_id=0
+ config.eos_token_id=2
+ config.hidden_act='gelu'
+ config.hidden_dropout_prob=0.1
+ config.hidden_size=1024
+ config.initializer_range=0.02
+ config.intermediate_size=4096
+ config.layer_norm_eps=1e-05
+ config.max_position_embeddings=514
+
+ config.num_attention_heads=16
+ config.num_hidden_layers=24
+ config.output_past=True
+ config.pad_token_id=1
+ config.position_embedding_type= "absolute"
+
+ config.type_vocab_size= 1
+ config.use_cache=True
+ config.vocab_size= 250002
+ config.project_dim = 768
+ config.learn_encoder = False
+ super().__init__(config)
+ self.roberta = XLMRobertaModel(config)
+ self.transformation = nn.Linear(config.hidden_size,config.project_dim)
+ self.pre_LN=nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
+ self.tokenizer = XLMRobertaTokenizer.from_pretrained('xlm-roberta-large')
+ self.pooler = lambda x: x[:,0]
+ self.post_init()
+
+ def encode(self,c):
+ device = next(self.parameters()).device
+ text = self.tokenizer(c,
+ truncation=True,
+ max_length=77,
+ return_length=False,
+ return_overflowing_tokens=False,
+ padding="max_length",
+ return_tensors="pt")
+ text["input_ids"] = torch.tensor(text["input_ids"]).to(device)
+ text["attention_mask"] = torch.tensor(
+ text['attention_mask']).to(device)
+ features = self(**text)
+ return features['projection_state']
+
+ def forward(
+ self,
+ input_ids: Optional[torch.Tensor] = None,
+ attention_mask: Optional[torch.Tensor] = None,
+ token_type_ids: Optional[torch.Tensor] = None,
+ position_ids: Optional[torch.Tensor] = None,
+ head_mask: Optional[torch.Tensor] = None,
+ inputs_embeds: Optional[torch.Tensor] = None,
+ encoder_hidden_states: Optional[torch.Tensor] = None,
+ encoder_attention_mask: Optional[torch.Tensor] = None,
+ output_attentions: Optional[bool] = None,
+ return_dict: Optional[bool] = None,
+ output_hidden_states: Optional[bool] = None,
+ ) :
+ r"""
+ """
+
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+
+ outputs = self.roberta(
+ input_ids=input_ids,
+ attention_mask=attention_mask,
+ token_type_ids=token_type_ids,
+ position_ids=position_ids,
+ head_mask=head_mask,
+ inputs_embeds=inputs_embeds,
+ encoder_hidden_states=encoder_hidden_states,
+ encoder_attention_mask=encoder_attention_mask,
+ output_attentions=output_attentions,
+ output_hidden_states=True,
+ return_dict=return_dict,
+ )
+
+ # last module outputs
+ sequence_output = outputs[0]
+
+
+ # project every module
+ sequence_output_ln = self.pre_LN(sequence_output)
+
+ # pooler
+ pooler_output = self.pooler(sequence_output_ln)
+ pooler_output = self.transformation(pooler_output)
+ projection_state = self.transformation(outputs.last_hidden_state)
+
+ return {
+ 'pooler_output':pooler_output,
+ 'last_hidden_state':outputs.last_hidden_state,
+ 'hidden_states':outputs.hidden_states,
+ 'attentions':outputs.attentions,
+ 'projection_state':projection_state,
+ 'sequence_out': sequence_output
+ }
+
+
+class RobertaSeriesModelWithTransformation(BertSeriesModelWithTransformation):
+ base_model_prefix = 'roberta'
+ config_class= RobertaSeriesConfig
\ No newline at end of file
--
cgit v1.2.1
From 3a724e91a23a715987e3034f31d8c8f5d416bd3a Mon Sep 17 00:00:00 2001
From: Billy Cao
Date: Wed, 30 Nov 2022 20:52:32 +0800
Subject: Change to steps of 8
---
modules/ui.py | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index 36dd754e..acf99bda 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -380,8 +380,8 @@ def create_seed_inputs():
with gr.Row(visible=False) as seed_extra_row_2:
seed_extras.append(seed_extra_row_2)
- seed_resize_from_w = gr.Slider(minimum=0, maximum=2048, step=1, label="Resize seed from width", value=0)
- seed_resize_from_h = gr.Slider(minimum=0, maximum=2048, step=1, label="Resize seed from height", value=0)
+ seed_resize_from_w = gr.Slider(minimum=0, maximum=2048, step=8, label="Resize seed from width", value=0)
+ seed_resize_from_h = gr.Slider(minimum=0, maximum=2048, step=8, label="Resize seed from height", value=0)
random_seed.click(fn=lambda: -1, show_progress=False, inputs=[], outputs=[seed])
random_subseed.click(fn=lambda: -1, show_progress=False, inputs=[], outputs=[subseed])
@@ -713,8 +713,8 @@ def create_ui(wrap_gradio_gpu_call):
sampler_index = gr.Radio(label='Sampling method', elem_id="txt2img_sampling", choices=[x.name for x in samplers], value=samplers[0].name, type="index")
with gr.Group():
- width = gr.Slider(minimum=64, maximum=2048, step=1, label="Width", value=512)
- height = gr.Slider(minimum=64, maximum=2048, step=1, label="Height", value=512)
+ width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512)
+ height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512)
with gr.Row():
restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1)
@@ -722,8 +722,8 @@ def create_ui(wrap_gradio_gpu_call):
enable_hr = gr.Checkbox(label='Highres. fix', value=False)
with gr.Row(visible=False) as hr_options:
- firstphase_width = gr.Slider(minimum=0, maximum=1024, step=1, label="Firstpass width", value=0)
- firstphase_height = gr.Slider(minimum=0, maximum=1024, step=1, label="Firstpass height", value=0)
+ firstphase_width = gr.Slider(minimum=0, maximum=1024, step=8, label="Firstpass width", value=0)
+ firstphase_height = gr.Slider(minimum=0, maximum=1024, step=8, label="Firstpass height", value=0)
denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.7)
with gr.Row(equal_height=True):
@@ -899,8 +899,8 @@ def create_ui(wrap_gradio_gpu_call):
sampler_index = gr.Radio(label='Sampling method', choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index")
with gr.Group():
- width = gr.Slider(minimum=64, maximum=2048, step=1, label="Width", value=512, elem_id="img2img_width")
- height = gr.Slider(minimum=64, maximum=2048, step=1, label="Height", value=512, elem_id="img2img_height")
+ width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="img2img_width")
+ height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="img2img_height")
with gr.Row():
restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1)
@@ -1228,8 +1228,8 @@ def create_ui(wrap_gradio_gpu_call):
with gr.Tab(label="Preprocess images"):
process_src = gr.Textbox(label='Source directory')
process_dst = gr.Textbox(label='Destination directory')
- process_width = gr.Slider(minimum=64, maximum=2048, step=1, label="Width", value=512)
- process_height = gr.Slider(minimum=64, maximum=2048, step=1, label="Height", value=512)
+ process_width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512)
+ process_height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512)
preprocess_txt_action = gr.Dropdown(label='Existing Caption txt Action', value="ignore", choices=["ignore", "copy", "prepend", "append"])
with gr.Row():
@@ -1286,8 +1286,8 @@ def create_ui(wrap_gradio_gpu_call):
dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images")
log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion")
template_file = gr.Textbox(label='Prompt template file', value=os.path.join(script_path, "textual_inversion_templates", "style_filewords.txt"))
- training_width = gr.Slider(minimum=64, maximum=2048, step=1, label="Width", value=512)
- training_height = gr.Slider(minimum=64, maximum=2048, step=1, label="Height", value=512)
+ training_width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512)
+ training_height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512)
steps = gr.Number(label='Max steps', value=100000, precision=0)
create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0)
save_embedding_every = gr.Number(label='Save a copy of embedding to log directory every N steps, 0 to disable', value=500, precision=0)
--
cgit v1.2.1
From a44994e2c926fc1f8479281e5b1e08d7fe9db2bb Mon Sep 17 00:00:00 2001
From: Adi Eyal
Date: Wed, 30 Nov 2022 15:23:53 +0200
Subject: Fixed incorrect negative prompt text in infotext
Previously only the first negative prompt in all_negative_prompts was
being used for infotext. This fixes that by selecting the index-th
negative prompt
---
modules/processing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index edceb532..0a73ccbb 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -414,7 +414,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None])
- negative_prompt_text = "\nNegative prompt: " + p.all_negative_prompts[0] if p.all_negative_prompts[0] else ""
+ negative_prompt_text = "\nNegative prompt: " + p.all_negative_prompts[index] if p.all_negative_prompts[index] else ""
return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip()
--
cgit v1.2.1
From 21effd629d0fdfdbbff2b20a9f4a3767e7e8bd33 Mon Sep 17 00:00:00 2001
From: brkirch
Date: Mon, 28 Nov 2022 21:24:06 -0500
Subject: Add workaround for using MPS with torchsde
---
modules/sd_samplers.py | 14 ++++++++++++++
1 file changed, 14 insertions(+)
(limited to 'modules')
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 5fefb227..8b11f569 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -6,6 +6,7 @@ import tqdm
from PIL import Image
import inspect
import k_diffusion.sampling
+import torchsde._brownian.brownian_interval
import ldm.models.diffusion.ddim
import ldm.models.diffusion.plms
from modules import prompt_parser, devices, processing, images
@@ -367,6 +368,19 @@ class TorchHijack:
return torch.randn_like(x)
+# MPS fix for randn in torchsde
+def torchsde_randn(size, dtype, device, seed):
+ if device.type == 'mps':
+ generator = torch.Generator(devices.cpu).manual_seed(int(seed))
+ return torch.randn(size, dtype=dtype, device=devices.cpu, generator=generator).to(device)
+ else:
+ generator = torch.Generator(device).manual_seed(int(seed))
+ return torch.randn(size, dtype=dtype, device=device, generator=generator)
+
+
+torchsde._brownian.brownian_interval._randn = torchsde_randn
+
+
class KDiffusionSampler:
def __init__(self, funcname, sd_model):
denoiser = k_diffusion.external.CompVisVDenoiser if sd_model.parameterization == "v" else k_diffusion.external.CompVisDenoiser
--
cgit v1.2.1
From 4d5f1691dda971ec7b461dd880426300fd54ccee Mon Sep 17 00:00:00 2001
From: brkirch
Date: Mon, 28 Nov 2022 21:36:35 -0500
Subject: Use devices.autocast instead of torch.autocast
---
modules/hypernetworks/hypernetwork.py | 2 +-
modules/interrogate.py | 3 +--
modules/swinir_model.py | 6 +-----
modules/textual_inversion/dataset.py | 4 ++--
modules/textual_inversion/textual_inversion.py | 2 +-
5 files changed, 6 insertions(+), 11 deletions(-)
(limited to 'modules')
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 8466887f..eb5ae372 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -495,7 +495,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
if shared.state.interrupted:
break
- with torch.autocast("cuda"):
+ with devices.autocast():
x = batch.latent_sample.to(devices.device, non_blocking=pin_memory)
if tag_drop_out != 0 or shuffle_tags:
shared.sd_model.cond_stage_model.to(devices.device)
diff --git a/modules/interrogate.py b/modules/interrogate.py
index 9769aa34..40c6b082 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -148,8 +148,7 @@ class InterrogateModels:
clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(devices.device_interrogate)
- precision_scope = torch.autocast if shared.cmd_opts.precision == "autocast" else contextlib.nullcontext
- with torch.no_grad(), precision_scope("cuda"):
+ with torch.no_grad(), devices.autocast():
image_features = self.clip_model.encode_image(clip_image).type(self.dtype)
image_features /= image_features.norm(dim=-1, keepdim=True)
diff --git a/modules/swinir_model.py b/modules/swinir_model.py
index facd262d..483eabd4 100644
--- a/modules/swinir_model.py
+++ b/modules/swinir_model.py
@@ -13,10 +13,6 @@ from modules.swinir_model_arch import SwinIR as net
from modules.swinir_model_arch_v2 import Swin2SR as net2
from modules.upscaler import Upscaler, UpscalerData
-precision_scope = (
- torch.autocast if cmd_opts.precision == "autocast" else contextlib.nullcontext
-)
-
class UpscalerSwinIR(Upscaler):
def __init__(self, dirname):
@@ -112,7 +108,7 @@ def upscale(
img = np.moveaxis(img, 2, 0) / 255
img = torch.from_numpy(img).float()
img = img.unsqueeze(0).to(devices.device_swinir)
- with torch.no_grad(), precision_scope("cuda"):
+ with torch.no_grad(), devices.autocast():
_, _, h_old, w_old = img.size()
h_pad = (h_old // window_size + 1) * window_size - h_old
w_pad = (w_old // window_size + 1) * window_size - w_old
diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py
index e5725f33..2dc64c3c 100644
--- a/modules/textual_inversion/dataset.py
+++ b/modules/textual_inversion/dataset.py
@@ -82,7 +82,7 @@ class PersonalizedBase(Dataset):
torchdata = torch.from_numpy(npimage).permute(2, 0, 1).to(device=device, dtype=torch.float32)
latent_sample = None
- with torch.autocast("cuda"):
+ with devices.autocast():
latent_dist = model.encode_first_stage(torchdata.unsqueeze(dim=0))
if latent_sampling_method == "once" or (latent_sampling_method == "deterministic" and not isinstance(latent_dist, DiagonalGaussianDistribution)):
@@ -101,7 +101,7 @@ class PersonalizedBase(Dataset):
entry.cond_text = self.create_text(filename_text)
if include_cond and not (self.tag_drop_out != 0 or self.shuffle_tags):
- with torch.autocast("cuda"):
+ with devices.autocast():
entry.cond = cond_model([entry.cond_text]).to(devices.cpu).squeeze(0)
self.dataset.append(entry)
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 4eb75cb5..daf8d1b8 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -316,7 +316,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
if shared.state.interrupted:
break
- with torch.autocast("cuda"):
+ with devices.autocast():
# c = stack_conds(batch.cond).to(devices.device)
# mask = torch.tensor(batch.emb_index).to(devices.device, non_blocking=pin_memory)
# print(mask)
--
cgit v1.2.1
From 0fddb4a1c06a6e2122add7eee3b001a6d473baee Mon Sep 17 00:00:00 2001
From: brkirch
Date: Wed, 30 Nov 2022 08:02:39 -0500
Subject: Rework MPS randn fix, add randn_like fix
torch.manual_seed() already sets a CPU generator, so there is no reason to create a CPU generator manually. torch.randn_like also needs a MPS fix for k-diffusion, but a torch hijack with randn_like already exists so it can also be used for that.
---
modules/devices.py | 15 +++------------
modules/sd_samplers.py | 8 +++++---
2 files changed, 8 insertions(+), 15 deletions(-)
(limited to 'modules')
diff --git a/modules/devices.py b/modules/devices.py
index f00079c6..046460fa 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -66,24 +66,15 @@ dtype_vae = torch.float16
def randn(seed, shape):
- # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
- if device.type == 'mps':
- generator = torch.Generator(device=cpu)
- generator.manual_seed(seed)
- noise = torch.randn(shape, generator=generator, device=cpu).to(device)
- return noise
-
torch.manual_seed(seed)
+ if device.type == 'mps':
+ return torch.randn(shape, device=cpu).to(device)
return torch.randn(shape, device=device)
def randn_without_seed(shape):
- # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
if device.type == 'mps':
- generator = torch.Generator(device=cpu)
- noise = torch.randn(shape, generator=generator, device=cpu).to(device)
- return noise
-
+ return torch.randn(shape, device=cpu).to(device)
return torch.randn(shape, device=device)
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 8b11f569..4c123d3b 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -365,7 +365,10 @@ class TorchHijack:
if noise.shape == x.shape:
return noise
- return torch.randn_like(x)
+ if x.device.type == 'mps':
+ return torch.randn_like(x, device=devices.cpu).to(x.device)
+ else:
+ return torch.randn_like(x)
# MPS fix for randn in torchsde
@@ -429,8 +432,7 @@ class KDiffusionSampler:
self.model_wrap.step = 0
self.eta = p.eta or opts.eta_ancestral
- if self.sampler_noises is not None:
- k_diffusion.sampling.torch = TorchHijack(self.sampler_noises)
+ k_diffusion.sampling.torch = TorchHijack(self.sampler_noises if self.sampler_noises is not None else [])
extra_params_kwargs = {}
for param_name in self.extra_params:
--
cgit v1.2.1
From be2e6de94a5d40bff6d65497fd5ebc275b389f3f Mon Sep 17 00:00:00 2001
From: space-nuko <24979496+space-nuko@users.noreply.github.com>
Date: Thu, 1 Dec 2022 11:34:16 -0800
Subject: Fix clip skip of 1 not being restored from prompts
---
modules/generation_parameters_copypaste.py | 4 ++++
modules/shared.py | 2 +-
2 files changed, 5 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index 01980dca..44fe1a6c 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -184,6 +184,10 @@ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model
else:
res[k] = v
+ # Missing CLIP skip means it was set to 1 (the default)
+ if "Clip skip" not in res:
+ res["Clip skip"] = "1"
+
return res
diff --git a/modules/shared.py b/modules/shared.py
index c36ee211..b4ecc7ca 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -371,7 +371,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
"enable_batch_seeds": OptionInfo(True, "Make K-diffusion samplers produce same images in a batch as when making a single image"),
"comma_padding_backtrack": OptionInfo(20, "Increase coherency by padding from the last comma within n tokens when using more than 75 tokens", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1 }),
"filter_nsfw": OptionInfo(False, "Filter NSFW content"),
- 'CLIP_stop_at_last_layers': OptionInfo(1, "Stop At last layers of CLIP model", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}),
+ 'CLIP_stop_at_last_layers': OptionInfo(1, "Stop at last layers of CLIP model (CLIP skip)", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}),
"random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}),
}))
--
cgit v1.2.1
From 9c86fb8cace6d8ac0843e0ddad0ba5ae7f3148c9 Mon Sep 17 00:00:00 2001
From: zhaohu xing <920232796@qq.com>
Date: Fri, 2 Dec 2022 16:08:46 +0800
Subject: fix bug
Signed-off-by: zhaohu xing <920232796@qq.com>
---
modules/shared.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index 1408dee3..ac7678c3 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -111,7 +111,11 @@ restricted_opts = {
from omegaconf import OmegaConf
config = OmegaConf.load(f"{cmd_opts.config}")
# XLMR-Large
-text_model_name = config.model.params.cond_stage_config.params.name
+try:
+ text_model_name = config.model.params.cond_stage_config.params.name
+
+except :
+ text_model_name = "stable_diffusion"
cmd_opts.disable_extension_access = (cmd_opts.share or cmd_opts.listen or cmd_opts.server_name) and not cmd_opts.enable_insecure_extension_access
--
cgit v1.2.1
From e46147786914484b422899ee7154ae1685d96ae5 Mon Sep 17 00:00:00 2001
From: SmirkingFace <116507648+smirkingface@users.noreply.github.com>
Date: Fri, 2 Dec 2022 11:12:13 +0100
Subject: Fixed safe.py for pytorch 1.13 ckpt files
---
modules/safe.py | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
(limited to 'modules')
diff --git a/modules/safe.py b/modules/safe.py
index a9209e38..10460ad0 100644
--- a/modules/safe.py
+++ b/modules/safe.py
@@ -62,14 +62,12 @@ class RestrictedUnpickler(pickle.Unpickler):
raise Exception(f"global '{module}/{name}' is forbidden")
-allowed_zip_names = ["archive/data.pkl", "archive/version"]
-allowed_zip_names_re = re.compile(r"^archive/data/\d+$")
-
+# Regular expression that accepts 'dirname/version', 'dirname/data.pkl', and 'dirname/data/'
+allowed_zip_names_re = re.compile(r"^([^/]+)/((data/\d+)|version|(data\.pkl))$")
+data_pkl_re = re.compile(r"^([^/]+)/data\.pkl$")
def check_zip_filenames(filename, names):
for name in names:
- if name in allowed_zip_names:
- continue
if allowed_zip_names_re.match(name):
continue
@@ -82,8 +80,14 @@ def check_pt(filename, extra_handler):
# new pytorch format is a zip file
with zipfile.ZipFile(filename) as z:
check_zip_filenames(filename, z.namelist())
-
- with z.open('archive/data.pkl') as file:
+
+ # find filename of data.pkl in zip file: '/data.pkl'
+ data_pkl_filenames = [f for f in z.namelist() if data_pkl_re.match(f)]
+ if len(data_pkl_filenames) == 0:
+ raise Exception(f"data.pkl not found in {filename}")
+ if len(data_pkl_filenames) > 1:
+ raise Exception(f"Multiple data.pkl found in {filename}")
+ with z.open(data_pkl_filenames[0]) as file:
unpickler = RestrictedUnpickler(file)
unpickler.extra_handler = extra_handler
unpickler.load()
--
cgit v1.2.1
From 99b19b1a8f5d25ac43e6a031d7423e541ed31b0e Mon Sep 17 00:00:00 2001
From: jcowens
Date: Fri, 2 Dec 2022 02:53:26 -0800
Subject: fix typo
---
modules/ui_extensions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/ui_extensions.py b/modules/ui_extensions.py
index 030f011e..42667941 100644
--- a/modules/ui_extensions.py
+++ b/modules/ui_extensions.py
@@ -17,7 +17,7 @@ available_extensions = {"extensions": []}
def check_access():
- assert not shared.cmd_opts.disable_extension_access, "extension access disabed because of commandline flags"
+ assert not shared.cmd_opts.disable_extension_access, "extension access disabled because of command line flags"
def apply_and_restart(disable_list, update_list):
--
cgit v1.2.1
From da698ca92ed79b9104a62f34291d9b842c433a1b Mon Sep 17 00:00:00 2001
From: SmirkingFace <116507648+smirkingface@users.noreply.github.com>
Date: Fri, 2 Dec 2022 13:47:02 +0100
Subject: Fixed AttributeError where openaimodel is not found
---
modules/sd_hijack.py | 1 +
1 file changed, 1 insertion(+)
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index b824b5bf..eef6efd2 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -17,6 +17,7 @@ from modules.sd_hijack_optimizations import invokeAI_mps_available
import ldm.modules.attention
import ldm.modules.diffusionmodules.model
+import ldm.modules.diffusionmodules.openaimodel
import ldm.models.diffusion.ddim
import ldm.models.diffusion.plms
import ldm.modules.encoders.modules
--
cgit v1.2.1
From 119a945ef7569128eb7d6772468ffc5567c2e161 Mon Sep 17 00:00:00 2001
From: PhytoEpidemic <64293310+PhytoEpidemic@users.noreply.github.com>
Date: Fri, 2 Dec 2022 12:16:29 -0600
Subject: Fix divide by 0 error
Fix of the edge case 0 weight that occasionally will pop up in some specific situations. This was crashing the script.
---
modules/textual_inversion/autocrop.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'modules')
diff --git a/modules/textual_inversion/autocrop.py b/modules/textual_inversion/autocrop.py
index 9859974a..68e1103c 100644
--- a/modules/textual_inversion/autocrop.py
+++ b/modules/textual_inversion/autocrop.py
@@ -276,8 +276,8 @@ def poi_average(pois, settings):
weight += poi.weight
x += poi.x * poi.weight
y += poi.y * poi.weight
- avg_x = round(x / weight)
- avg_y = round(y / weight)
+ avg_x = round(weight and x / weight)
+ avg_y = round(weight and y / weight)
return PointOfInterest(avg_x, avg_y)
@@ -338,4 +338,4 @@ class Settings:
self.face_points_weight = face_points_weight
self.annotate_image = annotate_image
self.destop_view_image = False
- self.dnn_model_path = dnn_model_path
\ No newline at end of file
+ self.dnn_model_path = dnn_model_path
--
cgit v1.2.1
From b2f17dd367c5758e406dd22b78ad7456dac1957a Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 09:15:24 +0300
Subject: prevent include_init_images from being passed to
StableDiffusionProcessingImg2Img in API #4989
---
modules/api/api.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 1de3f98f..54ee7cb0 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -152,7 +152,10 @@ class Api:
)
if populate.sampler_name:
populate.sampler_index = None # prevent a warning later on
- p = StableDiffusionProcessingImg2Img(**vars(populate))
+
+ args = vars(populate)
+ args.pop('include_init_images', None) # this is meant to be done by "exclude": True in model, but it's for a reason that I cannot determine.
+ p = StableDiffusionProcessingImg2Img(**args)
imgs = []
for img in init_images:
@@ -170,7 +173,7 @@ class Api:
b64images = list(map(encode_pil_to_base64, processed.images))
- if (not img2imgreq.include_init_images):
+ if not img2imgreq.include_init_images:
img2imgreq.init_images = None
img2imgreq.mask = None
--
cgit v1.2.1
From c7af672186ec09a514f0e78aa21155264e56c130 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 09:41:39 +0300
Subject: more simple config option name plus mouseover hint for clip skip
---
modules/shared.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index b4ecc7ca..42ec4120 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -371,7 +371,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
"enable_batch_seeds": OptionInfo(True, "Make K-diffusion samplers produce same images in a batch as when making a single image"),
"comma_padding_backtrack": OptionInfo(20, "Increase coherency by padding from the last comma within n tokens when using more than 75 tokens", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1 }),
"filter_nsfw": OptionInfo(False, "Filter NSFW content"),
- 'CLIP_stop_at_last_layers': OptionInfo(1, "Stop at last layers of CLIP model (CLIP skip)", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}),
+ 'CLIP_stop_at_last_layers': OptionInfo(1, "Clip skip", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}),
"random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}),
}))
--
cgit v1.2.1
From 2651267e3af5886b8b6b1dc3023f2507f7079118 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 15:57:52 +0300
Subject: fix #4407 breaking UI entirely for card other than ones related to
the PR
---
modules/devices.py | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
(limited to 'modules')
diff --git a/modules/devices.py b/modules/devices.py
index 1325569c..547ea46c 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -53,12 +53,10 @@ def torch_gc():
def enable_tf32():
if torch.cuda.is_available():
- for devid in range(0,torch.cuda.device_count()):
- if torch.cuda.get_device_capability(devid) == (7, 5):
- shd = True
- if shd:
+ if any([torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())]):
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True
+
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
--
cgit v1.2.1
From 46b0d230e7c13e247eabb22e1103ce512e7ed6b1 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 16:01:23 +0300
Subject: add comment for #4407 and remove seemingly unnecessary cudnn.enabled
---
modules/devices.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/devices.py b/modules/devices.py
index 547ea46c..d6a76844 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -53,9 +53,11 @@ def torch_gc():
def enable_tf32():
if torch.cuda.is_available():
+
+ # enabling benchmark option seems to enable a range of cards to do fp16 when they otherwise can't
+ # see https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407
if any([torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())]):
torch.backends.cudnn.benchmark = True
- torch.backends.cudnn.enabled = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
--
cgit v1.2.1
From cf3e844d1d31d64f3234a0fbdfcac91cc5834657 Mon Sep 17 00:00:00 2001
From: Vladimir Repin <32306715+mezotaken@users.noreply.github.com>
Date: Sat, 3 Dec 2022 18:05:47 +0300
Subject: add noise strength parameter similar to NAI
---
modules/processing.py | 1 +
modules/shared.py | 1 +
2 files changed, 2 insertions(+)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index 3d2c4dc9..b9cb6d32 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -861,6 +861,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
+ x = x*shared.opts.initial_noise_multiplier
samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
diff --git a/modules/shared.py b/modules/shared.py
index 8202d8e5..4182e2ac 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -364,6 +364,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
"sd_hypernetwork": OptionInfo("None", "Hypernetwork", gr.Dropdown, lambda: {"choices": ["None"] + [x for x in hypernetworks.keys()]}, refresh=reload_hypernetworks),
"sd_hypernetwork_strength": OptionInfo(1.0, "Hypernetwork strength", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.001}),
"inpainting_mask_weight": OptionInfo(1.0, "Inpainting conditioning mask strength", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
+ "initial_noise_multiplier": OptionInfo(1.0, "Multiply initial noise by this factor, may result in less or more detailed img2img", gr.Slider, {"minimum": 0.5, "maximum": 1.5, "step": 0.01 }),
"img2img_color_correction": OptionInfo(False, "Apply color correction to img2img results to match original colors."),
"img2img_fix_steps": OptionInfo(False, "With img2img, do exactly the amount of steps the slider specifies (normally you'd do less with less denoising)."),
"enable_quantization": OptionInfo(False, "Enable quantization in K samplers for sharper and cleaner results. This may change existing seeds. Requires restart to apply."),
--
cgit v1.2.1
From b6e5edd74657e3fd1fbd04f341b7a84625d4aa7a Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 18:06:33 +0300
Subject: add built-in extension system add support for adding upscalers in
extensions move LDSR, ScuNET and SwinIR to built-in extensions
---
modules/devices.py | 11 +-
modules/extensions.py | 22 +-
modules/ldsr_model.py | 54 ---
modules/ldsr_model_arch.py | 230 ---------
modules/modelloader.py | 20 +-
modules/scunet_model.py | 87 ----
modules/scunet_model_arch.py | 265 ----------
modules/shared.py | 13 +-
modules/swinir_model.py | 157 ------
modules/swinir_model_arch.py | 867 ---------------------------------
modules/swinir_model_arch_v2.py | 1017 ---------------------------------------
modules/ui.py | 1 -
modules/ui_extensions.py | 8 +-
13 files changed, 42 insertions(+), 2710 deletions(-)
delete mode 100644 modules/ldsr_model.py
delete mode 100644 modules/ldsr_model_arch.py
delete mode 100644 modules/scunet_model.py
delete mode 100644 modules/scunet_model_arch.py
delete mode 100644 modules/swinir_model.py
delete mode 100644 modules/swinir_model_arch.py
delete mode 100644 modules/swinir_model_arch_v2.py
(limited to 'modules')
diff --git a/modules/devices.py b/modules/devices.py
index d6a76844..f8cffae1 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -44,6 +44,15 @@ def get_optimal_device():
return cpu
+def get_device_for(task):
+ from modules import shared
+
+ if task in shared.cmd_opts.use_cpu:
+ return cpu
+
+ return get_optimal_device()
+
+
def torch_gc():
if torch.cuda.is_available():
with torch.cuda.device(get_cuda_device_string()):
@@ -67,7 +76,7 @@ def enable_tf32():
errors.run(enable_tf32, "Enabling TF32")
cpu = torch.device("cpu")
-device = device_interrogate = device_gfpgan = device_swinir = device_esrgan = device_scunet = device_codeformer = None
+device = device_interrogate = device_gfpgan = device_esrgan = device_codeformer = None
dtype = torch.float16
dtype_vae = torch.float16
diff --git a/modules/extensions.py b/modules/extensions.py
index db9c4200..b522125c 100644
--- a/modules/extensions.py
+++ b/modules/extensions.py
@@ -8,6 +8,7 @@ from modules import paths, shared
extensions = []
extensions_dir = os.path.join(paths.script_path, "extensions")
+extensions_builtin_dir = os.path.join(paths.script_path, "extensions-builtin")
def active():
@@ -15,12 +16,13 @@ def active():
class Extension:
- def __init__(self, name, path, enabled=True):
+ def __init__(self, name, path, enabled=True, is_builtin=False):
self.name = name
self.path = path
self.enabled = enabled
self.status = ''
self.can_update = False
+ self.is_builtin = is_builtin
repo = None
try:
@@ -79,11 +81,19 @@ def list_extensions():
if not os.path.isdir(extensions_dir):
return
- for dirname in sorted(os.listdir(extensions_dir)):
- path = os.path.join(extensions_dir, dirname)
- if not os.path.isdir(path):
- continue
+ paths = []
+ for dirname in [extensions_dir, extensions_builtin_dir]:
+ if not os.path.isdir(dirname):
+ return
- extension = Extension(name=dirname, path=path, enabled=dirname not in shared.opts.disabled_extensions)
+ for extension_dirname in sorted(os.listdir(dirname)):
+ path = os.path.join(dirname, extension_dirname)
+ if not os.path.isdir(path):
+ continue
+
+ paths.append((extension_dirname, path, dirname == extensions_builtin_dir))
+
+ for dirname, path, is_builtin in paths:
+ extension = Extension(name=dirname, path=path, enabled=dirname not in shared.opts.disabled_extensions, is_builtin=is_builtin)
extensions.append(extension)
diff --git a/modules/ldsr_model.py b/modules/ldsr_model.py
deleted file mode 100644
index 8c4db44a..00000000
--- a/modules/ldsr_model.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import os
-import sys
-import traceback
-
-from basicsr.utils.download_util import load_file_from_url
-
-from modules.upscaler import Upscaler, UpscalerData
-from modules.ldsr_model_arch import LDSR
-from modules import shared
-
-
-class UpscalerLDSR(Upscaler):
- def __init__(self, user_path):
- self.name = "LDSR"
- self.user_path = user_path
- self.model_url = "https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1"
- self.yaml_url = "https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1"
- super().__init__()
- scaler_data = UpscalerData("LDSR", None, self)
- self.scalers = [scaler_data]
-
- def load_model(self, path: str):
- # Remove incorrect project.yaml file if too big
- yaml_path = os.path.join(self.model_path, "project.yaml")
- old_model_path = os.path.join(self.model_path, "model.pth")
- new_model_path = os.path.join(self.model_path, "model.ckpt")
- if os.path.exists(yaml_path):
- statinfo = os.stat(yaml_path)
- if statinfo.st_size >= 10485760:
- print("Removing invalid LDSR YAML file.")
- os.remove(yaml_path)
- if os.path.exists(old_model_path):
- print("Renaming model from model.pth to model.ckpt")
- os.rename(old_model_path, new_model_path)
- model = load_file_from_url(url=self.model_url, model_dir=self.model_path,
- file_name="model.ckpt", progress=True)
- yaml = load_file_from_url(url=self.yaml_url, model_dir=self.model_path,
- file_name="project.yaml", progress=True)
-
- try:
- return LDSR(model, yaml)
-
- except Exception:
- print("Error importing LDSR:", file=sys.stderr)
- print(traceback.format_exc(), file=sys.stderr)
- return None
-
- def do_upscale(self, img, path):
- ldsr = self.load_model(path)
- if ldsr is None:
- print("NO LDSR!")
- return img
- ddim_steps = shared.opts.ldsr_steps
- return ldsr.super_resolution(img, ddim_steps, self.scale)
diff --git a/modules/ldsr_model_arch.py b/modules/ldsr_model_arch.py
deleted file mode 100644
index 90e0a2f0..00000000
--- a/modules/ldsr_model_arch.py
+++ /dev/null
@@ -1,230 +0,0 @@
-import gc
-import time
-import warnings
-
-import numpy as np
-import torch
-import torchvision
-from PIL import Image
-from einops import rearrange, repeat
-from omegaconf import OmegaConf
-
-from ldm.models.diffusion.ddim import DDIMSampler
-from ldm.util import instantiate_from_config, ismap
-
-warnings.filterwarnings("ignore", category=UserWarning)
-
-
-# Create LDSR Class
-class LDSR:
- def load_model_from_config(self, half_attention):
- print(f"Loading model from {self.modelPath}")
- pl_sd = torch.load(self.modelPath, map_location="cpu")
- sd = pl_sd["state_dict"]
- config = OmegaConf.load(self.yamlPath)
- model = instantiate_from_config(config.model)
- model.load_state_dict(sd, strict=False)
- model.cuda()
- if half_attention:
- model = model.half()
-
- model.eval()
- return {"model": model}
-
- def __init__(self, model_path, yaml_path):
- self.modelPath = model_path
- self.yamlPath = yaml_path
-
- @staticmethod
- def run(model, selected_path, custom_steps, eta):
- example = get_cond(selected_path)
-
- n_runs = 1
- guider = None
- ckwargs = None
- ddim_use_x0_pred = False
- temperature = 1.
- eta = eta
- custom_shape = None
-
- height, width = example["image"].shape[1:3]
- split_input = height >= 128 and width >= 128
-
- if split_input:
- ks = 128
- stride = 64
- vqf = 4 #
- model.split_input_params = {"ks": (ks, ks), "stride": (stride, stride),
- "vqf": vqf,
- "patch_distributed_vq": True,
- "tie_braker": False,
- "clip_max_weight": 0.5,
- "clip_min_weight": 0.01,
- "clip_max_tie_weight": 0.5,
- "clip_min_tie_weight": 0.01}
- else:
- if hasattr(model, "split_input_params"):
- delattr(model, "split_input_params")
-
- x_t = None
- logs = None
- for n in range(n_runs):
- if custom_shape is not None:
- x_t = torch.randn(1, custom_shape[1], custom_shape[2], custom_shape[3]).to(model.device)
- x_t = repeat(x_t, '1 c h w -> b c h w', b=custom_shape[0])
-
- logs = make_convolutional_sample(example, model,
- custom_steps=custom_steps,
- eta=eta, quantize_x0=False,
- custom_shape=custom_shape,
- temperature=temperature, noise_dropout=0.,
- corrector=guider, corrector_kwargs=ckwargs, x_T=x_t,
- ddim_use_x0_pred=ddim_use_x0_pred
- )
- return logs
-
- def super_resolution(self, image, steps=100, target_scale=2, half_attention=False):
- model = self.load_model_from_config(half_attention)
-
- # Run settings
- diffusion_steps = int(steps)
- eta = 1.0
-
- down_sample_method = 'Lanczos'
-
- gc.collect()
- torch.cuda.empty_cache()
-
- im_og = image
- width_og, height_og = im_og.size
- # If we can adjust the max upscale size, then the 4 below should be our variable
- down_sample_rate = target_scale / 4
- wd = width_og * down_sample_rate
- hd = height_og * down_sample_rate
- width_downsampled_pre = int(np.ceil(wd))
- height_downsampled_pre = int(np.ceil(hd))
-
- if down_sample_rate != 1:
- print(
- f'Downsampling from [{width_og}, {height_og}] to [{width_downsampled_pre}, {height_downsampled_pre}]')
- im_og = im_og.resize((width_downsampled_pre, height_downsampled_pre), Image.LANCZOS)
- else:
- print(f"Down sample rate is 1 from {target_scale} / 4 (Not downsampling)")
-
- # pad width and height to multiples of 64, pads with the edge values of image to avoid artifacts
- pad_w, pad_h = np.max(((2, 2), np.ceil(np.array(im_og.size) / 64).astype(int)), axis=0) * 64 - im_og.size
- im_padded = Image.fromarray(np.pad(np.array(im_og), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge'))
-
- logs = self.run(model["model"], im_padded, diffusion_steps, eta)
-
- sample = logs["sample"]
- sample = sample.detach().cpu()
- sample = torch.clamp(sample, -1., 1.)
- sample = (sample + 1.) / 2. * 255
- sample = sample.numpy().astype(np.uint8)
- sample = np.transpose(sample, (0, 2, 3, 1))
- a = Image.fromarray(sample[0])
-
- # remove padding
- a = a.crop((0, 0) + tuple(np.array(im_og.size) * 4))
-
- del model
- gc.collect()
- torch.cuda.empty_cache()
- return a
-
-
-def get_cond(selected_path):
- example = dict()
- up_f = 4
- c = selected_path.convert('RGB')
- c = torch.unsqueeze(torchvision.transforms.ToTensor()(c), 0)
- c_up = torchvision.transforms.functional.resize(c, size=[up_f * c.shape[2], up_f * c.shape[3]],
- antialias=True)
- c_up = rearrange(c_up, '1 c h w -> 1 h w c')
- c = rearrange(c, '1 c h w -> 1 h w c')
- c = 2. * c - 1.
-
- c = c.to(torch.device("cuda"))
- example["LR_image"] = c
- example["image"] = c_up
-
- return example
-
-
-@torch.no_grad()
-def convsample_ddim(model, cond, steps, shape, eta=1.0, callback=None, normals_sequence=None,
- mask=None, x0=None, quantize_x0=False, temperature=1., score_corrector=None,
- corrector_kwargs=None, x_t=None
- ):
- ddim = DDIMSampler(model)
- bs = shape[0]
- shape = shape[1:]
- print(f"Sampling with eta = {eta}; steps: {steps}")
- samples, intermediates = ddim.sample(steps, batch_size=bs, shape=shape, conditioning=cond, callback=callback,
- normals_sequence=normals_sequence, quantize_x0=quantize_x0, eta=eta,
- mask=mask, x0=x0, temperature=temperature, verbose=False,
- score_corrector=score_corrector,
- corrector_kwargs=corrector_kwargs, x_t=x_t)
-
- return samples, intermediates
-
-
-@torch.no_grad()
-def make_convolutional_sample(batch, model, custom_steps=None, eta=1.0, quantize_x0=False, custom_shape=None, temperature=1., noise_dropout=0., corrector=None,
- corrector_kwargs=None, x_T=None, ddim_use_x0_pred=False):
- log = dict()
-
- z, c, x, xrec, xc = model.get_input(batch, model.first_stage_key,
- return_first_stage_outputs=True,
- force_c_encode=not (hasattr(model, 'split_input_params')
- and model.cond_stage_key == 'coordinates_bbox'),
- return_original_cond=True)
-
- if custom_shape is not None:
- z = torch.randn(custom_shape)
- print(f"Generating {custom_shape[0]} samples of shape {custom_shape[1:]}")
-
- z0 = None
-
- log["input"] = x
- log["reconstruction"] = xrec
-
- if ismap(xc):
- log["original_conditioning"] = model.to_rgb(xc)
- if hasattr(model, 'cond_stage_key'):
- log[model.cond_stage_key] = model.to_rgb(xc)
-
- else:
- log["original_conditioning"] = xc if xc is not None else torch.zeros_like(x)
- if model.cond_stage_model:
- log[model.cond_stage_key] = xc if xc is not None else torch.zeros_like(x)
- if model.cond_stage_key == 'class_label':
- log[model.cond_stage_key] = xc[model.cond_stage_key]
-
- with model.ema_scope("Plotting"):
- t0 = time.time()
-
- sample, intermediates = convsample_ddim(model, c, steps=custom_steps, shape=z.shape,
- eta=eta,
- quantize_x0=quantize_x0, mask=None, x0=z0,
- temperature=temperature, score_corrector=corrector, corrector_kwargs=corrector_kwargs,
- x_t=x_T)
- t1 = time.time()
-
- if ddim_use_x0_pred:
- sample = intermediates['pred_x0'][-1]
-
- x_sample = model.decode_first_stage(sample)
-
- try:
- x_sample_noquant = model.decode_first_stage(sample, force_not_quantize=True)
- log["sample_noquant"] = x_sample_noquant
- log["sample_diff"] = torch.abs(x_sample_noquant - x_sample)
- except:
- pass
-
- log["sample"] = x_sample
- log["time"] = t1 - t0
-
- return log
diff --git a/modules/modelloader.py b/modules/modelloader.py
index 7d2f0ade..e647f6fa 100644
--- a/modules/modelloader.py
+++ b/modules/modelloader.py
@@ -124,10 +124,9 @@ def move_files(src_path: str, dest_path: str, ext_filter: str = None):
def load_upscalers():
- sd = shared.script_path
# We can only do this 'magic' method to dynamically load upscalers if they are referenced,
# so we'll try to import any _model.py files before looking in __subclasses__
- modules_dir = os.path.join(sd, "modules")
+ modules_dir = os.path.join(shared.script_path, "modules")
for file in os.listdir(modules_dir):
if "_model.py" in file:
model_name = file.replace("_model.py", "")
@@ -136,22 +135,13 @@ def load_upscalers():
importlib.import_module(full_model)
except:
pass
+
datas = []
- c_o = vars(shared.cmd_opts)
+ commandline_options = vars(shared.cmd_opts)
for cls in Upscaler.__subclasses__():
name = cls.__name__
- module_name = cls.__module__
- module = importlib.import_module(module_name)
- class_ = getattr(module, name)
cmd_name = f"{name.lower().replace('upscaler', '')}_models_path"
- opt_string = None
- try:
- if cmd_name in c_o:
- opt_string = c_o[cmd_name]
- except:
- pass
- scaler = class_(opt_string)
- for child in scaler.scalers:
- datas.append(child)
+ scaler = cls(commandline_options.get(cmd_name, None))
+ datas += scaler.scalers
shared.sd_upscalers = datas
diff --git a/modules/scunet_model.py b/modules/scunet_model.py
deleted file mode 100644
index 52360241..00000000
--- a/modules/scunet_model.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import os.path
-import sys
-import traceback
-
-import PIL.Image
-import numpy as np
-import torch
-from basicsr.utils.download_util import load_file_from_url
-
-import modules.upscaler
-from modules import devices, modelloader
-from modules.scunet_model_arch import SCUNet as net
-
-
-class UpscalerScuNET(modules.upscaler.Upscaler):
- def __init__(self, dirname):
- self.name = "ScuNET"
- self.model_name = "ScuNET GAN"
- self.model_name2 = "ScuNET PSNR"
- self.model_url = "https://github.com/cszn/KAIR/releases/download/v1.0/scunet_color_real_gan.pth"
- self.model_url2 = "https://github.com/cszn/KAIR/releases/download/v1.0/scunet_color_real_psnr.pth"
- self.user_path = dirname
- super().__init__()
- model_paths = self.find_models(ext_filter=[".pth"])
- scalers = []
- add_model2 = True
- for file in model_paths:
- if "http" in file:
- name = self.model_name
- else:
- name = modelloader.friendly_name(file)
- if name == self.model_name2 or file == self.model_url2:
- add_model2 = False
- try:
- scaler_data = modules.upscaler.UpscalerData(name, file, self, 4)
- scalers.append(scaler_data)
- except Exception:
- print(f"Error loading ScuNET model: {file}", file=sys.stderr)
- print(traceback.format_exc(), file=sys.stderr)
- if add_model2:
- scaler_data2 = modules.upscaler.UpscalerData(self.model_name2, self.model_url2, self)
- scalers.append(scaler_data2)
- self.scalers = scalers
-
- def do_upscale(self, img: PIL.Image, selected_file):
- torch.cuda.empty_cache()
-
- model = self.load_model(selected_file)
- if model is None:
- return img
-
- device = devices.device_scunet
- img = np.array(img)
- img = img[:, :, ::-1]
- img = np.moveaxis(img, 2, 0) / 255
- img = torch.from_numpy(img).float()
- img = img.unsqueeze(0).to(device)
-
- with torch.no_grad():
- output = model(img)
- output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
- output = 255. * np.moveaxis(output, 0, 2)
- output = output.astype(np.uint8)
- output = output[:, :, ::-1]
- torch.cuda.empty_cache()
- return PIL.Image.fromarray(output, 'RGB')
-
- def load_model(self, path: str):
- device = devices.device_scunet
- if "http" in path:
- filename = load_file_from_url(url=self.model_url, model_dir=self.model_path, file_name="%s.pth" % self.name,
- progress=True)
- else:
- filename = path
- if not os.path.exists(os.path.join(self.model_path, filename)) or filename is None:
- print(f"ScuNET: Unable to load model from {filename}", file=sys.stderr)
- return None
-
- model = net(in_nc=3, config=[4, 4, 4, 4, 4, 4, 4], dim=64)
- model.load_state_dict(torch.load(filename), strict=True)
- model.eval()
- for k, v in model.named_parameters():
- v.requires_grad = False
- model = model.to(device)
-
- return model
-
diff --git a/modules/scunet_model_arch.py b/modules/scunet_model_arch.py
deleted file mode 100644
index 43ca8d36..00000000
--- a/modules/scunet_model_arch.py
+++ /dev/null
@@ -1,265 +0,0 @@
-# -*- coding: utf-8 -*-
-import numpy as np
-import torch
-import torch.nn as nn
-from einops import rearrange
-from einops.layers.torch import Rearrange
-from timm.models.layers import trunc_normal_, DropPath
-
-
-class WMSA(nn.Module):
- """ Self-attention module in Swin Transformer
- """
-
- def __init__(self, input_dim, output_dim, head_dim, window_size, type):
- super(WMSA, self).__init__()
- self.input_dim = input_dim
- self.output_dim = output_dim
- self.head_dim = head_dim
- self.scale = self.head_dim ** -0.5
- self.n_heads = input_dim // head_dim
- self.window_size = window_size
- self.type = type
- self.embedding_layer = nn.Linear(self.input_dim, 3 * self.input_dim, bias=True)
-
- self.relative_position_params = nn.Parameter(
- torch.zeros((2 * window_size - 1) * (2 * window_size - 1), self.n_heads))
-
- self.linear = nn.Linear(self.input_dim, self.output_dim)
-
- trunc_normal_(self.relative_position_params, std=.02)
- self.relative_position_params = torch.nn.Parameter(
- self.relative_position_params.view(2 * window_size - 1, 2 * window_size - 1, self.n_heads).transpose(1,
- 2).transpose(
- 0, 1))
-
- def generate_mask(self, h, w, p, shift):
- """ generating the mask of SW-MSA
- Args:
- shift: shift parameters in CyclicShift.
- Returns:
- attn_mask: should be (1 1 w p p),
- """
- # supporting square.
- attn_mask = torch.zeros(h, w, p, p, p, p, dtype=torch.bool, device=self.relative_position_params.device)
- if self.type == 'W':
- return attn_mask
-
- s = p - shift
- attn_mask[-1, :, :s, :, s:, :] = True
- attn_mask[-1, :, s:, :, :s, :] = True
- attn_mask[:, -1, :, :s, :, s:] = True
- attn_mask[:, -1, :, s:, :, :s] = True
- attn_mask = rearrange(attn_mask, 'w1 w2 p1 p2 p3 p4 -> 1 1 (w1 w2) (p1 p2) (p3 p4)')
- return attn_mask
-
- def forward(self, x):
- """ Forward pass of Window Multi-head Self-attention module.
- Args:
- x: input tensor with shape of [b h w c];
- attn_mask: attention mask, fill -inf where the value is True;
- Returns:
- output: tensor shape [b h w c]
- """
- if self.type != 'W': x = torch.roll(x, shifts=(-(self.window_size // 2), -(self.window_size // 2)), dims=(1, 2))
- x = rearrange(x, 'b (w1 p1) (w2 p2) c -> b w1 w2 p1 p2 c', p1=self.window_size, p2=self.window_size)
- h_windows = x.size(1)
- w_windows = x.size(2)
- # square validation
- # assert h_windows == w_windows
-
- x = rearrange(x, 'b w1 w2 p1 p2 c -> b (w1 w2) (p1 p2) c', p1=self.window_size, p2=self.window_size)
- qkv = self.embedding_layer(x)
- q, k, v = rearrange(qkv, 'b nw np (threeh c) -> threeh b nw np c', c=self.head_dim).chunk(3, dim=0)
- sim = torch.einsum('hbwpc,hbwqc->hbwpq', q, k) * self.scale
- # Adding learnable relative embedding
- sim = sim + rearrange(self.relative_embedding(), 'h p q -> h 1 1 p q')
- # Using Attn Mask to distinguish different subwindows.
- if self.type != 'W':
- attn_mask = self.generate_mask(h_windows, w_windows, self.window_size, shift=self.window_size // 2)
- sim = sim.masked_fill_(attn_mask, float("-inf"))
-
- probs = nn.functional.softmax(sim, dim=-1)
- output = torch.einsum('hbwij,hbwjc->hbwic', probs, v)
- output = rearrange(output, 'h b w p c -> b w p (h c)')
- output = self.linear(output)
- output = rearrange(output, 'b (w1 w2) (p1 p2) c -> b (w1 p1) (w2 p2) c', w1=h_windows, p1=self.window_size)
-
- if self.type != 'W': output = torch.roll(output, shifts=(self.window_size // 2, self.window_size // 2),
- dims=(1, 2))
- return output
-
- def relative_embedding(self):
- cord = torch.tensor(np.array([[i, j] for i in range(self.window_size) for j in range(self.window_size)]))
- relation = cord[:, None, :] - cord[None, :, :] + self.window_size - 1
- # negative is allowed
- return self.relative_position_params[:, relation[:, :, 0].long(), relation[:, :, 1].long()]
-
-
-class Block(nn.Module):
- def __init__(self, input_dim, output_dim, head_dim, window_size, drop_path, type='W', input_resolution=None):
- """ SwinTransformer Block
- """
- super(Block, self).__init__()
- self.input_dim = input_dim
- self.output_dim = output_dim
- assert type in ['W', 'SW']
- self.type = type
- if input_resolution <= window_size:
- self.type = 'W'
-
- self.ln1 = nn.LayerNorm(input_dim)
- self.msa = WMSA(input_dim, input_dim, head_dim, window_size, self.type)
- self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
- self.ln2 = nn.LayerNorm(input_dim)
- self.mlp = nn.Sequential(
- nn.Linear(input_dim, 4 * input_dim),
- nn.GELU(),
- nn.Linear(4 * input_dim, output_dim),
- )
-
- def forward(self, x):
- x = x + self.drop_path(self.msa(self.ln1(x)))
- x = x + self.drop_path(self.mlp(self.ln2(x)))
- return x
-
-
-class ConvTransBlock(nn.Module):
- def __init__(self, conv_dim, trans_dim, head_dim, window_size, drop_path, type='W', input_resolution=None):
- """ SwinTransformer and Conv Block
- """
- super(ConvTransBlock, self).__init__()
- self.conv_dim = conv_dim
- self.trans_dim = trans_dim
- self.head_dim = head_dim
- self.window_size = window_size
- self.drop_path = drop_path
- self.type = type
- self.input_resolution = input_resolution
-
- assert self.type in ['W', 'SW']
- if self.input_resolution <= self.window_size:
- self.type = 'W'
-
- self.trans_block = Block(self.trans_dim, self.trans_dim, self.head_dim, self.window_size, self.drop_path,
- self.type, self.input_resolution)
- self.conv1_1 = nn.Conv2d(self.conv_dim + self.trans_dim, self.conv_dim + self.trans_dim, 1, 1, 0, bias=True)
- self.conv1_2 = nn.Conv2d(self.conv_dim + self.trans_dim, self.conv_dim + self.trans_dim, 1, 1, 0, bias=True)
-
- self.conv_block = nn.Sequential(
- nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False),
- nn.ReLU(True),
- nn.Conv2d(self.conv_dim, self.conv_dim, 3, 1, 1, bias=False)
- )
-
- def forward(self, x):
- conv_x, trans_x = torch.split(self.conv1_1(x), (self.conv_dim, self.trans_dim), dim=1)
- conv_x = self.conv_block(conv_x) + conv_x
- trans_x = Rearrange('b c h w -> b h w c')(trans_x)
- trans_x = self.trans_block(trans_x)
- trans_x = Rearrange('b h w c -> b c h w')(trans_x)
- res = self.conv1_2(torch.cat((conv_x, trans_x), dim=1))
- x = x + res
-
- return x
-
-
-class SCUNet(nn.Module):
- # def __init__(self, in_nc=3, config=[2, 2, 2, 2, 2, 2, 2], dim=64, drop_path_rate=0.0, input_resolution=256):
- def __init__(self, in_nc=3, config=None, dim=64, drop_path_rate=0.0, input_resolution=256):
- super(SCUNet, self).__init__()
- if config is None:
- config = [2, 2, 2, 2, 2, 2, 2]
- self.config = config
- self.dim = dim
- self.head_dim = 32
- self.window_size = 8
-
- # drop path rate for each layer
- dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(config))]
-
- self.m_head = [nn.Conv2d(in_nc, dim, 3, 1, 1, bias=False)]
-
- begin = 0
- self.m_down1 = [ConvTransBlock(dim // 2, dim // 2, self.head_dim, self.window_size, dpr[i + begin],
- 'W' if not i % 2 else 'SW', input_resolution)
- for i in range(config[0])] + \
- [nn.Conv2d(dim, 2 * dim, 2, 2, 0, bias=False)]
-
- begin += config[0]
- self.m_down2 = [ConvTransBlock(dim, dim, self.head_dim, self.window_size, dpr[i + begin],
- 'W' if not i % 2 else 'SW', input_resolution // 2)
- for i in range(config[1])] + \
- [nn.Conv2d(2 * dim, 4 * dim, 2, 2, 0, bias=False)]
-
- begin += config[1]
- self.m_down3 = [ConvTransBlock(2 * dim, 2 * dim, self.head_dim, self.window_size, dpr[i + begin],
- 'W' if not i % 2 else 'SW', input_resolution // 4)
- for i in range(config[2])] + \
- [nn.Conv2d(4 * dim, 8 * dim, 2, 2, 0, bias=False)]
-
- begin += config[2]
- self.m_body = [ConvTransBlock(4 * dim, 4 * dim, self.head_dim, self.window_size, dpr[i + begin],
- 'W' if not i % 2 else 'SW', input_resolution // 8)
- for i in range(config[3])]
-
- begin += config[3]
- self.m_up3 = [nn.ConvTranspose2d(8 * dim, 4 * dim, 2, 2, 0, bias=False), ] + \
- [ConvTransBlock(2 * dim, 2 * dim, self.head_dim, self.window_size, dpr[i + begin],
- 'W' if not i % 2 else 'SW', input_resolution // 4)
- for i in range(config[4])]
-
- begin += config[4]
- self.m_up2 = [nn.ConvTranspose2d(4 * dim, 2 * dim, 2, 2, 0, bias=False), ] + \
- [ConvTransBlock(dim, dim, self.head_dim, self.window_size, dpr[i + begin],
- 'W' if not i % 2 else 'SW', input_resolution // 2)
- for i in range(config[5])]
-
- begin += config[5]
- self.m_up1 = [nn.ConvTranspose2d(2 * dim, dim, 2, 2, 0, bias=False), ] + \
- [ConvTransBlock(dim // 2, dim // 2, self.head_dim, self.window_size, dpr[i + begin],
- 'W' if not i % 2 else 'SW', input_resolution)
- for i in range(config[6])]
-
- self.m_tail = [nn.Conv2d(dim, in_nc, 3, 1, 1, bias=False)]
-
- self.m_head = nn.Sequential(*self.m_head)
- self.m_down1 = nn.Sequential(*self.m_down1)
- self.m_down2 = nn.Sequential(*self.m_down2)
- self.m_down3 = nn.Sequential(*self.m_down3)
- self.m_body = nn.Sequential(*self.m_body)
- self.m_up3 = nn.Sequential(*self.m_up3)
- self.m_up2 = nn.Sequential(*self.m_up2)
- self.m_up1 = nn.Sequential(*self.m_up1)
- self.m_tail = nn.Sequential(*self.m_tail)
- # self.apply(self._init_weights)
-
- def forward(self, x0):
-
- h, w = x0.size()[-2:]
- paddingBottom = int(np.ceil(h / 64) * 64 - h)
- paddingRight = int(np.ceil(w / 64) * 64 - w)
- x0 = nn.ReplicationPad2d((0, paddingRight, 0, paddingBottom))(x0)
-
- x1 = self.m_head(x0)
- x2 = self.m_down1(x1)
- x3 = self.m_down2(x2)
- x4 = self.m_down3(x3)
- x = self.m_body(x4)
- x = self.m_up3(x + x4)
- x = self.m_up2(x + x3)
- x = self.m_up1(x + x2)
- x = self.m_tail(x + x1)
-
- x = x[..., :h, :w]
-
- return x
-
- def _init_weights(self, m):
- if isinstance(m, nn.Linear):
- trunc_normal_(m.weight, std=.02)
- if m.bias is not None:
- nn.init.constant_(m.bias, 0)
- elif isinstance(m, nn.LayerNorm):
- nn.init.constant_(m.bias, 0)
- nn.init.constant_(m.weight, 1.0)
\ No newline at end of file
diff --git a/modules/shared.py b/modules/shared.py
index 8202d8e5..dc45fcaa 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -50,9 +50,6 @@ parser.add_argument("--gfpgan-models-path", type=str, help="Path to directory wi
parser.add_argument("--esrgan-models-path", type=str, help="Path to directory with ESRGAN model file(s).", default=os.path.join(models_path, 'ESRGAN'))
parser.add_argument("--bsrgan-models-path", type=str, help="Path to directory with BSRGAN model file(s).", default=os.path.join(models_path, 'BSRGAN'))
parser.add_argument("--realesrgan-models-path", type=str, help="Path to directory with RealESRGAN model file(s).", default=os.path.join(models_path, 'RealESRGAN'))
-parser.add_argument("--scunet-models-path", type=str, help="Path to directory with ScuNET model file(s).", default=os.path.join(models_path, 'ScuNET'))
-parser.add_argument("--swinir-models-path", type=str, help="Path to directory with SwinIR model file(s).", default=os.path.join(models_path, 'SwinIR'))
-parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(models_path, 'LDSR'))
parser.add_argument("--clip-models-path", type=str, help="Path to directory with CLIP model file(s).", default=None)
parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers")
parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work")
@@ -61,7 +58,7 @@ parser.add_argument("--opt-split-attention", action='store_true', help="force-en
parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="force-enables InvokeAI's cross-attention layer optimization. By default, it's on when cuda is unavailable.")
parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")
parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization")
-parser.add_argument("--use-cpu", nargs='+',choices=['all', 'sd', 'interrogate', 'gfpgan', 'swinir', 'esrgan', 'scunet', 'codeformer'], help="use CPU as torch device for specified modules", default=[], type=str.lower)
+parser.add_argument("--use-cpu", nargs='+', help="use CPU as torch device for specified modules", default=[], type=str.lower)
parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests")
parser.add_argument("--port", type=int, help="launch gradio with given server port, you need root/admin rights for ports < 1024, defaults to 7860 if available", default=None)
parser.add_argument("--show-negative-prompt", action='store_true', help="does not do anything", default=False)
@@ -95,6 +92,7 @@ parser.add_argument("--tls-certfile", type=str, help="Partially enables TLS, req
parser.add_argument("--server-name", type=str, help="Sets hostname of server", default=None)
script_loading.preload_extensions(extensions.extensions_dir, parser)
+script_loading.preload_extensions(extensions.extensions_builtin_dir, parser)
cmd_opts = parser.parse_args()
@@ -112,8 +110,8 @@ restricted_opts = {
cmd_opts.disable_extension_access = (cmd_opts.share or cmd_opts.listen or cmd_opts.server_name) and not cmd_opts.enable_insecure_extension_access
-devices.device, devices.device_interrogate, devices.device_gfpgan, devices.device_swinir, devices.device_esrgan, devices.device_scunet, devices.device_codeformer = \
-(devices.cpu if any(y in cmd_opts.use_cpu for y in [x, 'all']) else devices.get_optimal_device() for x in ['sd', 'interrogate', 'gfpgan', 'swinir', 'esrgan', 'scunet', 'codeformer'])
+devices.device, devices.device_interrogate, devices.device_gfpgan, devices.device_esrgan, devices.device_codeformer = \
+ (devices.cpu if any(y in cmd_opts.use_cpu for y in [x, 'all']) else devices.get_optimal_device() for x in ['sd', 'interrogate', 'gfpgan', 'esrgan', 'codeformer'])
device = devices.device
weight_load_location = None if cmd_opts.lowram else "cpu"
@@ -326,9 +324,6 @@ options_templates.update(options_section(('upscaling', "Upscaling"), {
"ESRGAN_tile": OptionInfo(192, "Tile size for ESRGAN upscalers. 0 = no tiling.", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}),
"ESRGAN_tile_overlap": OptionInfo(8, "Tile overlap, in pixels for ESRGAN upscalers. Low values = visible seam.", gr.Slider, {"minimum": 0, "maximum": 48, "step": 1}),
"realesrgan_enabled_models": OptionInfo(["R-ESRGAN 4x+", "R-ESRGAN 4x+ Anime6B"], "Select which Real-ESRGAN models to show in the web UI. (Requires restart)", gr.CheckboxGroup, lambda: {"choices": realesrgan_models_names()}),
- "SWIN_tile": OptionInfo(192, "Tile size for all SwinIR.", gr.Slider, {"minimum": 16, "maximum": 512, "step": 16}),
- "SWIN_tile_overlap": OptionInfo(8, "Tile overlap, in pixels for SwinIR. Low values = visible seam.", gr.Slider, {"minimum": 0, "maximum": 48, "step": 1}),
- "ldsr_steps": OptionInfo(100, "LDSR processing steps. Lower = faster", gr.Slider, {"minimum": 1, "maximum": 200, "step": 1}),
"upscaler_for_img2img": OptionInfo(None, "Upscaler for img2img", gr.Dropdown, lambda: {"choices": [x.name for x in sd_upscalers]}),
"use_scale_latent_for_hires_fix": OptionInfo(False, "Upscale latent space image when doing hires. fix"),
}))
diff --git a/modules/swinir_model.py b/modules/swinir_model.py
deleted file mode 100644
index 483eabd4..00000000
--- a/modules/swinir_model.py
+++ /dev/null
@@ -1,157 +0,0 @@
-import contextlib
-import os
-
-import numpy as np
-import torch
-from PIL import Image
-from basicsr.utils.download_util import load_file_from_url
-from tqdm import tqdm
-
-from modules import modelloader, devices
-from modules.shared import cmd_opts, opts
-from modules.swinir_model_arch import SwinIR as net
-from modules.swinir_model_arch_v2 import Swin2SR as net2
-from modules.upscaler import Upscaler, UpscalerData
-
-
-class UpscalerSwinIR(Upscaler):
- def __init__(self, dirname):
- self.name = "SwinIR"
- self.model_url = "https://github.com/JingyunLiang/SwinIR/releases/download/v0.0" \
- "/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR" \
- "-L_x4_GAN.pth "
- self.model_name = "SwinIR 4x"
- self.user_path = dirname
- super().__init__()
- scalers = []
- model_files = self.find_models(ext_filter=[".pt", ".pth"])
- for model in model_files:
- if "http" in model:
- name = self.model_name
- else:
- name = modelloader.friendly_name(model)
- model_data = UpscalerData(name, model, self)
- scalers.append(model_data)
- self.scalers = scalers
-
- def do_upscale(self, img, model_file):
- model = self.load_model(model_file)
- if model is None:
- return img
- model = model.to(devices.device_swinir)
- img = upscale(img, model)
- try:
- torch.cuda.empty_cache()
- except:
- pass
- return img
-
- def load_model(self, path, scale=4):
- if "http" in path:
- dl_name = "%s%s" % (self.model_name.replace(" ", "_"), ".pth")
- filename = load_file_from_url(url=path, model_dir=self.model_path, file_name=dl_name, progress=True)
- else:
- filename = path
- if filename is None or not os.path.exists(filename):
- return None
- if filename.endswith(".v2.pth"):
- model = net2(
- upscale=scale,
- in_chans=3,
- img_size=64,
- window_size=8,
- img_range=1.0,
- depths=[6, 6, 6, 6, 6, 6],
- embed_dim=180,
- num_heads=[6, 6, 6, 6, 6, 6],
- mlp_ratio=2,
- upsampler="nearest+conv",
- resi_connection="1conv",
- )
- params = None
- else:
- model = net(
- upscale=scale,
- in_chans=3,
- img_size=64,
- window_size=8,
- img_range=1.0,
- depths=[6, 6, 6, 6, 6, 6, 6, 6, 6],
- embed_dim=240,
- num_heads=[8, 8, 8, 8, 8, 8, 8, 8, 8],
- mlp_ratio=2,
- upsampler="nearest+conv",
- resi_connection="3conv",
- )
- params = "params_ema"
-
- pretrained_model = torch.load(filename)
- if params is not None:
- model.load_state_dict(pretrained_model[params], strict=True)
- else:
- model.load_state_dict(pretrained_model, strict=True)
- if not cmd_opts.no_half:
- model = model.half()
- return model
-
-
-def upscale(
- img,
- model,
- tile=opts.SWIN_tile,
- tile_overlap=opts.SWIN_tile_overlap,
- window_size=8,
- scale=4,
-):
- img = np.array(img)
- img = img[:, :, ::-1]
- img = np.moveaxis(img, 2, 0) / 255
- img = torch.from_numpy(img).float()
- img = img.unsqueeze(0).to(devices.device_swinir)
- with torch.no_grad(), devices.autocast():
- _, _, h_old, w_old = img.size()
- h_pad = (h_old // window_size + 1) * window_size - h_old
- w_pad = (w_old // window_size + 1) * window_size - w_old
- img = torch.cat([img, torch.flip(img, [2])], 2)[:, :, : h_old + h_pad, :]
- img = torch.cat([img, torch.flip(img, [3])], 3)[:, :, :, : w_old + w_pad]
- output = inference(img, model, tile, tile_overlap, window_size, scale)
- output = output[..., : h_old * scale, : w_old * scale]
- output = output.data.squeeze().float().cpu().clamp_(0, 1).numpy()
- if output.ndim == 3:
- output = np.transpose(
- output[[2, 1, 0], :, :], (1, 2, 0)
- ) # CHW-RGB to HCW-BGR
- output = (output * 255.0).round().astype(np.uint8) # float32 to uint8
- return Image.fromarray(output, "RGB")
-
-
-def inference(img, model, tile, tile_overlap, window_size, scale):
- # test the image tile by tile
- b, c, h, w = img.size()
- tile = min(tile, h, w)
- assert tile % window_size == 0, "tile size should be a multiple of window_size"
- sf = scale
-
- stride = tile - tile_overlap
- h_idx_list = list(range(0, h - tile, stride)) + [h - tile]
- w_idx_list = list(range(0, w - tile, stride)) + [w - tile]
- E = torch.zeros(b, c, h * sf, w * sf, dtype=torch.half, device=devices.device_swinir).type_as(img)
- W = torch.zeros_like(E, dtype=torch.half, device=devices.device_swinir)
-
- with tqdm(total=len(h_idx_list) * len(w_idx_list), desc="SwinIR tiles") as pbar:
- for h_idx in h_idx_list:
- for w_idx in w_idx_list:
- in_patch = img[..., h_idx: h_idx + tile, w_idx: w_idx + tile]
- out_patch = model(in_patch)
- out_patch_mask = torch.ones_like(out_patch)
-
- E[
- ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf
- ].add_(out_patch)
- W[
- ..., h_idx * sf: (h_idx + tile) * sf, w_idx * sf: (w_idx + tile) * sf
- ].add_(out_patch_mask)
- pbar.update(1)
- output = E.div_(W)
-
- return output
diff --git a/modules/swinir_model_arch.py b/modules/swinir_model_arch.py
deleted file mode 100644
index 863f42db..00000000
--- a/modules/swinir_model_arch.py
+++ /dev/null
@@ -1,867 +0,0 @@
-# -----------------------------------------------------------------------------------
-# SwinIR: Image Restoration Using Swin Transformer, https://arxiv.org/abs/2108.10257
-# Originally Written by Ze Liu, Modified by Jingyun Liang.
-# -----------------------------------------------------------------------------------
-
-import math
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.utils.checkpoint as checkpoint
-from timm.models.layers import DropPath, to_2tuple, trunc_normal_
-
-
-class Mlp(nn.Module):
- def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
- super().__init__()
- out_features = out_features or in_features
- hidden_features = hidden_features or in_features
- self.fc1 = nn.Linear(in_features, hidden_features)
- self.act = act_layer()
- self.fc2 = nn.Linear(hidden_features, out_features)
- self.drop = nn.Dropout(drop)
-
- def forward(self, x):
- x = self.fc1(x)
- x = self.act(x)
- x = self.drop(x)
- x = self.fc2(x)
- x = self.drop(x)
- return x
-
-
-def window_partition(x, window_size):
- """
- Args:
- x: (B, H, W, C)
- window_size (int): window size
-
- Returns:
- windows: (num_windows*B, window_size, window_size, C)
- """
- B, H, W, C = x.shape
- x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
- windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
- return windows
-
-
-def window_reverse(windows, window_size, H, W):
- """
- Args:
- windows: (num_windows*B, window_size, window_size, C)
- window_size (int): Window size
- H (int): Height of image
- W (int): Width of image
-
- Returns:
- x: (B, H, W, C)
- """
- B = int(windows.shape[0] / (H * W / window_size / window_size))
- x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
- x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
- return x
-
-
-class WindowAttention(nn.Module):
- r""" Window based multi-head self attention (W-MSA) module with relative position bias.
- It supports both of shifted and non-shifted window.
-
- Args:
- dim (int): Number of input channels.
- window_size (tuple[int]): The height and width of the window.
- num_heads (int): Number of attention heads.
- qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
- qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
- attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
- proj_drop (float, optional): Dropout ratio of output. Default: 0.0
- """
-
- def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.):
-
- super().__init__()
- self.dim = dim
- self.window_size = window_size # Wh, Ww
- self.num_heads = num_heads
- head_dim = dim // num_heads
- self.scale = qk_scale or head_dim ** -0.5
-
- # define a parameter table of relative position bias
- self.relative_position_bias_table = nn.Parameter(
- torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH
-
- # get pair-wise relative position index for each token inside the window
- coords_h = torch.arange(self.window_size[0])
- coords_w = torch.arange(self.window_size[1])
- coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww
- coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww
- relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww
- relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2
- relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0
- relative_coords[:, :, 1] += self.window_size[1] - 1
- relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
- relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww
- self.register_buffer("relative_position_index", relative_position_index)
-
- self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
- self.attn_drop = nn.Dropout(attn_drop)
- self.proj = nn.Linear(dim, dim)
-
- self.proj_drop = nn.Dropout(proj_drop)
-
- trunc_normal_(self.relative_position_bias_table, std=.02)
- self.softmax = nn.Softmax(dim=-1)
-
- def forward(self, x, mask=None):
- """
- Args:
- x: input features with shape of (num_windows*B, N, C)
- mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
- """
- B_, N, C = x.shape
- qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
- q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
-
- q = q * self.scale
- attn = (q @ k.transpose(-2, -1))
-
- relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view(
- self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH
- relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
- attn = attn + relative_position_bias.unsqueeze(0)
-
- if mask is not None:
- nW = mask.shape[0]
- attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
- attn = attn.view(-1, self.num_heads, N, N)
- attn = self.softmax(attn)
- else:
- attn = self.softmax(attn)
-
- attn = self.attn_drop(attn)
-
- x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
- x = self.proj(x)
- x = self.proj_drop(x)
- return x
-
- def extra_repr(self) -> str:
- return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}'
-
- def flops(self, N):
- # calculate flops for 1 window with token length of N
- flops = 0
- # qkv = self.qkv(x)
- flops += N * self.dim * 3 * self.dim
- # attn = (q @ k.transpose(-2, -1))
- flops += self.num_heads * N * (self.dim // self.num_heads) * N
- # x = (attn @ v)
- flops += self.num_heads * N * N * (self.dim // self.num_heads)
- # x = self.proj(x)
- flops += N * self.dim * self.dim
- return flops
-
-
-class SwinTransformerBlock(nn.Module):
- r""" Swin Transformer Block.
-
- Args:
- dim (int): Number of input channels.
- input_resolution (tuple[int]): Input resolution.
- num_heads (int): Number of attention heads.
- window_size (int): Window size.
- shift_size (int): Shift size for SW-MSA.
- mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
- qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
- qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
- drop (float, optional): Dropout rate. Default: 0.0
- attn_drop (float, optional): Attention dropout rate. Default: 0.0
- drop_path (float, optional): Stochastic depth rate. Default: 0.0
- act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
- norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
- """
-
- def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0,
- mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.,
- act_layer=nn.GELU, norm_layer=nn.LayerNorm):
- super().__init__()
- self.dim = dim
- self.input_resolution = input_resolution
- self.num_heads = num_heads
- self.window_size = window_size
- self.shift_size = shift_size
- self.mlp_ratio = mlp_ratio
- if min(self.input_resolution) <= self.window_size:
- # if window size is larger than input resolution, we don't partition windows
- self.shift_size = 0
- self.window_size = min(self.input_resolution)
- assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"
-
- self.norm1 = norm_layer(dim)
- self.attn = WindowAttention(
- dim, window_size=to_2tuple(self.window_size), num_heads=num_heads,
- qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
-
- self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
- self.norm2 = norm_layer(dim)
- mlp_hidden_dim = int(dim * mlp_ratio)
- self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
-
- if self.shift_size > 0:
- attn_mask = self.calculate_mask(self.input_resolution)
- else:
- attn_mask = None
-
- self.register_buffer("attn_mask", attn_mask)
-
- def calculate_mask(self, x_size):
- # calculate attention mask for SW-MSA
- H, W = x_size
- img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1
- h_slices = (slice(0, -self.window_size),
- slice(-self.window_size, -self.shift_size),
- slice(-self.shift_size, None))
- w_slices = (slice(0, -self.window_size),
- slice(-self.window_size, -self.shift_size),
- slice(-self.shift_size, None))
- cnt = 0
- for h in h_slices:
- for w in w_slices:
- img_mask[:, h, w, :] = cnt
- cnt += 1
-
- mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1
- mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
- attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
- attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
-
- return attn_mask
-
- def forward(self, x, x_size):
- H, W = x_size
- B, L, C = x.shape
- # assert L == H * W, "input feature has wrong size"
-
- shortcut = x
- x = self.norm1(x)
- x = x.view(B, H, W, C)
-
- # cyclic shift
- if self.shift_size > 0:
- shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
- else:
- shifted_x = x
-
- # partition windows
- x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C
- x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C
-
- # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size
- if self.input_resolution == x_size:
- attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C
- else:
- attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size).to(x.device))
-
- # merge windows
- attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
- shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C
-
- # reverse cyclic shift
- if self.shift_size > 0:
- x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
- else:
- x = shifted_x
- x = x.view(B, H * W, C)
-
- # FFN
- x = shortcut + self.drop_path(x)
- x = x + self.drop_path(self.mlp(self.norm2(x)))
-
- return x
-
- def extra_repr(self) -> str:
- return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \
- f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}"
-
- def flops(self):
- flops = 0
- H, W = self.input_resolution
- # norm1
- flops += self.dim * H * W
- # W-MSA/SW-MSA
- nW = H * W / self.window_size / self.window_size
- flops += nW * self.attn.flops(self.window_size * self.window_size)
- # mlp
- flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio
- # norm2
- flops += self.dim * H * W
- return flops
-
-
-class PatchMerging(nn.Module):
- r""" Patch Merging Layer.
-
- Args:
- input_resolution (tuple[int]): Resolution of input feature.
- dim (int): Number of input channels.
- norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
- """
-
- def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
- super().__init__()
- self.input_resolution = input_resolution
- self.dim = dim
- self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
- self.norm = norm_layer(4 * dim)
-
- def forward(self, x):
- """
- x: B, H*W, C
- """
- H, W = self.input_resolution
- B, L, C = x.shape
- assert L == H * W, "input feature has wrong size"
- assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even."
-
- x = x.view(B, H, W, C)
-
- x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C
- x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C
- x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C
- x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C
- x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C
- x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C
-
- x = self.norm(x)
- x = self.reduction(x)
-
- return x
-
- def extra_repr(self) -> str:
- return f"input_resolution={self.input_resolution}, dim={self.dim}"
-
- def flops(self):
- H, W = self.input_resolution
- flops = H * W * self.dim
- flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim
- return flops
-
-
-class BasicLayer(nn.Module):
- """ A basic Swin Transformer layer for one stage.
-
- Args:
- dim (int): Number of input channels.
- input_resolution (tuple[int]): Input resolution.
- depth (int): Number of blocks.
- num_heads (int): Number of attention heads.
- window_size (int): Local window size.
- mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
- qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
- qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
- drop (float, optional): Dropout rate. Default: 0.0
- attn_drop (float, optional): Attention dropout rate. Default: 0.0
- drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
- norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
- downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
- use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
- """
-
- def __init__(self, dim, input_resolution, depth, num_heads, window_size,
- mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
- drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False):
-
- super().__init__()
- self.dim = dim
- self.input_resolution = input_resolution
- self.depth = depth
- self.use_checkpoint = use_checkpoint
-
- # build blocks
- self.blocks = nn.ModuleList([
- SwinTransformerBlock(dim=dim, input_resolution=input_resolution,
- num_heads=num_heads, window_size=window_size,
- shift_size=0 if (i % 2 == 0) else window_size // 2,
- mlp_ratio=mlp_ratio,
- qkv_bias=qkv_bias, qk_scale=qk_scale,
- drop=drop, attn_drop=attn_drop,
- drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,
- norm_layer=norm_layer)
- for i in range(depth)])
-
- # patch merging layer
- if downsample is not None:
- self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer)
- else:
- self.downsample = None
-
- def forward(self, x, x_size):
- for blk in self.blocks:
- if self.use_checkpoint:
- x = checkpoint.checkpoint(blk, x, x_size)
- else:
- x = blk(x, x_size)
- if self.downsample is not None:
- x = self.downsample(x)
- return x
-
- def extra_repr(self) -> str:
- return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}"
-
- def flops(self):
- flops = 0
- for blk in self.blocks:
- flops += blk.flops()
- if self.downsample is not None:
- flops += self.downsample.flops()
- return flops
-
-
-class RSTB(nn.Module):
- """Residual Swin Transformer Block (RSTB).
-
- Args:
- dim (int): Number of input channels.
- input_resolution (tuple[int]): Input resolution.
- depth (int): Number of blocks.
- num_heads (int): Number of attention heads.
- window_size (int): Local window size.
- mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
- qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
- qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
- drop (float, optional): Dropout rate. Default: 0.0
- attn_drop (float, optional): Attention dropout rate. Default: 0.0
- drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
- norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
- downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
- use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
- img_size: Input image size.
- patch_size: Patch size.
- resi_connection: The convolutional block before residual connection.
- """
-
- def __init__(self, dim, input_resolution, depth, num_heads, window_size,
- mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
- drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False,
- img_size=224, patch_size=4, resi_connection='1conv'):
- super(RSTB, self).__init__()
-
- self.dim = dim
- self.input_resolution = input_resolution
-
- self.residual_group = BasicLayer(dim=dim,
- input_resolution=input_resolution,
- depth=depth,
- num_heads=num_heads,
- window_size=window_size,
- mlp_ratio=mlp_ratio,
- qkv_bias=qkv_bias, qk_scale=qk_scale,
- drop=drop, attn_drop=attn_drop,
- drop_path=drop_path,
- norm_layer=norm_layer,
- downsample=downsample,
- use_checkpoint=use_checkpoint)
-
- if resi_connection == '1conv':
- self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
- elif resi_connection == '3conv':
- # to save parameters and memory
- self.conv = nn.Sequential(nn.Conv2d(dim, dim // 4, 3, 1, 1), nn.LeakyReLU(negative_slope=0.2, inplace=True),
- nn.Conv2d(dim // 4, dim // 4, 1, 1, 0),
- nn.LeakyReLU(negative_slope=0.2, inplace=True),
- nn.Conv2d(dim // 4, dim, 3, 1, 1))
-
- self.patch_embed = PatchEmbed(
- img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim,
- norm_layer=None)
-
- self.patch_unembed = PatchUnEmbed(
- img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim,
- norm_layer=None)
-
- def forward(self, x, x_size):
- return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))) + x
-
- def flops(self):
- flops = 0
- flops += self.residual_group.flops()
- H, W = self.input_resolution
- flops += H * W * self.dim * self.dim * 9
- flops += self.patch_embed.flops()
- flops += self.patch_unembed.flops()
-
- return flops
-
-
-class PatchEmbed(nn.Module):
- r""" Image to Patch Embedding
-
- Args:
- img_size (int): Image size. Default: 224.
- patch_size (int): Patch token size. Default: 4.
- in_chans (int): Number of input image channels. Default: 3.
- embed_dim (int): Number of linear projection output channels. Default: 96.
- norm_layer (nn.Module, optional): Normalization layer. Default: None
- """
-
- def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
- super().__init__()
- img_size = to_2tuple(img_size)
- patch_size = to_2tuple(patch_size)
- patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
- self.img_size = img_size
- self.patch_size = patch_size
- self.patches_resolution = patches_resolution
- self.num_patches = patches_resolution[0] * patches_resolution[1]
-
- self.in_chans = in_chans
- self.embed_dim = embed_dim
-
- if norm_layer is not None:
- self.norm = norm_layer(embed_dim)
- else:
- self.norm = None
-
- def forward(self, x):
- x = x.flatten(2).transpose(1, 2) # B Ph*Pw C
- if self.norm is not None:
- x = self.norm(x)
- return x
-
- def flops(self):
- flops = 0
- H, W = self.img_size
- if self.norm is not None:
- flops += H * W * self.embed_dim
- return flops
-
-
-class PatchUnEmbed(nn.Module):
- r""" Image to Patch Unembedding
-
- Args:
- img_size (int): Image size. Default: 224.
- patch_size (int): Patch token size. Default: 4.
- in_chans (int): Number of input image channels. Default: 3.
- embed_dim (int): Number of linear projection output channels. Default: 96.
- norm_layer (nn.Module, optional): Normalization layer. Default: None
- """
-
- def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
- super().__init__()
- img_size = to_2tuple(img_size)
- patch_size = to_2tuple(patch_size)
- patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
- self.img_size = img_size
- self.patch_size = patch_size
- self.patches_resolution = patches_resolution
- self.num_patches = patches_resolution[0] * patches_resolution[1]
-
- self.in_chans = in_chans
- self.embed_dim = embed_dim
-
- def forward(self, x, x_size):
- B, HW, C = x.shape
- x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1]) # B Ph*Pw C
- return x
-
- def flops(self):
- flops = 0
- return flops
-
-
-class Upsample(nn.Sequential):
- """Upsample module.
-
- Args:
- scale (int): Scale factor. Supported scales: 2^n and 3.
- num_feat (int): Channel number of intermediate features.
- """
-
- def __init__(self, scale, num_feat):
- m = []
- if (scale & (scale - 1)) == 0: # scale = 2^n
- for _ in range(int(math.log(scale, 2))):
- m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
- m.append(nn.PixelShuffle(2))
- elif scale == 3:
- m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
- m.append(nn.PixelShuffle(3))
- else:
- raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.')
- super(Upsample, self).__init__(*m)
-
-
-class UpsampleOneStep(nn.Sequential):
- """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)
- Used in lightweight SR to save parameters.
-
- Args:
- scale (int): Scale factor. Supported scales: 2^n and 3.
- num_feat (int): Channel number of intermediate features.
-
- """
-
- def __init__(self, scale, num_feat, num_out_ch, input_resolution=None):
- self.num_feat = num_feat
- self.input_resolution = input_resolution
- m = []
- m.append(nn.Conv2d(num_feat, (scale ** 2) * num_out_ch, 3, 1, 1))
- m.append(nn.PixelShuffle(scale))
- super(UpsampleOneStep, self).__init__(*m)
-
- def flops(self):
- H, W = self.input_resolution
- flops = H * W * self.num_feat * 3 * 9
- return flops
-
-
-class SwinIR(nn.Module):
- r""" SwinIR
- A PyTorch impl of : `SwinIR: Image Restoration Using Swin Transformer`, based on Swin Transformer.
-
- Args:
- img_size (int | tuple(int)): Input image size. Default 64
- patch_size (int | tuple(int)): Patch size. Default: 1
- in_chans (int): Number of input image channels. Default: 3
- embed_dim (int): Patch embedding dimension. Default: 96
- depths (tuple(int)): Depth of each Swin Transformer layer.
- num_heads (tuple(int)): Number of attention heads in different layers.
- window_size (int): Window size. Default: 7
- mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
- qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
- qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
- drop_rate (float): Dropout rate. Default: 0
- attn_drop_rate (float): Attention dropout rate. Default: 0
- drop_path_rate (float): Stochastic depth rate. Default: 0.1
- norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
- ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
- patch_norm (bool): If True, add normalization after patch embedding. Default: True
- use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
- upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction
- img_range: Image range. 1. or 255.
- upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None
- resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
- """
-
- def __init__(self, img_size=64, patch_size=1, in_chans=3,
- embed_dim=96, depths=[6, 6, 6, 6], num_heads=[6, 6, 6, 6],
- window_size=7, mlp_ratio=4., qkv_bias=True, qk_scale=None,
- drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
- norm_layer=nn.LayerNorm, ape=False, patch_norm=True,
- use_checkpoint=False, upscale=2, img_range=1., upsampler='', resi_connection='1conv',
- **kwargs):
- super(SwinIR, self).__init__()
- num_in_ch = in_chans
- num_out_ch = in_chans
- num_feat = 64
- self.img_range = img_range
- if in_chans == 3:
- rgb_mean = (0.4488, 0.4371, 0.4040)
- self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
- else:
- self.mean = torch.zeros(1, 1, 1, 1)
- self.upscale = upscale
- self.upsampler = upsampler
- self.window_size = window_size
-
- #####################################################################################################
- ################################### 1, shallow feature extraction ###################################
- self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)
-
- #####################################################################################################
- ################################### 2, deep feature extraction ######################################
- self.num_layers = len(depths)
- self.embed_dim = embed_dim
- self.ape = ape
- self.patch_norm = patch_norm
- self.num_features = embed_dim
- self.mlp_ratio = mlp_ratio
-
- # split image into non-overlapping patches
- self.patch_embed = PatchEmbed(
- img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim,
- norm_layer=norm_layer if self.patch_norm else None)
- num_patches = self.patch_embed.num_patches
- patches_resolution = self.patch_embed.patches_resolution
- self.patches_resolution = patches_resolution
-
- # merge non-overlapping patches into image
- self.patch_unembed = PatchUnEmbed(
- img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim,
- norm_layer=norm_layer if self.patch_norm else None)
-
- # absolute position embedding
- if self.ape:
- self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
- trunc_normal_(self.absolute_pos_embed, std=.02)
-
- self.pos_drop = nn.Dropout(p=drop_rate)
-
- # stochastic depth
- dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule
-
- # build Residual Swin Transformer blocks (RSTB)
- self.layers = nn.ModuleList()
- for i_layer in range(self.num_layers):
- layer = RSTB(dim=embed_dim,
- input_resolution=(patches_resolution[0],
- patches_resolution[1]),
- depth=depths[i_layer],
- num_heads=num_heads[i_layer],
- window_size=window_size,
- mlp_ratio=self.mlp_ratio,
- qkv_bias=qkv_bias, qk_scale=qk_scale,
- drop=drop_rate, attn_drop=attn_drop_rate,
- drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results
- norm_layer=norm_layer,
- downsample=None,
- use_checkpoint=use_checkpoint,
- img_size=img_size,
- patch_size=patch_size,
- resi_connection=resi_connection
-
- )
- self.layers.append(layer)
- self.norm = norm_layer(self.num_features)
-
- # build the last conv layer in deep feature extraction
- if resi_connection == '1conv':
- self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
- elif resi_connection == '3conv':
- # to save parameters and memory
- self.conv_after_body = nn.Sequential(nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1),
- nn.LeakyReLU(negative_slope=0.2, inplace=True),
- nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0),
- nn.LeakyReLU(negative_slope=0.2, inplace=True),
- nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1))
-
- #####################################################################################################
- ################################ 3, high quality image reconstruction ################################
- if self.upsampler == 'pixelshuffle':
- # for classical SR
- self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
- nn.LeakyReLU(inplace=True))
- self.upsample = Upsample(upscale, num_feat)
- self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
- elif self.upsampler == 'pixelshuffledirect':
- # for lightweight SR (to save parameters)
- self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch,
- (patches_resolution[0], patches_resolution[1]))
- elif self.upsampler == 'nearest+conv':
- # for real-world SR (less artifacts)
- self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
- nn.LeakyReLU(inplace=True))
- self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
- if self.upscale == 4:
- self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
- self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
- self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
- self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
- else:
- # for image denoising and JPEG compression artifact reduction
- self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1)
-
- self.apply(self._init_weights)
-
- def _init_weights(self, m):
- if isinstance(m, nn.Linear):
- trunc_normal_(m.weight, std=.02)
- if isinstance(m, nn.Linear) and m.bias is not None:
- nn.init.constant_(m.bias, 0)
- elif isinstance(m, nn.LayerNorm):
- nn.init.constant_(m.bias, 0)
- nn.init.constant_(m.weight, 1.0)
-
- @torch.jit.ignore
- def no_weight_decay(self):
- return {'absolute_pos_embed'}
-
- @torch.jit.ignore
- def no_weight_decay_keywords(self):
- return {'relative_position_bias_table'}
-
- def check_image_size(self, x):
- _, _, h, w = x.size()
- mod_pad_h = (self.window_size - h % self.window_size) % self.window_size
- mod_pad_w = (self.window_size - w % self.window_size) % self.window_size
- x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect')
- return x
-
- def forward_features(self, x):
- x_size = (x.shape[2], x.shape[3])
- x = self.patch_embed(x)
- if self.ape:
- x = x + self.absolute_pos_embed
- x = self.pos_drop(x)
-
- for layer in self.layers:
- x = layer(x, x_size)
-
- x = self.norm(x) # B L C
- x = self.patch_unembed(x, x_size)
-
- return x
-
- def forward(self, x):
- H, W = x.shape[2:]
- x = self.check_image_size(x)
-
- self.mean = self.mean.type_as(x)
- x = (x - self.mean) * self.img_range
-
- if self.upsampler == 'pixelshuffle':
- # for classical SR
- x = self.conv_first(x)
- x = self.conv_after_body(self.forward_features(x)) + x
- x = self.conv_before_upsample(x)
- x = self.conv_last(self.upsample(x))
- elif self.upsampler == 'pixelshuffledirect':
- # for lightweight SR
- x = self.conv_first(x)
- x = self.conv_after_body(self.forward_features(x)) + x
- x = self.upsample(x)
- elif self.upsampler == 'nearest+conv':
- # for real-world SR
- x = self.conv_first(x)
- x = self.conv_after_body(self.forward_features(x)) + x
- x = self.conv_before_upsample(x)
- x = self.lrelu(self.conv_up1(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')))
- if self.upscale == 4:
- x = self.lrelu(self.conv_up2(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')))
- x = self.conv_last(self.lrelu(self.conv_hr(x)))
- else:
- # for image denoising and JPEG compression artifact reduction
- x_first = self.conv_first(x)
- res = self.conv_after_body(self.forward_features(x_first)) + x_first
- x = x + self.conv_last(res)
-
- x = x / self.img_range + self.mean
-
- return x[:, :, :H*self.upscale, :W*self.upscale]
-
- def flops(self):
- flops = 0
- H, W = self.patches_resolution
- flops += H * W * 3 * self.embed_dim * 9
- flops += self.patch_embed.flops()
- for i, layer in enumerate(self.layers):
- flops += layer.flops()
- flops += H * W * 3 * self.embed_dim * self.embed_dim
- flops += self.upsample.flops()
- return flops
-
-
-if __name__ == '__main__':
- upscale = 4
- window_size = 8
- height = (1024 // upscale // window_size + 1) * window_size
- width = (720 // upscale // window_size + 1) * window_size
- model = SwinIR(upscale=2, img_size=(height, width),
- window_size=window_size, img_range=1., depths=[6, 6, 6, 6],
- embed_dim=60, num_heads=[6, 6, 6, 6], mlp_ratio=2, upsampler='pixelshuffledirect')
- print(model)
- print(height, width, model.flops() / 1e9)
-
- x = torch.randn((1, 3, height, width))
- x = model(x)
- print(x.shape)
diff --git a/modules/swinir_model_arch_v2.py b/modules/swinir_model_arch_v2.py
deleted file mode 100644
index 0e28ae6e..00000000
--- a/modules/swinir_model_arch_v2.py
+++ /dev/null
@@ -1,1017 +0,0 @@
-# -----------------------------------------------------------------------------------
-# Swin2SR: Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration, https://arxiv.org/abs/
-# Written by Conde and Choi et al.
-# -----------------------------------------------------------------------------------
-
-import math
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.utils.checkpoint as checkpoint
-from timm.models.layers import DropPath, to_2tuple, trunc_normal_
-
-
-class Mlp(nn.Module):
- def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
- super().__init__()
- out_features = out_features or in_features
- hidden_features = hidden_features or in_features
- self.fc1 = nn.Linear(in_features, hidden_features)
- self.act = act_layer()
- self.fc2 = nn.Linear(hidden_features, out_features)
- self.drop = nn.Dropout(drop)
-
- def forward(self, x):
- x = self.fc1(x)
- x = self.act(x)
- x = self.drop(x)
- x = self.fc2(x)
- x = self.drop(x)
- return x
-
-
-def window_partition(x, window_size):
- """
- Args:
- x: (B, H, W, C)
- window_size (int): window size
- Returns:
- windows: (num_windows*B, window_size, window_size, C)
- """
- B, H, W, C = x.shape
- x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
- windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
- return windows
-
-
-def window_reverse(windows, window_size, H, W):
- """
- Args:
- windows: (num_windows*B, window_size, window_size, C)
- window_size (int): Window size
- H (int): Height of image
- W (int): Width of image
- Returns:
- x: (B, H, W, C)
- """
- B = int(windows.shape[0] / (H * W / window_size / window_size))
- x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
- x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
- return x
-
-class WindowAttention(nn.Module):
- r""" Window based multi-head self attention (W-MSA) module with relative position bias.
- It supports both of shifted and non-shifted window.
- Args:
- dim (int): Number of input channels.
- window_size (tuple[int]): The height and width of the window.
- num_heads (int): Number of attention heads.
- qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
- attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
- proj_drop (float, optional): Dropout ratio of output. Default: 0.0
- pretrained_window_size (tuple[int]): The height and width of the window in pre-training.
- """
-
- def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0.,
- pretrained_window_size=[0, 0]):
-
- super().__init__()
- self.dim = dim
- self.window_size = window_size # Wh, Ww
- self.pretrained_window_size = pretrained_window_size
- self.num_heads = num_heads
-
- self.logit_scale = nn.Parameter(torch.log(10 * torch.ones((num_heads, 1, 1))), requires_grad=True)
-
- # mlp to generate continuous relative position bias
- self.cpb_mlp = nn.Sequential(nn.Linear(2, 512, bias=True),
- nn.ReLU(inplace=True),
- nn.Linear(512, num_heads, bias=False))
-
- # get relative_coords_table
- relative_coords_h = torch.arange(-(self.window_size[0] - 1), self.window_size[0], dtype=torch.float32)
- relative_coords_w = torch.arange(-(self.window_size[1] - 1), self.window_size[1], dtype=torch.float32)
- relative_coords_table = torch.stack(
- torch.meshgrid([relative_coords_h,
- relative_coords_w])).permute(1, 2, 0).contiguous().unsqueeze(0) # 1, 2*Wh-1, 2*Ww-1, 2
- if pretrained_window_size[0] > 0:
- relative_coords_table[:, :, :, 0] /= (pretrained_window_size[0] - 1)
- relative_coords_table[:, :, :, 1] /= (pretrained_window_size[1] - 1)
- else:
- relative_coords_table[:, :, :, 0] /= (self.window_size[0] - 1)
- relative_coords_table[:, :, :, 1] /= (self.window_size[1] - 1)
- relative_coords_table *= 8 # normalize to -8, 8
- relative_coords_table = torch.sign(relative_coords_table) * torch.log2(
- torch.abs(relative_coords_table) + 1.0) / np.log2(8)
-
- self.register_buffer("relative_coords_table", relative_coords_table)
-
- # get pair-wise relative position index for each token inside the window
- coords_h = torch.arange(self.window_size[0])
- coords_w = torch.arange(self.window_size[1])
- coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww
- coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww
- relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww
- relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2
- relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0
- relative_coords[:, :, 1] += self.window_size[1] - 1
- relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
- relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww
- self.register_buffer("relative_position_index", relative_position_index)
-
- self.qkv = nn.Linear(dim, dim * 3, bias=False)
- if qkv_bias:
- self.q_bias = nn.Parameter(torch.zeros(dim))
- self.v_bias = nn.Parameter(torch.zeros(dim))
- else:
- self.q_bias = None
- self.v_bias = None
- self.attn_drop = nn.Dropout(attn_drop)
- self.proj = nn.Linear(dim, dim)
- self.proj_drop = nn.Dropout(proj_drop)
- self.softmax = nn.Softmax(dim=-1)
-
- def forward(self, x, mask=None):
- """
- Args:
- x: input features with shape of (num_windows*B, N, C)
- mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
- """
- B_, N, C = x.shape
- qkv_bias = None
- if self.q_bias is not None:
- qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias))
- qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias)
- qkv = qkv.reshape(B_, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
- q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
-
- # cosine attention
- attn = (F.normalize(q, dim=-1) @ F.normalize(k, dim=-1).transpose(-2, -1))
- logit_scale = torch.clamp(self.logit_scale, max=torch.log(torch.tensor(1. / 0.01)).to(self.logit_scale.device)).exp()
- attn = attn * logit_scale
-
- relative_position_bias_table = self.cpb_mlp(self.relative_coords_table).view(-1, self.num_heads)
- relative_position_bias = relative_position_bias_table[self.relative_position_index.view(-1)].view(
- self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH
- relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
- relative_position_bias = 16 * torch.sigmoid(relative_position_bias)
- attn = attn + relative_position_bias.unsqueeze(0)
-
- if mask is not None:
- nW = mask.shape[0]
- attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
- attn = attn.view(-1, self.num_heads, N, N)
- attn = self.softmax(attn)
- else:
- attn = self.softmax(attn)
-
- attn = self.attn_drop(attn)
-
- x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
- x = self.proj(x)
- x = self.proj_drop(x)
- return x
-
- def extra_repr(self) -> str:
- return f'dim={self.dim}, window_size={self.window_size}, ' \
- f'pretrained_window_size={self.pretrained_window_size}, num_heads={self.num_heads}'
-
- def flops(self, N):
- # calculate flops for 1 window with token length of N
- flops = 0
- # qkv = self.qkv(x)
- flops += N * self.dim * 3 * self.dim
- # attn = (q @ k.transpose(-2, -1))
- flops += self.num_heads * N * (self.dim // self.num_heads) * N
- # x = (attn @ v)
- flops += self.num_heads * N * N * (self.dim // self.num_heads)
- # x = self.proj(x)
- flops += N * self.dim * self.dim
- return flops
-
-class SwinTransformerBlock(nn.Module):
- r""" Swin Transformer Block.
- Args:
- dim (int): Number of input channels.
- input_resolution (tuple[int]): Input resulotion.
- num_heads (int): Number of attention heads.
- window_size (int): Window size.
- shift_size (int): Shift size for SW-MSA.
- mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
- qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
- drop (float, optional): Dropout rate. Default: 0.0
- attn_drop (float, optional): Attention dropout rate. Default: 0.0
- drop_path (float, optional): Stochastic depth rate. Default: 0.0
- act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
- norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
- pretrained_window_size (int): Window size in pre-training.
- """
-
- def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0,
- mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0.,
- act_layer=nn.GELU, norm_layer=nn.LayerNorm, pretrained_window_size=0):
- super().__init__()
- self.dim = dim
- self.input_resolution = input_resolution
- self.num_heads = num_heads
- self.window_size = window_size
- self.shift_size = shift_size
- self.mlp_ratio = mlp_ratio
- if min(self.input_resolution) <= self.window_size:
- # if window size is larger than input resolution, we don't partition windows
- self.shift_size = 0
- self.window_size = min(self.input_resolution)
- assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"
-
- self.norm1 = norm_layer(dim)
- self.attn = WindowAttention(
- dim, window_size=to_2tuple(self.window_size), num_heads=num_heads,
- qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop,
- pretrained_window_size=to_2tuple(pretrained_window_size))
-
- self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
- self.norm2 = norm_layer(dim)
- mlp_hidden_dim = int(dim * mlp_ratio)
- self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
-
- if self.shift_size > 0:
- attn_mask = self.calculate_mask(self.input_resolution)
- else:
- attn_mask = None
-
- self.register_buffer("attn_mask", attn_mask)
-
- def calculate_mask(self, x_size):
- # calculate attention mask for SW-MSA
- H, W = x_size
- img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1
- h_slices = (slice(0, -self.window_size),
- slice(-self.window_size, -self.shift_size),
- slice(-self.shift_size, None))
- w_slices = (slice(0, -self.window_size),
- slice(-self.window_size, -self.shift_size),
- slice(-self.shift_size, None))
- cnt = 0
- for h in h_slices:
- for w in w_slices:
- img_mask[:, h, w, :] = cnt
- cnt += 1
-
- mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1
- mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
- attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
- attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
-
- return attn_mask
-
- def forward(self, x, x_size):
- H, W = x_size
- B, L, C = x.shape
- #assert L == H * W, "input feature has wrong size"
-
- shortcut = x
- x = x.view(B, H, W, C)
-
- # cyclic shift
- if self.shift_size > 0:
- shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
- else:
- shifted_x = x
-
- # partition windows
- x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C
- x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C
-
- # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size
- if self.input_resolution == x_size:
- attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C
- else:
- attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size).to(x.device))
-
- # merge windows
- attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
- shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C
-
- # reverse cyclic shift
- if self.shift_size > 0:
- x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
- else:
- x = shifted_x
- x = x.view(B, H * W, C)
- x = shortcut + self.drop_path(self.norm1(x))
-
- # FFN
- x = x + self.drop_path(self.norm2(self.mlp(x)))
-
- return x
-
- def extra_repr(self) -> str:
- return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \
- f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}"
-
- def flops(self):
- flops = 0
- H, W = self.input_resolution
- # norm1
- flops += self.dim * H * W
- # W-MSA/SW-MSA
- nW = H * W / self.window_size / self.window_size
- flops += nW * self.attn.flops(self.window_size * self.window_size)
- # mlp
- flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio
- # norm2
- flops += self.dim * H * W
- return flops
-
-class PatchMerging(nn.Module):
- r""" Patch Merging Layer.
- Args:
- input_resolution (tuple[int]): Resolution of input feature.
- dim (int): Number of input channels.
- norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
- """
-
- def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
- super().__init__()
- self.input_resolution = input_resolution
- self.dim = dim
- self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
- self.norm = norm_layer(2 * dim)
-
- def forward(self, x):
- """
- x: B, H*W, C
- """
- H, W = self.input_resolution
- B, L, C = x.shape
- assert L == H * W, "input feature has wrong size"
- assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even."
-
- x = x.view(B, H, W, C)
-
- x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C
- x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C
- x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C
- x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C
- x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C
- x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C
-
- x = self.reduction(x)
- x = self.norm(x)
-
- return x
-
- def extra_repr(self) -> str:
- return f"input_resolution={self.input_resolution}, dim={self.dim}"
-
- def flops(self):
- H, W = self.input_resolution
- flops = (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim
- flops += H * W * self.dim // 2
- return flops
-
-class BasicLayer(nn.Module):
- """ A basic Swin Transformer layer for one stage.
- Args:
- dim (int): Number of input channels.
- input_resolution (tuple[int]): Input resolution.
- depth (int): Number of blocks.
- num_heads (int): Number of attention heads.
- window_size (int): Local window size.
- mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
- qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
- drop (float, optional): Dropout rate. Default: 0.0
- attn_drop (float, optional): Attention dropout rate. Default: 0.0
- drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
- norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
- downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
- use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
- pretrained_window_size (int): Local window size in pre-training.
- """
-
- def __init__(self, dim, input_resolution, depth, num_heads, window_size,
- mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.,
- drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False,
- pretrained_window_size=0):
-
- super().__init__()
- self.dim = dim
- self.input_resolution = input_resolution
- self.depth = depth
- self.use_checkpoint = use_checkpoint
-
- # build blocks
- self.blocks = nn.ModuleList([
- SwinTransformerBlock(dim=dim, input_resolution=input_resolution,
- num_heads=num_heads, window_size=window_size,
- shift_size=0 if (i % 2 == 0) else window_size // 2,
- mlp_ratio=mlp_ratio,
- qkv_bias=qkv_bias,
- drop=drop, attn_drop=attn_drop,
- drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,
- norm_layer=norm_layer,
- pretrained_window_size=pretrained_window_size)
- for i in range(depth)])
-
- # patch merging layer
- if downsample is not None:
- self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer)
- else:
- self.downsample = None
-
- def forward(self, x, x_size):
- for blk in self.blocks:
- if self.use_checkpoint:
- x = checkpoint.checkpoint(blk, x, x_size)
- else:
- x = blk(x, x_size)
- if self.downsample is not None:
- x = self.downsample(x)
- return x
-
- def extra_repr(self) -> str:
- return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}"
-
- def flops(self):
- flops = 0
- for blk in self.blocks:
- flops += blk.flops()
- if self.downsample is not None:
- flops += self.downsample.flops()
- return flops
-
- def _init_respostnorm(self):
- for blk in self.blocks:
- nn.init.constant_(blk.norm1.bias, 0)
- nn.init.constant_(blk.norm1.weight, 0)
- nn.init.constant_(blk.norm2.bias, 0)
- nn.init.constant_(blk.norm2.weight, 0)
-
-class PatchEmbed(nn.Module):
- r""" Image to Patch Embedding
- Args:
- img_size (int): Image size. Default: 224.
- patch_size (int): Patch token size. Default: 4.
- in_chans (int): Number of input image channels. Default: 3.
- embed_dim (int): Number of linear projection output channels. Default: 96.
- norm_layer (nn.Module, optional): Normalization layer. Default: None
- """
-
- def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
- super().__init__()
- img_size = to_2tuple(img_size)
- patch_size = to_2tuple(patch_size)
- patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
- self.img_size = img_size
- self.patch_size = patch_size
- self.patches_resolution = patches_resolution
- self.num_patches = patches_resolution[0] * patches_resolution[1]
-
- self.in_chans = in_chans
- self.embed_dim = embed_dim
-
- self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
- if norm_layer is not None:
- self.norm = norm_layer(embed_dim)
- else:
- self.norm = None
-
- def forward(self, x):
- B, C, H, W = x.shape
- # FIXME look at relaxing size constraints
- # assert H == self.img_size[0] and W == self.img_size[1],
- # f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
- x = self.proj(x).flatten(2).transpose(1, 2) # B Ph*Pw C
- if self.norm is not None:
- x = self.norm(x)
- return x
-
- def flops(self):
- Ho, Wo = self.patches_resolution
- flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1])
- if self.norm is not None:
- flops += Ho * Wo * self.embed_dim
- return flops
-
-class RSTB(nn.Module):
- """Residual Swin Transformer Block (RSTB).
-
- Args:
- dim (int): Number of input channels.
- input_resolution (tuple[int]): Input resolution.
- depth (int): Number of blocks.
- num_heads (int): Number of attention heads.
- window_size (int): Local window size.
- mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
- qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
- drop (float, optional): Dropout rate. Default: 0.0
- attn_drop (float, optional): Attention dropout rate. Default: 0.0
- drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
- norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
- downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
- use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
- img_size: Input image size.
- patch_size: Patch size.
- resi_connection: The convolutional block before residual connection.
- """
-
- def __init__(self, dim, input_resolution, depth, num_heads, window_size,
- mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0.,
- drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False,
- img_size=224, patch_size=4, resi_connection='1conv'):
- super(RSTB, self).__init__()
-
- self.dim = dim
- self.input_resolution = input_resolution
-
- self.residual_group = BasicLayer(dim=dim,
- input_resolution=input_resolution,
- depth=depth,
- num_heads=num_heads,
- window_size=window_size,
- mlp_ratio=mlp_ratio,
- qkv_bias=qkv_bias,
- drop=drop, attn_drop=attn_drop,
- drop_path=drop_path,
- norm_layer=norm_layer,
- downsample=downsample,
- use_checkpoint=use_checkpoint)
-
- if resi_connection == '1conv':
- self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
- elif resi_connection == '3conv':
- # to save parameters and memory
- self.conv = nn.Sequential(nn.Conv2d(dim, dim // 4, 3, 1, 1), nn.LeakyReLU(negative_slope=0.2, inplace=True),
- nn.Conv2d(dim // 4, dim // 4, 1, 1, 0),
- nn.LeakyReLU(negative_slope=0.2, inplace=True),
- nn.Conv2d(dim // 4, dim, 3, 1, 1))
-
- self.patch_embed = PatchEmbed(
- img_size=img_size, patch_size=patch_size, in_chans=dim, embed_dim=dim,
- norm_layer=None)
-
- self.patch_unembed = PatchUnEmbed(
- img_size=img_size, patch_size=patch_size, in_chans=dim, embed_dim=dim,
- norm_layer=None)
-
- def forward(self, x, x_size):
- return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))) + x
-
- def flops(self):
- flops = 0
- flops += self.residual_group.flops()
- H, W = self.input_resolution
- flops += H * W * self.dim * self.dim * 9
- flops += self.patch_embed.flops()
- flops += self.patch_unembed.flops()
-
- return flops
-
-class PatchUnEmbed(nn.Module):
- r""" Image to Patch Unembedding
-
- Args:
- img_size (int): Image size. Default: 224.
- patch_size (int): Patch token size. Default: 4.
- in_chans (int): Number of input image channels. Default: 3.
- embed_dim (int): Number of linear projection output channels. Default: 96.
- norm_layer (nn.Module, optional): Normalization layer. Default: None
- """
-
- def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
- super().__init__()
- img_size = to_2tuple(img_size)
- patch_size = to_2tuple(patch_size)
- patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
- self.img_size = img_size
- self.patch_size = patch_size
- self.patches_resolution = patches_resolution
- self.num_patches = patches_resolution[0] * patches_resolution[1]
-
- self.in_chans = in_chans
- self.embed_dim = embed_dim
-
- def forward(self, x, x_size):
- B, HW, C = x.shape
- x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1]) # B Ph*Pw C
- return x
-
- def flops(self):
- flops = 0
- return flops
-
-
-class Upsample(nn.Sequential):
- """Upsample module.
-
- Args:
- scale (int): Scale factor. Supported scales: 2^n and 3.
- num_feat (int): Channel number of intermediate features.
- """
-
- def __init__(self, scale, num_feat):
- m = []
- if (scale & (scale - 1)) == 0: # scale = 2^n
- for _ in range(int(math.log(scale, 2))):
- m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
- m.append(nn.PixelShuffle(2))
- elif scale == 3:
- m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
- m.append(nn.PixelShuffle(3))
- else:
- raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.')
- super(Upsample, self).__init__(*m)
-
-class Upsample_hf(nn.Sequential):
- """Upsample module.
-
- Args:
- scale (int): Scale factor. Supported scales: 2^n and 3.
- num_feat (int): Channel number of intermediate features.
- """
-
- def __init__(self, scale, num_feat):
- m = []
- if (scale & (scale - 1)) == 0: # scale = 2^n
- for _ in range(int(math.log(scale, 2))):
- m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
- m.append(nn.PixelShuffle(2))
- elif scale == 3:
- m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
- m.append(nn.PixelShuffle(3))
- else:
- raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.')
- super(Upsample_hf, self).__init__(*m)
-
-
-class UpsampleOneStep(nn.Sequential):
- """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)
- Used in lightweight SR to save parameters.
-
- Args:
- scale (int): Scale factor. Supported scales: 2^n and 3.
- num_feat (int): Channel number of intermediate features.
-
- """
-
- def __init__(self, scale, num_feat, num_out_ch, input_resolution=None):
- self.num_feat = num_feat
- self.input_resolution = input_resolution
- m = []
- m.append(nn.Conv2d(num_feat, (scale ** 2) * num_out_ch, 3, 1, 1))
- m.append(nn.PixelShuffle(scale))
- super(UpsampleOneStep, self).__init__(*m)
-
- def flops(self):
- H, W = self.input_resolution
- flops = H * W * self.num_feat * 3 * 9
- return flops
-
-
-
-class Swin2SR(nn.Module):
- r""" Swin2SR
- A PyTorch impl of : `Swin2SR: SwinV2 Transformer for Compressed Image Super-Resolution and Restoration`.
-
- Args:
- img_size (int | tuple(int)): Input image size. Default 64
- patch_size (int | tuple(int)): Patch size. Default: 1
- in_chans (int): Number of input image channels. Default: 3
- embed_dim (int): Patch embedding dimension. Default: 96
- depths (tuple(int)): Depth of each Swin Transformer layer.
- num_heads (tuple(int)): Number of attention heads in different layers.
- window_size (int): Window size. Default: 7
- mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
- qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
- drop_rate (float): Dropout rate. Default: 0
- attn_drop_rate (float): Attention dropout rate. Default: 0
- drop_path_rate (float): Stochastic depth rate. Default: 0.1
- norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
- ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
- patch_norm (bool): If True, add normalization after patch embedding. Default: True
- use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
- upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction
- img_range: Image range. 1. or 255.
- upsampler: The reconstruction reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None
- resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
- """
-
- def __init__(self, img_size=64, patch_size=1, in_chans=3,
- embed_dim=96, depths=[6, 6, 6, 6], num_heads=[6, 6, 6, 6],
- window_size=7, mlp_ratio=4., qkv_bias=True,
- drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
- norm_layer=nn.LayerNorm, ape=False, patch_norm=True,
- use_checkpoint=False, upscale=2, img_range=1., upsampler='', resi_connection='1conv',
- **kwargs):
- super(Swin2SR, self).__init__()
- num_in_ch = in_chans
- num_out_ch = in_chans
- num_feat = 64
- self.img_range = img_range
- if in_chans == 3:
- rgb_mean = (0.4488, 0.4371, 0.4040)
- self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
- else:
- self.mean = torch.zeros(1, 1, 1, 1)
- self.upscale = upscale
- self.upsampler = upsampler
- self.window_size = window_size
-
- #####################################################################################################
- ################################### 1, shallow feature extraction ###################################
- self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)
-
- #####################################################################################################
- ################################### 2, deep feature extraction ######################################
- self.num_layers = len(depths)
- self.embed_dim = embed_dim
- self.ape = ape
- self.patch_norm = patch_norm
- self.num_features = embed_dim
- self.mlp_ratio = mlp_ratio
-
- # split image into non-overlapping patches
- self.patch_embed = PatchEmbed(
- img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim,
- norm_layer=norm_layer if self.patch_norm else None)
- num_patches = self.patch_embed.num_patches
- patches_resolution = self.patch_embed.patches_resolution
- self.patches_resolution = patches_resolution
-
- # merge non-overlapping patches into image
- self.patch_unembed = PatchUnEmbed(
- img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim,
- norm_layer=norm_layer if self.patch_norm else None)
-
- # absolute position embedding
- if self.ape:
- self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
- trunc_normal_(self.absolute_pos_embed, std=.02)
-
- self.pos_drop = nn.Dropout(p=drop_rate)
-
- # stochastic depth
- dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule
-
- # build Residual Swin Transformer blocks (RSTB)
- self.layers = nn.ModuleList()
- for i_layer in range(self.num_layers):
- layer = RSTB(dim=embed_dim,
- input_resolution=(patches_resolution[0],
- patches_resolution[1]),
- depth=depths[i_layer],
- num_heads=num_heads[i_layer],
- window_size=window_size,
- mlp_ratio=self.mlp_ratio,
- qkv_bias=qkv_bias,
- drop=drop_rate, attn_drop=attn_drop_rate,
- drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results
- norm_layer=norm_layer,
- downsample=None,
- use_checkpoint=use_checkpoint,
- img_size=img_size,
- patch_size=patch_size,
- resi_connection=resi_connection
-
- )
- self.layers.append(layer)
-
- if self.upsampler == 'pixelshuffle_hf':
- self.layers_hf = nn.ModuleList()
- for i_layer in range(self.num_layers):
- layer = RSTB(dim=embed_dim,
- input_resolution=(patches_resolution[0],
- patches_resolution[1]),
- depth=depths[i_layer],
- num_heads=num_heads[i_layer],
- window_size=window_size,
- mlp_ratio=self.mlp_ratio,
- qkv_bias=qkv_bias,
- drop=drop_rate, attn_drop=attn_drop_rate,
- drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results
- norm_layer=norm_layer,
- downsample=None,
- use_checkpoint=use_checkpoint,
- img_size=img_size,
- patch_size=patch_size,
- resi_connection=resi_connection
-
- )
- self.layers_hf.append(layer)
-
- self.norm = norm_layer(self.num_features)
-
- # build the last conv layer in deep feature extraction
- if resi_connection == '1conv':
- self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
- elif resi_connection == '3conv':
- # to save parameters and memory
- self.conv_after_body = nn.Sequential(nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1),
- nn.LeakyReLU(negative_slope=0.2, inplace=True),
- nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0),
- nn.LeakyReLU(negative_slope=0.2, inplace=True),
- nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1))
-
- #####################################################################################################
- ################################ 3, high quality image reconstruction ################################
- if self.upsampler == 'pixelshuffle':
- # for classical SR
- self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
- nn.LeakyReLU(inplace=True))
- self.upsample = Upsample(upscale, num_feat)
- self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
- elif self.upsampler == 'pixelshuffle_aux':
- self.conv_bicubic = nn.Conv2d(num_in_ch, num_feat, 3, 1, 1)
- self.conv_before_upsample = nn.Sequential(
- nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
- nn.LeakyReLU(inplace=True))
- self.conv_aux = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
- self.conv_after_aux = nn.Sequential(
- nn.Conv2d(3, num_feat, 3, 1, 1),
- nn.LeakyReLU(inplace=True))
- self.upsample = Upsample(upscale, num_feat)
- self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
-
- elif self.upsampler == 'pixelshuffle_hf':
- self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
- nn.LeakyReLU(inplace=True))
- self.upsample = Upsample(upscale, num_feat)
- self.upsample_hf = Upsample_hf(upscale, num_feat)
- self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
- self.conv_first_hf = nn.Sequential(nn.Conv2d(num_feat, embed_dim, 3, 1, 1),
- nn.LeakyReLU(inplace=True))
- self.conv_after_body_hf = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
- self.conv_before_upsample_hf = nn.Sequential(
- nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
- nn.LeakyReLU(inplace=True))
- self.conv_last_hf = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
-
- elif self.upsampler == 'pixelshuffledirect':
- # for lightweight SR (to save parameters)
- self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch,
- (patches_resolution[0], patches_resolution[1]))
- elif self.upsampler == 'nearest+conv':
- # for real-world SR (less artifacts)
- assert self.upscale == 4, 'only support x4 now.'
- self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
- nn.LeakyReLU(inplace=True))
- self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
- self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
- self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
- self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
- self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
- else:
- # for image denoising and JPEG compression artifact reduction
- self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1)
-
- self.apply(self._init_weights)
-
- def _init_weights(self, m):
- if isinstance(m, nn.Linear):
- trunc_normal_(m.weight, std=.02)
- if isinstance(m, nn.Linear) and m.bias is not None:
- nn.init.constant_(m.bias, 0)
- elif isinstance(m, nn.LayerNorm):
- nn.init.constant_(m.bias, 0)
- nn.init.constant_(m.weight, 1.0)
-
- @torch.jit.ignore
- def no_weight_decay(self):
- return {'absolute_pos_embed'}
-
- @torch.jit.ignore
- def no_weight_decay_keywords(self):
- return {'relative_position_bias_table'}
-
- def check_image_size(self, x):
- _, _, h, w = x.size()
- mod_pad_h = (self.window_size - h % self.window_size) % self.window_size
- mod_pad_w = (self.window_size - w % self.window_size) % self.window_size
- x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect')
- return x
-
- def forward_features(self, x):
- x_size = (x.shape[2], x.shape[3])
- x = self.patch_embed(x)
- if self.ape:
- x = x + self.absolute_pos_embed
- x = self.pos_drop(x)
-
- for layer in self.layers:
- x = layer(x, x_size)
-
- x = self.norm(x) # B L C
- x = self.patch_unembed(x, x_size)
-
- return x
-
- def forward_features_hf(self, x):
- x_size = (x.shape[2], x.shape[3])
- x = self.patch_embed(x)
- if self.ape:
- x = x + self.absolute_pos_embed
- x = self.pos_drop(x)
-
- for layer in self.layers_hf:
- x = layer(x, x_size)
-
- x = self.norm(x) # B L C
- x = self.patch_unembed(x, x_size)
-
- return x
-
- def forward(self, x):
- H, W = x.shape[2:]
- x = self.check_image_size(x)
-
- self.mean = self.mean.type_as(x)
- x = (x - self.mean) * self.img_range
-
- if self.upsampler == 'pixelshuffle':
- # for classical SR
- x = self.conv_first(x)
- x = self.conv_after_body(self.forward_features(x)) + x
- x = self.conv_before_upsample(x)
- x = self.conv_last(self.upsample(x))
- elif self.upsampler == 'pixelshuffle_aux':
- bicubic = F.interpolate(x, size=(H * self.upscale, W * self.upscale), mode='bicubic', align_corners=False)
- bicubic = self.conv_bicubic(bicubic)
- x = self.conv_first(x)
- x = self.conv_after_body(self.forward_features(x)) + x
- x = self.conv_before_upsample(x)
- aux = self.conv_aux(x) # b, 3, LR_H, LR_W
- x = self.conv_after_aux(aux)
- x = self.upsample(x)[:, :, :H * self.upscale, :W * self.upscale] + bicubic[:, :, :H * self.upscale, :W * self.upscale]
- x = self.conv_last(x)
- aux = aux / self.img_range + self.mean
- elif self.upsampler == 'pixelshuffle_hf':
- # for classical SR with HF
- x = self.conv_first(x)
- x = self.conv_after_body(self.forward_features(x)) + x
- x_before = self.conv_before_upsample(x)
- x_out = self.conv_last(self.upsample(x_before))
-
- x_hf = self.conv_first_hf(x_before)
- x_hf = self.conv_after_body_hf(self.forward_features_hf(x_hf)) + x_hf
- x_hf = self.conv_before_upsample_hf(x_hf)
- x_hf = self.conv_last_hf(self.upsample_hf(x_hf))
- x = x_out + x_hf
- x_hf = x_hf / self.img_range + self.mean
-
- elif self.upsampler == 'pixelshuffledirect':
- # for lightweight SR
- x = self.conv_first(x)
- x = self.conv_after_body(self.forward_features(x)) + x
- x = self.upsample(x)
- elif self.upsampler == 'nearest+conv':
- # for real-world SR
- x = self.conv_first(x)
- x = self.conv_after_body(self.forward_features(x)) + x
- x = self.conv_before_upsample(x)
- x = self.lrelu(self.conv_up1(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')))
- x = self.lrelu(self.conv_up2(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')))
- x = self.conv_last(self.lrelu(self.conv_hr(x)))
- else:
- # for image denoising and JPEG compression artifact reduction
- x_first = self.conv_first(x)
- res = self.conv_after_body(self.forward_features(x_first)) + x_first
- x = x + self.conv_last(res)
-
- x = x / self.img_range + self.mean
- if self.upsampler == "pixelshuffle_aux":
- return x[:, :, :H*self.upscale, :W*self.upscale], aux
-
- elif self.upsampler == "pixelshuffle_hf":
- x_out = x_out / self.img_range + self.mean
- return x_out[:, :, :H*self.upscale, :W*self.upscale], x[:, :, :H*self.upscale, :W*self.upscale], x_hf[:, :, :H*self.upscale, :W*self.upscale]
-
- else:
- return x[:, :, :H*self.upscale, :W*self.upscale]
-
- def flops(self):
- flops = 0
- H, W = self.patches_resolution
- flops += H * W * 3 * self.embed_dim * 9
- flops += self.patch_embed.flops()
- for i, layer in enumerate(self.layers):
- flops += layer.flops()
- flops += H * W * 3 * self.embed_dim * self.embed_dim
- flops += self.upsample.flops()
- return flops
-
-
-if __name__ == '__main__':
- upscale = 4
- window_size = 8
- height = (1024 // upscale // window_size + 1) * window_size
- width = (720 // upscale // window_size + 1) * window_size
- model = Swin2SR(upscale=2, img_size=(height, width),
- window_size=window_size, img_range=1., depths=[6, 6, 6, 6],
- embed_dim=60, num_heads=[6, 6, 6, 6], mlp_ratio=2, upsampler='pixelshuffledirect')
- print(model)
- print(height, width, model.flops() / 1e9)
-
- x = torch.randn((1, 3, height, width))
- x = model(x)
- print(x.shape)
\ No newline at end of file
diff --git a/modules/ui.py b/modules/ui.py
index 2eb0b684..3acb9b48 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -28,7 +28,6 @@ import modules.codeformer_model
import modules.generation_parameters_copypaste as parameters_copypaste
import modules.gfpgan_model
import modules.hypernetworks.ui
-import modules.ldsr_model
import modules.scripts
import modules.shared as shared
import modules.styles
diff --git a/modules/ui_extensions.py b/modules/ui_extensions.py
index 42667941..b487ac25 100644
--- a/modules/ui_extensions.py
+++ b/modules/ui_extensions.py
@@ -78,6 +78,12 @@ def extension_table():
"""
for ext in extensions.extensions:
+ remote = ""
+ if ext.is_builtin:
+ remote = "built-in"
+ elif ext.remote:
+ remote = f"""{html.escape("built-in" if ext.is_builtin else ext.remote or '')} """
+
if ext.can_update:
ext_status = f""" {html.escape(ext.status)} """
else:
@@ -86,7 +92,7 @@ def extension_table():
code += f"""
{html.escape(ext.name)}
- {html.escape(ext.remote or '')}
+ {remote}
{ext_status}
"""
--
cgit v1.2.1
From 0d21624ceef52b843c731ddc7fdcd7b8d108a42e Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 18:16:19 +0300
Subject: move #5216 to the extension
---
modules/sd_hijack.py | 2 +-
modules/sd_hijack_autoencoder.py | 286 ---------------------------------------
2 files changed, 1 insertion(+), 287 deletions(-)
delete mode 100644 modules/sd_hijack_autoencoder.py
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 303b1397..95a17093 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -11,7 +11,7 @@ import modules.textual_inversion.textual_inversion
from modules import prompt_parser, devices, sd_hijack_optimizations, shared, sd_hijack_checkpoint
from modules.hypernetworks import hypernetwork
from modules.shared import opts, device, cmd_opts
-from modules import sd_hijack_clip, sd_hijack_open_clip, sd_hijack_autoencoder
+from modules import sd_hijack_clip, sd_hijack_open_clip
from modules.sd_hijack_optimizations import invokeAI_mps_available
diff --git a/modules/sd_hijack_autoencoder.py b/modules/sd_hijack_autoencoder.py
deleted file mode 100644
index 8e03c7f8..00000000
--- a/modules/sd_hijack_autoencoder.py
+++ /dev/null
@@ -1,286 +0,0 @@
-# The content of this file comes from the ldm/models/autoencoder.py file of the compvis/stable-diffusion repo
-# The VQModel & VQModelInterface were subsequently removed from ldm/models/autoencoder.py when we moved to the stability-ai/stablediffusion repo
-# As the LDSR upscaler relies on VQModel & VQModelInterface, the hijack aims to put them back into the ldm.models.autoencoder
-
-import torch
-import pytorch_lightning as pl
-import torch.nn.functional as F
-from contextlib import contextmanager
-from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer
-from ldm.modules.diffusionmodules.model import Encoder, Decoder
-from ldm.util import instantiate_from_config
-
-import ldm.models.autoencoder
-
-class VQModel(pl.LightningModule):
- def __init__(self,
- ddconfig,
- lossconfig,
- n_embed,
- embed_dim,
- ckpt_path=None,
- ignore_keys=[],
- image_key="image",
- colorize_nlabels=None,
- monitor=None,
- batch_resize_range=None,
- scheduler_config=None,
- lr_g_factor=1.0,
- remap=None,
- sane_index_shape=False, # tell vector quantizer to return indices as bhw
- use_ema=False
- ):
- super().__init__()
- self.embed_dim = embed_dim
- self.n_embed = n_embed
- self.image_key = image_key
- self.encoder = Encoder(**ddconfig)
- self.decoder = Decoder(**ddconfig)
- self.loss = instantiate_from_config(lossconfig)
- self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25,
- remap=remap,
- sane_index_shape=sane_index_shape)
- self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1)
- self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
- if colorize_nlabels is not None:
- assert type(colorize_nlabels)==int
- self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1))
- if monitor is not None:
- self.monitor = monitor
- self.batch_resize_range = batch_resize_range
- if self.batch_resize_range is not None:
- print(f"{self.__class__.__name__}: Using per-batch resizing in range {batch_resize_range}.")
-
- self.use_ema = use_ema
- if self.use_ema:
- self.model_ema = LitEma(self)
- print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
-
- if ckpt_path is not None:
- self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
- self.scheduler_config = scheduler_config
- self.lr_g_factor = lr_g_factor
-
- @contextmanager
- def ema_scope(self, context=None):
- if self.use_ema:
- self.model_ema.store(self.parameters())
- self.model_ema.copy_to(self)
- if context is not None:
- print(f"{context}: Switched to EMA weights")
- try:
- yield None
- finally:
- if self.use_ema:
- self.model_ema.restore(self.parameters())
- if context is not None:
- print(f"{context}: Restored training weights")
-
- def init_from_ckpt(self, path, ignore_keys=list()):
- sd = torch.load(path, map_location="cpu")["state_dict"]
- keys = list(sd.keys())
- for k in keys:
- for ik in ignore_keys:
- if k.startswith(ik):
- print("Deleting key {} from state_dict.".format(k))
- del sd[k]
- missing, unexpected = self.load_state_dict(sd, strict=False)
- print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
- if len(missing) > 0:
- print(f"Missing Keys: {missing}")
- print(f"Unexpected Keys: {unexpected}")
-
- def on_train_batch_end(self, *args, **kwargs):
- if self.use_ema:
- self.model_ema(self)
-
- def encode(self, x):
- h = self.encoder(x)
- h = self.quant_conv(h)
- quant, emb_loss, info = self.quantize(h)
- return quant, emb_loss, info
-
- def encode_to_prequant(self, x):
- h = self.encoder(x)
- h = self.quant_conv(h)
- return h
-
- def decode(self, quant):
- quant = self.post_quant_conv(quant)
- dec = self.decoder(quant)
- return dec
-
- def decode_code(self, code_b):
- quant_b = self.quantize.embed_code(code_b)
- dec = self.decode(quant_b)
- return dec
-
- def forward(self, input, return_pred_indices=False):
- quant, diff, (_,_,ind) = self.encode(input)
- dec = self.decode(quant)
- if return_pred_indices:
- return dec, diff, ind
- return dec, diff
-
- def get_input(self, batch, k):
- x = batch[k]
- if len(x.shape) == 3:
- x = x[..., None]
- x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float()
- if self.batch_resize_range is not None:
- lower_size = self.batch_resize_range[0]
- upper_size = self.batch_resize_range[1]
- if self.global_step <= 4:
- # do the first few batches with max size to avoid later oom
- new_resize = upper_size
- else:
- new_resize = np.random.choice(np.arange(lower_size, upper_size+16, 16))
- if new_resize != x.shape[2]:
- x = F.interpolate(x, size=new_resize, mode="bicubic")
- x = x.detach()
- return x
-
- def training_step(self, batch, batch_idx, optimizer_idx):
- # https://github.com/pytorch/pytorch/issues/37142
- # try not to fool the heuristics
- x = self.get_input(batch, self.image_key)
- xrec, qloss, ind = self(x, return_pred_indices=True)
-
- if optimizer_idx == 0:
- # autoencode
- aeloss, log_dict_ae = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
- last_layer=self.get_last_layer(), split="train",
- predicted_indices=ind)
-
- self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True)
- return aeloss
-
- if optimizer_idx == 1:
- # discriminator
- discloss, log_dict_disc = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
- last_layer=self.get_last_layer(), split="train")
- self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True)
- return discloss
-
- def validation_step(self, batch, batch_idx):
- log_dict = self._validation_step(batch, batch_idx)
- with self.ema_scope():
- log_dict_ema = self._validation_step(batch, batch_idx, suffix="_ema")
- return log_dict
-
- def _validation_step(self, batch, batch_idx, suffix=""):
- x = self.get_input(batch, self.image_key)
- xrec, qloss, ind = self(x, return_pred_indices=True)
- aeloss, log_dict_ae = self.loss(qloss, x, xrec, 0,
- self.global_step,
- last_layer=self.get_last_layer(),
- split="val"+suffix,
- predicted_indices=ind
- )
-
- discloss, log_dict_disc = self.loss(qloss, x, xrec, 1,
- self.global_step,
- last_layer=self.get_last_layer(),
- split="val"+suffix,
- predicted_indices=ind
- )
- rec_loss = log_dict_ae[f"val{suffix}/rec_loss"]
- self.log(f"val{suffix}/rec_loss", rec_loss,
- prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
- self.log(f"val{suffix}/aeloss", aeloss,
- prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
- if version.parse(pl.__version__) >= version.parse('1.4.0'):
- del log_dict_ae[f"val{suffix}/rec_loss"]
- self.log_dict(log_dict_ae)
- self.log_dict(log_dict_disc)
- return self.log_dict
-
- def configure_optimizers(self):
- lr_d = self.learning_rate
- lr_g = self.lr_g_factor*self.learning_rate
- print("lr_d", lr_d)
- print("lr_g", lr_g)
- opt_ae = torch.optim.Adam(list(self.encoder.parameters())+
- list(self.decoder.parameters())+
- list(self.quantize.parameters())+
- list(self.quant_conv.parameters())+
- list(self.post_quant_conv.parameters()),
- lr=lr_g, betas=(0.5, 0.9))
- opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(),
- lr=lr_d, betas=(0.5, 0.9))
-
- if self.scheduler_config is not None:
- scheduler = instantiate_from_config(self.scheduler_config)
-
- print("Setting up LambdaLR scheduler...")
- scheduler = [
- {
- 'scheduler': LambdaLR(opt_ae, lr_lambda=scheduler.schedule),
- 'interval': 'step',
- 'frequency': 1
- },
- {
- 'scheduler': LambdaLR(opt_disc, lr_lambda=scheduler.schedule),
- 'interval': 'step',
- 'frequency': 1
- },
- ]
- return [opt_ae, opt_disc], scheduler
- return [opt_ae, opt_disc], []
-
- def get_last_layer(self):
- return self.decoder.conv_out.weight
-
- def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs):
- log = dict()
- x = self.get_input(batch, self.image_key)
- x = x.to(self.device)
- if only_inputs:
- log["inputs"] = x
- return log
- xrec, _ = self(x)
- if x.shape[1] > 3:
- # colorize with random projection
- assert xrec.shape[1] > 3
- x = self.to_rgb(x)
- xrec = self.to_rgb(xrec)
- log["inputs"] = x
- log["reconstructions"] = xrec
- if plot_ema:
- with self.ema_scope():
- xrec_ema, _ = self(x)
- if x.shape[1] > 3: xrec_ema = self.to_rgb(xrec_ema)
- log["reconstructions_ema"] = xrec_ema
- return log
-
- def to_rgb(self, x):
- assert self.image_key == "segmentation"
- if not hasattr(self, "colorize"):
- self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x))
- x = F.conv2d(x, weight=self.colorize)
- x = 2.*(x-x.min())/(x.max()-x.min()) - 1.
- return x
-
-
-class VQModelInterface(VQModel):
- def __init__(self, embed_dim, *args, **kwargs):
- super().__init__(embed_dim=embed_dim, *args, **kwargs)
- self.embed_dim = embed_dim
-
- def encode(self, x):
- h = self.encoder(x)
- h = self.quant_conv(h)
- return h
-
- def decode(self, h, force_not_quantize=False):
- # also go through quantization layer
- if not force_not_quantize:
- quant, emb_loss, info = self.quantize(h)
- else:
- quant = h
- quant = self.post_quant_conv(quant)
- dec = self.decoder(quant)
- return dec
-
-setattr(ldm.models.autoencoder, "VQModel", VQModel)
-setattr(ldm.models.autoencoder, "VQModelInterface", VQModelInterface)
--
cgit v1.2.1
From 4b0dc206edbad90affe609ac0bf2e9be7e197674 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 18:45:51 +0300
Subject: use modelloader for #4956
---
modules/interrogate.py | 22 ++++++++--------------
1 file changed, 8 insertions(+), 14 deletions(-)
(limited to 'modules')
diff --git a/modules/interrogate.py b/modules/interrogate.py
index 3a09b366..0068b81c 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -1,4 +1,3 @@
-import contextlib
import os
import sys
import traceback
@@ -11,12 +10,9 @@ from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode
import modules.shared as shared
-from modules import devices, paths, lowvram
+from modules import devices, paths, lowvram, modelloader
blip_image_eval_size = 384
-blip_local_dir = os.path.join('models', 'Interrogator')
-blip_local_file = os.path.join(blip_local_dir, 'model_base_caption_capfilt_large.pth')
-blip_model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'
clip_model_name = 'ViT-L/14'
Category = namedtuple("Category", ["name", "topn", "items"])
@@ -49,16 +45,14 @@ class InterrogateModels:
def load_blip_model(self):
import models.blip
- if not os.path.isfile(blip_local_file):
- if not os.path.isdir(blip_local_dir):
- os.mkdir(blip_local_dir)
+ files = modelloader.load_models(
+ model_path=os.path.join(paths.models_path, "BLIP"),
+ model_url='https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth',
+ ext_filter=[".pth"],
+ download_name='model_base_caption_capfilt_large.pth',
+ )
- print("Downloading BLIP...")
- from requests import get as reqget
- open(blip_local_file, 'wb').write(reqget(blip_model_url, allow_redirects=True).content)
- print("BLIP downloaded to", blip_local_file + '.')
-
- blip_model = models.blip.blip_decoder(pretrained=blip_local_file, image_size=blip_image_eval_size, vit='base', med_config=os.path.join(paths.paths["BLIP"], "configs", "med_config.json"))
+ blip_model = models.blip.blip_decoder(pretrained=files[0], image_size=blip_image_eval_size, vit='base', med_config=os.path.join(paths.paths["BLIP"], "configs", "med_config.json"))
blip_model.eval()
return blip_model
--
cgit v1.2.1
From 60bd4d52a658838c5ee3f6ddfe8d4db55cf1d764 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 18:46:09 +0300
Subject: fix incorrect file extension filter for deepdanbooru models
---
modules/deepbooru.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/deepbooru.py b/modules/deepbooru.py
index 31ec7e17..dfc83357 100644
--- a/modules/deepbooru.py
+++ b/modules/deepbooru.py
@@ -21,7 +21,7 @@ class DeepDanbooru:
files = modelloader.load_models(
model_path=os.path.join(paths.models_path, "torch_deepdanbooru"),
model_url='https://github.com/AUTOMATIC1111/TorchDeepDanbooru/releases/download/v1/model-resnet_custom_v3.pt',
- ext_filter=".pt",
+ ext_filter=[".pt"],
download_name='model-resnet_custom_v3.pt',
)
--
cgit v1.2.1
From 8504db51704d238cc7616f6bf59eb049d3eb101d Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 4 Dec 2022 01:04:24 +0300
Subject: fix #4459 breaking inpainting when the option is not specified.
---
modules/img2img.py | 17 +++++++++--------
modules/ui.py | 25 ++++++++++++++-----------
2 files changed, 23 insertions(+), 19 deletions(-)
(limited to 'modules')
diff --git a/modules/img2img.py b/modules/img2img.py
index 830cfa15..81da4b13 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -4,7 +4,7 @@ import sys
import traceback
import numpy as np
-from PIL import Image, ImageOps, ImageFilter, ImageEnhance
+from PIL import Image, ImageOps, ImageFilter, ImageEnhance, ImageChops
from modules import devices, sd_samplers
from modules.processing import Processed, StableDiffusionProcessingImg2Img, process_images
@@ -66,22 +66,23 @@ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, pro
if is_inpaint:
# Drawn mask
if mask_mode == 0:
- image = init_img_with_mask
- is_mask_sketch = isinstance(image, dict)
+ is_mask_sketch = isinstance(init_img_with_mask, dict)
is_mask_paint = not is_mask_sketch
if is_mask_sketch:
# Sketch: mask iff. not transparent
- image, mask = image["image"], image["mask"]
- pred = np.array(mask)[..., -1] > 0
+ image, mask = init_img_with_mask["image"], init_img_with_mask["mask"]
+ alpha_mask = ImageOps.invert(image.split()[-1]).convert('L').point(lambda x: 255 if x > 0 else 0, mode='1')
+ mask = ImageChops.lighter(alpha_mask, mask.convert('L')).convert('L')
else:
# Color-sketch: mask iff. painted over
- orig = init_img_with_mask_orig or image
+ image = init_img_with_mask
+ orig = init_img_with_mask_orig or init_img_with_mask
pred = np.any(np.array(image) != np.array(orig), axis=-1)
- mask = Image.fromarray(pred.astype(np.uint8) * 255, "L")
- if is_mask_paint:
+ mask = Image.fromarray(pred.astype(np.uint8) * 255, "L")
mask = ImageEnhance.Brightness(mask).enhance(1 - mask_alpha / 100)
blur = ImageFilter.GaussianBlur(mask_blur)
image = Image.composite(image.filter(blur), orig, mask.filter(blur))
+
image = image.convert("RGB")
# Uploaded mask
else:
diff --git a/modules/ui.py b/modules/ui.py
index 3acb9b48..b2b8de90 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -791,23 +791,26 @@ def create_ui():
init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool=cmd_opts.gradio_img2img_tool).style(height=480)
with gr.TabItem('Inpaint', id='inpaint'):
- init_img_with_mask_orig = gr.State(None)
init_img_with_mask = gr.Image(label="Image for inpainting with mask", show_label=False, elem_id="img2maskimg", source="upload", interactive=True, type="pil", tool=cmd_opts.gradio_inpaint_tool, image_mode="RGBA").style(height=480)
+ init_img_with_mask_orig = gr.State(None)
- def update_orig(image, state):
- if image is not None:
- same_size = state is not None and state.size == image.size
- has_exact_match = np.any(np.all(np.array(image) == np.array(state), axis=-1))
- edited = same_size and has_exact_match
- return image if not edited or state is None else state
+ use_color_sketch = cmd_opts.gradio_inpaint_tool == "color-sketch"
+ if use_color_sketch:
+ def update_orig(image, state):
+ if image is not None:
+ same_size = state is not None and state.size == image.size
+ has_exact_match = np.any(np.all(np.array(image) == np.array(state), axis=-1))
+ edited = same_size and has_exact_match
+ return image if not edited or state is None else state
+
+ init_img_with_mask.change(update_orig, [init_img_with_mask, init_img_with_mask_orig], init_img_with_mask_orig)
- init_img_with_mask.change(update_orig, [init_img_with_mask, init_img_with_mask_orig], init_img_with_mask_orig)
init_img_inpaint = gr.Image(label="Image for img2img", show_label=False, source="upload", interactive=True, type="pil", visible=False, elem_id="img_inpaint_base")
init_mask_inpaint = gr.Image(label="Mask", source="upload", interactive=True, type="pil", visible=False, elem_id="img_inpaint_mask")
- show_mask_alpha = cmd_opts.gradio_inpaint_tool == "color-sketch"
- mask_alpha = gr.Slider(label="Mask transparency", interactive=show_mask_alpha, visible=show_mask_alpha)
- mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4)
+ with gr.Row():
+ mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4)
+ mask_alpha = gr.Slider(label="Mask transparency", interactive=use_color_sketch, visible=use_color_sketch)
with gr.Row():
mask_mode = gr.Radio(label="Mask mode", show_label=False, choices=["Draw mask", "Upload mask"], type="index", value="Draw mask", elem_id="mask_mode")
--
cgit v1.2.1
From 44c46f0ed395967cd3830dd481a2db759fda5b3b Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 4 Dec 2022 12:30:44 +0300
Subject: make it possible to merge inpainting model with non-inpainting one
---
modules/extras.py | 27 +++++++++++++++++++++++++--
1 file changed, 25 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/extras.py b/modules/extras.py
index 6021a024..bc349d5e 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -247,6 +247,7 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
primary_model_info = sd_models.checkpoints_list[primary_model_name]
secondary_model_info = sd_models.checkpoints_list[secondary_model_name]
teritary_model_info = sd_models.checkpoints_list.get(teritary_model_name, None)
+ result_is_inpainting_model = False
print(f"Loading {primary_model_info.filename}...")
theta_0 = sd_models.read_state_dict(primary_model_info.filename, map_location='cpu')
@@ -280,8 +281,22 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
for key in tqdm.tqdm(theta_0.keys()):
if 'model' in key and key in theta_1:
+ a = theta_0[key]
+ b = theta_1[key]
- theta_0[key] = theta_func2(theta_0[key], theta_1[key], multiplier)
+ # this enables merging an inpainting model (A) with another one (B);
+ # where normal model would have 4 channels, for latenst space, inpainting model would
+ # have another 4 channels for unmasked picture's latent space, plus one channel for mask, for a total of 9
+ if a.shape != b.shape and a.shape[0:1] + a.shape[2:] == b.shape[0:1] + b.shape[2:]:
+ if a.shape[1] == 4 and b.shape[1] == 9:
+ raise RuntimeError("When merging inpainting model with a normal one, A must be the inpainting model.")
+
+ assert a.shape[1] == 9 and b.shape[1] == 4, f"Bad dimensions for merged layer {key}: A={a.shape}, B={b.shape}"
+
+ theta_0[key][:, 0:4, :, :] = theta_func2(a[:, 0:4, :, :], b, multiplier)
+ result_is_inpainting_model = True
+ else:
+ theta_0[key] = theta_func2(a, b, multiplier)
if save_as_half:
theta_0[key] = theta_0[key].half()
@@ -295,8 +310,16 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
ckpt_dir = shared.cmd_opts.ckpt_dir or sd_models.model_path
- filename = primary_model_info.model_name + '_' + str(round(1-multiplier, 2)) + '-' + secondary_model_info.model_name + '_' + str(round(multiplier, 2)) + '-' + interp_method.replace(" ", "_") + '-merged.' + checkpoint_format
+ filename = \
+ primary_model_info.model_name + '_' + str(round(1-multiplier, 2)) + '-' + \
+ secondary_model_info.model_name + '_' + str(round(multiplier, 2)) + '-' + \
+ interp_method.replace(" ", "_") + \
+ '-merged.' + \
+ ("inpainting." if result_is_inpainting_model else "") + \
+ checkpoint_format
+
filename = filename if custom_name == '' else (custom_name + '.' + checkpoint_format)
+
output_modelname = os.path.join(ckpt_dir, filename)
print(f"Saving to {output_modelname}...")
--
cgit v1.2.1
From 681c450ecd8f0999cbaf562c5e734c7105320ad9 Mon Sep 17 00:00:00 2001
From: Mackerel
Date: Sun, 4 Dec 2022 01:13:36 -0500
Subject: extras.py: use as little RAM as possible, misc fixes
maximum of 2 models loaded at once. delete unneeded model before next
step. fix 'teritary' -> 'tertiary'. gracefully fail when "add
difference" is selected without a tertiary model
---
modules/extras.py | 37 +++++++++++++++++++------------------
1 file changed, 19 insertions(+), 18 deletions(-)
(limited to 'modules')
diff --git a/modules/extras.py b/modules/extras.py
index bc349d5e..0ad8deec 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -62,7 +62,7 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
# Also keep track of original file names
imageNameArr = []
outputs = []
-
+
if extras_mode == 1:
#convert file to pillow image
for img in image_folder:
@@ -234,7 +234,7 @@ def run_pnginfo(image):
return '', geninfo, info
-def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_name, interp_method, multiplier, save_as_half, custom_name, checkpoint_format):
+def run_modelmerger(primary_model_name, secondary_model_name, tertiary_model_name, interp_method, multiplier, save_as_half, custom_name, checkpoint_format):
def weighted_sum(theta0, theta1, alpha):
return ((1 - alpha) * theta0) + (alpha * theta1)
@@ -246,30 +246,25 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
primary_model_info = sd_models.checkpoints_list[primary_model_name]
secondary_model_info = sd_models.checkpoints_list[secondary_model_name]
- teritary_model_info = sd_models.checkpoints_list.get(teritary_model_name, None)
+ tertiary_model_info = sd_models.checkpoints_list.get(tertiary_model_name, None)
result_is_inpainting_model = False
- print(f"Loading {primary_model_info.filename}...")
- theta_0 = sd_models.read_state_dict(primary_model_info.filename, map_location='cpu')
-
- print(f"Loading {secondary_model_info.filename}...")
- theta_1 = sd_models.read_state_dict(secondary_model_info.filename, map_location='cpu')
-
- if teritary_model_info is not None:
- print(f"Loading {teritary_model_info.filename}...")
- theta_2 = sd_models.read_state_dict(teritary_model_info.filename, map_location='cpu')
- else:
- theta_2 = None
-
theta_funcs = {
"Weighted sum": (None, weighted_sum),
"Add difference": (get_difference, add_difference),
}
theta_func1, theta_func2 = theta_funcs[interp_method]
- print(f"Merging...")
+ if theta_func1 and not tertiary_model_info:
+ return ["Failed: Interpolation method requires a tertiary model."] + [gr.Dropdown.update(choices=sd_models.checkpoint_tiles()) for _ in range(4)]
+
+ print(f"Loading {secondary_model_info.filename}...")
+ theta_1 = sd_models.read_state_dict(secondary_model_info.filename, map_location='cpu')
if theta_func1:
+ print(f"Loading {tertiary_model_info.filename}...")
+ theta_2 = sd_models.read_state_dict(tertiary_model_info.filename, map_location='cpu')
+
for key in tqdm.tqdm(theta_1.keys()):
if 'model' in key:
if key in theta_2:
@@ -277,7 +272,12 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
theta_1[key] = theta_func1(theta_1[key], t2)
else:
theta_1[key] = torch.zeros_like(theta_1[key])
- del theta_2
+ del theta_2
+
+ print(f"Loading {primary_model_info.filename}...")
+ theta_0 = sd_models.read_state_dict(primary_model_info.filename, map_location='cpu')
+
+ print("Merging...")
for key in tqdm.tqdm(theta_0.keys()):
if 'model' in key and key in theta_1:
@@ -307,6 +307,7 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
theta_0[key] = theta_1[key]
if save_as_half:
theta_0[key] = theta_0[key].half()
+ del theta_1
ckpt_dir = shared.cmd_opts.ckpt_dir or sd_models.model_path
@@ -332,5 +333,5 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
sd_models.list_models()
- print(f"Checkpoint saved.")
+ print("Checkpoint saved.")
return ["Checkpoint saved to " + output_modelname] + [gr.Dropdown.update(choices=sd_models.checkpoint_tiles()) for _ in range(4)]
--
cgit v1.2.1
From 7057c72ae3f697381a6ccdd1527b954a1280cb40 Mon Sep 17 00:00:00 2001
From: timntorres
Date: Mon, 5 Dec 2022 03:41:36 -0800
Subject: Add opt. to avoid sending size between interfaces.
---
modules/generation_parameters_copypaste.py | 3 +--
modules/shared.py | 1 +
2 files changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index 44fe1a6c..e8d5250a 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -121,8 +121,7 @@ def run_bind():
if send_generate_info and paste_fields[tab]["fields"] is not None:
if send_generate_info in paste_fields:
- paste_field_names = ['Prompt', 'Negative prompt', 'Steps', 'Face restoration', 'Size-1', 'Size-2'] + (["Seed"] if shared.opts.send_seed else [])
-
+ paste_field_names = ['Prompt', 'Negative prompt', 'Steps', 'Face restoration'] + (['Size-1', 'Size-2'] if shared.opts.send_size else []) + (["Seed"] if shared.opts.send_seed else [])
button.click(
fn=lambda *x: x,
inputs=[field for field, name in paste_fields[send_generate_info]["fields"] if name in paste_field_names],
diff --git a/modules/shared.py b/modules/shared.py
index dc45fcaa..ab9012af 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -395,6 +395,7 @@ options_templates.update(options_section(('ui', "User interface"), {
"add_model_name_to_info": OptionInfo(False, "Add model name to generation information"),
"disable_weights_auto_swap": OptionInfo(False, "When reading generation parameters from text into UI (from PNG info or pasted text), do not change the selected model/checkpoint."),
"send_seed": OptionInfo(True, "Send seed when sending prompt or image to other interface"),
+ "send_size": OptionInfo(True, "Send size when sending prompt or image to another interface"),
"font": OptionInfo("", "Font for image grids that have text"),
"js_modal_lightbox": OptionInfo(True, "Enable full page image viewer"),
"js_modal_lightbox_initially_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"),
--
cgit v1.2.1
From 4929503258d80abbc4b5f40da034298fe3803906 Mon Sep 17 00:00:00 2001
From: zhaohu xing <920232796@qq.com>
Date: Tue, 6 Dec 2022 09:03:55 +0800
Subject: fix bugs
Signed-off-by: zhaohu xing <920232796@qq.com>
---
modules/devices.py | 4 ++--
modules/sd_hijack.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
(limited to 'modules')
diff --git a/modules/devices.py b/modules/devices.py
index e69c1fe3..f00079c6 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -38,8 +38,8 @@ def get_optimal_device():
if torch.cuda.is_available():
return torch.device(get_cuda_device_string())
- # if has_mps():
- # return torch.device("mps")
+ if has_mps():
+ return torch.device("mps")
return cpu
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index edb8b420..cd65d356 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -28,7 +28,7 @@ diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.At
# new memory efficient cross attention blocks do not support hypernets and we already
# have memory efficient cross attention anyway, so this disables SD2.0's memory efficient cross attention
ldm.modules.attention.MemoryEfficientCrossAttention = ldm.modules.attention.CrossAttention
-# ldm.modules.attention.BasicTransformerBlock.ATTENTION_MODES["softmax-xformers"] = ldm.modules.attention.CrossAttention
+ldm.modules.attention.BasicTransformerBlock.ATTENTION_MODES["softmax-xformers"] = ldm.modules.attention.CrossAttention
# silence new console spam from SD2
ldm.modules.attention.print = lambda *args: None
--
cgit v1.2.1
From 5dcc22606d05ebe5ae89c990bd83a3eb068fcb78 Mon Sep 17 00:00:00 2001
From: zhaohu xing <920232796@qq.com>
Date: Tue, 6 Dec 2022 16:04:50 +0800
Subject: add hash and fix undo hijack bug
Signed-off-by: zhaohu xing <920232796@qq.com>
---
modules/sd_hijack.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 9b5890e7..9fed1b6f 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -112,7 +112,11 @@ class StableDiffusionModelHijack:
self.layers = flatten(m)
def undo_hijack(self, m):
- if type(m.cond_stage_model) == sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords:
+
+ if shared.text_model_name == "XLMR-Large":
+ m.cond_stage_model = m.cond_stage_model.wrapped
+
+ elif type(m.cond_stage_model) == sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords:
m.cond_stage_model = m.cond_stage_model.wrapped
model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
--
cgit v1.2.1
From 965fc5ac5a6ccdf38342e21c97183011a04e799e Mon Sep 17 00:00:00 2001
From: zhaohu xing <920232796@qq.com>
Date: Tue, 6 Dec 2022 16:15:15 +0800
Subject: delete a file
Signed-off-by: zhaohu xing <920232796@qq.com>
---
modules/shared.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index 522c56c1..8419b531 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -22,7 +22,7 @@ demo = None
sd_model_file = os.path.join(script_path, 'model.ckpt')
default_sd_model_file = sd_model_file
parser = argparse.ArgumentParser()
-parser.add_argument("--config", type=str, default="configs/altdiffusion/ad-inference.yaml", help="path to config which constructs model",)
+parser.add_argument("--config", type=str, default=os.path.join(script_path, "v1-inference.yaml"), help="path to config which constructs model",)
parser.add_argument("--ckpt", type=str, default=sd_model_file, help="path to checkpoint of stable diffusion model; if specified, this checkpoint will be added to the list of checkpoints and loaded",)
parser.add_argument("--ckpt-dir", type=str, default=None, help="Path to directory with stable diffusion checkpoints")
parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN'))
--
cgit v1.2.1
From 358a8628f6abb4ca1e1bfddf122687c6fb13be0c Mon Sep 17 00:00:00 2001
From: Andrew Ryan
Date: Thu, 8 Dec 2022 07:09:09 +0000
Subject: Add latent upscale option to img2img
Recently, the option to do latent upscale was added to txt2img highres
fix. This feature runs by scaling the latent sample of the image, and
then running a second pass of img2img.
But, in this edition of highres fix, the image and parameters cannot be
changed between the first pass and second pass. We might want to do a
fixup in img2img before doing the second pass, or might want to run the
second pass at a different resolution.
This change adds the option for img2img to perform its upscale in latent
space, rather than image space, giving very similar results to highres
fix with latent upscale. The result is not exactly the same because
there is an additional latent -> decoder -> image -> encoder -> latent
that won't happen in highres fix, but this conversion has relatively
small losses
---
modules/processing.py | 6 +++++-
modules/ui.py | 2 +-
2 files changed, 6 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index 3d2c4dc9..ab5a34d0 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -795,7 +795,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
for img in self.init_images:
image = img.convert("RGB")
- if crop_region is None:
+ if crop_region is None and self.resize_mode != 3:
image = images.resize_image(self.resize_mode, image, self.width, self.height)
if image_mask is not None:
@@ -804,6 +804,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
self.overlay_images.append(image_masked.convert('RGBA'))
+ # crop_region is not none iif we are doing inpaint full res
if crop_region is not None:
image = image.crop(crop_region)
image = images.resize_image(2, image, self.width, self.height)
@@ -840,6 +841,9 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image))
+ if self.resize_mode == 3:
+ self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
+
if image_mask is not None:
init_mask = latent_mask
latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
diff --git a/modules/ui.py b/modules/ui.py
index b2b8de90..fe4abe05 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -829,7 +829,7 @@ def create_ui():
img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs)
with gr.Row():
- resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", show_label=False, choices=["Just resize", "Crop and resize", "Resize and fill"], type="index", value="Just resize")
+ resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", show_label=False, choices=["Just resize", "Crop and resize", "Resize and fill", "Upscale Latent Space"], type="index", value="Just resize")
steps = gr.Slider(minimum=1, maximum=150, step=1, label="Sampling Steps", value=20)
sampler_index = gr.Radio(label='Sampling method', choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index")
--
cgit v1.2.1
From 1ed4f0e22807f3afef925210182cbbee51f0cb2c Mon Sep 17 00:00:00 2001
From: Jay Smith
Date: Thu, 8 Dec 2022 18:14:35 -0600
Subject: Depth2img model support
---
modules/processing.py | 38 ++++++++++++++++++++++++++++++++++----
modules/sd_models.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 80 insertions(+), 4 deletions(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index 3d2c4dc9..0417ffc5 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -21,7 +21,10 @@ import modules.face_restoration
import modules.images as images
import modules.styles
import logging
+from ldm.data.util import AddMiDaS
+from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion
+from einops import repeat, rearrange
# some of those options should not be changed at all because they would break the model, so I removed them from options.
opt_C = 4
@@ -150,11 +153,26 @@ class StableDiffusionProcessing():
return image_conditioning
- def img2img_image_conditioning(self, source_image, latent_image, image_mask = None):
- if self.sampler.conditioning_key not in {'hybrid', 'concat'}:
- # Dummy zero conditioning if we're not using inpainting model.
- return latent_image.new_zeros(latent_image.shape[0], 5, 1, 1)
+ def depth2img_image_conditioning(self, source_image):
+ # Use the AddMiDaS helper to Format our source image to suit the MiDaS model
+ transformer = AddMiDaS(model_type="dpt_hybrid")
+ transformed = transformer({"jpg": rearrange(source_image[0], "c h w -> h w c")})
+ midas_in = torch.from_numpy(transformed["midas_in"][None, ...]).to(device=shared.device)
+ midas_in = repeat(midas_in, "1 ... -> n ...", n=self.batch_size)
+
+ conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image))
+ conditioning = torch.nn.functional.interpolate(
+ self.sd_model.depth_model(midas_in),
+ size=conditioning_image.shape[2:],
+ mode="bicubic",
+ align_corners=False,
+ )
+
+ (depth_min, depth_max) = torch.aminmax(conditioning)
+ conditioning = 2. * (conditioning - depth_min) / (depth_max - depth_min) - 1.
+ return conditioning
+ def inpainting_image_conditioning(self, source_image, latent_image, image_mask = None):
self.is_using_inpainting_conditioning = True
# Handle the different mask inputs
@@ -191,6 +209,18 @@ class StableDiffusionProcessing():
return image_conditioning
+ def img2img_image_conditioning(self, source_image, latent_image, image_mask=None):
+ # HACK: Using introspection as the Depth2Image model doesn't appear to uniquely
+ # identify itself with a field common to all models. The conditioning_key is also hybrid.
+ if isinstance(self.sd_model, LatentDepth2ImageDiffusion):
+ return self.depth2img_image_conditioning(source_image)
+
+ if self.sampler.conditioning_key in {'hybrid', 'concat'}:
+ return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask)
+
+ # Dummy zero conditioning if we're not using inpainting or depth model.
+ return latent_image.new_zeros(latent_image.shape[0], 5, 1, 1)
+
def init(self, all_prompts, all_seeds, all_subseeds):
pass
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 283cf1cd..139952ba 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -7,6 +7,9 @@ import torch
import re
import safetensors.torch
from omegaconf import OmegaConf
+from os import mkdir
+from urllib import request
+import ldm.modules.midas as midas
from ldm.util import instantiate_from_config
@@ -36,6 +39,7 @@ def setup_model():
os.makedirs(model_path)
list_models()
+ enable_midas_autodownload()
def checkpoint_tiles():
@@ -227,6 +231,48 @@ def load_model_weights(model, checkpoint_info, vae_file="auto"):
sd_vae.load_vae(model, vae_file)
+def enable_midas_autodownload():
+ """
+ Gives the ldm.modules.midas.api.load_model function automatic downloading.
+
+ When the 512-depth-ema model, and other future models like it, is loaded,
+ it calls midas.api.load_model to load the associated midas depth model.
+ This function applies a wrapper to download the model to the correct
+ location automatically.
+ """
+
+ midas_path = os.path.join(models_path, 'midas')
+
+ # stable-diffusion-stability-ai hard-codes the midas model path to
+ # a location that differs from where other scripts using this model look.
+ # HACK: Overriding the path here.
+ for k, v in midas.api.ISL_PATHS.items():
+ file_name = os.path.basename(v)
+ midas.api.ISL_PATHS[k] = os.path.join(midas_path, file_name)
+
+ midas_urls = {
+ "dpt_large": "https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt",
+ "dpt_hybrid": "https://github.com/intel-isl/DPT/releases/download/1_0/dpt_hybrid-midas-501f0c75.pt",
+ "midas_v21": "https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21-f6b98070.pt",
+ "midas_v21_small": "https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21_small-70d6b9c8.pt",
+ }
+
+ midas.api.load_model_inner = midas.api.load_model
+
+ def load_model_wrapper(model_type):
+ path = midas.api.ISL_PATHS[model_type]
+ if not os.path.exists(path):
+ if not os.path.exists(midas_path):
+ mkdir(midas_path)
+
+ print(f"Downloading midas model weights for {model_type} to {path}")
+ request.urlretrieve(midas_urls[model_type], path)
+ print(f"{model_type} downloaded")
+
+ return midas.api.load_model_inner(model_type)
+
+ midas.api.load_model = load_model_wrapper
+
def load_model(checkpoint_info=None):
from modules import lowvram, sd_hijack
checkpoint_info = checkpoint_info or select_checkpoint()
--
cgit v1.2.1
From ce04ba71b880aeb97b7d47d404ba4ea430891618 Mon Sep 17 00:00:00 2001
From: Ju1-js <40339350+Ju1-js@users.noreply.github.com>
Date: Thu, 8 Dec 2022 22:47:45 -0800
Subject: Make # settings changed message grammatically correct
Make the ": " in the settings changed message not show if 0 settings were changed.
"0 settings changed: ." -> "0 settings changed."
---
modules/ui.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index b2b8de90..79bb3d1f 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1450,7 +1450,7 @@ def create_ui():
opts.save(shared.config_filename)
except RuntimeError:
return opts.dumpjson(), f'{len(changed)} settings changed without save: {", ".join(changed)}.'
- return opts.dumpjson(), f'{len(changed)} settings changed: {", ".join(changed)}.'
+ return opts.dumpjson(), f'{len(changed)} settings changed{": " if len(changed) > 0 else ""}{", ".join(changed)}.'
def run_settings_single(value, key):
if not opts.same_type(value, opts.data_labels[key].default):
--
cgit v1.2.1
From 9539c2045a2de9e900b96acbf67e41fafe93c6f6 Mon Sep 17 00:00:00 2001
From: ywx9
Date: Fri, 9 Dec 2022 23:03:06 +0900
Subject: Bug fix
---
modules/api/api.py | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 54ee7cb0..89935a70 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -157,12 +157,7 @@ class Api:
args.pop('include_init_images', None) # this is meant to be done by "exclude": True in model, but it's for a reason that I cannot determine.
p = StableDiffusionProcessingImg2Img(**args)
- imgs = []
- for img in init_images:
- img = decode_base64_to_image(img)
- imgs = [img] * p.batch_size
-
- p.init_images = imgs
+ p.init_images = [decode_base64_to_image(x) for x in init_images]
shared.state.begin()
--
cgit v1.2.1
From 7dbfd8a7d8aefec7283b456c6f5b000ae4d3496d Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 10 Dec 2022 09:14:30 +0300
Subject: do not replace entire unet for the resolution hack
---
modules/sd_hijack.py | 5 +++--
modules/sd_hijack_optimizations.py | 28 ----------------------------
modules/sd_hijack_unet.py | 30 ++++++++++++++++++++++++++++++
3 files changed, 33 insertions(+), 30 deletions(-)
create mode 100644 modules/sd_hijack_unet.py
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 92874a79..47dbc1b7 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -11,7 +11,7 @@ import modules.textual_inversion.textual_inversion
from modules import prompt_parser, devices, sd_hijack_optimizations, shared, sd_hijack_checkpoint
from modules.hypernetworks import hypernetwork
from modules.shared import opts, device, cmd_opts
-from modules import sd_hijack_clip, sd_hijack_open_clip
+from modules import sd_hijack_clip, sd_hijack_open_clip, sd_hijack_unet
from modules.sd_hijack_optimizations import invokeAI_mps_available
@@ -35,11 +35,12 @@ ldm.modules.attention.BasicTransformerBlock.ATTENTION_MODES["softmax-xformers"]
ldm.modules.attention.print = lambda *args: None
ldm.modules.diffusionmodules.model.print = lambda *args: None
+
def apply_optimizations():
undo_optimizations()
ldm.modules.diffusionmodules.model.nonlinearity = silu
- ldm.modules.diffusionmodules.openaimodel.UNetModel.forward = sd_hijack_optimizations.patched_unet_forward
+ ldm.modules.diffusionmodules.openaimodel.th = sd_hijack_unet.th
if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and (6, 0) <= torch.cuda.get_device_capability(shared.device) <= (9, 0)):
print("Applying xformers cross attention optimization.")
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 8cd4c954..85909eb9 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -313,31 +313,3 @@ def xformers_attnblock_forward(self, x):
return x + out
except NotImplementedError:
return cross_attention_attnblock_forward(self, x)
-
-def patched_unet_forward(self, x, timesteps=None, context=None, y=None,**kwargs):
- assert (y is not None) == (
- self.num_classes is not None
- ), "must specify y if and only if the model is class-conditional"
- hs = []
- t_emb = timestep_embedding(timesteps, self.model_channels, repeat_only=False)
- emb = self.time_embed(t_emb)
-
- if self.num_classes is not None:
- assert y.shape == (x.shape[0],)
- emb = emb + self.label_emb(y)
-
- h = x.type(self.dtype)
- for module in self.input_blocks:
- h = module(h, emb, context)
- hs.append(h)
- h = self.middle_block(h, emb, context)
- for module in self.output_blocks:
- if h.shape[-2:] != hs[-1].shape[-2:]:
- h = F.interpolate(h, hs[-1].shape[-2:], mode="nearest")
- h = torch.cat([h, hs.pop()], dim=1)
- h = module(h, emb, context)
- h = h.type(x.dtype)
- if self.predict_codebook_ids:
- return self.id_predictor(h)
- else:
- return self.out(h)
diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py
new file mode 100644
index 00000000..1b9d7757
--- /dev/null
+++ b/modules/sd_hijack_unet.py
@@ -0,0 +1,30 @@
+import torch
+
+
+class TorchHijackForUnet:
+ """
+ This is torch, but with cat that resizes tensors to appropriate dimensions if they do not match;
+ this makes it possible to create pictures with dimensions that are muliples of 8 rather than 64
+ """
+
+ def __getattr__(self, item):
+ if item == 'cat':
+ return self.cat
+
+ if hasattr(torch, item):
+ return getattr(torch, item)
+
+ raise AttributeError("'{}' object has no attribute '{}'".format(type(self).__name__, item))
+
+ def cat(self, tensors, *args, **kwargs):
+ if len(tensors) == 2:
+ a, b = tensors
+ if a.shape[-2:] != b.shape[-2:]:
+ a = torch.nn.functional.interpolate(a, b.shape[-2:], mode="nearest")
+
+ tensors = (a, b)
+
+ return torch.cat(tensors, *args, **kwargs)
+
+
+th = TorchHijackForUnet()
--
cgit v1.2.1
From 505ec7e4d960e7bea579182509050fafb10bd00c Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 10 Dec 2022 09:17:39 +0300
Subject: cleanup some unneeded imports for hijack files
---
modules/sd_hijack.py | 10 ++--------
modules/sd_hijack_optimizations.py | 3 ---
2 files changed, 2 insertions(+), 11 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 47dbc1b7..690a9ec2 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -1,16 +1,10 @@
-import math
-import os
-import sys
-import traceback
import torch
-import numpy as np
-from torch import einsum
from torch.nn.functional import silu
import modules.textual_inversion.textual_inversion
-from modules import prompt_parser, devices, sd_hijack_optimizations, shared, sd_hijack_checkpoint
+from modules import devices, sd_hijack_optimizations, shared, sd_hijack_checkpoint
from modules.hypernetworks import hypernetwork
-from modules.shared import opts, device, cmd_opts
+from modules.shared import cmd_opts
from modules import sd_hijack_clip, sd_hijack_open_clip, sd_hijack_unet
from modules.sd_hijack_optimizations import invokeAI_mps_available
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 85909eb9..98123fbf 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -5,7 +5,6 @@ import importlib
import torch
from torch import einsum
-import torch.nn.functional as F
from ldm.util import default
from einops import rearrange
@@ -13,8 +12,6 @@ from einops import rearrange
from modules import shared
from modules.hypernetworks import hypernetwork
-from ldm.modules.diffusionmodules.util import timestep_embedding
-
if shared.cmd_opts.xformers or shared.cmd_opts.force_enable_xformers:
try:
--
cgit v1.2.1
From bab91b12798f67c19a2b14dab13a08d5d3e3de26 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 10 Dec 2022 09:51:26 +0300
Subject: add Noise multiplier option to infotext
---
modules/generation_parameters_copypaste.py | 1 +
modules/processing.py | 8 ++++++--
modules/shared.py | 2 +-
3 files changed, 8 insertions(+), 3 deletions(-)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index 44fe1a6c..53f34b0a 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -77,6 +77,7 @@ def integrate_settings_paste_fields(component_dict):
'inpainting_mask_weight': 'Conditional mask weight',
'sd_model_checkpoint': 'Model hash',
'eta_noise_seed_delta': 'ENSD',
+ 'initial_noise_multiplier': 'Noise multiplier',
}
settings_paste_fields = [
(component_dict[k], lambda d, k=k, v=v: ui.apply_setting(k, d.get(v, None)))
diff --git a/modules/processing.py b/modules/processing.py
index dd22a2fa..81400d14 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -764,7 +764,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
sampler = None
- def __init__(self, init_images: list=None, resize_mode: int=0, denoising_strength: float=0.75, mask: Any=None, mask_blur: int=4, inpainting_fill: int=0, inpaint_full_res: bool=True, inpaint_full_res_padding: int=0, inpainting_mask_invert: int=0, **kwargs):
+ def __init__(self, init_images: list = None, resize_mode: int = 0, denoising_strength: float = 0.75, mask: Any = None, mask_blur: int = 4, inpainting_fill: int = 0, inpaint_full_res: bool = True, inpaint_full_res_padding: int = 0, inpainting_mask_invert: int = 0, initial_noise_multiplier: float = None, **kwargs):
super().__init__(**kwargs)
self.init_images = init_images
@@ -779,6 +779,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
self.inpaint_full_res = inpaint_full_res
self.inpaint_full_res_padding = inpaint_full_res_padding
self.inpainting_mask_invert = inpainting_mask_invert
+ self.initial_noise_multiplier = opts.initial_noise_multiplier if initial_noise_multiplier is None else initial_noise_multiplier
self.mask = None
self.nmask = None
self.image_conditioning = None
@@ -891,7 +892,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
- x = x*shared.opts.initial_noise_multiplier
+
+ if self.initial_noise_multiplier != 1.0:
+ self.extra_generation_params["Noise multiplier"] = self.initial_noise_multiplier
+ x *= self.initial_noise_multiplier
samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
diff --git a/modules/shared.py b/modules/shared.py
index 67f8f77b..200693fe 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -359,7 +359,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
"sd_hypernetwork": OptionInfo("None", "Hypernetwork", gr.Dropdown, lambda: {"choices": ["None"] + [x for x in hypernetworks.keys()]}, refresh=reload_hypernetworks),
"sd_hypernetwork_strength": OptionInfo(1.0, "Hypernetwork strength", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.001}),
"inpainting_mask_weight": OptionInfo(1.0, "Inpainting conditioning mask strength", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}),
- "initial_noise_multiplier": OptionInfo(1.0, "Multiply initial noise by this factor, may result in less or more detailed img2img", gr.Slider, {"minimum": 0.5, "maximum": 1.5, "step": 0.01 }),
+ "initial_noise_multiplier": OptionInfo(1.0, "Noise multiplier for img2img", gr.Slider, {"minimum": 0.5, "maximum": 1.5, "step": 0.01 }),
"img2img_color_correction": OptionInfo(False, "Apply color correction to img2img results to match original colors."),
"img2img_fix_steps": OptionInfo(False, "With img2img, do exactly the amount of steps the slider specifies (normally you'd do less with less denoising)."),
"enable_quantization": OptionInfo(False, "Enable quantization in K samplers for sharper and cleaner results. This may change existing seeds. Requires restart to apply."),
--
cgit v1.2.1
From d06592267c745b4732026c4e0c499c9a4b3900a1 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 10 Dec 2022 13:46:18 +0300
Subject: use less javascript for this non-js-only implementation of the clear
prompt button.
---
modules/shared.py | 1 -
modules/ui.py | 23 +++++++++++------------
2 files changed, 11 insertions(+), 13 deletions(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index 4223c017..44922c91 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -401,7 +401,6 @@ options_templates.update(options_section(('ui', "User interface"), {
"js_modal_lightbox": OptionInfo(True, "Enable full page image viewer"),
"js_modal_lightbox_initially_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"),
"show_progress_in_title": OptionInfo(True, "Show generation progress in window title."),
- "clear_prompt_visible": OptionInfo(True, "Show clear prompt button"),
'quicksettings': OptionInfo("sd_model_checkpoint", "Quicksettings list"),
'localization': OptionInfo("None", "Localization (requires restart)", gr.Dropdown, lambda: {"choices": ["None"] + list(localization.localizations.keys())}, refresh=lambda: localization.list_localizations(cmd_opts.localizations_dir)),
}))
diff --git a/modules/ui.py b/modules/ui.py
index c0a7ca8b..28481e33 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -403,16 +403,17 @@ def create_toprow(is_img2img):
paste = gr.Button(value=paste_symbol, elem_id="paste")
save_style = gr.Button(value=save_style_symbol, elem_id="style_create")
prompt_style_apply = gr.Button(value=apply_style_symbol, elem_id="style_apply")
-
- clear_prompt_button = gr.Button(
- value=clear_prompt_symbol,
- elem_id="clear_prompt",
- visible=opts.clear_prompt_visible
- )
-
+ clear_prompt_button = gr.Button(value=clear_prompt_symbol, elem_id=f"{id_part}_clear_prompt")
token_counter = gr.HTML(value=" ", elem_id=f"{id_part}_token_counter")
token_button = gr.Button(visible=False, elem_id=f"{id_part}_token_button")
+ clear_prompt_button.click(
+ fn=lambda *x: x,
+ _js="confirm_clear_prompt",
+ inputs=[prompt, negative_prompt],
+ outputs=[prompt, negative_prompt],
+ )
+
button_interrogate = None
button_deepbooru = None
if is_img2img:
@@ -447,7 +448,7 @@ def create_toprow(is_img2img):
prompt_style2 = gr.Dropdown(label="Style 2", elem_id=f"{id_part}_style2_index", choices=[k for k, v in shared.prompt_styles.styles.items()], value=next(iter(shared.prompt_styles.styles.keys())))
prompt_style2.save_to_config = True
- return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, button_interrogate, button_deepbooru, prompt_style_apply, save_style, paste, token_counter, token_button, clear_prompt_button
+ return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, button_interrogate, button_deepbooru, prompt_style_apply, save_style, paste, token_counter, token_button
def setup_progressbar(progressbar, preview, id_part, textinfo=None):
@@ -634,7 +635,7 @@ def create_ui():
modules.scripts.scripts_txt2img.initialize_scripts(is_img2img=False)
with gr.Blocks(analytics_enabled=False) as txt2img_interface:
- txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _,txt2img_prompt_style_apply, txt2img_save_style, txt2img_paste, token_counter, token_button, clear_prompt_button = create_toprow(is_img2img=False)
+ txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _,txt2img_prompt_style_apply, txt2img_save_style, txt2img_paste, token_counter, token_button = create_toprow(is_img2img=False)
dummy_component = gr.Label(visible=False)
txt_prompt_img = gr.File(label="", elem_id="txt2img_prompt_image", file_count="single", type="bytes", visible=False)
@@ -686,7 +687,6 @@ def create_ui():
connect_reuse_seed(seed, reuse_seed, generation_info, dummy_component, is_subseed=False)
connect_reuse_seed(subseed, reuse_subseed, generation_info, dummy_component, is_subseed=True)
- connect_clear_prompt(clear_prompt_button)
txt2img_args = dict(
fn=wrap_gradio_gpu_call(modules.txt2img.txt2img),
@@ -793,7 +793,7 @@ def create_ui():
modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True)
with gr.Blocks(analytics_enabled=False) as img2img_interface:
- img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_deepbooru, img2img_prompt_style_apply, img2img_save_style, img2img_paste,token_counter, token_button, clear_prompt_button = create_toprow(is_img2img=True)
+ img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_deepbooru, img2img_prompt_style_apply, img2img_save_style, img2img_paste,token_counter, token_button = create_toprow(is_img2img=True)
with gr.Row(elem_id='img2img_progress_row'):
@@ -884,7 +884,6 @@ def create_ui():
connect_reuse_seed(seed, reuse_seed, generation_info, dummy_component, is_subseed=False)
connect_reuse_seed(subseed, reuse_subseed, generation_info, dummy_component, is_subseed=True)
- connect_clear_prompt(clear_prompt_button)
img2img_prompt_img.change(
fn=modules.images.image_data,
--
cgit v1.2.1
From 991e2dcee9d6baa66b5c0b1969c4c07407be933a Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 10 Dec 2022 14:54:02 +0300
Subject: remove NSFW filter and its dependency; if you still want it, find it
in the extensions section
---
modules/processing.py | 7 +++----
modules/safety.py | 42 ------------------------------------------
modules/scripts.py | 20 ++++++++++++++++++++
modules/shared.py | 1 -
4 files changed, 23 insertions(+), 47 deletions(-)
delete mode 100644 modules/safety.py
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index 81400d14..056c9322 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -13,7 +13,7 @@ from skimage import exposure
from typing import Any, Dict, List, Optional
import modules.sd_hijack
-from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste
+from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste, script_callbacks
from modules.sd_hijack import model_hijack
from modules.shared import opts, cmd_opts, state
import modules.shared as shared
@@ -571,9 +571,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
devices.torch_gc()
- if opts.filter_nsfw:
- import modules.safety as safety
- x_samples_ddim = modules.safety.censor_batch(x_samples_ddim)
+ if p.scripts is not None:
+ p.scripts.postprocess_batch(p, x_samples_ddim, batch_number=n)
for i, x_sample in enumerate(x_samples_ddim):
x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
diff --git a/modules/safety.py b/modules/safety.py
deleted file mode 100644
index cff4b278..00000000
--- a/modules/safety.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import torch
-from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
-from transformers import AutoFeatureExtractor
-from PIL import Image
-
-import modules.shared as shared
-
-safety_model_id = "CompVis/stable-diffusion-safety-checker"
-safety_feature_extractor = None
-safety_checker = None
-
-def numpy_to_pil(images):
- """
- Convert a numpy image or a batch of images to a PIL image.
- """
- if images.ndim == 3:
- images = images[None, ...]
- images = (images * 255).round().astype("uint8")
- pil_images = [Image.fromarray(image) for image in images]
-
- return pil_images
-
-# check and replace nsfw content
-def check_safety(x_image):
- global safety_feature_extractor, safety_checker
-
- if safety_feature_extractor is None:
- safety_feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id)
- safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id)
-
- safety_checker_input = safety_feature_extractor(numpy_to_pil(x_image), return_tensors="pt")
- x_checked_image, has_nsfw_concept = safety_checker(images=x_image, clip_input=safety_checker_input.pixel_values)
-
- return x_checked_image, has_nsfw_concept
-
-
-def censor_batch(x):
- x_samples_ddim_numpy = x.cpu().permute(0, 2, 3, 1).numpy()
- x_checked_image, has_nsfw_concept = check_safety(x_samples_ddim_numpy)
- x = torch.from_numpy(x_checked_image).permute(0, 3, 1, 2)
-
- return x
diff --git a/modules/scripts.py b/modules/scripts.py
index b934d881..23ca195d 100644
--- a/modules/scripts.py
+++ b/modules/scripts.py
@@ -88,6 +88,17 @@ class Script:
pass
+ def postprocess_batch(self, p, *args, **kwargs):
+ """
+ Same as process_batch(), but called for every batch after it has been generated.
+
+ **kwargs will have same items as process_batch, and also:
+ - batch_number - index of current batch, from 0 to number of batches-1
+ - images - torch tensor with all generated images, with values ranging from 0 to 1;
+ """
+
+ pass
+
def postprocess(self, p, processed, *args):
"""
This function is called after processing ends for AlwaysVisible scripts.
@@ -347,6 +358,15 @@ class ScriptRunner:
print(f"Error running postprocess: {script.filename}", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
+ def postprocess_batch(self, p, images, **kwargs):
+ for script in self.alwayson_scripts:
+ try:
+ script_args = p.script_args[script.args_from:script.args_to]
+ script.postprocess_batch(p, *script_args, images=images, **kwargs)
+ except Exception:
+ print(f"Error running postprocess_batch: {script.filename}", file=sys.stderr)
+ print(traceback.format_exc(), file=sys.stderr)
+
def before_component(self, component, **kwargs):
for script in self.scripts:
try:
diff --git a/modules/shared.py b/modules/shared.py
index 44922c91..272267c1 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -367,7 +367,6 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
"use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. Can be useful to reproduce old seeds."),
"enable_batch_seeds": OptionInfo(True, "Make K-diffusion samplers produce same images in a batch as when making a single image"),
"comma_padding_backtrack": OptionInfo(20, "Increase coherency by padding from the last comma within n tokens when using more than 75 tokens", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1 }),
- "filter_nsfw": OptionInfo(False, "Filter NSFW content"),
'CLIP_stop_at_last_layers': OptionInfo(1, "Clip skip", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}),
"random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}),
}))
--
cgit v1.2.1
From 713c48ddd7f296fe064cf58af7baa31aa5fcffb3 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 10 Dec 2022 15:05:22 +0300
Subject: add an 'installed' tag to extensions
---
modules/ui_extensions.py | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
(limited to 'modules')
diff --git a/modules/ui_extensions.py b/modules/ui_extensions.py
index b487ac25..1434f25f 100644
--- a/modules/ui_extensions.py
+++ b/modules/ui_extensions.py
@@ -206,12 +206,13 @@ def refresh_available_extensions_from_data(hide_tags):
if url is None:
continue
+ existing = installed_extension_urls.get(normalize_git_url(url), None)
+ extension_tags = extension_tags + ["installed"] if existing else extension_tags
+
if len([x for x in extension_tags if x in tags_to_hide]) > 0:
hidden += 1
continue
- existing = installed_extension_urls.get(normalize_git_url(url), None)
-
install_code = f""" """
tags_text = ", ".join([f"{x} " for x in extension_tags])
@@ -222,7 +223,11 @@ def refresh_available_extensions_from_data(hide_tags):
{html.escape(description)}
{install_code}
- """
+
+ """
+
+ for tag in [x for x in extension_tags if x not in tags]:
+ tags[tag] = tag
code += """
@@ -272,7 +277,7 @@ def create_ui():
install_extension_button = gr.Button(elem_id="install_extension_button", visible=False)
with gr.Row():
- hide_tags = gr.CheckboxGroup(value=["ads", "localization"], label="Hide extensions with tags", choices=["script", "ads", "localization"])
+ hide_tags = gr.CheckboxGroup(value=["ads", "localization", "installed"], label="Hide extensions with tags", choices=["script", "ads", "localization", "installed"])
install_result = gr.HTML()
available_extensions_table = gr.HTML()
--
cgit v1.2.1
From a1c8ad88283f7b3e861e4722c71e39bf71eec744 Mon Sep 17 00:00:00 2001
From: MrCheeze
Date: Sat, 10 Dec 2022 11:02:47 -0500
Subject: unload depth model if medvram/lowvram enabled
---
modules/lowvram.py | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
(limited to 'modules')
diff --git a/modules/lowvram.py b/modules/lowvram.py
index aa464a95..042a0254 100644
--- a/modules/lowvram.py
+++ b/modules/lowvram.py
@@ -55,18 +55,20 @@ def setup_for_low_vram(sd_model, use_medvram):
if hasattr(sd_model.cond_stage_model, 'model'):
sd_model.cond_stage_model.transformer = sd_model.cond_stage_model.model
- # remove three big modules, cond, first_stage, and unet from the model and then
+ # remove four big modules, cond, first_stage, depth (if applicable), and unet from the model and then
# send the model to GPU. Then put modules back. the modules will be in CPU.
- stored = sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model
- sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model = None, None, None
+ stored = sd_model.cond_stage_model.transformer, sd_model.first_stage_model, getattr(sd_model, 'depth_model', None), sd_model.model
+ sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.depth_model, sd_model.model = None, None, None, None
sd_model.to(devices.device)
- sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.model = stored
+ sd_model.cond_stage_model.transformer, sd_model.first_stage_model, sd_model.depth_model, sd_model.model = stored
- # register hooks for those the first two models
+ # register hooks for those the first three models
sd_model.cond_stage_model.transformer.register_forward_pre_hook(send_me_to_gpu)
sd_model.first_stage_model.register_forward_pre_hook(send_me_to_gpu)
sd_model.first_stage_model.encode = first_stage_model_encode_wrap
sd_model.first_stage_model.decode = first_stage_model_decode_wrap
+ if sd_model.depth_model:
+ sd_model.depth_model.register_forward_pre_hook(send_me_to_gpu)
parents[sd_model.cond_stage_model.transformer] = sd_model.cond_stage_model
if hasattr(sd_model.cond_stage_model, 'model'):
--
cgit v1.2.1
From bd81a09eacf02dad095b98094ab936f276d0343f Mon Sep 17 00:00:00 2001
From: MrCheeze
Date: Sat, 10 Dec 2022 11:29:26 -0500
Subject: fix support for 2.0 inpainting model while maintaining support for
1.5 inpainting model
---
modules/sd_hijack_inpainting.py | 3 +--
modules/sd_models.py | 1 +
2 files changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py
index 938f9a58..5018b047 100644
--- a/modules/sd_hijack_inpainting.py
+++ b/modules/sd_hijack_inpainting.py
@@ -324,12 +324,11 @@ def should_hijack_inpainting(checkpoint_info):
def do_inpainting_hijack():
# most of this stuff seems to no longer be needed because it is already included into SD2.0
- # LatentInpaintDiffusion remains because SD2.0's LatentInpaintDiffusion can't be loaded without specifying a checkpoint
# p_sample_plms is needed because PLMS can't work with dicts as conditionings
# this file should be cleaned up later if weverything tuens out to work fine
# ldm.models.diffusion.ddpm.get_unconditional_conditioning = get_unconditional_conditioning
- ldm.models.diffusion.ddpm.LatentInpaintDiffusion = LatentInpaintDiffusion
+ # ldm.models.diffusion.ddpm.LatentInpaintDiffusion = LatentInpaintDiffusion
# ldm.models.diffusion.ddim.DDIMSampler.p_sample_ddim = p_sample_ddim
# ldm.models.diffusion.ddim.DDIMSampler.sample = sample_ddim
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 5b37f3fe..b64f573f 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -296,6 +296,7 @@ def load_model(checkpoint_info=None):
sd_config.model.params.use_ema = False
sd_config.model.params.conditioning_key = "hybrid"
sd_config.model.params.unet_config.params.in_channels = 9
+ sd_config.model.params.finetune_keys = None
# Create a "fake" config with a different name so that we know to unload it when switching models.
checkpoint_info = checkpoint_info._replace(config=checkpoint_info.config.replace(".yaml", "-inpainting.yaml"))
--
cgit v1.2.1
From 59c6511494c55a578eecdf71fb4590b6bd5d04a7 Mon Sep 17 00:00:00 2001
From: Dean van Dugteren <31391056+deanpress@users.noreply.github.com>
Date: Sun, 11 Dec 2022 17:08:51 +0100
Subject: fix: fallback model_checkpoint if it's empty
This fixes the following error when SD attempts to start with a deleted checkpoint:
```
Traceback (most recent call last):
File "D:\Web\stable-diffusion-webui\launch.py", line 295, in
start()
File "D:\Web\stable-diffusion-webui\launch.py", line 290, in start
webui.webui()
File "D:\Web\stable-diffusion-webui\webui.py", line 132, in webui
initialize()
File "D:\Web\stable-diffusion-webui\webui.py", line 62, in initialize
modules.sd_models.load_model()
File "D:\Web\stable-diffusion-webui\modules\sd_models.py", line 283, in load_model
checkpoint_info = checkpoint_info or select_checkpoint()
File "D:\Web\stable-diffusion-webui\modules\sd_models.py", line 117, in select_checkpoint
checkpoint_info = checkpoints_list.get(model_checkpoint, None)
TypeError: unhashable type: 'list'
```
---
modules/sd_models.py | 4 ++++
1 file changed, 4 insertions(+)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 5b37f3fe..b6d75db7 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -111,6 +111,10 @@ def model_hash(filename):
def select_checkpoint():
model_checkpoint = shared.opts.sd_model_checkpoint
+
+ if len(model_checkpoint) == 0:
+ model_checkpoint = shared.default_sd_model_file
+
checkpoint_info = checkpoints_list.get(model_checkpoint, None)
if checkpoint_info is not None:
return checkpoint_info
--
cgit v1.2.1
From ec0a48826fb41c1b1baab45a9030f7eb55568fd0 Mon Sep 17 00:00:00 2001
From: MrCheeze
Date: Sun, 11 Dec 2022 10:19:46 -0500
Subject: unconditionally set use_ema=False if value not specified (True never
worked, and all configs except v1-inpainting-inference.yaml already correctly
set it to False)
---
modules/sd_models.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index b64f573f..f36b299f 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -293,7 +293,6 @@ def load_model(checkpoint_info=None):
if should_hijack_inpainting(checkpoint_info):
# Hardcoded config for now...
sd_config.model.target = "ldm.models.diffusion.ddpm.LatentInpaintDiffusion"
- sd_config.model.params.use_ema = False
sd_config.model.params.conditioning_key = "hybrid"
sd_config.model.params.unet_config.params.in_channels = 9
sd_config.model.params.finetune_keys = None
@@ -301,6 +300,9 @@ def load_model(checkpoint_info=None):
# Create a "fake" config with a different name so that we know to unload it when switching models.
checkpoint_info = checkpoint_info._replace(config=checkpoint_info.config.replace(".yaml", "-inpainting.yaml"))
+ if not hasattr(sd_config.model.params, "use_ema"):
+ sd_config.model.params.use_ema = False
+
do_inpainting_hijack()
if shared.cmd_opts.no_half:
--
cgit v1.2.1
From 960293d6b24f380f5744c94c9a46acaae6cc8c04 Mon Sep 17 00:00:00 2001
From: Dean Hopkins
Date: Sun, 11 Dec 2022 19:16:44 +0000
Subject: API endpoint to refresh checkpoints
API endpoint to refresh checkpoints
---
modules/api/api.py | 4 ++++
1 file changed, 4 insertions(+)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 89935a70..14d0baaa 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -96,6 +96,7 @@ class Api:
self.add_api_route("/sdapi/v1/prompt-styles", self.get_promp_styles, methods=["GET"], response_model=List[PromptStyleItem])
self.add_api_route("/sdapi/v1/artist-categories", self.get_artists_categories, methods=["GET"], response_model=List[str])
self.add_api_route("/sdapi/v1/artists", self.get_artists, methods=["GET"], response_model=List[ArtistItem])
+ self.add_api_route("/sdapi/v1/refresh-checkpoints", self.refresh_checkpoints, methods=["POST"])
def add_api_route(self, path: str, endpoint, **kwargs):
if shared.cmd_opts.api_auth:
@@ -321,6 +322,9 @@ class Api:
def get_artists(self):
return [{"name":x[0], "score":x[1], "category":x[2]} for x in shared.artist_db.artists]
+
+ def refresh_checkpoints(self):
+ shared.refresh_checkpoints()
def launch(self, server_name, port):
self.app.include_router(self.router)
--
cgit v1.2.1
From 2e8b5418e3cd4e9212f2fcdb36305d7a40f97916 Mon Sep 17 00:00:00 2001
From: ThereforeGames <95403634+ThereforeGames@users.noreply.github.com>
Date: Sun, 11 Dec 2022 18:03:36 -0500
Subject: Improve color correction with luminosity blend
---
modules/processing.py | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index 24c537d1..bc841837 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -27,6 +27,7 @@ from ldm.data.util import AddMiDaS
from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion
from einops import repeat, rearrange
+from blendmodes.blend import blendLayers, BlendType
# some of those options should not be changed at all because they would break the model, so I removed them from options.
opt_C = 4
@@ -39,17 +40,19 @@ def setup_color_correction(image):
return correction_target
-def apply_color_correction(correction, image):
+def apply_color_correction(correction, original_image):
logging.info("Applying color correction.")
image = Image.fromarray(cv2.cvtColor(exposure.match_histograms(
cv2.cvtColor(
- np.asarray(image),
+ np.asarray(original_image),
cv2.COLOR_RGB2LAB
),
correction,
channel_axis=2
), cv2.COLOR_LAB2RGB).astype("uint8"))
-
+
+ image = blendLayers(image, original_image, BlendType.LUMINOSITY)
+
return image
--
cgit v1.2.1
From 7077428209cd02f7da23ef843e5027e960f6aa39 Mon Sep 17 00:00:00 2001
From: space-nuko <24979496+space-nuko@users.noreply.github.com>
Date: Tue, 13 Dec 2022 13:05:40 -0800
Subject: Save hypernetwork hash in infotext
---
modules/processing.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index 24c537d1..6dd7491b 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -314,7 +314,7 @@ class Processed:
return json.dumps(obj)
- def infotext(self, p: StableDiffusionProcessing, index):
+ def infotext(self, p: StableDiffusionProcessing, index):
return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], position_in_batch=index % self.batch_size, iteration=index // self.batch_size)
@@ -429,6 +429,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
"Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash),
"Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')),
"Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name),
+ "Hypernet hash": (None if shared.loaded_hypernetwork is None else sd_models.model_hash(shared.loaded_hypernetwork.filename)),
"Hypernet strength": (None if shared.loaded_hypernetwork is None or shared.opts.sd_hypernetwork_strength >= 1 else shared.opts.sd_hypernetwork_strength),
"Batch size": (None if p.batch_size < 2 else p.batch_size),
"Batch pos": (None if p.batch_size < 2 else position_in_batch),
@@ -446,7 +447,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None])
- negative_prompt_text = "\nNegative prompt: " + p.all_negative_prompts[index] if p.all_negative_prompts[index] else ""
+ negative_prompt_text = "\nNegative prompt: " + p.all_negative_prompts[index] if p.all_negative_prompts[index] else ""
return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip()
--
cgit v1.2.1
From 9d5948e5f7324b98fa7445accb2fe14487ff809d Mon Sep 17 00:00:00 2001
From: space-nuko <24979496+space-nuko@users.noreply.github.com>
Date: Tue, 13 Dec 2022 14:25:16 -0800
Subject: Correctly restore hypernetwork from hash
---
modules/generation_parameters_copypaste.py | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index 565e342d..e4e1d41c 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -14,6 +14,7 @@ re_param_code = r'\s*([\w ]+):\s*("(?:\\|\"|[^\"])+"|[^,]*)(?:,|$)'
re_param = re.compile(re_param_code)
re_params = re.compile(r"^(?:" + re_param_code + "){3,}$")
re_imagesize = re.compile(r"^(\d+)x(\d+)$")
+re_hypernet_hash = re.compile("\(([0-9a-f]+)\)$")
type_of_gr_update = type(gr.update())
paste_fields = {}
bind_list = []
@@ -139,6 +140,30 @@ def run_bind():
)
+def find_hypernetwork_key(hypernet_name, hypernet_hash=None):
+ """Determines the config parameter name to use for the hypernet based on the parameters in the infotext.
+
+ Example: an infotext provides "Hypernet: ke-ta" and "Hypernet hash: 1234abcd". For the "Hypernet" config
+ parameter this means there should be an entry that looks like "ke-ta-10000(1234abcd)" to set it to.
+
+ If the infotext has no hash, then a hypernet with the same name and the most steps will be selected instead.
+ """
+ hypernet_name = hypernet_name.lower()
+ if hypernet_hash is not None:
+ # Try to match the hash in the name
+ for hypernet_key in shared.hypernetworks.keys():
+ result = re_hypernet_hash.search(hypernet_key)
+ if result is not None and result[1] == hypernet_hash:
+ return hypernet_key
+ else:
+ # Fall back to a hypernet with the same name
+ for hypernet_key in shared.hypernetworks.keys():
+ if hypernet_key.lower().startswith(hypernet_name):
+ return hypernet_key
+
+ return None
+
+
def parse_generation_parameters(x: str):
"""parses generation parameters string, the one you see in text field under the picture in UI:
```
@@ -188,6 +213,11 @@ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model
if "Clip skip" not in res:
res["Clip skip"] = "1"
+ if "Hypernet" in res:
+ hypernet_name = res["Hypernet"]
+ hypernet_hash = res.get("Hypernet hash", None)
+ res["Hypernet"] = find_hypernetwork_key(hypernet_name, hypernet_hash)
+
return res
--
cgit v1.2.1
From 1fcb9595143fc352240635959ea5b9929c02dca6 Mon Sep 17 00:00:00 2001
From: space-nuko <24979496+space-nuko@users.noreply.github.com>
Date: Tue, 13 Dec 2022 14:30:54 -0800
Subject: Correctly restore default hypernetwork strength
---
modules/generation_parameters_copypaste.py | 3 +++
1 file changed, 3 insertions(+)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index e4e1d41c..a33f8d5c 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -213,6 +213,9 @@ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model
if "Clip skip" not in res:
res["Clip skip"] = "1"
+ if "Hypernet strength" not in res:
+ res["Hypernet strength"] = "1"
+
if "Hypernet" in res:
hypernet_name = res["Hypernet"]
hypernet_hash = res.get("Hypernet hash", None)
--
cgit v1.2.1
From 5f407ebd61bb5c1ca025c5d7fa642e32ac0526ce Mon Sep 17 00:00:00 2001
From: space-nuko <24979496+space-nuko@users.noreply.github.com>
Date: Tue, 13 Dec 2022 14:32:26 -0800
Subject: Fix comment
---
modules/generation_parameters_copypaste.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index a33f8d5c..fbd91300 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -146,7 +146,7 @@ def find_hypernetwork_key(hypernet_name, hypernet_hash=None):
Example: an infotext provides "Hypernet: ke-ta" and "Hypernet hash: 1234abcd". For the "Hypernet" config
parameter this means there should be an entry that looks like "ke-ta-10000(1234abcd)" to set it to.
- If the infotext has no hash, then a hypernet with the same name and the most steps will be selected instead.
+ If the infotext has no hash, then a hypernet with the same name will be selected instead.
"""
hypernet_name = hypernet_name.lower()
if hypernet_hash is not None:
--
cgit v1.2.1
From 957e15c4642199e0792eba817a15e244246fb012 Mon Sep 17 00:00:00 2001
From: Yuval Aboulafia
Date: Wed, 14 Dec 2022 20:59:33 +0200
Subject: Correct singleton comparisons
---
modules/extras.py | 2 +-
modules/ngrok.py | 4 ++--
modules/ui.py | 8 ++++++--
3 files changed, 9 insertions(+), 5 deletions(-)
(limited to 'modules')
diff --git a/modules/extras.py b/modules/extras.py
index 0ad8deec..69b85465 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -188,7 +188,7 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
for op in extras_ops:
image, info = op(image, info)
- if opts.use_original_name_batch and image_name != None:
+ if opts.use_original_name_batch and image_name is not None:
basename = os.path.splitext(os.path.basename(image_name))[0]
else:
basename = ''
diff --git a/modules/ngrok.py b/modules/ngrok.py
index 64c9a3c2..3df2c06b 100644
--- a/modules/ngrok.py
+++ b/modules/ngrok.py
@@ -2,7 +2,7 @@ from pyngrok import ngrok, conf, exception
def connect(token, port, region):
account = None
- if token == None:
+ if token is None:
token = 'None'
else:
if ':' in token:
@@ -14,7 +14,7 @@ def connect(token, port, region):
auth_token=token, region=region
)
try:
- if account == None:
+ if account is None:
public_url = ngrok.connect(port, pyngrok_config=config, bind_tls=True).public_url
else:
public_url = ngrok.connect(port, pyngrok_config=config, bind_tls=True, auth=account).public_url
diff --git a/modules/ui.py b/modules/ui.py
index 28481e33..c4bb186d 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -49,10 +49,14 @@ if not cmd_opts.share and not cmd_opts.listen:
gradio.utils.version_check = lambda: None
gradio.utils.get_local_ip_address = lambda: '127.0.0.1'
-if cmd_opts.ngrok != None:
+if cmd_opts.ngrok is not None:
import modules.ngrok as ngrok
print('ngrok authtoken detected, trying to connect...')
- ngrok.connect(cmd_opts.ngrok, cmd_opts.port if cmd_opts.port != None else 7860, cmd_opts.ngrok_region)
+ ngrok.connect(
+ cmd_opts.ngrok,
+ cmd_opts.port if cmd_opts.port is not None else 7860,
+ cmd_opts.ngrok_region
+ )
def gr_show(visible=True):
--
cgit v1.2.1
From c0355caefe3d82e304e6d832699d581fc8f9fbf9 Mon Sep 17 00:00:00 2001
From: Jim Hays
Date: Wed, 14 Dec 2022 21:01:32 -0500
Subject: Fix various typos
---
modules/api/api.py | 18 +++++++++---------
modules/api/models.py | 2 +-
modules/images.py | 4 ++--
modules/processing.py | 14 +++++++-------
modules/safe.py | 4 ++--
modules/scripts.py | 4 ++--
modules/sd_hijack_inpainting.py | 6 +++---
modules/sd_hijack_unet.py | 2 +-
modules/textual_inversion/dataset.py | 10 +++++-----
modules/textual_inversion/textual_inversion.py | 16 ++++++++--------
10 files changed, 40 insertions(+), 40 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 89935a70..33845045 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -67,10 +67,10 @@ def encode_pil_to_base64(image):
class Api:
def __init__(self, app: FastAPI, queue_lock: Lock):
if shared.cmd_opts.api_auth:
- self.credenticals = dict()
+ self.credentials = dict()
for auth in shared.cmd_opts.api_auth.split(","):
user, password = auth.split(":")
- self.credenticals[user] = password
+ self.credentials[user] = password
self.router = APIRouter()
self.app = app
@@ -93,7 +93,7 @@ class Api:
self.add_api_route("/sdapi/v1/hypernetworks", self.get_hypernetworks, methods=["GET"], response_model=List[HypernetworkItem])
self.add_api_route("/sdapi/v1/face-restorers", self.get_face_restorers, methods=["GET"], response_model=List[FaceRestorerItem])
self.add_api_route("/sdapi/v1/realesrgan-models", self.get_realesrgan_models, methods=["GET"], response_model=List[RealesrganItem])
- self.add_api_route("/sdapi/v1/prompt-styles", self.get_promp_styles, methods=["GET"], response_model=List[PromptStyleItem])
+ self.add_api_route("/sdapi/v1/prompt-styles", self.get_prompt_styles, methods=["GET"], response_model=List[PromptStyleItem])
self.add_api_route("/sdapi/v1/artist-categories", self.get_artists_categories, methods=["GET"], response_model=List[str])
self.add_api_route("/sdapi/v1/artists", self.get_artists, methods=["GET"], response_model=List[ArtistItem])
@@ -102,9 +102,9 @@ class Api:
return self.app.add_api_route(path, endpoint, dependencies=[Depends(self.auth)], **kwargs)
return self.app.add_api_route(path, endpoint, **kwargs)
- def auth(self, credenticals: HTTPBasicCredentials = Depends(HTTPBasic())):
- if credenticals.username in self.credenticals:
- if compare_digest(credenticals.password, self.credenticals[credenticals.username]):
+ def auth(self, credentials: HTTPBasicCredentials = Depends(HTTPBasic())):
+ if credentials.username in self.credentials:
+ if compare_digest(credentials.password, self.credentials[credentials.username]):
return True
raise HTTPException(status_code=401, detail="Incorrect username or password", headers={"WWW-Authenticate": "Basic"})
@@ -239,7 +239,7 @@ class Api:
def interrogateapi(self, interrogatereq: InterrogateRequest):
image_b64 = interrogatereq.image
if image_b64 is None:
- raise HTTPException(status_code=404, detail="Image not found")
+ raise HTTPException(status_code=404, detail="Image not found")
img = decode_base64_to_image(image_b64)
img = img.convert('RGB')
@@ -252,7 +252,7 @@ class Api:
processed = deepbooru.model.tag(img)
else:
raise HTTPException(status_code=404, detail="Model not found")
-
+
return InterrogateResponse(caption=processed)
def interruptapi(self):
@@ -308,7 +308,7 @@ class Api:
def get_realesrgan_models(self):
return [{"name":x.name,"path":x.data_path, "scale":x.scale} for x in get_realesrgan_models(None)]
- def get_promp_styles(self):
+ def get_prompt_styles(self):
styleList = []
for k in shared.prompt_styles.styles:
style = shared.prompt_styles.styles[k]
diff --git a/modules/api/models.py b/modules/api/models.py
index f77951fc..a22bc6b3 100644
--- a/modules/api/models.py
+++ b/modules/api/models.py
@@ -128,7 +128,7 @@ class ExtrasBaseRequest(BaseModel):
upscaling_resize: float = Field(default=2, title="Upscaling Factor", ge=1, le=4, description="By how much to upscale the image, only used when resize_mode=0.")
upscaling_resize_w: int = Field(default=512, title="Target Width", ge=1, description="Target width for the upscaler to hit. Only used when resize_mode=1.")
upscaling_resize_h: int = Field(default=512, title="Target Height", ge=1, description="Target height for the upscaler to hit. Only used when resize_mode=1.")
- upscaling_crop: bool = Field(default=True, title="Crop to fit", description="Should the upscaler crop the image to fit in the choosen size?")
+ upscaling_crop: bool = Field(default=True, title="Crop to fit", description="Should the upscaler crop the image to fit in the chosen size?")
upscaler_1: str = Field(default="None", title="Main upscaler", description=f"The name of the main upscaler to use, it has to be one of this list: {' , '.join([x.name for x in sd_upscalers])}")
upscaler_2: str = Field(default="None", title="Secondary upscaler", description=f"The name of the secondary upscaler to use, it has to be one of this list: {' , '.join([x.name for x in sd_upscalers])}")
extras_upscaler_2_visibility: float = Field(default=0, title="Secondary upscaler visibility", ge=0, le=1, allow_inf_nan=False, description="Sets the visibility of secondary upscaler, values should be between 0 and 1.")
diff --git a/modules/images.py b/modules/images.py
index 8146f580..93a14289 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -429,7 +429,7 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i
The directory to save the image. Note, the option `save_to_dirs` will make the image to be saved into a sub directory.
basename (`str`):
The base filename which will be applied to `filename pattern`.
- seed, prompt, short_filename,
+ seed, prompt, short_filename,
extension (`str`):
Image file extension, default is `png`.
pngsectionname (`str`):
@@ -590,7 +590,7 @@ def read_info_from_image(image):
Negative prompt: {json_info["uc"]}
Steps: {json_info["steps"]}, Sampler: {sampler}, CFG scale: {json_info["scale"]}, Seed: {json_info["seed"]}, Size: {image.width}x{image.height}, Clip skip: 2, ENSD: 31337"""
except Exception:
- print(f"Error parsing NovelAI iamge generation parameters:", file=sys.stderr)
+ print(f"Error parsing NovelAI image generation parameters:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
return geninfo, items
diff --git a/modules/processing.py b/modules/processing.py
index 24c537d1..fe7f4faf 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -147,11 +147,11 @@ class StableDiffusionProcessing():
# The "masked-image" in this case will just be all zeros since the entire image is masked.
image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
- image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning))
+ image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning))
# Add the fake full 1s mask to the first dimension.
image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
- image_conditioning = image_conditioning.to(x.dtype)
+ image_conditioning = image_conditioning.to(x.dtype)
return image_conditioning
@@ -199,7 +199,7 @@ class StableDiffusionProcessing():
source_image * (1.0 - conditioning_mask),
getattr(self, "inpainting_mask_weight", shared.opts.inpainting_mask_weight)
)
-
+
# Encode the new masked image using first stage of network.
conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image))
@@ -537,7 +537,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
for n in range(p.n_iter):
if state.skipped:
state.skipped = False
-
+
if state.interrupted:
break
@@ -612,7 +612,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
image.info["parameters"] = text
output_images.append(image)
- del x_samples_ddim
+ del x_samples_ddim
devices.torch_gc()
@@ -704,7 +704,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2]
- """saves image before applying hires fix, if enabled in options; takes as an arguyment either an image or batch with latent space images"""
+ """saves image before applying hires fix, if enabled in options; takes as an argument either an image or batch with latent space images"""
def save_intermediate(image, index):
if not opts.save or self.do_not_save_samples or not opts.save_images_before_highres_fix:
return
@@ -720,7 +720,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
- # Avoid making the inpainting conditioning unless necessary as
+ # Avoid making the inpainting conditioning unless necessary as
# this does need some extra compute to decode / encode the image again.
if getattr(self, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) < 1.0:
image_conditioning = self.img2img_image_conditioning(decode_first_stage(self.sd_model, samples), samples)
diff --git a/modules/safe.py b/modules/safe.py
index 10460ad0..20e9d2fa 100644
--- a/modules/safe.py
+++ b/modules/safe.py
@@ -80,7 +80,7 @@ def check_pt(filename, extra_handler):
# new pytorch format is a zip file
with zipfile.ZipFile(filename) as z:
check_zip_filenames(filename, z.namelist())
-
+
# find filename of data.pkl in zip file: '/data.pkl'
data_pkl_filenames = [f for f in z.namelist() if data_pkl_re.match(f)]
if len(data_pkl_filenames) == 0:
@@ -108,7 +108,7 @@ def load(filename, *args, **kwargs):
def load_with_extra(filename, extra_handler=None, *args, **kwargs):
"""
- this functon is intended to be used by extensions that want to load models with
+ this function is intended to be used by extensions that want to load models with
some extra classes in them that the usual unpickler would find suspicious.
Use the extra_handler argument to specify a function that takes module and field name as text,
diff --git a/modules/scripts.py b/modules/scripts.py
index 23ca195d..722f8685 100644
--- a/modules/scripts.py
+++ b/modules/scripts.py
@@ -36,7 +36,7 @@ class Script:
def ui(self, is_img2img):
"""this function should create gradio UI elements. See https://gradio.app/docs/#components
The return value should be an array of all components that are used in processing.
- Values of those returned componenbts will be passed to run() and process() functions.
+ Values of those returned components will be passed to run() and process() functions.
"""
pass
@@ -47,7 +47,7 @@ class Script:
This function should return:
- False if the script should not be shown in UI at all
- - True if the script should be shown in UI if it's scelected in the scripts drowpdown
+ - True if the script should be shown in UI if it's selected in the scripts dropdown
- script.AlwaysVisible if the script should be shown in UI at all times
"""
diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py
index 938f9a58..d72f83fd 100644
--- a/modules/sd_hijack_inpainting.py
+++ b/modules/sd_hijack_inpainting.py
@@ -209,7 +209,7 @@ def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=F
else:
x_in = torch.cat([x] * 2)
t_in = torch.cat([t] * 2)
-
+
if isinstance(c, dict):
assert isinstance(unconditional_conditioning, dict)
c_in = dict()
@@ -278,7 +278,7 @@ def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=F
x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t_prime, index)
return x_prev, pred_x0, e_t
-
+
# =================================================================================================
# Monkey patch LatentInpaintDiffusion to load the checkpoint with a proper config.
# Adapted from:
@@ -326,7 +326,7 @@ def do_inpainting_hijack():
# most of this stuff seems to no longer be needed because it is already included into SD2.0
# LatentInpaintDiffusion remains because SD2.0's LatentInpaintDiffusion can't be loaded without specifying a checkpoint
# p_sample_plms is needed because PLMS can't work with dicts as conditionings
- # this file should be cleaned up later if weverything tuens out to work fine
+ # this file should be cleaned up later if everything turns out to work fine
# ldm.models.diffusion.ddpm.get_unconditional_conditioning = get_unconditional_conditioning
ldm.models.diffusion.ddpm.LatentInpaintDiffusion = LatentInpaintDiffusion
diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py
index 1b9d7757..18daf8c1 100644
--- a/modules/sd_hijack_unet.py
+++ b/modules/sd_hijack_unet.py
@@ -4,7 +4,7 @@ import torch
class TorchHijackForUnet:
"""
This is torch, but with cat that resizes tensors to appropriate dimensions if they do not match;
- this makes it possible to create pictures with dimensions that are muliples of 8 rather than 64
+ this makes it possible to create pictures with dimensions that are multiples of 8 rather than 64
"""
def __getattr__(self, item):
diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py
index 2dc64c3c..88d68c76 100644
--- a/modules/textual_inversion/dataset.py
+++ b/modules/textual_inversion/dataset.py
@@ -28,9 +28,9 @@ class DatasetEntry:
class PersonalizedBase(Dataset):
- def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, cond_model=None, device=None, template_file=None, include_cond=False, batch_size=1, gradient_step=1, shuffle_tags=False, tag_drop_out=0, latent_sampling_method='once'):
+ def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, cond_model=None, device=None, template_file=None, include_cond=False, batch_size=1, gradient_step=1, shuffle_tags=False, tag_drop_out=0, latent_sampling_method='once'):
re_word = re.compile(shared.opts.dataset_filename_word_regex) if len(shared.opts.dataset_filename_word_regex) > 0 else None
-
+
self.placeholder_token = placeholder_token
self.width = width
@@ -50,14 +50,14 @@ class PersonalizedBase(Dataset):
self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]
-
+
self.shuffle_tags = shuffle_tags
self.tag_drop_out = tag_drop_out
print("Preparing dataset...")
for path in tqdm.tqdm(self.image_paths):
if shared.state.interrupted:
- raise Exception("inturrupted")
+ raise Exception("interrupted")
try:
image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.BICUBIC)
except Exception:
@@ -144,7 +144,7 @@ class PersonalizedDataLoader(DataLoader):
self.collate_fn = collate_wrapper_random
else:
self.collate_fn = collate_wrapper
-
+
class BatchLoader:
def __init__(self, data):
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index e28c357a..daf3997b 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -133,7 +133,7 @@ class EmbeddingDatabase:
process_file(fullfn, fn)
except Exception:
- print(f"Error loading emedding {fn}:", file=sys.stderr)
+ print(f"Error loading embedding {fn}:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
continue
@@ -194,7 +194,7 @@ def write_loss(log_directory, filename, step, epoch_len, values):
csv_writer.writeheader()
epoch = (step - 1) // epoch_len
- epoch_step = (step - 1) % epoch_len
+ epoch_step = (step - 1) % epoch_len
csv_writer.writerow({
"step": step,
@@ -270,9 +270,9 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
# dataset loading may take a while, so input validations and early returns should be done before this
shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
old_parallel_processing_allowed = shared.parallel_processing_allowed
-
+
pin_memory = shared.opts.pin_memory
-
+
ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method)
latent_sampling_method = ds.latent_sampling_method
@@ -295,12 +295,12 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
loss_step = 0
_loss_step = 0 #internal
-
+
last_saved_file = ""
last_saved_image = ""
forced_filename = ""
embedding_yet_to_be_embedded = False
-
+
pbar = tqdm.tqdm(total=steps - initial_step)
try:
for i in range((steps-initial_step) * gradient_step):
@@ -327,10 +327,10 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
c = shared.sd_model.cond_stage_model(batch.cond_text)
loss = shared.sd_model(x, c)[0] / gradient_step
del x
-
+
_loss_step += loss.item()
scaler.scale(loss).backward()
-
+
# go back until we reach gradient accumulation steps
if (j + 1) % gradient_step != 0:
continue
--
cgit v1.2.1
From 35e1017e3ea0a3ad9ec28c9b447200a70a65c0ae Mon Sep 17 00:00:00 2001
From: Akiba
Date: Fri, 16 Dec 2022 20:43:09 +0800
Subject: fix: xformers
---
modules/import_hook.py | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
create mode 100644 modules/import_hook.py
(limited to 'modules')
diff --git a/modules/import_hook.py b/modules/import_hook.py
new file mode 100644
index 00000000..eb10e4fd
--- /dev/null
+++ b/modules/import_hook.py
@@ -0,0 +1,18 @@
+import builtins
+import sys
+
+old_import = builtins.__import__
+IMPORT_BLACKLIST = []
+
+
+if "xformers" not in "".join(sys.argv):
+ IMPORT_BLACKLIST.append("xformers")
+
+
+def import_hook(*args, **kwargs):
+ if args[0] in IMPORT_BLACKLIST:
+ raise ImportError("Import of %s is blacklisted" % args[0])
+ return old_import(*args, **kwargs)
+
+
+builtins.__import__ = import_hook
--
cgit v1.2.1
From 8b0703b8fcdab153958b11f0dd5e5b6b58565fed Mon Sep 17 00:00:00 2001
From: "Alex \"mcmonkey\" Goodwin"
Date: Fri, 16 Dec 2022 08:18:29 -0800
Subject: Add a workaround patch for DPM2 a issue
DPM2 a and DPM2 a Karras samplers are both affected by an issue described by https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/3483 and can be resolved by a workaround suggested by the k-diffusion author at https://github.com/crowsonkb/k-diffusion/issues/43#issuecomment-1305195666
---
modules/sd_samplers.py | 3 +++
1 file changed, 3 insertions(+)
(limited to 'modules')
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 4c123d3b..b8e0ce53 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -494,6 +494,9 @@ class KDiffusionSampler:
x = x * sigmas[0]
+ if self.funcname == "sample_dpm_2_ancestral": # workaround dpm2 a issue
+ sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])
+
extra_params_kwargs = self.initialize(p)
if 'sigma_min' in inspect.signature(self.func).parameters:
extra_params_kwargs['sigma_min'] = self.model_wrap.sigmas[0].item()
--
cgit v1.2.1
From 180fdf7809ea18de2d3b04618846d5a4e33c002e Mon Sep 17 00:00:00 2001
From: "Alex \"mcmonkey\" Goodwin"
Date: Fri, 16 Dec 2022 08:42:00 -0800
Subject: apply to DPM2 (non-ancestral) as well
---
modules/sd_samplers.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index b8e0ce53..ae3d8bfa 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -494,7 +494,7 @@ class KDiffusionSampler:
x = x * sigmas[0]
- if self.funcname == "sample_dpm_2_ancestral": # workaround dpm2 a issue
+ if self.funcname in ['sample_dpm_2_ancestral', 'sample_dpm_2']:
sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])
extra_params_kwargs = self.initialize(p)
--
cgit v1.2.1
From b7c478c3ebb2b1844efd5d6bddb69095dd10808f Mon Sep 17 00:00:00 2001
From: MMaker
Date: Sat, 17 Dec 2022 00:45:43 -0500
Subject: fix: Modify font size when unable to fit in plot
This prevents scenarios where text without line breaks will start overlapping with each other when generating X/Y plots. This is most evident when generating X/Y plots with checkpoints, as most don't contain spaces and sometimes include extra information such as the epoch, making it extra long.
---
modules/images.py | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
(limited to 'modules')
diff --git a/modules/images.py b/modules/images.py
index 8146f580..ad97980c 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -136,8 +136,19 @@ def draw_grid_annotations(im, width, height, hor_texts, ver_texts):
lines.append(word)
return lines
- def draw_texts(drawing, draw_x, draw_y, lines):
+ def get_font(fontsize):
+ try:
+ return ImageFont.truetype(opts.font or Roboto, fontsize)
+ except Exception:
+ return ImageFont.truetype(Roboto, fontsize)
+
+ def draw_texts(drawing, draw_x, draw_y, lines, initial_fnt, initial_fontsize):
for i, line in enumerate(lines):
+ fnt = initial_fnt
+ fontsize = initial_fontsize
+ while drawing.multiline_textsize(line.text, font=fnt)[0] > line.allowed_width and fontsize > 0:
+ fontsize -= 1
+ fnt = get_font(fontsize)
drawing.multiline_text((draw_x, draw_y + line.size[1] / 2), line.text, font=fnt, fill=color_active if line.is_active else color_inactive, anchor="mm", align="center")
if not line.is_active:
@@ -148,10 +159,7 @@ def draw_grid_annotations(im, width, height, hor_texts, ver_texts):
fontsize = (width + height) // 25
line_spacing = fontsize // 2
- try:
- fnt = ImageFont.truetype(opts.font or Roboto, fontsize)
- except Exception:
- fnt = ImageFont.truetype(Roboto, fontsize)
+ fnt = get_font(fontsize)
color_active = (0, 0, 0)
color_inactive = (153, 153, 153)
@@ -178,6 +186,7 @@ def draw_grid_annotations(im, width, height, hor_texts, ver_texts):
for line in texts:
bbox = calc_d.multiline_textbbox((0, 0), line.text, font=fnt)
line.size = (bbox[2] - bbox[0], bbox[3] - bbox[1])
+ line.allowed_width = allowed_width
hor_text_heights = [sum([line.size[1] + line_spacing for line in lines]) - line_spacing for lines in hor_texts]
ver_text_heights = [sum([line.size[1] + line_spacing for line in lines]) - line_spacing * len(lines) for lines in
@@ -194,13 +203,13 @@ def draw_grid_annotations(im, width, height, hor_texts, ver_texts):
x = pad_left + width * col + width / 2
y = pad_top / 2 - hor_text_heights[col] / 2
- draw_texts(d, x, y, hor_texts[col])
+ draw_texts(d, x, y, hor_texts[col], fnt, fontsize)
for row in range(rows):
x = pad_left / 2
y = pad_top + height * row + height / 2 - ver_text_heights[row] / 2
- draw_texts(d, x, y, ver_texts[row])
+ draw_texts(d, x, y, ver_texts[row], fnt, fontsize)
return result
--
cgit v1.2.1
From 16b4509fa60ec03102b2452b41799dafccd35970 Mon Sep 17 00:00:00 2001
From: brkirch
Date: Sat, 17 Dec 2022 03:21:19 -0500
Subject: Add numpy fix for MPS on PyTorch 1.12.1
When saving training results with torch.save(), an exception is thrown:
"RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead."
So for MPS, check if Tensor.requires_grad and detach() if necessary.
---
modules/devices.py | 9 +++++++++
1 file changed, 9 insertions(+)
(limited to 'modules')
diff --git a/modules/devices.py b/modules/devices.py
index f8cffae1..800510b7 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -125,7 +125,16 @@ def layer_norm_fix(*args, **kwargs):
return orig_layer_norm(*args, **kwargs)
+# MPS workaround for https://github.com/pytorch/pytorch/issues/90532
+orig_tensor_numpy = torch.Tensor.numpy
+def numpy_fix(self, *args, **kwargs):
+ if self.requires_grad:
+ self = self.detach()
+ return orig_tensor_numpy(self, *args, **kwargs)
+
+
# PyTorch 1.13 doesn't need these fixes but unfortunately is slower and has regressions that prevent training from working
if has_mps() and version.parse(torch.__version__) < version.parse("1.13"):
torch.Tensor.to = tensor_to_fix
torch.nn.functional.layer_norm = layer_norm_fix
+ torch.Tensor.numpy = numpy_fix
--
cgit v1.2.1
From cca16373def60bfc6d159a3c2dca91d0ba48112a Mon Sep 17 00:00:00 2001
From: brkirch
Date: Sat, 17 Dec 2022 03:24:54 -0500
Subject: Add attributes used by MPS
---
modules/safe.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
(limited to 'modules')
diff --git a/modules/safe.py b/modules/safe.py
index 10460ad0..7c89c4c2 100644
--- a/modules/safe.py
+++ b/modules/safe.py
@@ -37,16 +37,16 @@ class RestrictedUnpickler(pickle.Unpickler):
if module == 'collections' and name == 'OrderedDict':
return getattr(collections, name)
- if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter']:
+ if module == 'torch._utils' and name in ['_rebuild_tensor_v2', '_rebuild_parameter', '_rebuild_device_tensor_from_numpy']:
return getattr(torch._utils, name)
- if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage', 'DoubleStorage', 'ByteStorage']:
+ if module == 'torch' and name in ['FloatStorage', 'HalfStorage', 'IntStorage', 'LongStorage', 'DoubleStorage', 'ByteStorage', 'float32']:
return getattr(torch, name)
if module == 'torch.nn.modules.container' and name in ['ParameterDict']:
return getattr(torch.nn.modules.container, name)
- if module == 'numpy.core.multiarray' and name == 'scalar':
- return numpy.core.multiarray.scalar
- if module == 'numpy' and name == 'dtype':
- return numpy.dtype
+ if module == 'numpy.core.multiarray' and name in ['scalar', '_reconstruct']:
+ return getattr(numpy.core.multiarray, name)
+ if module == 'numpy' and name in ['dtype', 'ndarray']:
+ return getattr(numpy, name)
if module == '_codecs' and name == 'encode':
return encode
if module == "pytorch_lightning.callbacks" and name == 'model_checkpoint':
--
cgit v1.2.1
From a26fe85056cf0dacef2d78cccf6ab100fd16da1c Mon Sep 17 00:00:00 2001
From: timntorres
Date: Sat, 17 Dec 2022 04:31:03 -0800
Subject: Add upscaler name as a suffix.
---
modules/extras.py | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/extras.py b/modules/extras.py
index bc349d5e..9b60e360 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -193,8 +193,13 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
else:
basename = ''
+ # Add upscaler name as a suffix.
+ suffix = f"-{shared.sd_upscalers[extras_upscaler_1].name}"
+ if extras_upscaler_2 and extras_upscaler_2_visibility:
+ suffix += f"-{shared.sd_upscalers[extras_upscaler_2].name}"
+
images.save_image(image, path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,
- no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo, forced_filename=None)
+ no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo, forced_filename=None, suffix=suffix)
if opts.enable_pnginfo:
image.info = existing_pnginfo
--
cgit v1.2.1
From a7a039d53a69f8c32cb889fe322e769b238fec27 Mon Sep 17 00:00:00 2001
From: timntorres
Date: Sat, 17 Dec 2022 06:28:51 -0800
Subject: Add option to include upscaler name in filename.
---
modules/extras.py | 5 +++--
modules/shared.py | 1 +
2 files changed, 4 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/extras.py b/modules/extras.py
index 9b60e360..074a7c22 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -194,8 +194,9 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
basename = ''
# Add upscaler name as a suffix.
- suffix = f"-{shared.sd_upscalers[extras_upscaler_1].name}"
- if extras_upscaler_2 and extras_upscaler_2_visibility:
+ suffix = f"-{shared.sd_upscalers[extras_upscaler_1].name}" if shared.opts.use_upscaler_name_as_suffix else ""
+ # Add second upscaler if applicable.
+ if suffix and extras_upscaler_2 and extras_upscaler_2_visibility:
suffix += f"-{shared.sd_upscalers[extras_upscaler_2].name}"
images.save_image(image, path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,
diff --git a/modules/shared.py b/modules/shared.py
index dc45fcaa..218894e8 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -293,6 +293,7 @@ options_templates.update(options_section(('saving-images', "Saving images/grids"
"export_for_4chan": OptionInfo(True, "If PNG image is larger than 4MB or any dimension is larger than 4000, downscale and save copy as JPG"),
"use_original_name_batch": OptionInfo(False, "Use original name for output filename during batch process in extras tab"),
+ "use_upscaler_name_as_suffix": OptionInfo(False, "Add upscaler name to the end of filename in the extras tab"),
"save_selected_only": OptionInfo(True, "When using 'Save' button, only save a single selected image"),
"do_not_add_watermark": OptionInfo(False, "Do not add watermark to images"),
--
cgit v1.2.1
From 6fd91c9179f51dd2f73f03eeabd12bfd081941c5 Mon Sep 17 00:00:00 2001
From: timntorres
Date: Sat, 17 Dec 2022 08:59:02 -0800
Subject: Update OptionInfo to match preexisting option.
---
modules/shared.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index 218894e8..230c377e 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -293,7 +293,7 @@ options_templates.update(options_section(('saving-images', "Saving images/grids"
"export_for_4chan": OptionInfo(True, "If PNG image is larger than 4MB or any dimension is larger than 4000, downscale and save copy as JPG"),
"use_original_name_batch": OptionInfo(False, "Use original name for output filename during batch process in extras tab"),
- "use_upscaler_name_as_suffix": OptionInfo(False, "Add upscaler name to the end of filename in the extras tab"),
+ "use_upscaler_name_as_suffix": OptionInfo(False, "Use upscaler name as filename suffix in the extras tab"),
"save_selected_only": OptionInfo(True, "When using 'Save' button, only save a single selected image"),
"do_not_add_watermark": OptionInfo(False, "Do not add watermark to images"),
--
cgit v1.2.1
From c02ef0f4286c618d30ee028778f58ca7809c7d93 Mon Sep 17 00:00:00 2001
From: Billy Cao
Date: Sun, 18 Dec 2022 20:51:59 +0800
Subject: Fix PIL being imported before its installed (for new users only)
---
modules/shared.py | 1 +
1 file changed, 1 insertion(+)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index c36ee211..734ea2fe 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -5,6 +5,7 @@ import os
import sys
import time
+from PIL import Image
import gradio as gr
import tqdm
--
cgit v1.2.1
From 7ba9bc2fdbfae8115294962510492faafeb48573 Mon Sep 17 00:00:00 2001
From: "Alex \"mcmonkey\" Goodwin"
Date: Sun, 18 Dec 2022 19:16:42 -0800
Subject: fix dpm2 in img2img as well
---
modules/sd_samplers.py | 3 +++
1 file changed, 3 insertions(+)
(limited to 'modules')
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index ae3d8bfa..1a1b8919 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -454,6 +454,9 @@ class KDiffusionSampler:
else:
sigmas = self.model_wrap.get_sigmas(steps)
+ if self.funcname in ['sample_dpm_2_ancestral', 'sample_dpm_2']:
+ sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])
+
sigma_sched = sigmas[steps - t_enc - 1:]
xi = x + noise * sigma_sched[0]
--
cgit v1.2.1
From 22f1527fa79a03dbc8b1a4eec3b22369a877f4bd Mon Sep 17 00:00:00 2001
From: Philpax
Date: Tue, 20 Dec 2022 20:36:49 +1100
Subject: feat(api): add override_settings_restore_afterwards
---
modules/processing.py | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index 24c537d1..f7335da2 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -77,7 +77,7 @@ class StableDiffusionProcessing():
"""
The first set of paramaters: sd_models -> do_not_reload_embeddings represent the minimum required to create a StableDiffusionProcessing
"""
- def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, sampler_index: int = None):
+ def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_name: str = None, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = None, s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0, override_settings: Dict[str, Any] = None, override_settings_restore_afterwards: bool = True, sampler_index: int = None):
if sampler_index is not None:
print("sampler_index argument for StableDiffusionProcessing does not do anything; use sampler_name", file=sys.stderr)
@@ -118,6 +118,7 @@ class StableDiffusionProcessing():
self.s_tmax = s_tmax or float('inf') # not representable as a standard ui option
self.s_noise = s_noise or opts.s_noise
self.override_settings = {k: v for k, v in (override_settings or {}).items() if k not in shared.restricted_opts}
+ self.override_settings_restore_afterwards = override_settings_restore_afterwards
self.is_using_inpainting_conditioning = False
if not seed_enable_extras:
@@ -147,11 +148,11 @@ class StableDiffusionProcessing():
# The "masked-image" in this case will just be all zeros since the entire image is masked.
image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
- image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning))
+ image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning))
# Add the fake full 1s mask to the first dimension.
image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
- image_conditioning = image_conditioning.to(x.dtype)
+ image_conditioning = image_conditioning.to(x.dtype)
return image_conditioning
@@ -199,7 +200,7 @@ class StableDiffusionProcessing():
source_image * (1.0 - conditioning_mask),
getattr(self, "inpainting_mask_weight", shared.opts.inpainting_mask_weight)
)
-
+
# Encode the new masked image using first stage of network.
conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image))
@@ -463,12 +464,14 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
res = process_images_inner(p)
- finally: # restore opts to original state
- for k, v in stored_opts.items():
- setattr(opts, k, v)
- if k == 'sd_hypernetwork': shared.reload_hypernetworks()
- if k == 'sd_model_checkpoint': sd_models.reload_model_weights()
- if k == 'sd_vae': sd_vae.reload_vae_weights()
+ finally:
+ # restore opts to original state
+ if p.override_settings_restore_afterwards:
+ for k, v in stored_opts.items():
+ setattr(opts, k, v)
+ if k == 'sd_hypernetwork': shared.reload_hypernetworks()
+ if k == 'sd_model_checkpoint': sd_models.reload_model_weights()
+ if k == 'sd_vae': sd_vae.reload_vae_weights()
return res
@@ -537,7 +540,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
for n in range(p.n_iter):
if state.skipped:
state.skipped = False
-
+
if state.interrupted:
break
@@ -612,7 +615,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
image.info["parameters"] = text
output_images.append(image)
- del x_samples_ddim
+ del x_samples_ddim
devices.torch_gc()
@@ -720,7 +723,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
- # Avoid making the inpainting conditioning unless necessary as
+ # Avoid making the inpainting conditioning unless necessary as
# this does need some extra compute to decode / encode the image again.
if getattr(self, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) < 1.0:
image_conditioning = self.img2img_image_conditioning(decode_first_stage(self.sd_model, samples), samples)
--
cgit v1.2.1
From 35b1775b32a07f1b7c9dccad61f7aa77027a00fa Mon Sep 17 00:00:00 2001
From: brkirch
Date: Mon, 19 Dec 2022 17:25:14 -0500
Subject: Use other MPS optimization for large q.shape[0] * q.shape[1]
Check if q.shape[0] * q.shape[1] is 2**18 or larger and use the lower memory usage MPS optimization if it is. This should prevent most crashes that were occurring at certain resolutions (e.g. 1024x1024, 2048x512, 512x2048).
Also included is a change to check slice_size and prevent it from being divisible by 4096 which also results in a crash. Otherwise a crash can occur at 1024x512 or 512x1024 resolution.
---
modules/sd_hijack_optimizations.py | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 98123fbf..02c87f40 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -127,7 +127,7 @@ def check_for_psutil():
invokeAI_mps_available = check_for_psutil()
-# -- Taken from https://github.com/invoke-ai/InvokeAI --
+# -- Taken from https://github.com/invoke-ai/InvokeAI and modified --
if invokeAI_mps_available:
import psutil
mem_total_gb = psutil.virtual_memory().total // (1 << 30)
@@ -152,14 +152,16 @@ def einsum_op_slice_1(q, k, v, slice_size):
return r
def einsum_op_mps_v1(q, k, v):
- if q.shape[1] <= 4096: # (512x512) max q.shape[1]: 4096
+ if q.shape[0] * q.shape[1] <= 2**16: # (512x512) max q.shape[1]: 4096
return einsum_op_compvis(q, k, v)
else:
slice_size = math.floor(2**30 / (q.shape[0] * q.shape[1]))
+ if slice_size % 4096 == 0:
+ slice_size -= 1
return einsum_op_slice_1(q, k, v, slice_size)
def einsum_op_mps_v2(q, k, v):
- if mem_total_gb > 8 and q.shape[1] <= 4096:
+ if mem_total_gb > 8 and q.shape[0] * q.shape[1] <= 2**16:
return einsum_op_compvis(q, k, v)
else:
return einsum_op_slice_0(q, k, v, 1)
@@ -188,7 +190,7 @@ def einsum_op(q, k, v):
return einsum_op_cuda(q, k, v)
if q.device.type == 'mps':
- if mem_total_gb >= 32:
+ if mem_total_gb >= 32 and q.shape[0] % 32 != 0 and q.shape[0] * q.shape[1] < 2**18:
return einsum_op_mps_v1(q, k, v)
return einsum_op_mps_v2(q, k, v)
--
cgit v1.2.1
From 13e0295ab682299e3280eb6ff28be0870f2bc57c Mon Sep 17 00:00:00 2001
From: Akiba
Date: Sat, 24 Dec 2022 11:17:21 +0800
Subject: fix: xformers use importlib
---
modules/import_hook.py | 15 +--------------
1 file changed, 1 insertion(+), 14 deletions(-)
(limited to 'modules')
diff --git a/modules/import_hook.py b/modules/import_hook.py
index eb10e4fd..7403135d 100644
--- a/modules/import_hook.py
+++ b/modules/import_hook.py
@@ -1,18 +1,5 @@
-import builtins
import sys
-old_import = builtins.__import__
-IMPORT_BLACKLIST = []
-
if "xformers" not in "".join(sys.argv):
- IMPORT_BLACKLIST.append("xformers")
-
-
-def import_hook(*args, **kwargs):
- if args[0] in IMPORT_BLACKLIST:
- raise ImportError("Import of %s is blacklisted" % args[0])
- return old_import(*args, **kwargs)
-
-
-builtins.__import__ = import_hook
+ sys.modules["xformers"] = None
--
cgit v1.2.1
From 0c747d4013f41f6c887a63d256af884aa8872f91 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 24 Dec 2022 07:57:56 +0300
Subject: add a comment for disable xformers hack
---
modules/import_hook.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/import_hook.py b/modules/import_hook.py
index 7403135d..28c67dfa 100644
--- a/modules/import_hook.py
+++ b/modules/import_hook.py
@@ -1,5 +1,5 @@
import sys
-
-if "xformers" not in "".join(sys.argv):
+# this will break any attempt to import xformers which will prevent stability diffusion repo from trying to use it
+if "--xformers" not in "".join(sys.argv):
sys.modules["xformers"] = None
--
cgit v1.2.1
From 399b229783a7b5fddab0a258740b4d59d668ee12 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 24 Dec 2022 09:03:45 +0300
Subject: eliminate duplicated code add an option to samplers for skipping next
to last sigma
---
modules/sd_samplers.py | 31 ++++++++++++++-----------------
1 file changed, 14 insertions(+), 17 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 1a1b8919..d26e48dc 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -23,16 +23,16 @@ samplers_k_diffusion = [
('Euler', 'sample_euler', ['k_euler'], {}),
('LMS', 'sample_lms', ['k_lms'], {}),
('Heun', 'sample_heun', ['k_heun'], {}),
- ('DPM2', 'sample_dpm_2', ['k_dpm_2'], {}),
- ('DPM2 a', 'sample_dpm_2_ancestral', ['k_dpm_2_a'], {}),
+ ('DPM2', 'sample_dpm_2', ['k_dpm_2'], {'discard_next_to_last_sigma': True}),
+ ('DPM2 a', 'sample_dpm_2_ancestral', ['k_dpm_2_a'], {'discard_next_to_last_sigma': True}),
('DPM++ 2S a', 'sample_dpmpp_2s_ancestral', ['k_dpmpp_2s_a'], {}),
('DPM++ 2M', 'sample_dpmpp_2m', ['k_dpmpp_2m'], {}),
('DPM++ SDE', 'sample_dpmpp_sde', ['k_dpmpp_sde'], {}),
('DPM fast', 'sample_dpm_fast', ['k_dpm_fast'], {}),
('DPM adaptive', 'sample_dpm_adaptive', ['k_dpm_ad'], {}),
('LMS Karras', 'sample_lms', ['k_lms_ka'], {'scheduler': 'karras'}),
- ('DPM2 Karras', 'sample_dpm_2', ['k_dpm_2_ka'], {'scheduler': 'karras'}),
- ('DPM2 a Karras', 'sample_dpm_2_ancestral', ['k_dpm_2_a_ka'], {'scheduler': 'karras'}),
+ ('DPM2 Karras', 'sample_dpm_2', ['k_dpm_2_ka'], {'scheduler': 'karras', 'discard_next_to_last_sigma': True}),
+ ('DPM2 a Karras', 'sample_dpm_2_ancestral', ['k_dpm_2_a_ka'], {'scheduler': 'karras', 'discard_next_to_last_sigma': True}),
('DPM++ 2S a Karras', 'sample_dpmpp_2s_ancestral', ['k_dpmpp_2s_a_ka'], {'scheduler': 'karras'}),
('DPM++ 2M Karras', 'sample_dpmpp_2m', ['k_dpmpp_2m_ka'], {'scheduler': 'karras'}),
('DPM++ SDE Karras', 'sample_dpmpp_sde', ['k_dpmpp_sde_ka'], {'scheduler': 'karras'}),
@@ -444,9 +444,7 @@ class KDiffusionSampler:
return extra_params_kwargs
- def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
- steps, t_enc = setup_img2img_steps(p, steps)
-
+ def get_sigmas(self, p, steps):
if p.sampler_noise_scheduler_override:
sigmas = p.sampler_noise_scheduler_override(steps)
elif self.config is not None and self.config.options.get('scheduler', None) == 'karras':
@@ -454,9 +452,16 @@ class KDiffusionSampler:
else:
sigmas = self.model_wrap.get_sigmas(steps)
- if self.funcname in ['sample_dpm_2_ancestral', 'sample_dpm_2']:
+ if self.config is not None and self.config.options.get('discard_next_to_last_sigma', False):
sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])
+ return sigmas
+
+ def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
+ steps, t_enc = setup_img2img_steps(p, steps)
+
+ sigmas = self.get_sigmas(p, steps)
+
sigma_sched = sigmas[steps - t_enc - 1:]
xi = x + noise * sigma_sched[0]
@@ -488,18 +493,10 @@ class KDiffusionSampler:
def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning = None):
steps = steps or p.steps
- if p.sampler_noise_scheduler_override:
- sigmas = p.sampler_noise_scheduler_override(steps)
- elif self.config is not None and self.config.options.get('scheduler', None) == 'karras':
- sigmas = k_diffusion.sampling.get_sigmas_karras(n=steps, sigma_min=0.1, sigma_max=10, device=shared.device)
- else:
- sigmas = self.model_wrap.get_sigmas(steps)
+ sigmas = self.get_sigmas(p, steps)
x = x * sigmas[0]
- if self.funcname in ['sample_dpm_2_ancestral', 'sample_dpm_2']:
- sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])
-
extra_params_kwargs = self.initialize(p)
if 'sigma_min' in inspect.signature(self.func).parameters:
extra_params_kwargs['sigma_min'] = self.model_wrap.sigmas[0].item()
--
cgit v1.2.1
From 9441c28c947588d756e279a8cd5db6c0b4a8d2e4 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 24 Dec 2022 09:46:35 +0300
Subject: add an option for img2img background color
---
modules/images.py | 11 +++++++++++
modules/processing.py | 2 +-
modules/shared.py | 1 +
modules/ui.py | 2 +-
4 files changed, 14 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/images.py b/modules/images.py
index ad97980c..8bcbc8d9 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -622,3 +622,14 @@ def image_data(data):
pass
return '', None
+
+
+def flatten(img, bgcolor):
+ """replaces transparency with bgcolor (example: "#ffffff"), returning an RGB mode image with no transparency"""
+
+ if img.mode == "RGBA":
+ background = Image.new('RGBA', img.size, bgcolor)
+ background.paste(img, mask=img)
+ img = background
+
+ return img.convert('RGB')
diff --git a/modules/processing.py b/modules/processing.py
index bc841837..7c4bcd74 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -832,7 +832,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
self.color_corrections = []
imgs = []
for img in self.init_images:
- image = img.convert("RGB")
+ image = images.flatten(img, opts.img2img_background_color)
if crop_region is None:
image = images.resize_image(self.resize_mode, image, self.width, self.height)
diff --git a/modules/shared.py b/modules/shared.py
index 215c1358..dcce9299 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -363,6 +363,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
"initial_noise_multiplier": OptionInfo(1.0, "Noise multiplier for img2img", gr.Slider, {"minimum": 0.5, "maximum": 1.5, "step": 0.01 }),
"img2img_color_correction": OptionInfo(False, "Apply color correction to img2img results to match original colors."),
"img2img_fix_steps": OptionInfo(False, "With img2img, do exactly the amount of steps the slider specifies (normally you'd do less with less denoising)."),
+ "img2img_background_color": OptionInfo("#ffffff", "With img2img, fill image's transparent parts with this color.", gr.ColorPicker, {}),
"enable_quantization": OptionInfo(False, "Enable quantization in K samplers for sharper and cleaner results. This may change existing seeds. Requires restart to apply."),
"enable_emphasis": OptionInfo(True, "Emphasis: use (text) to make model pay more attention to text and [text] to make it pay less attention"),
"use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. Can be useful to reproduce old seeds."),
diff --git a/modules/ui.py b/modules/ui.py
index 28481e33..76919b0f 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -812,7 +812,7 @@ def create_ui():
with gr.Tabs(elem_id="mode_img2img") as tabs_img2img_mode:
with gr.TabItem('img2img', id='img2img'):
- init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool=cmd_opts.gradio_img2img_tool).style(height=480)
+ init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool=cmd_opts.gradio_img2img_tool, image_mode="RGBA").style(height=480)
with gr.TabItem('Inpaint', id='inpaint'):
init_img_with_mask = gr.Image(label="Image for inpainting with mask", show_label=False, elem_id="img2maskimg", source="upload", interactive=True, type="pil", tool=cmd_opts.gradio_inpaint_tool, image_mode="RGBA").style(height=480)
--
cgit v1.2.1
From c0a8401b5a8368d03bb14fc63abbdedb1e802d8d Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 24 Dec 2022 11:12:17 +0300
Subject: rename the option for img2img latent upscale
---
modules/processing.py | 2 +-
modules/ui.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index 75b0067c..d2288f26 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -846,7 +846,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
self.overlay_images.append(image_masked.convert('RGBA'))
- # crop_region is not none iif we are doing inpaint full res
+ # crop_region is not None if we are doing inpaint full res
if crop_region is not None:
image = image.crop(crop_region)
image = images.resize_image(2, image, self.width, self.height)
diff --git a/modules/ui.py b/modules/ui.py
index faba69a4..9dec61d5 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -857,7 +857,7 @@ def create_ui():
img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs)
with gr.Row():
- resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", show_label=False, choices=["Just resize", "Crop and resize", "Resize and fill", "Upscale Latent Space"], type="index", value="Just resize")
+ resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", show_label=False, choices=["Just resize", "Crop and resize", "Resize and fill", "Just resize (latent upscale)"], type="index", value="Just resize")
steps = gr.Slider(minimum=1, maximum=150, step=1, label="Sampling Steps", value=20)
sampler_index = gr.Radio(label='Sampling method', choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index")
--
cgit v1.2.1
From f23a822f1c9cb3bd2e8772c75af429e06515eaef Mon Sep 17 00:00:00 2001
From: Philpax
Date: Sat, 24 Dec 2022 20:45:16 +1100
Subject: feat(api): include job_timestamp in progress
---
modules/shared.py | 1 +
1 file changed, 1 insertion(+)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index 8ea3b441..f356dbf7 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -171,6 +171,7 @@ class State:
"interrupted": self.skipped,
"job": self.job,
"job_count": self.job_count,
+ "job_timestamp": self.job_timestamp,
"job_no": self.job_no,
"sampling_step": self.sampling_step,
"sampling_steps": self.sampling_steps,
--
cgit v1.2.1
From 11dd79e346bd780bc5c3119df962e7a9c20f2493 Mon Sep 17 00:00:00 2001
From: AbstractQbit <38468635+AbstractQbit@users.noreply.github.com>
Date: Sat, 24 Dec 2022 14:00:17 +0300
Subject: Add an option for faster low quality previews
---
modules/sd_samplers.py | 23 ++++++++++++++++-------
modules/shared.py | 5 +++--
2 files changed, 19 insertions(+), 9 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index d26e48dc..fbb56af4 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -106,20 +106,29 @@ def setup_img2img_steps(p, steps=None):
return steps, t_enc
-def single_sample_to_image(sample):
- x_sample = processing.decode_first_stage(shared.sd_model, sample.unsqueeze(0))[0]
+def single_sample_to_image(sample, approximation=False):
+ if approximation:
+ # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/2
+ coefs = torch.tensor(
+ [[ 0.298, 0.207, 0.208],
+ [ 0.187, 0.286, 0.173],
+ [-0.158, 0.189, 0.264],
+ [-0.184, -0.271, -0.473]]).to(sample.device)
+ x_sample = torch.einsum("lxy,lr -> rxy", sample, coefs)
+ else:
+ x_sample = processing.decode_first_stage(shared.sd_model, sample.unsqueeze(0))[0]
x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0)
x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
x_sample = x_sample.astype(np.uint8)
return Image.fromarray(x_sample)
-def sample_to_image(samples, index=0):
- return single_sample_to_image(samples[index])
+def sample_to_image(samples, index=0, approximation=False):
+ return single_sample_to_image(samples[index], approximation)
-def samples_to_image_grid(samples):
- return images.image_grid([single_sample_to_image(sample) for sample in samples])
+def samples_to_image_grid(samples, approximation=False):
+ return images.image_grid([single_sample_to_image(sample, approximation) for sample in samples])
def store_latent(decoded):
@@ -127,7 +136,7 @@ def store_latent(decoded):
if opts.show_progress_every_n_steps > 0 and shared.state.sampling_step % opts.show_progress_every_n_steps == 0:
if not shared.parallel_processing_allowed:
- shared.state.current_image = sample_to_image(decoded)
+ shared.state.current_image = sample_to_image(decoded, approximation=opts.show_progress_approximate)
class InterruptedException(BaseException):
diff --git a/modules/shared.py b/modules/shared.py
index 8ea3b441..1067b1d3 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -212,9 +212,9 @@ class State:
import modules.sd_samplers
if opts.show_progress_grid:
- self.current_image = modules.sd_samplers.samples_to_image_grid(self.current_latent)
+ self.current_image = modules.sd_samplers.samples_to_image_grid(self.current_latent, approximation=opts.show_progress_approximate)
else:
- self.current_image = modules.sd_samplers.sample_to_image(self.current_latent)
+ self.current_image = modules.sd_samplers.sample_to_image(self.current_latent, approximation=opts.show_progress_approximate)
self.current_image_sampling_step = self.sampling_step
@@ -391,6 +391,7 @@ options_templates.update(options_section(('interrogate', "Interrogate Options"),
options_templates.update(options_section(('ui', "User interface"), {
"show_progressbar": OptionInfo(True, "Show progressbar"),
"show_progress_every_n_steps": OptionInfo(0, "Show image creation progress every N sampling steps. Set to 0 to disable. Set to -1 to show after completion of batch.", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
+ "show_progress_approximate": OptionInfo(False, "Calculate small previews using fast linear approximation instead of VAE"),
"show_progress_grid": OptionInfo(True, "Show previews of all images generated in a batch as a grid"),
"return_grid": OptionInfo(True, "Show grid in results for web"),
"do_not_show_images": OptionInfo(False, "Do not show any images in results for web"),
--
cgit v1.2.1
From 6247f21a637399900643a4915e8a223688e0ed22 Mon Sep 17 00:00:00 2001
From: Philpax
Date: Sat, 24 Dec 2022 22:04:53 +1100
Subject: fix(api): don't save extras output to disk
---
modules/api/api.py | 6 +++---
modules/extras.py | 17 +++++++++--------
2 files changed, 12 insertions(+), 11 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 3257445d..b43dd16b 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -181,7 +181,7 @@ class Api:
reqDict['image'] = decode_base64_to_image(reqDict['image'])
with self.queue_lock:
- result = run_extras(extras_mode=0, image_folder="", input_dir="", output_dir="", **reqDict)
+ result = run_extras(extras_mode=0, image_folder="", input_dir="", output_dir="", save_output=False, **reqDict)
return ExtrasSingleImageResponse(image=encode_pil_to_base64(result[0][0]), html_info=result[1])
@@ -197,7 +197,7 @@ class Api:
reqDict.pop('imageList')
with self.queue_lock:
- result = run_extras(extras_mode=1, image="", input_dir="", output_dir="", **reqDict)
+ result = run_extras(extras_mode=1, image="", input_dir="", output_dir="", save_output=False, **reqDict)
return ExtrasBatchImagesResponse(images=list(map(encode_pil_to_base64, result[0])), html_info=result[1])
@@ -322,7 +322,7 @@ class Api:
def get_artists(self):
return [{"name":x[0], "score":x[1], "category":x[2]} for x in shared.artist_db.artists]
-
+
def refresh_checkpoints(self):
shared.refresh_checkpoints()
diff --git a/modules/extras.py b/modules/extras.py
index 6fa7d856..68939dea 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -55,7 +55,7 @@ class LruCache(OrderedDict):
cached_images: LruCache = LruCache(max_size=5)
-def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_dir, show_extras_results, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility, upscale_first: bool):
+def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_dir, show_extras_results, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility, upscale_first: bool, save_output: bool = True):
devices.torch_gc()
imageArr = []
@@ -193,14 +193,15 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
else:
basename = ''
- # Add upscaler name as a suffix.
- suffix = f"-{shared.sd_upscalers[extras_upscaler_1].name}" if shared.opts.use_upscaler_name_as_suffix else ""
- # Add second upscaler if applicable.
- if suffix and extras_upscaler_2 and extras_upscaler_2_visibility:
- suffix += f"-{shared.sd_upscalers[extras_upscaler_2].name}"
+ if save_output:
+ # Add upscaler name as a suffix.
+ suffix = f"-{shared.sd_upscalers[extras_upscaler_1].name}" if shared.opts.use_upscaler_name_as_suffix else ""
+ # Add second upscaler if applicable.
+ if suffix and extras_upscaler_2 and extras_upscaler_2_visibility:
+ suffix += f"-{shared.sd_upscalers[extras_upscaler_2].name}"
- images.save_image(image, path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,
- no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo, forced_filename=None, suffix=suffix)
+ images.save_image(image, path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,
+ no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo, forced_filename=None, suffix=suffix)
if opts.enable_pnginfo:
image.info = existing_pnginfo
--
cgit v1.2.1
From 5a650055de3792223a91925aba8130ebdee29e35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?linuxmobile=20=28=20=E3=83=AA=E3=83=8A=E3=83=83=E3=82=AF?=
=?UTF-8?q?=E3=82=B9=20=29?=
Date: Sat, 24 Dec 2022 09:25:35 -0300
Subject: Removed lenght in sd_model at line 115
Commit eba60a4 is what is causing this error, delete the length check in sd_model starting at line 115 and it's fine.
https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/5971#issuecomment-1364507379
---
modules/sd_models.py | 3 ---
1 file changed, 3 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 1254e5ae..6ca06211 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -111,9 +111,6 @@ def model_hash(filename):
def select_checkpoint():
model_checkpoint = shared.opts.sd_model_checkpoint
-
- if len(model_checkpoint) == 0:
- model_checkpoint = shared.default_sd_model_file
checkpoint_info = checkpoints_list.get(model_checkpoint, None)
if checkpoint_info is not None:
--
cgit v1.2.1
From 03d7b394539558f6f560155d87a4fc66eb675e30 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 24 Dec 2022 12:40:32 +0300
Subject: added an option to filter out deepbooru tags
---
modules/deepbooru.py | 4 +++-
modules/shared.py | 1 +
2 files changed, 4 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/deepbooru.py b/modules/deepbooru.py
index dfc83357..122fce7f 100644
--- a/modules/deepbooru.py
+++ b/modules/deepbooru.py
@@ -79,7 +79,9 @@ class DeepDanbooru:
res = []
- for tag in tags:
+ filtertags = set([x.strip().replace(' ', '_') for x in shared.opts.deepbooru_filter_tags.split(",")])
+
+ for tag in [x for x in tags if x not in filtertags]:
probability = probability_dict[tag]
tag_outformat = tag
if use_spaces:
diff --git a/modules/shared.py b/modules/shared.py
index 8ea3b441..a75de535 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -386,6 +386,7 @@ options_templates.update(options_section(('interrogate', "Interrogate Options"),
"deepbooru_sort_alpha": OptionInfo(True, "Interrogate: deepbooru sort alphabetically"),
"deepbooru_use_spaces": OptionInfo(False, "use spaces for tags in deepbooru"),
"deepbooru_escape": OptionInfo(True, "escape (\\) brackets in deepbooru (so they are used as literal brackets and not for emphasis)"),
+ "deepbooru_filter_tags": OptionInfo("", "filter out those tags from deepbooru output (separated by comma)"),
}))
options_templates.update(options_section(('ui', "User interface"), {
--
cgit v1.2.1
From 0b8acce6a9a1418fa88a506450cd1b92e2d48986 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 24 Dec 2022 18:38:16 +0300
Subject: separate part of denoiser code into a function to make it easier for
extensions to override it
---
modules/sd_samplers.py | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index d26e48dc..8efe74df 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -288,6 +288,16 @@ class CFGDenoiser(torch.nn.Module):
self.init_latent = None
self.step = 0
+ def combine_denoised(self, x_out, conds_list, uncond, cond_scale):
+ denoised_uncond = x_out[-uncond.shape[0]:]
+ denoised = torch.clone(denoised_uncond)
+
+ for i, conds in enumerate(conds_list):
+ for cond_index, weight in conds:
+ denoised[i] += (x_out[cond_index] - denoised_uncond[i]) * (weight * cond_scale)
+
+ return denoised
+
def forward(self, x, sigma, uncond, cond, cond_scale, image_cond):
if state.interrupted or state.skipped:
raise InterruptedException
@@ -329,12 +339,7 @@ class CFGDenoiser(torch.nn.Module):
x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond={"c_crossattn": [uncond], "c_concat": [image_cond_in[-uncond.shape[0]:]]})
- denoised_uncond = x_out[-uncond.shape[0]:]
- denoised = torch.clone(denoised_uncond)
-
- for i, conds in enumerate(conds_list):
- for cond_index, weight in conds:
- denoised[i] += (x_out[cond_index] - denoised_uncond[i]) * (weight * cond_scale)
+ denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale)
if self.mask is not None:
denoised = self.init_latent * self.mask + self.nmask * denoised
--
cgit v1.2.1
From 3bf5591efe9a9f219c6088be322a87adc4f48f95 Mon Sep 17 00:00:00 2001
From: Yuval Aboulafia
Date: Sat, 24 Dec 2022 21:35:29 +0200
Subject: fix F541 f-string without any placeholders
---
modules/codeformer/vqgan_arch.py | 4 ++--
modules/hypernetworks/hypernetwork.py | 4 ++--
modules/images.py | 2 +-
modules/interrogate.py | 2 +-
modules/safe.py | 8 ++++----
modules/sd_models.py | 8 ++++----
modules/sd_vae.py | 2 +-
modules/textual_inversion/textual_inversion.py | 2 +-
8 files changed, 16 insertions(+), 16 deletions(-)
(limited to 'modules')
diff --git a/modules/codeformer/vqgan_arch.py b/modules/codeformer/vqgan_arch.py
index c06c590c..e7293683 100644
--- a/modules/codeformer/vqgan_arch.py
+++ b/modules/codeformer/vqgan_arch.py
@@ -382,7 +382,7 @@ class VQAutoEncoder(nn.Module):
self.load_state_dict(torch.load(model_path, map_location='cpu')['params'])
logger.info(f'vqgan is loaded from: {model_path} [params]')
else:
- raise ValueError(f'Wrong params!')
+ raise ValueError('Wrong params!')
def forward(self, x):
@@ -431,7 +431,7 @@ class VQGANDiscriminator(nn.Module):
elif 'params' in chkpt:
self.load_state_dict(torch.load(model_path, map_location='cpu')['params'])
else:
- raise ValueError(f'Wrong params!')
+ raise ValueError('Wrong params!')
def forward(self, x):
return self.main(x)
\ No newline at end of file
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index c406ffb3..9d3034ae 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -277,7 +277,7 @@ def load_hypernetwork(filename):
print(traceback.format_exc(), file=sys.stderr)
else:
if shared.loaded_hypernetwork is not None:
- print(f"Unloading hypernetwork")
+ print("Unloading hypernetwork")
shared.loaded_hypernetwork = None
@@ -417,7 +417,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
initial_step = hypernetwork.step or 0
if initial_step >= steps:
- shared.state.textinfo = f"Model has already been trained beyond specified max steps"
+ shared.state.textinfo = "Model has already been trained beyond specified max steps"
return hypernetwork, filename
scheduler = LearnRateScheduler(learn_rate, steps, initial_step)
diff --git a/modules/images.py b/modules/images.py
index 809ad9f7..31d4528d 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -599,7 +599,7 @@ def read_info_from_image(image):
Negative prompt: {json_info["uc"]}
Steps: {json_info["steps"]}, Sampler: {sampler}, CFG scale: {json_info["scale"]}, Seed: {json_info["seed"]}, Size: {image.width}x{image.height}, Clip skip: 2, ENSD: 31337"""
except Exception:
- print(f"Error parsing NovelAI image generation parameters:", file=sys.stderr)
+ print("Error parsing NovelAI image generation parameters:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
return geninfo, items
diff --git a/modules/interrogate.py b/modules/interrogate.py
index 0068b81c..46935210 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -172,7 +172,7 @@ class InterrogateModels:
res += ", " + match
except Exception:
- print(f"Error interrogating", file=sys.stderr)
+ print("Error interrogating", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
res += ""
diff --git a/modules/safe.py b/modules/safe.py
index 479c8b86..1d4c20b9 100644
--- a/modules/safe.py
+++ b/modules/safe.py
@@ -137,15 +137,15 @@ def load_with_extra(filename, extra_handler=None, *args, **kwargs):
except pickle.UnpicklingError:
print(f"Error verifying pickled file from {filename}:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
- print(f"-----> !!!! The file is most likely corrupted !!!! <-----", file=sys.stderr)
- print(f"You can skip this check with --disable-safe-unpickle commandline argument, but that is not going to help you.\n\n", file=sys.stderr)
+ print("-----> !!!! The file is most likely corrupted !!!! <-----", file=sys.stderr)
+ print("You can skip this check with --disable-safe-unpickle commandline argument, but that is not going to help you.\n\n", file=sys.stderr)
return None
except Exception:
print(f"Error verifying pickled file from {filename}:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
- print(f"\nThe file may be malicious, so the program is not going to read it.", file=sys.stderr)
- print(f"You can skip this check with --disable-safe-unpickle commandline argument.\n\n", file=sys.stderr)
+ print("\nThe file may be malicious, so the program is not going to read it.", file=sys.stderr)
+ print("You can skip this check with --disable-safe-unpickle commandline argument.\n\n", file=sys.stderr)
return None
return unsafe_torch_load(filename, *args, **kwargs)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 6ca06211..ecdd91c5 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -117,13 +117,13 @@ def select_checkpoint():
return checkpoint_info
if len(checkpoints_list) == 0:
- print(f"No checkpoints found. When searching for checkpoints, looked at:", file=sys.stderr)
+ print("No checkpoints found. When searching for checkpoints, looked at:", file=sys.stderr)
if shared.cmd_opts.ckpt is not None:
print(f" - file {os.path.abspath(shared.cmd_opts.ckpt)}", file=sys.stderr)
print(f" - directory {model_path}", file=sys.stderr)
if shared.cmd_opts.ckpt_dir is not None:
print(f" - directory {os.path.abspath(shared.cmd_opts.ckpt_dir)}", file=sys.stderr)
- print(f"Can't run without a checkpoint. Find and place a .ckpt file into any of those locations. The program will exit.", file=sys.stderr)
+ print("Can't run without a checkpoint. Find and place a .ckpt file into any of those locations. The program will exit.", file=sys.stderr)
exit(1)
checkpoint_info = next(iter(checkpoints_list.values()))
@@ -324,7 +324,7 @@ def load_model(checkpoint_info=None):
script_callbacks.model_loaded_callback(sd_model)
- print(f"Model loaded.")
+ print("Model loaded.")
return sd_model
@@ -359,5 +359,5 @@ def reload_model_weights(sd_model=None, info=None):
if not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram:
sd_model.to(devices.device)
- print(f"Weights loaded.")
+ print("Weights loaded.")
return sd_model
diff --git a/modules/sd_vae.py b/modules/sd_vae.py
index 25638a83..3856418e 100644
--- a/modules/sd_vae.py
+++ b/modules/sd_vae.py
@@ -208,5 +208,5 @@ def reload_vae_weights(sd_model=None, vae_file="auto"):
if not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram:
sd_model.to(devices.device)
- print(f"VAE Weights loaded.")
+ print("VAE Weights loaded.")
return sd_model
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index daf3997b..f6112578 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -263,7 +263,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
initial_step = embedding.step or 0
if initial_step >= steps:
- shared.state.textinfo = f"Model has already been trained beyond specified max steps"
+ shared.state.textinfo = "Model has already been trained beyond specified max steps"
return embedding, filename
scheduler = LearnRateScheduler(learn_rate, steps, initial_step)
--
cgit v1.2.1
From 56e557c6ff8a6480887c9c585bf908045ee8e791 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 24 Dec 2022 22:39:00 +0300
Subject: added cheap NN approximation for VAE
---
modules/sd_samplers.py | 29 +++++++++++++-----------
modules/sd_vae_approx.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
modules/shared.py | 6 ++---
3 files changed, 77 insertions(+), 16 deletions(-)
create mode 100644 modules/sd_vae_approx.py
(limited to 'modules')
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 27ef4ff8..177b5338 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -9,7 +9,7 @@ import k_diffusion.sampling
import torchsde._brownian.brownian_interval
import ldm.models.diffusion.ddim
import ldm.models.diffusion.plms
-from modules import prompt_parser, devices, processing, images
+from modules import prompt_parser, devices, processing, images, sd_vae_approx
from modules.shared import opts, cmd_opts, state
import modules.shared as shared
@@ -106,28 +106,31 @@ def setup_img2img_steps(p, steps=None):
return steps, t_enc
-def single_sample_to_image(sample, approximation=False):
- if approximation:
- # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/2
- coefs = torch.tensor(
- [[ 0.298, 0.207, 0.208],
- [ 0.187, 0.286, 0.173],
- [-0.158, 0.189, 0.264],
- [-0.184, -0.271, -0.473]]).to(sample.device)
- x_sample = torch.einsum("lxy,lr -> rxy", sample, coefs)
+approximation_indexes = {"Full": 0, "Approx NN": 1, "Approx cheap": 2}
+
+
+def single_sample_to_image(sample, approximation=None):
+ if approximation is None:
+ approximation = approximation_indexes.get(opts.show_progress_type, 0)
+
+ if approximation == 2:
+ x_sample = sd_vae_approx.cheap_approximation(sample)
+ elif approximation == 1:
+ x_sample = sd_vae_approx.model()(sample.to(devices.device, devices.dtype).unsqueeze(0))[0].detach()
else:
x_sample = processing.decode_first_stage(shared.sd_model, sample.unsqueeze(0))[0]
+
x_sample = torch.clamp((x_sample + 1.0) / 2.0, min=0.0, max=1.0)
x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
x_sample = x_sample.astype(np.uint8)
return Image.fromarray(x_sample)
-def sample_to_image(samples, index=0, approximation=False):
+def sample_to_image(samples, index=0, approximation=None):
return single_sample_to_image(samples[index], approximation)
-def samples_to_image_grid(samples, approximation=False):
+def samples_to_image_grid(samples, approximation=None):
return images.image_grid([single_sample_to_image(sample, approximation) for sample in samples])
@@ -136,7 +139,7 @@ def store_latent(decoded):
if opts.show_progress_every_n_steps > 0 and shared.state.sampling_step % opts.show_progress_every_n_steps == 0:
if not shared.parallel_processing_allowed:
- shared.state.current_image = sample_to_image(decoded, approximation=opts.show_progress_approximate)
+ shared.state.current_image = sample_to_image(decoded)
class InterruptedException(BaseException):
diff --git a/modules/sd_vae_approx.py b/modules/sd_vae_approx.py
new file mode 100644
index 00000000..0a58542d
--- /dev/null
+++ b/modules/sd_vae_approx.py
@@ -0,0 +1,58 @@
+import os
+
+import torch
+from torch import nn
+from modules import devices, paths
+
+sd_vae_approx_model = None
+
+
+class VAEApprox(nn.Module):
+ def __init__(self):
+ super(VAEApprox, self).__init__()
+ self.conv1 = nn.Conv2d(4, 8, (7, 7))
+ self.conv2 = nn.Conv2d(8, 16, (5, 5))
+ self.conv3 = nn.Conv2d(16, 32, (3, 3))
+ self.conv4 = nn.Conv2d(32, 64, (3, 3))
+ self.conv5 = nn.Conv2d(64, 32, (3, 3))
+ self.conv6 = nn.Conv2d(32, 16, (3, 3))
+ self.conv7 = nn.Conv2d(16, 8, (3, 3))
+ self.conv8 = nn.Conv2d(8, 3, (3, 3))
+
+ def forward(self, x):
+ extra = 11
+ x = nn.functional.interpolate(x, (x.shape[2] * 2, x.shape[3] * 2))
+ x = nn.functional.pad(x, (extra, extra, extra, extra))
+
+ for layer in [self.conv1, self.conv2, self.conv3, self.conv4, self.conv5, self.conv6, self.conv7, self.conv8, ]:
+ x = layer(x)
+ x = nn.functional.leaky_relu(x, 0.1)
+
+ return x
+
+
+def model():
+ global sd_vae_approx_model
+
+ if sd_vae_approx_model is None:
+ sd_vae_approx_model = VAEApprox()
+ sd_vae_approx_model.load_state_dict(torch.load(os.path.join(paths.models_path, "VAE-approx", "model.pt")))
+ sd_vae_approx_model.eval()
+ sd_vae_approx_model.to(devices.device, devices.dtype)
+
+ return sd_vae_approx_model
+
+
+def cheap_approximation(sample):
+ # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/2
+
+ coefs = torch.tensor([
+ [0.298, 0.207, 0.208],
+ [0.187, 0.286, 0.173],
+ [-0.158, 0.189, 0.264],
+ [-0.184, -0.271, -0.473],
+ ]).to(sample.device)
+
+ x_sample = torch.einsum("lxy,lr -> rxy", sample, coefs)
+
+ return x_sample
diff --git a/modules/shared.py b/modules/shared.py
index eb3e5aec..3cc3c724 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -212,9 +212,9 @@ class State:
import modules.sd_samplers
if opts.show_progress_grid:
- self.current_image = modules.sd_samplers.samples_to_image_grid(self.current_latent, approximation=opts.show_progress_approximate)
+ self.current_image = modules.sd_samplers.samples_to_image_grid(self.current_latent)
else:
- self.current_image = modules.sd_samplers.sample_to_image(self.current_latent, approximation=opts.show_progress_approximate)
+ self.current_image = modules.sd_samplers.sample_to_image(self.current_latent)
self.current_image_sampling_step = self.sampling_step
@@ -392,7 +392,7 @@ options_templates.update(options_section(('interrogate', "Interrogate Options"),
options_templates.update(options_section(('ui', "User interface"), {
"show_progressbar": OptionInfo(True, "Show progressbar"),
"show_progress_every_n_steps": OptionInfo(0, "Show image creation progress every N sampling steps. Set to 0 to disable. Set to -1 to show after completion of batch.", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
- "show_progress_approximate": OptionInfo(False, "Calculate small previews using fast linear approximation instead of VAE"),
+ "show_progress_type": OptionInfo("Full", "Image creation progress mode", gr.Radio, {"choices": ["Full", "Approx NN", "Approx cheap"]}),
"show_progress_grid": OptionInfo(True, "Show previews of all images generated in a batch as a grid"),
"return_grid": OptionInfo(True, "Show grid in results for web"),
"do_not_show_images": OptionInfo(False, "Do not show any images in results for web"),
--
cgit v1.2.1
From c5bdba2089dc7060be2631bcbc83313b6358cbf2 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 24 Dec 2022 22:41:35 +0300
Subject: change wording a bit
---
modules/shared.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index 3cc3c724..d4ddeea0 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -392,7 +392,7 @@ options_templates.update(options_section(('interrogate', "Interrogate Options"),
options_templates.update(options_section(('ui', "User interface"), {
"show_progressbar": OptionInfo(True, "Show progressbar"),
"show_progress_every_n_steps": OptionInfo(0, "Show image creation progress every N sampling steps. Set to 0 to disable. Set to -1 to show after completion of batch.", gr.Slider, {"minimum": -1, "maximum": 32, "step": 1}),
- "show_progress_type": OptionInfo("Full", "Image creation progress mode", gr.Radio, {"choices": ["Full", "Approx NN", "Approx cheap"]}),
+ "show_progress_type": OptionInfo("Full", "Image creation progress preview mode", gr.Radio, {"choices": ["Full", "Approx NN", "Approx cheap"]}),
"show_progress_grid": OptionInfo(True, "Show previews of all images generated in a batch as a grid"),
"return_grid": OptionInfo(True, "Show grid in results for web"),
"do_not_show_images": OptionInfo(False, "Do not show any images in results for web"),
--
cgit v1.2.1
From 5f1dfbbc959855fd90ba80c0c76301d2063772fa Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 24 Dec 2022 18:02:22 -0500
Subject: implement train api
---
modules/api/api.py | 94 ++++++++++++++++++++++++++++++++++-
modules/api/models.py | 9 ++++
modules/hypernetworks/hypernetwork.py | 26 ++++++++++
modules/hypernetworks/ui.py | 31 ++----------
4 files changed, 132 insertions(+), 28 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index b43dd16b..1ceba75d 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -10,13 +10,17 @@ from fastapi.security import HTTPBasic, HTTPBasicCredentials
from secrets import compare_digest
import modules.shared as shared
-from modules import sd_samplers, deepbooru
+from modules import sd_samplers, deepbooru, sd_hijack
from modules.api.models import *
from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
from modules.extras import run_extras, run_pnginfo
+from modules.textual_inversion.textual_inversion import create_embedding, train_embedding
+from modules.textual_inversion.preprocess import preprocess
+from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork
from PIL import PngImagePlugin,Image
from modules.sd_models import checkpoints_list
from modules.realesrgan_model import get_realesrgan_models
+from modules import devices
from typing import List
def upscaler_to_index(name: str):
@@ -97,6 +101,11 @@ class Api:
self.add_api_route("/sdapi/v1/artist-categories", self.get_artists_categories, methods=["GET"], response_model=List[str])
self.add_api_route("/sdapi/v1/artists", self.get_artists, methods=["GET"], response_model=List[ArtistItem])
self.add_api_route("/sdapi/v1/refresh-checkpoints", self.refresh_checkpoints, methods=["POST"])
+ self.add_api_route("/sdapi/v1/create/embedding", self.create_embedding, methods=["POST"], response_model=CreateResponse)
+ self.add_api_route("/sdapi/v1/create/hypernetwork", self.create_hypernetwork, methods=["POST"], response_model=CreateResponse)
+ self.add_api_route("/sdapi/v1/preprocess", self.preprocess, methods=["POST"], response_model=PreprocessResponse)
+ self.add_api_route("/sdapi/v1/train/embedding", self.train_embedding, methods=["POST"], response_model=TrainResponse)
+ self.add_api_route("/sdapi/v1/train/hypernetwork", self.train_hypernetwork, methods=["POST"], response_model=TrainResponse)
def add_api_route(self, path: str, endpoint, **kwargs):
if shared.cmd_opts.api_auth:
@@ -326,6 +335,89 @@ class Api:
def refresh_checkpoints(self):
shared.refresh_checkpoints()
+ def create_embedding(self, args: dict):
+ try:
+ shared.state.begin()
+ filename = create_embedding(**args) # create empty embedding
+ sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings() # reload embeddings so new one can be immediately used
+ shared.state.end()
+ return CreateResponse(info = "create embedding filename: {filename}".format(filename = filename))
+ except AssertionError as e:
+ shared.state.end()
+ return TrainResponse(info = "create embedding error: {error}".format(error = e))
+
+ def create_hypernetwork(self, args: dict):
+ try:
+ shared.state.begin()
+ filename = create_hypernetwork(**args) # create empty embedding
+ shared.state.end()
+ return CreateResponse(info = "create hypernetwork filename: {filename}".format(filename = filename))
+ except AssertionError as e:
+ shared.state.end()
+ return TrainResponse(info = "create hypernetwork error: {error}".format(error = e))
+
+ def preprocess(self, args: dict):
+ try:
+ shared.state.begin()
+ preprocess(**args) # quick operation unless blip/booru interrogation is enabled
+ shared.state.end()
+ return PreprocessResponse(info = 'preprocess complete')
+ except KeyError as e:
+ shared.state.end()
+ return PreprocessResponse(info = "preprocess error: invalid token: {error}".format(error = e))
+ except AssertionError as e:
+ shared.state.end()
+ return PreprocessResponse(info = "preprocess error: {error}".format(error = e))
+ except FileNotFoundError as e:
+ shared.state.end()
+ return PreprocessResponse(info = 'preprocess error: {error}'.format(error = e))
+
+ def train_embedding(self, args: dict):
+ try:
+ shared.state.begin()
+ apply_optimizations = shared.opts.training_xattention_optimizations
+ error = None
+ filename = ''
+ if not apply_optimizations:
+ sd_hijack.undo_optimizations()
+ try:
+ embedding, filename = train_embedding(**args) # can take a long time to complete
+ except Exception as e:
+ error = e
+ finally:
+ if not apply_optimizations:
+ sd_hijack.apply_optimizations()
+ shared.state.end()
+ return TrainResponse(info = "train embedding complete: filename: {filename} error: {error}".format(filename = filename, error = error))
+ except AssertionError as msg:
+ shared.state.end()
+ return TrainResponse(info = "train embedding error: {msg}".format(msg = msg))
+
+ def train_hypernetwork(self, args: dict):
+ try:
+ shared.state.begin()
+ initial_hypernetwork = shared.loaded_hypernetwork
+ apply_optimizations = shared.opts.training_xattention_optimizations
+ error = None
+ filename = ''
+ if not apply_optimizations:
+ sd_hijack.undo_optimizations()
+ try:
+ hypernetwork, filename = train_hypernetwork(*args)
+ except Exception as e:
+ error = e
+ finally:
+ shared.loaded_hypernetwork = initial_hypernetwork
+ shared.sd_model.cond_stage_model.to(devices.device)
+ shared.sd_model.first_stage_model.to(devices.device)
+ if not apply_optimizations:
+ sd_hijack.apply_optimizations()
+ shared.state.end()
+ return TrainResponse(info = "train embedding complete: filename: {filename} error: {error}".format(filename = filename, error = error))
+ except AssertionError as msg:
+ shared.state.end()
+ return TrainResponse(info = "train embedding error: {error}".format(error = error))
+
def launch(self, server_name, port):
self.app.include_router(self.router)
uvicorn.run(self.app, host=server_name, port=port)
diff --git a/modules/api/models.py b/modules/api/models.py
index a22bc6b3..c446ce7a 100644
--- a/modules/api/models.py
+++ b/modules/api/models.py
@@ -175,6 +175,15 @@ class InterrogateRequest(BaseModel):
class InterrogateResponse(BaseModel):
caption: str = Field(default=None, title="Caption", description="The generated caption for the image.")
+class TrainResponse(BaseModel):
+ info: str = Field(title="Train info", description="Response string from train embedding or hypernetwork task.")
+
+class CreateResponse(BaseModel):
+ info: str = Field(title="Create info", description="Response string from create embedding or hypernetwork task.")
+
+class PreprocessResponse(BaseModel):
+ info: str = Field(title="Preprocess info", description="Response string from preprocessing task.")
+
fields = {}
for key, metadata in opts.data_labels.items():
value = opts.data.get(key)
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index c406ffb3..3182ff03 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -378,6 +378,32 @@ def report_statistics(loss_info:dict):
print(e)
+def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False):
+ # Remove illegal characters from name.
+ name = "".join( x for x in name if (x.isalnum() or x in "._- "))
+
+ fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt")
+ if not overwrite_old:
+ assert not os.path.exists(fn), f"file {fn} already exists"
+
+ if type(layer_structure) == str:
+ layer_structure = [float(x.strip()) for x in layer_structure.split(",")]
+
+ hypernet = modules.hypernetworks.hypernetwork.Hypernetwork(
+ name=name,
+ enable_sizes=[int(x) for x in enable_sizes],
+ layer_structure=layer_structure,
+ activation_func=activation_func,
+ weight_init=weight_init,
+ add_layer_norm=add_layer_norm,
+ use_dropout=use_dropout,
+ )
+ hypernet.save(fn)
+
+ shared.reload_hypernetworks()
+
+ return fn
+
def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, steps, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
# images allows training previews to have infotext. Importing it at the top causes a circular import problem.
diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py
index c2d4b51c..e7f9e593 100644
--- a/modules/hypernetworks/ui.py
+++ b/modules/hypernetworks/ui.py
@@ -3,39 +3,16 @@ import os
import re
import gradio as gr
-import modules.textual_inversion.preprocess
-import modules.textual_inversion.textual_inversion
+import modules.hypernetworks.hypernetwork
from modules import devices, sd_hijack, shared
-from modules.hypernetworks import hypernetwork
not_available = ["hardswish", "multiheadattention"]
-keys = list(x for x in hypernetwork.HypernetworkModule.activation_dict.keys() if x not in not_available)
+keys = list(x for x in modules.hypernetworks.hypernetwork.HypernetworkModule.activation_dict.keys() if x not in not_available)
def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False):
- # Remove illegal characters from name.
- name = "".join( x for x in name if (x.isalnum() or x in "._- "))
+ filename = modules.hypernetworks.hypernetwork.create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure, activation_func, weight_init, add_layer_norm, use_dropout)
- fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt")
- if not overwrite_old:
- assert not os.path.exists(fn), f"file {fn} already exists"
-
- if type(layer_structure) == str:
- layer_structure = [float(x.strip()) for x in layer_structure.split(",")]
-
- hypernet = modules.hypernetworks.hypernetwork.Hypernetwork(
- name=name,
- enable_sizes=[int(x) for x in enable_sizes],
- layer_structure=layer_structure,
- activation_func=activation_func,
- weight_init=weight_init,
- add_layer_norm=add_layer_norm,
- use_dropout=use_dropout,
- )
- hypernet.save(fn)
-
- shared.reload_hypernetworks()
-
- return gr.Dropdown.update(choices=sorted([x for x in shared.hypernetworks.keys()])), f"Created: {fn}", ""
+ return gr.Dropdown.update(choices=sorted([x for x in shared.hypernetworks.keys()])), f"Created: {filename}", ""
def train_hypernetwork(*args):
--
cgit v1.2.1
From f60c24f8121186f8d85f1096a96ddf685f625d04 Mon Sep 17 00:00:00 2001
From: eaglgenes101
Date: Sat, 24 Dec 2022 22:16:01 -0500
Subject: Add CSS classes for the settings panels
---
modules/ui.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index 9dec61d5..65af8966 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -657,7 +657,7 @@ def create_ui():
setup_progressbar(progressbar, txt2img_preview, 'txt2img')
with gr.Row().style(equal_height=False):
- with gr.Column(variant='panel'):
+ with gr.Column(variant='panel', elem_id="txt2img_settings"):
steps = gr.Slider(minimum=1, maximum=150, step=1, label="Sampling Steps", value=20)
sampler_index = gr.Radio(label='Sampling method', elem_id="txt2img_sampling", choices=[x.name for x in samplers], value=samplers[0].name, type="index")
@@ -812,7 +812,7 @@ def create_ui():
setup_progressbar(progressbar, img2img_preview, 'img2img')
with gr.Row().style(equal_height=False):
- with gr.Column(variant='panel'):
+ with gr.Column(variant='panel', elem_id="img2img_settings"):
with gr.Tabs(elem_id="mode_img2img") as tabs_img2img_mode:
with gr.TabItem('img2img', id='img2img'):
--
cgit v1.2.1
From 61a273236ffd1366456cac7040e30972ca65dc2c Mon Sep 17 00:00:00 2001
From: Allen Benz
Date: Sat, 24 Dec 2022 20:23:12 -0800
Subject: Fix clip interrogate from the webui
A recent change made the image RGBA, which makes the clip interrogator unhappy.
deepbooru and calling the interrogator from the api already do the conversion so this is the only place that needed it.
---
modules/ui.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index 9dec61d5..7bf5abd9 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -270,7 +270,7 @@ def apply_styles(prompt, prompt_neg, style1_name, style2_name):
def interrogate(image):
- prompt = shared.interrogator.interrogate(image)
+ prompt = shared.interrogator.interrogate(image.convert("RGB"))
return gr_show(True) if prompt is None else prompt
--
cgit v1.2.1
From 8eef9d8e782aa0655241e43f67059aa7bef3bdca Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 25 Dec 2022 09:03:56 +0300
Subject: a way to add an exception to unpickler without explicitly calling
load_with_extra
---
modules/safe.py | 39 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 38 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/safe.py b/modules/safe.py
index 479c8b86..ec23a53c 100644
--- a/modules/safe.py
+++ b/modules/safe.py
@@ -103,7 +103,7 @@ def check_pt(filename, extra_handler):
def load(filename, *args, **kwargs):
- return load_with_extra(filename, *args, **kwargs)
+ return load_with_extra(filename, extra_handler=global_extra_handler, *args, **kwargs)
def load_with_extra(filename, extra_handler=None, *args, **kwargs):
@@ -151,5 +151,42 @@ def load_with_extra(filename, extra_handler=None, *args, **kwargs):
return unsafe_torch_load(filename, *args, **kwargs)
+class Extra:
+ """
+ A class for temporarily setting the global handler for when you can't explicitly call load_with_extra
+ (because it's not your code making the torch.load call). The intended use is like this:
+
+```
+import torch
+from modules import safe
+
+def handler(module, name):
+ if module == 'torch' and name in ['float64', 'float16']:
+ return getattr(torch, name)
+
+ return None
+
+with safe.Extra(handler):
+ x = torch.load('model.pt')
+```
+ """
+
+ def __init__(self, handler):
+ self.handler = handler
+
+ def __enter__(self):
+ global global_extra_handler
+
+ assert global_extra_handler is None, 'already inside an Extra() block'
+ global_extra_handler = self.handler
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ global global_extra_handler
+
+ global_extra_handler = None
+
+
unsafe_torch_load = torch.load
torch.load = load
+global_extra_handler = None
+
--
cgit v1.2.1
From 5be9387b230794a8c771120577cb213490c940c0 Mon Sep 17 00:00:00 2001
From: Philpax
Date: Sun, 25 Dec 2022 21:45:44 +1100
Subject: fix(api): only begin/end state in lock
---
modules/api/api.py | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 1ceba75d..59b81c93 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -130,14 +130,12 @@ class Api:
if populate.sampler_name:
populate.sampler_index = None # prevent a warning later on
p = StableDiffusionProcessingTxt2Img(**vars(populate))
- # Override object param
-
- shared.state.begin()
with self.queue_lock:
+ shared.state.begin()
processed = process_images(p)
+ shared.state.end()
- shared.state.end()
b64images = list(map(encode_pil_to_base64, processed.images))
@@ -169,12 +167,10 @@ class Api:
p.init_images = [decode_base64_to_image(x) for x in init_images]
- shared.state.begin()
-
with self.queue_lock:
+ shared.state.begin()
processed = process_images(p)
-
- shared.state.end()
+ shared.state.end()
b64images = list(map(encode_pil_to_base64, processed.images))
--
cgit v1.2.1
From 893933e05ad267778111b4fad6d1ecb80937afdf Mon Sep 17 00:00:00 2001
From: hitomi
Date: Sun, 25 Dec 2022 20:49:25 +0800
Subject: Add memory cache for VAE weights
---
modules/sd_vae.py | 31 +++++++++++++++++++++++++------
modules/shared.py | 1 +
2 files changed, 26 insertions(+), 6 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_vae.py b/modules/sd_vae.py
index 3856418e..ac71d62d 100644
--- a/modules/sd_vae.py
+++ b/modules/sd_vae.py
@@ -1,5 +1,6 @@
import torch
import os
+import collections
from collections import namedtuple
from modules import shared, devices, script_callbacks
from modules.paths import models_path
@@ -30,6 +31,7 @@ base_vae = None
loaded_vae_file = None
checkpoint_info = None
+checkpoints_loaded = collections.OrderedDict()
def get_base_vae(model):
if base_vae is not None and checkpoint_info == model.sd_checkpoint_info and model:
@@ -149,13 +151,30 @@ def load_vae(model, vae_file=None):
global first_load, vae_dict, vae_list, loaded_vae_file
# save_settings = False
+ cache_enabled = shared.opts.sd_vae_checkpoint_cache > 0
+
if vae_file:
- assert os.path.isfile(vae_file), f"VAE file doesn't exist: {vae_file}"
- print(f"Loading VAE weights from: {vae_file}")
- store_base_vae(model)
- vae_ckpt = torch.load(vae_file, map_location=shared.weight_load_location)
- vae_dict_1 = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss" and k not in vae_ignore_keys}
- _load_vae_dict(model, vae_dict_1)
+ if cache_enabled and vae_file in checkpoints_loaded:
+ # use vae checkpoint cache
+ print(f"Loading VAE weights [{get_filename(vae_file)}] from cache")
+ store_base_vae(model)
+ _load_vae_dict(model, checkpoints_loaded[vae_file])
+ else:
+ assert os.path.isfile(vae_file), f"VAE file doesn't exist: {vae_file}"
+ print(f"Loading VAE weights from: {vae_file}")
+ store_base_vae(model)
+ vae_ckpt = torch.load(vae_file, map_location=shared.weight_load_location)
+ vae_dict_1 = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss" and k not in vae_ignore_keys}
+ _load_vae_dict(model, vae_dict_1)
+
+ if cache_enabled:
+ # cache newly loaded vae
+ checkpoints_loaded[vae_file] = vae_dict_1.copy()
+
+ # clean up cache if limit is reached
+ if cache_enabled:
+ while len(checkpoints_loaded) > shared.opts.sd_vae_checkpoint_cache + 1: # we need to count the current model
+ checkpoints_loaded.popitem(last=False) # LRU
# If vae used is not in dict, update it
# It will be removed on refresh though
diff --git a/modules/shared.py b/modules/shared.py
index d4ddeea0..671d30e1 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -356,6 +356,7 @@ options_templates.update(options_section(('training', "Training"), {
options_templates.update(options_section(('sd', "Stable Diffusion"), {
"sd_model_checkpoint": OptionInfo(None, "Stable Diffusion checkpoint", gr.Dropdown, lambda: {"choices": list_checkpoint_tiles()}, refresh=refresh_checkpoints),
"sd_checkpoint_cache": OptionInfo(0, "Checkpoints to cache in RAM", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1}),
+ "sd_vae_checkpoint_cache": OptionInfo(0, "VAE Checkpoints to cache in RAM", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1}),
"sd_vae": OptionInfo("auto", "SD VAE", gr.Dropdown, lambda: {"choices": sd_vae.vae_list}, refresh=sd_vae.refresh_vae_list),
"sd_vae_as_default": OptionInfo(False, "Ignore selected VAE for stable diffusion checkpoints that have their own .vae.pt next to them"),
"sd_hypernetwork": OptionInfo("None", "Hypernetwork", gr.Dropdown, lambda: {"choices": ["None"] + [x for x in hypernetworks.keys()]}, refresh=reload_hypernetworks),
--
cgit v1.2.1
From 4af3ca5393151d61363c30eef4965e694eeac15e Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Mon, 26 Dec 2022 10:11:28 +0300
Subject: make it so that blank ENSD does not break image generation
---
modules/processing.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index 4a406084..0a9a8f95 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -338,13 +338,14 @@ def slerp(val, low, high):
def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, seed_resize_from_h=0, seed_resize_from_w=0, p=None):
+ eta_noise_seed_delta = opts.eta_noise_seed_delta or 0
xs = []
# if we have multiple seeds, this means we are working with batch size>1; this then
# enables the generation of additional tensors with noise that the sampler will use during its processing.
# Using those pre-generated tensors instead of simple torch.randn allows a batch with seeds [100, 101] to
# produce the same images as with two batches [100], [101].
- if p is not None and p.sampler is not None and (len(seeds) > 1 and opts.enable_batch_seeds or opts.eta_noise_seed_delta > 0):
+ if p is not None and p.sampler is not None and (len(seeds) > 1 and opts.enable_batch_seeds or eta_noise_seed_delta > 0):
sampler_noises = [[] for _ in range(p.sampler.number_of_needed_noises(p))]
else:
sampler_noises = None
@@ -384,8 +385,8 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
if sampler_noises is not None:
cnt = p.sampler.number_of_needed_noises(p)
- if opts.eta_noise_seed_delta > 0:
- torch.manual_seed(seed + opts.eta_noise_seed_delta)
+ if eta_noise_seed_delta > 0:
+ torch.manual_seed(seed + eta_noise_seed_delta)
for j in range(cnt):
sampler_noises[j].append(devices.randn_without_seed(tuple(noise_shape)))
--
cgit v1.2.1
From ae955b0146a52ea2474c79655ede0d361829ef63 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Mon, 26 Dec 2022 09:53:26 -0500
Subject: fix rgba to rgb when using jpeg output
---
modules/images.py | 3 +++
1 file changed, 3 insertions(+)
(limited to 'modules')
diff --git a/modules/images.py b/modules/images.py
index 31d4528d..962a955d 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -525,6 +525,9 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i
image_to_save.save(temp_file_path, format=image_format, quality=opts.jpeg_quality, pnginfo=pnginfo_data)
elif extension.lower() in (".jpg", ".jpeg", ".webp"):
+ if image_to_save.mode == 'RGBA':
+ image_to_save = image_to_save.convert("RGB")
+
image_to_save.save(temp_file_path, format=image_format, quality=opts.jpeg_quality)
if opts.enable_pnginfo and info is not None:
--
cgit v1.2.1
From 5ba04f9ec050a66e918571f07e8863f157f05b44 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 21 Dec 2022 13:45:58 +0100
Subject: Attempting to solve slow loads for `safetensors`.
Fixes #5893
---
modules/sd_models.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index ecdd91c5..cd938656 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -168,7 +168,10 @@ def get_state_dict_from_checkpoint(pl_sd):
def read_state_dict(checkpoint_file, print_global_state=False, map_location=None):
_, extension = os.path.splitext(checkpoint_file)
if extension.lower() == ".safetensors":
- pl_sd = safetensors.torch.load_file(checkpoint_file, device=map_location or shared.weight_load_location)
+ device = map_location or shared.weight_load_location
+ if device is None:
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ pl_sd = safetensors.torch.load_file(checkpoint_file, device=device)
else:
pl_sd = torch.load(checkpoint_file, map_location=map_location or shared.weight_load_location)
--
cgit v1.2.1
From 5958bbd244703f7c248a91e86dea5d52acc85505 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 30 Dec 2022 19:36:36 -0500
Subject: add additional memory states
---
modules/memmon.py | 3 +++
1 file changed, 3 insertions(+)
(limited to 'modules')
diff --git a/modules/memmon.py b/modules/memmon.py
index 9fb9b687..a7060f58 100644
--- a/modules/memmon.py
+++ b/modules/memmon.py
@@ -71,10 +71,13 @@ class MemUsageMonitor(threading.Thread):
def read(self):
if not self.disabled:
free, total = torch.cuda.mem_get_info()
+ self.data["free"] = free
self.data["total"] = total
torch_stats = torch.cuda.memory_stats(self.device)
+ self.data["active"] = torch_stats["active.all.current"]
self.data["active_peak"] = torch_stats["active_bytes.all.peak"]
+ self.data["reserved"] = torch_stats["reserved_bytes.all.current"]
self.data["reserved_peak"] = torch_stats["reserved_bytes.all.peak"]
self.data["system_peak"] = total - self.data["min_free"]
--
cgit v1.2.1
From d3aa2a48e1e896b6ffafda5367200a4bbd46b0d7 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 30 Dec 2022 19:38:53 -0500
Subject: remove unnecessary console message
---
modules/sd_hijack_inpainting.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py
index bb5499b3..06b75772 100644
--- a/modules/sd_hijack_inpainting.py
+++ b/modules/sd_hijack_inpainting.py
@@ -178,7 +178,7 @@ def sample_plms(self,
# sampling
C, H, W = shape
size = (batch_size, C, H, W)
- print(f'Data shape for PLMS sampling is {size}')
+ # print(f'Data shape for PLMS sampling is {size}') # remove unnecessary message
samples, intermediates = self.plms_sampling(conditioning, size,
callback=callback,
--
cgit v1.2.1
From 463048344fc036b262aa132584b65ee6e9fec6cf Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 30 Dec 2022 19:41:47 -0500
Subject: fix shared state dictionary
---
modules/shared.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index d4ddeea0..9a13fb60 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -168,7 +168,7 @@ class State:
def dict(self):
obj = {
"skipped": self.skipped,
- "interrupted": self.skipped,
+ "interrupted": self.interrupted,
"job": self.job,
"job_count": self.job_count,
"job_no": self.job_no,
--
cgit v1.2.1
From fef98723b2b1c7a9893ead41bbefcb36192babd6 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 31 Dec 2022 12:44:26 +0300
Subject: set sd_model for API later, inside the lock, to prevent multiple
requests with different models ending up with incorrect results #5877 #6012
---
modules/api/api.py | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 59b81c93..11daff0d 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -121,7 +121,6 @@ class Api:
def text2imgapi(self, txt2imgreq: StableDiffusionTxt2ImgProcessingAPI):
populate = txt2imgreq.copy(update={ # Override __init__ params
- "sd_model": shared.sd_model,
"sampler_name": validate_sampler_name(txt2imgreq.sampler_name or txt2imgreq.sampler_index),
"do_not_save_samples": True,
"do_not_save_grid": True
@@ -129,9 +128,10 @@ class Api:
)
if populate.sampler_name:
populate.sampler_index = None # prevent a warning later on
- p = StableDiffusionProcessingTxt2Img(**vars(populate))
with self.queue_lock:
+ p = StableDiffusionProcessingTxt2Img(sd_model=shared.sd_model, **vars(populate))
+
shared.state.begin()
processed = process_images(p)
shared.state.end()
@@ -151,7 +151,6 @@ class Api:
mask = decode_base64_to_image(mask)
populate = img2imgreq.copy(update={ # Override __init__ params
- "sd_model": shared.sd_model,
"sampler_name": validate_sampler_name(img2imgreq.sampler_name or img2imgreq.sampler_index),
"do_not_save_samples": True,
"do_not_save_grid": True,
@@ -163,11 +162,11 @@ class Api:
args = vars(populate)
args.pop('include_init_images', None) # this is meant to be done by "exclude": True in model, but it's for a reason that I cannot determine.
- p = StableDiffusionProcessingImg2Img(**args)
-
- p.init_images = [decode_base64_to_image(x) for x in init_images]
with self.queue_lock:
+ p = StableDiffusionProcessingImg2Img(sd_model=shared.sd_model, **args)
+ p.init_images = [decode_base64_to_image(x) for x in init_images]
+
shared.state.begin()
processed = process_images(p)
shared.state.end()
--
cgit v1.2.1
From 65be1df7bb55b21a3d76630a397c820218cbd12a Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 31 Dec 2022 07:46:04 -0500
Subject: initialize result so not to cause exception on empty results
---
modules/interrogate.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/interrogate.py b/modules/interrogate.py
index 46935210..6f761c5a 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -135,7 +135,7 @@ class InterrogateModels:
return caption[0]
def interrogate(self, pil_image):
- res = None
+ res = ""
try:
--
cgit v1.2.1
From f34c7341720fb2059992926c9f9ae6ff25f7385b Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 31 Dec 2022 18:06:35 +0300
Subject: alt-diffusion integration
---
modules/sd_hijack.py | 18 ++++++++++--------
modules/sd_hijack_clip.py | 14 +++++---------
modules/sd_hijack_xlmr.py | 34 ++++++++++++++++++++++++++++++++++
modules/shared.py | 10 +---------
4 files changed, 50 insertions(+), 26 deletions(-)
create mode 100644 modules/sd_hijack_xlmr.py
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index bce23b03..edcbaf52 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -5,7 +5,7 @@ import modules.textual_inversion.textual_inversion
from modules import devices, sd_hijack_optimizations, shared, sd_hijack_checkpoint
from modules.hypernetworks import hypernetwork
from modules.shared import cmd_opts
-from modules import sd_hijack_clip, sd_hijack_open_clip, sd_hijack_unet
+from modules import sd_hijack_clip, sd_hijack_open_clip, sd_hijack_unet, sd_hijack_xlmr, xlmr
from modules.sd_hijack_optimizations import invokeAI_mps_available
@@ -68,6 +68,7 @@ def fix_checkpoint():
ldm.modules.diffusionmodules.openaimodel.ResBlock.forward = sd_hijack_checkpoint.ResBlock_forward
ldm.modules.diffusionmodules.openaimodel.AttentionBlock.forward = sd_hijack_checkpoint.AttentionBlock_forward
+
class StableDiffusionModelHijack:
fixes = None
comments = []
@@ -79,21 +80,22 @@ class StableDiffusionModelHijack:
def hijack(self, m):
- if shared.text_model_name == "XLMR-Large":
+ if type(m.cond_stage_model) == xlmr.BertSeriesModelWithTransformation:
model_embeddings = m.cond_stage_model.roberta.embeddings
model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.word_embeddings, self)
- m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
-
+ m.cond_stage_model = sd_hijack_xlmr.FrozenXLMREmbedderWithCustomWords(m.cond_stage_model, self)
+
elif type(m.cond_stage_model) == ldm.modules.encoders.modules.FrozenCLIPEmbedder:
model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self)
m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
- apply_optimizations()
+
elif type(m.cond_stage_model) == ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder:
m.cond_stage_model.model.token_embedding = EmbeddingsWithFixes(m.cond_stage_model.model.token_embedding, self)
m.cond_stage_model = sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
- apply_optimizations()
-
+
+ apply_optimizations()
+
self.clip = m.cond_stage_model
fix_checkpoint()
@@ -109,7 +111,7 @@ class StableDiffusionModelHijack:
def undo_hijack(self, m):
- if shared.text_model_name == "XLMR-Large":
+ if type(m.cond_stage_model) == xlmr.BertSeriesModelWithTransformation:
m.cond_stage_model = m.cond_stage_model.wrapped
elif type(m.cond_stage_model) == sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords:
diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index 9ea6e1ce..6ec50cca 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -4,7 +4,6 @@ import torch
from modules import prompt_parser, devices
from modules.shared import opts
-import modules.shared as shared
def get_target_prompt_token_count(token_count):
return math.ceil(max(token_count, 1) / 75) * 75
@@ -177,9 +176,6 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
def forward(self, text):
- if shared.text_model_name == "XLMR-Large":
- return self.wrapped.encode(text)
-
use_old = opts.use_old_emphasis_implementation
if use_old:
batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count = self.process_text_old(text)
@@ -257,13 +253,13 @@ class FrozenCLIPEmbedderWithCustomWords(FrozenCLIPEmbedderWithCustomWordsBase):
def __init__(self, wrapped, hijack):
super().__init__(wrapped, hijack)
self.tokenizer = wrapped.tokenizer
- if shared.text_model_name == "XLMR-Large":
- self.comma_token = None
- else :
- self.comma_token = [v for k, v in self.tokenizer.get_vocab().items() if k == ','][0]
+
+ vocab = self.tokenizer.get_vocab()
+
+ self.comma_token = vocab.get(',', None)
self.token_mults = {}
- tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if '(' in k or ')' in k or '[' in k or ']' in k]
+ tokens_with_parens = [(k, v) for k, v in vocab.items() if '(' in k or ')' in k or '[' in k or ']' in k]
for text, ident in tokens_with_parens:
mult = 1.0
for c in text:
diff --git a/modules/sd_hijack_xlmr.py b/modules/sd_hijack_xlmr.py
new file mode 100644
index 00000000..4ac51c38
--- /dev/null
+++ b/modules/sd_hijack_xlmr.py
@@ -0,0 +1,34 @@
+import open_clip.tokenizer
+import torch
+
+from modules import sd_hijack_clip, devices
+from modules.shared import opts
+
+
+class FrozenXLMREmbedderWithCustomWords(sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords):
+ def __init__(self, wrapped, hijack):
+ super().__init__(wrapped, hijack)
+
+ self.id_start = wrapped.config.bos_token_id
+ self.id_end = wrapped.config.eos_token_id
+ self.id_pad = wrapped.config.pad_token_id
+
+ self.comma_token = self.tokenizer.get_vocab().get(',', None) # alt diffusion doesn't have bits for comma
+
+ def encode_with_transformers(self, tokens):
+ # there's no CLIP Skip here because all hidden layers have size of 1024 and the last one uses a
+ # trained layer to transform those 1024 into 768 for unet; so you can't choose which transformer
+ # layer to work with - you have to use the last
+
+ attention_mask = (tokens != self.id_pad).to(device=tokens.device, dtype=torch.int64)
+ features = self.wrapped(input_ids=tokens, attention_mask=attention_mask)
+ z = features['projection_state']
+
+ return z
+
+ def encode_embedding_init_text(self, init_text, nvpt):
+ embedding_layer = self.wrapped.roberta.embeddings
+ ids = self.wrapped.tokenizer(init_text, max_length=nvpt, return_tensors="pt", add_special_tokens=False)["input_ids"]
+ embedded = embedding_layer.token_embedding.wrapped(ids.to(devices.device)).squeeze(0)
+
+ return embedded
diff --git a/modules/shared.py b/modules/shared.py
index 2b31e717..715b9169 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -23,7 +23,7 @@ demo = None
sd_model_file = os.path.join(script_path, 'model.ckpt')
default_sd_model_file = sd_model_file
parser = argparse.ArgumentParser()
-parser.add_argument("--config", type=str, default=os.path.join(script_path, "v1-inference.yaml"), help="path to config which constructs model",)
+parser.add_argument("--config", type=str, default=os.path.join(script_path, "configs/v1-inference.yaml"), help="path to config which constructs model",)
parser.add_argument("--ckpt", type=str, default=sd_model_file, help="path to checkpoint of stable diffusion model; if specified, this checkpoint will be added to the list of checkpoints and loaded",)
parser.add_argument("--ckpt-dir", type=str, default=None, help="Path to directory with stable diffusion checkpoints")
parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN'))
@@ -108,14 +108,6 @@ restricted_opts = {
"outdir_txt2img_grids",
"outdir_save",
}
-from omegaconf import OmegaConf
-config = OmegaConf.load(f"{cmd_opts.config}")
-# XLMR-Large
-try:
- text_model_name = config.model.params.cond_stage_config.params.name
-
-except :
- text_model_name = "stable_diffusion"
cmd_opts.disable_extension_access = (cmd_opts.share or cmd_opts.listen or cmd_opts.server_name) and not cmd_opts.enable_insecure_extension_access
--
cgit v1.2.1
From f55ac33d446185680604e872ceda2ae858821d5c Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 31 Dec 2022 11:27:02 -0500
Subject: validate textual inversion embeddings
---
modules/sd_models.py | 3 ++
modules/textual_inversion/textual_inversion.py | 43 +++++++++++++++++++++++---
modules/ui.py | 2 --
3 files changed, 41 insertions(+), 7 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index ecdd91c5..ebd4dff7 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -325,6 +325,9 @@ def load_model(checkpoint_info=None):
script_callbacks.model_loaded_callback(sd_model)
print("Model loaded.")
+
+ sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings(force_reload = True) # Reload embeddings after model load as they may or may not fit the model
+
return sd_model
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index f6112578..103ace60 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -23,6 +23,8 @@ class Embedding:
self.vec = vec
self.name = name
self.step = step
+ self.shape = None
+ self.vectors = 0
self.cached_checksum = None
self.sd_checkpoint = None
self.sd_checkpoint_name = None
@@ -57,8 +59,10 @@ class EmbeddingDatabase:
def __init__(self, embeddings_dir):
self.ids_lookup = {}
self.word_embeddings = {}
+ self.skipped_embeddings = []
self.dir_mtime = None
self.embeddings_dir = embeddings_dir
+ self.expected_shape = -1
def register_embedding(self, embedding, model):
@@ -75,14 +79,35 @@ class EmbeddingDatabase:
return embedding
- def load_textual_inversion_embeddings(self):
+ def get_expected_shape(self):
+ expected_shape = -1 # initialize with unknown
+ idx = torch.tensor(0).to(shared.device)
+ if expected_shape == -1:
+ try: # matches sd15 signature
+ first_embedding = shared.sd_model.cond_stage_model.wrapped.transformer.text_model.embeddings.token_embedding.wrapped(idx)
+ expected_shape = first_embedding.shape[0]
+ except:
+ pass
+ if expected_shape == -1:
+ try: # matches sd20 signature
+ first_embedding = shared.sd_model.cond_stage_model.wrapped.model.token_embedding.wrapped(idx)
+ expected_shape = first_embedding.shape[0]
+ except:
+ pass
+ if expected_shape == -1:
+ print('Could not determine expected embeddings shape from model')
+ return expected_shape
+
+ def load_textual_inversion_embeddings(self, force_reload = False):
mt = os.path.getmtime(self.embeddings_dir)
- if self.dir_mtime is not None and mt <= self.dir_mtime:
+ if not force_reload and self.dir_mtime is not None and mt <= self.dir_mtime:
return
self.dir_mtime = mt
self.ids_lookup.clear()
self.word_embeddings.clear()
+ self.skipped_embeddings = []
+ self.expected_shape = self.get_expected_shape()
def process_file(path, filename):
name = os.path.splitext(filename)[0]
@@ -122,7 +147,14 @@ class EmbeddingDatabase:
embedding.step = data.get('step', None)
embedding.sd_checkpoint = data.get('sd_checkpoint', None)
embedding.sd_checkpoint_name = data.get('sd_checkpoint_name', None)
- self.register_embedding(embedding, shared.sd_model)
+ embedding.vectors = vec.shape[0]
+ embedding.shape = vec.shape[-1]
+
+ if (self.expected_shape == -1) or (self.expected_shape == embedding.shape):
+ self.register_embedding(embedding, shared.sd_model)
+ else:
+ self.skipped_embeddings.append(name)
+ # print('Skipping embedding {name}: shape was {shape} expected {expected}'.format(name = name, shape = embedding.shape, expected = self.expected_shape))
for fn in os.listdir(self.embeddings_dir):
try:
@@ -137,8 +169,9 @@ class EmbeddingDatabase:
print(traceback.format_exc(), file=sys.stderr)
continue
- print(f"Loaded a total of {len(self.word_embeddings)} textual inversion embeddings.")
- print("Embeddings:", ', '.join(self.word_embeddings.keys()))
+ print("Textual inversion embeddings {num} loaded: {val}".format(num = len(self.word_embeddings), val = ', '.join(self.word_embeddings.keys())))
+ if (len(self.skipped_embeddings) > 0):
+ print("Textual inversion embeddings {num} skipped: {val}".format(num = len(self.skipped_embeddings), val = ', '.join(self.skipped_embeddings)))
def find_embedding_at_position(self, tokens, offset):
token = tokens[offset]
diff --git a/modules/ui.py b/modules/ui.py
index 57ee0465..397dd804 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1157,8 +1157,6 @@ def create_ui():
with gr.Column(variant='panel'):
submit_result = gr.Textbox(elem_id="modelmerger_result", show_label=False)
- sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings()
-
with gr.Blocks(analytics_enabled=False) as train_interface:
with gr.Row().style(equal_height=False):
gr.HTML(value="See wiki for detailed explanation.
")
--
cgit v1.2.1
From bdbe09827b39be63c9c0b3636132ca58da38ebf6 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 31 Dec 2022 22:49:09 +0300
Subject: changed embedding accepted shape detection to use existing code and
support the new alt-diffusion model, and reformatted messages a bit #6149
---
modules/textual_inversion/textual_inversion.py | 30 ++++++--------------------
1 file changed, 6 insertions(+), 24 deletions(-)
(limited to 'modules')
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 103ace60..66f40367 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -80,23 +80,8 @@ class EmbeddingDatabase:
return embedding
def get_expected_shape(self):
- expected_shape = -1 # initialize with unknown
- idx = torch.tensor(0).to(shared.device)
- if expected_shape == -1:
- try: # matches sd15 signature
- first_embedding = shared.sd_model.cond_stage_model.wrapped.transformer.text_model.embeddings.token_embedding.wrapped(idx)
- expected_shape = first_embedding.shape[0]
- except:
- pass
- if expected_shape == -1:
- try: # matches sd20 signature
- first_embedding = shared.sd_model.cond_stage_model.wrapped.model.token_embedding.wrapped(idx)
- expected_shape = first_embedding.shape[0]
- except:
- pass
- if expected_shape == -1:
- print('Could not determine expected embeddings shape from model')
- return expected_shape
+ vec = shared.sd_model.cond_stage_model.encode_embedding_init_text(",", 1)
+ return vec.shape[1]
def load_textual_inversion_embeddings(self, force_reload = False):
mt = os.path.getmtime(self.embeddings_dir)
@@ -112,8 +97,6 @@ class EmbeddingDatabase:
def process_file(path, filename):
name = os.path.splitext(filename)[0]
- data = []
-
if os.path.splitext(filename.upper())[-1] in ['.PNG', '.WEBP', '.JXL', '.AVIF']:
embed_image = Image.open(path)
if hasattr(embed_image, 'text') and 'sd-ti-embedding' in embed_image.text:
@@ -150,11 +133,10 @@ class EmbeddingDatabase:
embedding.vectors = vec.shape[0]
embedding.shape = vec.shape[-1]
- if (self.expected_shape == -1) or (self.expected_shape == embedding.shape):
+ if self.expected_shape == -1 or self.expected_shape == embedding.shape:
self.register_embedding(embedding, shared.sd_model)
else:
self.skipped_embeddings.append(name)
- # print('Skipping embedding {name}: shape was {shape} expected {expected}'.format(name = name, shape = embedding.shape, expected = self.expected_shape))
for fn in os.listdir(self.embeddings_dir):
try:
@@ -169,9 +151,9 @@ class EmbeddingDatabase:
print(traceback.format_exc(), file=sys.stderr)
continue
- print("Textual inversion embeddings {num} loaded: {val}".format(num = len(self.word_embeddings), val = ', '.join(self.word_embeddings.keys())))
- if (len(self.skipped_embeddings) > 0):
- print("Textual inversion embeddings {num} skipped: {val}".format(num = len(self.skipped_embeddings), val = ', '.join(self.skipped_embeddings)))
+ print(f"Textual inversion embeddings loaded({len(self.word_embeddings)}): {', '.join(self.word_embeddings.keys())}")
+ if len(self.skipped_embeddings) > 0:
+ print(f"Textual inversion embeddings skipped({len(self.skipped_embeddings)}): {', '.join(self.skipped_embeddings)}")
def find_embedding_at_position(self, tokens, offset):
token = tokens[offset]
--
cgit v1.2.1
From f4535f6e4f001314bd155bc6e1b6908e02792b9a Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 31 Dec 2022 23:40:55 +0300
Subject: make it so that memory/embeddings info is displayed in a separate UI
element from generation parameters, and is preserved when you change the
displayed infotext by clicking on gallery images
---
modules/img2img.py | 2 +-
modules/processing.py | 5 +++--
modules/txt2img.py | 2 +-
modules/ui.py | 31 +++++++++++++++++--------------
4 files changed, 22 insertions(+), 18 deletions(-)
(limited to 'modules')
diff --git a/modules/img2img.py b/modules/img2img.py
index 81da4b13..ca58b5d8 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -162,4 +162,4 @@ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, pro
if opts.do_not_show_images:
processed.images = []
- return processed.images, generation_info_js, plaintext_to_html(processed.info)
+ return processed.images, generation_info_js, plaintext_to_html(processed.info), plaintext_to_html(processed.comments)
diff --git a/modules/processing.py b/modules/processing.py
index 0a9a8f95..42dc19ea 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -239,7 +239,7 @@ class StableDiffusionProcessing():
class Processed:
- def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None):
+ def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments=""):
self.images = images_list
self.prompt = p.prompt
self.negative_prompt = p.negative_prompt
@@ -247,6 +247,7 @@ class Processed:
self.subseed = subseed
self.subseed_strength = p.subseed_strength
self.info = info
+ self.comments = comments
self.width = p.width
self.height = p.height
self.sampler_name = p.sampler_name
@@ -646,7 +647,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
devices.torch_gc()
- res = Processed(p, output_images, p.all_seeds[0], infotext() + "".join(["\n\n" + x for x in comments]), subseed=p.all_subseeds[0], index_of_first_image=index_of_first_image, infotexts=infotexts)
+ res = Processed(p, output_images, p.all_seeds[0], infotext(), comments="".join(["\n\n" + x for x in comments]), subseed=p.all_subseeds[0], index_of_first_image=index_of_first_image, infotexts=infotexts)
if p.scripts is not None:
p.scripts.postprocess(p, res)
diff --git a/modules/txt2img.py b/modules/txt2img.py
index c8f81176..7f61e19a 100644
--- a/modules/txt2img.py
+++ b/modules/txt2img.py
@@ -59,4 +59,4 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2:
if opts.do_not_show_images:
processed.images = []
- return processed.images, generation_info_js, plaintext_to_html(processed.info)
+ return processed.images, generation_info_js, plaintext_to_html(processed.info), plaintext_to_html(processed.comments)
diff --git a/modules/ui.py b/modules/ui.py
index 397dd804..f550ad00 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -159,7 +159,7 @@ def save_files(js_data, images, do_make_zip, index):
zip_file.writestr(filenames[i], f.read())
fullfns.insert(0, zip_filepath)
- return gr.File.update(value=fullfns, visible=True), '', '', plaintext_to_html(f"Saved: {filenames[0]}")
+ return gr.File.update(value=fullfns, visible=True), plaintext_to_html(f"Saved: {filenames[0]}")
@@ -593,6 +593,8 @@ Requested path was: {f}
with gr.Group():
html_info = gr.HTML()
+ html_log = gr.HTML()
+
generation_info = gr.Textbox(visible=False)
if tabname == 'txt2img' or tabname == 'img2img':
generation_info_button = gr.Button(visible=False, elem_id=f"{tabname}_generation_info_button")
@@ -615,16 +617,16 @@ Requested path was: {f}
],
outputs=[
download_files,
- html_info,
- html_info,
- html_info,
+ html_log,
]
)
else:
html_info_x = gr.HTML()
html_info = gr.HTML()
+ html_log = gr.HTML()
+
parameters_copypaste.bind_buttons(buttons, result_gallery, "txt2img" if tabname == "txt2img" else None)
- return result_gallery, generation_info if tabname != "extras" else html_info_x, html_info
+ return result_gallery, generation_info if tabname != "extras" else html_info_x, html_info, html_log
def create_ui():
@@ -686,14 +688,14 @@ def create_ui():
with gr.Group():
custom_inputs = modules.scripts.scripts_txt2img.setup_ui()
- txt2img_gallery, generation_info, html_info = create_output_panel("txt2img", opts.outdir_txt2img_samples)
+ txt2img_gallery, generation_info, html_info, html_log = create_output_panel("txt2img", opts.outdir_txt2img_samples)
parameters_copypaste.bind_buttons({"txt2img": txt2img_paste}, None, txt2img_prompt)
connect_reuse_seed(seed, reuse_seed, generation_info, dummy_component, is_subseed=False)
connect_reuse_seed(subseed, reuse_subseed, generation_info, dummy_component, is_subseed=True)
txt2img_args = dict(
- fn=wrap_gradio_gpu_call(modules.txt2img.txt2img),
+ fn=wrap_gradio_gpu_call(modules.txt2img.txt2img, extra_outputs=[None, '', '']),
_js="submit",
inputs=[
txt2img_prompt,
@@ -720,7 +722,8 @@ def create_ui():
outputs=[
txt2img_gallery,
generation_info,
- html_info
+ html_info,
+ html_log,
],
show_progress=False,
)
@@ -799,7 +802,6 @@ def create_ui():
with gr.Blocks(analytics_enabled=False) as img2img_interface:
img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_deepbooru, img2img_prompt_style_apply, img2img_save_style, img2img_paste,token_counter, token_button = create_toprow(is_img2img=True)
-
with gr.Row(elem_id='img2img_progress_row'):
img2img_prompt_img = gr.File(label="", elem_id="img2img_prompt_image", file_count="single", type="bytes", visible=False)
@@ -883,7 +885,7 @@ def create_ui():
with gr.Group():
custom_inputs = modules.scripts.scripts_img2img.setup_ui()
- img2img_gallery, generation_info, html_info = create_output_panel("img2img", opts.outdir_img2img_samples)
+ img2img_gallery, generation_info, html_info, html_log = create_output_panel("img2img", opts.outdir_img2img_samples)
parameters_copypaste.bind_buttons({"img2img": img2img_paste}, None, img2img_prompt)
connect_reuse_seed(seed, reuse_seed, generation_info, dummy_component, is_subseed=False)
@@ -915,7 +917,7 @@ def create_ui():
)
img2img_args = dict(
- fn=wrap_gradio_gpu_call(modules.img2img.img2img),
+ fn=wrap_gradio_gpu_call(modules.img2img.img2img, extra_outputs=[None, '', '']),
_js="submit_img2img",
inputs=[
dummy_component,
@@ -954,7 +956,8 @@ def create_ui():
outputs=[
img2img_gallery,
generation_info,
- html_info
+ html_info,
+ html_log,
],
show_progress=False,
)
@@ -1078,10 +1081,10 @@ def create_ui():
with gr.Group():
upscale_before_face_fix = gr.Checkbox(label='Upscale Before Restoring Faces', value=False)
- result_images, html_info_x, html_info = create_output_panel("extras", opts.outdir_extras_samples)
+ result_images, html_info_x, html_info, html_log = create_output_panel("extras", opts.outdir_extras_samples)
submit.click(
- fn=wrap_gradio_gpu_call(modules.extras.run_extras),
+ fn=wrap_gradio_gpu_call(modules.extras.run_extras, extra_outputs=[None, '']),
_js="get_extras_tab_index",
inputs=[
dummy_component,
--
cgit v1.2.1
From 360feed9b55fb03060c236773867b08b4265645d Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 1 Jan 2023 00:38:58 +0300
Subject: HAPPY NEW YEAR
make save to zip into its own button instead of a checkbox
---
modules/ui.py | 30 ++++++++++++++++++++++--------
1 file changed, 22 insertions(+), 8 deletions(-)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index f550ad00..279b5110 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -570,13 +570,14 @@ Requested path was: {f}
generation_info = None
with gr.Column():
- with gr.Row():
+ with gr.Row(elem_id=f"image_buttons_{tabname}"):
+ open_folder_button = gr.Button(folder_symbol, elem_id="hidden_element" if shared.cmd_opts.hide_ui_dir_config else 'open_folder')
+
if tabname != "extras":
save = gr.Button('Save', elem_id=f'save_{tabname}')
+ save_zip = gr.Button('Zip', elem_id=f'save_zip_{tabname}')
buttons = parameters_copypaste.create_buttons(["img2img", "inpaint", "extras"])
- button_id = "hidden_element" if shared.cmd_opts.hide_ui_dir_config else 'open_folder'
- open_folder_button = gr.Button(folder_symbol, elem_id=button_id)
open_folder_button.click(
fn=lambda: open_folder(opts.outdir_samples or outdir),
@@ -585,9 +586,6 @@ Requested path was: {f}
)
if tabname != "extras":
- with gr.Row():
- do_make_zip = gr.Checkbox(label="Make Zip when Save?", value=False)
-
with gr.Row():
download_files = gr.File(None, file_count="multiple", interactive=False, show_label=False, visible=False)
@@ -608,11 +606,11 @@ Requested path was: {f}
save.click(
fn=wrap_gradio_call(save_files),
- _js="(x, y, z, w) => [x, y, z, selected_gallery_index()]",
+ _js="(x, y, z, w) => [x, y, false, selected_gallery_index()]",
inputs=[
generation_info,
result_gallery,
- do_make_zip,
+ html_info,
html_info,
],
outputs=[
@@ -620,6 +618,22 @@ Requested path was: {f}
html_log,
]
)
+
+ save_zip.click(
+ fn=wrap_gradio_call(save_files),
+ _js="(x, y, z, w) => [x, y, true, selected_gallery_index()]",
+ inputs=[
+ generation_info,
+ result_gallery,
+ html_info,
+ html_info,
+ ],
+ outputs=[
+ download_files,
+ html_log,
+ ]
+ )
+
else:
html_info_x = gr.HTML()
html_info = gr.HTML()
--
cgit v1.2.1
From 29a3a7eb13478297bc7093971b48827ab8246f45 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 1 Jan 2023 01:19:10 +0300
Subject: show sampler selection in dropdown, add option selection to revert to
old radio group
---
modules/shared.py | 1 +
modules/ui.py | 22 +++++++++++++++-------
2 files changed, 16 insertions(+), 7 deletions(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index 715b9169..948b9542 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -406,6 +406,7 @@ options_templates.update(options_section(('ui', "User interface"), {
"js_modal_lightbox": OptionInfo(True, "Enable full page image viewer"),
"js_modal_lightbox_initially_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"),
"show_progress_in_title": OptionInfo(True, "Show generation progress in window title."),
+ "samplers_in_dropdown": OptionInfo(True, "Use dropdown for sampler selection instead of radio group"),
'quicksettings': OptionInfo("sd_model_checkpoint", "Quicksettings list"),
'localization': OptionInfo("None", "Localization (requires restart)", gr.Dropdown, lambda: {"choices": ["None"] + list(localization.localizations.keys())}, refresh=lambda: localization.list_localizations(cmd_opts.localizations_dir)),
}))
diff --git a/modules/ui.py b/modules/ui.py
index 279b5110..c7b8ea5d 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -643,6 +643,19 @@ Requested path was: {f}
return result_gallery, generation_info if tabname != "extras" else html_info_x, html_info, html_log
+def create_sampler_and_steps_selection(choices, tabname):
+ if opts.samplers_in_dropdown:
+ with gr.Row(elem_id=f"sampler_selection_{tabname}"):
+ sampler_index = gr.Dropdown(label='Sampling method', elem_id=f"{tabname}_sampling", choices=[x.name for x in choices], value=choices[0].name, type="index")
+ steps = gr.Slider(minimum=1, maximum=150, step=1, elem_id=f"{tabname}_steps", label="Sampling Steps", value=20)
+ else:
+ with gr.Group(elem_id=f"sampler_selection_{tabname}"):
+ steps = gr.Slider(minimum=1, maximum=150, step=1, elem_id=f"{tabname}_steps", label="Sampling Steps", value=20)
+ sampler_index = gr.Radio(label='Sampling method', elem_id=f"{tabname}_sampling", choices=[x.name for x in choices], value=choices[0].name, type="index")
+
+ return steps, sampler_index
+
+
def create_ui():
import modules.img2img
import modules.txt2img
@@ -660,9 +673,6 @@ def create_ui():
dummy_component = gr.Label(visible=False)
txt_prompt_img = gr.File(label="", elem_id="txt2img_prompt_image", file_count="single", type="bytes", visible=False)
-
-
-
with gr.Row(elem_id='txt2img_progress_row'):
with gr.Column(scale=1):
pass
@@ -674,8 +684,7 @@ def create_ui():
with gr.Row().style(equal_height=False):
with gr.Column(variant='panel', elem_id="txt2img_settings"):
- steps = gr.Slider(minimum=1, maximum=150, step=1, label="Sampling Steps", value=20)
- sampler_index = gr.Radio(label='Sampling method', elem_id="txt2img_sampling", choices=[x.name for x in samplers], value=samplers[0].name, type="index")
+ steps, sampler_index = create_sampler_and_steps_selection(samplers, "txt2img")
with gr.Group():
width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512)
@@ -875,8 +884,7 @@ def create_ui():
with gr.Row():
resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", show_label=False, choices=["Just resize", "Crop and resize", "Resize and fill", "Just resize (latent upscale)"], type="index", value="Just resize")
- steps = gr.Slider(minimum=1, maximum=150, step=1, label="Sampling Steps", value=20)
- sampler_index = gr.Radio(label='Sampling method', choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index")
+ steps, sampler_index = create_sampler_and_steps_selection(samplers_for_img2img, "img2img")
with gr.Group():
width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="img2img_width")
--
cgit v1.2.1
From 210449b374d522c94a67fe54289a9eb515933a9f Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 1 Jan 2023 02:41:15 +0300
Subject: fix 'RuntimeError: Expected all tensors to be on the same device'
error preventing models from loading on lowvram/medvram.
---
modules/sd_hijack_clip.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index 6ec50cca..ca92b142 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -298,6 +298,6 @@ class FrozenCLIPEmbedderWithCustomWords(FrozenCLIPEmbedderWithCustomWordsBase):
def encode_embedding_init_text(self, init_text, nvpt):
embedding_layer = self.wrapped.transformer.text_model.embeddings
ids = self.wrapped.tokenizer(init_text, max_length=nvpt, return_tensors="pt", add_special_tokens=False)["input_ids"]
- embedded = embedding_layer.token_embedding.wrapped(ids.to(devices.device)).squeeze(0)
+ embedded = embedding_layer.token_embedding.wrapped(ids.to(embedding_layer.token_embedding.wrapped.weight.device)).squeeze(0)
return embedded
--
cgit v1.2.1
From 16b9661d2741b241c3964fcbd56559c078b84822 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 1 Jan 2023 09:51:37 +0300
Subject: change karras scheduler sigmas to values recommended by SD from old
0.1 to 10 with an option to revert to old
---
modules/sd_samplers.py | 4 +++-
modules/shared.py | 6 +++++-
2 files changed, 8 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 177b5338..e904d860 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -465,7 +465,9 @@ class KDiffusionSampler:
if p.sampler_noise_scheduler_override:
sigmas = p.sampler_noise_scheduler_override(steps)
elif self.config is not None and self.config.options.get('scheduler', None) == 'karras':
- sigmas = k_diffusion.sampling.get_sigmas_karras(n=steps, sigma_min=0.1, sigma_max=10, device=shared.device)
+ sigma_min, sigma_max = (0.1, 10) if opts.use_old_karras_scheduler_sigmas else (self.model_wrap.sigmas[0].item(), self.model_wrap.sigmas[-1].item())
+
+ sigmas = k_diffusion.sampling.get_sigmas_karras(n=steps, sigma_min=sigma_min, sigma_max=sigma_max, device=shared.device)
else:
sigmas = self.model_wrap.get_sigmas(steps)
diff --git a/modules/shared.py b/modules/shared.py
index 948b9542..7f430b93 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -368,13 +368,17 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
"img2img_background_color": OptionInfo("#ffffff", "With img2img, fill image's transparent parts with this color.", gr.ColorPicker, {}),
"enable_quantization": OptionInfo(False, "Enable quantization in K samplers for sharper and cleaner results. This may change existing seeds. Requires restart to apply."),
"enable_emphasis": OptionInfo(True, "Emphasis: use (text) to make model pay more attention to text and [text] to make it pay less attention"),
- "use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. Can be useful to reproduce old seeds."),
"enable_batch_seeds": OptionInfo(True, "Make K-diffusion samplers produce same images in a batch as when making a single image"),
"comma_padding_backtrack": OptionInfo(20, "Increase coherency by padding from the last comma within n tokens when using more than 75 tokens", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1 }),
'CLIP_stop_at_last_layers': OptionInfo(1, "Clip skip", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}),
"random_artist_categories": OptionInfo([], "Allowed categories for random artists selection when using the Roll button", gr.CheckboxGroup, {"choices": artist_db.categories()}),
}))
+options_templates.update(options_section(('compatibility', "Compatibility"), {
+ "use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. Can be useful to reproduce old seeds."),
+ "use_old_karras_scheduler_sigmas": OptionInfo(False, "Use old karras scheduler sigmas (0.1 to 10)."),
+}))
+
options_templates.update(options_section(('interrogate', "Interrogate Options"), {
"interrogate_keep_models_in_memory": OptionInfo(False, "Interrogate: keep models in VRAM"),
"interrogate_use_builtin_artists": OptionInfo(True, "Interrogate: use artists from artists.csv"),
--
cgit v1.2.1
From 11d432d92d63660c516540dcb48faac87669b4f0 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 1 Jan 2023 10:35:38 +0300
Subject: add refresh buttons to checkpoint merger
---
modules/ui.py | 6 ++++++
1 file changed, 6 insertions(+)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index c7b8ea5d..4cc2ce4f 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1167,8 +1167,14 @@ def create_ui():
with gr.Row():
primary_model_name = gr.Dropdown(modules.sd_models.checkpoint_tiles(), elem_id="modelmerger_primary_model_name", label="Primary model (A)")
+ create_refresh_button(primary_model_name, modules.sd_models.list_models, lambda: {"choices": modules.sd_models.checkpoint_tiles()}, "refresh_checkpoint_A")
+
secondary_model_name = gr.Dropdown(modules.sd_models.checkpoint_tiles(), elem_id="modelmerger_secondary_model_name", label="Secondary model (B)")
+ create_refresh_button(secondary_model_name, modules.sd_models.list_models, lambda: {"choices": modules.sd_models.checkpoint_tiles()}, "refresh_checkpoint_B")
+
tertiary_model_name = gr.Dropdown(modules.sd_models.checkpoint_tiles(), elem_id="modelmerger_tertiary_model_name", label="Tertiary model (C)")
+ create_refresh_button(tertiary_model_name, modules.sd_models.list_models, lambda: {"choices": modules.sd_models.checkpoint_tiles()}, "refresh_checkpoint_C")
+
custom_name = gr.Textbox(label="Custom Name (Optional)")
interp_amount = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Multiplier (M) - set to 0 to get model A', value=0.3)
interp_method = gr.Radio(choices=["Weighted sum", "Add difference"], value="Weighted sum", label="Interpolation Method")
--
cgit v1.2.1
From 76f256fe8f844641f4e9b41f35c7dd2cba5090d6 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 1 Jan 2023 11:08:39 +0300
Subject: Bump gradio version #YOLO
---
modules/ui_tempdir.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/ui_tempdir.py b/modules/ui_tempdir.py
index 07210d14..8d519310 100644
--- a/modules/ui_tempdir.py
+++ b/modules/ui_tempdir.py
@@ -15,7 +15,8 @@ Savedfile = namedtuple("Savedfile", ["name"])
def save_pil_to_file(pil_image, dir=None):
already_saved_as = getattr(pil_image, 'already_saved_as', None)
if already_saved_as and os.path.isfile(already_saved_as):
- shared.demo.temp_dirs = shared.demo.temp_dirs | {os.path.abspath(os.path.dirname(already_saved_as))}
+ shared.demo.temp_file_sets[0] = shared.demo.temp_file_sets[0] | {os.path.abspath(already_saved_as)}
+
file_obj = Savedfile(already_saved_as)
return file_obj
--
cgit v1.2.1
From b46b97fa297b3a4a654da77cf98a775a2bcab4c7 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 1 Jan 2023 11:38:17 +0300
Subject: more fixes for gradio update
---
modules/generation_parameters_copypaste.py | 2 +-
modules/ui_tempdir.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index fbd91300..54b3372d 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -38,7 +38,7 @@ def quote(text):
def image_from_url_text(filedata):
if type(filedata) == dict and filedata["is_file"]:
filename = filedata["name"]
- is_in_right_dir = any(Path(temp_dir).resolve() in Path(filename).resolve().parents for temp_dir in shared.demo.temp_dirs)
+ is_in_right_dir = any([filename in fileset for fileset in shared.demo.temp_file_sets])
assert is_in_right_dir, 'trying to open image file outside of allowed directories'
return Image.open(filename)
diff --git a/modules/ui_tempdir.py b/modules/ui_tempdir.py
index 8d519310..363d449d 100644
--- a/modules/ui_tempdir.py
+++ b/modules/ui_tempdir.py
@@ -45,7 +45,7 @@ def on_tmpdir_changed():
os.makedirs(shared.opts.temp_dir, exist_ok=True)
- shared.demo.temp_dirs = shared.demo.temp_dirs | {os.path.abspath(shared.opts.temp_dir)}
+ shared.demo.temp_file_sets[0] = shared.demo.temp_file_sets[0] | {os.path.abspath(shared.opts.temp_dir)}
def cleanup_tmpdr():
--
cgit v1.2.1
From e5f1a37cb9b537d95b2df47c96b4a4f7242fd294 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 1 Jan 2023 13:08:40 +0300
Subject: make refresh buttons look more nice
---
modules/ui.py | 6 +++---
modules/ui_components.py | 18 ++++++++++++++++++
2 files changed, 21 insertions(+), 3 deletions(-)
create mode 100644 modules/ui_components.py
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index 4cc2ce4f..32fa80d1 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -19,7 +19,7 @@ import numpy as np
from PIL import Image, PngImagePlugin
from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call, wrap_gradio_call
-from modules import sd_hijack, sd_models, localization, script_callbacks, ui_extensions, deepbooru
+from modules import sd_hijack, sd_models, localization, script_callbacks, ui_extensions, deepbooru, ui_components
from modules.paths import script_path
from modules.shared import opts, cmd_opts, restricted_opts
@@ -532,7 +532,7 @@ def create_refresh_button(refresh_component, refresh_method, refreshed_args, ele
return gr.update(**(args or {}))
- refresh_button = gr.Button(value=refresh_symbol, elem_id=elem_id)
+ refresh_button = ui_components.ToolButton(value=refresh_symbol, elem_id=elem_id)
refresh_button.click(
fn=refresh,
inputs=[],
@@ -1476,7 +1476,7 @@ def create_ui():
res = comp(label=info.label, value=fun(), elem_id=elem_id, **(args or {}))
create_refresh_button(res, info.refresh, info.component_args, "refresh_" + key)
else:
- with gr.Row(variant="compact"):
+ with ui_components.FormRow():
res = comp(label=info.label, value=fun(), elem_id=elem_id, **(args or {}))
create_refresh_button(res, info.refresh, info.component_args, "refresh_" + key)
else:
diff --git a/modules/ui_components.py b/modules/ui_components.py
new file mode 100644
index 00000000..d0519d2d
--- /dev/null
+++ b/modules/ui_components.py
@@ -0,0 +1,18 @@
+import gradio as gr
+
+
+class ToolButton(gr.Button, gr.components.FormComponent):
+ """Small button with single emoji as text, fits inside gradio forms"""
+
+ def __init__(self, **kwargs):
+ super().__init__(variant="tool", **kwargs)
+
+ def get_block_name(self):
+ return "button"
+
+
+class FormRow(gr.Row, gr.components.FormComponent):
+ """Same as gr.Row but fits inside gradio forms"""
+
+ def get_block_name(self):
+ return "row"
--
cgit v1.2.1
From 5f12b23b8bb7fca585a3a1e844881d06f171364e Mon Sep 17 00:00:00 2001
From: AlUlkesh <99896447+AlUlkesh@users.noreply.github.com>
Date: Wed, 28 Dec 2022 22:18:19 +0100
Subject: Adding image numbers on grids
New grid option in settings enables adding of image numbers on grids. This makes identifying the images, especially in larger batches, much easier.
Revert "Adding image numbers on grids"
This reverts commit 3530c283b4b1d3a3cab40efbffe4cf2697938b6f.
Implements Callback for image grid loop
Necessary to make "Add image's number to its picture in the grid" extension possible.
---
modules/images.py | 1 +
modules/script_callbacks.py | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+)
(limited to 'modules')
diff --git a/modules/images.py b/modules/images.py
index 31d4528d..5afd3891 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -43,6 +43,7 @@ def image_grid(imgs, batch_size=1, rows=None):
grid = Image.new('RGB', size=(cols * w, rows * h), color='black')
for i, img in enumerate(imgs):
+ script_callbacks.image_grid_loop_callback(img)
grid.paste(img, box=(i % cols * w, i // cols * h))
return grid
diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py
index 8e22f875..0c854407 100644
--- a/modules/script_callbacks.py
+++ b/modules/script_callbacks.py
@@ -51,6 +51,11 @@ class UiTrainTabParams:
self.txt2img_preview_params = txt2img_preview_params
+class ImageGridLoopParams:
+ def __init__(self, img):
+ self.img = img
+
+
ScriptCallback = namedtuple("ScriptCallback", ["script", "callback"])
callback_map = dict(
callbacks_app_started=[],
@@ -63,6 +68,7 @@ callback_map = dict(
callbacks_cfg_denoiser=[],
callbacks_before_component=[],
callbacks_after_component=[],
+ callbacks_image_grid_loop=[],
)
@@ -154,6 +160,12 @@ def after_component_callback(component, **kwargs):
except Exception:
report_exception(c, 'after_component_callback')
+def image_grid_loop_callback(component, **kwargs):
+ for c in callback_map['callbacks_image_grid_loop']:
+ try:
+ c.callback(component, **kwargs)
+ except Exception:
+ report_exception(c, 'image_grid_loop')
def add_callback(callbacks, fun):
stack = [x for x in inspect.stack() if x.filename != __file__]
@@ -255,3 +267,11 @@ def on_before_component(callback):
def on_after_component(callback):
"""register a function to be called after a component is created. See on_before_component for more."""
add_callback(callback_map['callbacks_after_component'], callback)
+
+
+def on_image_grid_loop(callback):
+ """register a function to be called inside the image grid loop.
+ The callback is called with one argument:
+ - params: ImageGridLoopParams - parameters to be used inside the image grid loop.
+ """
+ add_callback(callback_map['callbacks_image_grid_loop'], callback)
--
cgit v1.2.1
From 524d532b387732d4d32f237e792c7f201a934400 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 1 Jan 2023 14:07:40 +0300
Subject: moved roll artist to built-in extensions
---
modules/ui.py | 37 +++----------------------------------
1 file changed, 3 insertions(+), 34 deletions(-)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index 32fa80d1..27da2c2c 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -80,7 +80,6 @@ css_hide_progressbar = """
# Important that they exactly match script.js for tooltip to work.
random_symbol = '\U0001f3b2\ufe0f' # 🎲️
reuse_symbol = '\u267b\ufe0f' # ♻️
-art_symbol = '\U0001f3a8' # 🎨
paste_symbol = '\u2199\ufe0f' # ↙
folder_symbol = '\U0001f4c2' # 📂
refresh_symbol = '\U0001f504' # 🔄
@@ -234,13 +233,6 @@ def check_progress_call_initial(id_part):
return check_progress_call(id_part)
-def roll_artist(prompt):
- allowed_cats = set([x for x in shared.artist_db.categories() if len(opts.random_artist_categories)==0 or x in opts.random_artist_categories])
- artist = random.choice([x for x in shared.artist_db.artists if x.category in allowed_cats])
-
- return prompt + ", " + artist.name if prompt != '' else artist.name
-
-
def visit(x, func, path=""):
if hasattr(x, 'children'):
for c in x.children:
@@ -403,7 +395,6 @@ def create_toprow(is_img2img):
)
with gr.Column(scale=1, elem_id="roll_col"):
- roll = gr.Button(value=art_symbol, elem_id="roll", visible=len(shared.artist_db.artists) > 0)
paste = gr.Button(value=paste_symbol, elem_id="paste")
save_style = gr.Button(value=save_style_symbol, elem_id="style_create")
prompt_style_apply = gr.Button(value=apply_style_symbol, elem_id="style_apply")
@@ -452,7 +443,7 @@ def create_toprow(is_img2img):
prompt_style2 = gr.Dropdown(label="Style 2", elem_id=f"{id_part}_style2_index", choices=[k for k, v in shared.prompt_styles.styles.items()], value=next(iter(shared.prompt_styles.styles.keys())))
prompt_style2.save_to_config = True
- return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, button_interrogate, button_deepbooru, prompt_style_apply, save_style, paste, token_counter, token_button
+ return prompt, prompt_style, negative_prompt, prompt_style2, submit, button_interrogate, button_deepbooru, prompt_style_apply, save_style, paste, token_counter, token_button
def setup_progressbar(progressbar, preview, id_part, textinfo=None):
@@ -668,7 +659,7 @@ def create_ui():
modules.scripts.scripts_txt2img.initialize_scripts(is_img2img=False)
with gr.Blocks(analytics_enabled=False) as txt2img_interface:
- txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _,txt2img_prompt_style_apply, txt2img_save_style, txt2img_paste, token_counter, token_button = create_toprow(is_img2img=False)
+ txt2img_prompt, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _,txt2img_prompt_style_apply, txt2img_save_style, txt2img_paste, token_counter, token_button = create_toprow(is_img2img=False)
dummy_component = gr.Label(visible=False)
txt_prompt_img = gr.File(label="", elem_id="txt2img_prompt_image", file_count="single", type="bytes", visible=False)
@@ -771,16 +762,6 @@ def create_ui():
outputs=[hr_options],
)
- roll.click(
- fn=roll_artist,
- _js="update_txt2img_tokens",
- inputs=[
- txt2img_prompt,
- ],
- outputs=[
- txt2img_prompt,
- ]
- )
txt2img_paste_fields = [
(txt2img_prompt, "Prompt"),
@@ -823,7 +804,7 @@ def create_ui():
modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True)
with gr.Blocks(analytics_enabled=False) as img2img_interface:
- img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_deepbooru, img2img_prompt_style_apply, img2img_save_style, img2img_paste,token_counter, token_button = create_toprow(is_img2img=True)
+ img2img_prompt, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_deepbooru, img2img_prompt_style_apply, img2img_save_style, img2img_paste,token_counter, token_button = create_toprow(is_img2img=True)
with gr.Row(elem_id='img2img_progress_row'):
img2img_prompt_img = gr.File(label="", elem_id="img2img_prompt_image", file_count="single", type="bytes", visible=False)
@@ -999,18 +980,6 @@ def create_ui():
outputs=[img2img_prompt],
)
-
- roll.click(
- fn=roll_artist,
- _js="update_img2img_tokens",
- inputs=[
- img2img_prompt,
- ],
- outputs=[
- img2img_prompt,
- ]
- )
-
prompts = [(txt2img_prompt, txt2img_negative_prompt), (img2img_prompt, img2img_negative_prompt)]
style_dropdowns = [(txt2img_prompt_style, txt2img_prompt_style2), (img2img_prompt_style, img2img_prompt_style2)]
style_js_funcs = ["update_txt2img_tokens", "update_img2img_tokens"]
--
cgit v1.2.1
From e672cfb07418a1a3130d3bf21c14a0d3819f81fb Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 1 Jan 2023 18:37:37 +0300
Subject: rework of callback for #6094
---
modules/images.py | 10 ++++++----
modules/script_callbacks.py | 26 +++++++++++++++-----------
2 files changed, 21 insertions(+), 15 deletions(-)
(limited to 'modules')
diff --git a/modules/images.py b/modules/images.py
index 719aaf3b..f84fd485 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -39,12 +39,14 @@ def image_grid(imgs, batch_size=1, rows=None):
cols = math.ceil(len(imgs) / rows)
+ params = script_callbacks.ImageGridLoopParams(imgs, cols, rows)
+ script_callbacks.image_grid_callback(params)
+
w, h = imgs[0].size
- grid = Image.new('RGB', size=(cols * w, rows * h), color='black')
+ grid = Image.new('RGB', size=(params.cols * w, params.rows * h), color='black')
- for i, img in enumerate(imgs):
- script_callbacks.image_grid_loop_callback(img)
- grid.paste(img, box=(i % cols * w, i // cols * h))
+ for i, img in enumerate(params.imgs):
+ grid.paste(img, box=(i % params.cols * w, i // params.cols * h))
return grid
diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py
index 0c854407..de69fd9f 100644
--- a/modules/script_callbacks.py
+++ b/modules/script_callbacks.py
@@ -52,8 +52,10 @@ class UiTrainTabParams:
class ImageGridLoopParams:
- def __init__(self, img):
- self.img = img
+ def __init__(self, imgs, cols, rows):
+ self.imgs = imgs
+ self.cols = cols
+ self.rows = rows
ScriptCallback = namedtuple("ScriptCallback", ["script", "callback"])
@@ -68,7 +70,7 @@ callback_map = dict(
callbacks_cfg_denoiser=[],
callbacks_before_component=[],
callbacks_after_component=[],
- callbacks_image_grid_loop=[],
+ callbacks_image_grid=[],
)
@@ -160,12 +162,14 @@ def after_component_callback(component, **kwargs):
except Exception:
report_exception(c, 'after_component_callback')
-def image_grid_loop_callback(component, **kwargs):
- for c in callback_map['callbacks_image_grid_loop']:
+
+def image_grid_callback(params: ImageGridLoopParams):
+ for c in callback_map['callbacks_image_grid']:
try:
- c.callback(component, **kwargs)
+ c.callback(params)
except Exception:
- report_exception(c, 'image_grid_loop')
+ report_exception(c, 'image_grid')
+
def add_callback(callbacks, fun):
stack = [x for x in inspect.stack() if x.filename != __file__]
@@ -269,9 +273,9 @@ def on_after_component(callback):
add_callback(callback_map['callbacks_after_component'], callback)
-def on_image_grid_loop(callback):
- """register a function to be called inside the image grid loop.
+def on_image_grid(callback):
+ """register a function to be called before making an image grid.
The callback is called with one argument:
- - params: ImageGridLoopParams - parameters to be used inside the image grid loop.
+ - params: ImageGridLoopParams - parameters to be used for grid creation. Can be modified.
"""
- add_callback(callback_map['callbacks_image_grid_loop'], callback)
+ add_callback(callback_map['callbacks_image_grid'], callback)
--
cgit v1.2.1
From a005fccddd5a37c57f1afe5234660b59b9a41508 Mon Sep 17 00:00:00 2001
From: me <25877290+Kryptortio@users.noreply.github.com>
Date: Sun, 1 Jan 2023 14:51:12 +0100
Subject: Add a lot more elem_id/HTML id, modified some that were duplicates
for seed section
---
modules/generation_parameters_copypaste.py | 2 +-
modules/ui.py | 254 ++++++++++++++---------------
2 files changed, 128 insertions(+), 128 deletions(-)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index 54b3372d..8e7f0df0 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -93,7 +93,7 @@ def integrate_settings_paste_fields(component_dict):
def create_buttons(tabs_list):
buttons = {}
for tab in tabs_list:
- buttons[tab] = gr.Button(f"Send to {tab}")
+ buttons[tab] = gr.Button(f"Send to {tab}", elem_id=f"{tab}_tab")
return buttons
diff --git a/modules/ui.py b/modules/ui.py
index 27da2c2c..7070ea15 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -272,17 +272,17 @@ def interrogate_deepbooru(image):
return gr_show(True) if prompt is None else prompt
-def create_seed_inputs():
+def create_seed_inputs(target_interface):
with gr.Row():
with gr.Box():
- with gr.Row(elem_id='seed_row'):
- seed = (gr.Textbox if cmd_opts.use_textbox_seed else gr.Number)(label='Seed', value=-1)
+ with gr.Row(elem_id=target_interface + '_seed_row'):
+ seed = (gr.Textbox if cmd_opts.use_textbox_seed else gr.Number)(label='Seed', value=-1, elem_id=target_interface + '_seed')
seed.style(container=False)
- random_seed = gr.Button(random_symbol, elem_id='random_seed')
- reuse_seed = gr.Button(reuse_symbol, elem_id='reuse_seed')
+ random_seed = gr.Button(random_symbol, elem_id=target_interface + '_random_seed')
+ reuse_seed = gr.Button(reuse_symbol, elem_id=target_interface + '_reuse_seed')
- with gr.Box(elem_id='subseed_show_box'):
- seed_checkbox = gr.Checkbox(label='Extra', elem_id='subseed_show', value=False)
+ with gr.Box(elem_id=target_interface + '_subseed_show_box'):
+ seed_checkbox = gr.Checkbox(label='Extra', elem_id=target_interface + '_subseed_show', value=False)
# Components to show/hide based on the 'Extra' checkbox
seed_extras = []
@@ -290,17 +290,17 @@ def create_seed_inputs():
with gr.Row(visible=False) as seed_extra_row_1:
seed_extras.append(seed_extra_row_1)
with gr.Box():
- with gr.Row(elem_id='subseed_row'):
- subseed = gr.Number(label='Variation seed', value=-1)
+ with gr.Row(elem_id=target_interface + '_subseed_row'):
+ subseed = gr.Number(label='Variation seed', value=-1, elem_id=target_interface + '_subseed')
subseed.style(container=False)
- random_subseed = gr.Button(random_symbol, elem_id='random_subseed')
- reuse_subseed = gr.Button(reuse_symbol, elem_id='reuse_subseed')
- subseed_strength = gr.Slider(label='Variation strength', value=0.0, minimum=0, maximum=1, step=0.01)
+ random_subseed = gr.Button(random_symbol, elem_id=target_interface + '_random_subseed')
+ reuse_subseed = gr.Button(reuse_symbol, elem_id=target_interface + '_reuse_subseed')
+ subseed_strength = gr.Slider(label='Variation strength', value=0.0, minimum=0, maximum=1, step=0.01, elem_id=target_interface + '_subseed_strength')
with gr.Row(visible=False) as seed_extra_row_2:
seed_extras.append(seed_extra_row_2)
- seed_resize_from_w = gr.Slider(minimum=0, maximum=2048, step=8, label="Resize seed from width", value=0)
- seed_resize_from_h = gr.Slider(minimum=0, maximum=2048, step=8, label="Resize seed from height", value=0)
+ seed_resize_from_w = gr.Slider(minimum=0, maximum=2048, step=8, label="Resize seed from width", value=0, elem_id=target_interface + '_seed_resize_from_w')
+ seed_resize_from_h = gr.Slider(minimum=0, maximum=2048, step=8, label="Resize seed from height", value=0, elem_id=target_interface + '_seed_resize_from_h')
random_seed.click(fn=lambda: -1, show_progress=False, inputs=[], outputs=[seed])
random_subseed.click(fn=lambda: -1, show_progress=False, inputs=[], outputs=[subseed])
@@ -678,28 +678,28 @@ def create_ui():
steps, sampler_index = create_sampler_and_steps_selection(samplers, "txt2img")
with gr.Group():
- width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512)
- height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512)
+ width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="txt2img_width")
+ height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="txt2img_height")
with gr.Row():
- restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1)
- tiling = gr.Checkbox(label='Tiling', value=False)
- enable_hr = gr.Checkbox(label='Highres. fix', value=False)
+ restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1, elem_id="txt2img_restore_faces")
+ tiling = gr.Checkbox(label='Tiling', value=False, elem_id="txt2img_tiling")
+ enable_hr = gr.Checkbox(label='Highres. fix', value=False, elem_id="txt2img_enable_hr")
with gr.Row(visible=False) as hr_options:
- firstphase_width = gr.Slider(minimum=0, maximum=1024, step=8, label="Firstpass width", value=0)
- firstphase_height = gr.Slider(minimum=0, maximum=1024, step=8, label="Firstpass height", value=0)
- denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.7)
+ firstphase_width = gr.Slider(minimum=0, maximum=1024, step=8, label="Firstpass width", value=0, elem_id="txt2img_firstphase_width")
+ firstphase_height = gr.Slider(minimum=0, maximum=1024, step=8, label="Firstpass height", value=0, elem_id="txt2img_firstphase_height")
+ denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.7, elem_id="txt2img_denoising_strength")
with gr.Row(equal_height=True):
- batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1)
- batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1)
+ batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="txt2img_batch_count")
+ batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="txt2img_batch_size")
- cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0)
+ cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0, elem_id="txt2img_cfg_scale")
- seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs()
+ seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs('txt2img')
- with gr.Group():
+ with gr.Group(elem_id="txt2img_script_container"):
custom_inputs = modules.scripts.scripts_txt2img.setup_ui()
txt2img_gallery, generation_info, html_info, html_log = create_output_panel("txt2img", opts.outdir_txt2img_samples)
@@ -821,10 +821,10 @@ def create_ui():
with gr.Column(variant='panel', elem_id="img2img_settings"):
with gr.Tabs(elem_id="mode_img2img") as tabs_img2img_mode:
- with gr.TabItem('img2img', id='img2img'):
+ with gr.TabItem('img2img', id='img2img', elem_id="img2img_img2img_tab"):
init_img = gr.Image(label="Image for img2img", elem_id="img2img_image", show_label=False, source="upload", interactive=True, type="pil", tool=cmd_opts.gradio_img2img_tool, image_mode="RGBA").style(height=480)
- with gr.TabItem('Inpaint', id='inpaint'):
+ with gr.TabItem('Inpaint', id='inpaint', elem_id="img2img_inpaint_tab"):
init_img_with_mask = gr.Image(label="Image for inpainting with mask", show_label=False, elem_id="img2maskimg", source="upload", interactive=True, type="pil", tool=cmd_opts.gradio_inpaint_tool, image_mode="RGBA").style(height=480)
init_img_with_mask_orig = gr.State(None)
@@ -843,24 +843,24 @@ def create_ui():
init_mask_inpaint = gr.Image(label="Mask", source="upload", interactive=True, type="pil", visible=False, elem_id="img_inpaint_mask")
with gr.Row():
- mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4)
- mask_alpha = gr.Slider(label="Mask transparency", interactive=use_color_sketch, visible=use_color_sketch)
+ mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id="img2img_mask_blur")
+ mask_alpha = gr.Slider(label="Mask transparency", interactive=use_color_sketch, visible=use_color_sketch, elem_id="img2img_mask_alpha")
with gr.Row():
mask_mode = gr.Radio(label="Mask mode", show_label=False, choices=["Draw mask", "Upload mask"], type="index", value="Draw mask", elem_id="mask_mode")
- inpainting_mask_invert = gr.Radio(label='Masking mode', show_label=False, choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index")
+ inpainting_mask_invert = gr.Radio(label='Masking mode', show_label=False, choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index", elem_id="img2img_mask_mode")
- inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='original', type="index")
+ inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='original', type="index", elem_id="img2img_inpainting_fill")
with gr.Row():
- inpaint_full_res = gr.Checkbox(label='Inpaint at full resolution', value=False)
- inpaint_full_res_padding = gr.Slider(label='Inpaint at full resolution padding, pixels', minimum=0, maximum=256, step=4, value=32)
+ inpaint_full_res = gr.Checkbox(label='Inpaint at full resolution', value=False, elem_id="img2img_inpaint_full_res")
+ inpaint_full_res_padding = gr.Slider(label='Inpaint at full resolution padding, pixels', minimum=0, maximum=256, step=4, value=32, elem_id="img2img_inpaint_full_res_padding")
- with gr.TabItem('Batch img2img', id='batch'):
+ with gr.TabItem('Batch img2img', id='batch', elem_id="img2img_batch_tab"):
hidden = ' Disabled when launched with --hide-ui-dir-config.' if shared.cmd_opts.hide_ui_dir_config else ''
gr.HTML(f"Process images in a directory on the same machine where the server is running. Use an empty output directory to save pictures normally instead of writing to the output directory.{hidden}
")
- img2img_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs)
- img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs)
+ img2img_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs, elem_id="img2img_batch_input_dir")
+ img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, elem_id="img2img_batch_output_dir")
with gr.Row():
resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", show_label=False, choices=["Just resize", "Crop and resize", "Resize and fill", "Just resize (latent upscale)"], type="index", value="Just resize")
@@ -872,20 +872,20 @@ def create_ui():
height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="img2img_height")
with gr.Row():
- restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1)
- tiling = gr.Checkbox(label='Tiling', value=False)
+ restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1, elem_id="img2img_restore_faces")
+ tiling = gr.Checkbox(label='Tiling', value=False, elem_id="img2img_tiling")
with gr.Row():
- batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1)
- batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1)
+ batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="img2img_batch_count")
+ batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="img2img_batch_size")
with gr.Group():
- cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0)
- denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.75)
+ cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0, elem_id="img2img_cfg_scale")
+ denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.75, elem_id="img2img_denoising_strength")
- seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs()
+ seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs('img2img')
- with gr.Group():
+ with gr.Group(elem_id="img2img_script_container"):
custom_inputs = modules.scripts.scripts_img2img.setup_ui()
img2img_gallery, generation_info, html_info, html_log = create_output_panel("img2img", opts.outdir_img2img_samples)
@@ -1032,45 +1032,45 @@ def create_ui():
with gr.Row().style(equal_height=False):
with gr.Column(variant='panel'):
with gr.Tabs(elem_id="mode_extras"):
- with gr.TabItem('Single Image'):
- extras_image = gr.Image(label="Source", source="upload", interactive=True, type="pil")
+ with gr.TabItem('Single Image', elem_id="extras_single_tab"):
+ extras_image = gr.Image(label="Source", source="upload", interactive=True, type="pil", elem_id="extras_image")
- with gr.TabItem('Batch Process'):
- image_batch = gr.File(label="Batch Process", file_count="multiple", interactive=True, type="file")
+ with gr.TabItem('Batch Process', elem_id="extras_batch_process_tab"):
+ image_batch = gr.File(label="Batch Process", file_count="multiple", interactive=True, type="file", elem_id="extras_image_batch")
- with gr.TabItem('Batch from Directory'):
- extras_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs, placeholder="A directory on the same machine where the server is running.")
- extras_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, placeholder="Leave blank to save images to the default path.")
- show_extras_results = gr.Checkbox(label='Show result images', value=True)
+ with gr.TabItem('Batch from Directory', elem_id="extras_batch_directory_tab"):
+ extras_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs, placeholder="A directory on the same machine where the server is running.", elem_id="extras_batch_input_dir")
+ extras_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, placeholder="Leave blank to save images to the default path.", elem_id="extras_batch_output_dir")
+ show_extras_results = gr.Checkbox(label='Show result images', value=True, elem_id="extras_show_extras_results")
submit = gr.Button('Generate', elem_id="extras_generate", variant='primary')
with gr.Tabs(elem_id="extras_resize_mode"):
- with gr.TabItem('Scale by'):
- upscaling_resize = gr.Slider(minimum=1.0, maximum=8.0, step=0.05, label="Resize", value=4)
- with gr.TabItem('Scale to'):
+ with gr.TabItem('Scale by', elem_id="extras_scale_by_tab"):
+ upscaling_resize = gr.Slider(minimum=1.0, maximum=8.0, step=0.05, label="Resize", value=4, elem_id="extras_upscaling_resize")
+ with gr.TabItem('Scale to', elem_id="extras_scale_to_tab"):
with gr.Group():
with gr.Row():
- upscaling_resize_w = gr.Number(label="Width", value=512, precision=0)
- upscaling_resize_h = gr.Number(label="Height", value=512, precision=0)
- upscaling_crop = gr.Checkbox(label='Crop to fit', value=True)
+ upscaling_resize_w = gr.Number(label="Width", value=512, precision=0, elem_id="extras_upscaling_resize_w")
+ upscaling_resize_h = gr.Number(label="Height", value=512, precision=0, elem_id="extras_upscaling_resize_h")
+ upscaling_crop = gr.Checkbox(label='Crop to fit', value=True, elem_id="extras_upscaling_crop")
with gr.Group():
extras_upscaler_1 = gr.Radio(label='Upscaler 1', elem_id="extras_upscaler_1", choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index")
with gr.Group():
extras_upscaler_2 = gr.Radio(label='Upscaler 2', elem_id="extras_upscaler_2", choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index")
- extras_upscaler_2_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="Upscaler 2 visibility", value=1)
+ extras_upscaler_2_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="Upscaler 2 visibility", value=1, elem_id="extras_upscaler_2_visibility")
with gr.Group():
- gfpgan_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="GFPGAN visibility", value=0, interactive=modules.gfpgan_model.have_gfpgan)
+ gfpgan_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="GFPGAN visibility", value=0, interactive=modules.gfpgan_model.have_gfpgan, elem_id="extras_gfpgan_visibility")
with gr.Group():
- codeformer_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer visibility", value=0, interactive=modules.codeformer_model.have_codeformer)
- codeformer_weight = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer weight (0 = maximum effect, 1 = minimum effect)", value=0, interactive=modules.codeformer_model.have_codeformer)
+ codeformer_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer visibility", value=0, interactive=modules.codeformer_model.have_codeformer, elem_id="extras_codeformer_visibility")
+ codeformer_weight = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer weight (0 = maximum effect, 1 = minimum effect)", value=0, interactive=modules.codeformer_model.have_codeformer, elem_id="extras_codeformer_weight")
with gr.Group():
- upscale_before_face_fix = gr.Checkbox(label='Upscale Before Restoring Faces', value=False)
+ upscale_before_face_fix = gr.Checkbox(label='Upscale Before Restoring Faces', value=False, elem_id="extras_upscale_before_face_fix")
result_images, html_info_x, html_info, html_log = create_output_panel("extras", opts.outdir_extras_samples)
@@ -1117,7 +1117,7 @@ def create_ui():
with gr.Column(variant='panel'):
html = gr.HTML()
- generation_info = gr.Textbox(visible=False)
+ generation_info = gr.Textbox(visible=False, elem_id="pnginfo_generation_info")
html2 = gr.HTML()
with gr.Row():
buttons = parameters_copypaste.create_buttons(["txt2img", "img2img", "inpaint", "extras"])
@@ -1144,13 +1144,13 @@ def create_ui():
tertiary_model_name = gr.Dropdown(modules.sd_models.checkpoint_tiles(), elem_id="modelmerger_tertiary_model_name", label="Tertiary model (C)")
create_refresh_button(tertiary_model_name, modules.sd_models.list_models, lambda: {"choices": modules.sd_models.checkpoint_tiles()}, "refresh_checkpoint_C")
- custom_name = gr.Textbox(label="Custom Name (Optional)")
- interp_amount = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Multiplier (M) - set to 0 to get model A', value=0.3)
- interp_method = gr.Radio(choices=["Weighted sum", "Add difference"], value="Weighted sum", label="Interpolation Method")
+ custom_name = gr.Textbox(label="Custom Name (Optional)", elem_id="modelmerger_custom_name")
+ interp_amount = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Multiplier (M) - set to 0 to get model A', value=0.3, elem_id="modelmerger_interp_amount")
+ interp_method = gr.Radio(choices=["Weighted sum", "Add difference"], value="Weighted sum", label="Interpolation Method", elem_id="modelmerger_interp_method")
with gr.Row():
- checkpoint_format = gr.Radio(choices=["ckpt", "safetensors"], value="ckpt", label="Checkpoint format")
- save_as_half = gr.Checkbox(value=False, label="Save as float16")
+ checkpoint_format = gr.Radio(choices=["ckpt", "safetensors"], value="ckpt", label="Checkpoint format", elem_id="modelmerger_checkpoint_format")
+ save_as_half = gr.Checkbox(value=False, label="Save as float16", elem_id="modelmerger_save_as_half")
modelmerger_merge = gr.Button(elem_id="modelmerger_merge", label="Merge", variant='primary')
@@ -1165,58 +1165,58 @@ def create_ui():
with gr.Tabs(elem_id="train_tabs"):
with gr.Tab(label="Create embedding"):
- new_embedding_name = gr.Textbox(label="Name")
- initialization_text = gr.Textbox(label="Initialization text", value="*")
- nvpt = gr.Slider(label="Number of vectors per token", minimum=1, maximum=75, step=1, value=1)
- overwrite_old_embedding = gr.Checkbox(value=False, label="Overwrite Old Embedding")
+ new_embedding_name = gr.Textbox(label="Name", elem_id="train_new_embedding_name")
+ initialization_text = gr.Textbox(label="Initialization text", value="*", elem_id="train_initialization_text")
+ nvpt = gr.Slider(label="Number of vectors per token", minimum=1, maximum=75, step=1, value=1, elem_id="train_nvpt")
+ overwrite_old_embedding = gr.Checkbox(value=False, label="Overwrite Old Embedding", elem_id="train_overwrite_old_embedding")
with gr.Row():
with gr.Column(scale=3):
gr.HTML(value="")
with gr.Column():
- create_embedding = gr.Button(value="Create embedding", variant='primary')
+ create_embedding = gr.Button(value="Create embedding", variant='primary', elem_id="train_create_embedding")
with gr.Tab(label="Create hypernetwork"):
- new_hypernetwork_name = gr.Textbox(label="Name")
- new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "1024", "320", "640", "1280"])
- new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure", placeholder="1st and last digit must be 1. ex:'1, 2, 1'")
- new_hypernetwork_activation_func = gr.Dropdown(value="linear", label="Select activation function of hypernetwork. Recommended : Swish / Linear(none)", choices=modules.hypernetworks.ui.keys)
- new_hypernetwork_initialization_option = gr.Dropdown(value = "Normal", label="Select Layer weights initialization. Recommended: Kaiming for relu-like, Xavier for sigmoid-like, Normal otherwise", choices=["Normal", "KaimingUniform", "KaimingNormal", "XavierUniform", "XavierNormal"])
- new_hypernetwork_add_layer_norm = gr.Checkbox(label="Add layer normalization")
- new_hypernetwork_use_dropout = gr.Checkbox(label="Use dropout")
- overwrite_old_hypernetwork = gr.Checkbox(value=False, label="Overwrite Old Hypernetwork")
+ new_hypernetwork_name = gr.Textbox(label="Name", elem_id="train_new_hypernetwork_name")
+ new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "1024", "320", "640", "1280"], elem_id="train_new_hypernetwork_sizes")
+ new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure", placeholder="1st and last digit must be 1. ex:'1, 2, 1'", elem_id="train_new_hypernetwork_layer_structure")
+ new_hypernetwork_activation_func = gr.Dropdown(value="linear", label="Select activation function of hypernetwork. Recommended : Swish / Linear(none)", choices=modules.hypernetworks.ui.keys, elem_id="train_new_hypernetwork_activation_func")
+ new_hypernetwork_initialization_option = gr.Dropdown(value = "Normal", label="Select Layer weights initialization. Recommended: Kaiming for relu-like, Xavier for sigmoid-like, Normal otherwise", choices=["Normal", "KaimingUniform", "KaimingNormal", "XavierUniform", "XavierNormal"], elem_id="train_new_hypernetwork_initialization_option")
+ new_hypernetwork_add_layer_norm = gr.Checkbox(label="Add layer normalization", elem_id="train_new_hypernetwork_add_layer_norm")
+ new_hypernetwork_use_dropout = gr.Checkbox(label="Use dropout", elem_id="train_new_hypernetwork_use_dropout")
+ overwrite_old_hypernetwork = gr.Checkbox(value=False, label="Overwrite Old Hypernetwork", elem_id="train_overwrite_old_hypernetwork")
with gr.Row():
with gr.Column(scale=3):
gr.HTML(value="")
with gr.Column():
- create_hypernetwork = gr.Button(value="Create hypernetwork", variant='primary')
+ create_hypernetwork = gr.Button(value="Create hypernetwork", variant='primary', elem_id="train_create_hypernetwork")
with gr.Tab(label="Preprocess images"):
- process_src = gr.Textbox(label='Source directory')
- process_dst = gr.Textbox(label='Destination directory')
- process_width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512)
- process_height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512)
- preprocess_txt_action = gr.Dropdown(label='Existing Caption txt Action', value="ignore", choices=["ignore", "copy", "prepend", "append"])
+ process_src = gr.Textbox(label='Source directory', elem_id="train_process_src")
+ process_dst = gr.Textbox(label='Destination directory', elem_id="train_process_dst")
+ process_width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="train_process_width")
+ process_height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="train_process_height")
+ preprocess_txt_action = gr.Dropdown(label='Existing Caption txt Action', value="ignore", choices=["ignore", "copy", "prepend", "append"], elem_id="train_preprocess_txt_action")
with gr.Row():
- process_flip = gr.Checkbox(label='Create flipped copies')
- process_split = gr.Checkbox(label='Split oversized images')
- process_focal_crop = gr.Checkbox(label='Auto focal point crop')
- process_caption = gr.Checkbox(label='Use BLIP for caption')
- process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True)
+ process_flip = gr.Checkbox(label='Create flipped copies', elem_id="train_process_flip")
+ process_split = gr.Checkbox(label='Split oversized images', elem_id="train_process_split")
+ process_focal_crop = gr.Checkbox(label='Auto focal point crop', elem_id="train_process_focal_crop")
+ process_caption = gr.Checkbox(label='Use BLIP for caption', elem_id="train_process_caption")
+ process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True, elem_id="train_process_caption_deepbooru")
with gr.Row(visible=False) as process_split_extra_row:
- process_split_threshold = gr.Slider(label='Split image threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05)
- process_overlap_ratio = gr.Slider(label='Split image overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05)
+ process_split_threshold = gr.Slider(label='Split image threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_split_threshold")
+ process_overlap_ratio = gr.Slider(label='Split image overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05, elem_id="train_process_overlap_ratio")
with gr.Row(visible=False) as process_focal_crop_row:
- process_focal_crop_face_weight = gr.Slider(label='Focal point face weight', value=0.9, minimum=0.0, maximum=1.0, step=0.05)
- process_focal_crop_entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.15, minimum=0.0, maximum=1.0, step=0.05)
- process_focal_crop_edges_weight = gr.Slider(label='Focal point edges weight', value=0.5, minimum=0.0, maximum=1.0, step=0.05)
- process_focal_crop_debug = gr.Checkbox(label='Create debug image')
+ process_focal_crop_face_weight = gr.Slider(label='Focal point face weight', value=0.9, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_focal_crop_face_weight")
+ process_focal_crop_entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.15, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_focal_crop_entropy_weight")
+ process_focal_crop_edges_weight = gr.Slider(label='Focal point edges weight', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_focal_crop_edges_weight")
+ process_focal_crop_debug = gr.Checkbox(label='Create debug image', elem_id="train_process_focal_crop_debug")
with gr.Row():
with gr.Column(scale=3):
@@ -1224,8 +1224,8 @@ def create_ui():
with gr.Column():
with gr.Row():
- interrupt_preprocessing = gr.Button("Interrupt")
- run_preprocess = gr.Button(value="Preprocess", variant='primary')
+ interrupt_preprocessing = gr.Button("Interrupt", elem_id="train_interrupt_preprocessing")
+ run_preprocess = gr.Button(value="Preprocess", variant='primary', elem_id="train_run_preprocess")
process_split.change(
fn=lambda show: gr_show(show),
@@ -1248,31 +1248,31 @@ def create_ui():
train_hypernetwork_name = gr.Dropdown(label='Hypernetwork', elem_id="train_hypernetwork", choices=[x for x in shared.hypernetworks.keys()])
create_refresh_button(train_hypernetwork_name, shared.reload_hypernetworks, lambda: {"choices": sorted([x for x in shared.hypernetworks.keys()])}, "refresh_train_hypernetwork_name")
with gr.Row():
- embedding_learn_rate = gr.Textbox(label='Embedding Learning rate', placeholder="Embedding Learning rate", value="0.005")
- hypernetwork_learn_rate = gr.Textbox(label='Hypernetwork Learning rate', placeholder="Hypernetwork Learning rate", value="0.00001")
-
- batch_size = gr.Number(label='Batch size', value=1, precision=0)
- gradient_step = gr.Number(label='Gradient accumulation steps', value=1, precision=0)
- dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images")
- log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion")
- template_file = gr.Textbox(label='Prompt template file', value=os.path.join(script_path, "textual_inversion_templates", "style_filewords.txt"))
- training_width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512)
- training_height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512)
- steps = gr.Number(label='Max steps', value=100000, precision=0)
- create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0)
- save_embedding_every = gr.Number(label='Save a copy of embedding to log directory every N steps, 0 to disable', value=500, precision=0)
- save_image_with_stored_embedding = gr.Checkbox(label='Save images with embedding in PNG chunks', value=True)
- preview_from_txt2img = gr.Checkbox(label='Read parameters (prompt, etc...) from txt2img tab when making previews', value=False)
+ embedding_learn_rate = gr.Textbox(label='Embedding Learning rate', placeholder="Embedding Learning rate", value="0.005", elem_id="train_embedding_learn_rate")
+ hypernetwork_learn_rate = gr.Textbox(label='Hypernetwork Learning rate', placeholder="Hypernetwork Learning rate", value="0.00001", elem_id="train_hypernetwork_learn_rate")
+
+ batch_size = gr.Number(label='Batch size', value=1, precision=0, elem_id="train_batch_size")
+ gradient_step = gr.Number(label='Gradient accumulation steps', value=1, precision=0, elem_id="train_gradient_step")
+ dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images", elem_id="train_dataset_directory")
+ log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion", elem_id="train_log_directory")
+ template_file = gr.Textbox(label='Prompt template file', value=os.path.join(script_path, "textual_inversion_templates", "style_filewords.txt"), elem_id="train_template_file")
+ training_width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="train_training_width")
+ training_height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="train_training_height")
+ steps = gr.Number(label='Max steps', value=100000, precision=0, elem_id="train_steps")
+ create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0, elem_id="train_create_image_every")
+ save_embedding_every = gr.Number(label='Save a copy of embedding to log directory every N steps, 0 to disable', value=500, precision=0, elem_id="train_save_embedding_every")
+ save_image_with_stored_embedding = gr.Checkbox(label='Save images with embedding in PNG chunks', value=True, elem_id="train_save_image_with_stored_embedding")
+ preview_from_txt2img = gr.Checkbox(label='Read parameters (prompt, etc...) from txt2img tab when making previews', value=False, elem_id="train_preview_from_txt2img")
with gr.Row():
- shuffle_tags = gr.Checkbox(label="Shuffle tags by ',' when creating prompts.", value=False)
- tag_drop_out = gr.Slider(minimum=0, maximum=1, step=0.1, label="Drop out tags when creating prompts.", value=0)
+ shuffle_tags = gr.Checkbox(label="Shuffle tags by ',' when creating prompts.", value=False, elem_id="train_shuffle_tags")
+ tag_drop_out = gr.Slider(minimum=0, maximum=1, step=0.1, label="Drop out tags when creating prompts.", value=0, elem_id="train_tag_drop_out")
with gr.Row():
- latent_sampling_method = gr.Radio(label='Choose latent sampling method', value="once", choices=['once', 'deterministic', 'random'])
+ latent_sampling_method = gr.Radio(label='Choose latent sampling method', value="once", choices=['once', 'deterministic', 'random'], elem_id="train_latent_sampling_method")
with gr.Row():
- interrupt_training = gr.Button(value="Interrupt")
- train_hypernetwork = gr.Button(value="Train Hypernetwork", variant='primary')
- train_embedding = gr.Button(value="Train Embedding", variant='primary')
+ interrupt_training = gr.Button(value="Interrupt", elem_id="train_interrupt_training")
+ train_hypernetwork = gr.Button(value="Train Hypernetwork", variant='primary', elem_id="train_train_hypernetwork")
+ train_embedding = gr.Button(value="Train Embedding", variant='primary', elem_id="train_train_embedding")
params = script_callbacks.UiTrainTabParams(txt2img_preview_params)
@@ -1490,7 +1490,7 @@ def create_ui():
return gr.update(value=value), opts.dumpjson()
with gr.Blocks(analytics_enabled=False) as settings_interface:
- settings_submit = gr.Button(value="Apply settings", variant='primary')
+ settings_submit = gr.Button(value="Apply settings", variant='primary', elem_id="settings_submit")
result = gr.HTML()
settings_cols = 3
@@ -1541,8 +1541,8 @@ def create_ui():
download_localization = gr.Button(value='Download localization template', elem_id="download_localization")
with gr.Row():
- reload_script_bodies = gr.Button(value='Reload custom script bodies (No ui updates, No restart)', variant='secondary')
- restart_gradio = gr.Button(value='Restart Gradio and Refresh components (Custom Scripts, ui.py, js and css only)', variant='primary')
+ reload_script_bodies = gr.Button(value='Reload custom script bodies (No ui updates, No restart)', variant='secondary', elem_id="settings_reload_script_bodies")
+ restart_gradio = gr.Button(value='Restart Gradio and Refresh components (Custom Scripts, ui.py, js and css only)', variant='primary', elem_id="settings_restart_gradio")
request_notifications.click(
fn=lambda: None,
--
cgit v1.2.1
From 311354c0bb8930ea939d6aa6b3edd50c69301320 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Mon, 2 Jan 2023 00:38:09 +0300
Subject: fix the issue with training on SD2.0
---
modules/sd_models.py | 2 ++
modules/textual_inversion/textual_inversion.py | 3 +--
2 files changed, 3 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index ebd4dff7..bff8d6c9 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -228,6 +228,8 @@ def load_model_weights(model, checkpoint_info, vae_file="auto"):
model.sd_model_checkpoint = checkpoint_file
model.sd_checkpoint_info = checkpoint_info
+ model.logvar = model.logvar.to(devices.device) # fix for training
+
sd_vae.delete_base_vae()
sd_vae.clear_loaded_vae()
vae_file = sd_vae.resolve_vae(checkpoint_file, vae_file=vae_file)
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 66f40367..1e5722e7 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -282,7 +282,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
return embedding, filename
scheduler = LearnRateScheduler(learn_rate, steps, initial_step)
- # dataset loading may take a while, so input validations and early returns should be done before this
+ # dataset loading may take a while, so input validations and early returns should be done before this
shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
old_parallel_processing_allowed = shared.parallel_processing_allowed
@@ -310,7 +310,6 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
loss_step = 0
_loss_step = 0 #internal
-
last_saved_file = ""
last_saved_image = ""
forced_filename = ""
--
cgit v1.2.1
From b5819d9bf1794071139c640b5f1e72c84a0e051a Mon Sep 17 00:00:00 2001
From: Philpax
Date: Mon, 2 Jan 2023 10:17:33 +1100
Subject: feat(api): add /sdapi/v1/embeddings
---
modules/api/api.py | 8 ++++++++
modules/api/models.py | 3 +++
2 files changed, 11 insertions(+)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 11daff0d..30bf3dac 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -100,6 +100,7 @@ class Api:
self.add_api_route("/sdapi/v1/prompt-styles", self.get_prompt_styles, methods=["GET"], response_model=List[PromptStyleItem])
self.add_api_route("/sdapi/v1/artist-categories", self.get_artists_categories, methods=["GET"], response_model=List[str])
self.add_api_route("/sdapi/v1/artists", self.get_artists, methods=["GET"], response_model=List[ArtistItem])
+ self.add_api_route("/sdapi/v1/embeddings", self.get_embeddings, methods=["GET"], response_model=EmbeddingsResponse)
self.add_api_route("/sdapi/v1/refresh-checkpoints", self.refresh_checkpoints, methods=["POST"])
self.add_api_route("/sdapi/v1/create/embedding", self.create_embedding, methods=["POST"], response_model=CreateResponse)
self.add_api_route("/sdapi/v1/create/hypernetwork", self.create_hypernetwork, methods=["POST"], response_model=CreateResponse)
@@ -327,6 +328,13 @@ class Api:
def get_artists(self):
return [{"name":x[0], "score":x[1], "category":x[2]} for x in shared.artist_db.artists]
+ def get_embeddings(self):
+ db = sd_hijack.model_hijack.embedding_db
+ return {
+ "loaded": sorted(db.word_embeddings.keys()),
+ "skipped": sorted(db.skipped_embeddings),
+ }
+
def refresh_checkpoints(self):
shared.refresh_checkpoints()
diff --git a/modules/api/models.py b/modules/api/models.py
index c446ce7a..a8472dc9 100644
--- a/modules/api/models.py
+++ b/modules/api/models.py
@@ -249,3 +249,6 @@ class ArtistItem(BaseModel):
score: float = Field(title="Score")
category: str = Field(title="Category")
+class EmbeddingsResponse(BaseModel):
+ loaded: List[str] = Field(title="Loaded", description="Embeddings loaded for the current model")
+ skipped: List[str] = Field(title="Skipped", description="Embeddings skipped for the current model (likely due to architecture incompatibility)")
\ No newline at end of file
--
cgit v1.2.1
From c65909ad16a1962129114c6251de092f49479b06 Mon Sep 17 00:00:00 2001
From: Philpax
Date: Mon, 2 Jan 2023 12:21:22 +1100
Subject: feat(api): return more data for embeddings
---
modules/api/api.py | 17 +++++++++++++++--
modules/api/models.py | 11 +++++++++--
modules/textual_inversion/textual_inversion.py | 8 ++++----
3 files changed, 28 insertions(+), 8 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 30bf3dac..9c670f00 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -330,9 +330,22 @@ class Api:
def get_embeddings(self):
db = sd_hijack.model_hijack.embedding_db
+
+ def convert_embedding(embedding):
+ return {
+ "step": embedding.step,
+ "sd_checkpoint": embedding.sd_checkpoint,
+ "sd_checkpoint_name": embedding.sd_checkpoint_name,
+ "shape": embedding.shape,
+ "vectors": embedding.vectors,
+ }
+
+ def convert_embeddings(embeddings):
+ return {embedding.name: convert_embedding(embedding) for embedding in embeddings.values()}
+
return {
- "loaded": sorted(db.word_embeddings.keys()),
- "skipped": sorted(db.skipped_embeddings),
+ "loaded": convert_embeddings(db.word_embeddings),
+ "skipped": convert_embeddings(db.skipped_embeddings),
}
def refresh_checkpoints(self):
diff --git a/modules/api/models.py b/modules/api/models.py
index a8472dc9..4a632c68 100644
--- a/modules/api/models.py
+++ b/modules/api/models.py
@@ -249,6 +249,13 @@ class ArtistItem(BaseModel):
score: float = Field(title="Score")
category: str = Field(title="Category")
+class EmbeddingItem(BaseModel):
+ step: Optional[int] = Field(title="Step", description="The number of steps that were used to train this embedding, if available")
+ sd_checkpoint: Optional[str] = Field(title="SD Checkpoint", description="The hash of the checkpoint this embedding was trained on, if available")
+ sd_checkpoint_name: Optional[str] = Field(title="SD Checkpoint Name", description="The name of the checkpoint this embedding was trained on, if available. Note that this is the name that was used by the trainer; for a stable identifier, use `sd_checkpoint` instead")
+ shape: int = Field(title="Shape", description="The length of each individual vector in the embedding")
+ vectors: int = Field(title="Vectors", description="The number of vectors in the embedding")
+
class EmbeddingsResponse(BaseModel):
- loaded: List[str] = Field(title="Loaded", description="Embeddings loaded for the current model")
- skipped: List[str] = Field(title="Skipped", description="Embeddings skipped for the current model (likely due to architecture incompatibility)")
\ No newline at end of file
+ loaded: Dict[str, EmbeddingItem] = Field(title="Loaded", description="Embeddings loaded for the current model")
+ skipped: Dict[str, EmbeddingItem] = Field(title="Skipped", description="Embeddings skipped for the current model (likely due to architecture incompatibility)")
\ No newline at end of file
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 1e5722e7..fd253477 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -59,7 +59,7 @@ class EmbeddingDatabase:
def __init__(self, embeddings_dir):
self.ids_lookup = {}
self.word_embeddings = {}
- self.skipped_embeddings = []
+ self.skipped_embeddings = {}
self.dir_mtime = None
self.embeddings_dir = embeddings_dir
self.expected_shape = -1
@@ -91,7 +91,7 @@ class EmbeddingDatabase:
self.dir_mtime = mt
self.ids_lookup.clear()
self.word_embeddings.clear()
- self.skipped_embeddings = []
+ self.skipped_embeddings.clear()
self.expected_shape = self.get_expected_shape()
def process_file(path, filename):
@@ -136,7 +136,7 @@ class EmbeddingDatabase:
if self.expected_shape == -1 or self.expected_shape == embedding.shape:
self.register_embedding(embedding, shared.sd_model)
else:
- self.skipped_embeddings.append(name)
+ self.skipped_embeddings[name] = embedding
for fn in os.listdir(self.embeddings_dir):
try:
@@ -153,7 +153,7 @@ class EmbeddingDatabase:
print(f"Textual inversion embeddings loaded({len(self.word_embeddings)}): {', '.join(self.word_embeddings.keys())}")
if len(self.skipped_embeddings) > 0:
- print(f"Textual inversion embeddings skipped({len(self.skipped_embeddings)}): {', '.join(self.skipped_embeddings)}")
+ print(f"Textual inversion embeddings skipped({len(self.skipped_embeddings)}): {', '.join(self.skipped_embeddings.keys())}")
def find_embedding_at_position(self, tokens, offset):
token = tokens[offset]
--
cgit v1.2.1
From ef27a18b6b7cb1a8eebdc9b2e88d25baf2c2414d Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Mon, 2 Jan 2023 19:42:10 +0300
Subject: Hires fix rework
---
modules/generation_parameters_copypaste.py | 32 ++++++++++++++
modules/images.py | 24 +++++++++--
modules/processing.py | 68 ++++++++++++------------------
modules/shared.py | 7 ++-
modules/txt2img.py | 6 +--
modules/ui.py | 15 +++----
6 files changed, 95 insertions(+), 57 deletions(-)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index 8e7f0df0..d6fa822b 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -1,5 +1,6 @@
import base64
import io
+import math
import os
import re
from pathlib import Path
@@ -164,6 +165,35 @@ def find_hypernetwork_key(hypernet_name, hypernet_hash=None):
return None
+def restore_old_hires_fix_params(res):
+ """for infotexts that specify old First pass size parameter, convert it into
+ width, height, and hr scale"""
+
+ firstpass_width = res.get('First pass size-1', None)
+ firstpass_height = res.get('First pass size-2', None)
+
+ if firstpass_width is None or firstpass_height is None:
+ return
+
+ firstpass_width, firstpass_height = int(firstpass_width), int(firstpass_height)
+ width = int(res.get("Size-1", 512))
+ height = int(res.get("Size-2", 512))
+
+ if firstpass_width == 0 or firstpass_height == 0:
+ # old algorithm for auto-calculating first pass size
+ desired_pixel_count = 512 * 512
+ actual_pixel_count = width * height
+ scale = math.sqrt(desired_pixel_count / actual_pixel_count)
+ firstpass_width = math.ceil(scale * width / 64) * 64
+ firstpass_height = math.ceil(scale * height / 64) * 64
+
+ hr_scale = width / firstpass_width if firstpass_width > 0 else height / firstpass_height
+
+ res['Size-1'] = firstpass_width
+ res['Size-2'] = firstpass_height
+ res['Hires upscale'] = hr_scale
+
+
def parse_generation_parameters(x: str):
"""parses generation parameters string, the one you see in text field under the picture in UI:
```
@@ -221,6 +251,8 @@ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model
hypernet_hash = res.get("Hypernet hash", None)
res["Hypernet"] = find_hypernetwork_key(hypernet_name, hypernet_hash)
+ restore_old_hires_fix_params(res)
+
return res
diff --git a/modules/images.py b/modules/images.py
index f84fd485..c3a5fc8b 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -230,16 +230,32 @@ def draw_prompt_matrix(im, width, height, all_prompts):
return draw_grid_annotations(im, width, height, hor_texts, ver_texts)
-def resize_image(resize_mode, im, width, height):
+def resize_image(resize_mode, im, width, height, upscaler_name=None):
+ """
+ Resizes an image with the specified resize_mode, width, and height.
+
+ Args:
+ resize_mode: The mode to use when resizing the image.
+ 0: Resize the image to the specified width and height.
+ 1: Resize the image to fill the specified width and height, maintaining the aspect ratio, and then center the image within the dimensions, cropping the excess.
+ 2: Resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center the image within the dimensions, filling empty with data from image.
+ im: The image to resize.
+ width: The width to resize the image to.
+ height: The height to resize the image to.
+ upscaler_name: The name of the upscaler to use. If not provided, defaults to opts.upscaler_for_img2img.
+ """
+
+ upscaler_name = upscaler_name or opts.upscaler_for_img2img
+
def resize(im, w, h):
- if opts.upscaler_for_img2img is None or opts.upscaler_for_img2img == "None" or im.mode == 'L':
+ if upscaler_name is None or upscaler_name == "None" or im.mode == 'L':
return im.resize((w, h), resample=LANCZOS)
scale = max(w / im.width, h / im.height)
if scale > 1.0:
- upscalers = [x for x in shared.sd_upscalers if x.name == opts.upscaler_for_img2img]
- assert len(upscalers) > 0, f"could not find upscaler named {opts.upscaler_for_img2img}"
+ upscalers = [x for x in shared.sd_upscalers if x.name == upscaler_name]
+ assert len(upscalers) > 0, f"could not find upscaler named {upscaler_name}"
upscaler = upscalers[0]
im = upscaler.scaler.upscale(im, scale, upscaler.data_path)
diff --git a/modules/processing.py b/modules/processing.py
index 42dc19ea..4654570c 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -658,14 +658,18 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
sampler = None
- def __init__(self, enable_hr: bool=False, denoising_strength: float=0.75, firstphase_width: int=0, firstphase_height: int=0, **kwargs):
+ def __init__(self, enable_hr: bool = False, denoising_strength: float = 0.75, firstphase_width: int = 0, firstphase_height: int = 0, hr_scale: float = 2.0, hr_upscaler: str = None, **kwargs):
super().__init__(**kwargs)
self.enable_hr = enable_hr
self.denoising_strength = denoising_strength
- self.firstphase_width = firstphase_width
- self.firstphase_height = firstphase_height
- self.truncate_x = 0
- self.truncate_y = 0
+ self.hr_scale = hr_scale
+ self.hr_upscaler = hr_upscaler
+
+ if firstphase_width != 0 or firstphase_height != 0:
+ print("firstphase_width/firstphase_height no longer supported; use hr_scale", file=sys.stderr)
+ self.hr_scale = self.width / firstphase_width
+ self.width = firstphase_width
+ self.height = firstphase_height
def init(self, all_prompts, all_seeds, all_subseeds):
if self.enable_hr:
@@ -674,47 +678,29 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
else:
state.job_count = state.job_count * 2
- self.extra_generation_params["First pass size"] = f"{self.firstphase_width}x{self.firstphase_height}"
-
- if self.firstphase_width == 0 or self.firstphase_height == 0:
- desired_pixel_count = 512 * 512
- actual_pixel_count = self.width * self.height
- scale = math.sqrt(desired_pixel_count / actual_pixel_count)
- self.firstphase_width = math.ceil(scale * self.width / 64) * 64
- self.firstphase_height = math.ceil(scale * self.height / 64) * 64
- firstphase_width_truncated = int(scale * self.width)
- firstphase_height_truncated = int(scale * self.height)
-
- else:
-
- width_ratio = self.width / self.firstphase_width
- height_ratio = self.height / self.firstphase_height
-
- if width_ratio > height_ratio:
- firstphase_width_truncated = self.firstphase_width
- firstphase_height_truncated = self.firstphase_width * self.height / self.width
- else:
- firstphase_width_truncated = self.firstphase_height * self.width / self.height
- firstphase_height_truncated = self.firstphase_height
-
- self.truncate_x = int(self.firstphase_width - firstphase_width_truncated) // opt_f
- self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f
+ self.extra_generation_params["Hires upscale"] = self.hr_scale
+ if self.hr_upscaler is not None:
+ self.extra_generation_params["Hires upscaler"] = self.hr_upscaler
def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
+ latent_scale_mode = shared.latent_upscale_modes.get(self.hr_upscaler, None) if self.hr_upscaler is not None else shared.latent_upscale_default_mode
+ if self.enable_hr and latent_scale_mode is None:
+ assert len([x for x in shared.sd_upscalers if x.name == self.hr_upscaler]) > 0, f"could not find upscaler named {self.hr_upscaler}"
+
+ x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
+ samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
+
if not self.enable_hr:
- x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
- samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
return samples
- x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
- samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x, self.firstphase_width, self.firstphase_height))
-
- samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2]
+ target_width = int(self.width * self.hr_scale)
+ target_height = int(self.height * self.hr_scale)
- """saves image before applying hires fix, if enabled in options; takes as an argument either an image or batch with latent space images"""
def save_intermediate(image, index):
+ """saves image before applying hires fix, if enabled in options; takes as an argument either an image or batch with latent space images"""
+
if not opts.save or self.do_not_save_samples or not opts.save_images_before_highres_fix:
return
@@ -723,11 +709,11 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, suffix="-before-highres-fix")
- if opts.use_scale_latent_for_hires_fix:
+ if latent_scale_mode is not None:
for i in range(samples.shape[0]):
save_intermediate(samples, i)
- samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
+ samples = torch.nn.functional.interpolate(samples, size=(target_height // opt_f, target_width // opt_f), mode=latent_scale_mode)
# Avoid making the inpainting conditioning unless necessary as
# this does need some extra compute to decode / encode the image again.
@@ -747,7 +733,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
save_intermediate(image, i)
- image = images.resize_image(0, image, self.width, self.height)
+ image = images.resize_image(0, image, target_width, target_height, upscaler_name=self.hr_upscaler)
image = np.array(image).astype(np.float32) / 255.0
image = np.moveaxis(image, 2, 0)
batch_images.append(image)
@@ -764,7 +750,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
- noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
+ noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, p=self)
# GC now before running the next img2img to prevent running out of memory
x = None
diff --git a/modules/shared.py b/modules/shared.py
index 7f430b93..b65559ee 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -327,7 +327,6 @@ options_templates.update(options_section(('upscaling', "Upscaling"), {
"ESRGAN_tile_overlap": OptionInfo(8, "Tile overlap, in pixels for ESRGAN upscalers. Low values = visible seam.", gr.Slider, {"minimum": 0, "maximum": 48, "step": 1}),
"realesrgan_enabled_models": OptionInfo(["R-ESRGAN 4x+", "R-ESRGAN 4x+ Anime6B"], "Select which Real-ESRGAN models to show in the web UI. (Requires restart)", gr.CheckboxGroup, lambda: {"choices": realesrgan_models_names()}),
"upscaler_for_img2img": OptionInfo(None, "Upscaler for img2img", gr.Dropdown, lambda: {"choices": [x.name for x in sd_upscalers]}),
- "use_scale_latent_for_hires_fix": OptionInfo(False, "Upscale latent space image when doing hires. fix"),
}))
options_templates.update(options_section(('face-restoration', "Face restoration"), {
@@ -545,6 +544,12 @@ opts = Options()
if os.path.exists(config_filename):
opts.load(config_filename)
+latent_upscale_default_mode = "Latent"
+latent_upscale_modes = {
+ "Latent": "bilinear",
+ "Latent (nearest)": "nearest",
+}
+
sd_upscalers = []
sd_model = None
diff --git a/modules/txt2img.py b/modules/txt2img.py
index 7f61e19a..e189a899 100644
--- a/modules/txt2img.py
+++ b/modules/txt2img.py
@@ -8,7 +8,7 @@ import modules.processing as processing
from modules.ui import plaintext_to_html
-def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, steps: int, sampler_index: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, enable_hr: bool, denoising_strength: float, firstphase_width: int, firstphase_height: int, *args):
+def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, steps: int, sampler_index: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, enable_hr: bool, denoising_strength: float, hr_scale: float, hr_upscaler: str, *args):
p = StableDiffusionProcessingTxt2Img(
sd_model=shared.sd_model,
outpath_samples=opts.outdir_samples or opts.outdir_txt2img_samples,
@@ -33,8 +33,8 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2:
tiling=tiling,
enable_hr=enable_hr,
denoising_strength=denoising_strength if enable_hr else None,
- firstphase_width=firstphase_width if enable_hr else None,
- firstphase_height=firstphase_height if enable_hr else None,
+ hr_scale=hr_scale,
+ hr_upscaler=hr_upscaler,
)
p.scripts = modules.scripts.scripts_txt2img
diff --git a/modules/ui.py b/modules/ui.py
index 7070ea15..27cd9ddd 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -684,11 +684,11 @@ def create_ui():
with gr.Row():
restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1, elem_id="txt2img_restore_faces")
tiling = gr.Checkbox(label='Tiling', value=False, elem_id="txt2img_tiling")
- enable_hr = gr.Checkbox(label='Highres. fix', value=False, elem_id="txt2img_enable_hr")
+ enable_hr = gr.Checkbox(label='Hires. fix', value=False, elem_id="txt2img_enable_hr")
with gr.Row(visible=False) as hr_options:
- firstphase_width = gr.Slider(minimum=0, maximum=1024, step=8, label="Firstpass width", value=0, elem_id="txt2img_firstphase_width")
- firstphase_height = gr.Slider(minimum=0, maximum=1024, step=8, label="Firstpass height", value=0, elem_id="txt2img_firstphase_height")
+ hr_upscaler = gr.Dropdown(label="Upscaler", elem_id="txt2img_hr_upscaler", choices=[*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]], value=shared.latent_upscale_default_mode)
+ hr_scale = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Upscale by", value=2.0, elem_id="txt2img_hr_scale")
denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.7, elem_id="txt2img_denoising_strength")
with gr.Row(equal_height=True):
@@ -729,8 +729,8 @@ def create_ui():
width,
enable_hr,
denoising_strength,
- firstphase_width,
- firstphase_height,
+ hr_scale,
+ hr_upscaler,
] + custom_inputs,
outputs=[
@@ -762,7 +762,6 @@ def create_ui():
outputs=[hr_options],
)
-
txt2img_paste_fields = [
(txt2img_prompt, "Prompt"),
(txt2img_negative_prompt, "Negative prompt"),
@@ -781,8 +780,8 @@ def create_ui():
(denoising_strength, "Denoising strength"),
(enable_hr, lambda d: "Denoising strength" in d),
(hr_options, lambda d: gr.Row.update(visible="Denoising strength" in d)),
- (firstphase_width, "First pass size-1"),
- (firstphase_height, "First pass size-2"),
+ (hr_scale, "Hires upscale"),
+ (hr_upscaler, "Hires upscaler"),
*modules.scripts.scripts_txt2img.infotext_fields
]
parameters_copypaste.add_paste_fields("txt2img", None, txt2img_paste_fields)
--
cgit v1.2.1
From 4dbde228ff48dbb105241b1ed25c21ce3f87d182 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Mon, 2 Jan 2023 20:01:16 +0300
Subject: make it possible to use fractional values for SD upscale.
---
modules/upscaler.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'modules')
diff --git a/modules/upscaler.py b/modules/upscaler.py
index c4e6e6bd..231680cb 100644
--- a/modules/upscaler.py
+++ b/modules/upscaler.py
@@ -53,10 +53,10 @@ class Upscaler:
def do_upscale(self, img: PIL.Image, selected_model: str):
return img
- def upscale(self, img: PIL.Image, scale: int, selected_model: str = None):
+ def upscale(self, img: PIL.Image, scale, selected_model: str = None):
self.scale = scale
- dest_w = img.width * scale
- dest_h = img.height * scale
+ dest_w = int(img.width * scale)
+ dest_h = int(img.height * scale)
for i in range(3):
shape = (img.width, img.height)
--
cgit v1.2.1
From 84dd7e8e2495c4fc2997e97f8267aa831eb90d11 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Mon, 2 Jan 2023 20:30:02 +0300
Subject: error out with a readable message in chwewckpoint merger for
incompatible tensor shapes (ie when trying to merge SD1.5 with SD2.0)
---
modules/extras.py | 2 ++
modules/ui.py | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/extras.py b/modules/extras.py
index 68939dea..5e270250 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -303,6 +303,8 @@ def run_modelmerger(primary_model_name, secondary_model_name, tertiary_model_nam
theta_0[key][:, 0:4, :, :] = theta_func2(a[:, 0:4, :, :], b, multiplier)
result_is_inpainting_model = True
else:
+ assert a.shape == b.shape, f'Incompatible shapes for layer {key}: A is {a.shape}, and B is {b.shape}'
+
theta_0[key] = theta_func2(a, b, multiplier)
if save_as_half:
diff --git a/modules/ui.py b/modules/ui.py
index 27cd9ddd..67a51888 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1663,7 +1663,7 @@ def create_ui():
print("Error loading/saving model file:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
modules.sd_models.list_models() # to remove the potentially missing models from the list
- return ["Error loading/saving model file. It doesn't exist or the name contains illegal characters"] + [gr.Dropdown.update(choices=modules.sd_models.checkpoint_tiles()) for _ in range(3)]
+ return [f"Error merging checkpoints: {e}"] + [gr.Dropdown.update(choices=modules.sd_models.checkpoint_tiles()) for _ in range(4)]
return results
modelmerger_merge.click(
--
cgit v1.2.1
From 8d12a729b8b036cb765cf2d87576d5ae256135c8 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Mon, 2 Jan 2023 20:46:51 +0300
Subject: fix possible error with accessing nonexistent setting
---
modules/ui.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index 67a51888..9350a80f 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -491,7 +491,7 @@ def apply_setting(key, value):
return
valtype = type(opts.data_labels[key].default)
- oldval = opts.data[key]
+ oldval = opts.data.get(key, None)
opts.data[key] = valtype(value) if valtype != type(None) else value
if oldval != value and opts.data_labels[key].onchange is not None:
opts.data_labels[key].onchange()
--
cgit v1.2.1
From 251ecee6949c36e9df1d99a950b3e1af2b5fa2b6 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Mon, 2 Jan 2023 22:44:46 +0300
Subject: make "send to" buttons send actual dimension of the sent image rather
than fields
---
modules/generation_parameters_copypaste.py | 58 ++++++++++++++++++++----------
1 file changed, 40 insertions(+), 18 deletions(-)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index d6fa822b..ec60319a 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -103,35 +103,57 @@ def bind_buttons(buttons, send_image, send_generate_info):
bind_list.append([buttons, send_image, send_generate_info])
+def send_image_and_dimensions(x):
+ if isinstance(x, Image.Image):
+ img = x
+ else:
+ img = image_from_url_text(x)
+
+ if shared.opts.send_size and isinstance(img, Image.Image):
+ w = img.width
+ h = img.height
+ else:
+ w = gr.update()
+ h = gr.update()
+
+ return img, w, h
+
+
def run_bind():
- for buttons, send_image, send_generate_info in bind_list:
+ for buttons, source_image_component, send_generate_info in bind_list:
for tab in buttons:
button = buttons[tab]
- if send_image and paste_fields[tab]["init_img"]:
- if type(send_image) == gr.Gallery:
- button.click(
- fn=lambda x: image_from_url_text(x),
- _js="extract_image_from_gallery",
- inputs=[send_image],
- outputs=[paste_fields[tab]["init_img"]],
- )
+ destination_image_component = paste_fields[tab]["init_img"]
+ fields = paste_fields[tab]["fields"]
+
+ destination_width_component = next(iter([field for field, name in fields if name == "Size-1"] if fields else []), None)
+ destination_height_component = next(iter([field for field, name in fields if name == "Size-2"] if fields else []), None)
+
+ if source_image_component and destination_image_component:
+ if isinstance(source_image_component, gr.Gallery):
+ func = send_image_and_dimensions if destination_width_component else image_from_url_text
+ jsfunc = "extract_image_from_gallery"
else:
- button.click(
- fn=lambda x: x,
- inputs=[send_image],
- outputs=[paste_fields[tab]["init_img"]],
- )
+ func = send_image_and_dimensions if destination_width_component else lambda x: x
+ jsfunc = None
+
+ button.click(
+ fn=func,
+ _js=jsfunc,
+ inputs=[source_image_component],
+ outputs=[destination_image_component, destination_width_component, destination_height_component] if destination_width_component else [destination_image_component],
+ )
- if send_generate_info and paste_fields[tab]["fields"] is not None:
+ if send_generate_info and fields is not None:
if send_generate_info in paste_fields:
- paste_field_names = ['Prompt', 'Negative prompt', 'Steps', 'Face restoration'] + (['Size-1', 'Size-2'] if shared.opts.send_size else []) + (["Seed"] if shared.opts.send_seed else [])
+ paste_field_names = ['Prompt', 'Negative prompt', 'Steps', 'Face restoration'] + (["Seed"] if shared.opts.send_seed else [])
button.click(
fn=lambda *x: x,
inputs=[field for field, name in paste_fields[send_generate_info]["fields"] if name in paste_field_names],
- outputs=[field for field, name in paste_fields[tab]["fields"] if name in paste_field_names],
+ outputs=[field for field, name in fields if name in paste_field_names],
)
else:
- connect_paste(button, paste_fields[tab]["fields"], send_generate_info)
+ connect_paste(button, fields, send_generate_info)
button.click(
fn=None,
--
cgit v1.2.1
From 269f6e867651cadef40d2c939a79d13291280bcd Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 3 Jan 2023 07:20:20 +0300
Subject: change settings UI to use vertical tabs
---
modules/ui.py | 45 +++++++++++++++++----------------------------
1 file changed, 17 insertions(+), 28 deletions(-)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index 9350a80f..f8c973ba 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1489,41 +1489,34 @@ def create_ui():
return gr.update(value=value), opts.dumpjson()
with gr.Blocks(analytics_enabled=False) as settings_interface:
- settings_submit = gr.Button(value="Apply settings", variant='primary', elem_id="settings_submit")
- result = gr.HTML()
+ with gr.Row():
+ settings_submit = gr.Button(value="Apply settings", variant='primary', elem_id="settings_submit")
+ restart_gradio = gr.Button(value='Restart UI', variant='primary', elem_id="settings_restart_gradio")
- settings_cols = 3
- items_per_col = int(len(opts.data_labels) * 0.9 / settings_cols)
+ result = gr.HTML(elem_id="settings_result")
quicksettings_names = [x.strip() for x in opts.quicksettings.split(",")]
quicksettings_names = set(x for x in quicksettings_names if x != 'quicksettings')
quicksettings_list = []
- cols_displayed = 0
- items_displayed = 0
previous_section = None
- column = None
- with gr.Row(elem_id="settings").style(equal_height=False):
+ current_tab = None
+ with gr.Tabs(elem_id="settings"):
for i, (k, item) in enumerate(opts.data_labels.items()):
section_must_be_skipped = item.section[0] is None
if previous_section != item.section and not section_must_be_skipped:
- if cols_displayed < settings_cols and (items_displayed >= items_per_col or previous_section is None):
- if column is not None:
- column.__exit__()
+ elem_id, text = item.section
- column = gr.Column(variant='panel')
- column.__enter__()
+ if current_tab is not None:
+ current_tab.__exit__()
- items_displayed = 0
- cols_displayed += 1
+ current_tab = gr.TabItem(elem_id="settings_{}".format(elem_id), label=text)
+ current_tab.__enter__()
previous_section = item.section
- elem_id, text = item.section
- gr.HTML(elem_id="settings_header_text_{}".format(elem_id), value=''.format(text))
-
if k in quicksettings_names and not shared.cmd_opts.freeze_settings:
quicksettings_list.append((i, k, item))
components.append(dummy_component)
@@ -1533,15 +1526,14 @@ def create_ui():
component = create_setting_component(k)
component_dict[k] = component
components.append(component)
- items_displayed += 1
- with gr.Row():
- request_notifications = gr.Button(value='Request browser notifications', elem_id="request_notifications")
- download_localization = gr.Button(value='Download localization template', elem_id="download_localization")
+ if current_tab is not None:
+ current_tab.__exit__()
- with gr.Row():
- reload_script_bodies = gr.Button(value='Reload custom script bodies (No ui updates, No restart)', variant='secondary', elem_id="settings_reload_script_bodies")
- restart_gradio = gr.Button(value='Restart Gradio and Refresh components (Custom Scripts, ui.py, js and css only)', variant='primary', elem_id="settings_restart_gradio")
+ with gr.TabItem("Actions"):
+ request_notifications = gr.Button(value='Request browser notifications', elem_id="request_notifications")
+ download_localization = gr.Button(value='Download localization template', elem_id="download_localization")
+ reload_script_bodies = gr.Button(value='Reload custom script bodies (No ui updates, No restart)', variant='secondary', elem_id="settings_reload_script_bodies")
request_notifications.click(
fn=lambda: None,
@@ -1578,9 +1570,6 @@ def create_ui():
outputs=[],
)
- if column is not None:
- column.__exit__()
-
interfaces = [
(txt2img_interface, "txt2img", "txt2img"),
(img2img_interface, "img2img", "img2img"),
--
cgit v1.2.1
From 18c03cdeac6272734b0c09afd3fbe47d1372dd07 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 3 Jan 2023 09:04:29 +0300
Subject: styling rework to make things more compact
---
modules/ui.py | 121 ++++++++++++++++++++++++-----------------------
modules/ui_components.py | 7 +++
2 files changed, 68 insertions(+), 60 deletions(-)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index f8c973ba..f787b518 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -19,7 +19,8 @@ import numpy as np
from PIL import Image, PngImagePlugin
from modules.call_queue import wrap_gradio_gpu_call, wrap_queued_call, wrap_gradio_call
-from modules import sd_hijack, sd_models, localization, script_callbacks, ui_extensions, deepbooru, ui_components
+from modules import sd_hijack, sd_models, localization, script_callbacks, ui_extensions, deepbooru
+from modules.ui_components import FormRow, FormGroup, ToolButton
from modules.paths import script_path
from modules.shared import opts, cmd_opts, restricted_opts
@@ -273,31 +274,27 @@ def interrogate_deepbooru(image):
def create_seed_inputs(target_interface):
- with gr.Row():
- with gr.Box():
- with gr.Row(elem_id=target_interface + '_seed_row'):
- seed = (gr.Textbox if cmd_opts.use_textbox_seed else gr.Number)(label='Seed', value=-1, elem_id=target_interface + '_seed')
- seed.style(container=False)
- random_seed = gr.Button(random_symbol, elem_id=target_interface + '_random_seed')
- reuse_seed = gr.Button(reuse_symbol, elem_id=target_interface + '_reuse_seed')
-
- with gr.Box(elem_id=target_interface + '_subseed_show_box'):
+ with FormRow(elem_id=target_interface + '_seed_row'):
+ seed = (gr.Textbox if cmd_opts.use_textbox_seed else gr.Number)(label='Seed', value=-1, elem_id=target_interface + '_seed')
+ seed.style(container=False)
+ random_seed = gr.Button(random_symbol, elem_id=target_interface + '_random_seed')
+ reuse_seed = gr.Button(reuse_symbol, elem_id=target_interface + '_reuse_seed')
+
+ with gr.Group(elem_id=target_interface + '_subseed_show_box'):
seed_checkbox = gr.Checkbox(label='Extra', elem_id=target_interface + '_subseed_show', value=False)
# Components to show/hide based on the 'Extra' checkbox
seed_extras = []
- with gr.Row(visible=False) as seed_extra_row_1:
+ with FormRow(visible=False, elem_id=target_interface + '_subseed_row') as seed_extra_row_1:
seed_extras.append(seed_extra_row_1)
- with gr.Box():
- with gr.Row(elem_id=target_interface + '_subseed_row'):
- subseed = gr.Number(label='Variation seed', value=-1, elem_id=target_interface + '_subseed')
- subseed.style(container=False)
- random_subseed = gr.Button(random_symbol, elem_id=target_interface + '_random_subseed')
- reuse_subseed = gr.Button(reuse_symbol, elem_id=target_interface + '_reuse_subseed')
+ subseed = gr.Number(label='Variation seed', value=-1, elem_id=target_interface + '_subseed')
+ subseed.style(container=False)
+ random_subseed = gr.Button(random_symbol, elem_id=target_interface + '_random_subseed')
+ reuse_subseed = gr.Button(reuse_symbol, elem_id=target_interface + '_reuse_subseed')
subseed_strength = gr.Slider(label='Variation strength', value=0.0, minimum=0, maximum=1, step=0.01, elem_id=target_interface + '_subseed_strength')
- with gr.Row(visible=False) as seed_extra_row_2:
+ with FormRow(visible=False) as seed_extra_row_2:
seed_extras.append(seed_extra_row_2)
seed_resize_from_w = gr.Slider(minimum=0, maximum=2048, step=8, label="Resize seed from width", value=0, elem_id=target_interface + '_seed_resize_from_w')
seed_resize_from_h = gr.Slider(minimum=0, maximum=2048, step=8, label="Resize seed from height", value=0, elem_id=target_interface + '_seed_resize_from_h')
@@ -523,7 +520,7 @@ def create_refresh_button(refresh_component, refresh_method, refreshed_args, ele
return gr.update(**(args or {}))
- refresh_button = ui_components.ToolButton(value=refresh_symbol, elem_id=elem_id)
+ refresh_button = ToolButton(value=refresh_symbol, elem_id=elem_id)
refresh_button.click(
fn=refresh,
inputs=[],
@@ -636,11 +633,11 @@ Requested path was: {f}
def create_sampler_and_steps_selection(choices, tabname):
if opts.samplers_in_dropdown:
- with gr.Row(elem_id=f"sampler_selection_{tabname}"):
+ with FormRow(elem_id=f"sampler_selection_{tabname}"):
sampler_index = gr.Dropdown(label='Sampling method', elem_id=f"{tabname}_sampling", choices=[x.name for x in choices], value=choices[0].name, type="index")
steps = gr.Slider(minimum=1, maximum=150, step=1, elem_id=f"{tabname}_steps", label="Sampling Steps", value=20)
else:
- with gr.Group(elem_id=f"sampler_selection_{tabname}"):
+ with FormGroup(elem_id=f"sampler_selection_{tabname}"):
steps = gr.Slider(minimum=1, maximum=150, step=1, elem_id=f"{tabname}_steps", label="Sampling Steps", value=20)
sampler_index = gr.Radio(label='Sampling method', elem_id=f"{tabname}_sampling", choices=[x.name for x in choices], value=choices[0].name, type="index")
@@ -677,29 +674,29 @@ def create_ui():
with gr.Column(variant='panel', elem_id="txt2img_settings"):
steps, sampler_index = create_sampler_and_steps_selection(samplers, "txt2img")
- with gr.Group():
- width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="txt2img_width")
- height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="txt2img_height")
+ with FormRow():
+ with gr.Column(elem_id="txt2img_column_size", scale=4):
+ width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="txt2img_width")
+ height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="txt2img_height")
+ with gr.Column(elem_id="txt2img_column_batch"):
+ batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="txt2img_batch_count")
+ batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="txt2img_batch_size")
- with gr.Row():
+ cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0, elem_id="txt2img_cfg_scale")
+
+ seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs('txt2img')
+
+ with FormRow(elem_id="txt2img_checkboxes"):
restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1, elem_id="txt2img_restore_faces")
tiling = gr.Checkbox(label='Tiling', value=False, elem_id="txt2img_tiling")
enable_hr = gr.Checkbox(label='Hires. fix', value=False, elem_id="txt2img_enable_hr")
- with gr.Row(visible=False) as hr_options:
+ with FormRow(visible=False) as hr_options:
hr_upscaler = gr.Dropdown(label="Upscaler", elem_id="txt2img_hr_upscaler", choices=[*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]], value=shared.latent_upscale_default_mode)
hr_scale = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Upscale by", value=2.0, elem_id="txt2img_hr_scale")
denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.7, elem_id="txt2img_denoising_strength")
- with gr.Row(equal_height=True):
- batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="txt2img_batch_count")
- batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="txt2img_batch_size")
-
- cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0, elem_id="txt2img_cfg_scale")
-
- seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs('txt2img')
-
- with gr.Group(elem_id="txt2img_script_container"):
+ with FormGroup(elem_id="txt2img_script_container"):
custom_inputs = modules.scripts.scripts_txt2img.setup_ui()
txt2img_gallery, generation_info, html_info, html_log = create_output_panel("txt2img", opts.outdir_txt2img_samples)
@@ -816,7 +813,7 @@ def create_ui():
img2img_preview = gr.Image(elem_id='img2img_preview', visible=False)
setup_progressbar(progressbar, img2img_preview, 'img2img')
- with gr.Row().style(equal_height=False):
+ with FormRow().style(equal_height=False):
with gr.Column(variant='panel', elem_id="img2img_settings"):
with gr.Tabs(elem_id="mode_img2img") as tabs_img2img_mode:
@@ -841,19 +838,23 @@ def create_ui():
init_img_inpaint = gr.Image(label="Image for img2img", show_label=False, source="upload", interactive=True, type="pil", visible=False, elem_id="img_inpaint_base")
init_mask_inpaint = gr.Image(label="Mask", source="upload", interactive=True, type="pil", visible=False, elem_id="img_inpaint_mask")
- with gr.Row():
+ with FormRow():
mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id="img2img_mask_blur")
mask_alpha = gr.Slider(label="Mask transparency", interactive=use_color_sketch, visible=use_color_sketch, elem_id="img2img_mask_alpha")
- with gr.Row():
- mask_mode = gr.Radio(label="Mask mode", show_label=False, choices=["Draw mask", "Upload mask"], type="index", value="Draw mask", elem_id="mask_mode")
- inpainting_mask_invert = gr.Radio(label='Masking mode', show_label=False, choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index", elem_id="img2img_mask_mode")
+ with FormRow():
+ mask_mode = gr.Radio(label="Mask source", choices=["Draw mask", "Upload mask"], type="index", value="Draw mask", elem_id="mask_mode")
+ inpainting_mask_invert = gr.Radio(label='Mask mode', choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index", elem_id="img2img_mask_mode")
- inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='original', type="index", elem_id="img2img_inpainting_fill")
+ with FormRow():
+ inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='original', type="index", elem_id="img2img_inpainting_fill")
- with gr.Row():
- inpaint_full_res = gr.Checkbox(label='Inpaint at full resolution', value=False, elem_id="img2img_inpaint_full_res")
- inpaint_full_res_padding = gr.Slider(label='Inpaint at full resolution padding, pixels', minimum=0, maximum=256, step=4, value=32, elem_id="img2img_inpaint_full_res_padding")
+ with FormRow():
+ with gr.Column():
+ inpaint_full_res = gr.Radio(label="Inpaint area", choices=["Whole picture", "Only masked"], type="index", value="Whole picture", elem_id="img2img_inpaint_full_res")
+
+ with gr.Column(scale=4):
+ inpaint_full_res_padding = gr.Slider(label='Only masked padding, pixels', minimum=0, maximum=256, step=4, value=32, elem_id="img2img_inpaint_full_res_padding")
with gr.TabItem('Batch img2img', id='batch', elem_id="img2img_batch_tab"):
hidden = ' Disabled when launched with --hide-ui-dir-config.' if shared.cmd_opts.hide_ui_dir_config else ''
@@ -861,30 +862,30 @@ def create_ui():
img2img_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs, elem_id="img2img_batch_input_dir")
img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, elem_id="img2img_batch_output_dir")
- with gr.Row():
- resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", show_label=False, choices=["Just resize", "Crop and resize", "Resize and fill", "Just resize (latent upscale)"], type="index", value="Just resize")
+ with FormRow():
+ resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", choices=["Just resize", "Crop and resize", "Resize and fill", "Just resize (latent upscale)"], type="index", value="Just resize")
steps, sampler_index = create_sampler_and_steps_selection(samplers_for_img2img, "img2img")
- with gr.Group():
- width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="img2img_width")
- height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="img2img_height")
-
- with gr.Row():
- restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1, elem_id="img2img_restore_faces")
- tiling = gr.Checkbox(label='Tiling', value=False, elem_id="img2img_tiling")
+ with FormRow():
+ with gr.Column(elem_id="img2img_column_size", scale=4):
+ width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="img2img_width")
+ height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="img2img_height")
+ with gr.Column(elem_id="img2img_column_batch"):
+ batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="img2img_batch_count")
+ batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="img2img_batch_size")
- with gr.Row():
- batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="img2img_batch_count")
- batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="img2img_batch_size")
-
- with gr.Group():
+ with FormGroup():
cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0, elem_id="img2img_cfg_scale")
denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.75, elem_id="img2img_denoising_strength")
seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs('img2img')
- with gr.Group(elem_id="img2img_script_container"):
+ with FormRow(elem_id="img2img_checkboxes"):
+ restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1, elem_id="img2img_restore_faces")
+ tiling = gr.Checkbox(label='Tiling', value=False, elem_id="img2img_tiling")
+
+ with FormGroup(elem_id="img2img_script_container"):
custom_inputs = modules.scripts.scripts_img2img.setup_ui()
img2img_gallery, generation_info, html_info, html_log = create_output_panel("img2img", opts.outdir_img2img_samples)
@@ -1444,7 +1445,7 @@ def create_ui():
res = comp(label=info.label, value=fun(), elem_id=elem_id, **(args or {}))
create_refresh_button(res, info.refresh, info.component_args, "refresh_" + key)
else:
- with ui_components.FormRow():
+ with FormRow():
res = comp(label=info.label, value=fun(), elem_id=elem_id, **(args or {}))
create_refresh_button(res, info.refresh, info.component_args, "refresh_" + key)
else:
diff --git a/modules/ui_components.py b/modules/ui_components.py
index d0519d2d..91eb0e3d 100644
--- a/modules/ui_components.py
+++ b/modules/ui_components.py
@@ -16,3 +16,10 @@ class FormRow(gr.Row, gr.components.FormComponent):
def get_block_name(self):
return "row"
+
+
+class FormGroup(gr.Group, gr.components.FormComponent):
+ """Same as gr.Row but fits inside gradio forms"""
+
+ def get_block_name(self):
+ return "group"
--
cgit v1.2.1
From 2bc86712ec16cada01a2353f1d978c1aabc84dbb Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 3 Jan 2023 09:13:35 +0300
Subject: make quicksettings UI elements appear in same order as they are
listed in the setting
---
modules/ui.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index f787b518..d7b911da 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1497,7 +1497,7 @@ def create_ui():
result = gr.HTML(elem_id="settings_result")
quicksettings_names = [x.strip() for x in opts.quicksettings.split(",")]
- quicksettings_names = set(x for x in quicksettings_names if x != 'quicksettings')
+ quicksettings_names = {x: i for i, x in enumerate(quicksettings_names) if x != 'quicksettings'}
quicksettings_list = []
@@ -1604,7 +1604,7 @@ def create_ui():
with gr.Blocks(css=css, analytics_enabled=False, title="Stable Diffusion") as demo:
with gr.Row(elem_id="quicksettings"):
- for i, k, item in quicksettings_list:
+ for i, k, item in sorted(quicksettings_list, key=lambda x: quicksettings_names.get(x[1], x[0])):
component = create_setting_component(k, is_quicksettings=True)
component_dict[k] = component
--
cgit v1.2.1
From 9d4eff097deff6153c4023f158bd9fbd4f3e88b3 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 3 Jan 2023 10:01:06 +0300
Subject: add a button to show all setting pages
---
modules/ui.py | 2 ++
1 file changed, 2 insertions(+)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index d7b911da..2c92c422 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1536,6 +1536,8 @@ def create_ui():
download_localization = gr.Button(value='Download localization template', elem_id="download_localization")
reload_script_bodies = gr.Button(value='Reload custom script bodies (No ui updates, No restart)', variant='secondary', elem_id="settings_reload_script_bodies")
+ gr.Button(value="Show all pages", elem_id="settings_show_all_pages")
+
request_notifications.click(
fn=lambda: None,
inputs=[],
--
cgit v1.2.1
From a1cf55a9d1c82f8e56c00d549bca5c8fa069f412 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 3 Jan 2023 10:39:21 +0300
Subject: add option to reorder items in main UI
---
modules/shared.py | 13 ++++++
modules/ui.py | 130 +++++++++++++++++++++++++++++++++++-------------------
2 files changed, 97 insertions(+), 46 deletions(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index b65559ee..23657a93 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -109,6 +109,17 @@ restricted_opts = {
"outdir_save",
}
+ui_reorder_categories = [
+ "sampler",
+ "dimensions",
+ "cfg",
+ "seed",
+ "checkboxes",
+ "hires_fix",
+ "batch",
+ "scripts",
+]
+
cmd_opts.disable_extension_access = (cmd_opts.share or cmd_opts.listen or cmd_opts.server_name) and not cmd_opts.enable_insecure_extension_access
devices.device, devices.device_interrogate, devices.device_gfpgan, devices.device_esrgan, devices.device_codeformer = \
@@ -410,7 +421,9 @@ options_templates.update(options_section(('ui', "User interface"), {
"js_modal_lightbox_initially_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"),
"show_progress_in_title": OptionInfo(True, "Show generation progress in window title."),
"samplers_in_dropdown": OptionInfo(True, "Use dropdown for sampler selection instead of radio group"),
+ "dimensions_and_batch_together": OptionInfo(True, "Show Witdth/Height and Batch sliders in same row"),
'quicksettings': OptionInfo("sd_model_checkpoint", "Quicksettings list"),
+ 'ui_reorder': OptionInfo(", ".join(ui_reorder_categories), "txt2img/ing2img UI item order"),
'localization': OptionInfo("None", "Localization (requires restart)", gr.Dropdown, lambda: {"choices": ["None"] + list(localization.localizations.keys())}, refresh=lambda: localization.list_localizations(cmd_opts.localizations_dir)),
}))
diff --git a/modules/ui.py b/modules/ui.py
index 2c92c422..f2e7c0d6 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -644,6 +644,13 @@ def create_sampler_and_steps_selection(choices, tabname):
return steps, sampler_index
+def ordered_ui_categories():
+ user_order = {x.strip(): i for i, x in enumerate(shared.opts.ui_reorder.split(","))}
+
+ for i, category in sorted(enumerate(shared.ui_reorder_categories), key=lambda x: user_order.get(x[1], x[0] + 1000)):
+ yield category
+
+
def create_ui():
import modules.img2img
import modules.txt2img
@@ -672,32 +679,48 @@ def create_ui():
with gr.Row().style(equal_height=False):
with gr.Column(variant='panel', elem_id="txt2img_settings"):
- steps, sampler_index = create_sampler_and_steps_selection(samplers, "txt2img")
-
- with FormRow():
- with gr.Column(elem_id="txt2img_column_size", scale=4):
- width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="txt2img_width")
- height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="txt2img_height")
- with gr.Column(elem_id="txt2img_column_batch"):
- batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="txt2img_batch_count")
- batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="txt2img_batch_size")
-
- cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0, elem_id="txt2img_cfg_scale")
-
- seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs('txt2img')
-
- with FormRow(elem_id="txt2img_checkboxes"):
- restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1, elem_id="txt2img_restore_faces")
- tiling = gr.Checkbox(label='Tiling', value=False, elem_id="txt2img_tiling")
- enable_hr = gr.Checkbox(label='Hires. fix', value=False, elem_id="txt2img_enable_hr")
+ for category in ordered_ui_categories():
+ if category == "sampler":
+ steps, sampler_index = create_sampler_and_steps_selection(samplers, "txt2img")
- with FormRow(visible=False) as hr_options:
- hr_upscaler = gr.Dropdown(label="Upscaler", elem_id="txt2img_hr_upscaler", choices=[*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]], value=shared.latent_upscale_default_mode)
- hr_scale = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Upscale by", value=2.0, elem_id="txt2img_hr_scale")
- denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.7, elem_id="txt2img_denoising_strength")
-
- with FormGroup(elem_id="txt2img_script_container"):
- custom_inputs = modules.scripts.scripts_txt2img.setup_ui()
+ elif category == "dimensions":
+ with FormRow():
+ with gr.Column(elem_id="txt2img_column_size", scale=4):
+ width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="txt2img_width")
+ height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="txt2img_height")
+
+ if opts.dimensions_and_batch_together:
+ with gr.Column(elem_id="txt2img_column_batch"):
+ batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="txt2img_batch_count")
+ batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="txt2img_batch_size")
+
+ elif category == "cfg":
+ cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0, elem_id="txt2img_cfg_scale")
+
+ elif category == "seed":
+ seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs('txt2img')
+
+ elif category == "checkboxes":
+ with FormRow(elem_id="txt2img_checkboxes"):
+ restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1, elem_id="txt2img_restore_faces")
+ tiling = gr.Checkbox(label='Tiling', value=False, elem_id="txt2img_tiling")
+ enable_hr = gr.Checkbox(label='Hires. fix', value=False, elem_id="txt2img_enable_hr")
+
+ elif category == "hires_fix":
+ with FormRow(visible=False, elem_id="txt2img_hires_fix") as hr_options:
+ hr_upscaler = gr.Dropdown(label="Upscaler", elem_id="txt2img_hr_upscaler", choices=[*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]], value=shared.latent_upscale_default_mode)
+ hr_scale = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Upscale by", value=2.0, elem_id="txt2img_hr_scale")
+ denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.7, elem_id="txt2img_denoising_strength")
+
+ elif category == "batch":
+ if not opts.dimensions_and_batch_together:
+ with FormRow(elem_id="txt2img_column_batch"):
+ batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="txt2img_batch_count")
+ batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="txt2img_batch_size")
+
+ elif category == "scripts":
+ with FormGroup(elem_id="txt2img_script_container"):
+ custom_inputs = modules.scripts.scripts_txt2img.setup_ui()
txt2img_gallery, generation_info, html_info, html_log = create_output_panel("txt2img", opts.outdir_txt2img_samples)
parameters_copypaste.bind_buttons({"txt2img": txt2img_paste}, None, txt2img_prompt)
@@ -865,28 +888,43 @@ def create_ui():
with FormRow():
resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", choices=["Just resize", "Crop and resize", "Resize and fill", "Just resize (latent upscale)"], type="index", value="Just resize")
- steps, sampler_index = create_sampler_and_steps_selection(samplers_for_img2img, "img2img")
-
- with FormRow():
- with gr.Column(elem_id="img2img_column_size", scale=4):
- width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="img2img_width")
- height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="img2img_height")
- with gr.Column(elem_id="img2img_column_batch"):
- batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="img2img_batch_count")
- batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="img2img_batch_size")
-
- with FormGroup():
- cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0, elem_id="img2img_cfg_scale")
- denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.75, elem_id="img2img_denoising_strength")
-
- seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs('img2img')
+ for category in ordered_ui_categories():
+ if category == "sampler":
+ steps, sampler_index = create_sampler_and_steps_selection(samplers_for_img2img, "img2img")
- with FormRow(elem_id="img2img_checkboxes"):
- restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1, elem_id="img2img_restore_faces")
- tiling = gr.Checkbox(label='Tiling', value=False, elem_id="img2img_tiling")
-
- with FormGroup(elem_id="img2img_script_container"):
- custom_inputs = modules.scripts.scripts_img2img.setup_ui()
+ elif category == "dimensions":
+ with FormRow():
+ with gr.Column(elem_id="img2img_column_size", scale=4):
+ width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="img2img_width")
+ height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="img2img_height")
+
+ if opts.dimensions_and_batch_together:
+ with gr.Column(elem_id="img2img_column_batch"):
+ batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="img2img_batch_count")
+ batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="img2img_batch_size")
+
+ elif category == "cfg":
+ with FormGroup():
+ cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.5, label='CFG Scale', value=7.0, elem_id="img2img_cfg_scale")
+ denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.75, elem_id="img2img_denoising_strength")
+
+ elif category == "seed":
+ seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs('img2img')
+
+ elif category == "checkboxes":
+ with FormRow(elem_id="img2img_checkboxes"):
+ restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1, elem_id="img2img_restore_faces")
+ tiling = gr.Checkbox(label='Tiling', value=False, elem_id="img2img_tiling")
+
+ elif category == "batch":
+ if not opts.dimensions_and_batch_together:
+ with FormRow(elem_id="img2img_column_batch"):
+ batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="img2img_batch_count")
+ batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="img2img_batch_size")
+
+ elif category == "scripts":
+ with FormGroup(elem_id="img2img_script_container"):
+ custom_inputs = modules.scripts.scripts_img2img.setup_ui()
img2img_gallery, generation_info, html_info, html_log = create_output_panel("img2img", opts.outdir_img2img_samples)
parameters_copypaste.bind_buttons({"img2img": img2img_paste}, None, img2img_prompt)
--
cgit v1.2.1
From c0ee1488702d5a6ae35fbf7e0422f9f685394920 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 3 Jan 2023 14:18:48 +0300
Subject: add support for running with gradio 3.9 installed
---
modules/generation_parameters_copypaste.py | 4 ++--
modules/ui_tempdir.py | 23 +++++++++++++++++++++--
2 files changed, 23 insertions(+), 4 deletions(-)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index ec60319a..d94f11a3 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -7,7 +7,7 @@ from pathlib import Path
import gradio as gr
from modules.shared import script_path
-from modules import shared
+from modules import shared, ui_tempdir
import tempfile
from PIL import Image
@@ -39,7 +39,7 @@ def quote(text):
def image_from_url_text(filedata):
if type(filedata) == dict and filedata["is_file"]:
filename = filedata["name"]
- is_in_right_dir = any([filename in fileset for fileset in shared.demo.temp_file_sets])
+ is_in_right_dir = ui_tempdir.check_tmp_file(shared.demo, filename)
assert is_in_right_dir, 'trying to open image file outside of allowed directories'
return Image.open(filename)
diff --git a/modules/ui_tempdir.py b/modules/ui_tempdir.py
index 363d449d..21945235 100644
--- a/modules/ui_tempdir.py
+++ b/modules/ui_tempdir.py
@@ -1,6 +1,7 @@
import os
import tempfile
from collections import namedtuple
+from pathlib import Path
import gradio as gr
@@ -12,10 +13,28 @@ from modules import shared
Savedfile = namedtuple("Savedfile", ["name"])
+def register_tmp_file(gradio, filename):
+ if hasattr(gradio, 'temp_file_sets'): # gradio 3.15
+ gradio.temp_file_sets[0] = gradio.temp_file_sets[0] | {os.path.abspath(filename)}
+
+ if hasattr(gradio, 'temp_dirs'): # gradio 3.9
+ gradio.temp_dirs = gradio.temp_dirs | {os.path.abspath(os.path.dirname(filename))}
+
+
+def check_tmp_file(gradio, filename):
+ if hasattr(gradio, 'temp_file_sets'):
+ return any([filename in fileset for fileset in gradio.temp_file_sets])
+
+ if hasattr(gradio, 'temp_dirs'):
+ return any(Path(temp_dir).resolve() in Path(filename).resolve().parents for temp_dir in gradio.temp_dirs)
+
+ return False
+
+
def save_pil_to_file(pil_image, dir=None):
already_saved_as = getattr(pil_image, 'already_saved_as', None)
if already_saved_as and os.path.isfile(already_saved_as):
- shared.demo.temp_file_sets[0] = shared.demo.temp_file_sets[0] | {os.path.abspath(already_saved_as)}
+ register_tmp_file(shared.demo, already_saved_as)
file_obj = Savedfile(already_saved_as)
return file_obj
@@ -45,7 +64,7 @@ def on_tmpdir_changed():
os.makedirs(shared.opts.temp_dir, exist_ok=True)
- shared.demo.temp_file_sets[0] = shared.demo.temp_file_sets[0] | {os.path.abspath(shared.opts.temp_dir)}
+ register_tmp_file(shared.demo, os.path.join(shared.opts.temp_dir, "x"))
def cleanup_tmpdr():
--
cgit v1.2.1
From bddebe09edeb6a18f2c06986d5658a7be3a563ea Mon Sep 17 00:00:00 2001
From: Shondoit
Date: Tue, 3 Jan 2023 10:26:37 +0100
Subject: Save Optimizer next to TI embedding
Also add check to load only .PT and .BIN files as embeddings. (since we add .optim files in the same directory)
---
modules/shared.py | 2 +-
modules/textual_inversion/textual_inversion.py | 40 ++++++++++++++++++++------
2 files changed, 33 insertions(+), 9 deletions(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index 23657a93..c541d18c 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -355,7 +355,7 @@ options_templates.update(options_section(('system', "System"), {
options_templates.update(options_section(('training', "Training"), {
"unload_models_when_training": OptionInfo(False, "Move VAE and CLIP to RAM when training if possible. Saves VRAM."),
"pin_memory": OptionInfo(False, "Turn on pin_memory for DataLoader. Makes training slightly faster but can increase memory usage."),
- "save_optimizer_state": OptionInfo(False, "Saves Optimizer state as separate *.optim file. Training can be resumed with HN itself and matching optim file."),
+ "save_optimizer_state": OptionInfo(False, "Saves Optimizer state as separate *.optim file. Training of embedding or HN can be resumed with the matching optim file."),
"dataset_filename_word_regex": OptionInfo("", "Filename word regex"),
"dataset_filename_join_string": OptionInfo(" ", "Filename join string"),
"training_image_repeats_per_epoch": OptionInfo(1, "Number of repeats for a single input image per epoch; used only for displaying epoch number", gr.Number, {"precision": 0}),
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index fd253477..16176e90 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -28,6 +28,7 @@ class Embedding:
self.cached_checksum = None
self.sd_checkpoint = None
self.sd_checkpoint_name = None
+ self.optimizer_state_dict = None
def save(self, filename):
embedding_data = {
@@ -41,6 +42,13 @@ class Embedding:
torch.save(embedding_data, filename)
+ if shared.opts.save_optimizer_state and self.optimizer_state_dict is not None:
+ optimizer_saved_dict = {
+ 'hash': self.checksum(),
+ 'optimizer_state_dict': self.optimizer_state_dict,
+ }
+ torch.save(optimizer_saved_dict, filename + '.optim')
+
def checksum(self):
if self.cached_checksum is not None:
return self.cached_checksum
@@ -95,9 +103,10 @@ class EmbeddingDatabase:
self.expected_shape = self.get_expected_shape()
def process_file(path, filename):
- name = os.path.splitext(filename)[0]
+ name, ext = os.path.splitext(filename)
+ ext = ext.upper()
- if os.path.splitext(filename.upper())[-1] in ['.PNG', '.WEBP', '.JXL', '.AVIF']:
+ if ext in ['.PNG', '.WEBP', '.JXL', '.AVIF']:
embed_image = Image.open(path)
if hasattr(embed_image, 'text') and 'sd-ti-embedding' in embed_image.text:
data = embedding_from_b64(embed_image.text['sd-ti-embedding'])
@@ -105,8 +114,10 @@ class EmbeddingDatabase:
else:
data = extract_image_data_embed(embed_image)
name = data.get('name', name)
- else:
+ elif ext in ['.BIN', '.PT']:
data = torch.load(path, map_location="cpu")
+ else:
+ return
# textual inversion embeddings
if 'string_to_param' in data:
@@ -300,6 +311,20 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
embedding.vec.requires_grad = True
optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate, weight_decay=0.0)
+ if shared.opts.save_optimizer_state:
+ optimizer_state_dict = None
+ if os.path.exists(filename + '.optim'):
+ optimizer_saved_dict = torch.load(filename + '.optim', map_location='cpu')
+ if embedding.checksum() == optimizer_saved_dict.get('hash', None):
+ optimizer_state_dict = optimizer_saved_dict.get('optimizer_state_dict', None)
+
+ if optimizer_state_dict is not None:
+ optimizer.load_state_dict(optimizer_state_dict)
+ print("Loaded existing optimizer from checkpoint")
+ else:
+ print("No saved optimizer exists in checkpoint")
+
+
scaler = torch.cuda.amp.GradScaler()
batch_size = ds.batch_size
@@ -366,9 +391,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
# Before saving, change name to match current checkpoint.
embedding_name_every = f'{embedding_name}-{steps_done}'
last_saved_file = os.path.join(embedding_dir, f'{embedding_name_every}.pt')
- #if shared.opts.save_optimizer_state:
- #embedding.optimizer_state_dict = optimizer.state_dict()
- save_embedding(embedding, checkpoint, embedding_name_every, last_saved_file, remove_cached_checksum=True)
+ save_embedding(embedding, optimizer, checkpoint, embedding_name_every, last_saved_file, remove_cached_checksum=True)
embedding_yet_to_be_embedded = True
write_loss(log_directory, "textual_inversion_loss.csv", embedding.step, steps_per_epoch, {
@@ -458,7 +481,7 @@ Last saved image: {html.escape(last_saved_image)}
"""
filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt')
- save_embedding(embedding, checkpoint, embedding_name, filename, remove_cached_checksum=True)
+ save_embedding(embedding, optimizer, checkpoint, embedding_name, filename, remove_cached_checksum=True)
except Exception:
print(traceback.format_exc(), file=sys.stderr)
pass
@@ -470,7 +493,7 @@ Last saved image: {html.escape(last_saved_image)}
return embedding, filename
-def save_embedding(embedding, checkpoint, embedding_name, filename, remove_cached_checksum=True):
+def save_embedding(embedding, optimizer, checkpoint, embedding_name, filename, remove_cached_checksum=True):
old_embedding_name = embedding.name
old_sd_checkpoint = embedding.sd_checkpoint if hasattr(embedding, "sd_checkpoint") else None
old_sd_checkpoint_name = embedding.sd_checkpoint_name if hasattr(embedding, "sd_checkpoint_name") else None
@@ -481,6 +504,7 @@ def save_embedding(embedding, checkpoint, embedding_name, filename, remove_cache
if remove_cached_checksum:
embedding.cached_checksum = None
embedding.name = embedding_name
+ embedding.optimizer_state_dict = optimizer.state_dict()
embedding.save(filename)
except:
embedding.sd_checkpoint = old_sd_checkpoint
--
cgit v1.2.1
From e9fb9bb0c25f59109a816fc53c385bed58965c24 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 3 Jan 2023 17:40:20 +0300
Subject: fix hires fix not working in API when user does not specify upscaler
---
modules/processing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index 4654570c..a172af0b 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -685,7 +685,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
- latent_scale_mode = shared.latent_upscale_modes.get(self.hr_upscaler, None) if self.hr_upscaler is not None else shared.latent_upscale_default_mode
+ latent_scale_mode = shared.latent_upscale_modes.get(self.hr_upscaler, None) if self.hr_upscaler is not None else shared.latent_upscale_modes.get(shared.latent_upscale_default_mode, "nearest")
if self.enable_hr and latent_scale_mode is None:
assert len([x for x in shared.sd_upscalers if x.name == self.hr_upscaler]) > 0, f"could not find upscaler named {self.hr_upscaler}"
--
cgit v1.2.1
From aaa4c2aacbb6523077334093c81bd475d757f7a1 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 3 Jan 2023 09:45:16 -0500
Subject: add api logging
---
modules/api/api.py | 24 +++++++++++++++++++++++-
modules/shared.py | 1 +
2 files changed, 24 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 9c670f00..53135470 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -1,11 +1,12 @@
import base64
import io
import time
+import datetime
import uvicorn
from threading import Lock
from io import BytesIO
from gradio.processing_utils import decode_base64_to_file
-from fastapi import APIRouter, Depends, FastAPI, HTTPException
+from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request, Response
from fastapi.security import HTTPBasic, HTTPBasicCredentials
from secrets import compare_digest
@@ -67,6 +68,26 @@ def encode_pil_to_base64(image):
bytes_data = output_bytes.getvalue()
return base64.b64encode(bytes_data)
+def init_api_middleware(app: FastAPI):
+ @app.middleware("http")
+ async def log_and_time(req: Request, call_next):
+ ts = time.time()
+ res: Response = await call_next(req)
+ duration = str(round(time.time() - ts, 4))
+ res.headers["X-Process-Time"] = duration
+ if shared.cmd_opts.api_log:
+ print('API {t} {code} {prot}/{ver} {method} {p} {cli} {duration}'.format(
+ t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
+ code = res.status_code,
+ ver = req.scope.get('http_version', '0.0'),
+ cli = req.scope.get('client', ('0:0.0.0', 0))[0],
+ prot = req.scope.get('scheme', 'err'),
+ method = req.scope.get('method', 'err'),
+ p = req.scope.get('path', 'err'),
+ duration = duration,
+ ))
+ return res
+
class Api:
def __init__(self, app: FastAPI, queue_lock: Lock):
@@ -78,6 +99,7 @@ class Api:
self.router = APIRouter()
self.app = app
+ init_api_middleware(self.app)
self.queue_lock = queue_lock
self.add_api_route("/sdapi/v1/txt2img", self.text2imgapi, methods=["POST"], response_model=TextToImageResponse)
self.add_api_route("/sdapi/v1/img2img", self.img2imgapi, methods=["POST"], response_model=ImageToImageResponse)
diff --git a/modules/shared.py b/modules/shared.py
index 23657a93..2a03d716 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -82,6 +82,7 @@ parser.add_argument('--vae-path', type=str, help='Path to Variational Autoencode
parser.add_argument("--disable-safe-unpickle", action='store_true', help="disable checking pytorch models for malicious code", default=False)
parser.add_argument("--api", action='store_true', help="use api=True to launch the API together with the webui (use --nowebui instead for only the API)")
parser.add_argument("--api-auth", type=str, help='Set authentication for API like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3"', default=None)
+parser.add_argument("--api-log", action='store_true', help="use api-log=True to enable logging of all API requests")
parser.add_argument("--nowebui", action='store_true', help="use api=True to launch the API instead of the webui")
parser.add_argument("--ui-debug-mode", action='store_true', help="Don't load model to quickly launch UI")
parser.add_argument("--device-id", type=str, help="Select the default CUDA device to use (export CUDA_VISIBLE_DEVICES=0,1,etc might be needed before)", default=None)
--
cgit v1.2.1
From 1d9dc48efda2e8da6d13fc62e65500198a9b041c Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 3 Jan 2023 10:21:51 -0500
Subject: init job and add info to model merge
---
modules/extras.py | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/extras.py b/modules/extras.py
index 5e270250..7e222313 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -242,6 +242,9 @@ def run_pnginfo(image):
def run_modelmerger(primary_model_name, secondary_model_name, tertiary_model_name, interp_method, multiplier, save_as_half, custom_name, checkpoint_format):
+ shared.state.begin()
+ shared.state.job = 'model-merge'
+
def weighted_sum(theta0, theta1, alpha):
return ((1 - alpha) * theta0) + (alpha * theta1)
@@ -263,8 +266,11 @@ def run_modelmerger(primary_model_name, secondary_model_name, tertiary_model_nam
theta_func1, theta_func2 = theta_funcs[interp_method]
if theta_func1 and not tertiary_model_info:
+ shared.state.textinfo = "Failed: Interpolation method requires a tertiary model."
+ shared.state.end()
return ["Failed: Interpolation method requires a tertiary model."] + [gr.Dropdown.update(choices=sd_models.checkpoint_tiles()) for _ in range(4)]
+ shared.state.textinfo = f"Loading {secondary_model_info.filename}..."
print(f"Loading {secondary_model_info.filename}...")
theta_1 = sd_models.read_state_dict(secondary_model_info.filename, map_location='cpu')
@@ -281,6 +287,7 @@ def run_modelmerger(primary_model_name, secondary_model_name, tertiary_model_nam
theta_1[key] = torch.zeros_like(theta_1[key])
del theta_2
+ shared.state.textinfo = f"Loading {primary_model_info.filename}..."
print(f"Loading {primary_model_info.filename}...")
theta_0 = sd_models.read_state_dict(primary_model_info.filename, map_location='cpu')
@@ -291,6 +298,7 @@ def run_modelmerger(primary_model_name, secondary_model_name, tertiary_model_nam
a = theta_0[key]
b = theta_1[key]
+ shared.state.textinfo = f'Merging layer {key}'
# this enables merging an inpainting model (A) with another one (B);
# where normal model would have 4 channels, for latenst space, inpainting model would
# have another 4 channels for unmasked picture's latent space, plus one channel for mask, for a total of 9
@@ -303,8 +311,6 @@ def run_modelmerger(primary_model_name, secondary_model_name, tertiary_model_nam
theta_0[key][:, 0:4, :, :] = theta_func2(a[:, 0:4, :, :], b, multiplier)
result_is_inpainting_model = True
else:
- assert a.shape == b.shape, f'Incompatible shapes for layer {key}: A is {a.shape}, and B is {b.shape}'
-
theta_0[key] = theta_func2(a, b, multiplier)
if save_as_half:
@@ -332,6 +338,7 @@ def run_modelmerger(primary_model_name, secondary_model_name, tertiary_model_nam
output_modelname = os.path.join(ckpt_dir, filename)
+ shared.state.textinfo = f"Saving to {output_modelname}..."
print(f"Saving to {output_modelname}...")
_, extension = os.path.splitext(output_modelname)
@@ -343,4 +350,7 @@ def run_modelmerger(primary_model_name, secondary_model_name, tertiary_model_nam
sd_models.list_models()
print("Checkpoint saved.")
+ shared.state.textinfo = "Checkpoint saved to " + output_modelname
+ shared.state.end()
+
return ["Checkpoint saved to " + output_modelname] + [gr.Dropdown.update(choices=sd_models.checkpoint_tiles()) for _ in range(4)]
--
cgit v1.2.1
From 192ddc04d6de0d780f73aa5fbaa8c66cd4642e1c Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 3 Jan 2023 10:34:51 -0500
Subject: add job info to modules
---
modules/extras.py | 17 +++++++++++++----
modules/hypernetworks/hypernetwork.py | 1 +
modules/textual_inversion/preprocess.py | 1 +
modules/textual_inversion/textual_inversion.py | 1 +
4 files changed, 16 insertions(+), 4 deletions(-)
(limited to 'modules')
diff --git a/modules/extras.py b/modules/extras.py
index 7e222313..d665440a 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -58,6 +58,9 @@ cached_images: LruCache = LruCache(max_size=5)
def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_dir, show_extras_results, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility, upscale_first: bool, save_output: bool = True):
devices.torch_gc()
+ shared.state.begin()
+ shared.state.job = 'extras'
+
imageArr = []
# Also keep track of original file names
imageNameArr = []
@@ -94,6 +97,7 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
# Extra operation definitions
def run_gfpgan(image: Image.Image, info: str) -> Tuple[Image.Image, str]:
+ shared.state.job = 'extras-gfpgan'
restored_img = modules.gfpgan_model.gfpgan_fix_faces(np.array(image, dtype=np.uint8))
res = Image.fromarray(restored_img)
@@ -104,6 +108,7 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
return (res, info)
def run_codeformer(image: Image.Image, info: str) -> Tuple[Image.Image, str]:
+ shared.state.job = 'extras-codeformer'
restored_img = modules.codeformer_model.codeformer.restore(np.array(image, dtype=np.uint8), w=codeformer_weight)
res = Image.fromarray(restored_img)
@@ -114,6 +119,7 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
return (res, info)
def upscale(image, scaler_index, resize, mode, resize_w, resize_h, crop):
+ shared.state.job = 'extras-upscale'
upscaler = shared.sd_upscalers[scaler_index]
res = upscaler.scaler.upscale(image, resize, upscaler.data_path)
if mode == 1 and crop:
@@ -180,6 +186,9 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
for image, image_name in zip(imageArr, imageNameArr):
if image is None:
return outputs, "Please select an input image.", ''
+
+ shared.state.textinfo = f'Processing image {image_name}'
+
existing_pnginfo = image.info or {}
image = image.convert("RGB")
@@ -193,6 +202,10 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
else:
basename = ''
+ if opts.enable_pnginfo: # append info before save
+ image.info = existing_pnginfo
+ image.info["extras"] = info
+
if save_output:
# Add upscaler name as a suffix.
suffix = f"-{shared.sd_upscalers[extras_upscaler_1].name}" if shared.opts.use_upscaler_name_as_suffix else ""
@@ -203,10 +216,6 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
images.save_image(image, path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,
no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo, forced_filename=None, suffix=suffix)
- if opts.enable_pnginfo:
- image.info = existing_pnginfo
- image.info["extras"] = info
-
if extras_mode != 2 or show_extras_results :
outputs.append(image)
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 109e8078..450fecac 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -417,6 +417,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, gradient_step,
shared.loaded_hypernetwork = Hypernetwork()
shared.loaded_hypernetwork.load(path)
+ shared.state.job = "train-hypernetwork"
shared.state.textinfo = "Initializing hypernetwork training..."
shared.state.job_count = steps
diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py
index 56b9b2eb..feb876c6 100644
--- a/modules/textual_inversion/preprocess.py
+++ b/modules/textual_inversion/preprocess.py
@@ -124,6 +124,7 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre
files = listfiles(src)
+ shared.state.job = "preprocess"
shared.state.textinfo = "Preprocessing..."
shared.state.job_count = len(files)
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index fd253477..2c1251d6 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -245,6 +245,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, gradient_step, data_
create_image_every = create_image_every or 0
validate_train_inputs(embedding_name, learn_rate, batch_size, gradient_step, data_root, template_file, steps, save_embedding_every, create_image_every, log_directory, name="embedding")
+ shared.state.job = "train-embedding"
shared.state.textinfo = "Initializing textual inversion training..."
shared.state.job_count = steps
--
cgit v1.2.1
From 2d5a5076bb2a0c05cc27d75a1bcadab7f32a46d0 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 3 Jan 2023 18:38:21 +0300
Subject: Make it so that upscalers are not repeated when restarting UI.
---
modules/modelloader.py | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
(limited to 'modules')
diff --git a/modules/modelloader.py b/modules/modelloader.py
index e647f6fa..6a1a7ac8 100644
--- a/modules/modelloader.py
+++ b/modules/modelloader.py
@@ -123,6 +123,23 @@ def move_files(src_path: str, dest_path: str, ext_filter: str = None):
pass
+builtin_upscaler_classes = []
+forbidden_upscaler_classes = set()
+
+
+def list_builtin_upscalers():
+ load_upscalers()
+
+ builtin_upscaler_classes.clear()
+ builtin_upscaler_classes.extend(Upscaler.__subclasses__())
+
+
+def forbid_loaded_nonbuiltin_upscalers():
+ for cls in Upscaler.__subclasses__():
+ if cls not in builtin_upscaler_classes:
+ forbidden_upscaler_classes.add(cls)
+
+
def load_upscalers():
# We can only do this 'magic' method to dynamically load upscalers if they are referenced,
# so we'll try to import any _model.py files before looking in __subclasses__
@@ -139,6 +156,9 @@ def load_upscalers():
datas = []
commandline_options = vars(shared.cmd_opts)
for cls in Upscaler.__subclasses__():
+ if cls in forbidden_upscaler_classes:
+ continue
+
name = cls.__name__
cmd_name = f"{name.lower().replace('upscaler', '')}_models_path"
scaler = cls(commandline_options.get(cmd_name, None))
--
cgit v1.2.1
From 8f96f9289981a66741ba770d14f3d27ce335a0fb Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 3 Jan 2023 18:39:14 +0300
Subject: call script callbacks for reloaded model after loading embeddings
---
modules/sd_models.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index bff8d6c9..b98b05fc 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -324,12 +324,12 @@ def load_model(checkpoint_info=None):
sd_model.eval()
shared.sd_model = sd_model
+ sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings(force_reload=True) # Reload embeddings after model load as they may or may not fit the model
+
script_callbacks.model_loaded_callback(sd_model)
print("Model loaded.")
- sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings(force_reload = True) # Reload embeddings after model load as they may or may not fit the model
-
return sd_model
--
cgit v1.2.1
From cec209981ee988536c2521297baf9bc1b256005f Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 3 Jan 2023 10:58:52 -0500
Subject: log only sdapi
---
modules/api/api.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 53135470..78751c57 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -68,22 +68,23 @@ def encode_pil_to_base64(image):
bytes_data = output_bytes.getvalue()
return base64.b64encode(bytes_data)
-def init_api_middleware(app: FastAPI):
+def api_middleware(app: FastAPI):
@app.middleware("http")
async def log_and_time(req: Request, call_next):
ts = time.time()
res: Response = await call_next(req)
duration = str(round(time.time() - ts, 4))
res.headers["X-Process-Time"] = duration
- if shared.cmd_opts.api_log:
- print('API {t} {code} {prot}/{ver} {method} {p} {cli} {duration}'.format(
+ endpoint = req.scope.get('path', 'err')
+ if shared.cmd_opts.api_log and endpoint.startswith('/sdapi'):
+ print('API {t} {code} {prot}/{ver} {method} {endpoint} {cli} {duration}'.format(
t = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
code = res.status_code,
ver = req.scope.get('http_version', '0.0'),
cli = req.scope.get('client', ('0:0.0.0', 0))[0],
prot = req.scope.get('scheme', 'err'),
method = req.scope.get('method', 'err'),
- p = req.scope.get('path', 'err'),
+ endpoint = endpoint,
duration = duration,
))
return res
--
cgit v1.2.1
From d8d206c1685d1e7027d4af82ed18d106f41d1cc4 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 3 Jan 2023 11:01:04 -0500
Subject: add state to interrogate
---
modules/interrogate.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/interrogate.py b/modules/interrogate.py
index 6f761c5a..738d8ff7 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -136,7 +136,8 @@ class InterrogateModels:
def interrogate(self, pil_image):
res = ""
-
+ shared.state.begin()
+ shared.state.job = 'interrogate'
try:
if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
@@ -177,5 +178,6 @@ class InterrogateModels:
res += ""
self.unload()
+ shared.state.end()
return res
--
cgit v1.2.1
From 82cfc227d735c140447d5b8dca29a71ee9bde127 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 3 Jan 2023 20:23:17 +0300
Subject: added licenses screen to settings added footer removed unused
inpainting code
---
modules/sd_hijack_inpainting.py | 232 ----------------------------------------
modules/ui.py | 15 ++-
2 files changed, 13 insertions(+), 234 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py
index 06b75772..3c214a35 100644
--- a/modules/sd_hijack_inpainting.py
+++ b/modules/sd_hijack_inpainting.py
@@ -12,191 +12,6 @@ from ldm.models.diffusion.ddpm import LatentDiffusion
from ldm.models.diffusion.plms import PLMSSampler
from ldm.models.diffusion.ddim import DDIMSampler, noise_like
-# =================================================================================================
-# Monkey patch DDIMSampler methods from RunwayML repo directly.
-# Adapted from:
-# https://github.com/runwayml/stable-diffusion/blob/main/ldm/models/diffusion/ddim.py
-# =================================================================================================
-@torch.no_grad()
-def sample_ddim(self,
- S,
- batch_size,
- shape,
- conditioning=None,
- callback=None,
- normals_sequence=None,
- img_callback=None,
- quantize_x0=False,
- eta=0.,
- mask=None,
- x0=None,
- temperature=1.,
- noise_dropout=0.,
- score_corrector=None,
- corrector_kwargs=None,
- verbose=True,
- x_T=None,
- log_every_t=100,
- unconditional_guidance_scale=1.,
- unconditional_conditioning=None,
- # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ...
- **kwargs
- ):
- if conditioning is not None:
- if isinstance(conditioning, dict):
- ctmp = conditioning[list(conditioning.keys())[0]]
- while isinstance(ctmp, list):
- ctmp = ctmp[0]
- cbs = ctmp.shape[0]
- if cbs != batch_size:
- print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
- else:
- if conditioning.shape[0] != batch_size:
- print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")
-
- self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)
- # sampling
- C, H, W = shape
- size = (batch_size, C, H, W)
- print(f'Data shape for DDIM sampling is {size}, eta {eta}')
-
- samples, intermediates = self.ddim_sampling(conditioning, size,
- callback=callback,
- img_callback=img_callback,
- quantize_denoised=quantize_x0,
- mask=mask, x0=x0,
- ddim_use_original_steps=False,
- noise_dropout=noise_dropout,
- temperature=temperature,
- score_corrector=score_corrector,
- corrector_kwargs=corrector_kwargs,
- x_T=x_T,
- log_every_t=log_every_t,
- unconditional_guidance_scale=unconditional_guidance_scale,
- unconditional_conditioning=unconditional_conditioning,
- )
- return samples, intermediates
-
-@torch.no_grad()
-def p_sample_ddim(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
- temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
- unconditional_guidance_scale=1., unconditional_conditioning=None):
- b, *_, device = *x.shape, x.device
-
- if unconditional_conditioning is None or unconditional_guidance_scale == 1.:
- e_t = self.model.apply_model(x, t, c)
- else:
- x_in = torch.cat([x] * 2)
- t_in = torch.cat([t] * 2)
- if isinstance(c, dict):
- assert isinstance(unconditional_conditioning, dict)
- c_in = dict()
- for k in c:
- if isinstance(c[k], list):
- c_in[k] = [
- torch.cat([unconditional_conditioning[k][i], c[k][i]])
- for i in range(len(c[k]))
- ]
- else:
- c_in[k] = torch.cat([unconditional_conditioning[k], c[k]])
- else:
- c_in = torch.cat([unconditional_conditioning, c])
- e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2)
- e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond)
-
- if score_corrector is not None:
- assert self.model.parameterization == "eps"
- e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs)
-
- alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas
- alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev
- sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas
- sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas
- # select parameters corresponding to the currently considered timestep
- a_t = torch.full((b, 1, 1, 1), alphas[index], device=device)
- a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device)
- sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device)
- sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index],device=device)
-
- # current prediction for x_0
- pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
- if quantize_denoised:
- pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0)
- # direction pointing to x_t
- dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t
- noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature
- if noise_dropout > 0.:
- noise = torch.nn.functional.dropout(noise, p=noise_dropout)
- x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise
- return x_prev, pred_x0
-
-
-# =================================================================================================
-# Monkey patch PLMSSampler methods.
-# This one was not actually patched correctly in the RunwayML repo, but we can replicate the changes.
-# Adapted from:
-# https://github.com/CompVis/stable-diffusion/blob/main/ldm/models/diffusion/plms.py
-# =================================================================================================
-@torch.no_grad()
-def sample_plms(self,
- S,
- batch_size,
- shape,
- conditioning=None,
- callback=None,
- normals_sequence=None,
- img_callback=None,
- quantize_x0=False,
- eta=0.,
- mask=None,
- x0=None,
- temperature=1.,
- noise_dropout=0.,
- score_corrector=None,
- corrector_kwargs=None,
- verbose=True,
- x_T=None,
- log_every_t=100,
- unconditional_guidance_scale=1.,
- unconditional_conditioning=None,
- # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ...
- **kwargs
- ):
- if conditioning is not None:
- if isinstance(conditioning, dict):
- ctmp = conditioning[list(conditioning.keys())[0]]
- while isinstance(ctmp, list):
- ctmp = ctmp[0]
- cbs = ctmp.shape[0]
- if cbs != batch_size:
- print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
- else:
- if conditioning.shape[0] != batch_size:
- print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")
-
- self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)
- # sampling
- C, H, W = shape
- size = (batch_size, C, H, W)
- # print(f'Data shape for PLMS sampling is {size}') # remove unnecessary message
-
- samples, intermediates = self.plms_sampling(conditioning, size,
- callback=callback,
- img_callback=img_callback,
- quantize_denoised=quantize_x0,
- mask=mask, x0=x0,
- ddim_use_original_steps=False,
- noise_dropout=noise_dropout,
- temperature=temperature,
- score_corrector=score_corrector,
- corrector_kwargs=corrector_kwargs,
- x_T=x_T,
- log_every_t=log_every_t,
- unconditional_guidance_scale=unconditional_guidance_scale,
- unconditional_conditioning=unconditional_conditioning,
- )
- return samples, intermediates
-
@torch.no_grad()
def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
@@ -280,44 +95,6 @@ def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=F
return x_prev, pred_x0, e_t
-# =================================================================================================
-# Monkey patch LatentInpaintDiffusion to load the checkpoint with a proper config.
-# Adapted from:
-# https://github.com/runwayml/stable-diffusion/blob/main/ldm/models/diffusion/ddpm.py
-# =================================================================================================
-
-@torch.no_grad()
-def get_unconditional_conditioning(self, batch_size, null_label=None):
- if null_label is not None:
- xc = null_label
- if isinstance(xc, ListConfig):
- xc = list(xc)
- if isinstance(xc, dict) or isinstance(xc, list):
- c = self.get_learned_conditioning(xc)
- else:
- if hasattr(xc, "to"):
- xc = xc.to(self.device)
- c = self.get_learned_conditioning(xc)
- else:
- # todo: get null label from cond_stage_model
- raise NotImplementedError()
- c = repeat(c, "1 ... -> b ...", b=batch_size).to(self.device)
- return c
-
-
-class LatentInpaintDiffusion(LatentDiffusion):
- def __init__(
- self,
- concat_keys=("mask", "masked_image"),
- masked_image_key="masked_image",
- *args,
- **kwargs,
- ):
- super().__init__(*args, **kwargs)
- self.masked_image_key = masked_image_key
- assert self.masked_image_key in concat_keys
- self.concat_keys = concat_keys
-
def should_hijack_inpainting(checkpoint_info):
ckpt_basename = os.path.basename(checkpoint_info.filename).lower()
@@ -326,15 +103,6 @@ def should_hijack_inpainting(checkpoint_info):
def do_inpainting_hijack():
- # most of this stuff seems to no longer be needed because it is already included into SD2.0
# p_sample_plms is needed because PLMS can't work with dicts as conditionings
- # this file should be cleaned up later if everything turns out to work fine
-
- # ldm.models.diffusion.ddpm.get_unconditional_conditioning = get_unconditional_conditioning
- # ldm.models.diffusion.ddpm.LatentInpaintDiffusion = LatentInpaintDiffusion
-
- # ldm.models.diffusion.ddim.DDIMSampler.p_sample_ddim = p_sample_ddim
- # ldm.models.diffusion.ddim.DDIMSampler.sample = sample_ddim
ldm.models.diffusion.plms.PLMSSampler.p_sample_plms = p_sample_plms
- # ldm.models.diffusion.plms.PLMSSampler.sample = sample_plms
diff --git a/modules/ui.py b/modules/ui.py
index f2e7c0d6..d941cb5f 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1529,8 +1529,10 @@ def create_ui():
with gr.Blocks(analytics_enabled=False) as settings_interface:
with gr.Row():
- settings_submit = gr.Button(value="Apply settings", variant='primary', elem_id="settings_submit")
- restart_gradio = gr.Button(value='Restart UI', variant='primary', elem_id="settings_restart_gradio")
+ with gr.Column(scale=6):
+ settings_submit = gr.Button(value="Apply settings", variant='primary', elem_id="settings_submit")
+ with gr.Column():
+ restart_gradio = gr.Button(value='Reload UI', variant='primary', elem_id="settings_restart_gradio")
result = gr.HTML(elem_id="settings_result")
@@ -1574,6 +1576,11 @@ def create_ui():
download_localization = gr.Button(value='Download localization template', elem_id="download_localization")
reload_script_bodies = gr.Button(value='Reload custom script bodies (No ui updates, No restart)', variant='secondary', elem_id="settings_reload_script_bodies")
+ if os.path.exists("html/licenses.html"):
+ with open("html/licenses.html", encoding="utf8") as file:
+ with gr.TabItem("Licenses"):
+ gr.HTML(file.read(), elem_id="licenses")
+
gr.Button(value="Show all pages", elem_id="settings_show_all_pages")
request_notifications.click(
@@ -1659,6 +1666,10 @@ def create_ui():
if os.path.exists(os.path.join(script_path, "notification.mp3")):
audio_notification = gr.Audio(interactive=False, value=os.path.join(script_path, "notification.mp3"), elem_id="audio_notification", visible=False)
+ if os.path.exists("html/footer.html"):
+ with open("html/footer.html", encoding="utf8") as file:
+ gr.HTML(file.read(), elem_id="footer")
+
text_settings = gr.Textbox(elem_id="settings_json", value=lambda: opts.dumpjson(), visible=False)
settings_submit.click(
fn=wrap_gradio_call(run_settings, extra_outputs=[gr.update()]),
--
cgit v1.2.1
From 3e22e294135ed0327ce9d9738655ff03c53df3c0 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Tue, 3 Jan 2023 21:49:24 +0300
Subject: fix broken send to extras button
---
modules/generation_parameters_copypaste.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index d94f11a3..4baf4d9a 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -37,7 +37,10 @@ def quote(text):
def image_from_url_text(filedata):
- if type(filedata) == dict and filedata["is_file"]:
+ if type(filedata) == list and len(filedata) > 0 and type(filedata[0]) == dict and filedata[0].get("is_file", False):
+ filedata = filedata[0]
+
+ if type(filedata) == dict and filedata.get("is_file", False):
filename = filedata["name"]
is_in_right_dir = ui_tempdir.check_tmp_file(shared.demo, filename)
assert is_in_right_dir, 'trying to open image file outside of allowed directories'
--
cgit v1.2.1
From 917b5bd8d0cd47c9dc241c1852ccd440a8c61668 Mon Sep 17 00:00:00 2001
From: Max Weber
Date: Tue, 3 Jan 2023 18:19:56 -0700
Subject: ui: save dropdown sampling method to the ui-config
---
modules/ui.py | 1 +
1 file changed, 1 insertion(+)
(limited to 'modules')
diff --git a/modules/ui.py b/modules/ui.py
index d941cb5f..bfc93634 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -635,6 +635,7 @@ def create_sampler_and_steps_selection(choices, tabname):
if opts.samplers_in_dropdown:
with FormRow(elem_id=f"sampler_selection_{tabname}"):
sampler_index = gr.Dropdown(label='Sampling method', elem_id=f"{tabname}_sampling", choices=[x.name for x in choices], value=choices[0].name, type="index")
+ sampler_index.save_to_config = True
steps = gr.Slider(minimum=1, maximum=150, step=1, elem_id=f"{tabname}_steps", label="Sampling Steps", value=20)
else:
with FormGroup(elem_id=f"sampler_selection_{tabname}"):
--
cgit v1.2.1
From e5b7ee910e7bb88f08e8876b5732cb034c6fe529 Mon Sep 17 00:00:00 2001
From: MMaker
Date: Wed, 4 Jan 2023 04:22:01 -0500
Subject: fix: Save full res of intermediate step
---
modules/processing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index a172af0b..93e75ba6 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -705,7 +705,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
return
if not isinstance(image, Image.Image):
- image = sd_samplers.sample_to_image(image, index)
+ image = sd_samplers.sample_to_image(image, index, approximation=0)
images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, suffix="-before-highres-fix")
--
cgit v1.2.1
From 02d7abf5141431b9a3a8a189bb3136c71abd5e79 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Wed, 4 Jan 2023 12:35:07 +0300
Subject: helpful error message when trying to load 2.0 without config failing
to load model weights from settings won't break generation for currently
loaded model anymore
---
modules/errors.py | 25 +++++++++++++++++++++++--
modules/sd_models.py | 26 ++++++++++++++++++--------
modules/shared.py | 9 +++++++--
3 files changed, 48 insertions(+), 12 deletions(-)
(limited to 'modules')
diff --git a/modules/errors.py b/modules/errors.py
index 372dc51a..a668c014 100644
--- a/modules/errors.py
+++ b/modules/errors.py
@@ -2,9 +2,30 @@ import sys
import traceback
+def print_error_explanation(message):
+ lines = message.strip().split("\n")
+ max_len = max([len(x) for x in lines])
+
+ print('=' * max_len, file=sys.stderr)
+ for line in lines:
+ print(line, file=sys.stderr)
+ print('=' * max_len, file=sys.stderr)
+
+
+def display(e: Exception, task):
+ print(f"{task or 'error'}: {type(e).__name__}", file=sys.stderr)
+ print(traceback.format_exc(), file=sys.stderr)
+
+ message = str(e)
+ if "copying a param with shape torch.Size([640, 1024]) from checkpoint, the shape in current model is torch.Size([640, 768])" in message:
+ print_error_explanation("""
+The most likely cause of this is you are trying to load Stable Diffusion 2.0 model without specifying its connfig file.
+See https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#stable-diffusion-20 for how to solve this.
+ """)
+
+
def run(code, task):
try:
code()
except Exception as e:
- print(f"{task}: {type(e).__name__}", file=sys.stderr)
- print(traceback.format_exc(), file=sys.stderr)
+ display(task, e)
diff --git a/modules/sd_models.py b/modules/sd_models.py
index b98b05fc..6846b74a 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -278,6 +278,7 @@ def enable_midas_autodownload():
midas.api.load_model = load_model_wrapper
+
def load_model(checkpoint_info=None):
from modules import lowvram, sd_hijack
checkpoint_info = checkpoint_info or select_checkpoint()
@@ -312,6 +313,7 @@ def load_model(checkpoint_info=None):
sd_config.model.params.unet_config.params.use_fp16 = False
sd_model = instantiate_from_config(sd_config.model)
+
load_model_weights(sd_model, checkpoint_info)
if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
@@ -336,10 +338,12 @@ def load_model(checkpoint_info=None):
def reload_model_weights(sd_model=None, info=None):
from modules import lowvram, devices, sd_hijack
checkpoint_info = info or select_checkpoint()
-
+
if not sd_model:
sd_model = shared.sd_model
+ current_checkpoint_info = sd_model.sd_checkpoint_info
+
if sd_model.sd_model_checkpoint == checkpoint_info.filename:
return
@@ -356,13 +360,19 @@ def reload_model_weights(sd_model=None, info=None):
sd_hijack.model_hijack.undo_hijack(sd_model)
- load_model_weights(sd_model, checkpoint_info)
-
- sd_hijack.model_hijack.hijack(sd_model)
- script_callbacks.model_loaded_callback(sd_model)
-
- if not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram:
- sd_model.to(devices.device)
+ try:
+ load_model_weights(sd_model, checkpoint_info)
+ except Exception as e:
+ print("Failed to load checkpoint, restoring previous")
+ load_model_weights(sd_model, current_checkpoint_info)
+ raise
+ finally:
+ sd_hijack.model_hijack.hijack(sd_model)
+ script_callbacks.model_loaded_callback(sd_model)
+
+ if not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram:
+ sd_model.to(devices.device)
print("Weights loaded.")
+
return sd_model
diff --git a/modules/shared.py b/modules/shared.py
index 23657a93..7588c47b 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -14,7 +14,7 @@ import modules.interrogate
import modules.memmon
import modules.styles
import modules.devices as devices
-from modules import localization, sd_vae, extensions, script_loading
+from modules import localization, sd_vae, extensions, script_loading, errors
from modules.paths import models_path, script_path, sd_path
@@ -494,7 +494,12 @@ class Options:
return False
if self.data_labels[key].onchange is not None:
- self.data_labels[key].onchange()
+ try:
+ self.data_labels[key].onchange()
+ except Exception as e:
+ errors.display(e, f"changing setting {key} to {value}")
+ setattr(self, key, oldval)
+ return False
return True
--
cgit v1.2.1
From 8d8a05a3bbb50fdfeab51679a919d2487bd97976 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Wed, 4 Jan 2023 12:47:42 +0300
Subject: find configs for models at runtime rather than when starting
---
modules/sd_hijack_inpainting.py | 5 ++++-
modules/sd_models.py | 31 ++++++++++++++++++-------------
2 files changed, 22 insertions(+), 14 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py
index 3c214a35..31d2c898 100644
--- a/modules/sd_hijack_inpainting.py
+++ b/modules/sd_hijack_inpainting.py
@@ -97,8 +97,11 @@ def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=F
def should_hijack_inpainting(checkpoint_info):
+ from modules import sd_models
+
ckpt_basename = os.path.basename(checkpoint_info.filename).lower()
- cfg_basename = os.path.basename(checkpoint_info.config).lower()
+ cfg_basename = os.path.basename(sd_models.find_checkpoint_config(checkpoint_info)).lower()
+
return "inpainting" in ckpt_basename and not "inpainting" in cfg_basename
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 6846b74a..6dca4ddf 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -20,7 +20,7 @@ from modules.sd_hijack_inpainting import do_inpainting_hijack, should_hijack_inp
model_dir = "Stable-diffusion"
model_path = os.path.abspath(os.path.join(models_path, model_dir))
-CheckpointInfo = namedtuple("CheckpointInfo", ['filename', 'title', 'hash', 'model_name', 'config'])
+CheckpointInfo = namedtuple("CheckpointInfo", ['filename', 'title', 'hash', 'model_name'])
checkpoints_list = {}
checkpoints_loaded = collections.OrderedDict()
@@ -48,6 +48,14 @@ def checkpoint_tiles():
return sorted([x.title for x in checkpoints_list.values()], key = alphanumeric_key)
+def find_checkpoint_config(info):
+ config = os.path.splitext(info.filename)[0] + ".yaml"
+ if os.path.exists(config):
+ return config
+
+ return shared.cmd_opts.config
+
+
def list_models():
checkpoints_list.clear()
model_list = modelloader.load_models(model_path=model_path, command_path=shared.cmd_opts.ckpt_dir, ext_filter=[".ckpt", ".safetensors"])
@@ -73,7 +81,7 @@ def list_models():
if os.path.exists(cmd_ckpt):
h = model_hash(cmd_ckpt)
title, short_model_name = modeltitle(cmd_ckpt, h)
- checkpoints_list[title] = CheckpointInfo(cmd_ckpt, title, h, short_model_name, shared.cmd_opts.config)
+ checkpoints_list[title] = CheckpointInfo(cmd_ckpt, title, h, short_model_name)
shared.opts.data['sd_model_checkpoint'] = title
elif cmd_ckpt is not None and cmd_ckpt != shared.default_sd_model_file:
print(f"Checkpoint in --ckpt argument not found (Possible it was moved to {model_path}: {cmd_ckpt}", file=sys.stderr)
@@ -81,12 +89,7 @@ def list_models():
h = model_hash(filename)
title, short_model_name = modeltitle(filename, h)
- basename, _ = os.path.splitext(filename)
- config = basename + ".yaml"
- if not os.path.exists(config):
- config = shared.cmd_opts.config
-
- checkpoints_list[title] = CheckpointInfo(filename, title, h, short_model_name, config)
+ checkpoints_list[title] = CheckpointInfo(filename, title, h, short_model_name)
def get_closet_checkpoint_match(searchString):
@@ -282,9 +285,10 @@ def enable_midas_autodownload():
def load_model(checkpoint_info=None):
from modules import lowvram, sd_hijack
checkpoint_info = checkpoint_info or select_checkpoint()
+ checkpoint_config = find_checkpoint_config(checkpoint_info)
- if checkpoint_info.config != shared.cmd_opts.config:
- print(f"Loading config from: {checkpoint_info.config}")
+ if checkpoint_config != shared.cmd_opts.config:
+ print(f"Loading config from: {checkpoint_config}")
if shared.sd_model:
sd_hijack.model_hijack.undo_hijack(shared.sd_model)
@@ -292,7 +296,7 @@ def load_model(checkpoint_info=None):
gc.collect()
devices.torch_gc()
- sd_config = OmegaConf.load(checkpoint_info.config)
+ sd_config = OmegaConf.load(checkpoint_config)
if should_hijack_inpainting(checkpoint_info):
# Hardcoded config for now...
@@ -302,7 +306,7 @@ def load_model(checkpoint_info=None):
sd_config.model.params.finetune_keys = None
# Create a "fake" config with a different name so that we know to unload it when switching models.
- checkpoint_info = checkpoint_info._replace(config=checkpoint_info.config.replace(".yaml", "-inpainting.yaml"))
+ checkpoint_info = checkpoint_info._replace(config=checkpoint_config.replace(".yaml", "-inpainting.yaml"))
if not hasattr(sd_config.model.params, "use_ema"):
sd_config.model.params.use_ema = False
@@ -343,11 +347,12 @@ def reload_model_weights(sd_model=None, info=None):
sd_model = shared.sd_model
current_checkpoint_info = sd_model.sd_checkpoint_info
+ checkpoint_config = find_checkpoint_config(current_checkpoint_info)
if sd_model.sd_model_checkpoint == checkpoint_info.filename:
return
- if sd_model.sd_checkpoint_info.config != checkpoint_info.config or should_hijack_inpainting(checkpoint_info) != should_hijack_inpainting(sd_model.sd_checkpoint_info):
+ if checkpoint_config != find_checkpoint_config(checkpoint_info) or should_hijack_inpainting(checkpoint_info) != should_hijack_inpainting(sd_model.sd_checkpoint_info):
del sd_model
checkpoints_loaded.clear()
load_model(checkpoint_info)
--
cgit v1.2.1
From 96cf15bedecbed97ef9b70b8413d543a9aee5adf Mon Sep 17 00:00:00 2001
From: MMaker
Date: Wed, 4 Jan 2023 05:12:06 -0500
Subject: Add new latent upscale modes
---
modules/shared.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index 7588c47b..a10f69a9 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -564,8 +564,11 @@ if os.path.exists(config_filename):
latent_upscale_default_mode = "Latent"
latent_upscale_modes = {
- "Latent": "bilinear",
- "Latent (nearest)": "nearest",
+ "Latent": {"mode": "bilinear", "antialias": False},
+ "Latent (antialiased)": {"mode": "bilinear", "antialias": True},
+ "Latent (bicubic)": {"mode": "bicubic", "antialias": False},
+ "Latent (bicubic, antialiased)": {"mode": "bicubic", "antialias": True},
+ "Latent (nearest)": {"mode": "nearest", "antialias": False},
}
sd_upscalers = []
--
cgit v1.2.1
From 15fd0b8bc4734ea85bca1acfb12b51465ab9817d Mon Sep 17 00:00:00 2001
From: MMaker
Date: Wed, 4 Jan 2023 05:12:54 -0500
Subject: Update processing.py
---
modules/processing.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index a172af0b..7c72b56a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -713,7 +713,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
for i in range(samples.shape[0]):
save_intermediate(samples, i)
- samples = torch.nn.functional.interpolate(samples, size=(target_height // opt_f, target_width // opt_f), mode=latent_scale_mode)
+ samples = torch.nn.functional.interpolate(samples, size=(target_height // opt_f, target_width // opt_f), mode=latent_scale_mode["mode"], antialias=latent_scale_mode["antialias"])
# Avoid making the inpainting conditioning unless necessary as
# this does need some extra compute to decode / encode the image again.
--
cgit v1.2.1
From 4ec6470a1a2d9430b91266426f995e48f59564e1 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Wed, 4 Jan 2023 13:26:23 +0300
Subject: fix checkpoint list API
---
modules/api/api.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 9c670f00..2b1f180c 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -18,7 +18,7 @@ from modules.textual_inversion.textual_inversion import create_embedding, train_
from modules.textual_inversion.preprocess import preprocess
from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork
from PIL import PngImagePlugin,Image
-from modules.sd_models import checkpoints_list
+from modules.sd_models import checkpoints_list, find_checkpoint_config
from modules.realesrgan_model import get_realesrgan_models
from modules import devices
from typing import List
@@ -303,7 +303,7 @@ class Api:
return upscalers
def get_sd_models(self):
- return [{"title":x.title, "model_name":x.model_name, "hash":x.hash, "filename": x.filename, "config": x.config} for x in checkpoints_list.values()]
+ return [{"title":x.title, "model_name":x.model_name, "hash":x.hash, "filename": x.filename, "config": find_checkpoint_config(x)} for x in checkpoints_list.values()]
def get_hypernetworks(self):
return [{"name": name, "path": shared.hypernetworks[name]} for name in shared.hypernetworks]
--
cgit v1.2.1
From b2151b934fe0a3613570c6abd7615d3788fd1c8f Mon Sep 17 00:00:00 2001
From: MMaker
Date: Wed, 4 Jan 2023 05:36:18 -0500
Subject: Rename bicubic antialiased option
Comma was causing the the value in PNG info to be quoted, which causes the upscaler dropdown option to be blank when sending to UI
---
modules/shared.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index a10f69a9..c1b20081 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -567,7 +567,7 @@ latent_upscale_modes = {
"Latent": {"mode": "bilinear", "antialias": False},
"Latent (antialiased)": {"mode": "bilinear", "antialias": True},
"Latent (bicubic)": {"mode": "bicubic", "antialias": False},
- "Latent (bicubic, antialiased)": {"mode": "bicubic", "antialias": True},
+ "Latent (bicubic antialiased)": {"mode": "bicubic", "antialias": True},
"Latent (nearest)": {"mode": "nearest", "antialias": False},
}
--
cgit v1.2.1
From 3bd737767b071878ea980e94b8705f603bcf545e Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Wed, 4 Jan 2023 14:20:32 +0300
Subject: disable broken API logging
---
modules/api/api.py | 1 -
1 file changed, 1 deletion(-)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index a6c1d6ed..6267afdc 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -100,7 +100,6 @@ class Api:
self.router = APIRouter()
self.app = app
- init_api_middleware(self.app)
self.queue_lock = queue_lock
self.add_api_route("/sdapi/v1/txt2img", self.text2imgapi, methods=["POST"], response_model=TextToImageResponse)
self.add_api_route("/sdapi/v1/img2img", self.img2imgapi, methods=["POST"], response_model=ImageToImageResponse)
--
cgit v1.2.1
From 0cd6399b8b1699b8b7acad6f0ad2988111fe618e Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Wed, 4 Jan 2023 14:29:13 +0300
Subject: fix broken inpainting model
---
modules/sd_models.py | 3 ---
1 file changed, 3 deletions(-)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 6dca4ddf..a568823d 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -305,9 +305,6 @@ def load_model(checkpoint_info=None):
sd_config.model.params.unet_config.params.in_channels = 9
sd_config.model.params.finetune_keys = None
- # Create a "fake" config with a different name so that we know to unload it when switching models.
- checkpoint_info = checkpoint_info._replace(config=checkpoint_config.replace(".yaml", "-inpainting.yaml"))
-
if not hasattr(sd_config.model.params, "use_ema"):
sd_config.model.params.use_ema = False
--
cgit v1.2.1
From 11b8160a086c434d5baf4971edda46e6d2126800 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 4 Jan 2023 06:36:57 -0500
Subject: fix typo
---
modules/api/api.py | 1 +
1 file changed, 1 insertion(+)
(limited to 'modules')
diff --git a/modules/api/api.py b/modules/api/api.py
index 6267afdc..48a70a44 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -101,6 +101,7 @@ class Api:
self.router = APIRouter()
self.app = app
self.queue_lock = queue_lock
+ api_middleware(self.app)
self.add_api_route("/sdapi/v1/txt2img", self.text2imgapi, methods=["POST"], response_model=TextToImageResponse)
self.add_api_route("/sdapi/v1/img2img", self.img2imgapi, methods=["POST"], response_model=ImageToImageResponse)
self.add_api_route("/sdapi/v1/extra-single-image", self.extras_single_image_api, methods=["POST"], response_model=ExtrasSingleImageResponse)
--
cgit v1.2.1
From 642142556d8ecdea9beb86d7618b628b1803ab98 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Wed, 4 Jan 2023 15:09:53 +0300
Subject: use commandline-supplied cuda device name instead of cuda:0 for
safetensors PR that doesn't fix anything
---
modules/sd_models.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/sd_models.py b/modules/sd_models.py
index ee918f24..76a89e88 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -173,7 +173,7 @@ def read_state_dict(checkpoint_file, print_global_state=False, map_location=None
if extension.lower() == ".safetensors":
device = map_location or shared.weight_load_location
if device is None:
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ device = devices.get_cuda_device_string() if torch.cuda.is_available() else "cpu"
pl_sd = safetensors.torch.load_file(checkpoint_file, device=device)
else:
pl_sd = torch.load(checkpoint_file, map_location=map_location or shared.weight_load_location)
--
cgit v1.2.1
From 21ee77db314ede7ccbb18787962347c09a4df0c7 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 4 Jan 2023 08:04:38 -0500
Subject: add cross-attention info
---
modules/sd_hijack.py | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index edcbaf52..fa2cd4bb 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -35,26 +35,35 @@ def apply_optimizations():
ldm.modules.diffusionmodules.model.nonlinearity = silu
ldm.modules.diffusionmodules.openaimodel.th = sd_hijack_unet.th
+
+ optimization_method = None
if cmd_opts.force_enable_xformers or (cmd_opts.xformers and shared.xformers_available and torch.version.cuda and (6, 0) <= torch.cuda.get_device_capability(shared.device) <= (9, 0)):
print("Applying xformers cross attention optimization.")
ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.xformers_attention_forward
ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.xformers_attnblock_forward
+ optimization_method = 'xformers'
elif cmd_opts.opt_split_attention_v1:
print("Applying v1 cross attention optimization.")
ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1
+ optimization_method = 'V1'
elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention_invokeai or not torch.cuda.is_available()):
if not invokeAI_mps_available and shared.device.type == 'mps':
print("The InvokeAI cross attention optimization for MPS requires the psutil package which is not installed.")
print("Applying v1 cross attention optimization.")
ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_v1
+ optimization_method = 'V1'
else:
print("Applying cross attention optimization (InvokeAI).")
ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward_invokeAI
+ optimization_method = 'InvokeAI'
elif not cmd_opts.disable_opt_split_attention and (cmd_opts.opt_split_attention or torch.cuda.is_available()):
print("Applying cross attention optimization (Doggettx).")
ldm.modules.attention.CrossAttention.forward = sd_hijack_optimizations.split_cross_attention_forward
ldm.modules.diffusionmodules.model.AttnBlock.forward = sd_hijack_optimizations.cross_attention_attnblock_forward
+ optimization_method = 'Doggettx'
+
+ return optimization_method
def undo_optimizations():
@@ -75,6 +84,7 @@ class StableDiffusionModelHijack:
layers = None
circular_enabled = False
clip = None
+ optimization_method = None
embedding_db = modules.textual_inversion.textual_inversion.EmbeddingDatabase(cmd_opts.embeddings_dir)
@@ -94,7 +104,7 @@ class StableDiffusionModelHijack:
m.cond_stage_model.model.token_embedding = EmbeddingsWithFixes(m.cond_stage_model.model.token_embedding, self)
m.cond_stage_model = sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)
- apply_optimizations()
+ self.optimization_method = apply_optimizations()
self.clip = m.cond_stage_model
--
cgit v1.2.1
From 1cfd8aec4ae5a6ca1afd67b44cb4ef6dd14d8c34 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Wed, 4 Jan 2023 16:05:42 +0300
Subject: make it possible to work with opts.show_progress_every_n_steps = -1
with medvram
---
modules/shared.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
(limited to 'modules')
diff --git a/modules/shared.py b/modules/shared.py
index 4fcc6edd..54a6ba23 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -214,12 +214,13 @@ class State:
"""sets self.current_image from self.current_latent if enough sampling steps have been made after the last call to this"""
def set_current_image(self):
+ if not parallel_processing_allowed:
+ return
+
if self.sampling_step - self.current_image_sampling_step >= opts.show_progress_every_n_steps and opts.show_progress_every_n_steps > 0:
self.do_set_current_image()
def do_set_current_image(self):
- if not parallel_processing_allowed:
- return
if self.current_latent is None:
return
@@ -231,6 +232,7 @@ class State:
self.current_image_sampling_step = self.sampling_step
+
state = State()
artist_db = modules.artists.ArtistsDatabase(os.path.join(script_path, 'artists.csv'))
--
cgit v1.2.1
From 4d66bf2c0d27702cc83b9cc57ebb1f359d18d938 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Wed, 4 Jan 2023 17:24:46 +0300
Subject: add infotext to "-before-highres-fix" images
---
modules/processing.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
(limited to 'modules')
diff --git a/modules/processing.py b/modules/processing.py
index fd7c7015..c03e77e7 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -136,6 +136,7 @@ class StableDiffusionProcessing():
self.all_negative_prompts = None
self.all_seeds = None
self.all_subseeds = None
+ self.iteration = 0
def txt2img_image_conditioning(self, x, width=None, height=None):
if self.sampler.conditioning_key not in {'hybrid', 'concat'}:
@@ -544,6 +545,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
state.job_count = p.n_iter
for n in range(p.n_iter):
+ p.iteration = n
+
if state.skipped:
state.skipped = False
@@ -707,7 +710,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
if not isinstance(image, Image.Image):
image = sd_samplers.sample_to_image(image, index, approximation=0)
- images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, suffix="-before-highres-fix")
+ info = create_infotext(self, self.all_prompts, self.all_seeds, self.all_subseeds, [], iteration=self.iteration, position_in_batch=index)
+ images.save_image(image, self.outpath_samples, "", seeds[index], prompts[index], opts.samples_format, info=info, suffix="-before-highres-fix")
if latent_scale_mode is not None:
for i in range(samples.shape[0]):
--
cgit v1.2.1