Diffstat (limited to 'webui.py')
-rw-r--r--  webui.py  561
1 file changed, 287 insertions, 274 deletions
diff --git a/webui.py b/webui.py
index 13e5112a..8de1bcf2 100644
--- a/webui.py
+++ b/webui.py
@@ -1,14 +1,13 @@
-import argparse, os, sys, glob
+import argparse
+import os
+import sys
from collections import namedtuple
-
import torch
import torch.nn as nn
import numpy as np
import gradio as gr
from omegaconf import OmegaConf
from PIL import Image, ImageFont, ImageDraw, PngImagePlugin
-from itertools import islice
-from einops import rearrange, repeat
from torch import autocast
import mimetypes
import random
@@ -22,14 +21,13 @@ import k_diffusion.sampling
from ldm.util import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler
-import ldm.modules.encoders.modules
try:
# this silences the annoying "Some weights of the model checkpoint were not used when initializing..." message at start.
from transformers import logging
logging.set_verbosity_error()
-except:
+except Exception:
pass
# this is a fix for Windows users. Without it, JavaScript files will be served with a text/html content-type and the browser will not show any UI
@@ -41,13 +39,13 @@ opt_C = 4
opt_f = 8
LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS)
-invalid_filename_chars = '<>:"/\|?*\n'
+invalid_filename_chars = '<>:"/\\|?*\n'
config_filename = "config.json"
parser = argparse.ArgumentParser()
parser.add_argument("--config", type=str, default="configs/stable-diffusion/v1-inference.yaml", help="path to config which constructs model",)
parser.add_argument("--ckpt", type=str, default="models/ldm/stable-diffusion-v1/model.ckpt", help="path to checkpoint of model",)
-parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN')) # i disagree with where you're putting it but since all guidefags are doing it this way, there you go
+parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN'))
parser.add_argument("--no-half", action='store_true', help="do not switch the model to 16-bit floats")
parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware accleration in browser)")
parser.add_argument("--max-batch-count", type=int, default=16, help="maximum batch count value for the UI")
@@ -64,7 +62,7 @@ css_hide_progressbar = """
SamplerData = namedtuple('SamplerData', ['name', 'constructor'])
samplers = [
- *[SamplerData(x[0], lambda m, funcname=x[1]: KDiffusionSampler(m, funcname)) for x in [
+ *[SamplerData(x[0], lambda funcname=x[1]: KDiffusionSampler(funcname)) for x in [
('LMS', 'sample_lms'),
('Heun', 'sample_heun'),
('Euler', 'sample_euler'),
@@ -72,8 +70,8 @@ samplers = [
('DPM 2', 'sample_dpm_2'),
('DPM 2 Ancestral', 'sample_dpm_2_ancestral'),
] if hasattr(k_diffusion.sampling, x[1])],
- SamplerData('DDIM', lambda m: DDIMSampler(model)),
- SamplerData('PLMS', lambda m: PLMSSampler(model)),
+ SamplerData('DDIM', lambda: VanillaStableDiffusionSampler(DDIMSampler)),
+ SamplerData('PLMS', lambda: VanillaStableDiffusionSampler(PLMSSampler)),
]
samplers_for_img2img = [x for x in samplers if x.name != 'DDIM' and x.name != 'PLMS']
@@ -102,7 +100,7 @@ try:
),
]
have_realesrgan = True
-except:
+except Exception:
print("Error loading Real-ESRGAN:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
@@ -111,24 +109,30 @@ except:
class Options:
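+ # OptionInfo describes a single setting: its default value, the label shown in the
+ # settings UI, and optionally the gradio component class (plus kwargs) used to render it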
+ class OptionInfo:
+ def __init__(self, default=None, label="", component=None, component_args=None):
+ self.default = default
+ self.label = label
+ self.component = component
+ self.component_args = component_args
+
data = None
data_labels = {
- "outdir": ("", "Output dictectory; if empty, defaults to 'outputs/*'"),
- "samples_save": (True, "Save indiviual samples"),
- "samples_format": ('png', 'File format for indiviual samples'),
- "grid_save": (True, "Save image grids"),
- "grid_format": ('png', 'File format for grids'),
- "grid_extended_filename": (False, "Add extended info (seed, prompt) to filename when saving grid"),
- "n_rows": (-1, "Grid row count; use -1 for autodetect and 0 for it to be same as batch size", -1, 16),
- "jpeg_quality": (80, "Quality for saved jpeg images", 1, 100),
- "verify_input": (True, "Check input, and produce warning if it's too long"),
- "enable_pnginfo": (True, "Save text information about generation parameters as chunks to png files"),
- "prompt_matrix_add_to_start": (True, "In prompt matrix, add the variable combination of text to the start of the prompt, rather than the end"),
- "sd_upscale_overlap": (64, "Overlap for tiles for SD upscale. The smaller it is, the less smooth transition from one tile to another", 0, 256, 16),
+ "outdir": OptionInfo("", "Output dictectory; if empty, defaults to 'outputs/*'"),
+ "samples_save": OptionInfo(True, "Save indiviual samples"),
+ "samples_format": OptionInfo('png', 'File format for indiviual samples'),
+ "grid_save": OptionInfo(True, "Save image grids"),
+ "grid_format": OptionInfo('png', 'File format for grids'),
+ "grid_extended_filename": OptionInfo(False, "Add extended info (seed, prompt) to filename when saving grid"),
+ "n_rows": OptionInfo(-1, "Grid row count; use -1 for autodetect and 0 for it to be same as batch size", gr.Slider, {"minimum": -1, "maximum": 16, "step": 1}),
+ "jpeg_quality": OptionInfo(80, "Quality for saved jpeg images", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1}),
+ "enable_pnginfo": OptionInfo(True, "Save text information about generation parameters as chunks to png files"),
+ "prompt_matrix_add_to_start": OptionInfo(True, "In prompt matrix, add the variable combination of text to the start of the prompt, rather than the end"),
+ "sd_upscale_overlap": OptionInfo(64, "Overlap for tiles for SD upscale. The smaller it is, the less smooth transition from one tile to another", gr.Slider, {"minimum": 0, "maximum": 256, "step": 16}),
}
def __init__(self):
- self.data = {k: v[0] for k, v in self.data_labels.items()}
+ self.data = {k: v.default for k, v in self.data_labels.items()}
def __setattr__(self, key, value):
if self.data is not None:
@@ -143,7 +147,7 @@ class Options:
return self.data[item]
if item in self.data_labels:
- return self.data_labels[item][0]
+ return self.data_labels[item].default
return super(Options, self).__getattribute__(item)
@@ -156,11 +160,6 @@ class Options:
self.data = json.load(file)
-def chunk(it, size):
- it = iter(it)
- return iter(lambda: tuple(islice(it, size)), ())
-
-
def load_model_from_config(config, ckpt, verbose=False):
print(f"Loading model from {ckpt}")
pl_sd = torch.load(ckpt, map_location="cpu")
@@ -181,36 +180,6 @@ def load_model_from_config(config, ckpt, verbose=False):
return model
-class CFGDenoiser(nn.Module):
- def __init__(self, model):
- super().__init__()
- self.inner_model = model
-
- def forward(self, x, sigma, uncond, cond, cond_scale):
- x_in = torch.cat([x] * 2)
- sigma_in = torch.cat([sigma] * 2)
- cond_in = torch.cat([uncond, cond])
- uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2)
- return uncond + (cond - uncond) * cond_scale
-
-
-class KDiffusionSampler:
- def __init__(self, m, funcname):
- self.model = m
- self.model_wrap = k_diffusion.external.CompVisDenoiser(m)
- self.funcname = funcname
- self.func = getattr(k_diffusion.sampling, self.funcname)
-
- def sample(self, S, conditioning, batch_size, shape, verbose, unconditional_guidance_scale, unconditional_conditioning, eta, x_T):
- sigmas = self.model_wrap.get_sigmas(S)
- x = x_T * sigmas[0]
- model_wrap_cfg = CFGDenoiser(self.model_wrap)
-
- samples_ddim = self.func(model_wrap_cfg, x, sigmas, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': unconditional_guidance_scale}, disable=False)
-
- return samples_ddim, None
-
-
def create_random_tensors(shape, seeds):
xs = []
for seed in seeds:
@@ -256,7 +225,7 @@ def plaintext_to_html(text):
return text
-def load_GFPGAN():
+def load_gfpgan():
model_name = 'GFPGANv1.3'
model_path = os.path.join(cmd_opts.gfpgan_dir, 'experiments/pretrained_models', model_name + '.pth')
if not os.path.isfile(model_path):
@@ -358,7 +327,7 @@ def combine_grid(grid):
def draw_prompt_matrix(im, width, height, all_prompts):
- def wrap(text, d, font, line_length):
+ def wrap(text, font, line_length):
lines = ['']
for word in text.split():
line = f'{lines[-1]} {word}'.strip()
@@ -368,16 +337,16 @@ def draw_prompt_matrix(im, width, height, all_prompts):
lines.append(word)
return '\n'.join(lines)
- def draw_texts(pos, x, y, texts, sizes):
+ def draw_texts(pos, draw_x, draw_y, texts, sizes):
for i, (text, size) in enumerate(zip(texts, sizes)):
active = pos & (1 << i) != 0
if not active:
text = '\u0336'.join(text) + '\u0336'
- d.multiline_text((x, y + size[1] / 2), text, font=fnt, fill=color_active if active else color_inactive, anchor="mm", align="center")
+ d.multiline_text((draw_x, draw_y + size[1] / 2), text, font=fnt, fill=color_active if active else color_inactive, anchor="mm", align="center")
- y += size[1] + line_spacing
+ draw_y += size[1] + line_spacing
fontsize = (width + height) // 25
line_spacing = fontsize // 2
@@ -399,8 +368,8 @@ def draw_prompt_matrix(im, width, height, all_prompts):
d = ImageDraw.Draw(result)
boundary = math.ceil(len(prompts) / 2)
- prompts_horiz = [wrap(x, d, fnt, width) for x in prompts[:boundary]]
- prompts_vert = [wrap(x, d, fnt, pad_left) for x in prompts[boundary:]]
+ prompts_horiz = [wrap(x, fnt, width) for x in prompts[:boundary]]
+ prompts_vert = [wrap(x, fnt, pad_left) for x in prompts[boundary:]]
sizes_hor = [(x[2] - x[0], x[3] - x[1]) for x in [d.multiline_textbbox((0, 0), x, font=fnt) for x in prompts_horiz]]
sizes_ver = [(x[2] - x[0], x[3] - x[1]) for x in [d.multiline_textbbox((0, 0), x, font=fnt) for x in prompts_vert]]
@@ -458,25 +427,6 @@ def resize_image(resize_mode, im, width, height):
return res
-def check_prompt_length(prompt, comments):
- """this function tests if prompt is too long, and if so, adds a message to comments"""
-
- tokenizer = model.cond_stage_model.tokenizer
- max_length = model.cond_stage_model.max_length
-
- info = model.cond_stage_model.tokenizer([prompt], truncation=True, max_length=max_length, return_overflowing_tokens=True, padding="max_length", return_tensors="pt")
- ovf = info['overflowing_tokens'][0]
- overflowing_count = ovf.shape[0]
- if overflowing_count == 0:
- return
-
- vocab = {v: k for k, v in tokenizer.get_vocab().items()}
- overflowing_words = [vocab.get(int(x), "") for x in ovf]
- overflowing_text = tokenizer.convert_tokens_to_string(''.join(overflowing_words))
-
- comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n")
-
-
def wrap_gradio_call(func):
def f(*p1, **p2):
t = time.perf_counter()
@@ -494,7 +444,7 @@ def wrap_gradio_call(func):
GFPGAN = None
if os.path.exists(cmd_opts.gfpgan_dir):
try:
- GFPGAN = load_GFPGAN()
+ GFPGAN = load_gfpgan()
print("Loaded GFPGAN")
except Exception:
print("Error loading GFPGAN:", file=sys.stderr)
@@ -506,11 +456,11 @@ class StableDiffuionModelHijack:
word_embeddings = {}
word_embeddings_checksums = {}
fixes = None
- used_custom_terms = []
+ comments = None
dir_mtime = None
- def load_textual_inversion_embeddings(self, dir, model):
- mt = os.path.getmtime(dir)
+ def load_textual_inversion_embeddings(self, dirname, model):
+ mt = os.path.getmtime(dirname)
if self.dir_mtime is not None and mt <= self.dir_mtime:
return
@@ -543,10 +493,10 @@ class StableDiffuionModelHijack:
self.ids_lookup[first_id] = []
self.ids_lookup[first_id].append((ids, name))
- for fn in os.listdir(dir):
+ for fn in os.listdir(dirname):
try:
- process_file(os.path.join(dir, fn), fn)
- except:
+ process_file(os.path.join(dirname, fn), fn)
+ except Exception:
print(f"Error loading emedding {fn}:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
continue
@@ -561,10 +511,10 @@ class StableDiffuionModelHijack:
class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
- def __init__(self, wrapped, embeddings):
+ def __init__(self, wrapped, hijack):
super().__init__()
self.wrapped = wrapped
- self.embeddings = embeddings
+ self.hijack = hijack
self.tokenizer = wrapped.tokenizer
self.max_length = wrapped.max_length
self.token_mults = {}
@@ -586,12 +536,13 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
self.token_mults[ident] = mult
def forward(self, text):
- self.embeddings.fixes = []
- self.embeddings.used_custom_terms = []
+ self.hijack.fixes = []
+ self.hijack.comments = []
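+ # hijack.comments accumulates per-prompt warnings (token truncation, custom terms
+ # used) that process_images picks up and reports with the results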
remade_batch_tokens = []
id_start = self.wrapped.tokenizer.bos_token_id
id_end = self.wrapped.tokenizer.eos_token_id
maxlen = self.wrapped.max_length - 2
+ used_custom_terms = []
cache = {}
batch_tokens = self.wrapped.tokenizer(text, truncation=False, add_special_tokens=False)["input_ids"]
@@ -611,7 +562,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
while i < len(tokens):
token = tokens[i]
- possible_matches = self.embeddings.ids_lookup.get(token, None)
+ possible_matches = self.hijack.ids_lookup.get(token, None)
mult_change = self.token_mults.get(token)
if mult_change is not None:
@@ -628,7 +579,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
multipliers.append(mult)
i += len(ids) - 1
found = True
- self.embeddings.used_custom_terms.append((word, self.embeddings.word_embeddings_checksums[word]))
+ used_custom_terms.append((word, self.hijack.word_embeddings_checksums[word]))
break
if not found:
@@ -637,6 +588,14 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
i += 1
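+ # replaces the removed check_prompt_length()/verify_input option: warn about tokens
+ # past the model's context window as part of tokenization itself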
+ if len(remade_tokens) > maxlen - 2:
+ vocab = {v: k for k, v in self.wrapped.tokenizer.get_vocab().items()}
+ ovf = remade_tokens[maxlen - 2:]
+ overflowing_words = [vocab.get(int(x), "") for x in ovf]
+ overflowing_text = self.wrapped.tokenizer.convert_tokens_to_string(''.join(overflowing_words))
+
+ self.hijack.comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n")
+
remade_tokens = remade_tokens + [id_end] * (maxlen - 2 - len(remade_tokens))
remade_tokens = [id_start] + remade_tokens[0:maxlen-2] + [id_end]
cache[tuple_tokens] = (remade_tokens, fixes, multipliers)
@@ -645,9 +604,12 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
multipliers = [1.0] + multipliers[0:maxlen - 2] + [1.0]
remade_batch_tokens.append(remade_tokens)
- self.embeddings.fixes.append(fixes)
+ self.hijack.fixes.append(fixes)
batch_multipliers.append(multipliers)
+ if len(used_custom_terms) > 0:
+ self.hijack.comments.append("Used custom terms: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms]))
+
tokens = torch.asarray(remade_batch_tokens).to(self.wrapped.device)
outputs = self.wrapped.transformer(input_ids=tokens)
z = outputs.last_hidden_state
@@ -679,71 +641,123 @@ class EmbeddingsWithFixes(nn.Module):
for offset, word in fixes:
tensor[offset] = self.embeddings.word_embeddings[word]
-
return inputs_embeds
-def process_images(outpath, func_init, func_sample, prompt, seed, sampler_index, batch_size, n_iter, steps, cfg_scale, width, height, prompt_matrix, use_GFPGAN, do_not_save_grid=False, extra_generation_params=None):
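+# StableDiffusionProcessing carries every parameter of one generation job; the
+# txt2img and img2img subclasses below provide their own init() and sample()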
+class StableDiffusionProcessing:
+ def __init__(self, outpath=None, prompt="", seed=-1, sampler_index=0, batch_size=1, n_iter=1, steps=50, cfg_scale=7.0, width=512, height=512, prompt_matrix=False, use_GFPGAN=False, do_not_save_grid=False, extra_generation_params=None):
+ self.outpath: str = outpath
+ self.prompt: str = prompt
+ self.seed: int = seed
+ self.sampler_index: int = sampler_index
+ self.batch_size: int = batch_size
+ self.n_iter: int = n_iter
+ self.steps: int = steps
+ self.cfg_scale: float = cfg_scale
+ self.width: int = width
+ self.height: int = height
+ self.prompt_matrix: bool = prompt_matrix
+ self.use_GFPGAN: bool = use_GFPGAN
+ self.do_not_save_grid: bool = do_not_save_grid
+ self.extra_generation_params: dict = extra_generation_params
+
+ def init(self):
+ pass
+
+ def sample(self, x, conditioning, unconditional_conditioning):
+ raise NotImplementedError()
+
+
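+# adapts ldm's DDIM/PLMS samplers to the same sample() interface as KDiffusionSampler,
+# so process_images can drive any sampler through p.sample()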
+class VanillaStableDiffusionSampler:
+ def __init__(self, constructor):
+ self.sampler = constructor(sd_model)
+
+ def sample(self, p: StableDiffusionProcessing, x, conditioning, unconditional_conditioning):
+ samples_ddim, _ = self.sampler.sample(S=p.steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x)
+ return samples_ddim
+
+
+class CFGDenoiser(nn.Module):
+ def __init__(self, model):
+ super().__init__()
+ self.inner_model = model
+
+ def forward(self, x, sigma, uncond, cond, cond_scale):
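+ # classifier-free guidance: evaluate unconditional and conditional prompts as a
+ # single doubled batch, then amplify the conditional direction by cond_scale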
+ x_in = torch.cat([x] * 2)
+ sigma_in = torch.cat([sigma] * 2)
+ cond_in = torch.cat([uncond, cond])
+ uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2)
+ return uncond + (cond - uncond) * cond_scale
+
+
+class KDiffusionSampler:
+ def __init__(self, funcname):
+ self.model_wrap = k_diffusion.external.CompVisDenoiser(sd_model)
+ self.funcname = funcname
+ self.func = getattr(k_diffusion.sampling, self.funcname)
+ self.model_wrap_cfg = CFGDenoiser(self.model_wrap)
+
+ def sample(self, p: StableDiffusionProcessing, x, conditioning, unconditional_conditioning):
+ sigmas = self.model_wrap.get_sigmas(p.steps)
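+ # k-diffusion expects the initial noise scaled up to the largest sigma in the schedule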
+ x = x * sigmas[0]
+
+ samples_ddim = self.func(self.model_wrap_cfg, x, sigmas, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False)
+ return samples_ddim
+
+
+def process_images(p: StableDiffusionProcessing):
"""this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch"""
- assert prompt is not None
+ prompt = p.prompt
+ model = sd_model
+
+ assert p.prompt is not None
torch_gc()
- if seed == -1:
- seed = random.randrange(4294967294)
- seed = int(seed)
+ seed = int(random.randrange(4294967294) if p.seed == -1 else p.seed)
- os.makedirs(outpath, exist_ok=True)
+ os.makedirs(p.outpath, exist_ok=True)
- sample_path = os.path.join(outpath, "samples")
+ sample_path = os.path.join(p.outpath, "samples")
os.makedirs(sample_path, exist_ok=True)
base_count = len(os.listdir(sample_path))
- grid_count = len(os.listdir(outpath)) - 1
+ grid_count = len(os.listdir(p.outpath)) - 1
comments = []
prompt_matrix_parts = []
- if prompt_matrix:
+ if p.prompt_matrix:
all_prompts = []
prompt_matrix_parts = prompt.split("|")
combination_count = 2 ** (len(prompt_matrix_parts) - 1)
for combination_num in range(combination_count):
- selected_prompts = [text.strip().strip(',') for n, text in enumerate(prompt_matrix_parts[1:]) if combination_num & (1<<n)]
+ selected_prompts = [text.strip().strip(',') for n, text in enumerate(prompt_matrix_parts[1:]) if combination_num & (1 << n)]
if opts.prompt_matrix_add_to_start:
selected_prompts = selected_prompts + [prompt_matrix_parts[0]]
else:
selected_prompts = [prompt_matrix_parts[0]] + selected_prompts
- all_prompts.append( ", ".join(selected_prompts))
+ all_prompts.append(", ".join(selected_prompts))
- n_iter = math.ceil(len(all_prompts) / batch_size)
+ p.n_iter = math.ceil(len(all_prompts) / p.batch_size)
all_seeds = len(all_prompts) * [seed]
- print(f"Prompt matrix will create {len(all_prompts)} images using a total of {n_iter} batches.")
+ print(f"Prompt matrix will create {len(all_prompts)} images using a total of {p.n_iter} batches.")
else:
-
- if opts.verify_input:
- try:
- check_prompt_length(prompt, comments)
- except:
- import traceback
- print("Error verifying input:", file=sys.stderr)
- print(traceback.format_exc(), file=sys.stderr)
-
- all_prompts = batch_size * n_iter * [prompt]
+ all_prompts = p.batch_size * p.n_iter * [prompt]
all_seeds = [seed + x for x in range(len(all_prompts))]
generation_params = {
- "Steps": steps,
- "Sampler": samplers[sampler_index].name,
- "CFG scale": cfg_scale,
+ "Steps": p.steps,
+ "Sampler": samplers[p.sampler_index].name,
+ "CFG scale": p.cfg_scale,
"Seed": seed,
- "GFPGAN": ("GFPGAN" if use_GFPGAN and GFPGAN is not None else None)
+ "GFPGAN": ("GFPGAN" if p.use_GFPGAN and GFPGAN is not None else None)
}
- if extra_generation_params is not None:
- generation_params.update(extra_generation_params)
+ if p.extra_generation_params is not None:
+ generation_params.update(p.extra_generation_params)
generation_params_text = ", ".join([k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None])
@@ -755,32 +769,32 @@ def process_images(outpath, func_init, func_sample, prompt, seed, sampler_index,
output_images = []
with torch.no_grad(), autocast("cuda"), model.ema_scope():
- init_data = func_init()
+ p.init()
- for n in range(n_iter):
- prompts = all_prompts[n * batch_size:(n + 1) * batch_size]
- seeds = all_seeds[n * batch_size:(n + 1) * batch_size]
+ for n in range(p.n_iter):
+ prompts = all_prompts[n * p.batch_size:(n + 1) * p.batch_size]
+ seeds = all_seeds[n * p.batch_size:(n + 1) * p.batch_size]
uc = model.get_learned_conditioning(len(prompts) * [""])
c = model.get_learned_conditioning(prompts)
- if len(model_hijack.used_custom_terms) > 0:
- comments.append("Used custom terms: " + ", ".join([f'{word} [{checksum}]' for word, checksum in model_hijack.used_custom_terms]))
+ if len(model_hijack.comments) > 0:
+ comments += model_hijack.comments
# we manually generate all input noises because each one should have a specific seed
- x = create_random_tensors([opt_C, height // opt_f, width // opt_f], seeds=seeds)
+ x = create_random_tensors([opt_C, p.height // opt_f, p.width // opt_f], seeds=seeds)
- samples_ddim = func_sample(init_data=init_data, x=x, conditioning=c, unconditional_conditioning=uc)
+ samples_ddim = p.sample(x=x, conditioning=c, unconditional_conditioning=uc)
x_samples_ddim = model.decode_first_stage(samples_ddim)
x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
- if prompt_matrix or opts.samples_save or opts.grid_save:
+ if p.prompt_matrix or opts.samples_save or opts.grid_save:
for i, x_sample in enumerate(x_samples_ddim):
- x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
+ x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
x_sample = x_sample.astype(np.uint8)
- if use_GFPGAN and GFPGAN is not None:
+ if p.use_GFPGAN and GFPGAN is not None:
torch_gc()
cropped_faces, restored_faces, restored_img = GFPGAN.enhance(x_sample, has_aligned=False, only_center_face=False, paste_back=True)
x_sample = restored_img
@@ -791,44 +805,44 @@ def process_images(outpath, func_init, func_sample, prompt, seed, sampler_index,
output_images.append(image)
base_count += 1
- if (prompt_matrix or opts.grid_save) and not do_not_save_grid:
- if prompt_matrix:
- grid = image_grid(output_images, batch_size, force_n_rows=1 << ((len(prompt_matrix_parts)-1)//2))
+ if (p.prompt_matrix or opts.grid_save) and not p.do_not_save_grid:
+ if p.prompt_matrix:
+ grid = image_grid(output_images, p.batch_size, force_n_rows=1 << ((len(prompt_matrix_parts)-1)//2))
try:
- grid = draw_prompt_matrix(grid, width, height, prompt_matrix_parts)
- except:
+ grid = draw_prompt_matrix(grid, p.width, p.height, prompt_matrix_parts)
+ except Exception:
import traceback
print("Error creating prompt_matrix text:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
output_images.insert(0, grid)
else:
- grid = image_grid(output_images, batch_size)
+ grid = image_grid(output_images, p.batch_size)
- save_image(grid, outpath, f"grid-{grid_count:04}", seed, prompt, opts.grid_format, info=infotext(), short_filename=not opts.grid_extended_filename)
+ save_image(grid, p.outpath, f"grid-{grid_count:04}", seed, prompt, opts.grid_format, info=infotext(), short_filename=not opts.grid_extended_filename)
grid_count += 1
torch_gc()
return output_images, seed, infotext()
-def txt2img(prompt: str, ddim_steps: int, sampler_index: int, use_GFPGAN: bool, prompt_matrix: bool, ddim_eta: float, n_iter: int, batch_size: int, cfg_scale: float, seed: int, height: int, width: int):
- outpath = opts.outdir or "outputs/txt2img-samples"
+class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
+ sampler = None
- sampler = samplers[sampler_index].constructor(model)
+ def init(self):
+ self.sampler = samplers[self.sampler_index].constructor()
- def init():
- pass
-
- def sample(init_data, x, conditioning, unconditional_conditioning):
- samples_ddim, _ = sampler.sample(S=ddim_steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=cfg_scale, unconditional_conditioning=unconditional_conditioning, eta=ddim_eta, x_T=x)
+ def sample(self, x, conditioning, unconditional_conditioning):
+ samples_ddim = self.sampler.sample(self, x, conditioning, unconditional_conditioning)
return samples_ddim
- output_images, seed, info = process_images(
+
+def txt2img(prompt: str, ddim_steps: int, sampler_index: int, use_GFPGAN: bool, prompt_matrix: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, height: int, width: int):
+ outpath = opts.outdir or "outputs/txt2img-samples"
+
+ p = StableDiffusionProcessingTxt2Img(
outpath=outpath,
- func_init=init,
- func_sample=sample,
prompt=prompt,
seed=seed,
sampler_index=sampler_index,
@@ -842,7 +856,7 @@ def txt2img(prompt: str, ddim_steps: int, sampler_index: int, use_GFPGAN: bool,
use_GFPGAN=use_GFPGAN
)
- del sampler
+ output_images, seed, info = process_images(p)
return output_images, seed, plaintext_to_html(info)
@@ -858,7 +872,7 @@ class Flagging(gr.FlaggingCallback):
os.makedirs("log/images", exist_ok=True)
# those must match the "txt2img" function
- prompt, ddim_steps, sampler_name, use_GFPGAN, prompt_matrix, ddim_eta, n_iter, n_samples, cfg_scale, request_seed, height, width, images, seed, comment = flag_data
+ prompt, ddim_steps, sampler_name, use_gfpgan, prompt_matrix, n_iter, n_samples, cfg_scale, request_seed, height, width, images, seed, comment = flag_data
filenames = []
@@ -896,7 +910,6 @@ txt2img_interface = gr.Interface(
gr.Radio(label='Sampling method', choices=[x.name for x in samplers], value=samplers[0].name, type="index"),
gr.Checkbox(label='Fix faces using GFPGAN', value=False, visible=GFPGAN is not None),
gr.Checkbox(label='Create prompt matrix (separate multiple prompts using |, and get all combinations of them)', value=False),
- gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="DDIM ETA", value=0.0, visible=False),
gr.Slider(minimum=1, maximum=cmd_opts.max_batch_count, step=1, label='Batch count (how many batches of images to generate)', value=1),
gr.Slider(minimum=1, maximum=8, step=1, label='Batch size (how many images are in a batch; memory-hungry)', value=1),
gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='Classifier Free Guidance Scale (how strongly the image should follow the prompt)', value=7.0),
@@ -914,73 +927,97 @@ txt2img_interface = gr.Interface(
)
-def img2img(prompt: str, init_img, ddim_steps: int, sampler_index: int, use_GFPGAN: bool, prompt_matrix, loopback: bool, sd_upscale: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, height: int, width: int, resize_mode: int):
- outpath = opts.outdir or "outputs/img2img-samples"
+class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
+ sampler = None
- sampler = samplers_for_img2img[sampler_index].constructor(model)
+ def __init__(self, init_images=None, resize_mode=0, denoising_strength=0.75, **kwargs):
+ super().__init__(**kwargs)
- assert 0. <= denoising_strength <= 1., 'can only work with strength in [0.0, 1.0]'
+ self.init_images = init_images
+ self.resize_mode: int = resize_mode
+ self.denoising_strength: float = denoising_strength
+ self.init_latent = None
+
+ def init(self):
+ self.sampler = samplers_for_img2img[self.sampler_index].constructor()
- def init():
- image = init_img.convert("RGB")
- image = resize_image(resize_mode, image, width, height)
- image = np.array(image).astype(np.float32) / 255.0
- image = image[None].transpose(0, 3, 1, 2)
- image = torch.from_numpy(image)
+ imgs = []
+ for img in self.init_images:
+ image = img.convert("RGB")
+ image = resize_image(self.resize_mode, image, self.width, self.height)
+ image = np.array(image).astype(np.float32) / 255.0
+ image = np.moveaxis(image, 2, 0)
+ imgs.append(image)
- init_image = 2. * image - 1.
- init_image = init_image.to(device)
- init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
- init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image)) # move to latent space
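+ # a single init image is repeated across the batch; several images (as SD upscale
+ # passes) are stacked into one batch, shrinking batch_size if fewer were given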
+ if len(imgs) == 1:
+ batch_images = np.expand_dims(imgs[0], axis=0).repeat(self.batch_size, axis=0)
+ elif len(imgs) <= self.batch_size:
+ self.batch_size = len(imgs)
+ batch_images = np.array(imgs)
+ else:
+ raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
- return init_latent,
+ image = torch.from_numpy(batch_images)
+ image = 2. * image - 1.
+ image = image.to(device)
- def sample(init_data, x, conditioning, unconditional_conditioning):
- t_enc = int(denoising_strength * ddim_steps)
+ self.init_latent = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(image))
- x0, = init_data
+ def sample(self, x, conditioning, unconditional_conditioning):
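+ # denoising_strength decides how much of the schedule runs: the init latent is
+ # noised to sigma index steps - t_enc - 1, leaving only t_enc steps to denoise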
+ t_enc = int(self.denoising_strength * self.steps)
- sigmas = sampler.model_wrap.get_sigmas(ddim_steps)
- noise = x * sigmas[ddim_steps - t_enc - 1]
+ sigmas = self.sampler.model_wrap.get_sigmas(self.steps)
+ noise = x * sigmas[self.steps - t_enc - 1]
- xi = x0 + noise
- sigma_sched = sigmas[ddim_steps - t_enc - 1:]
- model_wrap_cfg = CFGDenoiser(sampler.model_wrap)
- samples_ddim = sampler.func(model_wrap_cfg, xi, sigma_sched, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': cfg_scale}, disable=False)
+ xi = self.init_latent + noise
+ sigma_sched = sigmas[self.steps - t_enc - 1:]
+ samples_ddim = self.sampler.func(self.sampler.model_wrap_cfg, xi, sigma_sched, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': self.cfg_scale}, disable=False)
return samples_ddim
+
+def img2img(prompt: str, init_img, ddim_steps: int, sampler_index: int, use_GFPGAN: bool, prompt_matrix, loopback: bool, sd_upscale: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, height: int, width: int, resize_mode: int):
+ outpath = opts.outdir or "outputs/img2img-samples"
+
+ assert 0. <= denoising_strength <= 1., 'can only work with strength in [0.0, 1.0]'
+
+ p = StableDiffusionProcessingImg2Img(
+ outpath=outpath,
+ prompt=prompt,
+ seed=seed,
+ sampler_index=sampler_index,
+ batch_size=batch_size,
+ n_iter=n_iter,
+ steps=ddim_steps,
+ cfg_scale=cfg_scale,
+ width=width,
+ height=height,
+ prompt_matrix=prompt_matrix,
+ use_GFPGAN=use_GFPGAN,
+ init_images=[init_img],
+ resize_mode=resize_mode,
+ denoising_strength=denoising_strength,
+ extra_generation_params={"Denoising Strength": denoising_strength}
+ )
+
if loopback:
output_images, info = None, None
history = []
initial_seed = None
for i in range(n_iter):
- output_images, seed, info = process_images(
- outpath=outpath,
- func_init=init,
- func_sample=sample,
- prompt=prompt,
- seed=seed,
- sampler_index=sampler_index,
- batch_size=1,
- n_iter=1,
- steps=ddim_steps,
- cfg_scale=cfg_scale,
- width=width,
- height=height,
- prompt_matrix=prompt_matrix,
- use_GFPGAN=use_GFPGAN,
- do_not_save_grid=True,
- extra_generation_params={"Denoising Strength": denoising_strength},
- )
+ p.n_iter = 1
+ p.batch_size = 1
+ p.do_not_save_grid = True
+
+ output_images, seed, info = process_images(p)
if initial_seed is None:
initial_seed = seed
- init_img = output_images[0]
- seed = seed + 1
- denoising_strength = max(denoising_strength * 0.95, 0.1)
- history.append(init_img)
+ p.init_images = [output_images[0]]
+ p.seed = seed + 1
+ p.denoising_strength = max(p.denoising_strength * 0.95, 0.1)
+ history.append(output_images[0])
grid_count = len(os.listdir(outpath)) - 1
grid = image_grid(history, batch_size, force_n_rows=1)
@@ -1000,39 +1037,36 @@ def img2img(prompt: str, init_img, ddim_steps: int, sampler_index: int, use_GFPG
grid = split_grid(img, tile_w=width, tile_h=height, overlap=opts.sd_upscale_overlap)
+ p.n_iter = 1
+ p.do_not_save_grid = True
+
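+ # flatten all tiles into a single work list so they can be processed batch_size at
+ # a time, instead of one process_images call per tile with batch_size=1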
+ work = []
+ work_results = []
+
+ for y, h, row in grid.tiles:
+ for tiledata in row:
+ work.append(tiledata[2])
+
+ batch_count = math.ceil(len(work) / p.batch_size)
+ print(f"SD upscaling will process a total of {len(work)} images tiled as {len(grid.tiles[0][2])}x{len(grid.tiles)} in a total of {batch_count} batches.")
- print(f"SD upscaling will process a total of {len(grid.tiles[0][2])}x{len(grid.tiles)} images.")
+ for i in range(batch_count):
+ p.init_images = work[i*p.batch_size:(i+1)*p.batch_size]
+ output_images, seed, info = process_images(p)
+
+ if initial_seed is None:
+ initial_seed = seed
+ initial_info = info
+
+ p.seed = seed + 1
+ work_results += output_images
+
+ image_index = 0
for y, h, row in grid.tiles:
for tiledata in row:
- init_img = tiledata[2]
-
- output_images, seed, info = process_images(
- outpath=outpath,
- func_init=init,
- func_sample=sample,
- prompt=prompt,
- seed=seed,
- sampler_index=sampler_index,
- batch_size=1, # since process_images can't work with multiple different images we have to do this for now
- n_iter=1,
- steps=ddim_steps,
- cfg_scale=cfg_scale,
- width=width,
- height=height,
- prompt_matrix=prompt_matrix,
- use_GFPGAN=use_GFPGAN,
- do_not_save_grid=True,
- extra_generation_params={"Denoising Strength": denoising_strength},
- )
-
- if initial_seed is None:
- initial_seed = seed
- initial_info = info
-
- seed += 1
-
- tiledata[2] = output_images[0]
+ tiledata[2] = work_results[image_index]
+ image_index += 1
combined_image = combine_grid(grid)
@@ -1044,25 +1078,7 @@ def img2img(prompt: str, init_img, ddim_steps: int, sampler_index: int, use_GFPG
info = initial_info
else:
- output_images, seed, info = process_images(
- outpath=outpath,
- func_init=init,
- func_sample=sample,
- prompt=prompt,
- seed=seed,
- sampler_index=sampler_index,
- batch_size=batch_size,
- n_iter=n_iter,
- steps=ddim_steps,
- cfg_scale=cfg_scale,
- width=width,
- height=height,
- prompt_matrix=prompt_matrix,
- use_GFPGAN=use_GFPGAN,
- extra_generation_params={"Denoising Strength": denoising_strength},
- )
-
- del sampler
+ output_images, seed, info = process_images(p)
return output_images, seed, plaintext_to_html(info)
@@ -1178,22 +1194,19 @@ def run_settings(*args):
def create_setting_component(key):
def fun():
- return opts.data[key] if key in opts.data else opts.data_labels[key][0]
+ return opts.data[key] if key in opts.data else opts.data_labels[key].default
+
+ info = opts.data_labels[key]
+ t = type(info.default)
- labelinfo = opts.data_labels[key]
- t = type(labelinfo[0])
- label = labelinfo[1]
- if t == str:
- item = gr.Textbox(label=label, value=fun, lines=1)
+ if info.component is not None:
+ item = info.component(label=info.label, value=fun, **(info.component_args or {}))
+ elif t == str:
+ item = gr.Textbox(label=info.label, value=fun, lines=1)
elif t == int:
- if len(labelinfo) == 5:
- item = gr.Slider(minimum=labelinfo[2], maximum=labelinfo[3], step=labelinfo[4], label=label, value=fun)
- elif len(labelinfo) == 4:
- item = gr.Slider(minimum=labelinfo[2], maximum=labelinfo[3], step=1, label=label, value=fun)
- else:
- item = gr.Number(label=label, value=fun)
+ item = gr.Number(label=info.label, value=fun)
elif t == bool:
- item = gr.Checkbox(label=label, value=fun)
+ item = gr.Checkbox(label=info.label, value=fun)
else:
raise Exception(f'bad options item type: {str(t)} for key {key}')
@@ -1219,14 +1232,14 @@ interfaces = [
(settings_interface, "Settings"),
]
-config = OmegaConf.load(cmd_opts.config)
-model = load_model_from_config(config, cmd_opts.ckpt)
+sd_config = OmegaConf.load(cmd_opts.config)
+sd_model = load_model_from_config(sd_config, cmd_opts.ckpt)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-model = (model if cmd_opts.no_half else model.half()).to(device)
+sd_model = (sd_model if cmd_opts.no_half else sd_model.half()).to(device)
model_hijack = StableDiffuionModelHijack()
-model_hijack.hijack(model)
+model_hijack.hijack(sd_model)
demo = gr.TabbedInterface(
interface_list=[x[0] for x in interfaces],