From e715e46b6aa7f2e5e147cfa1fa2f49b1d926a074 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Tue, 28 Nov 2023 16:10:22 -0700 Subject: Implements "scheduling" for blending of the original latents and a latent blending formula that preserves details in blend transition areas. --- modules/sd_samplers_cfg_denoiser.py | 61 +++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index b8101d38..c4d6fda6 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -43,6 +43,9 @@ class CFGDenoiser(torch.nn.Module): self.model_wrap = None self.mask = None self.nmask = None + self.mask_blend_power = 1 + self.mask_blend_scale = 1 + self.mask_blend_offset = 0 self.init_latent = None self.steps = None """number of steps as specified by user in UI""" @@ -56,6 +59,9 @@ class CFGDenoiser(torch.nn.Module): self.sampler = sampler self.model_wrap = None self.p = None + + # NOTE: masking before denoising can cause the original latents to be oversmoothed + # as the original latents do not have noise self.mask_before_denoising = False @property @@ -89,6 +95,55 @@ class CFGDenoiser(torch.nn.Module): self.sampler.sampler_extra_args['uncond'] = uc def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond): + def latent_blend(a, b, t): + """ + Interpolates two latent image representations according to the parameter t, + where the interpolated vectors' magnitudes are also interpolated separately. + The "detail_preservation" factor biases the magnitude interpolation towards + the larger of the two magnitudes. + """ + # Record the original latent vector magnitudes. + # We bring them to a power so that larger magnitudes are favored over smaller ones. + # 64-bit operations are used here to allow large exponents. + detail_preservation = 32 + a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64) ** detail_preservation + b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64) ** detail_preservation + + one_minus_t = 1 - t + + # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). + interp_magnitude = (a_magnitude * one_minus_t + b_magnitude * t) ** (1 / detail_preservation) + + # Linearly interpolate the image vectors. + image_interp = a * one_minus_t + b * t + + # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.) + # 64-bit operations are used here to allow large exponents. + image_interp_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64) + 0.0001 + + # Change the linearly interpolated image vectors' magnitudes to the value we want. + # This is the last 64-bit operation. + image_interp *= (interp_magnitude / image_interp_magnitude).to(image_interp.dtype) + + return image_interp + + def get_modified_nmask(nmask, _sigma): + """ + Converts a negative mask representing the transparency of the original latent vectors being overlayed + to a mask that is scaled according to the denoising strength for this step. + + Where: + 0 = fully opaque, infinite density, fully masked + 1 = fully transparent, zero density, fully unmasked + + We bring this transparency to a power, as this allows one to simulate N number of blending operations + where N can be any positive real value. Using this one can control the balance of influence between + the denoiser and the original latents according to the sigma value. + + NOTE: "mask" is not used + """ + return torch.pow(nmask, (_sigma ** self.mask_blend_power) * self.mask_blend_scale + self.mask_blend_offset) + if state.interrupted or state.skipped: raise sd_samplers_common.InterruptedException @@ -105,8 +160,9 @@ class CFGDenoiser(torch.nn.Module): assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)" + # Blend in the original latents (before) if self.mask_before_denoising and self.mask is not None: - x = self.init_latent * self.mask + self.nmask * x + x = latent_blend(self.init_latent, x, get_modified_nmask(self.nmask, sigma)) batch_size = len(conds_list) repeats = [len(conds_list[i]) for i in range(batch_size)] @@ -207,8 +263,9 @@ class CFGDenoiser(torch.nn.Module): else: denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale) + # Blend in the original latents (after) if not self.mask_before_denoising and self.mask is not None: - denoised = self.init_latent * self.mask + self.nmask * denoised + denoised = latent_blend(self.init_latent, denoised, get_modified_nmask(self.nmask, sigma)) self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma) -- cgit v1.2.1 From c5c7fa06aae1ae9f8b6d29ae2da3874921d4729b Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Tue, 28 Nov 2023 22:35:07 -0700 Subject: Added slider for detail preservation strength, removed largely needless offset parameter, changed labels in UI and for saving to/pasting data from PNG files. --- modules/sd_samplers_cfg_denoiser.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index c4d6fda6..598cd487 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -45,7 +45,7 @@ class CFGDenoiser(torch.nn.Module): self.nmask = None self.mask_blend_power = 1 self.mask_blend_scale = 1 - self.mask_blend_offset = 0 + self.inpaint_detail_preservation = 16 self.init_latent = None self.steps = None """number of steps as specified by user in UI""" @@ -105,14 +105,13 @@ class CFGDenoiser(torch.nn.Module): # Record the original latent vector magnitudes. # We bring them to a power so that larger magnitudes are favored over smaller ones. # 64-bit operations are used here to allow large exponents. - detail_preservation = 32 - a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64) ** detail_preservation - b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64) ** detail_preservation + a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64) ** self.inpaint_detail_preservation + b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64) ** self.inpaint_detail_preservation one_minus_t = 1 - t # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). - interp_magnitude = (a_magnitude * one_minus_t + b_magnitude * t) ** (1 / detail_preservation) + interp_magnitude = (a_magnitude * one_minus_t + b_magnitude * t) ** (1 / self.inpaint_detail_preservation) # Linearly interpolate the image vectors. image_interp = a * one_minus_t + b * t @@ -142,7 +141,7 @@ class CFGDenoiser(torch.nn.Module): NOTE: "mask" is not used """ - return torch.pow(nmask, (_sigma ** self.mask_blend_power) * self.mask_blend_scale + self.mask_blend_offset) + return torch.pow(nmask, (_sigma ** self.mask_blend_power) * self.mask_blend_scale) if state.interrupted or state.skipped: raise sd_samplers_common.InterruptedException -- cgit v1.2.1 From c7a1ff87207544dd4bcf3aefffa67a4a38678c16 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Tue, 28 Nov 2023 23:31:10 -0700 Subject: Tweaked default values. --- modules/sd_samplers_cfg_denoiser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index 598cd487..ceb612d7 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -44,8 +44,8 @@ class CFGDenoiser(torch.nn.Module): self.mask = None self.nmask = None self.mask_blend_power = 1 - self.mask_blend_scale = 1 - self.inpaint_detail_preservation = 16 + self.mask_blend_scale = 0.5 + self.inpaint_detail_preservation = 4 self.init_latent = None self.steps = None """number of steps as specified by user in UI""" -- cgit v1.2.1 From bb04d400c95df01d191ef6c1a43e66b95425fa33 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Sat, 2 Dec 2023 21:08:26 -0700 Subject: Rewrote latent_blend() to use in-place operations and to aggressively "del" references with the intention of minimizing allocations and easing garbage collection. --- modules/sd_samplers_cfg_denoiser.py | 41 +++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index ceb612d7..efbe7a40 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -102,29 +102,44 @@ class CFGDenoiser(torch.nn.Module): The "detail_preservation" factor biases the magnitude interpolation towards the larger of the two magnitudes. """ - # Record the original latent vector magnitudes. - # We bring them to a power so that larger magnitudes are favored over smaller ones. - # 64-bit operations are used here to allow large exponents. - a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64) ** self.inpaint_detail_preservation - b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64) ** self.inpaint_detail_preservation + # NOTE: We use inplace operations wherever possible. one_minus_t = 1 - t - # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). - interp_magnitude = (a_magnitude * one_minus_t + b_magnitude * t) ** (1 / self.inpaint_detail_preservation) - # Linearly interpolate the image vectors. - image_interp = a * one_minus_t + b * t + a_scaled = a * one_minus_t + b_scaled = b * t + image_interp = a_scaled + image_interp.add_(b_scaled) + result_type = image_interp.dtype + del a_scaled, b_scaled # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.) # 64-bit operations are used here to allow large exponents. - image_interp_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64) + 0.0001 + current_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64).add_(0.00001) + + # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). + a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(self.inpaint_detail_preservation) * one_minus_t + b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(self.inpaint_detail_preservation) * t + desired_magnitude = a_magnitude + desired_magnitude.add_(b_magnitude).pow_(1 / self.inpaint_detail_preservation) + del a_magnitude, b_magnitude, one_minus_t # Change the linearly interpolated image vectors' magnitudes to the value we want. # This is the last 64-bit operation. - image_interp *= (interp_magnitude / image_interp_magnitude).to(image_interp.dtype) - - return image_interp + image_interp_scaling_factor = desired_magnitude + image_interp_scaling_factor.div_(current_magnitude) + image_interp_scaled = image_interp + image_interp_scaled.mul_(image_interp_scaling_factor) + del current_magnitude + del desired_magnitude + del image_interp + del image_interp_scaling_factor + + image_interp_scaled = image_interp_scaled.to(result_type) + del result_type + + return image_interp_scaled def get_modified_nmask(nmask, _sigma): """ -- cgit v1.2.1 From aaacf4823241450d88315af9d465d6815119fe0d Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Mon, 4 Dec 2023 01:27:22 -0700 Subject: Organized the settings and UI of soft inpainting to allow for toggling the feature, and centralizes default values to reduce the amount of copy-pasta. --- modules/sd_samplers_cfg_denoiser.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index efbe7a40..0ee0b7dd 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -6,6 +6,7 @@ import modules.shared as shared from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback from modules.script_callbacks import CFGDenoisedParams, cfg_denoised_callback from modules.script_callbacks import AfterCFGCallbackParams, cfg_after_cfg_callback +import modules.soft_inpainting as si def catenate_conds(conds): @@ -43,9 +44,7 @@ class CFGDenoiser(torch.nn.Module): self.model_wrap = None self.mask = None self.nmask = None - self.mask_blend_power = 1 - self.mask_blend_scale = 0.5 - self.inpaint_detail_preservation = 4 + self.soft_inpainting: si.SoftInpaintingParameters = None self.init_latent = None self.steps = None """number of steps as specified by user in UI""" @@ -95,7 +94,8 @@ class CFGDenoiser(torch.nn.Module): self.sampler.sampler_extra_args['uncond'] = uc def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond): - def latent_blend(a, b, t): + def latent_blend(a, b, t, one_minus_t=None): + """ Interpolates two latent image representations according to the parameter t, where the interpolated vectors' magnitudes are also interpolated separately. @@ -104,7 +104,11 @@ class CFGDenoiser(torch.nn.Module): """ # NOTE: We use inplace operations wherever possible. - one_minus_t = 1 - t + if one_minus_t is None: + one_minus_t = 1 - t + + if self.soft_inpainting is None: + return a * one_minus_t + b * t # Linearly interpolate the image vectors. a_scaled = a * one_minus_t @@ -119,10 +123,10 @@ class CFGDenoiser(torch.nn.Module): current_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64).add_(0.00001) # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). - a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(self.inpaint_detail_preservation) * one_minus_t - b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(self.inpaint_detail_preservation) * t + a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * one_minus_t + b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * t desired_magnitude = a_magnitude - desired_magnitude.add_(b_magnitude).pow_(1 / self.inpaint_detail_preservation) + desired_magnitude.add_(b_magnitude).pow_(1 / self.soft_inpainting.inpaint_detail_preservation) del a_magnitude, b_magnitude, one_minus_t # Change the linearly interpolated image vectors' magnitudes to the value we want. @@ -156,7 +160,10 @@ class CFGDenoiser(torch.nn.Module): NOTE: "mask" is not used """ - return torch.pow(nmask, (_sigma ** self.mask_blend_power) * self.mask_blend_scale) + if self.soft_inpainting is None: + return nmask + + return torch.pow(nmask, (_sigma ** self.soft_inpainting.mask_blend_power) * self.soft_inpainting.mask_blend_scale) if state.interrupted or state.skipped: raise sd_samplers_common.InterruptedException @@ -176,7 +183,10 @@ class CFGDenoiser(torch.nn.Module): # Blend in the original latents (before) if self.mask_before_denoising and self.mask is not None: - x = latent_blend(self.init_latent, x, get_modified_nmask(self.nmask, sigma)) + if self.soft_inpainting is None: + x = latent_blend(self.init_latent, x, self.nmask, self.mask) + else: + x = latent_blend(self.init_latent, x, get_modified_nmask(self.nmask, sigma)) batch_size = len(conds_list) repeats = [len(conds_list[i]) for i in range(batch_size)] @@ -279,7 +289,10 @@ class CFGDenoiser(torch.nn.Module): # Blend in the original latents (after) if not self.mask_before_denoising and self.mask is not None: - denoised = latent_blend(self.init_latent, denoised, get_modified_nmask(self.nmask, sigma)) + if self.soft_inpainting is None: + denoised = latent_blend(self.init_latent, denoised, self.nmask, self.mask) + else: + denoised = latent_blend(self.init_latent, denoised, get_modified_nmask(self.nmask, sigma)) self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma) -- cgit v1.2.1 From 976c1053efeb5054692ed3cfa294cf79196f3946 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Mon, 4 Dec 2023 16:06:58 -0700 Subject: Cleaned up code, moved main code contributions into soft_inpainting.py --- modules/sd_samplers_cfg_denoiser.py | 84 +++++-------------------------------- 1 file changed, 10 insertions(+), 74 deletions(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index 0ee0b7dd..a700e692 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -94,76 +94,6 @@ class CFGDenoiser(torch.nn.Module): self.sampler.sampler_extra_args['uncond'] = uc def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond): - def latent_blend(a, b, t, one_minus_t=None): - - """ - Interpolates two latent image representations according to the parameter t, - where the interpolated vectors' magnitudes are also interpolated separately. - The "detail_preservation" factor biases the magnitude interpolation towards - the larger of the two magnitudes. - """ - # NOTE: We use inplace operations wherever possible. - - if one_minus_t is None: - one_minus_t = 1 - t - - if self.soft_inpainting is None: - return a * one_minus_t + b * t - - # Linearly interpolate the image vectors. - a_scaled = a * one_minus_t - b_scaled = b * t - image_interp = a_scaled - image_interp.add_(b_scaled) - result_type = image_interp.dtype - del a_scaled, b_scaled - - # Calculate the magnitude of the interpolated vectors. (We will remove this magnitude.) - # 64-bit operations are used here to allow large exponents. - current_magnitude = torch.norm(image_interp, p=2, dim=1).to(torch.float64).add_(0.00001) - - # Interpolate the powered magnitudes, then un-power them (bring them back to a power of 1). - a_magnitude = torch.norm(a, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * one_minus_t - b_magnitude = torch.norm(b, p=2, dim=1).to(torch.float64).pow_(self.soft_inpainting.inpaint_detail_preservation) * t - desired_magnitude = a_magnitude - desired_magnitude.add_(b_magnitude).pow_(1 / self.soft_inpainting.inpaint_detail_preservation) - del a_magnitude, b_magnitude, one_minus_t - - # Change the linearly interpolated image vectors' magnitudes to the value we want. - # This is the last 64-bit operation. - image_interp_scaling_factor = desired_magnitude - image_interp_scaling_factor.div_(current_magnitude) - image_interp_scaled = image_interp - image_interp_scaled.mul_(image_interp_scaling_factor) - del current_magnitude - del desired_magnitude - del image_interp - del image_interp_scaling_factor - - image_interp_scaled = image_interp_scaled.to(result_type) - del result_type - - return image_interp_scaled - - def get_modified_nmask(nmask, _sigma): - """ - Converts a negative mask representing the transparency of the original latent vectors being overlayed - to a mask that is scaled according to the denoising strength for this step. - - Where: - 0 = fully opaque, infinite density, fully masked - 1 = fully transparent, zero density, fully unmasked - - We bring this transparency to a power, as this allows one to simulate N number of blending operations - where N can be any positive real value. Using this one can control the balance of influence between - the denoiser and the original latents according to the sigma value. - - NOTE: "mask" is not used - """ - if self.soft_inpainting is None: - return nmask - - return torch.pow(nmask, (_sigma ** self.soft_inpainting.mask_blend_power) * self.soft_inpainting.mask_blend_scale) if state.interrupted or state.skipped: raise sd_samplers_common.InterruptedException @@ -184,9 +114,12 @@ class CFGDenoiser(torch.nn.Module): # Blend in the original latents (before) if self.mask_before_denoising and self.mask is not None: if self.soft_inpainting is None: - x = latent_blend(self.init_latent, x, self.nmask, self.mask) + x = self.init_latent * self.mask + self.nmask * x else: - x = latent_blend(self.init_latent, x, get_modified_nmask(self.nmask, sigma)) + x = si.latent_blend(self.soft_inpainting, + self.init_latent, + x, + si.get_modified_nmask(self.soft_inpainting, self.nmask, sigma)) batch_size = len(conds_list) repeats = [len(conds_list[i]) for i in range(batch_size)] @@ -290,9 +223,12 @@ class CFGDenoiser(torch.nn.Module): # Blend in the original latents (after) if not self.mask_before_denoising and self.mask is not None: if self.soft_inpainting is None: - denoised = latent_blend(self.init_latent, denoised, self.nmask, self.mask) + denoised = self.init_latent * self.mask + self.nmask * denoised else: - denoised = latent_blend(self.init_latent, denoised, get_modified_nmask(self.nmask, sigma)) + denoised = si.latent_blend(self.soft_inpainting, + self.init_latent, + denoised, + si.get_modified_nmask(self.soft_inpainting, self.nmask, sigma)) self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma) -- cgit v1.2.1 From e90d4334ad37024a802f4ef27069b625a6508f72 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Wed, 6 Dec 2023 16:54:42 -0700 Subject: A custom blending function can be provided by p, replacing the use of soft_inpainting. --- modules/sd_samplers_cfg_denoiser.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index a700e692..f13e8dcc 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -6,7 +6,6 @@ import modules.shared as shared from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback from modules.script_callbacks import CFGDenoisedParams, cfg_denoised_callback from modules.script_callbacks import AfterCFGCallbackParams, cfg_after_cfg_callback -import modules.soft_inpainting as si def catenate_conds(conds): @@ -44,7 +43,6 @@ class CFGDenoiser(torch.nn.Module): self.model_wrap = None self.mask = None self.nmask = None - self.soft_inpainting: si.SoftInpaintingParameters = None self.init_latent = None self.steps = None """number of steps as specified by user in UI""" @@ -94,7 +92,6 @@ class CFGDenoiser(torch.nn.Module): self.sampler.sampler_extra_args['uncond'] = uc def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond): - if state.interrupted or state.skipped: raise sd_samplers_common.InterruptedException @@ -111,15 +108,24 @@ class CFGDenoiser(torch.nn.Module): assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)" + # If we use masks, blending between the denoised and original latent images occurs here. + def apply_blend(latent): + if hasattr(self.p, "denoiser_masked_blend_function") and callable(self.p.denoiser_masked_blend_function): + return self.p.denoiser_masked_blend_function( + self, + # Using an argument dictionary so that arguments can be added without breaking extensions. + args= + { + "denoiser": self, + "current_latent": latent, + "sigma": sigma + }) + else: + return self.init_latent * self.mask + self.nmask * latent + # Blend in the original latents (before) if self.mask_before_denoising and self.mask is not None: - if self.soft_inpainting is None: - x = self.init_latent * self.mask + self.nmask * x - else: - x = si.latent_blend(self.soft_inpainting, - self.init_latent, - x, - si.get_modified_nmask(self.soft_inpainting, self.nmask, sigma)) + x = apply_blend(x) batch_size = len(conds_list) repeats = [len(conds_list[i]) for i in range(batch_size)] @@ -222,13 +228,7 @@ class CFGDenoiser(torch.nn.Module): # Blend in the original latents (after) if not self.mask_before_denoising and self.mask is not None: - if self.soft_inpainting is None: - denoised = self.init_latent * self.mask + self.nmask * denoised - else: - denoised = si.latent_blend(self.soft_inpainting, - self.init_latent, - denoised, - si.get_modified_nmask(self.soft_inpainting, self.nmask, sigma)) + denoised = apply_blend(denoised) self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma) -- cgit v1.2.1 From ac4578912395627731f2cd8529f87a95df1f7644 Mon Sep 17 00:00:00 2001 From: CodeHatchling Date: Wed, 6 Dec 2023 21:16:27 -0700 Subject: Removed soft inpainting, added hooks for softpainting to work instead. --- modules/sd_samplers_cfg_denoiser.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index f13e8dcc..eb9d5daf 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -109,19 +109,16 @@ class CFGDenoiser(torch.nn.Module): assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)" # If we use masks, blending between the denoised and original latent images occurs here. - def apply_blend(latent): - if hasattr(self.p, "denoiser_masked_blend_function") and callable(self.p.denoiser_masked_blend_function): - return self.p.denoiser_masked_blend_function( - self, - # Using an argument dictionary so that arguments can be added without breaking extensions. - args= - { - "denoiser": self, - "current_latent": latent, - "sigma": sigma - }) - else: - return self.init_latent * self.mask + self.nmask * latent + def apply_blend(current_latent): + blended_latent = current_latent * self.nmask + self.init_latent * self.mask + + if self.p.scripts is not None: + from modules import scripts + mba = scripts.MaskBlendArgs(current_latent, self.nmask, self.init_latent, self.mask, blended_latent, denoiser=self, sigma=sigma) + self.p.scripts.on_mask_blend(self.p, mba) + blended_latent = mba.blended_latent + + return blended_latent # Blend in the original latents (before) if self.mask_before_denoising and self.mask is not None: -- cgit v1.2.1 From 425507bd10c55f1f804eb5015db74520668f46f9 Mon Sep 17 00:00:00 2001 From: continue-revolution Date: Sun, 7 Jan 2024 10:25:01 -0600 Subject: add p to cfgdenoiserparams --- modules/sd_samplers_cfg_denoiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index eb9d5daf..f4ded6bd 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -146,7 +146,7 @@ class CFGDenoiser(torch.nn.Module): sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma]) image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_uncond] + [torch.zeros_like(self.init_latent)]) - denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps, tensor, uncond) + denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps, tensor, uncond, self.p) cfg_denoiser_callback(denoiser_params) x_in = denoiser_params.x image_cond_in = denoiser_params.image_cond -- cgit v1.2.1 From f56cebf5ba24313447b2204c3f804379767201c9 Mon Sep 17 00:00:00 2001 From: continue-revolution Date: Sun, 7 Jan 2024 12:35:35 -0600 Subject: add self instead --- modules/sd_samplers_cfg_denoiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index f4ded6bd..6d76aa96 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -146,7 +146,7 @@ class CFGDenoiser(torch.nn.Module): sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma]) image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_uncond] + [torch.zeros_like(self.init_latent)]) - denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps, tensor, uncond, self.p) + denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps, tensor, uncond, self) cfg_denoiser_callback(denoiser_params) x_in = denoiser_params.x image_cond_in = denoiser_params.image_cond -- cgit v1.2.1 From 757dda9ade9d47cb2a755dad0475c8c4fbcaa114 Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Sat, 27 Jan 2024 22:30:12 +0300 Subject: Add Pad conds v0 option --- modules/sd_samplers_cfg_denoiser.py | 70 ++++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 9 deletions(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index 6d76aa96..ef237396 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -53,6 +53,7 @@ class CFGDenoiser(torch.nn.Module): self.step = 0 self.image_cfg_scale = None self.padded_cond_uncond = False + self.padded_cond_uncond_v0 = False self.sampler = sampler self.model_wrap = None self.p = None @@ -91,6 +92,62 @@ class CFGDenoiser(torch.nn.Module): self.sampler.sampler_extra_args['cond'] = c self.sampler.sampler_extra_args['uncond'] = uc + def pad_cond_uncond(self, cond, uncond): + empty = shared.sd_model.cond_stage_model_empty_prompt + num_repeats = (cond.shape[1] - cond.shape[1]) // empty.shape[1] + + if num_repeats < 0: + cond = pad_cond(cond, -num_repeats, empty) + self.padded_cond_uncond = True + elif num_repeats > 0: + uncond = pad_cond(uncond, num_repeats, empty) + self.padded_cond_uncond = True + + return cond, uncond + + def pad_cond_uncond_v0(self, cond, uncond): + """ + Pads the 'uncond' tensor to match the shape of the 'cond' tensor. + + If 'uncond' is a dictionary, it is assumed that the 'crossattn' key holds the tensor to be padded. + If 'uncond' is a tensor, it is padded directly. + + If the number of columns in 'uncond' is less than the number of columns in 'cond', the last column of 'uncond' + is repeated to match the number of columns in 'cond'. + + If the number of columns in 'uncond' is greater than the number of columns in 'cond', 'uncond' is truncated + to match the number of columns in 'cond'. + + Args: + cond (torch.Tensor or DictWithShape): The condition tensor to match the shape of 'uncond'. + uncond (torch.Tensor or DictWithShape): The tensor to be padded, or a dictionary containing the tensor to be padded. + + Returns: + tuple: A tuple containing the 'cond' tensor and the padded 'uncond' tensor. + + Note: + This is the padding that was always used in DDIM before version 1.6.0 + """ + + is_dict_cond = isinstance(uncond, dict) + uncond_vec = uncond['crossattn'] if is_dict_cond else uncond + + if uncond_vec.shape[1] < cond.shape[1]: + last_vector = uncond_vec[:, -1:] + last_vector_repeated = last_vector.repeat([1, cond.shape[1] - uncond_vec.shape[1], 1]) + uncond_vec = torch.hstack([uncond_vec, last_vector_repeated]) + self.padded_cond_uncond_v0 = True + elif uncond_vec.shape[1] > cond.shape[1]: + uncond_vec = uncond_vec[:, :cond.shape[1]] + self.padded_cond_uncond_v0 = True + + if is_dict_cond: + uncond['crossattn'] = uncond_vec + else: + uncond = uncond_vec + + return cond, uncond + def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond): if state.interrupted or state.skipped: raise sd_samplers_common.InterruptedException @@ -162,16 +219,11 @@ class CFGDenoiser(torch.nn.Module): sigma_in = sigma_in[:-batch_size] self.padded_cond_uncond = False + self.padded_cond_uncond_v0 = False if shared.opts.pad_cond_uncond and tensor.shape[1] != uncond.shape[1]: - empty = shared.sd_model.cond_stage_model_empty_prompt - num_repeats = (tensor.shape[1] - uncond.shape[1]) // empty.shape[1] - - if num_repeats < 0: - tensor = pad_cond(tensor, -num_repeats, empty) - self.padded_cond_uncond = True - elif num_repeats > 0: - uncond = pad_cond(uncond, num_repeats, empty) - self.padded_cond_uncond = True + tensor, uncond = self.pad_cond_uncond(tensor, uncond) + elif shared.opts.pad_cond_uncond_v0 and tensor.shape[1] != uncond.shape[1]: + tensor, uncond = self.pad_cond_uncond_v0(tensor, uncond) if tensor.shape[1] == uncond.shape[1] or skip_uncond: if is_edit_model: -- cgit v1.2.1 From baaf39b6f92f24275a1b264a634514bac571dfae Mon Sep 17 00:00:00 2001 From: AUTOMATIC1111 <16777216c@gmail.com> Date: Mon, 29 Jan 2024 10:20:27 +0300 Subject: fix the typo -- thanks Cyberbeing --- modules/sd_samplers_cfg_denoiser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/sd_samplers_cfg_denoiser.py') diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py index ef237396..941dff4b 100644 --- a/modules/sd_samplers_cfg_denoiser.py +++ b/modules/sd_samplers_cfg_denoiser.py @@ -94,7 +94,7 @@ class CFGDenoiser(torch.nn.Module): def pad_cond_uncond(self, cond, uncond): empty = shared.sd_model.cond_stage_model_empty_prompt - num_repeats = (cond.shape[1] - cond.shape[1]) // empty.shape[1] + num_repeats = (cond.shape[1] - uncond.shape[1]) // empty.shape[1] if num_repeats < 0: cond = pad_cond(cond, -num_repeats, empty) -- cgit v1.2.1