Merge branch 'master' into hypernetwork-training

author: AUTOMATIC <16777216c@gmail.com> 2022-10-11 11:14:36 +0300
committer: AUTOMATIC <16777216c@gmail.com> 2022-10-11 11:14:36 +0300
commit: 5de806184f6687e46cf936b92055146dc6cf2994 (patch)
tree: d84c2daa8798c3d2f8e99e17234a40065491182d /modules/processing.py
parent: 12c4d5c6b5bf9dd50d0601c36af4f99b65316d58 (diff)
parent: 948533950c9db5069a874d925fadd50bac00fdb5 (diff)
1 file changed, 40 insertions, 11 deletions
diff --git a/modules/processing.py b/modules/processing.py
index f773a30e..698b3069 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -46,6 +46,12 @@ def apply_color_correction(correction, image):
     return image
 
 
+def get_correct_sampler(p):
+    if isinstance(p, modules.processing.StableDiffusionProcessingTxt2Img):
+        return sd_samplers.samplers
+    elif isinstance(p, modules.processing.StableDiffusionProcessingImg2Img):
+        return sd_samplers.samplers_for_img2img
+
 class StableDiffusionProcessing:
     def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt="", styles=None, seed=-1, subseed=-1, subseed_strength=0, seed_resize_from_h=-1, seed_resize_from_w=-1, seed_enable_extras=True, sampler_index=0, batch_size=1, n_iter=1, steps=50, cfg_scale=7.0, width=512, height=512, restore_faces=False, tiling=False, do_not_save_samples=False, do_not_save_grid=False, extra_generation_params=None, overlay_images=None, negative_prompt=None, eta=None):
         self.sd_model = sd_model
@@ -123,6 +129,7 @@ class Processed:
         self.index_of_first_image = index_of_first_image
         self.styles = p.styles
         self.job_timestamp = state.job_timestamp
+        self.clip_skip = opts.CLIP_stop_at_last_layers
 
         self.eta = p.eta
         self.ddim_discretize = p.ddim_discretize
@@ -169,6 +176,7 @@ class Processed:
             "infotexts": self.infotexts,
             "styles": self.styles,
             "job_timestamp": self.job_timestamp,
+            "clip_skip": self.clip_skip,
         }
 
         return json.dumps(obj)
@@ -199,7 +207,7 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
     # enables the generation of additional tensors with noise that the sampler will use during its processing.
     # Using those pre-generated tensors instead of simple torch.randn allows a batch with seeds [100, 101] to
     # produce the same images as with two batches [100], [101].
-    if p is not None and p.sampler is not None and len(seeds) > 1 and opts.enable_batch_seeds:
+    if p is not None and p.sampler is not None and (len(seeds) > 1 and opts.enable_batch_seeds or opts.eta_noise_seed_delta > 0):
         sampler_noises = [[] for _ in range(p.sampler.number_of_needed_noises(p))]
     else:
         sampler_noises = None
@@ -239,6 +247,9 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
         if sampler_noises is not None:
             cnt = p.sampler.number_of_needed_noises(p)
 
+            if opts.eta_noise_seed_delta > 0:
+                torch.manual_seed(seed + opts.eta_noise_seed_delta)
+
             for j in range(cnt):
                 sampler_noises[j].append(devices.randn_without_seed(tuple(noise_shape)))
 
@@ -251,6 +262,13 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
     return x
 
 
+def decode_first_stage(model, x):
+    with devices.autocast(disable=x.dtype == devices.dtype_vae):
+        x = model.decode_first_stage(x)
+
+    return x
+
+
 def get_fixed_seed(seed):
     if seed is None or seed == '' or seed == -1:
         return int(random.randrange(4294967294))
@@ -266,14 +284,18 @@ def fix_seed(p):
 def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration=0, position_in_batch=0):
     index = position_in_batch + iteration * p.batch_size
 
+    clip_skip = getattr(p, 'clip_skip', opts.CLIP_stop_at_last_layers)
+
     generation_params = {
         "Steps": p.steps,
-        "Sampler": sd_samplers.samplers[p.sampler_index].name,
+        "Sampler": get_correct_sampler(p)[p.sampler_index].name,
         "CFG scale": p.cfg_scale,
         "Seed": all_seeds[index],
         "Face restoration": (opts.face_restoration_model if p.restore_faces else None),
         "Size": f"{p.width}x{p.height}",
         "Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash),
+        "Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')),
+        "Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name.replace(',', '').replace(':', '')),
         "Batch size": (None if p.batch_size < 2 else p.batch_size),
         "Batch pos": (None if p.batch_size < 2 else position_in_batch),
         "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]),
@@ -281,6 +303,8 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
         "Seed resize from": (None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"),
         "Denoising strength": getattr(p, 'denoising_strength', None),
         "Eta": (None if p.sampler is None or p.sampler.eta == p.sampler.default_eta else p.sampler.eta),
+        "Clip skip": None if clip_skip <= 1 else clip_skip,
+        "ENSD": None if opts.eta_noise_seed_delta == 0 else opts.eta_noise_seed_delta,
     }
 
     generation_params.update(p.extra_generation_params)
@@ -312,6 +336,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
         os.makedirs(p.outpath_grids, exist_ok=True)
 
     modules.sd_hijack.model_hijack.apply_circular(p.tiling)
+    modules.sd_hijack.model_hijack.clear_comments()
 
     comments = {}
 
@@ -341,7 +366,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
     infotexts = []
     output_images = []
 
-    with torch.no_grad():
+    with torch.no_grad(), p.sd_model.ema_scope():
         with devices.autocast():
             p.init(all_prompts, all_seeds, all_subseeds)
 
@@ -349,6 +374,9 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
             state.job_count = p.n_iter
 
         for n in range(p.n_iter):
+            if state.skipped:
+                state.skipped = False
+            
             if state.interrupted:
                 break
 
@@ -375,15 +403,14 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
             with devices.autocast():
                 samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength)
 
-            if state.interrupted:
+            if state.interrupted or state.skipped:
 
-                # if we are interruped, sample returns just noise
+                # if we are interrupted, sample returns just noise
                 # use the image collected previously in sampler loop
                 samples_ddim = shared.state.current_latent
 
-            samples_ddim = samples_ddim.to(devices.dtype)
-
-            x_samples_ddim = p.sd_model.decode_first_stage(samples_ddim)
+            samples_ddim = samples_ddim.to(devices.dtype_vae)
+            x_samples_ddim = decode_first_stage(p.sd_model, samples_ddim)
             x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
 
             del samples_ddim
@@ -436,7 +463,8 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
 
                 text = infotext(n, i)
                 infotexts.append(text)
-                image.info["parameters"] = text
+                if opts.enable_pnginfo:
+                    image.info["parameters"] = text
                 output_images.append(image)
 
             del x_samples_ddim 
@@ -455,7 +483,8 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
             if opts.return_grid:
                 text = infotext()
                 infotexts.insert(0, text)
-                grid.info["parameters"] = text
+                if opts.enable_pnginfo:
+                    grid.info["parameters"] = text
                 output_images.insert(0, grid)
                 index_of_first_image = 1
 
@@ -514,7 +543,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         if self.scale_latent:
             samples = torch.nn.functional.interpolate(samples, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
         else:
-            decoded_samples = self.sd_model.decode_first_stage(samples)
+            decoded_samples = decode_first_stage(self.sd_model, samples)
 
             if opts.upscaler_for_img2img is None or opts.upscaler_for_img2img == "None":
                 decoded_samples = torch.nn.functional.interpolate(decoded_samples, size=(self.height, self.width), mode="bilinear")
author	AUTOMATIC <16777216c@gmail.com>	2022-10-11 11:14:36 +0300
committer	AUTOMATIC <16777216c@gmail.com>	2022-10-11 11:14:36 +0300
commit	5de806184f6687e46cf936b92055146dc6cf2994 (patch)
tree	d84c2daa8798c3d2f8e99e17234a40065491182d /modules/processing.py
parent	12c4d5c6b5bf9dd50d0601c36af4f99b65316d58 (diff)
parent	948533950c9db5069a874d925fadd50bac00fdb5 (diff)