From 594c8e7b263d9b37f4b18b56b159aeb6d1bba1b4 Mon Sep 17 00:00:00 2001
From: AUTOMATIC1111 <16777216c@gmail.com>
Date: Thu, 13 Jul 2023 11:35:52 +0300
Subject: fix CLIP doing the unneeded normalization; revert SD2.1 back to use
 the original repo; add SDXL's force_zero_embeddings to negative prompt

---
 modules/processing.py       |  2 +-
 modules/prompt_parser.py    | 14 ++++++++++----
 modules/sd_hijack.py        |  2 +-
 modules/sd_hijack_clip.py   | 15 +++++++++++++++
 modules/sd_models_config.py |  1 -
 modules/sd_models_xl.py     |  3 ++-
 6 files changed, 29 insertions(+), 8 deletions(-)

(limited to 'modules')

diff --git a/modules/processing.py b/modules/processing.py
index 85d35423..f01a6907 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -344,7 +344,7 @@ class StableDiffusionProcessing:
 
     def setup_conds(self):
         prompts = prompt_parser.SdConditioning(self.prompts, width=self.width, height=self.height)
-        negative_prompts = prompt_parser.SdConditioning(self.negative_prompts, width=self.width, height=self.height)
+        negative_prompts = prompt_parser.SdConditioning(self.negative_prompts, width=self.width, height=self.height, is_negative_prompt=True)
 
         sampler_config = sd_samplers.find_sampler_config(self.sampler_name)
         self.step_multiplier = 2 if sampler_config and sampler_config.options.get("second_order", False) else 1
diff --git a/modules/prompt_parser.py b/modules/prompt_parser.py
index 33810669..b29d079d 100644
--- a/modules/prompt_parser.py
+++ b/modules/prompt_parser.py
@@ -116,11 +116,17 @@ class SdConditioning(list):
     A list with prompts for stable diffusion's conditioner model.
     Can also specify width and height of created image - SDXL needs it.
     """
-    def __init__(self, prompts, width=None, height=None):
+    def __init__(self, prompts, width=None, height=None, is_negative_prompt=False, copy_from=None):
         super().__init__()
         self.extend(prompts)
-        self.width = width or getattr(prompts, 'width', None)
-        self.height = height or getattr(prompts, 'height', None)
+        # Unset/falsy arguments fall back to the matching attribute of copy_from (prompts itself by default).
+        if copy_from is None:
+            copy_from = prompts
+
+        self.is_negative_prompt = is_negative_prompt or getattr(copy_from, 'is_negative_prompt', False)
+        self.width = width or getattr(copy_from, 'width', None)
+        self.height = height or getattr(copy_from, 'height', None)
+
 
 
 def get_learned_conditioning(model, prompts: SdConditioning | list[str], steps):
@@ -153,7 +159,7 @@ def get_learned_conditioning(model, prompts: SdConditioning | list[str], steps):
             res.append(cached)
             continue
 
-        texts = [x[1] for x in prompt_schedule]
+        texts = SdConditioning([x[1] for x in prompt_schedule], copy_from=prompts)
         conds = model.get_learned_conditioning(texts)
 
         cond_schedule = []
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 266811f9..647cdfbe 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -190,7 +190,7 @@ class StableDiffusionModelHijack:
                 if typename == 'FrozenCLIPEmbedder':
                     model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
                     model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self)
-                    m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderWithCustomWords(embedder, self)
+                    m.cond_stage_model = sd_hijack_clip.FrozenCLIPEmbedderForSDXLWithCustomWords(embedder, self)
                     conditioner.embedders[i] = m.cond_stage_model
                 if typename == 'FrozenOpenCLIPEmbedder2':
                     embedder.model.token_embedding = EmbeddingsWithFixes(embedder.model.token_embedding, self)
diff --git a/modules/sd_hijack_clip.py b/modules/sd_hijack_clip.py
index 6c17a81d..b3771909 100644
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -323,3 +323,18 @@ class FrozenCLIPEmbedderWithCustomWords(FrozenCLIPEmbedderWithCustomWordsBase):
         embedded = embedding_layer.token_embedding.wrapped(ids.to(embedding_layer.token_embedding.wrapped.weight.device)).squeeze(0)
 
         return embedded
+
+
+class FrozenCLIPEmbedderForSDXLWithCustomWords(FrozenCLIPEmbedderWithCustomWords):
+    """CLIP embedder used for SDXL; returns the selected transformer output with no post-processing here."""
+
+    def encode_with_transformers(self, tokens):
+        # Intermediate hidden states are requested only when the wrapped embedder's layer is "hidden".
+        outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=self.wrapped.layer == "hidden")
+
+        if self.wrapped.layer == "last":
+            z = outputs.last_hidden_state
+        else:
+            z = outputs.hidden_states[self.wrapped.layer_idx]
+
+        return z
diff --git a/modules/sd_models_config.py b/modules/sd_models_config.py
index 2e92479a..04c09ab0 100644
--- a/modules/sd_models_config.py
+++ b/modules/sd_models_config.py
@@ -12,7 +12,6 @@ sd_xl_repo_configs_path = os.path.join(paths.paths['Stable Diffusion XL'], "conf
 config_default = shared.sd_default_config
 config_sd2 = os.path.join(sd_repo_configs_path, "v2-inference.yaml")
 config_sd2v = os.path.join(sd_repo_configs_path, "v2-inference-v.yaml")
-config_sd2v = os.path.join(sd_xl_repo_configs_path, "sd_2_1_768.yaml")
 config_sd2_inpainting = os.path.join(sd_repo_configs_path, "v2-inpainting-inference.yaml")
 config_sdxl = os.path.join(sd_xl_repo_configs_path, "sd_xl_base.yaml")
 config_depth_model = os.path.join(sd_repo_configs_path, "v2-midas-inference.yaml")
diff --git a/modules/sd_models_xl.py b/modules/sd_models_xl.py
index 1dd4459f..b799ff46 100644
--- a/modules/sd_models_xl.py
+++ b/modules/sd_models_xl.py
@@ -22,7 +22,8 @@ def get_learned_conditioning(self: sgm.models.diffusion.DiffusionEngine, batch:
         "target_size_as_tuple": torch.tensor([height, width]).repeat(len(batch), 1).to(devices.device, devices.dtype),
     }
 
-    c = self.conditioner(sdxl_conds)
+    force_zero_negative_prompt = getattr(batch, 'is_negative_prompt', False) and all(x == '' for x in batch)
+    c = self.conditioner(sdxl_conds, force_zero_embeddings=['txt'] if force_zero_negative_prompt else [])
 
     return c
 
-- 
cgit v1.2.1