From eaa9f5162fbca2ebcb2682eb861bc7e5510a2b66 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Tue, 24 Oct 2023 01:49:05 +0800
Subject: Add CPU fp8 support

Since norm layer need fp32, I only convert the linear operation layer(conv2d/linear)

And TE have some pytorch function not support bf16 amp in CPU. I add a condition to indicate if the autocast is for unet.
---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 40598f5c..2df8a7ea 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -865,7 +865,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             if p.n_iter > 1:
                 shared.state.job = f"Batch {n+1} out of {p.n_iter}"
 
-            with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
+            with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast(unet=True):
                 samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
 
             if getattr(samples_ddim, 'already_decoded', False):
-- 
cgit v1.2.1


From d4d3134f6d2d232c7bcfa80900a362921e644976 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sat, 28 Oct 2023 15:24:26 +0800
Subject: ManualCast for 10/16 series gpu

---
 modules/processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index 2df8a7ea..40598f5c 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -865,7 +865,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             if p.n_iter > 1:
                 shared.state.job = f"Batch {n+1} out of {p.n_iter}"
 
-            with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast(unet=True):
+            with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
                 samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
 
             if getattr(samples_ddim, 'already_decoded', False):
-- 
cgit v1.2.1


From ea272152e0b50dbb2bd675ec020607f3d50c37d0 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sat, 16 Dec 2023 15:08:08 +0800
Subject: Add FP8 settings into PNG info

---
 modules/processing.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'modules/processing.py')

diff --git a/modules/processing.py b/modules/processing.py
index bea01ec6..179f2c0f 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -688,6 +688,8 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
         "Size": f"{p.width}x{p.height}",
         "Model hash": p.sd_model_hash if opts.add_model_hash_to_info else None,
         "Model": p.sd_model_name if opts.add_model_name_to_info else None,
+        "FP8 weight": opts.fp8_storage if devices.fp8 else None,
+        "Cache FP16 weight for LoRA": opts.cache_fp16_weight if devices.fp8 else None,
         "VAE hash": p.sd_vae_hash if opts.add_vae_hash_to_info else None,
         "VAE": p.sd_vae_name if opts.add_vae_name_to_info else None,
         "Variation seed": (None if p.subseed_strength == 0 else (p.all_subseeds[0] if use_main_prompt else all_subseeds[index])),
-- 
cgit v1.2.1