Add CPU fp8 support

Since norm layers need fp32, I only convert the linear-operation layers (conv2d/linear). Also, TE has some PyTorch functions that do not support bf16 AMP on CPU, so I added a condition to indicate whether the autocast is for the unet.
author: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> 2023-10-24 01:49:05 +0800
committer: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> 2023-10-24 01:49:05 +0800
commit: eaa9f5162fbca2ebcb2682eb861bc7e5510a2b66 (patch)
tree: f8bf60786db8d42a0a0e85deb56c885780bda654 /modules/processing.py
parent: 5f9ddfa46f28ca2aa9e0bd832f6bbd67069be63e (diff)
1 files changed, 1 insertions, 1 deletions
diff --git a/modules/processing.py b/modules/processing.py
index 40598f5c..2df8a7ea 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -865,7 +865,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             if p.n_iter > 1:
                 shared.state.job = f"Batch {n+1} out of {p.n_iter}"
 
-            with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
+            with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast(unet=True):
                 samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
 
             if getattr(samples_ddim, 'already_decoded', False):
author	Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>	2023-10-24 01:49:05 +0800
committer	Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>	2023-10-24 01:49:05 +0800
commit	eaa9f5162fbca2ebcb2682eb861bc7e5510a2b66 (patch)
tree	f8bf60786db8d42a0a0e85deb56c885780bda654 /modules/processing.py
parent	5f9ddfa46f28ca2aa9e0bd832f6bbd67069be63e (diff)