Refactored Metal/mps fixes.

author: Elias Oenal <git@eliasoenal.com> 2022-09-12 16:32:44 +0200
committer: Elias Oenal <git@eliasoenal.com> 2022-09-12 16:32:44 +0200
commit: b7f95869b4542d356a12da6860b1e6c227784560 (patch)
tree: 9164bffa56d0408a13dc46310b8b25195167dd28 /modules/processing.py
parent: 5dc05c0d0dc6a0040b0beb93f082ab314513d069 (diff)
1 files changed, 19 insertions, 23 deletions
diff --git a/modules/processing.py b/modules/processing.py
index 80bf7cc0..542d1136 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1,6 +1,3 @@
-# Metal backend fixes written and placed
-# into the public domain by Elias Oenal <sd@eliasoenal.com>
-
 import contextlib
 import json
 import math
@@ -109,17 +106,19 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
         noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8)
 
         # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
+        generator = torch
         if shared.device.type == 'mps':
-            g = torch.Generator(device='cpu')
+            shared.device_seed_type = 'cpu'
+            generator = torch.Generator(device=shared.device_seed_type)
 
         subnoise = None
         if subseeds is not None:
             subseed = 0 if i >= len(subseeds) else subseeds[i]
-            if shared.device.type == 'mps':
-                g.manual_seed(subseed)
-                subnoise = torch.randn(noise_shape, generator=g, device='cpu').to('mps')
-            else: # cpu or cuda
-                torch.manual_seed(subseed)
+            generator.manual_seed(subseed)
+
+            if shared.device.type != shared.device_seed_type:
+                subnoise = torch.randn(noise_shape, generator=generator, device=shared.device_seed_type).to(shared.device)
+            else:
                 subnoise = torch.randn(noise_shape, device=shared.device)
 
         # randn results depend on device; gpu and cpu get different results for same seed;
@@ -128,12 +127,11 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
         # it will break everyone's seeds.
         # When using the mps backend falling back to the cpu device is needed, since mps currently
         # does not implement seeding properly.
-        if shared.device.type == 'mps':
-            g.manual_seed(seed)
-            noise = torch.randn(noise_shape, generator=g, device='cpu').to('mps')
-        else: # cpu or cuda
-            torch.manual_seed(seed)
-            x = torch.randn(shape, device=shared.device)
+        generator.manual_seed(seed)
+        if shared.device.type != shared.device_seed_type:
+            noise = torch.randn(noise_shape, generator=generator, device=shared.device_seed_type).to(shared.device)
+        else:
+            noise = torch.randn(noise_shape, device=shared.device)
 
         if subnoise is not None:
             #noise = subnoise * subseed_strength + noise * (1 - subseed_strength)
@@ -143,12 +141,10 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
             #noise = torch.nn.functional.interpolate(noise.unsqueeze(1), size=shape[1:], mode="bilinear").squeeze()
             # noise_shape = (64, 80)
             # shape = (64, 72)
-
-            if shared.device.type == 'mps':
-                g.manual_seed(seed)
-                x = torch.randn(shape, generator=g, device='cpu').to('mps')
+            generator.manual_seed(seed)
+            if shared.device.type != shared.device_seed_type:
+                x = torch.randn(shape, generator=generator, device=shared.device_seed_type).to(shared.device)
             else:
-                torch.manual_seed(seed)
                 x = torch.randn(shape, device=shared.device)
             dx = (shape[2] - noise_shape[2]) // 2 # -4
             dy = (shape[1] - noise_shape[1]) // 2
@@ -484,10 +480,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         if self.image_mask is not None:
             init_mask = latent_mask
             latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
+            precision = np.float64
             if shared.device.type == 'mps': # mps backend does not support float64
-                latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
-            else:
-                latmask = np.moveaxis(np.array(latmask, dtype=np.float64), 2, 0) / 255
+                precision = np.float32
+            latmask = np.moveaxis(np.array(latmask, dtype=precision), 2, 0) / 255
             latmask = latmask[0]
             latmask = np.around(latmask)
             latmask = np.tile(latmask[None], (4, 1, 1))
author	Elias Oenal <git@eliasoenal.com>	2022-09-12 16:32:44 +0200
committer	Elias Oenal <git@eliasoenal.com>	2022-09-12 16:32:44 +0200
commit	b7f95869b4542d356a12da6860b1e6c227784560 (patch)
tree	9164bffa56d0408a13dc46310b8b25195167dd28 /modules/processing.py
parent	5dc05c0d0dc6a0040b0beb93f082ab314513d069 (diff)