From cd6c55c1ab14fcab15329cde599cf79e8d555657 Mon Sep 17 00:00:00 2001
From: pepe10-gpu <pepe.dannyboy@gmail.com>
Date: Sun, 6 Nov 2022 17:05:51 -0800
Subject: 16xx card fix

cudnn
---
 modules/devices.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 7511e1dc..858bf399 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -39,10 +39,13 @@ def torch_gc():
 
 def enable_tf32():
     if torch.cuda.is_available():
+        torch.backends.cudnn.benchmark = True
+        torch.backends.cudnn.enabled = True
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
 
 
+
 errors.run(enable_tf32, "Enabling TF32")
 
 device = device_interrogate = device_gfpgan = device_swinir = device_esrgan = device_scunet = device_codeformer = None
-- 
cgit v1.2.1


From 29eff4a194d22f0f0e7a7a976d746a71a4193cf5 Mon Sep 17 00:00:00 2001
From: pepe10-gpu <pepe.dannyboy@gmail.com>
Date: Mon, 7 Nov 2022 18:06:48 -0800
Subject: terrible hack

---
 modules/devices.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 858bf399..4c63f465 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -39,8 +39,15 @@ def torch_gc():
 
 def enable_tf32():
     if torch.cuda.is_available():
-        torch.backends.cudnn.benchmark = True
-        torch.backends.cudnn.enabled = True
+        #TODO: make this better; find a way to check if it is a turing card
+        turing = ["1630","1650","1660","Quadro RTX 3000","Quadro RTX 4000","Quadro RTX 4000","Quadro RTX 5000","Quadro RTX 5000","Quadro RTX 6000","Quadro RTX 6000","Quadro RTX 8000","Quadro RTX T400","Quadro RTX T400","Quadro RTX T600","Quadro RTX T1000","Quadro RTX T1000","2060","2070","2080","Titan RTX","Tesla T4","MX450","MX550"]
+        for devid in range(0,torch.cuda.device_count()):
+            for i in turing:
+                if i in torch.cuda.get_device_name(devid):
+                    shd = True
+        if shd:
+            torch.backends.cudnn.benchmark = True
+            torch.backends.cudnn.enabled = True
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
 
-- 
cgit v1.2.1


From 62e9fec3df8518da3a2c35fa090bb54946c856b2 Mon Sep 17 00:00:00 2001
From: pepe10-gpu <pepe.dannyboy@gmail.com>
Date: Tue, 8 Nov 2022 15:19:09 -0800
Subject: actual better fix

thanks C43H66N12O12S2
---
 modules/devices.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 4c63f465..058a5e00 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -39,12 +39,9 @@ def torch_gc():
 
 def enable_tf32():
     if torch.cuda.is_available():
-        #TODO: make this better; find a way to check if it is a turing card
-        turing = ["1630","1650","1660","Quadro RTX 3000","Quadro RTX 4000","Quadro RTX 4000","Quadro RTX 5000","Quadro RTX 5000","Quadro RTX 6000","Quadro RTX 6000","Quadro RTX 8000","Quadro RTX T400","Quadro RTX T400","Quadro RTX T600","Quadro RTX T1000","Quadro RTX T1000","2060","2070","2080","Titan RTX","Tesla T4","MX450","MX550"]
         for devid in range(0,torch.cuda.device_count()):
-            for i in turing:
-                if i in torch.cuda.get_device_name(devid):
-                    shd = True
+            if torch.cuda.get_device_capability(devid) == (7, 5):
+                shd = True
         if shd:
             torch.backends.cudnn.benchmark = True
             torch.backends.cudnn.enabled = True
-- 
cgit v1.2.1


From 76ab31e18898d4c2aacb9725cfbe25b230bff974 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?=
 <41315874+fumiama@users.noreply.github.com>
Date: Sat, 12 Nov 2022 11:02:40 +0800
Subject: Fix wrong MPS selection below macOS 12.3

---
 modules/devices.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 7511e1dc..9a3d29d7 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -3,8 +3,15 @@ import contextlib
 import torch
 from modules import errors
 
-# has_mps is only available in nightly pytorch (for now), `getattr` for compatibility
-has_mps = getattr(torch, 'has_mps', False)
+# has_mps is only available in nightly pytorch (for now) and MasOS 12.3+.
+# check `getattr` and try it for compatibility
+def has_mps() -> bool:
+    if getattr(torch, 'has_mps', False): return False
+    try:
+        torch.zeros(1).to(torch.device("mps"))
+        return True
+    except Exception:
+        return False
 
 cpu = torch.device("cpu")
 
@@ -25,7 +32,7 @@ def get_optimal_device():
         else:
             return torch.device("cuda")
 
-    if has_mps:
+    if has_mps():
         return torch.device("mps")
 
     return cpu
-- 
cgit v1.2.1


From 1130d5df669911a5c67696be90bccca3ecf5f487 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?=
 <41315874+fumiama@users.noreply.github.com>
Date: Sat, 12 Nov 2022 11:09:28 +0800
Subject: Update devices.py

---
 modules/devices.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 9a3d29d7..bd3e4ffb 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -6,7 +6,7 @@ from modules import errors
 # has_mps is only available in nightly pytorch (for now) and MasOS 12.3+.
 # check `getattr` and try it for compatibility
 def has_mps() -> bool:
-    if getattr(torch, 'has_mps', False): return False
+    if not getattr(torch, 'has_mps', False): return False
     try:
         torch.zeros(1).to(torch.device("mps"))
         return True
-- 
cgit v1.2.1


From 0ab0a50f9ae14bd7ce7ec518323ebd31c7971155 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 12 Nov 2022 10:00:49 +0300
Subject: change formatting to match the main program in devices.py

---
 modules/devices.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index bd3e4ffb..67165bf6 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -3,23 +3,27 @@ import contextlib
 import torch
 from modules import errors
 
+
 # has_mps is only available in nightly pytorch (for now) and MasOS 12.3+.
 # check `getattr` and try it for compatibility
 def has_mps() -> bool:
-    if not getattr(torch, 'has_mps', False): return False
+    if not getattr(torch, 'has_mps', False):
+        return False
     try:
         torch.zeros(1).to(torch.device("mps"))
         return True
     except Exception:
         return False
 
-cpu = torch.device("cpu")
 
 def extract_device_id(args, name):
     for x in range(len(args)):
-        if name in args[x]: return args[x+1]
+        if name in args[x]:
+            return args[x + 1]
+
     return None
 
+
 def get_optimal_device():
     if torch.cuda.is_available():
         from modules import shared
@@ -52,10 +56,12 @@ def enable_tf32():
 
 errors.run(enable_tf32, "Enabling TF32")
 
+cpu = torch.device("cpu")
 device = device_interrogate = device_gfpgan = device_swinir = device_esrgan = device_scunet = device_codeformer = None
 dtype = torch.float16
 dtype_vae = torch.float16
 
+
 def randn(seed, shape):
     # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
     if device.type == 'mps':
@@ -89,6 +95,11 @@ def autocast(disable=False):
 
     return torch.autocast("cuda")
 
+
 # MPS workaround for https://github.com/pytorch/pytorch/issues/79383
-def mps_contiguous(input_tensor, device): return input_tensor.contiguous() if device.type == 'mps' else input_tensor
-def mps_contiguous_to(input_tensor, device): return mps_contiguous(input_tensor, device).to(device)
+def mps_contiguous(input_tensor, device):
+    return input_tensor.contiguous() if device.type == 'mps' else input_tensor
+
+
+def mps_contiguous_to(input_tensor, device):
+    return mps_contiguous(input_tensor, device).to(device)
-- 
cgit v1.2.1


From abfa22c16fb3d9b1ed8d049c7b68e94d1cca5b82 Mon Sep 17 00:00:00 2001
From: brkirch <brkirch@users.noreply.github.com>
Date: Mon, 7 Nov 2022 19:25:43 -0500
Subject: Revert "MPS Upscalers Fix"

This reverts commit 768b95394a8500da639b947508f78296524f1836.
---
 modules/devices.py | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 67165bf6..a87d0d4c 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -94,12 +94,3 @@ def autocast(disable=False):
         return contextlib.nullcontext()
 
     return torch.autocast("cuda")
-
-
-# MPS workaround for https://github.com/pytorch/pytorch/issues/79383
-def mps_contiguous(input_tensor, device):
-    return input_tensor.contiguous() if device.type == 'mps' else input_tensor
-
-
-def mps_contiguous_to(input_tensor, device):
-    return mps_contiguous(input_tensor, device).to(device)
-- 
cgit v1.2.1


From e247b7400a592c0a19c197cd080aeec38ee02b68 Mon Sep 17 00:00:00 2001
From: brkirch <brkirch@users.noreply.github.com>
Date: Thu, 17 Nov 2022 03:52:17 -0500
Subject: Add fixes for PyTorch 1.12.1

Fix typo "MasOS" -> "macOS"

If MPS is available and PyTorch is an earlier version than 1.13:
* Monkey patch torch.Tensor.to to ensure all tensors sent to MPS are contiguous
* Monkey patch torch.nn.functional.layer_norm to ensure input tensor is contiguous (required for this program to work with MPS on unmodified PyTorch 1.12.1)
---
 modules/devices.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index a87d0d4c..6e8277e5 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -2,9 +2,10 @@ import sys, os, shlex
 import contextlib
 import torch
 from modules import errors
+from packaging import version
 
 
-# has_mps is only available in nightly pytorch (for now) and MasOS 12.3+.
+# has_mps is only available in nightly pytorch (for now) and macOS 12.3+.
 # check `getattr` and try it for compatibility
 def has_mps() -> bool:
     if not getattr(torch, 'has_mps', False):
@@ -94,3 +95,28 @@ def autocast(disable=False):
         return contextlib.nullcontext()
 
     return torch.autocast("cuda")
+
+
+# MPS workaround for https://github.com/pytorch/pytorch/issues/79383
+orig_tensor_to = torch.Tensor.to
+def tensor_to_fix(self, *args, **kwargs):
+    if self.device.type != 'mps' and \
+       ((len(args) > 0 and isinstance(args[0], torch.device) and args[0].type == 'mps') or \
+       (isinstance(kwargs.get('device'), torch.device) and kwargs['device'].type == 'mps')):
+        self = self.contiguous()
+    return orig_tensor_to(self, *args, **kwargs)
+
+
+# MPS workaround for https://github.com/pytorch/pytorch/issues/80800 
+orig_layer_norm = torch.nn.functional.layer_norm
+def layer_norm_fix(*args, **kwargs):
+    if len(args) > 0 and isinstance(args[0], torch.Tensor) and args[0].device.type == 'mps':
+        args = list(args)
+        args[0] = args[0].contiguous()
+    return orig_layer_norm(*args, **kwargs)
+
+
+# PyTorch 1.13 doesn't need these fixes but unfortunately is slower and has regressions that prevent training from working
+if has_mps() and version.parse(torch.__version__) < version.parse("1.13"):
+    torch.Tensor.to = tensor_to_fix
+    torch.nn.functional.layer_norm = layer_norm_fix
-- 
cgit v1.2.1


From c67c40f983997594f76b2312f92c3761e8d83715 Mon Sep 17 00:00:00 2001
From: Matthew McGoogan <mlmcgoogan@gmail.com>
Date: Sat, 26 Nov 2022 23:25:16 +0000
Subject: torch.cuda.empty_cache() defaults to cuda:0 device unless explicitly
 set otherwise first. Updating torch_gc() to use the device set by --device-id
 if specified to avoid OOM edge cases on multi-GPU systems.

---
 modules/devices.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 67165bf6..93d82bbc 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -44,8 +44,18 @@ def get_optimal_device():
 
 def torch_gc():
     if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        torch.cuda.ipc_collect()
+        from modules import shared
+
+        device_id = shared.cmd_opts.device_id
+        
+        if device_id is not None:
+            cuda_device = f"cuda:{device_id}"
+        else:
+            cuda_device = "cuda"
+        
+        with torch.cuda.device(cuda_device):
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
 
 
 def enable_tf32():
-- 
cgit v1.2.1


From 5b2c316890b7b8af95f0d0334d1fd34b9a687b99 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sun, 27 Nov 2022 13:08:54 +0300
Subject: eliminate duplicated code from #5095

---
 modules/devices.py | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 93d82bbc..dd50fe24 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -24,17 +24,18 @@ def extract_device_id(args, name):
     return None
 
 
-def get_optimal_device():
-    if torch.cuda.is_available():
-        from modules import shared
+def get_cuda_device_string():
+    from modules import shared
+
+    if shared.cmd_opts.device_id is not None:
+        return f"cuda:{shared.cmd_opts.device_id}"
 
-        device_id = shared.cmd_opts.device_id
+    return "cuda"
 
-        if device_id is not None:
-            cuda_device = f"cuda:{device_id}"
-            return torch.device(cuda_device)
-        else:
-            return torch.device("cuda")
+
+def get_optimal_device():
+    if torch.cuda.is_available():
+        return torch.device(get_cuda_device_string())
 
     if has_mps():
         return torch.device("mps")
@@ -44,16 +45,7 @@ def get_optimal_device():
 
 def torch_gc():
     if torch.cuda.is_available():
-        from modules import shared
-
-        device_id = shared.cmd_opts.device_id
-        
-        if device_id is not None:
-            cuda_device = f"cuda:{device_id}"
-        else:
-            cuda_device = "cuda"
-        
-        with torch.cuda.device(cuda_device):
+        with torch.cuda.device(get_cuda_device_string()):
             torch.cuda.empty_cache()
             torch.cuda.ipc_collect()
 
-- 
cgit v1.2.1


From 0fddb4a1c06a6e2122add7eee3b001a6d473baee Mon Sep 17 00:00:00 2001
From: brkirch <brkirch@users.noreply.github.com>
Date: Wed, 30 Nov 2022 08:02:39 -0500
Subject: Rework MPS randn fix, add randn_like fix

torch.manual_seed() already sets a CPU generator, so there is no reason to create a CPU generator manually. torch.randn_like also needs an MPS fix for k-diffusion, but a torch hijack with randn_like already exists, so it can also be used for that.
---
 modules/devices.py | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index f00079c6..046460fa 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -66,24 +66,15 @@ dtype_vae = torch.float16
 
 
 def randn(seed, shape):
-    # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
-    if device.type == 'mps':
-        generator = torch.Generator(device=cpu)
-        generator.manual_seed(seed)
-        noise = torch.randn(shape, generator=generator, device=cpu).to(device)
-        return noise
-
     torch.manual_seed(seed)
+    if device.type == 'mps':
+        return torch.randn(shape, device=cpu).to(device)
     return torch.randn(shape, device=device)
 
 
 def randn_without_seed(shape):
-    # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
     if device.type == 'mps':
-        generator = torch.Generator(device=cpu)
-        noise = torch.randn(shape, generator=generator, device=cpu).to(device)
-        return noise
-
+        return torch.randn(shape, device=cpu).to(device)
     return torch.randn(shape, device=device)
 
 
-- 
cgit v1.2.1


From 2651267e3af5886b8b6b1dc3023f2507f7079118 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 15:57:52 +0300
Subject: fix #4407 breaking UI entirely for cards other than the ones related
 to the PR

---
 modules/devices.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 1325569c..547ea46c 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -53,12 +53,10 @@ def torch_gc():
 
 def enable_tf32():
     if torch.cuda.is_available():
-        for devid in range(0,torch.cuda.device_count()):
-            if torch.cuda.get_device_capability(devid) == (7, 5):
-                shd = True
-        if shd:
+        if any([torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())]):
             torch.backends.cudnn.benchmark = True
             torch.backends.cudnn.enabled = True
+
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
 
-- 
cgit v1.2.1


From 46b0d230e7c13e247eabb22e1103ce512e7ed6b1 Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 16:01:23 +0300
Subject: add comment for #4407 and remove seemingly unnecessary cudnn.enabled

---
 modules/devices.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index 547ea46c..d6a76844 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -53,9 +53,11 @@ def torch_gc():
 
 def enable_tf32():
     if torch.cuda.is_available():
+
+        # enabling benchmark option seems to enable a range of cards to do fp16 when they otherwise can't
+        # see https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407
         if any([torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())]):
             torch.backends.cudnn.benchmark = True
-            torch.backends.cudnn.enabled = True
 
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
-- 
cgit v1.2.1


From b6e5edd74657e3fd1fbd04f341b7a84625d4aa7a Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Sat, 3 Dec 2022 18:06:33 +0300
Subject: add built-in extension system; add support for adding upscalers in
 extensions; move LDSR, ScuNET and SwinIR to built-in extensions

---
 modules/devices.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index d6a76844..f8cffae1 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -44,6 +44,15 @@ def get_optimal_device():
     return cpu
 
 
+def get_device_for(task):
+    from modules import shared
+
+    if task in shared.cmd_opts.use_cpu:
+        return cpu
+
+    return get_optimal_device()
+
+
 def torch_gc():
     if torch.cuda.is_available():
         with torch.cuda.device(get_cuda_device_string()):
@@ -67,7 +76,7 @@ def enable_tf32():
 errors.run(enable_tf32, "Enabling TF32")
 
 cpu = torch.device("cpu")
-device = device_interrogate = device_gfpgan = device_swinir = device_esrgan = device_scunet = device_codeformer = None
+device = device_interrogate = device_gfpgan = device_esrgan = device_codeformer = None
 dtype = torch.float16
 dtype_vae = torch.float16
 
-- 
cgit v1.2.1


From 16b4509fa60ec03102b2452b41799dafccd35970 Mon Sep 17 00:00:00 2001
From: brkirch <brkirch@users.noreply.github.com>
Date: Sat, 17 Dec 2022 03:21:19 -0500
Subject: Add numpy fix for MPS on PyTorch 1.12.1

When saving training results with torch.save(), an exception is thrown:
"RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead."

So for MPS, check if Tensor.requires_grad and detach() if necessary.
---
 modules/devices.py | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'modules/devices.py')

diff --git a/modules/devices.py b/modules/devices.py
index f8cffae1..800510b7 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -125,7 +125,16 @@ def layer_norm_fix(*args, **kwargs):
     return orig_layer_norm(*args, **kwargs)
 
 
+# MPS workaround for https://github.com/pytorch/pytorch/issues/90532
+orig_tensor_numpy = torch.Tensor.numpy
+def numpy_fix(self, *args, **kwargs):
+    if self.requires_grad:
+        self = self.detach()
+    return orig_tensor_numpy(self, *args, **kwargs)
+
+
 # PyTorch 1.13 doesn't need these fixes but unfortunately is slower and has regressions that prevent training from working
 if has_mps() and version.parse(torch.__version__) < version.parse("1.13"):
     torch.Tensor.to = tensor_to_fix
     torch.nn.functional.layer_norm = layer_norm_fix
+    torch.Tensor.numpy = numpy_fix
-- 
cgit v1.2.1