From cd6c55c1ab14fcab15329cde599cf79e8d555657 Mon Sep 17 00:00:00 2001 From: pepe10-gpu Date: Sun, 6 Nov 2022 17:05:51 -0800 Subject: 16xx card fix cudnn --- modules/devices.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 7511e1dc..858bf399 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -39,10 +39,13 @@ def torch_gc(): def enable_tf32(): if torch.cuda.is_available(): + torch.backends.cudnn.benchmark = True + torch.backends.cudnn.enabled = True torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True + errors.run(enable_tf32, "Enabling TF32") device = device_interrogate = device_gfpgan = device_swinir = device_esrgan = device_scunet = device_codeformer = None -- cgit v1.2.1 From 29eff4a194d22f0f0e7a7a976d746a71a4193cf5 Mon Sep 17 00:00:00 2001 From: pepe10-gpu Date: Mon, 7 Nov 2022 18:06:48 -0800 Subject: terrible hack --- modules/devices.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 858bf399..4c63f465 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -39,8 +39,15 @@ def torch_gc(): def enable_tf32(): if torch.cuda.is_available(): - torch.backends.cudnn.benchmark = True - torch.backends.cudnn.enabled = True + #TODO: make this better; find a way to check if it is a turing card + turing = ["1630","1650","1660","Quadro RTX 3000","Quadro RTX 4000","Quadro RTX 4000","Quadro RTX 5000","Quadro RTX 5000","Quadro RTX 6000","Quadro RTX 6000","Quadro RTX 8000","Quadro RTX T400","Quadro RTX T400","Quadro RTX T600","Quadro RTX T1000","Quadro RTX T1000","2060","2070","2080","Titan RTX","Tesla T4","MX450","MX550"] + for devid in range(0,torch.cuda.device_count()): + for i in turing: + if i in torch.cuda.get_device_name(devid): + shd = True + if shd: + torch.backends.cudnn.benchmark = True + torch.backends.cudnn.enabled = True torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True -- cgit v1.2.1 From 62e9fec3df8518da3a2c35fa090bb54946c856b2 Mon Sep 17 00:00:00 2001 From: pepe10-gpu Date: Tue, 8 Nov 2022 15:19:09 -0800 Subject: actual better fix thanks C43H66N12O12S2 --- modules/devices.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 4c63f465..058a5e00 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -39,12 +39,9 @@ def torch_gc(): def enable_tf32(): if torch.cuda.is_available(): - #TODO: make this better; find a way to check if it is a turing card - turing = ["1630","1650","1660","Quadro RTX 3000","Quadro RTX 4000","Quadro RTX 4000","Quadro RTX 5000","Quadro RTX 5000","Quadro RTX 6000","Quadro RTX 6000","Quadro RTX 8000","Quadro RTX T400","Quadro RTX T400","Quadro RTX T600","Quadro RTX T1000","Quadro RTX T1000","2060","2070","2080","Titan RTX","Tesla T4","MX450","MX550"] for devid in range(0,torch.cuda.device_count()): - for i in turing: - if i in torch.cuda.get_device_name(devid): - shd = True + if torch.cuda.get_device_capability(devid) == (7, 5): + shd = True if shd: torch.backends.cudnn.benchmark = True torch.backends.cudnn.enabled = True -- cgit v1.2.1 From 76ab31e18898d4c2aacb9725cfbe25b230bff974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Sat, 12 Nov 2022 11:02:40 +0800 Subject: Fix wrong mps selection below MasOS 12.3 --- modules/devices.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 7511e1dc..9a3d29d7 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -3,8 +3,15 @@ import contextlib import torch from modules import errors -# has_mps is only available in nightly pytorch (for now), `getattr` for compatibility -has_mps = getattr(torch, 'has_mps', False) +# has_mps is only available in nightly pytorch (for now) and MasOS 12.3+. +# check `getattr` and try it for compatibility +def has_mps() -> bool: + if getattr(torch, 'has_mps', False): return False + try: + torch.zeros(1).to(torch.device("mps")) + return True + except Exception: + return False cpu = torch.device("cpu") @@ -25,7 +32,7 @@ def get_optimal_device(): else: return torch.device("cuda") - if has_mps: + if has_mps(): return torch.device("mps") return cpu -- cgit v1.2.1 From 1130d5df669911a5c67696be90bccca3ecf5f487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Sat, 12 Nov 2022 11:09:28 +0800 Subject: Update devices.py --- modules/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 9a3d29d7..bd3e4ffb 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -6,7 +6,7 @@ from modules import errors # has_mps is only available in nightly pytorch (for now) and MasOS 12.3+. # check `getattr` and try it for compatibility def has_mps() -> bool: - if getattr(torch, 'has_mps', False): return False + if not getattr(torch, 'has_mps', False): return False try: torch.zeros(1).to(torch.device("mps")) return True -- cgit v1.2.1 From 0ab0a50f9ae14bd7ce7ec518323ebd31c7971155 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 12 Nov 2022 10:00:49 +0300 Subject: change formatting to match the main program in devices.py --- modules/devices.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index bd3e4ffb..67165bf6 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -3,23 +3,27 @@ import contextlib import torch from modules import errors + # has_mps is only available in nightly pytorch (for now) and MasOS 12.3+. # check `getattr` and try it for compatibility def has_mps() -> bool: - if not getattr(torch, 'has_mps', False): return False + if not getattr(torch, 'has_mps', False): + return False try: torch.zeros(1).to(torch.device("mps")) return True except Exception: return False -cpu = torch.device("cpu") def extract_device_id(args, name): for x in range(len(args)): - if name in args[x]: return args[x+1] + if name in args[x]: + return args[x + 1] + return None + def get_optimal_device(): if torch.cuda.is_available(): from modules import shared @@ -52,10 +56,12 @@ def enable_tf32(): errors.run(enable_tf32, "Enabling TF32") +cpu = torch.device("cpu") device = device_interrogate = device_gfpgan = device_swinir = device_esrgan = device_scunet = device_codeformer = None dtype = torch.float16 dtype_vae = torch.float16 + def randn(seed, shape): # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used. if device.type == 'mps': @@ -89,6 +95,11 @@ def autocast(disable=False): return torch.autocast("cuda") + # MPS workaround for https://github.com/pytorch/pytorch/issues/79383 -def mps_contiguous(input_tensor, device): return input_tensor.contiguous() if device.type == 'mps' else input_tensor -def mps_contiguous_to(input_tensor, device): return mps_contiguous(input_tensor, device).to(device) +def mps_contiguous(input_tensor, device): + return input_tensor.contiguous() if device.type == 'mps' else input_tensor + + +def mps_contiguous_to(input_tensor, device): + return mps_contiguous(input_tensor, device).to(device) -- cgit v1.2.1 From abfa22c16fb3d9b1ed8d049c7b68e94d1cca5b82 Mon Sep 17 00:00:00 2001 From: brkirch Date: Mon, 7 Nov 2022 19:25:43 -0500 Subject: Revert "MPS Upscalers Fix" This reverts commit 768b95394a8500da639b947508f78296524f1836. --- modules/devices.py | 9 --------- 1 file changed, 9 deletions(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 67165bf6..a87d0d4c 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -94,12 +94,3 @@ def autocast(disable=False): return contextlib.nullcontext() return torch.autocast("cuda") - - -# MPS workaround for https://github.com/pytorch/pytorch/issues/79383 -def mps_contiguous(input_tensor, device): - return input_tensor.contiguous() if device.type == 'mps' else input_tensor - - -def mps_contiguous_to(input_tensor, device): - return mps_contiguous(input_tensor, device).to(device) -- cgit v1.2.1 From e247b7400a592c0a19c197cd080aeec38ee02b68 Mon Sep 17 00:00:00 2001 From: brkirch Date: Thu, 17 Nov 2022 03:52:17 -0500 Subject: Add fixes for PyTorch 1.12.1 Fix typo "MasOS" -> "macOS" If MPS is available and PyTorch is an earlier version than 1.13: * Monkey patch torch.Tensor.to to ensure all tensors sent to MPS are contiguous * Monkey patch torch.nn.functional.layer_norm to ensure input tensor is contiguous (required for this program to work with MPS on unmodified PyTorch 1.12.1) --- modules/devices.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index a87d0d4c..6e8277e5 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -2,9 +2,10 @@ import sys, os, shlex import contextlib import torch from modules import errors +from packaging import version -# has_mps is only available in nightly pytorch (for now) and MasOS 12.3+. +# has_mps is only available in nightly pytorch (for now) and macOS 12.3+. # check `getattr` and try it for compatibility def has_mps() -> bool: if not getattr(torch, 'has_mps', False): @@ -94,3 +95,28 @@ def autocast(disable=False): return contextlib.nullcontext() return torch.autocast("cuda") + + +# MPS workaround for https://github.com/pytorch/pytorch/issues/79383 +orig_tensor_to = torch.Tensor.to +def tensor_to_fix(self, *args, **kwargs): + if self.device.type != 'mps' and \ + ((len(args) > 0 and isinstance(args[0], torch.device) and args[0].type == 'mps') or \ + (isinstance(kwargs.get('device'), torch.device) and kwargs['device'].type == 'mps')): + self = self.contiguous() + return orig_tensor_to(self, *args, **kwargs) + + +# MPS workaround for https://github.com/pytorch/pytorch/issues/80800 +orig_layer_norm = torch.nn.functional.layer_norm +def layer_norm_fix(*args, **kwargs): + if len(args) > 0 and isinstance(args[0], torch.Tensor) and args[0].device.type == 'mps': + args = list(args) + args[0] = args[0].contiguous() + return orig_layer_norm(*args, **kwargs) + + +# PyTorch 1.13 doesn't need these fixes but unfortunately is slower and has regressions that prevent training from working +if has_mps() and version.parse(torch.__version__) < version.parse("1.13"): + torch.Tensor.to = tensor_to_fix + torch.nn.functional.layer_norm = layer_norm_fix -- cgit v1.2.1 From c67c40f983997594f76b2312f92c3761e8d83715 Mon Sep 17 00:00:00 2001 From: Matthew McGoogan Date: Sat, 26 Nov 2022 23:25:16 +0000 Subject: torch.cuda.empty_cache() defaults to cuda:0 device unless explicitly set otherwise first. Updating torch_gc() to use the device set by --device-id if specified to avoid OOM edge cases on multi-GPU systems. --- modules/devices.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 67165bf6..93d82bbc 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -44,8 +44,18 @@ def get_optimal_device(): def torch_gc(): if torch.cuda.is_available(): - torch.cuda.empty_cache() - torch.cuda.ipc_collect() + from modules import shared + + device_id = shared.cmd_opts.device_id + + if device_id is not None: + cuda_device = f"cuda:{device_id}" + else: + cuda_device = "cuda" + + with torch.cuda.device(cuda_device): + torch.cuda.empty_cache() + torch.cuda.ipc_collect() def enable_tf32(): -- cgit v1.2.1 From 5b2c316890b7b8af95f0d0334d1fd34b9a687b99 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 27 Nov 2022 13:08:54 +0300 Subject: eliminate duplicated code from #5095 --- modules/devices.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 93d82bbc..dd50fe24 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -24,17 +24,18 @@ def extract_device_id(args, name): return None -def get_optimal_device(): - if torch.cuda.is_available(): - from modules import shared +def get_cuda_device_string(): + from modules import shared + + if shared.cmd_opts.device_id is not None: + return f"cuda:{shared.cmd_opts.device_id}" - device_id = shared.cmd_opts.device_id + return "cuda" - if device_id is not None: - cuda_device = f"cuda:{device_id}" - return torch.device(cuda_device) - else: - return torch.device("cuda") + +def get_optimal_device(): + if torch.cuda.is_available(): + return torch.device(get_cuda_device_string()) if has_mps(): return torch.device("mps") @@ -44,16 +45,7 @@ def get_optimal_device(): def torch_gc(): if torch.cuda.is_available(): - from modules import shared - - device_id = shared.cmd_opts.device_id - - if device_id is not None: - cuda_device = f"cuda:{device_id}" - else: - cuda_device = "cuda" - - with torch.cuda.device(cuda_device): + with torch.cuda.device(get_cuda_device_string()): torch.cuda.empty_cache() torch.cuda.ipc_collect() -- cgit v1.2.1 From 0fddb4a1c06a6e2122add7eee3b001a6d473baee Mon Sep 17 00:00:00 2001 From: brkirch Date: Wed, 30 Nov 2022 08:02:39 -0500 Subject: Rework MPS randn fix, add randn_like fix torch.manual_seed() already sets a CPU generator, so there is no reason to create a CPU generator manually. torch.randn_like also needs a MPS fix for k-diffusion, but a torch hijack with randn_like already exists so it can also be used for that. --- modules/devices.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index f00079c6..046460fa 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -66,24 +66,15 @@ dtype_vae = torch.float16 def randn(seed, shape): - # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used. - if device.type == 'mps': - generator = torch.Generator(device=cpu) - generator.manual_seed(seed) - noise = torch.randn(shape, generator=generator, device=cpu).to(device) - return noise - torch.manual_seed(seed) + if device.type == 'mps': + return torch.randn(shape, device=cpu).to(device) return torch.randn(shape, device=device) def randn_without_seed(shape): - # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used. if device.type == 'mps': - generator = torch.Generator(device=cpu) - noise = torch.randn(shape, generator=generator, device=cpu).to(device) - return noise - + return torch.randn(shape, device=cpu).to(device) return torch.randn(shape, device=device) -- cgit v1.2.1 From 2651267e3af5886b8b6b1dc3023f2507f7079118 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 3 Dec 2022 15:57:52 +0300 Subject: fix #4407 breaking UI entirely for card other than ones related to the PR --- modules/devices.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 1325569c..547ea46c 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -53,12 +53,10 @@ def torch_gc(): def enable_tf32(): if torch.cuda.is_available(): - for devid in range(0,torch.cuda.device_count()): - if torch.cuda.get_device_capability(devid) == (7, 5): - shd = True - if shd: + if any([torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())]): torch.backends.cudnn.benchmark = True torch.backends.cudnn.enabled = True + torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True -- cgit v1.2.1 From 46b0d230e7c13e247eabb22e1103ce512e7ed6b1 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 3 Dec 2022 16:01:23 +0300 Subject: add comment for #4407 and remove seemingly unnecessary cudnn.enabled --- modules/devices.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index 547ea46c..d6a76844 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -53,9 +53,11 @@ def torch_gc(): def enable_tf32(): if torch.cuda.is_available(): + + # enabling benchmark option seems to enable a range of cards to do fp16 when they otherwise can't + # see https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407 if any([torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())]): torch.backends.cudnn.benchmark = True - torch.backends.cudnn.enabled = True torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True -- cgit v1.2.1 From b6e5edd74657e3fd1fbd04f341b7a84625d4aa7a Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 3 Dec 2022 18:06:33 +0300 Subject: add built-in extension system add support for adding upscalers in extensions move LDSR, ScuNET and SwinIR to built-in extensions --- modules/devices.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index d6a76844..f8cffae1 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -44,6 +44,15 @@ def get_optimal_device(): return cpu +def get_device_for(task): + from modules import shared + + if task in shared.cmd_opts.use_cpu: + return cpu + + return get_optimal_device() + + def torch_gc(): if torch.cuda.is_available(): with torch.cuda.device(get_cuda_device_string()): @@ -67,7 +76,7 @@ def enable_tf32(): errors.run(enable_tf32, "Enabling TF32") cpu = torch.device("cpu") -device = device_interrogate = device_gfpgan = device_swinir = device_esrgan = device_scunet = device_codeformer = None +device = device_interrogate = device_gfpgan = device_esrgan = device_codeformer = None dtype = torch.float16 dtype_vae = torch.float16 -- cgit v1.2.1 From 16b4509fa60ec03102b2452b41799dafccd35970 Mon Sep 17 00:00:00 2001 From: brkirch Date: Sat, 17 Dec 2022 03:21:19 -0500 Subject: Add numpy fix for MPS on PyTorch 1.12.1 When saving training results with torch.save(), an exception is thrown: "RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead." So for MPS, check if Tensor.requires_grad and detach() if necessary. --- modules/devices.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'modules/devices.py') diff --git a/modules/devices.py b/modules/devices.py index f8cffae1..800510b7 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -125,7 +125,16 @@ def layer_norm_fix(*args, **kwargs): return orig_layer_norm(*args, **kwargs) +# MPS workaround for https://github.com/pytorch/pytorch/issues/90532 +orig_tensor_numpy = torch.Tensor.numpy +def numpy_fix(self, *args, **kwargs): + if self.requires_grad: + self = self.detach() + return orig_tensor_numpy(self, *args, **kwargs) + + # PyTorch 1.13 doesn't need these fixes but unfortunately is slower and has regressions that prevent training from working if has_mps() and version.parse(torch.__version__) < version.parse("1.13"): torch.Tensor.to = tensor_to_fix torch.nn.functional.layer_norm = layer_norm_fix + torch.Tensor.numpy = numpy_fix -- cgit v1.2.1