diff options
Diffstat (limited to 'modules/devices.py')
-rw-r--r-- | modules/devices.py | 26 |
1 file changed, 21 insertions, 5 deletions
diff --git a/modules/devices.py b/modules/devices.py index dfffaf24..28c0c54d 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -3,8 +3,7 @@ import contextlib from functools import lru_cache import torch -from modules import errors, shared -from modules import torch_utils +from modules import errors, shared, npu_specific if sys.platform == "darwin": from modules import mac_specific @@ -58,6 +57,9 @@ def get_optimal_device_name(): if has_xpu(): return xpu_specific.get_xpu_device_string() + if npu_specific.has_npu: + return npu_specific.get_npu_device_string() + return "cpu" @@ -85,6 +87,16 @@ def torch_gc(): if has_xpu(): xpu_specific.torch_xpu_gc() + if npu_specific.has_npu: + torch_npu_set_device() + npu_specific.torch_npu_gc() + + +def torch_npu_set_device(): + # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue + if npu_specific.has_npu: + torch.npu.set_device(0) + def enable_tf32(): if torch.cuda.is_available(): @@ -141,7 +153,12 @@ def manual_cast_forward(target_dtype): args = [arg.to(target_dtype) if isinstance(arg, torch.Tensor) else arg for arg in args] kwargs = {k: v.to(target_dtype) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()} - org_dtype = torch_utils.get_param(self).dtype + org_dtype = target_dtype + for param in self.parameters(): + if param.dtype != target_dtype: + org_dtype = param.dtype + break + if org_dtype != target_dtype: self.to(target_dtype) result = self.org_forward(*args, **kwargs) @@ -170,7 +187,7 @@ def manual_cast(target_dtype): continue applied = True org_forward = module_type.forward - if module_type == torch.nn.MultiheadAttention and has_xpu(): + if module_type == torch.nn.MultiheadAttention: module_type.forward = manual_cast_forward(torch.float32) else: module_type.forward = manual_cast_forward(target_dtype) @@ -252,4 +269,3 @@ def first_time_calculation(): x = torch.zeros((1, 1, 3, 3)).to(device, dtype) conv2d = 
torch.nn.Conv2d(1, 1, (3, 3)).to(device, dtype) conv2d(x) - |