diff options
Diffstat (limited to 'modules/devices.py')
-rw-r--r-- | modules/devices.py | 26 |
1 file changed, 21 insertions, 5 deletions
diff --git a/modules/devices.py b/modules/devices.py index dfffaf24..28c0c54d 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -3,8 +3,7 @@ import contextlib from functools import lru_cache import torch -from modules import errors, shared -from modules import torch_utils +from modules import errors, shared, npu_specific if sys.platform == "darwin": from modules import mac_specific @@ -58,6 +57,9 @@ def get_optimal_device_name(): if has_xpu(): return xpu_specific.get_xpu_device_string() + if npu_specific.has_npu: + return npu_specific.get_npu_device_string() + return "cpu" @@ -85,6 +87,16 @@ def torch_gc(): if has_xpu(): xpu_specific.torch_xpu_gc() + if npu_specific.has_npu: + torch_npu_set_device() + npu_specific.torch_npu_gc() + + +def torch_npu_set_device(): + # Work around due to bug in torch_npu, revert me after fixed, @see https://gitee.com/ascend/pytorch/issues/I8KECW?from=project-issue + if npu_specific.has_npu: + torch.npu.set_device(0) + def enable_tf32(): if torch.cuda.is_available(): @@ -141,7 +153,12 @@ def manual_cast_forward(target_dtype): args = [arg.to(target_dtype) if isinstance(arg, torch.Tensor) else arg for arg in args] kwargs = {k: v.to(target_dtype) if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()} - org_dtype = torch_utils.get_param(self).dtype + org_dtype = target_dtype + for param in self.parameters(): + if param.dtype != target_dtype: + org_dtype = param.dtype + break + if org_dtype != target_dtype: self.to(target_dtype) result = self.org_forward(*args, **kwargs) @@ -170,7 +187,7 @@ def manual_cast(target_dtype): continue applied = True org_forward = module_type.forward - if module_type == torch.nn.MultiheadAttention and has_xpu(): + if module_type == torch.nn.MultiheadAttention: module_type.forward = manual_cast_forward(torch.float32) else: module_type.forward = manual_cast_forward(target_dtype) @@ -252,4 +269,3 @@ def first_time_calculation(): x = torch.zeros((1, 1, 3, 3)).to(device, dtype) conv2d = 
torch.nn.Conv2d(1, 1, (3, 3)).to(device, dtype) conv2d(x) - |