1 files changed, 16 insertions, 2 deletions
diff --git a/modules/devices.py b/modules/devices.py
index c01f0602..ea1f712f 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -8,6 +8,13 @@ from modules import errors, shared
 if sys.platform == "darwin":
     from modules import mac_specific
 
+if shared.cmd_opts.use_ipex:
+    from modules import xpu_specific
+
+
+def has_xpu() -> bool:
+    return shared.cmd_opts.use_ipex and xpu_specific.has_xpu
+
 
 def has_mps() -> bool:
     if sys.platform != "darwin":
@@ -30,6 +37,9 @@ def get_optimal_device_name():
     if has_mps():
         return "mps"
 
+    if has_xpu():
+        return xpu_specific.get_xpu_device_string()
+
     return "cpu"
 
 
@@ -38,7 +48,7 @@ def get_optimal_device():
 
 
 def get_device_for(task):
-    if task in shared.cmd_opts.use_cpu:
+    if task in shared.cmd_opts.use_cpu or "all" in shared.cmd_opts.use_cpu:
         return cpu
 
     return get_optimal_device()
@@ -54,13 +64,17 @@ def torch_gc():
     if has_mps():
         mac_specific.torch_mps_gc()
 
+    if has_xpu():
+        xpu_specific.torch_xpu_gc()
+
 
 def enable_tf32():
     if torch.cuda.is_available():
 
         # enabling benchmark option seems to enable a range of cards to do fp16 when they otherwise can't
         # see https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407
-        if any(torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())):
+        device_id = (int(shared.cmd_opts.device_id) if shared.cmd_opts.device_id is not None and shared.cmd_opts.device_id.isdigit() else 0) or torch.cuda.current_device()
+        if torch.cuda.get_device_capability(device_id) == (7, 5) and torch.cuda.get_device_name(device_id).startswith("NVIDIA GeForce GTX 16"):
             torch.backends.cudnn.benchmark = True
 
         torch.backends.cuda.matmul.allow_tf32 = True