aboutsummaryrefslogtreecommitdiff
path: root/modules/devices.py
diff options
context:
space:
mode:
Diffstat (limited to 'modules/devices.py')
-rw-r--r--modules/devices.py20
1 files changed, 19 insertions, 1 deletions
diff --git a/modules/devices.py b/modules/devices.py
index c705a3cb..1ed6ffdc 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -1,5 +1,7 @@
import sys
import contextlib
+from functools import lru_cache
+
import torch
from modules import errors
@@ -65,7 +67,7 @@ def enable_tf32():
# enabling benchmark option seems to enable a range of cards to do fp16 when they otherwise can't
# see https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407
- if any([torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())]):
+ if any(torch.cuda.get_device_capability(devid) == (7, 5) for devid in range(0, torch.cuda.device_count())):
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
@@ -154,3 +156,19 @@ def test_for_nans(x, where):
message += " Use --disable-nan-check commandline argument to disable this check."
raise NansException(message)
+
+
+@lru_cache
+def first_time_calculation():
+ """
+ just do any calculation with pytorch layers - the first time this is done it allocaltes about 700MB of memory and
+ spends about 2.7 seconds doing that, at least wih NVidia.
+ """
+
+ x = torch.zeros((1, 1)).to(device, dtype)
+ linear = torch.nn.Linear(1, 1).to(device, dtype)
+ linear(x)
+
+ x = torch.zeros((1, 1, 3, 3)).to(device, dtype)
+ conv2d = torch.nn.Conv2d(1, 1, (3, 3)).to(device, dtype)
+ conv2d(x)