From 58c19545c83fa6925c9ce2216ee64964eb5129ce Mon Sep 17 00:00:00 2001
From: hidenorly <twitte.harold@gmail.com>
Date: Tue, 21 Nov 2023 01:13:53 +0900
Subject: Add FP32 fallback support on sd_vae_approx

This retries interpolate in FP32 if the original call fails.

Background: on some environments, such as macOS devices with Mx chips,
we get the following error:

```
"torch/nn/functional.py", line 3931, in interpolate
        return torch._C._nn.upsample_nearest2d(input, output_size, scale_factors)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    RuntimeError: "upsample_nearest2d_channels_last" not implemented for 'Half'
```

In this case, ```--no-half``` does not solve the problem. Therefore this commit adds an FP32 fallback execution to solve it.

Note that the submodule may require additional modifications. The following is an example modification to the other submodule.

```repositories/stable-diffusion-stability-ai/ldm/modules/diffusionmodules/openaimodel.py

class Upsample(nn.Module):
..snip..
    def forward(self, x):
        assert x.shape[1] == self.channels
        if self.dims == 3:
            x = F.interpolate(
                x, (x.shape[2], x.shape[3] * 2, x.shape[4] * 2), mode="nearest"
            )
        else:
            try:
                x = F.interpolate(x, scale_factor=2, mode="nearest")
            except:
                x = F.interpolate(x.to(th.float32), scale_factor=2, mode="nearest").to(x.dtype)
        if self.use_conv:
            x = self.conv(x)
        return x
..snip..
```

As you can see, this is the same FP32 fallback execution as in sd_vae_approx.py.
---
 modules/sd_vae_approx.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/modules/sd_vae_approx.py b/modules/sd_vae_approx.py
index 3965e223..8370493f 100644
--- a/modules/sd_vae_approx.py
+++ b/modules/sd_vae_approx.py
@@ -21,7 +21,13 @@ class VAEApprox(nn.Module):
 
     def forward(self, x):
         extra = 11
-        x = nn.functional.interpolate(x, (x.shape[2] * 2, x.shape[3] * 2))
+        try:
+            x = nn.functional.interpolate(x, (x.shape[2] * 2, x.shape[3] * 2))
+        except RuntimeError as e:
+            if "not implemented for" in str(e) and "Half" in str(e):
+                x = nn.functional.interpolate(x.to(torch.float32), (x.shape[2] * 2, x.shape[3] * 2)).to(x.dtype)
+            else:
+                print(f"An unexpected RuntimeError occurred: {str(e)}")
         x = nn.functional.pad(x, (extra, extra, extra, extra))
 
         for layer in [self.conv1, self.conv2, self.conv3, self.conv4, self.conv5, self.conv6, self.conv7, self.conv8, ]:
-- 
cgit v1.2.1


From 39eae9f009c8302eed77b0942e1e634f6125d53e Mon Sep 17 00:00:00 2001
From: hidenorly <twitte.harold@gmail.com>
Date: Wed, 29 Nov 2023 04:07:48 +0900
Subject: Revert "Add FP32 fallback support on sd_vae_approx"

This reverts commit 58c19545c83fa6925c9ce2216ee64964eb5129ce.
The revert is needed because the modification is expected to move to mac_specific.py
(https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14046#issuecomment-1826731532)
---
 modules/sd_vae_approx.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/modules/sd_vae_approx.py b/modules/sd_vae_approx.py
index 8370493f..3965e223 100644
--- a/modules/sd_vae_approx.py
+++ b/modules/sd_vae_approx.py
@@ -21,13 +21,7 @@ class VAEApprox(nn.Module):
 
     def forward(self, x):
         extra = 11
-        try:
-            x = nn.functional.interpolate(x, (x.shape[2] * 2, x.shape[3] * 2))
-        except RuntimeError as e:
-            if "not implemented for" in str(e) and "Half" in str(e):
-                x = nn.functional.interpolate(x.to(torch.float32), (x.shape[2] * 2, x.shape[3] * 2)).to(x.dtype)
-            else:
-                print(f"An unexpected RuntimeError occurred: {str(e)}")
+        x = nn.functional.interpolate(x, (x.shape[2] * 2, x.shape[3] * 2))
         x = nn.functional.pad(x, (extra, extra, extra, extra))
 
         for layer in [self.conv1, self.conv2, self.conv3, self.conv4, self.conv5, self.conv6, self.conv7, self.conv8, ]:
-- 
cgit v1.2.1


From a0096c58977c01ddc6a2b83a8a7b64da6fd4a51e Mon Sep 17 00:00:00 2001
From: hidenorly <twitte.harold@gmail.com>
Date: Wed, 29 Nov 2023 04:45:04 +0900
Subject: Add FP32 fallback support on torch.nn.functional.interpolate

This retries interpolate in FP32 if the original call fails.

Background: on some environments, such as macOS devices with Mx chips,
we get the following error:

```
"torch/nn/functional.py", line 3931, in interpolate
        return torch._C._nn.upsample_nearest2d(input, output_size, scale_factors)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    RuntimeError: "upsample_nearest2d_channels_last" not implemented for 'Half'
```

In this case, ```--no-half``` does not solve the problem. Therefore this commit adds an FP32 fallback execution to solve it.

Note that ```upsample_nearest2d``` is called from ```torch.nn.functional.interpolate```.
The fallback for torch.nn.functional.interpolate is needed in:
- ```modules/sd_vae_approx.py```'s ```VAEApprox.forward```
- ```repositories/stable-diffusion-stability-ai/ldm/modules/diffusionmodules/openaimodel.py```'s ```Upsample.forward```
---
 modules/mac_specific.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/modules/mac_specific.py b/modules/mac_specific.py
index 89256c5b..3538e659 100644
--- a/modules/mac_specific.py
+++ b/modules/mac_specific.py
@@ -1,6 +1,8 @@
 import logging
 
 import torch
+from typing import Optional, List
+from torch import Tensor
 import platform
 from modules.sd_hijack_utils import CondFunc
 from packaging import version
@@ -51,6 +53,17 @@ def cumsum_fix(input, cumsum_func, *args, **kwargs):
     return cumsum_func(input, *args, **kwargs)
 
 
+# MPS workaround for https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14046
+def interpolate_with_fp32_fallback(orig_func, *args, **kwargs) -> Tensor:
+    try:
+        return orig_func(*args, **kwargs)
+    except RuntimeError as e:
+        if "not implemented for" in str(e) and "Half" in str(e):
+            input_tensor = args[0]
+            return orig_func(input_tensor.to(torch.float32), *args[1:], **kwargs).to(input_tensor.dtype)
+        else:
+            print(f"An unexpected RuntimeError occurred: {str(e)}")
+
 if has_mps:
     if platform.mac_ver()[0].startswith("13.2."):
         # MPS workaround for https://github.com/pytorch/pytorch/issues/95188, thanks to danieldk (https://github.com/explosion/curated-transformers/pull/124)
@@ -77,6 +90,9 @@ if has_mps:
         # MPS workaround for https://github.com/pytorch/pytorch/issues/96113
         CondFunc('torch.nn.functional.layer_norm', lambda orig_func, x, normalized_shape, weight, bias, eps, **kwargs: orig_func(x.float(), normalized_shape, weight.float() if weight is not None else None, bias.float() if bias is not None else bias, eps).to(x.dtype), lambda _, input, *args, **kwargs: len(args) == 4 and input.device.type == 'mps')
 
+        # MPS workaround for https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14046
+        CondFunc('torch.nn.functional.interpolate', interpolate_with_fp32_fallback, None)
+
         # MPS workaround for https://github.com/pytorch/pytorch/issues/92311
         if platform.processor() == 'i386':
             for funcName in ['torch.argmax', 'torch.Tensor.argmax']:
-- 
cgit v1.2.1


From 81c00728b8ec0b6c0e70ea10c7687aad065a95cb Mon Sep 17 00:00:00 2001
From: hidenorly <twitte.harold@gmail.com>
Date: Wed, 29 Nov 2023 04:59:35 +0900
Subject: Fix the Ruff error about unused import

---
 modules/mac_specific.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/mac_specific.py b/modules/mac_specific.py
index 3538e659..d96d86d7 100644
--- a/modules/mac_specific.py
+++ b/modules/mac_specific.py
@@ -1,7 +1,6 @@
 import logging
 
 import torch
-from typing import Optional, List
 from torch import Tensor
 import platform
 from modules.sd_hijack_utils import CondFunc
-- 
cgit v1.2.1