Merge branch 'dev' into extra-norm-module

author: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> 2023-08-14 13:34:51 +0800
committer: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com> 2023-08-14 13:34:51 +0800
commit: e7c03ccdcefb2a80129703931ef1f8455708945b (patch)
tree: 6769bdb3a32bdf99cf25422206ff03457bdfa86c /modules/sub_quadratic_attention.py
parent: d9cc27cb29926c9cc5dce331da8fbaf996cf4973 (diff)
parent: 007ecfbb29771aa7cdcf0263ab1811bc75fa5446 (diff)
1 file changed, 2 insertions, 2 deletions
diff --git a/modules/sub_quadratic_attention.py b/modules/sub_quadratic_attention.py
index 497568eb..ae4ee4bb 100644
--- a/modules/sub_quadratic_attention.py
+++ b/modules/sub_quadratic_attention.py
@@ -58,7 +58,7 @@ def _summarize_chunk(
     scale: float,
 ) -> AttnChunk:
     attn_weights = torch.baddbmm(
-        torch.empty(1, 1, 1, device=query.device, dtype=query.dtype),
+        torch.zeros(1, 1, 1, device=query.device, dtype=query.dtype),
         query,
         key.transpose(1,2),
         alpha=scale,
@@ -121,7 +121,7 @@ def _get_attention_scores_no_kv_chunking(
     scale: float,
 ) -> Tensor:
     attn_scores = torch.baddbmm(
-        torch.empty(1, 1, 1, device=query.device, dtype=query.dtype),
+        torch.zeros(1, 1, 1, device=query.device, dtype=query.dtype),
         query,
         key.transpose(1,2),
         alpha=scale,
author	Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>	2023-08-14 13:34:51 +0800
committer	Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>	2023-08-14 13:34:51 +0800
commit	e7c03ccdcefb2a80129703931ef1f8455708945b (patch)
tree	6769bdb3a32bdf99cf25422206ff03457bdfa86c /modules/sub_quadratic_attention.py
parent	d9cc27cb29926c9cc5dce331da8fbaf996cf4973 (diff)
parent	007ecfbb29771aa7cdcf0263ab1811bc75fa5446 (diff)