 modules/devices.py   |  4 ++--
 modules/sd_hijack.py |  2 +-
 modules/shared.py    |  6 +++++-
 v2-inference.yaml    | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 75 insertions(+), 4 deletions(-)
diff --git a/modules/devices.py b/modules/devices.py
index 397b4b95..f8cffae1 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -38,8 +38,8 @@ def get_optimal_device():
     if torch.cuda.is_available():
         return torch.device(get_cuda_device_string())
 
-    # if has_mps():
-    #     return torch.device("mps")
+    if has_mps():
+        return torch.device("mps")
 
     return cpu
 
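The devices.py change re-enables Apple's Metal Performance Shaders backend as a fallback when CUDA is absent. A minimal standalone sketch of the same selection order, assuming a PyTorch build recent enough to expose torch.backends.mps (PyTorch >= 1.12); the pick_device name is hypothetical, not the repository's has_mps helper:

import torch

def pick_device() -> torch.device:
    # Prefer CUDA, then Apple MPS, then CPU -- mirrors the order in the hunk above.
    if torch.cuda.is_available():
        return torch.device("cuda")
    # Assumption: torch.backends.mps may be missing on older builds, so guard with getattr.
    mps = getattr(torch.backends, "mps", None)
    if mps is not None and mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")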
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index eb679ef9..9b5890e7 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -29,7 +29,7 @@ diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.AttnBlock.forward
 # new memory efficient cross attention blocks do not support hypernets and we already
 # have memory efficient cross attention anyway, so this disables SD2.0's memory efficient cross attention
 ldm.modules.attention.MemoryEfficientCrossAttention = ldm.modules.attention.CrossAttention
-# ldm.modules.attention.BasicTransformerBlock.ATTENTION_MODES["softmax-xformers"] = ldm.modules.attention.CrossAttention
+ldm.modules.attention.BasicTransformerBlock.ATTENTION_MODES["softmax-xformers"] = ldm.modules.attention.CrossAttention
 
 # silence new console spam from SD2
 ldm.modules.attention.print = lambda *args: None
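The sd_hijack.py change routes SD2's "softmax-xformers" attention mode back to the plain CrossAttention class, for the reason the comment gives: the webui ships its own memory-efficient attention, and SD2's variant breaks hypernetworks. The patch works because the transformer block picks its attention class out of a dict by mode name at construction time. A toy sketch of that lookup-table pattern (the class and dict names follow the ldm sources, but treat the class bodies as placeholders):

class CrossAttention:
    pass

class MemoryEfficientCrossAttention:
    pass

# The block resolves its attention implementation by mode name, so rebinding
# the dict entry swaps the implementation for every model built afterwards.
ATTENTION_MODES = {
    "softmax": CrossAttention,
    "softmax-xformers": MemoryEfficientCrossAttention,
}
ATTENTION_MODES["softmax-xformers"] = CrossAttention
assert ATTENTION_MODES["softmax-xformers"] is CrossAttention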
diff --git a/modules/shared.py b/modules/shared.py
index 7d82f4ee..522c56c1 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -110,7 +110,11 @@ restricted_opts = {
 
 from omegaconf import OmegaConf
 config = OmegaConf.load(f"{cmd_opts.config}")
 # XLMR-Large
-text_model_name = config.model.params.cond_stage_config.params.name
+try:
+    text_model_name = config.model.params.cond_stage_config.params.name
+
+except Exception:
+    text_model_name = "stable_diffusion"
 
 cmd_opts.disable_extension_access = (cmd_opts.share or cmd_opts.listen or cmd_opts.server_name) and not cmd_opts.enable_insecure_extension_access
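The shared.py change probes the loaded config for a cond-stage model name (present in XLMR-Large configs such as Alt-Diffusion's) and falls back to "stable_diffusion" when the key chain is absent. A self-contained sketch of the same probe, assuming an OmegaConf config shaped like plain SD with no name key anywhere along the chain:

from omegaconf import OmegaConf

config = OmegaConf.create({"model": {"params": {}}})  # stand-in for a plain SD config

try:
    # Chaining through a missing key fails partway down, which the except absorbs.
    text_model_name = config.model.params.cond_stage_config.params.name
except Exception:
    text_model_name = "stable_diffusion"

print(text_model_name)  # stable_diffusion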
diff --git a/v2-inference.yaml b/v2-inference.yaml
new file mode 100644
index 00000000..0eb25395
--- /dev/null
+++ b/v2-inference.yaml
@@ -0,0 +1,67 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+          lossconfig:
+            target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
\ No newline at end of file
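The new v2-inference.yaml is the stock Stability-AI inference config for Stable Diffusion 2.0: a LatentDiffusion model whose target/params pairs name each component class and its constructor arguments (a fp16 UNet with 1024-dim OpenCLIP cross-attention conditioning, a KL autoencoder, and a frozen penultimate-layer text encoder). A sketch of how such a config is typically consumed, using the instantiate_from_config helper from the ldm package; checkpoint weights still have to be loaded separately:

from omegaconf import OmegaConf
from ldm.util import instantiate_from_config

config = OmegaConf.load("v2-inference.yaml")
# Builds the class named by config.model.target, passing config.model.params as kwargs.
model = instantiate_from_config(config.model)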