aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.md162
-rw-r--r--README.md4
-rw-r--r--extensions-builtin/Lora/network_oft.py37
-rw-r--r--extensions-builtin/Lora/networks.py3
-rw-r--r--extensions-builtin/extra-options-section/scripts/extra_options_section.py16
-rw-r--r--extensions-builtin/hypertile/hypertile.py24
-rw-r--r--extensions-builtin/hypertile/scripts/hypertile_script.py56
-rw-r--r--extensions-builtin/hypertile/scripts/hypertile_xyz.py51
-rw-r--r--javascript/extraNetworks.js10
-rw-r--r--javascript/imageviewer.js2
-rw-r--r--javascript/settings.js25
-rw-r--r--javascript/ui.js41
-rw-r--r--modules/api/api.py15
-rw-r--r--modules/api/models.py3
-rw-r--r--modules/cache.py2
-rw-r--r--modules/cmd_args.py1
-rw-r--r--modules/devices.py15
-rw-r--r--modules/generation_parameters_copypaste.py13
-rw-r--r--modules/gradio_extensons.py10
-rw-r--r--modules/images.py1
-rw-r--r--modules/import_hook.py11
-rw-r--r--modules/launch_utils.py36
-rw-r--r--modules/mac_specific.py15
-rw-r--r--modules/models/diffusion/ddpm_edit.py7
-rw-r--r--modules/options.py77
-rw-r--r--modules/postprocessing.py92
-rw-r--r--modules/processing.py117
-rw-r--r--modules/scripts.py80
-rw-r--r--modules/scripts_postprocessing.py86
-rw-r--r--modules/sd_disable_initialization.py2
-rw-r--r--modules/sd_hijack.py10
-rw-r--r--modules/sd_models.py17
-rw-r--r--modules/sd_samplers_cfg_denoiser.py21
-rw-r--r--modules/sd_samplers_timesteps_impl.py4
-rw-r--r--modules/sd_unet.py14
-rw-r--r--modules/shared_items.py16
-rw-r--r--modules/shared_options.py129
-rw-r--r--modules/styles.py169
-rw-r--r--modules/textual_inversion/autocrop.py239
-rw-r--r--modules/textual_inversion/preprocess.py232
-rw-r--r--modules/textual_inversion/ui.py7
-rw-r--r--modules/ui.py107
-rw-r--r--modules/ui_extensions.py17
-rw-r--r--modules/ui_extra_networks.py14
-rw-r--r--modules/ui_extra_networks_user_metadata.py2
-rw-r--r--modules/ui_loadsave.py2
-rw-r--r--modules/ui_postprocessing.py18
-rw-r--r--modules/ui_toprow.py10
-rw-r--r--modules/upscaler.py6
-rw-r--r--modules/xpu_specific.py59
-rw-r--r--script.js29
-rw-r--r--scripts/postprocessing_caption.py30
-rw-r--r--scripts/postprocessing_codeformer.py16
-rw-r--r--scripts/postprocessing_create_flipped_copies.py32
-rw-r--r--scripts/postprocessing_focal_crop.py54
-rw-r--r--scripts/postprocessing_gfpgan.py13
-rw-r--r--scripts/postprocessing_split_oversized.py71
-rw-r--r--scripts/postprocessing_upscale.py12
-rw-r--r--scripts/processing_autosized_crop.py64
-rw-r--r--scripts/soft_inpainting.py747
-rw-r--r--style.css23
-rwxr-xr-xwebui.sh9
62 files changed, 2436 insertions, 771 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2c72359f..67429bbf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,165 @@
+## 1.7.0
+
+### Features:
+* settings tab rework: add search field, add categories, split UI settings page into many
+* add altdiffusion-m18 support ([#13364](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13364))
+* support inference with LyCORIS GLora networks ([#13610](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13610))
+* add lora-embedding bundle system ([#13568](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13568))
+* option to move prompt from top row into generation parameters
+* add support for SSD-1B ([#13865](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13865))
+* support inference with OFT networks ([#13692](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13692))
+* script metadata and DAG sorting mechanism ([#13944](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13944))
+* support HyperTile optimization ([#13948](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13948))
+* add support for SD 2.1 Turbo ([#14170](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14170))
+* remove Train->Preprocessing tab and put all its functionality into Extras tab
+* initial IPEX support for Intel Arc GPU ([#14171](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14171))
+
+### Minor:
+* allow reading model hash from images in img2img batch mode ([#12767](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12767))
+* add option to align with sgm repo's sampling implementation ([#12818](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12818))
+* extra field for lora metadata viewer: `ss_output_name` ([#12838](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12838))
+* add action in settings page to calculate all SD checkpoint hashes ([#12909](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12909))
+* add button to copy prompt to style editor ([#12975](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12975))
+* add --skip-load-model-at-start option ([#13253](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13253))
+* write infotext to gif images
+* read infotext from gif images ([#13068](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13068))
+* allow configuring the initial state of InputAccordion in ui-config.json ([#13189](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13189))
+* allow editing whitespace delimiters for ctrl+up/ctrl+down prompt editing ([#13444](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13444))
+* prevent accidentally closing popup dialogs ([#13480](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13480))
+* added option to play notification sound or not ([#13631](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13631))
+* show the preview image in the full screen image viewer if available ([#13459](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13459))
+* support for webui.settings.bat ([#13638](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13638))
+* add an option to not print stack traces on ctrl+c
+* start/restart generation by Ctrl (Alt) + Enter ([#13644](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13644))
+* update prompts_from_file script to allow concatenating entries with the general prompt ([#13733](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13733))
+* added a visible checkbox to input accordion
+* added an option to hide all txt2img/img2img parameters in an accordion ([#13826](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13826))
+* added 'Path' sorting option for Extra network cards ([#13968](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13968))
+* enable prompt hotkeys in style editor ([#13931](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13931))
+* option to show batch img2img results in UI ([#14009](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14009))
+* infotext updates: add option to disregard certain infotext fields, add option to not include VAE in infotext, add explanation to infotext settings page, move some options to infotext settings page
+* add FP32 fallback support on sd_vae_approx ([#14046](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14046))
+* support XYZ scripts / split hires path from unet ([#14126](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14126))
+* allow use of multiple styles csv files ([#14125](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14125))
+
+### Extensions and API:
+* update gradio to 3.41.2
+* support installed extensions list api ([#12774](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12774))
+* update pnginfo API to return dict with parsed values
+* add noisy latent to `ExtraNoiseParams` for callback ([#12856](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12856))
+* show extension datetime in UTC ([#12864](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12864), [#12865](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12865), [#13281](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13281))
+* add an option to choose how to combine hires fix and refiner
+* include program version in info response. ([#13135](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13135))
+* sd_unet support for SDXL
+* patch DDPM.register_betas so that users can put given_betas in model yaml ([#13276](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13276))
+* xyz_grid: add prepare ([#13266](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13266))
+* allow multiple localization files with same language in extensions ([#13077](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13077))
+* add onEdit function for js and rework token-counter.js to use it
+* fix the key error exception when processing override_settings keys ([#13567](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13567))
+* ability for extensions to return custom data via api in response.images ([#13463](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13463))
+* call state.jobnext() before postproces*() ([#13762](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13762))
+* add option to set notification sound volume ([#13884](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13884))
+* update Ruff to 0.1.6 ([#14059](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14059))
+* add Block component creation callback ([#14119](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14119))
+* catch uncaught exception with ui creation scripts ([#14120](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14120))
+* use extension name for determining an extension is installed in the index ([#14063](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14063))
+* update is_installed() from launch_utils.py to fix reinstalling already installed packages ([#14192](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14192))
+
+### Bug Fixes:
+* fix pix2pix producing bad results
+* fix defaults settings page breaking when any of main UI tabs are hidden
+* fix error that causes some extra networks to be disabled if both <lora:> and <lyco:> are present in the prompt
+* fix for Reload UI function: if you reload UI on one tab, other opened tabs will no longer stop working
+* prevent duplicate resize handler ([#12795](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12795))
+* small typo: vae resolve bug ([#12797](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12797))
+* hide broken image crop tool ([#12792](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12792))
+* don't show hidden samplers in dropdown for XYZ script ([#12780](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12780))
+* fix style editing dialog breaking if it's opened in both img2img and txt2img tabs
+* hide --gradio-auth and --api-auth values from /internal/sysinfo report
+* add missing infotext for RNG in options ([#12819](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12819))
+* fix notification not playing when built-in webui tab is inactive ([#12834](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12834))
+* honor `--skip-install` for extension installers ([#12832](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12832))
+* don't print blank stdout in extension installers ([#12833](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12833), [#12855](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12855))
+* get progressbar to display correctly in extensions tab
+* keep order in list of checkpoints when loading model that doesn't have a checksum
+* fix inpainting models in txt2img creating black pictures
+* fix generation params regex ([#12876](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12876))
+* fix batch img2img output dir with script ([#12926](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12926))
+* fix #13080 - Hypernetwork/TI preview generation ([#13084](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13084))
+* fix bug with sigma min/max overrides. ([#12995](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12995))
+* more accurate check for enabling cuDNN benchmark on 16XX cards ([#12924](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12924))
+* don't use multicond parser for negative prompt counter ([#13118](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13118))
+* fix data-sort-name containing spaces ([#13412](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13412))
+* update card on correct tab when editing metadata ([#13411](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13411))
+* fix viewing/editing metadata when filename contains an apostrophe ([#13395](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13395))
+* fix: --sd_model in "Prompts from file or textbox" script is not working ([#13302](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13302))
+* better Support for Portable Git ([#13231](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13231))
+* fix issues when webui_dir is not work_dir ([#13210](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13210))
+* fix: lora-bias-backup don't reset cache ([#13178](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13178))
+* account for customizable extra network separators when removing extra network text from the prompt ([#12877](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12877))
+* re fix batch img2img output dir with script ([#13170](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13170))
+* fix `--ckpt-dir` path separator and option use `short name` for checkpoint dropdown ([#13139](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13139))
+* consolidated allowed preview formats, Fix extra network `.gif` not working as preview ([#13121](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13121))
+* fix venv_dir=- environment variable not working as expected on linux ([#13469](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13469))
+* repair unload sd checkpoint button
+* edit-attention fixes ([#13533](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13533))
+* fix bug when using --gfpgan-models-path ([#13718](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13718))
+* properly apply sort order for extra network cards when selected from dropdown
+* fixes generation restart not working for some users when 'Ctrl+Enter' is pressed ([#13962](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13962))
+* thread safe extra network list_items ([#13014](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13014))
+* fix not able to exit metadata popup when pop up is too big ([#14156](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14156))
+* fix auto focal point crop for opencv >= 4.8 ([#14121](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14121))
+* make 'use-cpu all' actually apply to 'all' ([#14131](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14131))
+* extras tab batch: actually use original filename
+* make webui not crash when running with --disable-all-extensions option
+
+### Other:
+* non-local condition ([#12814](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12814))
+* fix minor typos ([#12827](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12827))
+* remove xformers Python version check ([#12842](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12842))
+* style: file-metadata word-break ([#12837](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12837))
+* revert SGM noise multiplier change for img2img because it breaks hires fix
+* do not change quicksettings dropdown option when value returned is `None` ([#12854](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12854))
+* [RC 1.6.0 - zoom is partly hidden] Update style.css ([#12839](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12839))
+* chore: change extension time format ([#12851](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12851))
+* WEBUI.SH - Use torch 2.1.0 release candidate for Navi 3 ([#12929](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12929))
+* add Fallback at images.read_info_from_image if exif data was invalid ([#13028](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13028))
+* update cmd arg description ([#12986](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12986))
+* fix: update shared.opts.data when add_option ([#12957](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12957), [#13213](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13213))
+* restore missing tooltips ([#12976](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12976))
+* use default dropdown padding on mobile ([#12880](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12880))
+* put enable console prompts option into settings from commandline args ([#13119](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13119))
+* fix some deprecated types ([#12846](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12846))
+* bump to torchsde==0.2.6 ([#13418](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13418))
+* update dragdrop.js ([#13372](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13372))
+* use orderdict as lru cache:opt/bug ([#13313](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13313))
+* XYZ if not include sub grids do not save sub grid ([#13282](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13282))
+* initialize state.time_start before state.job_count ([#13229](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13229))
+* fix fieldname regex ([#13458](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13458))
+* change denoising_strength default to None. ([#13466](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13466))
+* fix regression ([#13475](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13475))
+* fix IndexError ([#13630](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13630))
+* fix: checkpoints_loaded:{checkpoint:state_dict}, model.load_state_dict issue in dict value empty ([#13535](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13535))
+* update bug_report.yml ([#12991](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12991))
+* requirements_versions httpx==0.24.1 ([#13839](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13839))
+* fix parenthesis auto selection ([#13829](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13829))
+* fix #13796 ([#13797](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13797))
+* corrected a typo in `modules/cmd_args.py` ([#13855](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13855))
+* feat: fix randn found element of type float at pos 2 ([#14004](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14004))
+* adds tqdm handler to logging_config.py for progress bar integration ([#13996](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13996))
+* hotfix: call shared.state.end() after postprocessing done ([#13977](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13977))
+* fix dependency address patch 1 ([#13929](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13929))
+* save sysinfo as .json ([#14035](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14035))
+* move exception_records related methods to errors.py ([#14084](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14084))
+* compatibility ([#13936](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13936))
+* json.dump(ensure_ascii=False) ([#14108](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14108))
+* dir buttons start with / so only the correct dir will be shown and no… ([#13957](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/13957))
+* alternate implementation for unet forward replacement that does not depend on hijack being applied
+* re-add `keyedit_delimiters_whitespace` setting lost as part of commit e294e46 ([#14178](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14178))
+* fix `save_samples` being checked early when saving masked composite ([#14177](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14177))
+* slight optimization for mask and mask_composite ([#14181](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14181))
+* add import_hook hack to work around basicsr/torchvision incompatibility ([#14186](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14186))
+
## 1.6.1
### Bug Fixes:
diff --git a/README.md b/README.md
index f412a79e..9f9f33b1 100644
--- a/README.md
+++ b/README.md
@@ -121,7 +121,9 @@ Alternatively, use online services (like Google Colab):
# Debian-based:
sudo apt install wget git python3 python3-venv libgl1 libglib2.0-0
# Red Hat-based:
-sudo dnf install wget git python3
+sudo dnf install wget git python3 gperftools-libs libglvnd-glx
+# openSUSE-based:
+sudo zypper install wget git python3 libtcmalloc4 libglvnd
# Arch-based:
sudo pacman -S wget git python3
```
diff --git a/extensions-builtin/Lora/network_oft.py b/extensions-builtin/Lora/network_oft.py
index 05c37811..fa647020 100644
--- a/extensions-builtin/Lora/network_oft.py
+++ b/extensions-builtin/Lora/network_oft.py
@@ -21,6 +21,8 @@ class NetworkModuleOFT(network.NetworkModule):
self.lin_module = None
self.org_module: list[torch.Module] = [self.sd_module]
+ self.scale = 1.0
+
# kohya-ss
if "oft_blocks" in weights.w.keys():
self.is_kohya = True
@@ -53,12 +55,18 @@ class NetworkModuleOFT(network.NetworkModule):
self.constraint = None
self.block_size, self.num_blocks = factorization(self.out_dim, self.dim)
- def calc_updown_kb(self, orig_weight, multiplier):
+ def calc_updown(self, orig_weight):
oft_blocks = self.oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
- oft_blocks = oft_blocks - oft_blocks.transpose(1, 2) # ensure skew-symmetric orthogonal matrix
+ eye = torch.eye(self.block_size, device=self.oft_blocks.device)
+
+ if self.is_kohya:
+ block_Q = oft_blocks - oft_blocks.transpose(1, 2) # ensure skew-symmetric orthogonal matrix
+ norm_Q = torch.norm(block_Q.flatten())
+ new_norm_Q = torch.clamp(norm_Q, max=self.constraint)
+ block_Q = block_Q * ((new_norm_Q + 1e-8) / (norm_Q + 1e-8))
+ oft_blocks = torch.matmul(eye + block_Q, (eye - block_Q).float().inverse())
R = oft_blocks.to(orig_weight.device, dtype=orig_weight.dtype)
- R = R * multiplier + torch.eye(self.block_size, device=orig_weight.device)
# This errors out for MultiheadAttention, might need to be handled up-stream
merged_weight = rearrange(orig_weight, '(k n) ... -> k n ...', k=self.num_blocks, n=self.block_size)
@@ -72,26 +80,3 @@ class NetworkModuleOFT(network.NetworkModule):
updown = merged_weight.to(orig_weight.device, dtype=orig_weight.dtype) - orig_weight
output_shape = orig_weight.shape
return self.finalize_updown(updown, orig_weight, output_shape)
-
- def calc_updown(self, orig_weight):
- # if alpha is a very small number as in coft, calc_scale() will return a almost zero number so we ignore it
- multiplier = self.multiplier()
- return self.calc_updown_kb(orig_weight, multiplier)
-
- # override to remove the multiplier/scale factor; it's already multiplied in get_weight
- def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
- if self.bias is not None:
- updown = updown.reshape(self.bias.shape)
- updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype)
- updown = updown.reshape(output_shape)
-
- if len(output_shape) == 4:
- updown = updown.reshape(output_shape)
-
- if orig_weight.size().numel() == updown.size().numel():
- updown = updown.reshape(orig_weight.shape)
-
- if ex_bias is not None:
- ex_bias = ex_bias * self.multiplier()
-
- return updown, ex_bias
diff --git a/extensions-builtin/Lora/networks.py b/extensions-builtin/Lora/networks.py
index 7f814706..629bf853 100644
--- a/extensions-builtin/Lora/networks.py
+++ b/extensions-builtin/Lora/networks.py
@@ -159,7 +159,8 @@ def load_network(name, network_on_disk):
bundle_embeddings = {}
for key_network, weight in sd.items():
- key_network_without_network_parts, network_part = key_network.split(".", 1)
+ key_network_without_network_parts, _, network_part = key_network.partition(".")
+
if key_network_without_network_parts == "bundle_emb":
emb_name, vec_name = network_part.split(".", 1)
emb_dict = bundle_embeddings.get(emb_name, {})
diff --git a/extensions-builtin/extra-options-section/scripts/extra_options_section.py b/extensions-builtin/extra-options-section/scripts/extra_options_section.py
index 983f87ff..ac2c3de4 100644
--- a/extensions-builtin/extra-options-section/scripts/extra_options_section.py
+++ b/extensions-builtin/extra-options-section/scripts/extra_options_section.py
@@ -23,11 +23,12 @@ class ExtraOptionsSection(scripts.Script):
self.setting_names = []
self.infotext_fields = []
extra_options = shared.opts.extra_options_img2img if is_img2img else shared.opts.extra_options_txt2img
+ elem_id_tabname = "extra_options_" + ("img2img" if is_img2img else "txt2img")
mapping = {k: v for v, k in generation_parameters_copypaste.infotext_to_setting_name_mapping}
with gr.Blocks() as interface:
- with gr.Accordion("Options", open=False) if shared.opts.extra_options_accordion and extra_options else gr.Group():
+ with gr.Accordion("Options", open=False, elem_id=elem_id_tabname) if shared.opts.extra_options_accordion and extra_options else gr.Group(elem_id=elem_id_tabname):
row_count = math.ceil(len(extra_options) / shared.opts.extra_options_cols)
@@ -64,11 +65,14 @@ class ExtraOptionsSection(scripts.Script):
p.override_settings[name] = value
-shared.options_templates.update(shared.options_section(('ui', "User interface"), {
- "extra_options_txt2img": shared.OptionInfo([], "Options in main UI - txt2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in txt2img interfaces").needs_reload_ui(),
- "extra_options_img2img": shared.OptionInfo([], "Options in main UI - img2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in img2img interfaces").needs_reload_ui(),
- "extra_options_cols": shared.OptionInfo(1, "Options in main UI - number of columns", gr.Number, {"precision": 0}).needs_reload_ui(),
- "extra_options_accordion": shared.OptionInfo(False, "Options in main UI - place into an accordion").needs_reload_ui()
+shared.options_templates.update(shared.options_section(('settings_in_ui', "Settings in UI", "ui"), {
+ "settings_in_ui": shared.OptionHTML("""
+This page allows you to add some settings to the main interface of txt2img and img2img tabs.
+"""),
+ "extra_options_txt2img": shared.OptionInfo([], "Settings for txt2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in txt2img interfaces").needs_reload_ui(),
+ "extra_options_img2img": shared.OptionInfo([], "Settings for img2img", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that also appear in img2img interfaces").needs_reload_ui(),
+ "extra_options_cols": shared.OptionInfo(1, "Number of columns for added settings", gr.Slider, {"step": 1, "minimum": 1, "maximum": 20}).info("displayed amount will depend on the actual browser window width").needs_reload_ui(),
+ "extra_options_accordion": shared.OptionInfo(False, "Place added settings into an accordion").needs_reload_ui()
}))
diff --git a/extensions-builtin/hypertile/hypertile.py b/extensions-builtin/hypertile/hypertile.py
index feb02fd2..0f40e2d3 100644
--- a/extensions-builtin/hypertile/hypertile.py
+++ b/extensions-builtin/hypertile/hypertile.py
@@ -6,7 +6,6 @@ Original author: @tfernd Github: https://github.com/tfernd/HyperTile
from __future__ import annotations
-import functools
from dataclasses import dataclass
from typing import Callable
@@ -189,20 +188,27 @@ DEPTH_LAYERS_XL = {
RNG_INSTANCE = random.Random()
-
-def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int:
+@cache
+def get_divisors(value: int, min_value: int, /, max_options: int = 1) -> list[int]:
"""
- Returns a random divisor of value that
+ Returns divisors of value that
x * min_value <= value
- if max_options is 1, the behavior is deterministic
+ in big -> small order, amount of divisors is limited by max_options
"""
+ max_options = max(1, max_options) # at least 1 option should be returned
min_value = min(min_value, value)
-
- # All big divisors of value (inclusive)
divisors = [i for i in range(min_value, value + 1) if value % i == 0] # divisors in small -> big order
-
ns = [value // i for i in divisors[:max_options]] # has at least 1 element # big -> small order
+ return ns
+
+def random_divisor(value: int, min_value: int, /, max_options: int = 1) -> int:
+ """
+ Returns a random divisor of value that
+ x * min_value <= value
+ if max_options is 1, the behavior is deterministic
+ """
+ ns = get_divisors(value, min_value, max_options=max_options) # get cached divisors
idx = RNG_INSTANCE.randint(0, len(ns) - 1)
return ns[idx]
@@ -212,7 +218,7 @@ def set_hypertile_seed(seed: int) -> None:
RNG_INSTANCE.seed(seed)
-@functools.cache
+@cache
def largest_tile_size_available(width: int, height: int) -> int:
"""
Calculates the largest tile size available for a given width and height
diff --git a/extensions-builtin/hypertile/scripts/hypertile_script.py b/extensions-builtin/hypertile/scripts/hypertile_script.py
index 3cc29cd1..395d584b 100644
--- a/extensions-builtin/hypertile/scripts/hypertile_script.py
+++ b/extensions-builtin/hypertile/scripts/hypertile_script.py
@@ -1,5 +1,6 @@
import hypertile
from modules import scripts, script_callbacks, shared
+from scripts.hypertile_xyz import add_axis_options
class ScriptHypertile(scripts.Script):
@@ -16,8 +17,42 @@ class ScriptHypertile(scripts.Script):
configure_hypertile(p.width, p.height, enable_unet=shared.opts.hypertile_enable_unet)
+ self.add_infotext(p)
+
def before_hr(self, p, *args):
- configure_hypertile(p.hr_upscale_to_x, p.hr_upscale_to_y, enable_unet=shared.opts.hypertile_enable_unet_secondpass or shared.opts.hypertile_enable_unet)
+
+ enable = shared.opts.hypertile_enable_unet_secondpass or shared.opts.hypertile_enable_unet
+
+ # exclusive hypertile seed for the second pass
+ if enable:
+ hypertile.set_hypertile_seed(p.all_seeds[0])
+
+ configure_hypertile(p.hr_upscale_to_x, p.hr_upscale_to_y, enable_unet=enable)
+
+ if enable and not shared.opts.hypertile_enable_unet:
+ p.extra_generation_params["Hypertile U-Net second pass"] = True
+
+ self.add_infotext(p, add_unet_params=True)
+
+ def add_infotext(self, p, add_unet_params=False):
+ def option(name):
+ value = getattr(shared.opts, name)
+ default_value = shared.opts.get_default(name)
+ return None if value == default_value else value
+
+ if shared.opts.hypertile_enable_unet:
+ p.extra_generation_params["Hypertile U-Net"] = True
+
+ if shared.opts.hypertile_enable_unet or add_unet_params:
+ p.extra_generation_params["Hypertile U-Net max depth"] = option('hypertile_max_depth_unet')
+ p.extra_generation_params["Hypertile U-Net max tile size"] = option('hypertile_max_tile_unet')
+ p.extra_generation_params["Hypertile U-Net swap size"] = option('hypertile_swap_size_unet')
+
+ if shared.opts.hypertile_enable_vae:
+ p.extra_generation_params["Hypertile VAE"] = True
+ p.extra_generation_params["Hypertile VAE max depth"] = option('hypertile_max_depth_vae')
+ p.extra_generation_params["Hypertile VAE max tile size"] = option('hypertile_max_tile_vae')
+ p.extra_generation_params["Hypertile VAE swap size"] = option('hypertile_swap_size_vae')
def configure_hypertile(width, height, enable_unet=True):
@@ -53,16 +88,16 @@ def on_ui_settings():
benefit.
"""),
- "hypertile_enable_unet": shared.OptionInfo(False, "Enable Hypertile U-Net").info("noticeable change in details of the generated picture; if enabled, overrides the setting below"),
- "hypertile_enable_unet_secondpass": shared.OptionInfo(False, "Enable Hypertile U-Net for hires fix second pass"),
- "hypertile_max_depth_unet": shared.OptionInfo(3, "Hypertile U-Net max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}),
- "hypertile_max_tile_unet": shared.OptionInfo(256, "Hypertile U-net max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}),
- "hypertile_swap_size_unet": shared.OptionInfo(3, "Hypertile U-net swap size", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}),
+ "hypertile_enable_unet": shared.OptionInfo(False, "Enable Hypertile U-Net", infotext="Hypertile U-Net").info("enables hypertile for all modes, including hires fix second pass; noticeable change in details of the generated picture"),
+ "hypertile_enable_unet_secondpass": shared.OptionInfo(False, "Enable Hypertile U-Net for hires fix second pass", infotext="Hypertile U-Net second pass").info("enables hypertile just for hires fix second pass - regardless of whether the above setting is enabled"),
+ "hypertile_max_depth_unet": shared.OptionInfo(3, "Hypertile U-Net max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}, infotext="Hypertile U-Net max depth").info("larger = more neural network layers affected; minor effect on performance"),
+ "hypertile_max_tile_unet": shared.OptionInfo(256, "Hypertile U-Net max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}, infotext="Hypertile U-Net max tile size").info("larger = worse performance"),
+ "hypertile_swap_size_unet": shared.OptionInfo(3, "Hypertile U-Net swap size", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}, infotext="Hypertile U-Net swap size"),
- "hypertile_enable_vae": shared.OptionInfo(False, "Enable Hypertile VAE").info("minimal change in the generated picture"),
- "hypertile_max_depth_vae": shared.OptionInfo(3, "Hypertile VAE max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}),
- "hypertile_max_tile_vae": shared.OptionInfo(128, "Hypertile VAE max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}),
- "hypertile_swap_size_vae": shared.OptionInfo(3, "Hypertile VAE swap size ", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}),
+ "hypertile_enable_vae": shared.OptionInfo(False, "Enable Hypertile VAE", infotext="Hypertile VAE").info("minimal change in the generated picture"),
+ "hypertile_max_depth_vae": shared.OptionInfo(3, "Hypertile VAE max depth", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}, infotext="Hypertile VAE max depth"),
+ "hypertile_max_tile_vae": shared.OptionInfo(128, "Hypertile VAE max tile size", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}, infotext="Hypertile VAE max tile size"),
+ "hypertile_swap_size_vae": shared.OptionInfo(3, "Hypertile VAE swap size ", gr.Slider, {"minimum": 0, "maximum": 64, "step": 1}, infotext="Hypertile VAE swap size"),
}
for name, opt in options.items():
@@ -71,3 +106,4 @@ def on_ui_settings():
script_callbacks.on_ui_settings(on_ui_settings)
+script_callbacks.on_before_ui(add_axis_options)
diff --git a/extensions-builtin/hypertile/scripts/hypertile_xyz.py b/extensions-builtin/hypertile/scripts/hypertile_xyz.py
new file mode 100644
index 00000000..9e96ae3c
--- /dev/null
+++ b/extensions-builtin/hypertile/scripts/hypertile_xyz.py
@@ -0,0 +1,51 @@
+from modules import scripts
+from modules.shared import opts
+
+# Locate the xyz_grid script module: take the first entry of scripts.scripts_data whose
+# script class reports "xyz_grid.py" as its __module__, and keep that entry's .module.
+# This runs when this file is imported; the [0] raises IndexError if no entry matches.
+xyz_grid = [x for x in scripts.scripts_data if x.script_class.__module__ == "xyz_grid.py"][0].module
+
+def int_applier(value_name:str, min_range:int = -1, max_range:int = -1):
+    """
+    Returns a function that applies the given value to the given value_name in opts.data.
+
+    The returned callable has the signature ``apply_int(p, x, xs)``. It converts
+    ``x`` with ``int()``, checks it against the optional bounds and stores the
+    integer in ``opts.data[value_name]``. ``p`` and ``xs`` are accepted but unused.
+
+    A bound of -1 means "no bound on that side". An out-of-range value raises
+    AssertionError; it is raised explicitly (not via ``assert``) so the check
+    still runs when Python is started with ``-O``. A value that ``int()`` cannot
+    convert raises the error produced by ``int()`` itself.
+    """
+    def validate(value_name:str, value:str) -> int:
+        number = int(value)
+        if min_range != -1 and number < min_range:
+            raise AssertionError(f"Value {number} for {value_name} must be greater than or equal to {min_range}")
+        if max_range != -1 and number > max_range:
+            raise AssertionError(f"Value {number} for {value_name} must be less than or equal to {max_range}")
+        return number
+
+    def apply_int(p, x, xs):
+        # validate() hands back the parsed integer, so x is converted only once
+        opts.data[value_name] = validate(value_name, x)
+
+    return apply_int
+
+def bool_applier(value_name:str):
+    """
+    Returns a function that applies the given value to the given value_name in opts.data.
+
+    The returned callable has the signature ``apply_bool(p, x, xs)``. ``x`` is
+    compared case-insensitively against "true" / "false" and the resulting
+    boolean is stored in ``opts.data[value_name]``. ``x`` is passed through
+    ``str()`` first, so an actual ``bool`` is accepted as well as its text form.
+
+    Any other value raises AssertionError; it is raised explicitly (not via
+    ``assert``) so the check still runs when Python is started with ``-O``.
+    """
+    def validate(value_name:str, value:str) -> bool:
+        text = str(value).lower()
+        if text not in ("true", "false"):
+            raise AssertionError(f"Value {value} for {value_name} must be either true or false")
+        return text == "true"
+
+    def apply_bool(p, x, xs):
+        opts.data[value_name] = validate(value_name, x)
+
+    return apply_bool
+
+def add_axis_options():
+    """Add the Hypertile axis options to ``xyz_grid.axis_options``.
+
+    One AxisOption is built per Hypertile setting. If any of the new labels is
+    already present in ``xyz_grid.axis_options``, nothing is added.
+    """
+    def toggle_axis(label, option_name):
+        # boolean setting exposed as a string axis with true/false choices
+        return xyz_grid.AxisOption(label, str, bool_applier(option_name), choices=xyz_grid.boolean_choice(reverse=True))
+
+    def depth_choices():
+        return [str(depth) for depth in range(4)]
+
+    hypertile_axes = [
+        toggle_axis("[Hypertile] Unet First pass Enabled", "hypertile_enable_unet"),
+        toggle_axis("[Hypertile] Unet Second pass Enabled", "hypertile_enable_unet_secondpass"),
+        xyz_grid.AxisOption("[Hypertile] Unet Max Depth", int, int_applier("hypertile_max_depth_unet", 0, 3), choices=depth_choices),
+        xyz_grid.AxisOption("[Hypertile] Unet Max Tile Size", int, int_applier("hypertile_max_tile_unet", 0, 512)),
+        xyz_grid.AxisOption("[Hypertile] Unet Swap Size", int, int_applier("hypertile_swap_size_unet", 0, 64)),
+        toggle_axis("[Hypertile] VAE Enabled", "hypertile_enable_vae"),
+        xyz_grid.AxisOption("[Hypertile] VAE Max Depth", int, int_applier("hypertile_max_depth_vae", 0, 3), choices=depth_choices),
+        xyz_grid.AxisOption("[Hypertile] VAE Max Tile Size", int, int_applier("hypertile_max_tile_vae", 0, 512)),
+        xyz_grid.AxisOption("[Hypertile] VAE Swap Size", int, int_applier("hypertile_swap_size_vae", 0, 64)),
+    ]
+
+    known_labels = {axis.label for axis in xyz_grid.axis_options}
+    if any(axis.label in known_labels for axis in hypertile_axes):
+        # at least one label is already registered; leave the list untouched
+        return
+
+    xyz_grid.axis_options.extend(hypertile_axes)
diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js
index a1bf29a8..98a7abb7 100644
--- a/javascript/extraNetworks.js
+++ b/javascript/extraNetworks.js
@@ -130,6 +130,10 @@ function extraNetworksMovePromptToTab(tabname, id, showPrompt, showNegativePromp
} else {
promptContainer.insertBefore(prompt, promptContainer.firstChild);
}
+
+ if (elem) {
+ elem.classList.toggle('extra-page-prompts-active', showNegativePrompt || showPrompt);
+ }
}
@@ -388,3 +392,9 @@ function extraNetworksRefreshSingleCard(page, tabname, name) {
}
});
}
+
+// Window-level key handler: pressing Escape calls closePopup().
+window.addEventListener("keydown", function(keyEvent) {
+    if (keyEvent.key != "Escape") return;
+
+    closePopup();
+});
diff --git a/javascript/imageviewer.js b/javascript/imageviewer.js
index e4dae91b..625c5d14 100644
--- a/javascript/imageviewer.js
+++ b/javascript/imageviewer.js
@@ -34,7 +34,7 @@ function updateOnBackgroundChange() {
if (modalImage && modalImage.offsetParent) {
let currentButton = selected_gallery_button();
let preview = gradioApp().querySelectorAll('.livePreview > img');
- if (preview.length > 0) {
+ if (opts.js_live_preview_in_modal_lightbox && preview.length > 0) {
// show preview image if available
modalImage.src = preview[preview.length - 1].src;
} else if (currentButton?.children?.length > 0 && modalImage.src != currentButton.children[0].src) {
diff --git a/javascript/settings.js b/javascript/settings.js
index 4e79ec00..e6009290 100644
--- a/javascript/settings.js
+++ b/javascript/settings.js
@@ -44,3 +44,28 @@ onUiLoaded(function() {
buttonShowAllPages.addEventListener("click", settingsShowAllTabs);
});
+
+
+// Inserts a <span class="settings-category"> heading before the settings button
+// of each section listed in opts._categories. Skipped entirely once any such
+// heading already exists under #settings.
+onOptionsChanged(function() {
+    if (gradioApp().querySelector('#settings .settings-category')) return;
+
+    // trimmed button text -> button element
+    var buttonsBySection = {};
+    for (const button of gradioApp().querySelectorAll('#settings > div > button')) {
+        buttonsBySection[button.textContent.trim()] = button;
+    }
+
+    // each entry is a [section, category] pair
+    for (const [section, category] of opts._categories) {
+        var target = buttonsBySection[section];
+        if (!target) continue;
+
+        var heading = document.createElement('SPAN');
+        heading.textContent = category;
+        heading.className = 'settings-category';
+
+        target.parentElement.insertBefore(heading, target);
+    }
+});
+
diff --git a/javascript/ui.js b/javascript/ui.js
index 2e262602..18c9f891 100644
--- a/javascript/ui.js
+++ b/javascript/ui.js
@@ -170,6 +170,23 @@ function submit_img2img() {
return res;
}
+/**
+ * Submit handler for the extras tab.
+ *
+ * Calls showSubmitButtons('extras', false), starts requestProgress() under a
+ * fresh random id against the extras gallery elements, and returns the
+ * argument array from create_submit_args() with that id stored in slot 0.
+ */
+function submit_extras() {
+    showSubmitButtons('extras', false);
+
+    var id = randomId();
+
+    requestProgress(id, gradioApp().getElementById('extras_gallery_container'), gradioApp().getElementById('extras_gallery'), function() {
+        // last argument of requestProgress: flips the submit buttons back
+        showSubmitButtons('extras', true);
+    });
+
+    var res = create_submit_args(arguments);
+
+    res[0] = id;
+
+    // no console.log(res) here: it would dump every submit argument to the
+    // browser console on each run
+    return res;
+}
+
function restoreProgressTxt2img() {
showRestoreProgressButton("txt2img", false);
var id = localGet("txt2img_task_id");
@@ -198,9 +215,33 @@ function restoreProgressImg2img() {
}
+/**
+ * Configure the width and height elements on `tabname` to accept
+ * pasting of resolutions in the form of "width x height".
+ *
+ * Does nothing when either number input cannot be found.
+ */
+function setupResolutionPasting(tabname) {
+    var width = gradioApp().querySelector(`#${tabname}_width input[type=number]`);
+    var height = gradioApp().querySelector(`#${tabname}_height input[type=number]`);
+
+    // querySelector returns null for a missing element; bail out rather than
+    // throw from addEventListener and abort the caller.
+    if (!width || !height) return;
+
+    for (const el of [width, height]) {
+        el.addEventListener('paste', function(event) {
+            var pasteData = event.clipboardData.getData('text/plain');
+            // two digit runs separated by one or more non-digits, e.g. "512x768"
+            var parsed = pasteData.match(/^\s*(\d+)\D+(\d+)\s*$/);
+            if (parsed) {
+                width.value = parsed[1];
+                height.value = parsed[2];
+                updateInput(width);
+                updateInput(height);
+                // both fields were filled here, so suppress the normal paste
+                event.preventDefault();
+            }
+        });
+    }
+}
+
onUiLoaded(function() {
showRestoreProgressButton('txt2img', localGet("txt2img_task_id"));
showRestoreProgressButton('img2img', localGet("img2img_task_id"));
+ setupResolutionPasting('txt2img');
+ setupResolutionPasting('img2img');
});
diff --git a/modules/api/api.py b/modules/api/api.py
index 09083874..b3d74e51 100644
--- a/modules/api/api.py
+++ b/modules/api/api.py
@@ -22,7 +22,6 @@ from modules.api import models
from modules.shared import opts
from modules.processing import StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
from modules.textual_inversion.textual_inversion import create_embedding, train_embedding
-from modules.textual_inversion.preprocess import preprocess
from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork
from PIL import PngImagePlugin, Image
from modules.sd_models_config import find_checkpoint_config_near_filename
@@ -235,7 +234,6 @@ class Api:
self.add_api_route("/sdapi/v1/refresh-vae", self.refresh_vae, methods=["POST"])
self.add_api_route("/sdapi/v1/create/embedding", self.create_embedding, methods=["POST"], response_model=models.CreateResponse)
self.add_api_route("/sdapi/v1/create/hypernetwork", self.create_hypernetwork, methods=["POST"], response_model=models.CreateResponse)
- self.add_api_route("/sdapi/v1/preprocess", self.preprocess, methods=["POST"], response_model=models.PreprocessResponse)
self.add_api_route("/sdapi/v1/train/embedding", self.train_embedding, methods=["POST"], response_model=models.TrainResponse)
self.add_api_route("/sdapi/v1/train/hypernetwork", self.train_hypernetwork, methods=["POST"], response_model=models.TrainResponse)
self.add_api_route("/sdapi/v1/memory", self.get_memory, methods=["GET"], response_model=models.MemoryResponse)
@@ -675,19 +673,6 @@ class Api:
finally:
shared.state.end()
- def preprocess(self, args: dict):
- try:
- shared.state.begin(job="preprocess")
- preprocess(**args) # quick operation unless blip/booru interrogation is enabled
- shared.state.end()
- return models.PreprocessResponse(info='preprocess complete')
- except KeyError as e:
- return models.PreprocessResponse(info=f"preprocess error: invalid token: {e}")
- except Exception as e:
- return models.PreprocessResponse(info=f"preprocess error: {e}")
- finally:
- shared.state.end()
-
def train_embedding(self, args: dict):
try:
shared.state.begin(job="train_embedding")
diff --git a/modules/api/models.py b/modules/api/models.py
index a0d80af8..33894b3e 100644
--- a/modules/api/models.py
+++ b/modules/api/models.py
@@ -202,9 +202,6 @@ class TrainResponse(BaseModel):
class CreateResponse(BaseModel):
info: str = Field(title="Create info", description="Response string from create embedding or hypernetwork task.")
-class PreprocessResponse(BaseModel):
- info: str = Field(title="Preprocess info", description="Response string from preprocessing task.")
-
fields = {}
for key, metadata in opts.data_labels.items():
value = opts.data.get(key)
diff --git a/modules/cache.py b/modules/cache.py
index ff26a213..2d37e7b9 100644
--- a/modules/cache.py
+++ b/modules/cache.py
@@ -32,7 +32,7 @@ def dump_cache():
with cache_lock:
cache_filename_tmp = cache_filename + "-"
with open(cache_filename_tmp, "w", encoding="utf8") as file:
- json.dump(cache_data, file, indent=4)
+ json.dump(cache_data, file, indent=4, ensure_ascii=False)
os.replace(cache_filename_tmp, cache_filename)
diff --git a/modules/cmd_args.py b/modules/cmd_args.py
index a9fb9bfa..da93eb26 100644
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@@ -70,6 +70,7 @@ parser.add_argument("--opt-sdp-no-mem-attention", action='store_true', help="pre
parser.add_argument("--disable-opt-split-attention", action='store_true', help="prefer no cross-attention layer optimization for automatic choice of optimization")
parser.add_argument("--disable-nan-check", action='store_true', help="do not check if produced images/latent spaces have nans; useful for running without a checkpoint in CI")
parser.add_argument("--use-cpu", nargs='+', help="use CPU as torch device for specified modules", default=[], type=str.lower)
+parser.add_argument("--use-ipex", action="store_true", help="use Intel XPU as torch device")
parser.add_argument("--disable-model-loading-ram-optimization", action='store_true', help="disable an optimization that reduces RAM use when loading a model")
parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests")
parser.add_argument("--port", type=int, help="launch gradio with given server port, you need root/admin rights for ports < 1024, defaults to 7860 if available", default=None)
diff --git a/modules/devices.py b/modules/devices.py
index 1d4eb563..ea1f712f 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -8,6 +8,13 @@ from modules import errors, shared
if sys.platform == "darwin":
from modules import mac_specific
+if shared.cmd_opts.use_ipex:
+ from modules import xpu_specific
+
+
+def has_xpu() -> bool:
+    """Return ``xpu_specific.has_xpu`` when ``--use-ipex`` is set, else the falsy ``use_ipex`` value."""
+    ipex_requested = shared.cmd_opts.use_ipex
+    if not ipex_requested:
+        # xpu_specific is only imported when use_ipex is set, so it is not touched here
+        return ipex_requested
+    return xpu_specific.has_xpu
+
def has_mps() -> bool:
if sys.platform != "darwin":
@@ -30,6 +37,9 @@ def get_optimal_device_name():
if has_mps():
return "mps"
+ if has_xpu():
+ return xpu_specific.get_xpu_device_string()
+
return "cpu"
@@ -38,7 +48,7 @@ def get_optimal_device():
def get_device_for(task):
- if task in shared.cmd_opts.use_cpu:
+ if task in shared.cmd_opts.use_cpu or "all" in shared.cmd_opts.use_cpu:
return cpu
return get_optimal_device()
@@ -54,6 +64,9 @@ def torch_gc():
if has_mps():
mac_specific.torch_mps_gc()
+ if has_xpu():
+ xpu_specific.torch_xpu_gc()
+
def enable_tf32():
if torch.cuda.is_available():
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index 0a606515..4efe53e0 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -1,3 +1,4 @@
+from __future__ import annotations
import base64
import io
import json
@@ -15,9 +16,6 @@ re_imagesize = re.compile(r"^(\d+)x(\d+)$")
re_hypernet_hash = re.compile("\(([0-9a-f]+)\)$")
type_of_gr_update = type(gr.update())
-paste_fields = {}
-registered_param_bindings = []
-
class ParamBinding:
def __init__(self, paste_button, tabname, source_text_component=None, source_image_component=None, source_tabname=None, override_settings_component=None, paste_field_names=None):
@@ -30,6 +28,10 @@ class ParamBinding:
self.paste_field_names = paste_field_names or []
+paste_fields: dict[str, dict] = {}
+registered_param_bindings: list[ParamBinding] = []
+
+
def reset():
paste_fields.clear()
registered_param_bindings.clear()
@@ -113,7 +115,6 @@ def register_paste_params_button(binding: ParamBinding):
def connect_paste_params_buttons():
- binding: ParamBinding
for binding in registered_param_bindings:
destination_image_component = paste_fields[binding.tabname]["init_img"]
fields = paste_fields[binding.tabname]["fields"]
@@ -313,6 +314,9 @@ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model
if "VAE Decoder" not in res:
res["VAE Decoder"] = "Full"
+ skip = set(shared.opts.infotext_skip_pasting)
+ res = {k: v for k, v in res.items() if k not in skip}
+
return res
@@ -443,3 +447,4 @@ def connect_paste(button, paste_fields, input_comp, override_settings_component,
outputs=[],
show_progress=False,
)
+
diff --git a/modules/gradio_extensons.py b/modules/gradio_extensons.py
index e6b6835a..7d88dc98 100644
--- a/modules/gradio_extensons.py
+++ b/modules/gradio_extensons.py
@@ -47,10 +47,20 @@ def Block_get_config(self):
def BlockContext_init(self, *args, **kwargs):
+ if scripts.scripts_current is not None:
+ scripts.scripts_current.before_component(self, **kwargs)
+
+ scripts.script_callbacks.before_component_callback(self, **kwargs)
+
res = original_BlockContext_init(self, *args, **kwargs)
add_classes_to_gradio_component(self)
+ scripts.script_callbacks.after_component_callback(self, **kwargs)
+
+ if scripts.scripts_current is not None:
+ scripts.scripts_current.after_component(self, **kwargs)
+
return res
diff --git a/modules/images.py b/modules/images.py
index daf4eebe..16f9ae7c 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -791,3 +791,4 @@ def flatten(img, bgcolor):
img = background
return img.convert('RGB')
+
diff --git a/modules/import_hook.py b/modules/import_hook.py
index 28c67dfa..eba9a372 100644
--- a/modules/import_hook.py
+++ b/modules/import_hook.py
@@ -3,3 +3,14 @@ import sys
# this will break any attempt to import xformers which will prevent stability diffusion repo from trying to use it
if "--xformers" not in "".join(sys.argv):
sys.modules["xformers"] = None
+
+# Hack to fix a changed import in torchvision 0.17+, which otherwise breaks
+# basicsr; see https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/13985
+#
+# If torchvision.transforms.functional_tensor cannot be imported, the module
+# torchvision.transforms.functional is registered in sys.modules under that name.
+try:
+ import torchvision.transforms.functional_tensor # noqa: F401
+except ImportError:
+ try:
+ import torchvision.transforms.functional as functional
+ sys.modules["torchvision.transforms.functional_tensor"] = functional
+ except ImportError:
+ # the replacement module is unavailable as well; nothing is registered
+ pass # shrug...
diff --git a/modules/launch_utils.py b/modules/launch_utils.py
index 1f2b6c5e..2c54e2a0 100644
--- a/modules/launch_utils.py
+++ b/modules/launch_utils.py
@@ -6,6 +6,7 @@ import os
import shutil
import sys
import importlib.util
+import importlib.metadata
import platform
import json
from functools import lru_cache
@@ -119,11 +120,16 @@ def run(command, desc=None, errdesc=None, custom_env=None, live: bool = default_
def is_installed(package):
try:
- spec = importlib.util.find_spec(package)
- except ModuleNotFoundError:
- return False
+ dist = importlib.metadata.distribution(package)
+ except importlib.metadata.PackageNotFoundError:
+ try:
+ spec = importlib.util.find_spec(package)
+ except ModuleNotFoundError:
+ return False
+
+ return spec is not None
- return spec is not None
+ return dist is not None
def repo_dir(name):
@@ -310,6 +316,26 @@ def requirements_met(requirements_file):
def prepare_environment():
torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://download.pytorch.org/whl/cu121")
torch_command = os.environ.get('TORCH_COMMAND', f"pip install torch==2.1.0 torchvision==0.16.0 --extra-index-url {torch_index_url}")
+ if args.use_ipex:
+ if platform.system() == "Windows":
+ # The "Nuullll/intel-extension-for-pytorch" wheels were built from IPEX source for Intel Arc GPU: https://github.com/intel/intel-extension-for-pytorch/tree/xpu-main
+ # This is NOT an Intel official release so please use it at your own risk!!
+ # See https://github.com/Nuullll/intel-extension-for-pytorch/releases/tag/v2.0.110%2Bxpu-master%2Bdll-bundle for details.
+ #
+ # Strengths (over official IPEX 2.0.110 windows release):
+ # - AOT build (for Arc GPU only) to eliminate JIT compilation overhead: https://github.com/intel/intel-extension-for-pytorch/issues/399
+ # - Bundles minimal oneAPI 2023.2 dependencies into the python wheels, so users don't need to install oneAPI for the whole system.
+ # - Provides a compatible torchvision wheel: https://github.com/intel/intel-extension-for-pytorch/issues/465
+ # Limitation:
+ # - Only works for python 3.10
+ url_prefix = "https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.0.110%2Bxpu-master%2Bdll-bundle"
+ torch_command = os.environ.get('TORCH_COMMAND', f"pip install {url_prefix}/torch-2.0.0a0+gite9ebda2-cp310-cp310-win_amd64.whl {url_prefix}/torchvision-0.15.2a0+fa99a53-cp310-cp310-win_amd64.whl {url_prefix}/intel_extension_for_pytorch-2.0.110+gitc6ea20b-cp310-cp310-win_amd64.whl")
+ else:
+ # Using official IPEX release for linux since it's already an AOT build.
+ # However, users still have to install oneAPI toolkit and activate oneAPI environment manually.
+ # See https://intel.github.io/intel-extension-for-pytorch/index.html#installation for details.
+ torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/")
+ torch_command = os.environ.get('TORCH_COMMAND', f"pip install torch==2.0.0a0 intel-extension-for-pytorch==2.0.110+gitba7f6c1 --extra-index-url {torch_index_url}")
requirements_file = os.environ.get('REQS_FILE', "requirements_versions.txt")
xformers_package = os.environ.get('XFORMERS_PACKAGE', 'xformers==0.0.22.post7')
@@ -352,6 +378,8 @@ def prepare_environment():
run(f'"{python}" -m {torch_command}', "Installing torch and torchvision", "Couldn't install torch", live=True)
startup_timer.record("install torch")
+ if args.use_ipex:
+ args.skip_torch_cuda_test = True
if not args.skip_torch_cuda_test and not check_run_python("import torch; assert torch.cuda.is_available()"):
raise RuntimeError(
'Torch is not able to use GPU; '
diff --git a/modules/mac_specific.py b/modules/mac_specific.py
index 89256c5b..d96d86d7 100644
--- a/modules/mac_specific.py
+++ b/modules/mac_specific.py
@@ -1,6 +1,7 @@
import logging
import torch
+from torch import Tensor
import platform
from modules.sd_hijack_utils import CondFunc
from packaging import version
@@ -51,6 +52,17 @@ def cumsum_fix(input, cumsum_func, *args, **kwargs):
return cumsum_func(input, *args, **kwargs)
+# MPS workaround for https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14046
+def interpolate_with_fp32_fallback(orig_func, *args, **kwargs) -> Tensor:
+    """Call ``orig_func`` and retry once in float32 if it rejects half-precision input.
+
+    ``orig_func`` is first called with the arguments unchanged. If it raises a
+    RuntimeError whose message contains both "not implemented for" and "Half",
+    the input tensor (first positional argument, or the ``input`` keyword) is
+    converted to float32, ``orig_func`` is called again, and the result is
+    converted back to the input tensor's original dtype.
+
+    Any other RuntimeError is re-raised, as is the half-precision error when
+    no input tensor can be located in the arguments.
+    """
+    try:
+        return orig_func(*args, **kwargs)
+    except RuntimeError as e:
+        message = str(e)
+        if "not implemented for" not in message or "Half" not in message:
+            # not the failure this workaround targets: let the caller see it
+            # instead of printing it and implicitly returning None
+            raise
+
+        if args:
+            input_tensor = args[0]
+            return orig_func(input_tensor.to(torch.float32), *args[1:], **kwargs).to(input_tensor.dtype)
+
+        if "input" in kwargs:
+            # tensor was passed by keyword; upcast it in a copy of kwargs
+            input_tensor = kwargs["input"]
+            retry_kwargs = {**kwargs, "input": input_tensor.to(torch.float32)}
+            return orig_func(**retry_kwargs).to(input_tensor.dtype)
+
+        raise
+
if has_mps:
if platform.mac_ver()[0].startswith("13.2."):
# MPS workaround for https://github.com/pytorch/pytorch/issues/95188, thanks to danieldk (https://github.com/explosion/curated-transformers/pull/124)
@@ -77,6 +89,9 @@ if has_mps:
# MPS workaround for https://github.com/pytorch/pytorch/issues/96113
CondFunc('torch.nn.functional.layer_norm', lambda orig_func, x, normalized_shape, weight, bias, eps, **kwargs: orig_func(x.float(), normalized_shape, weight.float() if weight is not None else None, bias.float() if bias is not None else bias, eps).to(x.dtype), lambda _, input, *args, **kwargs: len(args) == 4 and input.device.type == 'mps')
+ # MPS workaround for https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14046
+ CondFunc('torch.nn.functional.interpolate', interpolate_with_fp32_fallback, None)
+
# MPS workaround for https://github.com/pytorch/pytorch/issues/92311
if platform.processor() == 'i386':
for funcName in ['torch.argmax', 'torch.Tensor.argmax']:
diff --git a/modules/models/diffusion/ddpm_edit.py b/modules/models/diffusion/ddpm_edit.py
index b892d5fc..6db340da 100644
--- a/modules/models/diffusion/ddpm_edit.py
+++ b/modules/models/diffusion/ddpm_edit.py
@@ -24,10 +24,15 @@ from pytorch_lightning.utilities.distributed import rank_zero_only
from ldm.util import log_txt_as_img, exists, default, ismap, isimage, mean_flat, count_params, instantiate_from_config
from ldm.modules.ema import LitEma
from ldm.modules.distributions.distributions import normal_kl, DiagonalGaussianDistribution
-from ldm.models.autoencoder import VQModelInterface, IdentityFirstStage, AutoencoderKL
+from ldm.models.autoencoder import IdentityFirstStage, AutoencoderKL
from ldm.modules.diffusionmodules.util import make_beta_schedule, extract_into_tensor, noise_like
from ldm.models.diffusion.ddim import DDIMSampler
+# VQModelInterface is imported on its own so that a failure to import it does
+# not stop this module from loading; in that case an empty placeholder class
+# with the same name is defined instead.
+try:
+ from ldm.models.autoencoder import VQModelInterface
+except Exception:
+ class VQModelInterface:
+ pass
__conditioning_keys__ = {'concat': 'c_concat',
'crossattn': 'c_crossattn',
diff --git a/modules/options.py b/modules/options.py
index 7703d80e..4fead690 100644
--- a/modules/options.py
+++ b/modules/options.py
@@ -1,5 +1,6 @@
import json
import sys
+from dataclasses import dataclass
import gradio as gr
@@ -8,13 +9,14 @@ from modules.shared_cmd_options import cmd_opts
class OptionInfo:
- def __init__(self, default=None, label="", component=None, component_args=None, onchange=None, section=None, refresh=None, comment_before='', comment_after='', infotext=None, restrict_api=False):
+ def __init__(self, default=None, label="", component=None, component_args=None, onchange=None, section=None, refresh=None, comment_before='', comment_after='', infotext=None, restrict_api=False, category_id=None):
self.default = default
self.label = label
self.component = component
self.component_args = component_args
self.onchange = onchange
self.section = section
+ self.category_id = category_id
self.refresh = refresh
self.do_not_save = False
@@ -63,7 +65,11 @@ class OptionHTML(OptionInfo):
def options_section(section_identifier, options_dict):
for v in options_dict.values():
- v.section = section_identifier
+ if len(section_identifier) == 2:
+ v.section = section_identifier
+ elif len(section_identifier) == 3:
+ v.section = section_identifier[0:2]
+ v.category_id = section_identifier[2]
return options_dict
@@ -158,7 +164,7 @@ class Options:
assert not cmd_opts.freeze_settings, "saving settings is disabled"
with open(filename, "w", encoding="utf8") as file:
- json.dump(self.data, file, indent=4)
+ json.dump(self.data, file, indent=4, ensure_ascii=False)
def same_type(self, x, y):
if x is None or y is None:
@@ -206,6 +212,17 @@ class Options:
d = {k: self.data.get(k, v.default) for k, v in self.data_labels.items()}
d["_comments_before"] = {k: v.comment_before for k, v in self.data_labels.items() if v.comment_before is not None}
d["_comments_after"] = {k: v.comment_after for k, v in self.data_labels.items() if v.comment_after is not None}
+
+ item_categories = {}
+ for item in self.data_labels.values():
+ category = categories.mapping.get(item.category_id)
+ category = "Uncategorized" if category is None else category.label
+ if category not in item_categories:
+ item_categories[category] = item.section[1]
+
+ # _categories is a list of pairs: [section, category]. Each section (a setting page) will get a special heading above it with the category as text.
+ d["_categories"] = [[v, k] for k, v in item_categories.items()] + [["Defaults", "Other"]]
+
return json.dumps(d)
def add_option(self, key, info):
@@ -214,15 +231,40 @@ class Options:
self.data[key] = info.default
def reorder(self):
- """reorder settings so that all items related to section always go together"""
+ """Reorder settings so that:
+ - all items related to section always go together
+ - all sections belonging to a category go together
+ - sections inside a category are ordered alphabetically
+ - categories are ordered by creation order
+
+ Category is a superset of sections: for category "postprocessing" there could be multiple sections: "face restoration", "upscaling".
+
+ This function also changes items' category_id so that all items belonging to a section have the same category_id.
+ """
+
+ category_ids = {}
+ section_categories = {}
- section_ids = {}
settings_items = self.data_labels.items()
for _, item in settings_items:
- if item.section not in section_ids:
- section_ids[item.section] = len(section_ids)
+ if item.section not in section_categories:
+ section_categories[item.section] = item.category_id
+
+ for _, item in settings_items:
+ item.category_id = section_categories.get(item.section)
+
+ for category_id in categories.mapping:
+ if category_id not in category_ids:
+ category_ids[category_id] = len(category_ids)
- self.data_labels = dict(sorted(settings_items, key=lambda x: section_ids[x[1].section]))
+ def sort_key(x):
+ item: OptionInfo = x[1]
+ category_order = category_ids.get(item.category_id, len(category_ids))
+ section_order = item.section[1]
+
+ return category_order, section_order
+
+ self.data_labels = dict(sorted(settings_items, key=sort_key))
def cast_value(self, key, value):
"""casts an arbitrary to the same type as this setting's value with key
@@ -245,3 +287,22 @@ class Options:
value = expected_type(value)
return value
+
+
+# A settings category: an id plus the label text used for it.
+@dataclass
+class OptionsCategory:
+ # key under which OptionsCategories.mapping stores this category
+ id: str
+ # display text of the category
+ label: str
+
+class OptionsCategories:
+    """Registry of settings categories, keyed by category id."""
+
+    def __init__(self):
+        # category_id -> OptionsCategory, in registration order
+        self.mapping = {}
+
+    def register_category(self, category_id, label):
+        """Register a category unless its id is already known; return the id.
+
+        Registering an id a second time keeps the first entry (and its label).
+        The id is returned on every path, including the first registration.
+        """
+        if category_id not in self.mapping:
+            self.mapping[category_id] = OptionsCategory(category_id, label)
+
+        return category_id
+
+
+categories = OptionsCategories()
diff --git a/modules/postprocessing.py b/modules/postprocessing.py
index fd0c0cc9..0c59fad4 100644
--- a/modules/postprocessing.py
+++ b/modules/postprocessing.py
@@ -29,11 +29,7 @@ def run_postprocessing(extras_mode, image, image_folder, input_dir, output_dir,
image_list = shared.listfiles(input_dir)
for filename in image_list:
- try:
- image = Image.open(filename)
- except Exception:
- continue
- yield image, filename
+ yield filename, filename
else:
assert image, 'image not selected'
yield image, None
@@ -45,35 +41,85 @@ def run_postprocessing(extras_mode, image, image_folder, input_dir, output_dir,
infotext = ''
- for image_data, name in get_images(extras_mode, image, image_folder, input_dir):
+ data_to_process = list(get_images(extras_mode, image, image_folder, input_dir))
+ shared.state.job_count = len(data_to_process)
+
+ for image_placeholder, name in data_to_process:
image_data: Image.Image
+ shared.state.nextjob()
shared.state.textinfo = name
+ shared.state.skipped = False
+
+ if shared.state.interrupted:
+ break
+
+ if isinstance(image_placeholder, str):
+ try:
+ image_data = Image.open(image_placeholder)
+ except Exception:
+ continue
+ else:
+ image_data = image_placeholder
+
+ shared.state.assign_current_image(image_data)
parameters, existing_pnginfo = images.read_info_from_image(image_data)
if parameters:
existing_pnginfo["parameters"] = parameters
- pp = scripts_postprocessing.PostprocessedImage(image_data.convert("RGB"))
+ initial_pp = scripts_postprocessing.PostprocessedImage(image_data.convert("RGB"))
- scripts.scripts_postproc.run(pp, args)
+ scripts.scripts_postproc.run(initial_pp, args)
- if opts.use_original_name_batch and name is not None:
- basename = os.path.splitext(os.path.basename(name))[0]
- else:
- basename = ''
+ if shared.state.skipped:
+ continue
+
+ used_suffixes = {}
+ for pp in [initial_pp, *initial_pp.extra_images]:
+ suffix = pp.get_suffix(used_suffixes)
- infotext = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in pp.info.items() if v is not None])
+ if opts.use_original_name_batch and name is not None:
+ basename = os.path.splitext(os.path.basename(name))[0]
+ forced_filename = basename + suffix
+ else:
+ basename = ''
+ forced_filename = None
- if opts.enable_pnginfo:
- pp.image.info = existing_pnginfo
- pp.image.info["postprocessing"] = infotext
+ infotext = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in pp.info.items() if v is not None])
- if save_output:
- images.save_image(pp.image, path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=infotext, short_filename=True, no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo, forced_filename=None)
+ if opts.enable_pnginfo:
+ pp.image.info = existing_pnginfo
+ pp.image.info["postprocessing"] = infotext
- if extras_mode != 2 or show_extras_results:
- outputs.append(pp.image)
+ if save_output:
+ fullfn, _ = images.save_image(pp.image, path=outpath, basename=basename, extension=opts.samples_format, info=infotext, short_filename=True, no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo, forced_filename=forced_filename, suffix=suffix)
+
+ if pp.caption:
+ caption_filename = os.path.splitext(fullfn)[0] + ".txt"
+ if os.path.isfile(caption_filename):
+ with open(caption_filename, encoding="utf8") as file:
+ existing_caption = file.read().strip()
+ else:
+ existing_caption = ""
+
+ action = shared.opts.postprocessing_existing_caption_action
+ if action == 'Prepend' and existing_caption:
+ caption = f"{existing_caption} {pp.caption}"
+ elif action == 'Append' and existing_caption:
+ caption = f"{pp.caption} {existing_caption}"
+ elif action == 'Keep' and existing_caption:
+ caption = existing_caption
+ else:
+ caption = pp.caption
+
+ caption = caption.strip()
+ if caption:
+ with open(caption_filename, "w", encoding="utf8") as file:
+ file.write(caption)
+
+ if extras_mode != 2 or show_extras_results:
+ outputs.append(pp.image)
image_data.close()
@@ -82,6 +128,10 @@ def run_postprocessing(extras_mode, image, image_folder, input_dir, output_dir,
return outputs, ui_common.plaintext_to_html(infotext), ''
+def run_postprocessing_webui(id_task, *args, **kwargs):
+ return run_postprocessing(*args, **kwargs)
+
+
def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_dir, show_extras_results, gfpgan_visibility, codeformer_visibility, codeformer_weight, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop, extras_upscaler_1, extras_upscaler_2, extras_upscaler_2_visibility, upscale_first: bool, save_output: bool = True):
"""old handler for API"""
@@ -97,9 +147,11 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
"upscaler_2_visibility": extras_upscaler_2_visibility,
},
"GFPGAN": {
+ "enable": True,
"gfpgan_visibility": gfpgan_visibility,
},
"CodeFormer": {
+ "enable": True,
"codeformer_visibility": codeformer_visibility,
"codeformer_weight": codeformer_weight,
},
diff --git a/modules/processing.py b/modules/processing.py
index ac58ef86..bea01ec6 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -62,18 +62,22 @@ def apply_color_correction(correction, original_image):
return image.convert('RGB')
-def apply_overlay(image, paste_loc, index, overlays):
- if overlays is None or index >= len(overlays):
- return image
+def uncrop(image, dest_size, paste_loc):
+ x, y, w, h = paste_loc
+ base_image = Image.new('RGBA', dest_size)
+ image = images.resize_image(1, image, w, h)
+ base_image.paste(image, (x, y))
+ image = base_image
+
+ return image
- overlay = overlays[index]
+
+def apply_overlay(image, paste_loc, overlay):
+ if overlay is None:
+ return image
if paste_loc is not None:
- x, y, w, h = paste_loc
- base_image = Image.new('RGBA', (overlay.width, overlay.height))
- image = images.resize_image(1, image, w, h)
- base_image.paste(image, (x, y))
- image = base_image
+ image = uncrop(image, (overlay.width, overlay.height), paste_loc)
image = image.convert('RGBA')
image.alpha_composite(overlay)
@@ -81,9 +85,12 @@ def apply_overlay(image, paste_loc, index, overlays):
return image
-def create_binary_mask(image):
+def create_binary_mask(image, round=True):
if image.mode == 'RGBA' and image.getextrema()[-1] != (255, 255):
- image = image.split()[-1].convert("L").point(lambda x: 255 if x > 128 else 0)
+ if round:
+ image = image.split()[-1].convert("L").point(lambda x: 255 if x > 128 else 0)
+ else:
+ image = image.split()[-1].convert("L")
else:
image = image.convert('L')
return image
@@ -308,7 +315,7 @@ class StableDiffusionProcessing:
c_adm = torch.cat((c_adm, noise_level_emb), 1)
return c_adm
- def inpainting_image_conditioning(self, source_image, latent_image, image_mask=None):
+ def inpainting_image_conditioning(self, source_image, latent_image, image_mask=None, round_image_mask=True):
self.is_using_inpainting_conditioning = True
# Handle the different mask inputs
@@ -320,8 +327,10 @@ class StableDiffusionProcessing:
conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
conditioning_mask = torch.from_numpy(conditioning_mask[None, None])
- # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0
- conditioning_mask = torch.round(conditioning_mask)
+ if round_image_mask:
+ # Caller is requesting a discretized mask as input, so we round to either 1.0 or 0.0
+ conditioning_mask = torch.round(conditioning_mask)
+
else:
conditioning_mask = source_image.new_ones(1, 1, *source_image.shape[-2:])
@@ -345,7 +354,7 @@ class StableDiffusionProcessing:
return image_conditioning
- def img2img_image_conditioning(self, source_image, latent_image, image_mask=None):
+ def img2img_image_conditioning(self, source_image, latent_image, image_mask=None, round_image_mask=True):
source_image = devices.cond_cast_float(source_image)
# HACK: Using introspection as the Depth2Image model doesn't appear to uniquely
@@ -357,7 +366,7 @@ class StableDiffusionProcessing:
return self.edit_image_conditioning(source_image)
if self.sampler.conditioning_key in {'hybrid', 'concat'}:
- return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask)
+ return self.inpainting_image_conditioning(source_image, latent_image, image_mask=image_mask, round_image_mask=round_image_mask)
if self.sampler.conditioning_key == "crossattn-adm":
return self.unclip_image_conditioning(source_image)
@@ -679,8 +688,8 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter
"Size": f"{p.width}x{p.height}",
"Model hash": p.sd_model_hash if opts.add_model_hash_to_info else None,
"Model": p.sd_model_name if opts.add_model_name_to_info else None,
- "VAE hash": p.sd_vae_hash if opts.add_model_hash_to_info else None,
- "VAE": p.sd_vae_name if opts.add_model_name_to_info else None,
+ "VAE hash": p.sd_vae_hash if opts.add_vae_hash_to_info else None,
+ "VAE": p.sd_vae_name if opts.add_vae_name_to_info else None,
"Variation seed": (None if p.subseed_strength == 0 else (p.all_subseeds[0] if use_main_prompt else all_subseeds[index])),
"Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength),
"Seed resize from": (None if p.seed_resize_from_w <= 0 or p.seed_resize_from_h <= 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"),
@@ -867,6 +876,11 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
+ if p.scripts is not None:
+ ps = scripts.PostSampleArgs(samples_ddim)
+ p.scripts.post_sample(p, ps)
+ samples_ddim = ps.samples
+
if getattr(samples_ddim, 'already_decoded', False):
x_samples_ddim = samples_ddim
else:
@@ -922,13 +936,31 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
pp = scripts.PostprocessImageArgs(image)
p.scripts.postprocess_image(p, pp)
image = pp.image
+
+ mask_for_overlay = getattr(p, "mask_for_overlay", None)
+ overlay_image = p.overlay_images[i] if getattr(p, "overlay_images", None) is not None and i < len(p.overlay_images) else None
+
+ if p.scripts is not None:
+ ppmo = scripts.PostProcessMaskOverlayArgs(i, mask_for_overlay, overlay_image)
+ p.scripts.postprocess_maskoverlay(p, ppmo)
+ mask_for_overlay, overlay_image = ppmo.mask_for_overlay, ppmo.overlay_image
+
if p.color_corrections is not None and i < len(p.color_corrections):
if save_samples and opts.save_images_before_color_correction:
- image_without_cc = apply_overlay(image, p.paste_to, i, p.overlay_images)
+ image_without_cc = apply_overlay(image, p.paste_to, overlay_image)
images.save_image(image_without_cc, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-before-color-correction")
image = apply_color_correction(p.color_corrections[i], image)
- image = apply_overlay(image, p.paste_to, i, p.overlay_images)
+ # If the intention is to show the output from the model
+ # that is being composited over the original image,
+ # we need to keep the original image around
+ # and use it in the composite step.
+ original_denoised_image = image.copy()
+
+ if p.paste_to is not None:
+ original_denoised_image = uncrop(original_denoised_image, (overlay_image.width, overlay_image.height), p.paste_to)
+
+ image = apply_overlay(image, p.paste_to, overlay_image)
if save_samples:
images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p)
@@ -938,21 +970,21 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
if opts.enable_pnginfo:
image.info["parameters"] = text
output_images.append(image)
- if save_samples and hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]):
- image_mask = p.mask_for_overlay.convert('RGB')
- image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')
- if opts.save_mask:
- images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask")
-
- if opts.save_mask_composite:
- images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite")
-
- if opts.return_mask:
- output_images.append(image_mask)
-
- if opts.return_mask_composite:
- output_images.append(image_mask_composite)
+ if mask_for_overlay is not None:
+ if opts.return_mask or opts.save_mask:
+ image_mask = mask_for_overlay.convert('RGB')
+ if save_samples and opts.save_mask:
+ images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask")
+ if opts.return_mask:
+ output_images.append(image_mask)
+
+ if opts.return_mask_composite or opts.save_mask_composite:
+ image_mask_composite = Image.composite(original_denoised_image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA')
+ if save_samples and opts.save_mask_composite:
+ images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(i), p=p, suffix="-mask-composite")
+ if opts.return_mask_composite:
+ output_images.append(image_mask_composite)
del x_samples_ddim
@@ -1352,6 +1384,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
mask_blur_x: int = 4
mask_blur_y: int = 4
mask_blur: int = None
+ mask_round: bool = True
inpainting_fill: int = 0
inpaint_full_res: bool = True
inpaint_full_res_padding: int = 0
@@ -1397,7 +1430,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
if image_mask is not None:
# image_mask is passed in as RGBA by Gradio to support alpha masks,
# but we still want to support binary masks.
- image_mask = create_binary_mask(image_mask)
+ image_mask = create_binary_mask(image_mask, round=self.mask_round)
if self.inpainting_mask_invert:
image_mask = ImageOps.invert(image_mask)
@@ -1504,7 +1537,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
latmask = latmask[0]
- latmask = np.around(latmask)
+ if self.mask_round:
+ latmask = np.around(latmask)
latmask = np.tile(latmask[None], (4, 1, 1))
self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(self.sd_model.dtype)
@@ -1516,7 +1550,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
elif self.inpainting_fill == 3:
self.init_latent = self.init_latent * self.mask
- self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask)
+ self.image_conditioning = self.img2img_image_conditioning(image * 2 - 1, self.init_latent, image_mask, self.mask_round)
def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength, prompts):
x = self.rng.next()
@@ -1528,7 +1562,14 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
if self.mask is not None:
- samples = samples * self.nmask + self.init_latent * self.mask
+ blended_samples = samples * self.nmask + self.init_latent * self.mask
+
+ if self.scripts is not None:
+ mba = scripts.MaskBlendArgs(samples, self.nmask, self.init_latent, self.mask, blended_samples)
+ self.scripts.on_mask_blend(self, mba)
+ blended_samples = mba.blended_latent
+
+ samples = blended_samples
del x
devices.torch_gc()
diff --git a/modules/scripts.py b/modules/scripts.py
index b0689a23..b6fcf96e 100644
--- a/modules/scripts.py
+++ b/modules/scripts.py
@@ -11,11 +11,31 @@ from modules import shared, paths, script_callbacks, extensions, script_loading,
AlwaysVisible = object()
+class MaskBlendArgs:
+ def __init__(self, current_latent, nmask, init_latent, mask, blended_latent, denoiser=None, sigma=None):
+ self.current_latent = current_latent
+ self.nmask = nmask
+ self.init_latent = init_latent
+ self.mask = mask
+ self.blended_latent = blended_latent
+
+ self.denoiser = denoiser
+ self.is_final_blend = denoiser is None
+ self.sigma = sigma
+
+class PostSampleArgs:
+ def __init__(self, samples):
+ self.samples = samples
class PostprocessImageArgs:
def __init__(self, image):
self.image = image
+class PostProcessMaskOverlayArgs:
+ def __init__(self, index, mask_for_overlay, overlay_image):
+ self.index = index
+ self.mask_for_overlay = mask_for_overlay
+ self.overlay_image = overlay_image
class PostprocessBatchListArgs:
def __init__(self, images):
@@ -206,6 +226,25 @@ class Script:
pass
+ def on_mask_blend(self, p, mba: MaskBlendArgs, *args):
+ """
+ Called in inpainting mode when the original content is blended with the inpainted content.
+ This is called at every step in the denoising process and once at the end.
+ If is_final_blend is true, this is called for the final blending stage.
+ Otherwise, denoiser and sigma are defined and may be used to inform the procedure.
+ """
+
+ pass
+
+ def post_sample(self, p, ps: PostSampleArgs, *args):
+ """
+ Called after the samples have been generated,
+ but before they have been decoded by the VAE, if applicable.
+ Check getattr(ps.samples, 'already_decoded', False) to test if the images are decoded.
+ """
+
+ pass
+
def postprocess_image(self, p, pp: PostprocessImageArgs, *args):
"""
Called for every image after it has been generated.
@@ -213,6 +252,13 @@ class Script:
pass
+ def postprocess_maskoverlay(self, p, ppmo: PostProcessMaskOverlayArgs, *args):
+ """
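+ Called for every image after postprocess_image and before the overlay is applied; changes to ppmo.mask_for_overlay and ppmo.overlay_image are used by the caller.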
+ """
+
+ pass
+
def postprocess(self, p, processed, *args):
"""
This function is called after processing ends for AlwaysVisible scripts.
@@ -560,17 +606,25 @@ class ScriptRunner:
on_after.clear()
def create_script_ui(self, script):
- import modules.api.models as api_models
script.args_from = len(self.inputs)
script.args_to = len(self.inputs)
+ try:
+ self.create_script_ui_inner(script)
+ except Exception:
+ errors.report(f"Error creating UI for {script.name}: ", exc_info=True)
+
+ def create_script_ui_inner(self, script):
+ import modules.api.models as api_models
+
controls = wrap_call(script.ui, script.filename, "ui", script.is_img2img)
if controls is None:
return
script.name = wrap_call(script.title, script.filename, "title", default=script.filename).lower()
+
api_args = []
for control in controls:
@@ -759,6 +813,22 @@ class ScriptRunner:
except Exception:
errors.report(f"Error running postprocess_batch_list: {script.filename}", exc_info=True)
+ def post_sample(self, p, ps: PostSampleArgs):
+ for script in self.alwayson_scripts:
+ try:
+ script_args = p.script_args[script.args_from:script.args_to]
+ script.post_sample(p, ps, *script_args)
+ except Exception:
+ errors.report(f"Error running post_sample: {script.filename}", exc_info=True)
+
+ def on_mask_blend(self, p, mba: MaskBlendArgs):
+ for script in self.alwayson_scripts:
+ try:
+ script_args = p.script_args[script.args_from:script.args_to]
+ script.on_mask_blend(p, mba, *script_args)
+ except Exception:
+ errors.report(f"Error running on_mask_blend: {script.filename}", exc_info=True)
+
def postprocess_image(self, p, pp: PostprocessImageArgs):
for script in self.alwayson_scripts:
try:
@@ -767,6 +837,14 @@ class ScriptRunner:
except Exception:
errors.report(f"Error running postprocess_image: {script.filename}", exc_info=True)
+ def postprocess_maskoverlay(self, p, ppmo: PostProcessMaskOverlayArgs):
+ for script in self.alwayson_scripts:
+ try:
+ script_args = p.script_args[script.args_from:script.args_to]
+ script.postprocess_maskoverlay(p, ppmo, *script_args)
+ except Exception:
+ errors.report(f"Error running postprocess_maskoverlay: {script.filename}", exc_info=True)
+
def before_component(self, component, **kwargs):
for callback, script in self.on_before_component_elem_id.get(kwargs.get("elem_id"), []):
try:
diff --git a/modules/scripts_postprocessing.py b/modules/scripts_postprocessing.py
index bac1335d..901cad08 100644
--- a/modules/scripts_postprocessing.py
+++ b/modules/scripts_postprocessing.py
@@ -1,13 +1,56 @@
+import dataclasses
import os
import gradio as gr
from modules import errors, shared
+@dataclasses.dataclass
+class PostprocessedImageSharedInfo:
+ target_width: int = None
+ target_height: int = None
+
+
class PostprocessedImage:
def __init__(self, image):
self.image = image
self.info = {}
+ self.shared = PostprocessedImageSharedInfo()
+ self.extra_images = []
+ self.nametags = []
+ self.disable_processing = False
+ self.caption = None
+
+ def get_suffix(self, used_suffixes=None):
+ used_suffixes = {} if used_suffixes is None else used_suffixes
+ suffix = "-".join(self.nametags)
+ if suffix:
+ suffix = "-" + suffix
+
+ if suffix not in used_suffixes:
+ used_suffixes[suffix] = 1
+ return suffix
+
+ for i in range(1, 100):
+ proposed_suffix = suffix + "-" + str(i)
+
+ if proposed_suffix not in used_suffixes:
+ used_suffixes[proposed_suffix] = 1
+ return proposed_suffix
+
+ return suffix
+
+ def create_copy(self, new_image, *, nametags=None, disable_processing=False):
+ pp = PostprocessedImage(new_image)
+ pp.shared = self.shared
+ pp.nametags = self.nametags.copy()
+ pp.info = self.info.copy()
+ pp.disable_processing = disable_processing
+
+ if nametags is not None:
+ pp.nametags += nametags
+
+ return pp
class ScriptPostprocessing:
@@ -42,10 +85,17 @@ class ScriptPostprocessing:
pass
- def image_changed(self):
- pass
+ def process_firstpass(self, pp: PostprocessedImage, **args):
+ """
+ Called for all scripts before calling process(). Scripts can examine the image here and set fields
+ of the pp object to communicate things to other scripts.
+ args contains a dictionary with all values returned by components from ui()
+ """
+ pass
+ def image_changed(self):
+ pass
def wrap_call(func, filename, funcname, *args, default=None, **kwargs):
@@ -118,16 +168,42 @@ class ScriptPostprocessingRunner:
return inputs
def run(self, pp: PostprocessedImage, args):
- for script in self.scripts_in_preferred_order():
- shared.state.job = script.name
+ scripts = []
+ for script in self.scripts_in_preferred_order():
script_args = args[script.args_from:script.args_to]
process_args = {}
for (name, _component), value in zip(script.controls.items(), script_args):
process_args[name] = value
- script.process(pp, **process_args)
+ scripts.append((script, process_args))
+
+ for script, process_args in scripts:
+ script.process_firstpass(pp, **process_args)
+
+ all_images = [pp]
+
+ for script, process_args in scripts:
+ if shared.state.skipped:
+ break
+
+ shared.state.job = script.name
+
+ for single_image in all_images.copy():
+
+ if not single_image.disable_processing:
+ script.process(single_image, **process_args)
+
+ for extra_image in single_image.extra_images:
+ if not isinstance(extra_image, PostprocessedImage):
+ extra_image = single_image.create_copy(extra_image)
+
+ all_images.append(extra_image)
+
+ single_image.extra_images.clear()
+
+ pp.extra_images = all_images[1:]
def create_args_for_run(self, scripts_args):
if not self.ui_created:
diff --git a/modules/sd_disable_initialization.py b/modules/sd_disable_initialization.py
index 8863107a..273a7edd 100644
--- a/modules/sd_disable_initialization.py
+++ b/modules/sd_disable_initialization.py
@@ -215,7 +215,7 @@ class LoadStateDictOnMeta(ReplaceHelper):
would be on the meta device.
"""
- if state_dict == sd:
+ if state_dict is sd:
state_dict = {k: v.to(device="meta", dtype=v.dtype) for k, v in state_dict.items()}
original(module, state_dict, strict=strict)
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 0157e19f..e139d996 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -38,8 +38,12 @@ ldm.models.diffusion.ddpm.print = shared.ldm_print
optimizers = []
current_optimizer: sd_hijack_optimizations.SdOptimization = None
-ldm_original_forward = patches.patch(__file__, ldm.modules.diffusionmodules.openaimodel.UNetModel, "forward", sd_unet.UNetModel_forward)
-sgm_original_forward = patches.patch(__file__, sgm.modules.diffusionmodules.openaimodel.UNetModel, "forward", sd_unet.UNetModel_forward)
+ldm_patched_forward = sd_unet.create_unet_forward(ldm.modules.diffusionmodules.openaimodel.UNetModel.forward)
+ldm_original_forward = patches.patch(__file__, ldm.modules.diffusionmodules.openaimodel.UNetModel, "forward", ldm_patched_forward)
+
+sgm_patched_forward = sd_unet.create_unet_forward(sgm.modules.diffusionmodules.openaimodel.UNetModel.forward)
+sgm_original_forward = patches.patch(__file__, sgm.modules.diffusionmodules.openaimodel.UNetModel, "forward", sgm_patched_forward)
+
def list_optimizers():
new_optimizers = script_callbacks.list_optimizers_callback()
@@ -303,8 +307,6 @@ class StableDiffusionModelHijack:
self.layers = None
self.clip = None
- sd_unet.original_forward = None
-
def apply_circular(self, enable):
if self.circular_enabled == enable:
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 841402e8..9355f1e1 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -230,15 +230,19 @@ def select_checkpoint():
return checkpoint_info
-checkpoint_dict_replacements = {
+checkpoint_dict_replacements_sd1 = {
'cond_stage_model.transformer.embeddings.': 'cond_stage_model.transformer.text_model.embeddings.',
'cond_stage_model.transformer.encoder.': 'cond_stage_model.transformer.text_model.encoder.',
'cond_stage_model.transformer.final_layer_norm.': 'cond_stage_model.transformer.text_model.final_layer_norm.',
}
+checkpoint_dict_replacements_sd2_turbo = { # Converts SD 2.1 Turbo from SGM to LDM format.
+ 'conditioner.embedders.0.': 'cond_stage_model.',
+}
+
-def transform_checkpoint_dict_key(k):
- for text, replacement in checkpoint_dict_replacements.items():
+def transform_checkpoint_dict_key(k, replacements):
+ for text, replacement in replacements.items():
if k.startswith(text):
k = replacement + k[len(text):]
@@ -249,9 +253,14 @@ def get_state_dict_from_checkpoint(pl_sd):
pl_sd = pl_sd.pop("state_dict", pl_sd)
pl_sd.pop("state_dict", None)
+ is_sd2_turbo = 'conditioner.embedders.0.model.ln_final.weight' in pl_sd and pl_sd['conditioner.embedders.0.model.ln_final.weight'].size()[0] == 1024
+
sd = {}
for k, v in pl_sd.items():
- new_key = transform_checkpoint_dict_key(k)
+ if is_sd2_turbo:
+ new_key = transform_checkpoint_dict_key(k, checkpoint_dict_replacements_sd2_turbo)
+ else:
+ new_key = transform_checkpoint_dict_key(k, checkpoint_dict_replacements_sd1)
if new_key is not None:
sd[new_key] = v
diff --git a/modules/sd_samplers_cfg_denoiser.py b/modules/sd_samplers_cfg_denoiser.py
index b8101d38..eb9d5daf 100644
--- a/modules/sd_samplers_cfg_denoiser.py
+++ b/modules/sd_samplers_cfg_denoiser.py
@@ -56,6 +56,9 @@ class CFGDenoiser(torch.nn.Module):
self.sampler = sampler
self.model_wrap = None
self.p = None
+
+ # NOTE: masking before denoising can cause the original latents to be oversmoothed
+ # as the original latents do not have noise
self.mask_before_denoising = False
@property
@@ -105,8 +108,21 @@ class CFGDenoiser(torch.nn.Module):
assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)"
+ # If we use masks, blending between the denoised and original latent images occurs here.
+ def apply_blend(current_latent):
+ blended_latent = current_latent * self.nmask + self.init_latent * self.mask
+
+ if self.p.scripts is not None:
+ from modules import scripts
+ mba = scripts.MaskBlendArgs(current_latent, self.nmask, self.init_latent, self.mask, blended_latent, denoiser=self, sigma=sigma)
+ self.p.scripts.on_mask_blend(self.p, mba)
+ blended_latent = mba.blended_latent
+
+ return blended_latent
+
+ # Blend in the original latents (before)
if self.mask_before_denoising and self.mask is not None:
- x = self.init_latent * self.mask + self.nmask * x
+ x = apply_blend(x)
batch_size = len(conds_list)
repeats = [len(conds_list[i]) for i in range(batch_size)]
@@ -207,8 +223,9 @@ class CFGDenoiser(torch.nn.Module):
else:
denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale)
+ # Blend in the original latents (after)
if not self.mask_before_denoising and self.mask is not None:
- denoised = self.init_latent * self.mask + self.nmask * denoised
+ denoised = apply_blend(denoised)
self.sampler.last_latent = self.get_pred_x0(torch.cat([x_in[i:i + 1] for i in denoised_image_indexes]), torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]), sigma)
diff --git a/modules/sd_samplers_timesteps_impl.py b/modules/sd_samplers_timesteps_impl.py
index a72daafd..930a64af 100644
--- a/modules/sd_samplers_timesteps_impl.py
+++ b/modules/sd_samplers_timesteps_impl.py
@@ -11,7 +11,7 @@ from modules.models.diffusion.uni_pc import uni_pc
def ddim(model, x, timesteps, extra_args=None, callback=None, disable=None, eta=0.0):
alphas_cumprod = model.inner_model.inner_model.alphas_cumprod
alphas = alphas_cumprod[timesteps]
- alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' else torch.float32)
+ alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' and x.device.type != 'xpu' else torch.float32)
sqrt_one_minus_alphas = torch.sqrt(1 - alphas)
sigmas = eta * np.sqrt((1 - alphas_prev.cpu().numpy()) / (1 - alphas.cpu()) * (1 - alphas.cpu() / alphas_prev.cpu().numpy()))
@@ -43,7 +43,7 @@ def ddim(model, x, timesteps, extra_args=None, callback=None, disable=None, eta=
def plms(model, x, timesteps, extra_args=None, callback=None, disable=None):
alphas_cumprod = model.inner_model.inner_model.alphas_cumprod
alphas = alphas_cumprod[timesteps]
- alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' else torch.float32)
+ alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' and x.device.type != 'xpu' else torch.float32)
sqrt_one_minus_alphas = torch.sqrt(1 - alphas)
extra_args = {} if extra_args is None else extra_args
diff --git a/modules/sd_unet.py b/modules/sd_unet.py
index 6a7bc9e2..a771849c 100644
--- a/modules/sd_unet.py
+++ b/modules/sd_unet.py
@@ -5,8 +5,7 @@ from modules import script_callbacks, shared, devices
unet_options = []
current_unet_option = None
current_unet = None
-original_forward = None
-
+original_forward = None # not used, only left temporarily for compatibility
def list_unets():
new_unets = script_callbacks.list_unets_callback()
@@ -84,9 +83,12 @@ class SdUnet(torch.nn.Module):
pass
-def UNetModel_forward(self, x, timesteps=None, context=None, *args, **kwargs):
- if current_unet is not None:
- return current_unet.forward(x, timesteps, context, *args, **kwargs)
+def create_unet_forward(original_forward):
+ def UNetModel_forward(self, x, timesteps=None, context=None, *args, **kwargs):
+ if current_unet is not None:
+ return current_unet.forward(x, timesteps, context, *args, **kwargs)
+
+ return original_forward(self, x, timesteps, context, *args, **kwargs)
- return original_forward(self, x, timesteps, context, *args, **kwargs)
+ return UNetModel_forward
diff --git a/modules/shared_items.py b/modules/shared_items.py
index 5024b426..991971ad 100644
--- a/modules/shared_items.py
+++ b/modules/shared_items.py
@@ -66,6 +66,22 @@ def reload_hypernetworks():
shared.hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir)
+def get_infotext_names():
+ from modules import generation_parameters_copypaste, shared
+ res = {}
+
+ for info in shared.opts.data_labels.values():
+ if info.infotext:
+ res[info.infotext] = 1
+
+ for tab_data in generation_parameters_copypaste.paste_fields.values():
+ for _, name in tab_data.get("fields") or []:
+ if isinstance(name, str):
+ res[name] = 1
+
+ return list(res)
+
+
ui_reorder_categories_builtin_items = [
"prompt",
"image",
diff --git a/modules/shared_options.py b/modules/shared_options.py
index 9bcd7914..d2e86ff1 100644
--- a/modules/shared_options.py
+++ b/modules/shared_options.py
@@ -3,7 +3,7 @@ import gradio as gr
from modules import localization, ui_components, shared_items, shared, interrogate, shared_gradio_themes
from modules.paths_internal import models_path, script_path, data_path, sd_configs_path, sd_default_config, sd_model_file, default_sd_model_file, extensions_dir, extensions_builtin_dir # noqa: F401
from modules.shared_cmd_options import cmd_opts
-from modules.options import options_section, OptionInfo, OptionHTML
+from modules.options import options_section, OptionInfo, OptionHTML, categories
options_templates = {}
hide_dirs = shared.hide_dirs
@@ -21,7 +21,14 @@ restricted_opts = {
"outdir_init_images"
}
-options_templates.update(options_section(('saving-images', "Saving images/grids"), {
+categories.register_category("saving", "Saving images")
+categories.register_category("sd", "Stable Diffusion")
+categories.register_category("ui", "User Interface")
+categories.register_category("system", "System")
+categories.register_category("postprocessing", "Postprocessing")
+categories.register_category("training", "Training")
+
+options_templates.update(options_section(('saving-images', "Saving images/grids", "saving"), {
"samples_save": OptionInfo(True, "Always save all generated images"),
"samples_format": OptionInfo('png', 'File format for images'),
"samples_filename_pattern": OptionInfo("", "Images filename pattern", component_args=hide_dirs).link("wiki", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Custom-Images-Filename-Name-and-Subdirectory"),
@@ -39,8 +46,6 @@ options_templates.update(options_section(('saving-images', "Saving images/grids"
"grid_text_inactive_color": OptionInfo("#999999", "Inactive text color for image grids", ui_components.FormColorPicker, {}),
"grid_background_color": OptionInfo("#ffffff", "Background color for image grids", ui_components.FormColorPicker, {}),
- "enable_pnginfo": OptionInfo(True, "Save text information about generation parameters as chunks to png files"),
- "save_txt": OptionInfo(False, "Create a text file next to every image with generation parameters."),
"save_images_before_face_restoration": OptionInfo(False, "Save a copy of image before doing face restoration."),
"save_images_before_highres_fix": OptionInfo(False, "Save a copy of image before applying highres fix."),
"save_images_before_color_correction": OptionInfo(False, "Save a copy of image before applying color correction to img2img results"),
@@ -67,7 +72,7 @@ options_templates.update(options_section(('saving-images', "Saving images/grids"
"notification_volume": OptionInfo(100, "Notification sound volume", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1}).info("in %"),
}))
-options_templates.update(options_section(('saving-paths', "Paths for saving"), {
+options_templates.update(options_section(('saving-paths', "Paths for saving", "saving"), {
"outdir_samples": OptionInfo("", "Output directory for images; if empty, defaults to three directories below", component_args=hide_dirs),
"outdir_txt2img_samples": OptionInfo("outputs/txt2img-images", 'Output directory for txt2img images', component_args=hide_dirs),
"outdir_img2img_samples": OptionInfo("outputs/img2img-images", 'Output directory for img2img images', component_args=hide_dirs),
@@ -79,7 +84,7 @@ options_templates.update(options_section(('saving-paths', "Paths for saving"), {
"outdir_init_images": OptionInfo("outputs/init-images", "Directory for saving init images when using img2img", component_args=hide_dirs),
}))
-options_templates.update(options_section(('saving-to-dirs', "Saving to a directory"), {
+options_templates.update(options_section(('saving-to-dirs', "Saving to a directory", "saving"), {
"save_to_dirs": OptionInfo(True, "Save images to a subdirectory"),
"grid_save_to_dirs": OptionInfo(True, "Save grids to a subdirectory"),
"use_save_to_dirs_for_ui": OptionInfo(False, "When using \"Save\" button, save images to a subdirectory"),
@@ -87,21 +92,21 @@ options_templates.update(options_section(('saving-to-dirs', "Saving to a directo
"directories_max_prompt_words": OptionInfo(8, "Max prompt words for [prompt_words] pattern", gr.Slider, {"minimum": 1, "maximum": 20, "step": 1, **hide_dirs}),
}))
-options_templates.update(options_section(('upscaling', "Upscaling"), {
+options_templates.update(options_section(('upscaling', "Upscaling", "postprocessing"), {
"ESRGAN_tile": OptionInfo(192, "Tile size for ESRGAN upscalers.", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}).info("0 = no tiling"),
"ESRGAN_tile_overlap": OptionInfo(8, "Tile overlap for ESRGAN upscalers.", gr.Slider, {"minimum": 0, "maximum": 48, "step": 1}).info("Low values = visible seam"),
"realesrgan_enabled_models": OptionInfo(["R-ESRGAN 4x+", "R-ESRGAN 4x+ Anime6B"], "Select which Real-ESRGAN models to show in the web UI.", gr.CheckboxGroup, lambda: {"choices": shared_items.realesrgan_models_names()}),
"upscaler_for_img2img": OptionInfo(None, "Upscaler for img2img", gr.Dropdown, lambda: {"choices": [x.name for x in shared.sd_upscalers]}),
}))
-options_templates.update(options_section(('face-restoration', "Face restoration"), {
+options_templates.update(options_section(('face-restoration', "Face restoration", "postprocessing"), {
"face_restoration": OptionInfo(False, "Restore faces", infotext='Face restoration').info("will use a third-party model on generation result to reconstruct faces"),
"face_restoration_model": OptionInfo("CodeFormer", "Face restoration model", gr.Radio, lambda: {"choices": [x.name() for x in shared.face_restorers]}),
"code_former_weight": OptionInfo(0.5, "CodeFormer weight", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}).info("0 = maximum effect; 1 = minimum effect"),
"face_restoration_unload": OptionInfo(False, "Move face restoration model from VRAM into RAM after processing"),
}))
-options_templates.update(options_section(('system', "System"), {
+options_templates.update(options_section(('system', "System", "system"), {
"auto_launch_browser": OptionInfo("Local", "Automatically open webui in browser on startup", gr.Radio, lambda: {"choices": ["Disable", "Local", "Remote"]}),
"enable_console_prompts": OptionInfo(shared.cmd_opts.enable_console_prompts, "Print prompts to console when generating with txt2img and img2img."),
"show_warnings": OptionInfo(False, "Show warnings in console.").needs_reload_ui(),
@@ -116,13 +121,13 @@ options_templates.update(options_section(('system', "System"), {
"dump_stacks_on_signal": OptionInfo(False, "Print stack traces before exiting the program with ctrl+c."),
}))
-options_templates.update(options_section(('API', "API"), {
+options_templates.update(options_section(('API', "API", "system"), {
"api_enable_requests": OptionInfo(True, "Allow http:// and https:// URLs for input images in API", restrict_api=True),
"api_forbid_local_requests": OptionInfo(True, "Forbid URLs to local resources", restrict_api=True),
"api_useragent": OptionInfo("", "User agent for requests", restrict_api=True),
}))
-options_templates.update(options_section(('training', "Training"), {
+options_templates.update(options_section(('training', "Training", "training"), {
"unload_models_when_training": OptionInfo(False, "Move VAE and CLIP to RAM when training if possible. Saves VRAM."),
"pin_memory": OptionInfo(False, "Turn on pin_memory for DataLoader. Makes training slightly faster but can increase memory usage."),
"save_optimizer_state": OptionInfo(False, "Saves Optimizer state as separate *.optim file. Training of embedding or HN can be resumed with the matching optim file."),
@@ -137,7 +142,7 @@ options_templates.update(options_section(('training', "Training"), {
"training_tensorboard_flush_every": OptionInfo(120, "How often, in seconds, to flush the pending tensorboard events and summaries to disk."),
}))
-options_templates.update(options_section(('sd', "Stable Diffusion"), {
+options_templates.update(options_section(('sd', "Stable Diffusion", "sd"), {
"sd_model_checkpoint": OptionInfo(None, "Stable Diffusion checkpoint", gr.Dropdown, lambda: {"choices": shared_items.list_checkpoint_tiles(shared.opts.sd_checkpoint_dropdown_use_short)}, refresh=shared_items.refresh_checkpoints, infotext='Model hash'),
"sd_checkpoints_limit": OptionInfo(1, "Maximum number of checkpoints loaded at the same time", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1}),
"sd_checkpoints_keep_in_cpu": OptionInfo(True, "Only keep one model on device").info("will keep models other than the currently used one in RAM rather than VRAM"),
@@ -154,14 +159,14 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
"hires_fix_refiner_pass": OptionInfo("second pass", "Hires fix: which pass to enable refiner for", gr.Radio, {"choices": ["first pass", "second pass", "both passes"]}, infotext="Hires refiner"),
}))
-options_templates.update(options_section(('sdxl', "Stable Diffusion XL"), {
+options_templates.update(options_section(('sdxl', "Stable Diffusion XL", "sd"), {
"sdxl_crop_top": OptionInfo(0, "crop top coordinate"),
"sdxl_crop_left": OptionInfo(0, "crop left coordinate"),
"sdxl_refiner_low_aesthetic_score": OptionInfo(2.5, "SDXL low aesthetic score", gr.Number).info("used for refiner model negative prompt"),
"sdxl_refiner_high_aesthetic_score": OptionInfo(6.0, "SDXL high aesthetic score", gr.Number).info("used for refiner model prompt"),
}))
-options_templates.update(options_section(('vae', "VAE"), {
+options_templates.update(options_section(('vae', "VAE", "sd"), {
"sd_vae_explanation": OptionHTML("""
<abbr title='Variational autoencoder'>VAE</abbr> is a neural network that transforms a standard <abbr title='red/green/blue'>RGB</abbr>
image into latent space representation and back. Latent space representation is what stable diffusion is working on during sampling
@@ -176,7 +181,7 @@ For img2img, VAE is used to process user's input image before the sampling, and
"sd_vae_decode_method": OptionInfo("Full", "VAE type for decode", gr.Radio, {"choices": ["Full", "TAESD"]}, infotext='VAE Decoder').info("method to decode latent to image"),
}))
-options_templates.update(options_section(('img2img', "img2img"), {
+options_templates.update(options_section(('img2img', "img2img", "sd"), {
"inpainting_mask_weight": OptionInfo(1.0, "Inpainting conditioning mask strength", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext='Conditional mask weight'),
"initial_noise_multiplier": OptionInfo(1.0, "Noise multiplier for img2img", gr.Slider, {"minimum": 0.0, "maximum": 1.5, "step": 0.001}, infotext='Noise multiplier'),
"img2img_extra_noise": OptionInfo(0.0, "Extra noise multiplier for img2img and hires fix", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext='Extra noise').info("0 = disabled (default); should be lower than denoising strength"),
@@ -192,7 +197,7 @@ options_templates.update(options_section(('img2img', "img2img"), {
"img2img_batch_show_results_limit": OptionInfo(32, "Show the first N batch img2img results in UI", gr.Slider, {"minimum": -1, "maximum": 1000, "step": 1}).info('0: disable, -1: show all images. Too many images can cause lag'),
}))
-options_templates.update(options_section(('optimizations', "Optimizations"), {
+options_templates.update(options_section(('optimizations', "Optimizations", "sd"), {
"cross_attention_optimization": OptionInfo("Automatic", "Cross attention optimization", gr.Dropdown, lambda: {"choices": shared_items.cross_attention_optimizations()}),
"s_min_uncond": OptionInfo(0.0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 15.0, "step": 0.01}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"),
"token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}, infotext='Token merging ratio').link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9256").info("0=disable, higher=faster"),
@@ -203,7 +208,7 @@ options_templates.update(options_section(('optimizations', "Optimizations"), {
"batch_cond_uncond": OptionInfo(True, "Batch cond/uncond").info("do both conditional and unconditional denoising in one batch; uses a bit more VRAM during sampling, but improves speed; previously this was controlled by --always-batch-cond-uncond comandline argument"),
}))
-options_templates.update(options_section(('compatibility', "Compatibility"), {
+options_templates.update(options_section(('compatibility', "Compatibility", "sd"), {
"use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. Can be useful to reproduce old seeds."),
"use_old_karras_scheduler_sigmas": OptionInfo(False, "Use old karras scheduler sigmas (0.1 to 10)."),
"no_dpmpp_sde_batch_determinism": OptionInfo(False, "Do not make DPM++ SDE deterministic across different batch sizes."),
@@ -228,8 +233,9 @@ options_templates.update(options_section(('interrogate', "Interrogate"), {
"deepbooru_filter_tags": OptionInfo("", "deepbooru: filter out those tags").info("separate by comma"),
}))
-options_templates.update(options_section(('extra_networks', "Extra Networks"), {
+options_templates.update(options_section(('extra_networks', "Extra Networks", "sd"), {
"extra_networks_show_hidden_directories": OptionInfo(True, "Show hidden directories").info("directory is hidden if its name starts with \".\"."),
+ "extra_networks_dir_button_function": OptionInfo(False, "Add a '/' to the beginning of directory buttons").info("Buttons will display the contents of the selected directory without acting as a search filter."),
"extra_networks_hidden_models": OptionInfo("When searched", "Show cards for models in hidden directories", gr.Radio, {"choices": ["Always", "When searched", "Never"]}).info('"When searched" option will only show the item when the search string has 4 characters or more'),
"extra_networks_default_multiplier": OptionInfo(1.0, "Default multiplier for extra networks", gr.Slider, {"minimum": 0.0, "maximum": 2.0, "step": 0.01}),
"extra_networks_card_width": OptionInfo(0, "Card width for Extra Networks").info("in pixels"),
@@ -245,47 +251,66 @@ options_templates.update(options_section(('extra_networks', "Extra Networks"), {
"sd_hypernetwork": OptionInfo("None", "Add hypernetwork to prompt", gr.Dropdown, lambda: {"choices": ["None", *shared.hypernetworks]}, refresh=shared_items.reload_hypernetworks),
}))
-options_templates.update(options_section(('ui', "User interface"), {
- "localization": OptionInfo("None", "Localization", gr.Dropdown, lambda: {"choices": ["None"] + list(localization.localizations.keys())}, refresh=lambda: localization.list_localizations(cmd_opts.localizations_dir)).needs_reload_ui(),
- "gradio_theme": OptionInfo("Default", "Gradio theme", ui_components.DropdownEditable, lambda: {"choices": ["Default"] + shared_gradio_themes.gradio_hf_hub_themes}).info("you can also manually enter any of themes from the <a href='https://huggingface.co/spaces/gradio/theme-gallery'>gallery</a>.").needs_reload_ui(),
- "gradio_themes_cache": OptionInfo(True, "Cache gradio themes locally").info("disable to update the selected Gradio theme"),
- "gallery_height": OptionInfo("", "Gallery height", gr.Textbox).info("an be any valid CSS value").needs_reload_ui(),
- "return_grid": OptionInfo(True, "Show grid in results for web"),
- "do_not_show_images": OptionInfo(False, "Do not show any images in results for web"),
- "send_seed": OptionInfo(True, "Send seed when sending prompt or image to other interface"),
- "send_size": OptionInfo(True, "Send size when sending prompt or image to another interface"),
- "js_modal_lightbox": OptionInfo(True, "Enable full page image viewer"),
- "js_modal_lightbox_initially_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"),
- "js_modal_lightbox_gamepad": OptionInfo(False, "Navigate image viewer with gamepad"),
- "js_modal_lightbox_gamepad_repeat": OptionInfo(250, "Gamepad repeat period, in milliseconds"),
- "show_progress_in_title": OptionInfo(True, "Show generation progress in window title."),
- "samplers_in_dropdown": OptionInfo(True, "Use dropdown for sampler selection instead of radio group").needs_reload_ui(),
- "dimensions_and_batch_together": OptionInfo(True, "Show Width/Height and Batch sliders in same row").needs_reload_ui(),
- "keyedit_precision_attention": OptionInfo(0.1, "Ctrl+up/down precision when editing (attention:1.1)", gr.Slider, {"minimum": 0.01, "maximum": 0.2, "step": 0.001}),
- "keyedit_precision_extra": OptionInfo(0.05, "Ctrl+up/down precision when editing <extra networks:0.9>", gr.Slider, {"minimum": 0.01, "maximum": 0.2, "step": 0.001}),
- "keyedit_delimiters": OptionInfo(r".,\/!?%^*;:{}=`~() ", "Ctrl+up/down word delimiters"),
+options_templates.update(options_section(('ui_prompt_editing', "Prompt editing", "ui"), {
+ "keyedit_precision_attention": OptionInfo(0.1, "Precision for (attention:1.1) when editing the prompt with Ctrl+up/down", gr.Slider, {"minimum": 0.01, "maximum": 0.2, "step": 0.001}),
+ "keyedit_precision_extra": OptionInfo(0.05, "Precision for <extra networks:0.9> when editing the prompt with Ctrl+up/down", gr.Slider, {"minimum": 0.01, "maximum": 0.2, "step": 0.001}),
+ "keyedit_delimiters": OptionInfo(r".,\/!?%^*;:{}=`~() ", "Word delimiters when editing the prompt with Ctrl+up/down"),
"keyedit_delimiters_whitespace": OptionInfo(["Tab", "Carriage Return", "Line Feed"], "Ctrl+up/down whitespace delimiters", gr.CheckboxGroup, lambda: {"choices": ["Tab", "Carriage Return", "Line Feed"]}),
"keyedit_move": OptionInfo(True, "Alt+left/right moves prompt elements"),
- "quicksettings_list": OptionInfo(["sd_model_checkpoint"], "Quicksettings list", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that appear at the top of page rather than in settings tab").needs_reload_ui(),
- "ui_tab_order": OptionInfo([], "UI tab order", ui_components.DropdownMulti, lambda: {"choices": list(shared.tab_names)}).needs_reload_ui(),
- "hidden_tabs": OptionInfo([], "Hidden UI tabs", ui_components.DropdownMulti, lambda: {"choices": list(shared.tab_names)}).needs_reload_ui(),
- "ui_reorder_list": OptionInfo([], "txt2img/img2img UI item order", ui_components.DropdownMulti, lambda: {"choices": list(shared_items.ui_reorder_categories())}).info("selected items appear first").needs_reload_ui(),
+ "disable_token_counters": OptionInfo(False, "Disable prompt token counters").needs_reload_ui(),
+}))
+
+options_templates.update(options_section(('ui_gallery', "Gallery", "ui"), {
+ "return_grid": OptionInfo(True, "Show grid in gallery"),
+ "do_not_show_images": OptionInfo(False, "Do not show any images in gallery"),
+ "js_modal_lightbox": OptionInfo(True, "Full page image viewer: enable"),
+ "js_modal_lightbox_initially_zoomed": OptionInfo(True, "Full page image viewer: show images zoomed in by default"),
+ "js_modal_lightbox_gamepad": OptionInfo(False, "Full page image viewer: navigate with gamepad"),
+ "js_modal_lightbox_gamepad_repeat": OptionInfo(250, "Full page image viewer: gamepad repeat period").info("in milliseconds"),
+ "gallery_height": OptionInfo("", "Gallery height", gr.Textbox).info("can be any valid CSS value, for example 768px or 20em").needs_reload_ui(),
+}))
+
+options_templates.update(options_section(('ui_alternatives', "UI alternatives", "ui"), {
+ "compact_prompt_box": OptionInfo(False, "Compact prompt layout").info("puts prompt and negative prompt inside the Generate tab, leaving more vertical space for the image on the right").needs_reload_ui(),
+ "samplers_in_dropdown": OptionInfo(True, "Use dropdown for sampler selection instead of radio group").needs_reload_ui(),
+ "dimensions_and_batch_together": OptionInfo(True, "Show Width/Height and Batch sliders in same row").needs_reload_ui(),
"sd_checkpoint_dropdown_use_short": OptionInfo(False, "Checkpoint dropdown: use filenames without paths").info("models in subdirectories like photo/sd15.ckpt will be listed as just sd15.ckpt"),
"hires_fix_show_sampler": OptionInfo(False, "Hires fix: show hires checkpoint and sampler selection").needs_reload_ui(),
"hires_fix_show_prompts": OptionInfo(False, "Hires fix: show hires prompt and negative prompt").needs_reload_ui(),
- "disable_token_counters": OptionInfo(False, "Disable prompt token counters").needs_reload_ui(),
"txt2img_settings_accordion": OptionInfo(False, "Settings in txt2img hidden under Accordion").needs_reload_ui(),
"img2img_settings_accordion": OptionInfo(False, "Settings in img2img hidden under Accordion").needs_reload_ui(),
- "compact_prompt_box": OptionInfo(False, "Compact prompt layout").info("puts prompt and negative prompt inside the Generate tab, leaving more vertical space for the image on the right").needs_reload_ui(),
}))
+options_templates.update(options_section(('ui', "User interface", "ui"), {
+ "localization": OptionInfo("None", "Localization", gr.Dropdown, lambda: {"choices": ["None"] + list(localization.localizations.keys())}, refresh=lambda: localization.list_localizations(cmd_opts.localizations_dir)).needs_reload_ui(),
+ "quicksettings_list": OptionInfo(["sd_model_checkpoint"], "Quicksettings list", ui_components.DropdownMulti, lambda: {"choices": list(shared.opts.data_labels.keys())}).js("info", "settingsHintsShowQuicksettings").info("setting entries that appear at the top of page rather than in settings tab").needs_reload_ui(),
+ "ui_tab_order": OptionInfo([], "UI tab order", ui_components.DropdownMulti, lambda: {"choices": list(shared.tab_names)}).needs_reload_ui(),
+ "hidden_tabs": OptionInfo([], "Hidden UI tabs", ui_components.DropdownMulti, lambda: {"choices": list(shared.tab_names)}).needs_reload_ui(),
+ "ui_reorder_list": OptionInfo([], "UI item order for txt2img/img2img tabs", ui_components.DropdownMulti, lambda: {"choices": list(shared_items.ui_reorder_categories())}).info("selected items appear first").needs_reload_ui(),
+ "gradio_theme": OptionInfo("Default", "Gradio theme", ui_components.DropdownEditable, lambda: {"choices": ["Default"] + shared_gradio_themes.gradio_hf_hub_themes}).info("you can also manually enter any of themes from the <a href='https://huggingface.co/spaces/gradio/theme-gallery'>gallery</a>.").needs_reload_ui(),
+ "gradio_themes_cache": OptionInfo(True, "Cache gradio themes locally").info("disable to update the selected Gradio theme"),
+ "show_progress_in_title": OptionInfo(True, "Show generation progress in window title."),
+ "send_seed": OptionInfo(True, "Send seed when sending prompt or image to other interface"),
+ "send_size": OptionInfo(True, "Send size when sending prompt or image to another interface"),
+}))
-options_templates.update(options_section(('infotext', "Infotext"), {
- "add_model_hash_to_info": OptionInfo(True, "Add model hash to generation information"),
- "add_model_name_to_info": OptionInfo(True, "Add model name to generation information"),
- "add_user_name_to_info": OptionInfo(False, "Add user name to generation information when authenticated"),
- "add_version_to_infotext": OptionInfo(True, "Add program version to generation information"),
+
+options_templates.update(options_section(('infotext', "Infotext", "ui"), {
+ "infotext_explanation": OptionHTML("""
+Infotext is what this software calls the text that contains generation parameters and can be used to generate the same picture again.
+It is displayed in UI below the image. To use infotext, paste it into the prompt and click the ↙️ paste button.
+"""),
+ "enable_pnginfo": OptionInfo(True, "Write infotext to metadata of the generated image"),
+ "save_txt": OptionInfo(False, "Create a text file with infotext next to every generated image"),
+
+ "add_model_name_to_info": OptionInfo(True, "Add model name to infotext"),
+ "add_model_hash_to_info": OptionInfo(True, "Add model hash to infotext"),
+ "add_vae_name_to_info": OptionInfo(True, "Add VAE name to infotext"),
+ "add_vae_hash_to_info": OptionInfo(True, "Add VAE hash to infotext"),
+ "add_user_name_to_info": OptionInfo(False, "Add user name to infotext when authenticated"),
+ "add_version_to_infotext": OptionInfo(True, "Add program version to infotext"),
"disable_weights_auto_swap": OptionInfo(True, "Disregard checkpoint information from pasted infotext").info("when reading generation parameters from text into UI"),
+ "infotext_skip_pasting": OptionInfo([], "Disregard fields from pasted infotext", ui_components.DropdownMulti, lambda: {"choices": shared_items.get_infotext_names()}),
"infotext_styles": OptionInfo("Apply if any", "Infer styles from prompts of pasted infotext", gr.Radio, {"choices": ["Ignore", "Apply", "Discard", "Apply if any"]}).info("when reading generation parameters from text into UI)").html("""<ul style='margin-left: 1.5em'>
<li>Ignore: keep prompt and styles dropdown as it is.</li>
<li>Apply: remove style text from prompt, always replace styles dropdown value with found styles (even if none are found).</li>
@@ -295,7 +320,7 @@ options_templates.update(options_section(('infotext', "Infotext"), {
}))
-options_templates.update(options_section(('ui', "Live previews"), {
+options_templates.update(options_section(('ui', "Live previews", "ui"), {
"show_progressbar": OptionInfo(True, "Show progressbar"),
"live_previews_enable": OptionInfo(True, "Show live previews of the created image"),
"live_previews_image_format": OptionInfo("png", "Live preview file format", gr.Radio, {"choices": ["jpeg", "png", "webp"]}),
@@ -306,9 +331,10 @@ options_templates.update(options_section(('ui', "Live previews"), {
"live_preview_content": OptionInfo("Prompt", "Live preview subject", gr.Radio, {"choices": ["Combined", "Prompt", "Negative prompt"]}),
"live_preview_refresh_period": OptionInfo(1000, "Progressbar and preview update period").info("in milliseconds"),
"live_preview_fast_interrupt": OptionInfo(False, "Return image with chosen live preview method on interrupt").info("makes interrupts faster"),
+ "js_live_preview_in_modal_lightbox": OptionInfo(False, "Show Live preview in full page image viewer"),
}))
-options_templates.update(options_section(('sampler-params', "Sampler parameters"), {
+options_templates.update(options_section(('sampler-params', "Sampler parameters", "sd"), {
"hide_samplers": OptionInfo([], "Hide samplers in user interface", gr.CheckboxGroup, lambda: {"choices": [x.name for x in shared_items.list_samplers()]}).needs_reload_ui(),
"eta_ddim": OptionInfo(0.0, "Eta for DDIM", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext='Eta DDIM').info("noise multiplier; higher = more unpredictable results"),
"eta_ancestral": OptionInfo(1.0, "Eta for k-diffusion samplers", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}, infotext='Eta').info("noise multiplier; currently only applies to ancestral samplers (i.e. Euler a) and SDE samplers"),
@@ -330,10 +356,11 @@ options_templates.update(options_section(('sampler-params', "Sampler parameters"
'uni_pc_lower_order_final': OptionInfo(True, "UniPC lower order final", infotext='UniPC lower order final'),
}))
-options_templates.update(options_section(('postprocessing', "Postprocessing"), {
+options_templates.update(options_section(('postprocessing', "Postprocessing", "postprocessing"), {
'postprocessing_enable_in_main_ui': OptionInfo([], "Enable postprocessing operations in txt2img and img2img tabs", ui_components.DropdownMulti, lambda: {"choices": [x.name for x in shared_items.postprocessing_scripts()]}),
'postprocessing_operation_order': OptionInfo([], "Postprocessing operation order", ui_components.DropdownMulti, lambda: {"choices": [x.name for x in shared_items.postprocessing_scripts()]}),
'upscaling_max_images_in_cache': OptionInfo(5, "Maximum number of images in upscaling cache", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1}),
+ 'postprocessing_existing_caption_action': OptionInfo("Ignore", "Action for existing captions", gr.Radio, {"choices": ["Ignore", "Keep", "Prepend", "Append"]}).info("when generating captions using postprocessing; Ignore = use generated; Keep = use original; Prepend/Append = combine both"),
}))
options_templates.update(options_section((None, "Hidden options"), {
diff --git a/modules/styles.py b/modules/styles.py
index 0740fe1b..81d9800d 100644
--- a/modules/styles.py
+++ b/modules/styles.py
@@ -1,7 +1,7 @@
import csv
+import fnmatch
import os
import os.path
-import re
import typing
import shutil
@@ -10,6 +10,7 @@ class PromptStyle(typing.NamedTuple):
name: str
prompt: str
negative_prompt: str
+ path: str = None
def merge_prompts(style_prompt: str, prompt: str) -> str:
@@ -29,38 +30,61 @@ def apply_styles_to_prompt(prompt, styles):
return prompt
-re_spaces = re.compile(" +")
+def unwrap_style_text_from_prompt(style_text, prompt):
+ """
+ Checks the prompt to see if the style text is wrapped around it. If so,
+ returns True plus the prompt text without the style text. Otherwise, returns
+ False with the original prompt.
-
-def extract_style_text_from_prompt(style_text, prompt):
- stripped_prompt = re.sub(re_spaces, " ", prompt.strip())
- stripped_style_text = re.sub(re_spaces, " ", style_text.strip())
+ Note that the "cleaned" version of the style text is only used for matching
+ purposes here. It isn't returned; the original style text is not modified.
+ """
+ stripped_prompt = prompt
+ stripped_style_text = style_text
if "{prompt}" in stripped_style_text:
- left, right = stripped_style_text.split("{prompt}", 2)
+ # Work out whether the prompt is wrapped in the style text. If so, we
+ # return True and the "inner" prompt text that isn't part of the style.
+ try:
+ left, right = stripped_style_text.split("{prompt}", 2)
+ except ValueError as e:
+ # If the style text has multiple "{prompt}"s, we can't split it into
+ # two parts. This is an error, but we can't do anything about it.
+ print(f"Unable to compare style text to prompt:\n{style_text}")
+ print(f"Error: {e}")
+ return False, prompt
if stripped_prompt.startswith(left) and stripped_prompt.endswith(right):
- prompt = stripped_prompt[len(left):len(stripped_prompt)-len(right)]
+ prompt = stripped_prompt[len(left) : len(stripped_prompt) - len(right)]
return True, prompt
else:
+ # Work out whether the given prompt ends with the style text. If so, we
+ # return True and the prompt text up to where the style text starts.
if stripped_prompt.endswith(stripped_style_text):
- prompt = stripped_prompt[:len(stripped_prompt)-len(stripped_style_text)]
-
- if prompt.endswith(', '):
+ prompt = stripped_prompt[: len(stripped_prompt) - len(stripped_style_text)]
+ if prompt.endswith(", "):
prompt = prompt[:-2]
-
return True, prompt
return False, prompt
-def extract_style_from_prompts(style: PromptStyle, prompt, negative_prompt):
+def extract_original_prompts(style: PromptStyle, prompt, negative_prompt):
+ """
+ Takes a style and compares it to the prompt and negative prompt. If the style
+ matches, returns True plus the prompt and negative prompt with the style text
+ removed. Otherwise, returns False with the original prompt and negative prompt.
+ """
if not style.prompt and not style.negative_prompt:
return False, prompt, negative_prompt
- match_positive, extracted_positive = extract_style_text_from_prompt(style.prompt, prompt)
+ match_positive, extracted_positive = unwrap_style_text_from_prompt(
+ style.prompt, prompt
+ )
if not match_positive:
return False, prompt, negative_prompt
- match_negative, extracted_negative = extract_style_text_from_prompt(style.negative_prompt, negative_prompt)
+ match_negative, extracted_negative = unwrap_style_text_from_prompt(
+ style.negative_prompt, negative_prompt
+ )
if not match_negative:
return False, prompt, negative_prompt
@@ -69,25 +93,84 @@ def extract_style_from_prompts(style: PromptStyle, prompt, negative_prompt):
class StyleDatabase:
def __init__(self, path: str):
- self.no_style = PromptStyle("None", "", "")
+ self.no_style = PromptStyle("None", "", "", None)
self.styles = {}
self.path = path
+ folder, file = os.path.split(self.path)
+ filename, _, ext = file.partition('*')
+ self.default_path = os.path.join(folder, filename + ext)
+
+ self.prompt_fields = [field for field in PromptStyle._fields if field != "path"]
+
self.reload()
def reload(self):
+ """
+ Clears the style database and reloads the styles from the CSV file(s)
+ matching the path used to initialize the database.
+ """
self.styles.clear()
- if not os.path.exists(self.path):
+ path, filename = os.path.split(self.path)
+
+ if "*" in filename:
+ fileglob = filename.split("*")[0] + "*.csv"
+ filelist = []
+ for file in os.listdir(path):
+ if fnmatch.fnmatch(file, fileglob):
+ filelist.append(file)
+ # Add a visible divider to the style list
+ half_len = round(len(file) / 2)
+ divider = f"{'-' * (20 - half_len)} {file.upper()}"
+ divider = f"{divider} {'-' * (40 - len(divider))}"
+ self.styles[divider] = PromptStyle(
+ f"{divider}", None, None, "do_not_save"
+ )
+ # Add styles from this CSV file
+ self.load_from_csv(os.path.join(path, file))
+ if len(filelist) == 0:
+ print(f"No styles found in {path} matching {fileglob}")
+ return
+ elif not os.path.exists(self.path):
+ print(f"Style database not found: {self.path}")
return
+ else:
+ self.load_from_csv(self.path)
- with open(self.path, "r", encoding="utf-8-sig", newline='') as file:
+ def load_from_csv(self, path: str):
+ with open(path, "r", encoding="utf-8-sig", newline="") as file:
reader = csv.DictReader(file, skipinitialspace=True)
for row in reader:
+ # Ignore empty rows or rows starting with a comment
+ if not row or row["name"].startswith("#"):
+ continue
# Support loading old CSV format with "name, text"-columns
prompt = row["prompt"] if "prompt" in row else row["text"]
negative_prompt = row.get("negative_prompt", "")
- self.styles[row["name"]] = PromptStyle(row["name"], prompt, negative_prompt)
+ # Add style to database
+ self.styles[row["name"]] = PromptStyle(
+ row["name"], prompt, negative_prompt, path
+ )
+
+ def get_style_paths(self) -> set:
+ """Returns a set of all distinct paths of files that styles are loaded from."""
+ # Update any styles without a path to the default path
+ for style in list(self.styles.values()):
+ if not style.path:
+ self.styles[style.name] = style._replace(path=self.default_path)
+
+ # Create a list of all distinct paths, including the default path
+ style_paths = set()
+ style_paths.add(self.default_path)
+ for _, style in self.styles.items():
+ if style.path:
+ style_paths.add(style.path)
+
+ # Remove any paths for styles that are just list dividers
+ style_paths.discard("do_not_save")
+
+ return style_paths
def get_style_prompts(self, styles):
return [self.styles.get(x, self.no_style).prompt for x in styles]
@@ -96,20 +179,40 @@ class StyleDatabase:
return [self.styles.get(x, self.no_style).negative_prompt for x in styles]
def apply_styles_to_prompt(self, prompt, styles):
- return apply_styles_to_prompt(prompt, [self.styles.get(x, self.no_style).prompt for x in styles])
+ return apply_styles_to_prompt(
+ prompt, [self.styles.get(x, self.no_style).prompt for x in styles]
+ )
def apply_negative_styles_to_prompt(self, prompt, styles):
- return apply_styles_to_prompt(prompt, [self.styles.get(x, self.no_style).negative_prompt for x in styles])
-
- def save_styles(self, path: str) -> None:
- # Always keep a backup file around
- if os.path.exists(path):
- shutil.copy(path, f"{path}.bak")
-
- with open(path, "w", encoding="utf-8-sig", newline='') as file:
- writer = csv.DictWriter(file, fieldnames=PromptStyle._fields)
- writer.writeheader()
- writer.writerows(style._asdict() for k, style in self.styles.items())
+ return apply_styles_to_prompt(
+ prompt, [self.styles.get(x, self.no_style).negative_prompt for x in styles]
+ )
+
+ def save_styles(self, path: str = None) -> None:
+ # The path argument is deprecated, but kept for backwards compatibility
+ _ = path
+
+ style_paths = self.get_style_paths()
+
+ csv_names = [os.path.split(path)[1].lower() for path in style_paths]
+
+ for style_path in style_paths:
+ # Always keep a backup file around
+ if os.path.exists(style_path):
+ shutil.copy(style_path, f"{style_path}.bak")
+
+ # Write the styles to the CSV file
+ with open(style_path, "w", encoding="utf-8-sig", newline="") as file:
+ writer = csv.DictWriter(file, fieldnames=self.prompt_fields)
+ writer.writeheader()
+ for style in (s for s in self.styles.values() if s.path == style_path):
+ # Skip style list dividers, e.g. "STYLES.CSV"
+ if style.name.lower().strip("# ") in csv_names:
+ continue
+ # Write style fields, ignoring the path field
+ writer.writerow(
+ {k: v for k, v in style._asdict().items() if k != "path"}
+ )
def extract_styles_from_prompt(self, prompt, negative_prompt):
extracted = []
@@ -120,7 +223,9 @@ class StyleDatabase:
found_style = None
for style in applicable_styles:
- is_match, new_prompt, new_neg_prompt = extract_style_from_prompts(style, prompt, negative_prompt)
+ is_match, new_prompt, new_neg_prompt = extract_original_prompts(
+ style, prompt, negative_prompt
+ )
if is_match:
found_style = style
prompt = new_prompt
diff --git a/modules/textual_inversion/autocrop.py b/modules/textual_inversion/autocrop.py
index 1675e39a..e223a2e0 100644
--- a/modules/textual_inversion/autocrop.py
+++ b/modules/textual_inversion/autocrop.py
@@ -3,6 +3,8 @@ import requests
import os
import numpy as np
from PIL import ImageDraw
+from modules import paths_internal
+from pkg_resources import parse_version
GREEN = "#0F0"
BLUE = "#00F"
@@ -25,7 +27,6 @@ def crop_image(im, settings):
elif is_portrait(settings.crop_width, settings.crop_height):
scale_by = settings.crop_height / im.height
-
im = im.resize((int(im.width * scale_by), int(im.height * scale_by)))
im_debug = im.copy()
@@ -69,6 +70,7 @@ def crop_image(im, settings):
return results
+
def focal_point(im, settings):
corner_points = image_corner_points(im, settings) if settings.corner_points_weight > 0 else []
entropy_points = image_entropy_points(im, settings) if settings.entropy_points_weight > 0 else []
@@ -78,118 +80,120 @@ def focal_point(im, settings):
weight_pref_total = 0
if corner_points:
- weight_pref_total += settings.corner_points_weight
+ weight_pref_total += settings.corner_points_weight
if entropy_points:
- weight_pref_total += settings.entropy_points_weight
+ weight_pref_total += settings.entropy_points_weight
if face_points:
- weight_pref_total += settings.face_points_weight
+ weight_pref_total += settings.face_points_weight
corner_centroid = None
if corner_points:
- corner_centroid = centroid(corner_points)
- corner_centroid.weight = settings.corner_points_weight / weight_pref_total
- pois.append(corner_centroid)
+ corner_centroid = centroid(corner_points)
+ corner_centroid.weight = settings.corner_points_weight / weight_pref_total
+ pois.append(corner_centroid)
entropy_centroid = None
if entropy_points:
- entropy_centroid = centroid(entropy_points)
- entropy_centroid.weight = settings.entropy_points_weight / weight_pref_total
- pois.append(entropy_centroid)
+ entropy_centroid = centroid(entropy_points)
+ entropy_centroid.weight = settings.entropy_points_weight / weight_pref_total
+ pois.append(entropy_centroid)
face_centroid = None
if face_points:
- face_centroid = centroid(face_points)
- face_centroid.weight = settings.face_points_weight / weight_pref_total
- pois.append(face_centroid)
+ face_centroid = centroid(face_points)
+ face_centroid.weight = settings.face_points_weight / weight_pref_total
+ pois.append(face_centroid)
average_point = poi_average(pois, settings)
if settings.annotate_image:
- d = ImageDraw.Draw(im)
- max_size = min(im.width, im.height) * 0.07
- if corner_centroid is not None:
- color = BLUE
- box = corner_centroid.bounding(max_size * corner_centroid.weight)
- d.text((box[0], box[1]-15), f"Edge: {corner_centroid.weight:.02f}", fill=color)
- d.ellipse(box, outline=color)
- if len(corner_points) > 1:
- for f in corner_points:
- d.rectangle(f.bounding(4), outline=color)
- if entropy_centroid is not None:
- color = "#ff0"
- box = entropy_centroid.bounding(max_size * entropy_centroid.weight)
- d.text((box[0], box[1]-15), f"Entropy: {entropy_centroid.weight:.02f}", fill=color)
- d.ellipse(box, outline=color)
- if len(entropy_points) > 1:
- for f in entropy_points:
- d.rectangle(f.bounding(4), outline=color)
- if face_centroid is not None:
- color = RED
- box = face_centroid.bounding(max_size * face_centroid.weight)
- d.text((box[0], box[1]-15), f"Face: {face_centroid.weight:.02f}", fill=color)
- d.ellipse(box, outline=color)
- if len(face_points) > 1:
- for f in face_points:
- d.rectangle(f.bounding(4), outline=color)
-
- d.ellipse(average_point.bounding(max_size), outline=GREEN)
+ d = ImageDraw.Draw(im)
+ max_size = min(im.width, im.height) * 0.07
+ if corner_centroid is not None:
+ color = BLUE
+ box = corner_centroid.bounding(max_size * corner_centroid.weight)
+ d.text((box[0], box[1] - 15), f"Edge: {corner_centroid.weight:.02f}", fill=color)
+ d.ellipse(box, outline=color)
+ if len(corner_points) > 1:
+ for f in corner_points:
+ d.rectangle(f.bounding(4), outline=color)
+ if entropy_centroid is not None:
+ color = "#ff0"
+ box = entropy_centroid.bounding(max_size * entropy_centroid.weight)
+ d.text((box[0], box[1] - 15), f"Entropy: {entropy_centroid.weight:.02f}", fill=color)
+ d.ellipse(box, outline=color)
+ if len(entropy_points) > 1:
+ for f in entropy_points:
+ d.rectangle(f.bounding(4), outline=color)
+ if face_centroid is not None:
+ color = RED
+ box = face_centroid.bounding(max_size * face_centroid.weight)
+ d.text((box[0], box[1] - 15), f"Face: {face_centroid.weight:.02f}", fill=color)
+ d.ellipse(box, outline=color)
+ if len(face_points) > 1:
+ for f in face_points:
+ d.rectangle(f.bounding(4), outline=color)
+
+ d.ellipse(average_point.bounding(max_size), outline=GREEN)
return average_point
def image_face_points(im, settings):
if settings.dnn_model_path is not None:
- detector = cv2.FaceDetectorYN.create(
- settings.dnn_model_path,
- "",
- (im.width, im.height),
- 0.9, # score threshold
- 0.3, # nms threshold
- 5000 # keep top k before nms
- )
- faces = detector.detect(np.array(im))
- results = []
- if faces[1] is not None:
- for face in faces[1]:
- x = face[0]
- y = face[1]
- w = face[2]
- h = face[3]
- results.append(
- PointOfInterest(
- int(x + (w * 0.5)), # face focus left/right is center
- int(y + (h * 0.33)), # face focus up/down is close to the top of the head
- size = w,
- weight = 1/len(faces[1])
- )
- )
- return results
+ detector = cv2.FaceDetectorYN.create(
+ settings.dnn_model_path,
+ "",
+ (im.width, im.height),
+ 0.9, # score threshold
+ 0.3, # nms threshold
+ 5000 # keep top k before nms
+ )
+ faces = detector.detect(np.array(im))
+ results = []
+ if faces[1] is not None:
+ for face in faces[1]:
+ x = face[0]
+ y = face[1]
+ w = face[2]
+ h = face[3]
+ results.append(
+ PointOfInterest(
+ int(x + (w * 0.5)), # face focus left/right is center
+ int(y + (h * 0.33)), # face focus up/down is close to the top of the head
+ size=w,
+ weight=1 / len(faces[1])
+ )
+ )
+ return results
else:
- np_im = np.array(im)
- gray = cv2.cvtColor(np_im, cv2.COLOR_BGR2GRAY)
-
- tries = [
- [ f'{cv2.data.haarcascades}haarcascade_eye.xml', 0.01 ],
- [ f'{cv2.data.haarcascades}haarcascade_frontalface_default.xml', 0.05 ],
- [ f'{cv2.data.haarcascades}haarcascade_profileface.xml', 0.05 ],
- [ f'{cv2.data.haarcascades}haarcascade_frontalface_alt.xml', 0.05 ],
- [ f'{cv2.data.haarcascades}haarcascade_frontalface_alt2.xml', 0.05 ],
- [ f'{cv2.data.haarcascades}haarcascade_frontalface_alt_tree.xml', 0.05 ],
- [ f'{cv2.data.haarcascades}haarcascade_eye_tree_eyeglasses.xml', 0.05 ],
- [ f'{cv2.data.haarcascades}haarcascade_upperbody.xml', 0.05 ]
- ]
- for t in tries:
- classifier = cv2.CascadeClassifier(t[0])
- minsize = int(min(im.width, im.height) * t[1]) # at least N percent of the smallest side
- try:
- faces = classifier.detectMultiScale(gray, scaleFactor=1.1,
- minNeighbors=7, minSize=(minsize, minsize), flags=cv2.CASCADE_SCALE_IMAGE)
- except Exception:
- continue
-
- if faces:
- rects = [[f[0], f[1], f[0] + f[2], f[1] + f[3]] for f in faces]
- return [PointOfInterest((r[0] +r[2]) // 2, (r[1] + r[3]) // 2, size=abs(r[0]-r[2]), weight=1/len(rects)) for r in rects]
+ np_im = np.array(im)
+ gray = cv2.cvtColor(np_im, cv2.COLOR_BGR2GRAY)
+
+ tries = [
+ [f'{cv2.data.haarcascades}haarcascade_eye.xml', 0.01],
+ [f'{cv2.data.haarcascades}haarcascade_frontalface_default.xml', 0.05],
+ [f'{cv2.data.haarcascades}haarcascade_profileface.xml', 0.05],
+ [f'{cv2.data.haarcascades}haarcascade_frontalface_alt.xml', 0.05],
+ [f'{cv2.data.haarcascades}haarcascade_frontalface_alt2.xml', 0.05],
+ [f'{cv2.data.haarcascades}haarcascade_frontalface_alt_tree.xml', 0.05],
+ [f'{cv2.data.haarcascades}haarcascade_eye_tree_eyeglasses.xml', 0.05],
+ [f'{cv2.data.haarcascades}haarcascade_upperbody.xml', 0.05]
+ ]
+ for t in tries:
+ classifier = cv2.CascadeClassifier(t[0])
+ minsize = int(min(im.width, im.height) * t[1]) # at least N percent of the smallest side
+ try:
+ faces = classifier.detectMultiScale(gray, scaleFactor=1.1,
+ minNeighbors=7, minSize=(minsize, minsize),
+ flags=cv2.CASCADE_SCALE_IMAGE)
+ except Exception:
+ continue
+
+ if faces:
+ rects = [[f[0], f[1], f[0] + f[2], f[1] + f[3]] for f in faces]
+ return [PointOfInterest((r[0] + r[2]) // 2, (r[1] + r[3]) // 2, size=abs(r[0] - r[2]),
+ weight=1 / len(rects)) for r in rects]
return []
@@ -198,7 +202,7 @@ def image_corner_points(im, settings):
# naive attempt at preventing focal points from collecting at watermarks near the bottom
gd = ImageDraw.Draw(grayscale)
- gd.rectangle([0, im.height*.9, im.width, im.height], fill="#999")
+ gd.rectangle([0, im.height * .9, im.width, im.height], fill="#999")
np_im = np.array(grayscale)
@@ -206,7 +210,7 @@ def image_corner_points(im, settings):
np_im,
maxCorners=100,
qualityLevel=0.04,
- minDistance=min(grayscale.width, grayscale.height)*0.06,
+ minDistance=min(grayscale.width, grayscale.height) * 0.06,
useHarrisDetector=False,
)
@@ -215,8 +219,8 @@ def image_corner_points(im, settings):
focal_points = []
for point in points:
- x, y = point.ravel()
- focal_points.append(PointOfInterest(x, y, size=4, weight=1/len(points)))
+ x, y = point.ravel()
+ focal_points.append(PointOfInterest(x, y, size=4, weight=1 / len(points)))
return focal_points
@@ -225,13 +229,13 @@ def image_entropy_points(im, settings):
landscape = im.height < im.width
portrait = im.height > im.width
if landscape:
- move_idx = [0, 2]
- move_max = im.size[0]
+ move_idx = [0, 2]
+ move_max = im.size[0]
elif portrait:
- move_idx = [1, 3]
- move_max = im.size[1]
+ move_idx = [1, 3]
+ move_max = im.size[1]
else:
- return []
+ return []
e_max = 0
crop_current = [0, 0, settings.crop_width, settings.crop_height]
@@ -241,14 +245,14 @@ def image_entropy_points(im, settings):
e = image_entropy(crop)
if (e > e_max):
- e_max = e
- crop_best = list(crop_current)
+ e_max = e
+ crop_best = list(crop_current)
crop_current[move_idx[0]] += 4
crop_current[move_idx[1]] += 4
- x_mid = int(crop_best[0] + settings.crop_width/2)
- y_mid = int(crop_best[1] + settings.crop_height/2)
+ x_mid = int(crop_best[0] + settings.crop_width / 2)
+ y_mid = int(crop_best[1] + settings.crop_height / 2)
return [PointOfInterest(x_mid, y_mid, size=25, weight=1.0)]
@@ -294,22 +298,23 @@ def is_square(w, h):
return w == h
-def download_and_cache_models(dirname):
- download_url = 'https://github.com/opencv/opencv_zoo/blob/91fb0290f50896f38a0ab1e558b74b16bc009428/models/face_detection_yunet/face_detection_yunet_2022mar.onnx?raw=true'
- model_file_name = 'face_detection_yunet.onnx'
+model_dir_opencv = os.path.join(paths_internal.models_path, 'opencv')
+if parse_version(cv2.__version__) >= parse_version('4.8'):
+ model_file_path = os.path.join(model_dir_opencv, 'face_detection_yunet_2023mar.onnx')
+ model_url = 'https://github.com/opencv/opencv_zoo/blob/b6e370b10f641879a87890d44e42173077154a05/models/face_detection_yunet/face_detection_yunet_2023mar.onnx?raw=true'
+else:
+ model_file_path = os.path.join(model_dir_opencv, 'face_detection_yunet.onnx')
+ model_url = 'https://github.com/opencv/opencv_zoo/blob/91fb0290f50896f38a0ab1e558b74b16bc009428/models/face_detection_yunet/face_detection_yunet_2022mar.onnx?raw=true'
- os.makedirs(dirname, exist_ok=True)
- cache_file = os.path.join(dirname, model_file_name)
- if not os.path.exists(cache_file):
- print(f"downloading face detection model from '{download_url}' to '{cache_file}'")
- response = requests.get(download_url)
- with open(cache_file, "wb") as f:
+def download_and_cache_models():
+ if not os.path.exists(model_file_path):
+ os.makedirs(model_dir_opencv, exist_ok=True)
+ print(f"downloading face detection model from '{model_url}' to '{model_file_path}'")
+ response = requests.get(model_url)
+ with open(model_file_path, "wb") as f:
f.write(response.content)
-
- if os.path.exists(cache_file):
- return cache_file
- return None
+ return model_file_path
class PointOfInterest:
diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py
deleted file mode 100644
index dbd856bd..00000000
--- a/modules/textual_inversion/preprocess.py
+++ /dev/null
@@ -1,232 +0,0 @@
-import os
-from PIL import Image, ImageOps
-import math
-import tqdm
-
-from modules import paths, shared, images, deepbooru
-from modules.textual_inversion import autocrop
-
-
-def preprocess(id_task, process_src, process_dst, process_width, process_height, preprocess_txt_action, process_keep_original_size, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.15, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False, process_multicrop=None, process_multicrop_mindim=None, process_multicrop_maxdim=None, process_multicrop_minarea=None, process_multicrop_maxarea=None, process_multicrop_objective=None, process_multicrop_threshold=None):
- try:
- if process_caption:
- shared.interrogator.load()
-
- if process_caption_deepbooru:
- deepbooru.model.start()
-
- preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_keep_original_size, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio, process_focal_crop, process_focal_crop_face_weight, process_focal_crop_entropy_weight, process_focal_crop_edges_weight, process_focal_crop_debug, process_multicrop, process_multicrop_mindim, process_multicrop_maxdim, process_multicrop_minarea, process_multicrop_maxarea, process_multicrop_objective, process_multicrop_threshold)
-
- finally:
-
- if process_caption:
- shared.interrogator.send_blip_to_ram()
-
- if process_caption_deepbooru:
- deepbooru.model.stop()
-
-
-def listfiles(dirname):
- return os.listdir(dirname)
-
-
-class PreprocessParams:
- src = None
- dstdir = None
- subindex = 0
- flip = False
- process_caption = False
- process_caption_deepbooru = False
- preprocess_txt_action = None
-
-
-def save_pic_with_caption(image, index, params: PreprocessParams, existing_caption=None):
- caption = ""
-
- if params.process_caption:
- caption += shared.interrogator.generate_caption(image)
-
- if params.process_caption_deepbooru:
- if caption:
- caption += ", "
- caption += deepbooru.model.tag_multi(image)
-
- filename_part = params.src
- filename_part = os.path.splitext(filename_part)[0]
- filename_part = os.path.basename(filename_part)
-
- basename = f"{index:05}-{params.subindex}-{filename_part}"
- image.save(os.path.join(params.dstdir, f"{basename}.png"))
-
- if params.preprocess_txt_action == 'prepend' and existing_caption:
- caption = f"{existing_caption} {caption}"
- elif params.preprocess_txt_action == 'append' and existing_caption:
- caption = f"{caption} {existing_caption}"
- elif params.preprocess_txt_action == 'copy' and existing_caption:
- caption = existing_caption
-
- caption = caption.strip()
-
- if caption:
- with open(os.path.join(params.dstdir, f"{basename}.txt"), "w", encoding="utf8") as file:
- file.write(caption)
-
- params.subindex += 1
-
-
-def save_pic(image, index, params, existing_caption=None):
- save_pic_with_caption(image, index, params, existing_caption=existing_caption)
-
- if params.flip:
- save_pic_with_caption(ImageOps.mirror(image), index, params, existing_caption=existing_caption)
-
-
-def split_pic(image, inverse_xy, width, height, overlap_ratio):
- if inverse_xy:
- from_w, from_h = image.height, image.width
- to_w, to_h = height, width
- else:
- from_w, from_h = image.width, image.height
- to_w, to_h = width, height
- h = from_h * to_w // from_w
- if inverse_xy:
- image = image.resize((h, to_w))
- else:
- image = image.resize((to_w, h))
-
- split_count = math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio)))
- y_step = (h - to_h) / (split_count - 1)
- for i in range(split_count):
- y = int(y_step * i)
- if inverse_xy:
- splitted = image.crop((y, 0, y + to_h, to_w))
- else:
- splitted = image.crop((0, y, to_w, y + to_h))
- yield splitted
-
-# not using torchvision.transforms.CenterCrop because it doesn't allow float regions
-def center_crop(image: Image, w: int, h: int):
- iw, ih = image.size
- if ih / h < iw / w:
- sw = w * ih / h
- box = (iw - sw) / 2, 0, iw - (iw - sw) / 2, ih
- else:
- sh = h * iw / w
- box = 0, (ih - sh) / 2, iw, ih - (ih - sh) / 2
- return image.resize((w, h), Image.Resampling.LANCZOS, box)
-
-
-def multicrop_pic(image: Image, mindim, maxdim, minarea, maxarea, objective, threshold):
- iw, ih = image.size
- err = lambda w, h: 1-(lambda x: x if x < 1 else 1/x)(iw/ih/(w/h))
- wh = max(((w, h) for w in range(mindim, maxdim+1, 64) for h in range(mindim, maxdim+1, 64)
- if minarea <= w * h <= maxarea and err(w, h) <= threshold),
- key= lambda wh: (wh[0]*wh[1], -err(*wh))[::1 if objective=='Maximize area' else -1],
- default=None
- )
- return wh and center_crop(image, *wh)
-
-
-def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_keep_original_size, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2, process_focal_crop=False, process_focal_crop_face_weight=0.9, process_focal_crop_entropy_weight=0.3, process_focal_crop_edges_weight=0.5, process_focal_crop_debug=False, process_multicrop=None, process_multicrop_mindim=None, process_multicrop_maxdim=None, process_multicrop_minarea=None, process_multicrop_maxarea=None, process_multicrop_objective=None, process_multicrop_threshold=None):
- width = process_width
- height = process_height
- src = os.path.abspath(process_src)
- dst = os.path.abspath(process_dst)
- split_threshold = max(0.0, min(1.0, split_threshold))
- overlap_ratio = max(0.0, min(0.9, overlap_ratio))
-
- assert src != dst, 'same directory specified as source and destination'
-
- os.makedirs(dst, exist_ok=True)
-
- files = listfiles(src)
-
- shared.state.job = "preprocess"
- shared.state.textinfo = "Preprocessing..."
- shared.state.job_count = len(files)
-
- params = PreprocessParams()
- params.dstdir = dst
- params.flip = process_flip
- params.process_caption = process_caption
- params.process_caption_deepbooru = process_caption_deepbooru
- params.preprocess_txt_action = preprocess_txt_action
-
- pbar = tqdm.tqdm(files)
- for index, imagefile in enumerate(pbar):
- params.subindex = 0
- filename = os.path.join(src, imagefile)
- try:
- img = Image.open(filename)
- img = ImageOps.exif_transpose(img)
- img = img.convert("RGB")
- except Exception:
- continue
-
- description = f"Preprocessing [Image {index}/{len(files)}]"
- pbar.set_description(description)
- shared.state.textinfo = description
-
- params.src = filename
-
- existing_caption = None
- existing_caption_filename = f"{os.path.splitext(filename)[0]}.txt"
- if os.path.exists(existing_caption_filename):
- with open(existing_caption_filename, 'r', encoding="utf8") as file:
- existing_caption = file.read()
-
- if shared.state.interrupted:
- break
-
- if img.height > img.width:
- ratio = (img.width * height) / (img.height * width)
- inverse_xy = False
- else:
- ratio = (img.height * width) / (img.width * height)
- inverse_xy = True
-
- process_default_resize = True
-
- if process_split and ratio < 1.0 and ratio <= split_threshold:
- for splitted in split_pic(img, inverse_xy, width, height, overlap_ratio):
- save_pic(splitted, index, params, existing_caption=existing_caption)
- process_default_resize = False
-
- if process_focal_crop and img.height != img.width:
-
- dnn_model_path = None
- try:
- dnn_model_path = autocrop.download_and_cache_models(os.path.join(paths.models_path, "opencv"))
- except Exception as e:
- print("Unable to load face detection model for auto crop selection. Falling back to lower quality haar method.", e)
-
- autocrop_settings = autocrop.Settings(
- crop_width = width,
- crop_height = height,
- face_points_weight = process_focal_crop_face_weight,
- entropy_points_weight = process_focal_crop_entropy_weight,
- corner_points_weight = process_focal_crop_edges_weight,
- annotate_image = process_focal_crop_debug,
- dnn_model_path = dnn_model_path,
- )
- for focal in autocrop.crop_image(img, autocrop_settings):
- save_pic(focal, index, params, existing_caption=existing_caption)
- process_default_resize = False
-
- if process_multicrop:
- cropped = multicrop_pic(img, process_multicrop_mindim, process_multicrop_maxdim, process_multicrop_minarea, process_multicrop_maxarea, process_multicrop_objective, process_multicrop_threshold)
- if cropped is not None:
- save_pic(cropped, index, params, existing_caption=existing_caption)
- else:
- print(f"skipped {img.width}x{img.height} image {filename} (can't find suitable size within error threshold)")
- process_default_resize = False
-
- if process_keep_original_size:
- save_pic(img, index, params, existing_caption=existing_caption)
- process_default_resize = False
-
- if process_default_resize:
- img = images.resize_image(1, img, width, height)
- save_pic(img, index, params, existing_caption=existing_caption)
-
- shared.state.nextjob()
diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py
index 35c4feef..f149ad1f 100644
--- a/modules/textual_inversion/ui.py
+++ b/modules/textual_inversion/ui.py
@@ -3,7 +3,6 @@ import html
import gradio as gr
import modules.textual_inversion.textual_inversion
-import modules.textual_inversion.preprocess
from modules import sd_hijack, shared
@@ -15,12 +14,6 @@ def create_embedding(name, initialization_text, nvpt, overwrite_old):
return gr.Dropdown.update(choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())), f"Created: {filename}", ""
-def preprocess(*args):
- modules.textual_inversion.preprocess.preprocess(*args)
-
- return f"Preprocessing {'interrupted' if shared.state.interrupted else 'finished'}.", ""
-
-
def train_embedding(*args):
assert not shared.cmd_opts.lowvram, 'Training models with lowvram not possible'
diff --git a/modules/ui.py b/modules/ui.py
index 08e0ad77..d80486dd 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -912,71 +912,6 @@ def create_ui():
with gr.Column():
create_hypernetwork = gr.Button(value="Create hypernetwork", variant='primary', elem_id="train_create_hypernetwork")
- with gr.Tab(label="Preprocess images", id="preprocess_images"):
- process_src = gr.Textbox(label='Source directory', elem_id="train_process_src")
- process_dst = gr.Textbox(label='Destination directory', elem_id="train_process_dst")
- process_width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="train_process_width")
- process_height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="train_process_height")
- preprocess_txt_action = gr.Dropdown(label='Existing Caption txt Action', value="ignore", choices=["ignore", "copy", "prepend", "append"], elem_id="train_preprocess_txt_action")
-
- with gr.Row():
- process_keep_original_size = gr.Checkbox(label='Keep original size', elem_id="train_process_keep_original_size")
- process_flip = gr.Checkbox(label='Create flipped copies', elem_id="train_process_flip")
- process_split = gr.Checkbox(label='Split oversized images', elem_id="train_process_split")
- process_focal_crop = gr.Checkbox(label='Auto focal point crop', elem_id="train_process_focal_crop")
- process_multicrop = gr.Checkbox(label='Auto-sized crop', elem_id="train_process_multicrop")
- process_caption = gr.Checkbox(label='Use BLIP for caption', elem_id="train_process_caption")
- process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True, elem_id="train_process_caption_deepbooru")
-
- with gr.Row(visible=False) as process_split_extra_row:
- process_split_threshold = gr.Slider(label='Split image threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_split_threshold")
- process_overlap_ratio = gr.Slider(label='Split image overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05, elem_id="train_process_overlap_ratio")
-
- with gr.Row(visible=False) as process_focal_crop_row:
- process_focal_crop_face_weight = gr.Slider(label='Focal point face weight', value=0.9, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_focal_crop_face_weight")
- process_focal_crop_entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.15, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_focal_crop_entropy_weight")
- process_focal_crop_edges_weight = gr.Slider(label='Focal point edges weight', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="train_process_focal_crop_edges_weight")
- process_focal_crop_debug = gr.Checkbox(label='Create debug image', elem_id="train_process_focal_crop_debug")
-
- with gr.Column(visible=False) as process_multicrop_col:
- gr.Markdown('Each image is center-cropped with an automatically chosen width and height.')
- with gr.Row():
- process_multicrop_mindim = gr.Slider(minimum=64, maximum=2048, step=8, label="Dimension lower bound", value=384, elem_id="train_process_multicrop_mindim")
- process_multicrop_maxdim = gr.Slider(minimum=64, maximum=2048, step=8, label="Dimension upper bound", value=768, elem_id="train_process_multicrop_maxdim")
- with gr.Row():
- process_multicrop_minarea = gr.Slider(minimum=64*64, maximum=2048*2048, step=1, label="Area lower bound", value=64*64, elem_id="train_process_multicrop_minarea")
- process_multicrop_maxarea = gr.Slider(minimum=64*64, maximum=2048*2048, step=1, label="Area upper bound", value=640*640, elem_id="train_process_multicrop_maxarea")
- with gr.Row():
- process_multicrop_objective = gr.Radio(["Maximize area", "Minimize error"], value="Maximize area", label="Resizing objective", elem_id="train_process_multicrop_objective")
- process_multicrop_threshold = gr.Slider(minimum=0, maximum=1, step=0.01, label="Error threshold", value=0.1, elem_id="train_process_multicrop_threshold")
-
- with gr.Row():
- with gr.Column(scale=3):
- gr.HTML(value="")
-
- with gr.Column():
- with gr.Row():
- interrupt_preprocessing = gr.Button("Interrupt", elem_id="train_interrupt_preprocessing")
- run_preprocess = gr.Button(value="Preprocess", variant='primary', elem_id="train_run_preprocess")
-
- process_split.change(
- fn=lambda show: gr_show(show),
- inputs=[process_split],
- outputs=[process_split_extra_row],
- )
-
- process_focal_crop.change(
- fn=lambda show: gr_show(show),
- inputs=[process_focal_crop],
- outputs=[process_focal_crop_row],
- )
-
- process_multicrop.change(
- fn=lambda show: gr_show(show),
- inputs=[process_multicrop],
- outputs=[process_multicrop_col],
- )
-
def get_textual_inversion_template_names():
return sorted(textual_inversion.textual_inversion_templates)
@@ -1077,42 +1012,6 @@ def create_ui():
]
)
- run_preprocess.click(
- fn=wrap_gradio_gpu_call(textual_inversion_ui.preprocess, extra_outputs=[gr.update()]),
- _js="start_training_textual_inversion",
- inputs=[
- dummy_component,
- process_src,
- process_dst,
- process_width,
- process_height,
- preprocess_txt_action,
- process_keep_original_size,
- process_flip,
- process_split,
- process_caption,
- process_caption_deepbooru,
- process_split_threshold,
- process_overlap_ratio,
- process_focal_crop,
- process_focal_crop_face_weight,
- process_focal_crop_entropy_weight,
- process_focal_crop_edges_weight,
- process_focal_crop_debug,
- process_multicrop,
- process_multicrop_mindim,
- process_multicrop_maxdim,
- process_multicrop_minarea,
- process_multicrop_maxarea,
- process_multicrop_objective,
- process_multicrop_threshold,
- ],
- outputs=[
- ti_output,
- ti_outcome,
- ],
- )
-
train_embedding.click(
fn=wrap_gradio_gpu_call(textual_inversion_ui.train_embedding, extra_outputs=[gr.update()]),
_js="start_training_textual_inversion",
@@ -1186,12 +1085,6 @@ def create_ui():
outputs=[],
)
- interrupt_preprocessing.click(
- fn=lambda: shared.state.interrupt(),
- inputs=[],
- outputs=[],
- )
-
loadsave = ui_loadsave.UiLoadsave(cmd_opts.ui_config_file)
settings = ui_settings.UiSettings()
diff --git a/modules/ui_extensions.py b/modules/ui_extensions.py
index c0a73b57..dc1e34c8 100644
--- a/modules/ui_extensions.py
+++ b/modules/ui_extensions.py
@@ -65,7 +65,7 @@ def save_config_state(name):
filename = os.path.join(config_states_dir, f"{timestamp}_{name}.json")
print(f"Saving backup of webui/extension state to {filename}.")
with open(filename, "w", encoding="utf-8") as f:
- json.dump(current_config_state, f, indent=4)
+ json.dump(current_config_state, f, indent=4, ensure_ascii=False)
config_states.list_config_states()
new_value = next(iter(config_states.all_config_states.keys()), "Current")
new_choices = ["Current"] + list(config_states.all_config_states.keys())
@@ -335,6 +335,11 @@ def normalize_git_url(url):
return url
+def get_extension_dirname_from_url(url):
+ *parts, last_part = url.split('/')
+ return normalize_git_url(last_part)
+
+
def install_extension_from_url(dirname, url, branch_name=None):
check_access()
@@ -346,10 +351,7 @@ def install_extension_from_url(dirname, url, branch_name=None):
assert url, 'No URL specified'
if dirname is None or dirname == "":
- *parts, last_part = url.split('/')
- last_part = normalize_git_url(last_part)
-
- dirname = last_part
+ dirname = get_extension_dirname_from_url(url)
target_dir = os.path.join(extensions.extensions_dir, dirname)
assert not os.path.exists(target_dir), f'Extension directory already exists: {target_dir}'
@@ -449,7 +451,8 @@ def get_date(info: dict, key):
def refresh_available_extensions_from_data(hide_tags, sort_column, filter_text=""):
extlist = available_extensions["extensions"]
- installed_extension_urls = {normalize_git_url(extension.remote): extension.name for extension in extensions.extensions}
+ installed_extensions = {extension.name for extension in extensions.extensions}
+ installed_extension_urls = {normalize_git_url(extension.remote) for extension in extensions.extensions if extension.remote is not None}
tags = available_extensions.get("tags", {})
tags_to_hide = set(hide_tags)
@@ -482,7 +485,7 @@ def refresh_available_extensions_from_data(hide_tags, sort_column, filter_text="
if url is None:
continue
- existing = installed_extension_urls.get(normalize_git_url(url), None)
+ existing = get_extension_dirname_from_url(url) in installed_extensions or normalize_git_url(url) in installed_extension_urls
extension_tags = extension_tags + ["installed"] if existing else extension_tags
if any(x for x in extension_tags if x in tags_to_hide):
diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py
index f03e2033..fe5d3ba3 100644
--- a/modules/ui_extra_networks.py
+++ b/modules/ui_extra_networks.py
@@ -151,8 +151,13 @@ class ExtraNetworksPage:
continue
subdir = os.path.abspath(x)[len(parentdir):].replace("\\", "/")
- while subdir.startswith("/"):
- subdir = subdir[1:]
+
+ if shared.opts.extra_networks_dir_button_function:
+ if not subdir.startswith("/"):
+ subdir = "/" + subdir
+ else:
+ while subdir.startswith("/"):
+ subdir = subdir[1:]
is_empty = len(os.listdir(x)) == 0
if not is_empty and not subdir.endswith("/"):
@@ -370,6 +375,9 @@ def create_ui(interface: gr.Blocks, unrelated_tabs, tabname):
for page in ui.stored_extra_pages:
with gr.Tab(page.title, elem_id=f"{tabname}_{page.id_page}", elem_classes=["extra-page"]) as tab:
+ with gr.Column(elem_id=f"{tabname}_{page.id_page}_prompts", elem_classes=["extra-page-prompts"]):
+ pass
+
elem_id = f"{tabname}_{page.id_page}_cards_html"
page_elem = gr.HTML('Loading...', elem_id=elem_id)
ui.pages.append(page_elem)
@@ -400,7 +408,7 @@ def create_ui(interface: gr.Blocks, unrelated_tabs, tabname):
allow_prompt = "true" if page.allow_prompt else "false"
allow_negative_prompt = "true" if page.allow_negative_prompt else "false"
- jscode = 'extraNetworksTabSelected("' + tabname + '", "' + f"{tabname}_{page.id_page}" + '", ' + allow_prompt + ', ' + allow_negative_prompt + ');'
+ jscode = 'extraNetworksTabSelected("' + tabname + '", "' + f"{tabname}_{page.id_page}_prompts" + '", ' + allow_prompt + ', ' + allow_negative_prompt + ');'
tab.select(fn=lambda: [gr.update(visible=True) for _ in tab_controls], _js='function(){ ' + jscode + ' }', inputs=[], outputs=tab_controls, show_progress=False)
diff --git a/modules/ui_extra_networks_user_metadata.py b/modules/ui_extra_networks_user_metadata.py
index bfec140c..36a807fc 100644
--- a/modules/ui_extra_networks_user_metadata.py
+++ b/modules/ui_extra_networks_user_metadata.py
@@ -134,7 +134,7 @@ class UserMetadataEditor:
basename, ext = os.path.splitext(filename)
with open(basename + '.json', "w", encoding="utf8") as file:
- json.dump(metadata, file, indent=4)
+ json.dump(metadata, file, indent=4, ensure_ascii=False)
def save_user_metadata(self, name, desc, notes):
user_metadata = self.get_user_metadata(name)
diff --git a/modules/ui_loadsave.py b/modules/ui_loadsave.py
index eb20ff25..7826786c 100644
--- a/modules/ui_loadsave.py
+++ b/modules/ui_loadsave.py
@@ -141,7 +141,7 @@ class UiLoadsave:
def write_to_file(self, current_ui_settings):
with open(self.filename, "w", encoding="utf8") as file:
- json.dump(current_ui_settings, file, indent=4)
+ json.dump(current_ui_settings, file, indent=4, ensure_ascii=False)
def dump_defaults(self):
"""saves default values to a file unless tjhe file is present and there was an error loading default values at start"""
diff --git a/modules/ui_postprocessing.py b/modules/ui_postprocessing.py
index 802e1ce7..13d888e4 100644
--- a/modules/ui_postprocessing.py
+++ b/modules/ui_postprocessing.py
@@ -1,9 +1,10 @@
import gradio as gr
-from modules import scripts, shared, ui_common, postprocessing, call_queue
+from modules import scripts, shared, ui_common, postprocessing, call_queue, ui_toprow
import modules.generation_parameters_copypaste as parameters_copypaste
def create_ui():
+ dummy_component = gr.Label(visible=False)
tab_index = gr.State(value=0)
with gr.Row(equal_height=False, variant='compact'):
@@ -20,11 +21,13 @@ def create_ui():
extras_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, placeholder="Leave blank to save images to the default path.", elem_id="extras_batch_output_dir")
show_extras_results = gr.Checkbox(label='Show result images', value=True, elem_id="extras_show_extras_results")
- submit = gr.Button('Generate', elem_id="extras_generate", variant='primary')
-
script_inputs = scripts.scripts_postproc.setup_ui()
with gr.Column():
+ toprow = ui_toprow.Toprow(is_compact=True, is_img2img=False, id_part="extras")
+ toprow.create_inline_toprow_image()
+ submit = toprow.submit
+
result_images, html_info_x, html_info, html_log = ui_common.create_output_panel("extras", shared.opts.outdir_extras_samples)
tab_single.select(fn=lambda: 0, inputs=[], outputs=[tab_index])
@@ -32,8 +35,10 @@ def create_ui():
tab_batch_dir.select(fn=lambda: 2, inputs=[], outputs=[tab_index])
submit.click(
- fn=call_queue.wrap_gradio_gpu_call(postprocessing.run_postprocessing, extra_outputs=[None, '']),
+ fn=call_queue.wrap_gradio_gpu_call(postprocessing.run_postprocessing_webui, extra_outputs=[None, '']),
+ _js="submit_extras",
inputs=[
+ dummy_component,
tab_index,
extras_image,
image_batch,
@@ -45,8 +50,9 @@ def create_ui():
outputs=[
result_images,
html_info_x,
- html_info,
- ]
+ html_log,
+ ],
+ show_progress=False,
)
parameters_copypaste.add_paste_fields("extras", extras_image, None)
diff --git a/modules/ui_toprow.py b/modules/ui_toprow.py
index 985b5a2d..9caf8faa 100644
--- a/modules/ui_toprow.py
+++ b/modules/ui_toprow.py
@@ -34,8 +34,10 @@ class Toprow:
submit_box = None
- def __init__(self, is_img2img, is_compact=False):
- id_part = "img2img" if is_img2img else "txt2img"
+ def __init__(self, is_img2img, is_compact=False, id_part=None):
+ if id_part is None:
+ id_part = "img2img" if is_img2img else "txt2img"
+
self.id_part = id_part
self.is_img2img = is_img2img
self.is_compact = is_compact
@@ -77,11 +79,11 @@ class Toprow:
def create_prompts(self):
with gr.Column(elem_id=f"{self.id_part}_prompt_container", elem_classes=["prompt-container-compact"] if self.is_compact else [], scale=6):
with gr.Row(elem_id=f"{self.id_part}_prompt_row", elem_classes=["prompt-row"]):
- self.prompt = gr.Textbox(label="Prompt", elem_id=f"{self.id_part}_prompt", show_label=False, lines=3, placeholder="Prompt (press Ctrl+Enter or Alt+Enter to generate)", elem_classes=["prompt"])
+ self.prompt = gr.Textbox(label="Prompt", elem_id=f"{self.id_part}_prompt", show_label=False, lines=3, placeholder="Prompt\n(Press Ctrl+Enter to generate, Alt+Enter to skip, Esc to interrupt)", elem_classes=["prompt"])
self.prompt_img = gr.File(label="", elem_id=f"{self.id_part}_prompt_image", file_count="single", type="binary", visible=False)
with gr.Row(elem_id=f"{self.id_part}_neg_prompt_row", elem_classes=["prompt-row"]):
- self.negative_prompt = gr.Textbox(label="Negative prompt", elem_id=f"{self.id_part}_neg_prompt", show_label=False, lines=3, placeholder="Negative prompt (press Ctrl+Enter or Alt+Enter to generate)", elem_classes=["prompt"])
+ self.negative_prompt = gr.Textbox(label="Negative prompt", elem_id=f"{self.id_part}_neg_prompt", show_label=False, lines=3, placeholder="Negative prompt\n(Press Ctrl+Enter to generate, Alt+Enter to skip, Esc to interrupt)", elem_classes=["prompt"])
self.prompt_img.change(
fn=modules.images.image_data,
diff --git a/modules/upscaler.py b/modules/upscaler.py
index e682bbaa..b256e085 100644
--- a/modules/upscaler.py
+++ b/modules/upscaler.py
@@ -57,6 +57,9 @@ class Upscaler:
dest_h = int((img.height * scale) // 8 * 8)
for _ in range(3):
+ if img.width >= dest_w and img.height >= dest_h:
+ break
+
shape = (img.width, img.height)
img = self.do_upscale(img, selected_model)
@@ -64,9 +67,6 @@ class Upscaler:
if shape == (img.width, img.height):
break
- if img.width >= dest_w and img.height >= dest_h:
- break
-
if img.width != dest_w or img.height != dest_h:
img = img.resize((int(dest_w), int(dest_h)), resample=LANCZOS)
diff --git a/modules/xpu_specific.py b/modules/xpu_specific.py
new file mode 100644
index 00000000..d8da94a0
--- /dev/null
+++ b/modules/xpu_specific.py
@@ -0,0 +1,59 @@
+from modules import shared
+from modules.sd_hijack_utils import CondFunc
+
+has_ipex = False
+try:
+ import torch
+ import intel_extension_for_pytorch as ipex # noqa: F401
+ has_ipex = True
+except Exception:
+ pass
+
+
+def check_for_xpu():
+ return has_ipex and hasattr(torch, 'xpu') and torch.xpu.is_available()
+
+
+def get_xpu_device_string():
+ if shared.cmd_opts.device_id is not None:
+ return f"xpu:{shared.cmd_opts.device_id}"
+ return "xpu"
+
+
+def torch_xpu_gc():
+ with torch.xpu.device(get_xpu_device_string()):
+ torch.xpu.empty_cache()
+
+
+has_xpu = check_for_xpu()
+
+if has_xpu:
+ # W/A for https://github.com/intel/intel-extension-for-pytorch/issues/452: torch.Generator API doesn't support XPU device
+ CondFunc('torch.Generator',
+ lambda orig_func, device=None: torch.xpu.Generator(device),
+ lambda orig_func, device=None: device is not None and device.type == "xpu")
+
+ # W/A for some OPs that could not handle different input dtypes
+ CondFunc('torch.nn.functional.layer_norm',
+ lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs:
+ orig_func(input.to(weight.data.dtype), normalized_shape, weight, *args, **kwargs),
+ lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs:
+ weight is not None and input.dtype != weight.data.dtype)
+ CondFunc('torch.nn.modules.GroupNorm.forward',
+ lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)),
+ lambda orig_func, self, input: input.dtype != self.weight.data.dtype)
+ CondFunc('torch.nn.modules.linear.Linear.forward',
+ lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)),
+ lambda orig_func, self, input: input.dtype != self.weight.data.dtype)
+ CondFunc('torch.nn.modules.conv.Conv2d.forward',
+ lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)),
+ lambda orig_func, self, input: input.dtype != self.weight.data.dtype)
+ CondFunc('torch.bmm',
+ lambda orig_func, input, mat2, out=None: orig_func(input.to(mat2.dtype), mat2, out=out),
+ lambda orig_func, input, mat2, out=None: input.dtype != mat2.dtype)
+ CondFunc('torch.cat',
+ lambda orig_func, tensors, dim=0, out=None: orig_func([t.to(tensors[0].dtype) for t in tensors], dim=dim, out=out),
+ lambda orig_func, tensors, dim=0, out=None: not all(t.dtype == tensors[0].dtype for t in tensors))
+ CondFunc('torch.nn.functional.scaled_dot_product_attention',
+ lambda orig_func, query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False: orig_func(query, key.to(query.dtype), value.to(query.dtype), attn_mask, dropout_p, is_causal),
+ lambda orig_func, query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False: query.dtype != key.dtype or query.dtype != value.dtype)
diff --git a/script.js b/script.js
index c0e678ea..be1bc317 100644
--- a/script.js
+++ b/script.js
@@ -121,16 +121,22 @@ document.addEventListener("DOMContentLoaded", function() {
});
/**
- * Add a ctrl+enter as a shortcut to start a generation
+ * Add keyboard shortcuts:
+ * Ctrl+Enter to start/restart a generation
+ * Alt/Option+Enter to skip a generation
+ * Esc to interrupt a generation
*/
document.addEventListener('keydown', function(e) {
const isEnter = e.key === 'Enter' || e.keyCode === 13;
- const isModifierKey = e.metaKey || e.ctrlKey || e.altKey;
+ const isCtrlKey = e.metaKey || e.ctrlKey;
+ const isAltKey = e.altKey;
+ const isEsc = e.key === 'Escape';
- const interruptButton = get_uiCurrentTabContent().querySelector('button[id$=_interrupt]');
const generateButton = get_uiCurrentTabContent().querySelector('button[id$=_generate]');
+ const interruptButton = get_uiCurrentTabContent().querySelector('button[id$=_interrupt]');
+ const skipButton = get_uiCurrentTabContent().querySelector('button[id$=_skip]');
- if (isEnter && isModifierKey) {
+ if (isCtrlKey && isEnter) {
if (interruptButton.style.display === 'block') {
interruptButton.click();
const callback = (mutationList) => {
@@ -150,6 +156,21 @@ document.addEventListener('keydown', function(e) {
}
e.preventDefault();
}
+
+ if (isAltKey && isEnter) {
+ skipButton.click();
+ e.preventDefault();
+ }
+
+ if (isEsc) {
+ const globalPopup = document.querySelector('.global-popup');
+ const lightboxModal = document.querySelector('#lightboxModal');
+ if (!globalPopup || globalPopup.style.display === 'none') {
+ if (document.activeElement === lightboxModal) return;
+ interruptButton.click();
+ e.preventDefault();
+ }
+ }
});
/**
diff --git a/scripts/postprocessing_caption.py b/scripts/postprocessing_caption.py
new file mode 100644
index 00000000..243e3ad9
--- /dev/null
+++ b/scripts/postprocessing_caption.py
@@ -0,0 +1,30 @@
+from modules import scripts_postprocessing, ui_components, deepbooru, shared
+import gradio as gr
+
+
+class ScriptPostprocessingCeption(scripts_postprocessing.ScriptPostprocessing):
+ name = "Caption"
+ order = 4000
+
+ def ui(self):
+ with ui_components.InputAccordion(False, label="Caption") as enable:
+ option = gr.CheckboxGroup(value=["Deepbooru"], choices=["Deepbooru", "BLIP"], show_label=False)
+
+ return {
+ "enable": enable,
+ "option": option,
+ }
+
+ def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, option):
+ if not enable:
+ return
+
+ captions = [pp.caption]
+
+ if "Deepbooru" in option:
+ captions.append(deepbooru.model.tag(pp.image))
+
+ if "BLIP" in option:
+ captions.append(shared.interrogator.generate_caption(pp.image))
+
+ pp.caption = ", ".join([x for x in captions if x])
diff --git a/scripts/postprocessing_codeformer.py b/scripts/postprocessing_codeformer.py
index a7d80d40..e1e156dd 100644
--- a/scripts/postprocessing_codeformer.py
+++ b/scripts/postprocessing_codeformer.py
@@ -1,28 +1,28 @@
from PIL import Image
import numpy as np
-from modules import scripts_postprocessing, codeformer_model
+from modules import scripts_postprocessing, codeformer_model, ui_components
import gradio as gr
-from modules.ui_components import FormRow
-
class ScriptPostprocessingCodeFormer(scripts_postprocessing.ScriptPostprocessing):
name = "CodeFormer"
order = 3000
def ui(self):
- with FormRow():
- codeformer_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer visibility", value=0, elem_id="extras_codeformer_visibility")
- codeformer_weight = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="CodeFormer weight (0 = maximum effect, 1 = minimum effect)", value=0, elem_id="extras_codeformer_weight")
+ with ui_components.InputAccordion(False, label="CodeFormer") as enable:
+ with gr.Row():
+ codeformer_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="Visibility", value=1.0, elem_id="extras_codeformer_visibility")
+ codeformer_weight = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="Weight (0 = maximum effect, 1 = minimum effect)", value=0, elem_id="extras_codeformer_weight")
return {
+ "enable": enable,
"codeformer_visibility": codeformer_visibility,
"codeformer_weight": codeformer_weight,
}
- def process(self, pp: scripts_postprocessing.PostprocessedImage, codeformer_visibility, codeformer_weight):
- if codeformer_visibility == 0:
+ def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, codeformer_visibility, codeformer_weight):
+ if codeformer_visibility == 0 or not enable:
return
restored_img = codeformer_model.codeformer.restore(np.array(pp.image, dtype=np.uint8), w=codeformer_weight)
diff --git a/scripts/postprocessing_create_flipped_copies.py b/scripts/postprocessing_create_flipped_copies.py
new file mode 100644
index 00000000..3425571d
--- /dev/null
+++ b/scripts/postprocessing_create_flipped_copies.py
@@ -0,0 +1,32 @@
+from PIL import ImageOps, Image
+
+from modules import scripts_postprocessing, ui_components
+import gradio as gr
+
+
+class ScriptPostprocessingCreateFlippedCopies(scripts_postprocessing.ScriptPostprocessing):
+ name = "Create flipped copies"
+ order = 4000
+
+ def ui(self):
+ with ui_components.InputAccordion(False, label="Create flipped copies") as enable:
+ with gr.Row():
+ option = gr.CheckboxGroup(value=["Horizontal"], choices=["Horizontal", "Vertical", "Both"], show_label=False)
+
+ return {
+ "enable": enable,
+ "option": option,
+ }
+
+ def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, option):
+ if not enable:
+ return
+
+ if "Horizontal" in option:
+ pp.extra_images.append(ImageOps.mirror(pp.image))
+
+ if "Vertical" in option:
+ pp.extra_images.append(pp.image.transpose(Image.Transpose.FLIP_TOP_BOTTOM))
+
+ if "Both" in option:
+ pp.extra_images.append(pp.image.transpose(Image.Transpose.FLIP_TOP_BOTTOM).transpose(Image.Transpose.FLIP_LEFT_RIGHT))
diff --git a/scripts/postprocessing_focal_crop.py b/scripts/postprocessing_focal_crop.py
new file mode 100644
index 00000000..d3baf298
--- /dev/null
+++ b/scripts/postprocessing_focal_crop.py
@@ -0,0 +1,54 @@
+
+from modules import scripts_postprocessing, ui_components, errors
+import gradio as gr
+
+from modules.textual_inversion import autocrop
+
+
+class ScriptPostprocessingFocalCrop(scripts_postprocessing.ScriptPostprocessing):
+ name = "Auto focal point crop"
+ order = 4000
+
+ def ui(self):
+ with ui_components.InputAccordion(False, label="Auto focal point crop") as enable:
+ face_weight = gr.Slider(label='Focal point face weight', value=0.9, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_focal_crop_face_weight")
+ entropy_weight = gr.Slider(label='Focal point entropy weight', value=0.15, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_focal_crop_entropy_weight")
+ edges_weight = gr.Slider(label='Focal point edges weight', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_focal_crop_edges_weight")
+ debug = gr.Checkbox(label='Create debug image', elem_id="train_process_focal_crop_debug")
+
+ return {
+ "enable": enable,
+ "face_weight": face_weight,
+ "entropy_weight": entropy_weight,
+ "edges_weight": edges_weight,
+ "debug": debug,
+ }
+
+ def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, face_weight, entropy_weight, edges_weight, debug):
+ if not enable:
+ return
+
+ if not pp.shared.target_width or not pp.shared.target_height:
+ return
+
+ dnn_model_path = None
+ try:
+ dnn_model_path = autocrop.download_and_cache_models()
+ except Exception:
+ errors.report("Unable to load face detection model for auto crop selection. Falling back to lower quality haar method.", exc_info=True)
+
+ autocrop_settings = autocrop.Settings(
+ crop_width=pp.shared.target_width,
+ crop_height=pp.shared.target_height,
+ face_points_weight=face_weight,
+ entropy_points_weight=entropy_weight,
+ corner_points_weight=edges_weight,
+ annotate_image=debug,
+ dnn_model_path=dnn_model_path,
+ )
+
+ result, *others = autocrop.crop_image(pp.image, autocrop_settings)
+
+ pp.image = result
+ pp.extra_images = [pp.create_copy(x, nametags=["focal-crop-debug"], disable_processing=True) for x in others]
+
diff --git a/scripts/postprocessing_gfpgan.py b/scripts/postprocessing_gfpgan.py
index d854f3f7..6e756605 100644
--- a/scripts/postprocessing_gfpgan.py
+++ b/scripts/postprocessing_gfpgan.py
@@ -1,26 +1,25 @@
from PIL import Image
import numpy as np
-from modules import scripts_postprocessing, gfpgan_model
+from modules import scripts_postprocessing, gfpgan_model, ui_components
import gradio as gr
-from modules.ui_components import FormRow
-
class ScriptPostprocessingGfpGan(scripts_postprocessing.ScriptPostprocessing):
name = "GFPGAN"
order = 2000
def ui(self):
- with FormRow():
- gfpgan_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="GFPGAN visibility", value=0, elem_id="extras_gfpgan_visibility")
+ with ui_components.InputAccordion(False, label="GFPGAN") as enable:
+ gfpgan_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="Visibility", value=1.0, elem_id="extras_gfpgan_visibility")
return {
+ "enable": enable,
"gfpgan_visibility": gfpgan_visibility,
}
- def process(self, pp: scripts_postprocessing.PostprocessedImage, gfpgan_visibility):
- if gfpgan_visibility == 0:
+ def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, gfpgan_visibility):
+ if gfpgan_visibility == 0 or not enable:
return
restored_img = gfpgan_model.gfpgan_fix_faces(np.array(pp.image, dtype=np.uint8))
diff --git a/scripts/postprocessing_split_oversized.py b/scripts/postprocessing_split_oversized.py
new file mode 100644
index 00000000..c4a03160
--- /dev/null
+++ b/scripts/postprocessing_split_oversized.py
@@ -0,0 +1,71 @@
+import math
+
+from modules import scripts_postprocessing, ui_components
+import gradio as gr
+
+
+def split_pic(image, inverse_xy, width, height, overlap_ratio):
+ if inverse_xy:
+ from_w, from_h = image.height, image.width
+ to_w, to_h = height, width
+ else:
+ from_w, from_h = image.width, image.height
+ to_w, to_h = width, height
+ h = from_h * to_w // from_w
+ if inverse_xy:
+ image = image.resize((h, to_w))
+ else:
+ image = image.resize((to_w, h))
+
+ split_count = math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio)))
+ y_step = (h - to_h) / (split_count - 1)
+ for i in range(split_count):
+ y = int(y_step * i)
+ if inverse_xy:
+ splitted = image.crop((y, 0, y + to_h, to_w))
+ else:
+ splitted = image.crop((0, y, to_w, y + to_h))
+ yield splitted
+
+
+class ScriptPostprocessingSplitOversized(scripts_postprocessing.ScriptPostprocessing):
+ name = "Split oversized images"
+ order = 4000
+
+ def ui(self):
+ with ui_components.InputAccordion(False, label="Split oversized images") as enable:
+ with gr.Row():
+ split_threshold = gr.Slider(label='Threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05, elem_id="postprocess_split_threshold")
+ overlap_ratio = gr.Slider(label='Overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05, elem_id="postprocess_overlap_ratio")
+
+ return {
+ "enable": enable,
+ "split_threshold": split_threshold,
+ "overlap_ratio": overlap_ratio,
+ }
+
+ def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, split_threshold, overlap_ratio):
+ if not enable:
+ return
+
+ width = pp.shared.target_width
+ height = pp.shared.target_height
+
+ if not width or not height:
+ return
+
+ if pp.image.height > pp.image.width:
+ ratio = (pp.image.width * height) / (pp.image.height * width)
+ inverse_xy = False
+ else:
+ ratio = (pp.image.height * width) / (pp.image.width * height)
+ inverse_xy = True
+
+ if ratio >= 1.0 and ratio > split_threshold:
+ return
+
+ result, *others = split_pic(pp.image, inverse_xy, width, height, overlap_ratio)
+
+ pp.image = result
+ pp.extra_images = [pp.create_copy(x) for x in others]
+
diff --git a/scripts/postprocessing_upscale.py b/scripts/postprocessing_upscale.py
index eb42a29e..ed709688 100644
--- a/scripts/postprocessing_upscale.py
+++ b/scripts/postprocessing_upscale.py
@@ -81,6 +81,14 @@ class ScriptPostprocessingUpscale(scripts_postprocessing.ScriptPostprocessing):
return image
+ def process_firstpass(self, pp: scripts_postprocessing.PostprocessedImage, upscale_mode=1, upscale_by=2.0, upscale_to_width=None, upscale_to_height=None, upscale_crop=False, upscaler_1_name=None, upscaler_2_name=None, upscaler_2_visibility=0.0):
+ if upscale_mode == 1:
+ pp.shared.target_width = upscale_to_width
+ pp.shared.target_height = upscale_to_height
+ else:
+ pp.shared.target_width = int(pp.image.width * upscale_by)
+ pp.shared.target_height = int(pp.image.height * upscale_by)
+
def process(self, pp: scripts_postprocessing.PostprocessedImage, upscale_mode=1, upscale_by=2.0, upscale_to_width=None, upscale_to_height=None, upscale_crop=False, upscaler_1_name=None, upscaler_2_name=None, upscaler_2_visibility=0.0):
if upscaler_1_name == "None":
upscaler_1_name = None
@@ -126,6 +134,10 @@ class ScriptPostprocessingUpscaleSimple(ScriptPostprocessingUpscale):
"upscaler_name": upscaler_name,
}
+ def process_firstpass(self, pp: scripts_postprocessing.PostprocessedImage, upscale_by=2.0, upscaler_name=None):
+ pp.shared.target_width = int(pp.image.width * upscale_by)
+ pp.shared.target_height = int(pp.image.height * upscale_by)
+
def process(self, pp: scripts_postprocessing.PostprocessedImage, upscale_by=2.0, upscaler_name=None):
if upscaler_name is None or upscaler_name == "None":
return
diff --git a/scripts/processing_autosized_crop.py b/scripts/processing_autosized_crop.py
new file mode 100644
index 00000000..c0980226
--- /dev/null
+++ b/scripts/processing_autosized_crop.py
@@ -0,0 +1,64 @@
+from PIL import Image
+
+from modules import scripts_postprocessing, ui_components
+import gradio as gr
+
+
+def center_crop(image: Image, w: int, h: int):
+ iw, ih = image.size
+ if ih / h < iw / w:
+ sw = w * ih / h
+ box = (iw - sw) / 2, 0, iw - (iw - sw) / 2, ih
+ else:
+ sh = h * iw / w
+ box = 0, (ih - sh) / 2, iw, ih - (ih - sh) / 2
+ return image.resize((w, h), Image.Resampling.LANCZOS, box)
+
+
+def multicrop_pic(image: Image, mindim, maxdim, minarea, maxarea, objective, threshold):
+ iw, ih = image.size
+ err = lambda w, h: 1 - (lambda x: x if x < 1 else 1 / x)(iw / ih / (w / h))
+ wh = max(((w, h) for w in range(mindim, maxdim + 1, 64) for h in range(mindim, maxdim + 1, 64)
+ if minarea <= w * h <= maxarea and err(w, h) <= threshold),
+ key=lambda wh: (wh[0] * wh[1], -err(*wh))[::1 if objective == 'Maximize area' else -1],
+ default=None
+ )
+ return wh and center_crop(image, *wh)
+
+
+class ScriptPostprocessingAutosizedCrop(scripts_postprocessing.ScriptPostprocessing):
+ name = "Auto-sized crop"
+ order = 4000
+
+ def ui(self):
+ with ui_components.InputAccordion(False, label="Auto-sized crop") as enable:
+ gr.Markdown('Each image is center-cropped with an automatically chosen width and height.')
+ with gr.Row():
+ mindim = gr.Slider(minimum=64, maximum=2048, step=8, label="Dimension lower bound", value=384, elem_id="postprocess_multicrop_mindim")
+ maxdim = gr.Slider(minimum=64, maximum=2048, step=8, label="Dimension upper bound", value=768, elem_id="postprocess_multicrop_maxdim")
+ with gr.Row():
+ minarea = gr.Slider(minimum=64 * 64, maximum=2048 * 2048, step=1, label="Area lower bound", value=64 * 64, elem_id="postprocess_multicrop_minarea")
+ maxarea = gr.Slider(minimum=64 * 64, maximum=2048 * 2048, step=1, label="Area upper bound", value=640 * 640, elem_id="postprocess_multicrop_maxarea")
+ with gr.Row():
+ objective = gr.Radio(["Maximize area", "Minimize error"], value="Maximize area", label="Resizing objective", elem_id="postprocess_multicrop_objective")
+ threshold = gr.Slider(minimum=0, maximum=1, step=0.01, label="Error threshold", value=0.1, elem_id="postprocess_multicrop_threshold")
+
+ return {
+ "enable": enable,
+ "mindim": mindim,
+ "maxdim": maxdim,
+ "minarea": minarea,
+ "maxarea": maxarea,
+ "objective": objective,
+ "threshold": threshold,
+ }
+
+ def process(self, pp: scripts_postprocessing.PostprocessedImage, enable, mindim, maxdim, minarea, maxarea, objective, threshold):
+ if not enable:
+ return
+
+ cropped = multicrop_pic(pp.image, mindim, maxdim, minarea, maxarea, objective, threshold)
+ if cropped is not None:
+ pp.image = cropped
+ else:
+ print(f"skipped {pp.image.width}x{pp.image.height} image (can't find suitable size within error threshold)")
diff --git a/scripts/soft_inpainting.py b/scripts/soft_inpainting.py
new file mode 100644
index 00000000..d9024344
--- /dev/null
+++ b/scripts/soft_inpainting.py
@@ -0,0 +1,747 @@
+import numpy as np
+import gradio as gr
+import math
+from modules.ui_components import InputAccordion
+import modules.scripts as scripts
+
+
class SoftInpaintingSettings:
    """
    Holds one value for each of the six soft inpainting parameters.

    The same shape is reused in this module for numeric settings as well as
    for per-parameter labels, descriptions and element ids.
    """

    def __init__(self,
                 mask_blend_power,
                 mask_blend_scale,
                 inpaint_detail_preservation,
                 composite_mask_influence,
                 composite_difference_threshold,
                 composite_difference_contrast):
        # Latent blending parameters.
        self.mask_blend_power = mask_blend_power
        self.mask_blend_scale = mask_blend_scale
        self.inpaint_detail_preservation = inpaint_detail_preservation
        # Pixel compositing parameters.
        self.composite_mask_influence = composite_mask_influence
        self.composite_difference_threshold = composite_difference_threshold
        self.composite_difference_contrast = composite_difference_contrast

    def add_generation_params(self, dest):
        """
        Writes the "enabled" flag and every parameter value into the mapping
        "dest", keyed by the labels stored in gen_param_labels.
        """
        dest[enabled_gen_param_label] = True

        # Same order as the constructor arguments, so insertion order is stable.
        field_names = (
            "mask_blend_power",
            "mask_blend_scale",
            "inpaint_detail_preservation",
            "composite_mask_influence",
            "composite_difference_threshold",
            "composite_difference_contrast",
        )
        for field_name in field_names:
            dest[getattr(gen_param_labels, field_name)] = getattr(self, field_name)
+
+
+# ------------------- Methods -------------------
+
def processing_uses_inpainting(p):
    """
    Returns True when any of p.image_mask, p.mask or p.nmask exists and is not None.
    """
    # TODO: Figure out a better way to determine if inpainting is being used by p
    mask_attributes = ("image_mask", "mask", "nmask")
    return any(getattr(p, attribute, None) is not None for attribute in mask_attributes)
+
+
def latent_blend(settings, a, b, t):
    """
    Blends latents "a" and "b" with weight "t" (0 selects a, 1 selects b).

    The vectors along dim 1 are first interpolated linearly, then rescaled so
    that their L2 magnitude equals a power-mean of the two input magnitudes.
    The exponent of that power-mean is settings.inpaint_detail_preservation;
    larger exponents pull the magnitude towards the larger of the two inputs.

    Neither "a" nor "b" is modified. The result has the dtype of the linear blend.
    """
    import torch

    exponent = settings.inpaint_detail_preservation

    # Add a leading axis so the weights broadcast against the latents.
    weight = t.unsqueeze(0)
    # Magnitudes keep a single entry along dim 1, so only the first slice of t is used for them.
    magnitude_weight = t[0].unsqueeze(0).unsqueeze(0)

    inverse_weight = 1 - weight
    inverse_magnitude_weight = 1 - magnitude_weight

    # Plain linear interpolation; "blended" is a fresh tensor, so in-place updates are safe.
    blended = a * inverse_weight
    blended.add_(b * weight)
    output_dtype = blended.dtype
    del weight, inverse_weight

    def powered_magnitude(latent):
        # float64 leaves headroom for large exponents.
        return torch.norm(latent, p=2, dim=1, keepdim=True).to(torch.float64).pow_(exponent)

    # Magnitude the linear blend currently has; the epsilon avoids dividing by zero below.
    blended_magnitude = torch.norm(blended, p=2, dim=1, keepdim=True).to(torch.float64).add_(0.00001)

    # Interpolate the powered magnitudes, then undo the power.
    target_magnitude = powered_magnitude(a) * inverse_magnitude_weight
    target_magnitude.add_(powered_magnitude(b) * magnitude_weight).pow_(1 / exponent)
    del magnitude_weight, inverse_magnitude_weight

    # Swap the blend's magnitude for the target one, then leave float64.
    rescale = target_magnitude.div_(blended_magnitude).to(output_dtype)
    blended.mul_(rescale)

    return blended
+
+
def get_modified_nmask(settings, nmask, sigma):
    """
    Rescales a negative mask for the current denoising step.

    The mask holds the transparency of the original latent vectors being overlayed:
        0 = fully opaque, infinite density, fully masked
        1 = fully transparent, zero density, fully unmasked

    The transparency is raised to the power
        (sigma ** settings.mask_blend_power) * settings.mask_blend_scale
    Raising a transparency to a power N simulates N successive blending operations,
    where N may be any positive real number. This lets the sigma value steer the
    balance of influence between the denoiser and the original latents.
    """
    import torch

    blend_count = (sigma ** settings.mask_blend_power) * settings.mask_blend_scale
    return torch.pow(nmask, blend_count)
+
+
def apply_adaptive_masks(
        settings: SoftInpaintingSettings,
        nmask,
        latent_orig,
        latent_processed,
        overlay_images,
        width, height,
        paste_to):
    """
    Builds one overlay mask per image from how much its latent changed, and
    cuts the matching region out of each overlay image.

    For every pair of (latent distance map, overlay image) the distance map is
    filtered, divided by a per-pixel threshold derived from "nmask" and the
    composite settings, mapped to an 8-bit image, resized to width x height
    and blurred.

    Args:
        settings (SoftInpaintingSettings):
            Reads mask_blend_scale, composite_mask_influence,
            composite_difference_threshold and composite_difference_contrast.
        nmask:
            Tensor-like mask; only nmask[0] is read.
            NOTE(review): presumably the latent-space negative mask described
            in get_modified_nmask - confirm against the caller.
        latent_orig:
            Latents before processing, compared against latent_processed.
        latent_processed:
            Latents after processing. The L2 norm of the difference is taken over dim 1.
        overlay_images (list):
            PIL images; each entry is REPLACED in place with a masked RGBA copy.
        width, height (int):
            Size each mask is resized to before blurring.
        paste_to:
            When not None, passed to proc.uncrop together with the overlay
            image's size to expand the mask.

    Returns:
        (list): One mask image per processed overlay image, in order.
    """
    import torch
    import modules.processing as proc
    import modules.images as images
    from PIL import Image, ImageOps, ImageFilter

    # TODO: Bias the blending according to the latent mask, add adjustable parameter for bias control.
    latent_mask = nmask[0].float()
    # convert the original mask into a form we use to scale distances for thresholding
    mask_scalar = 1 - (torch.clamp(latent_mask, min=0, max=1) ** (settings.mask_blend_scale / 2))
    # Mix between a constant 0.5 (influence 0) and the mask-derived value (influence 1).
    mask_scalar = (0.5 * (1 - settings.composite_mask_influence)
                   + mask_scalar * settings.composite_mask_influence)
    # Maps [0, 1] to [0, large); the 1.00001 keeps the denominator above zero when mask_scalar is 1.
    mask_scalar = mask_scalar / (1.00001 - mask_scalar)
    mask_scalar = mask_scalar.cpu().numpy()

    # Per-pixel distance between the processed and original latents.
    latent_distance = torch.norm(latent_processed - latent_orig, p=2, dim=1)

    kernel, kernel_center = get_gaussian_kernel(stddev_radius=1.5, max_radius=2)

    masks_for_overlay = []

    for i, (distance_map, overlay_image) in enumerate(zip(latent_distance, overlay_images)):
        converted_mask = distance_map.float().cpu().numpy()
        # First pass keeps the top 10% of each weighted neighbourhood, second pass the middle 50%.
        converted_mask = weighted_histogram_filter(converted_mask, kernel, kernel_center,
                                                   percentile_min=0.9, percentile_max=1, min_width=1)
        converted_mask = weighted_histogram_filter(converted_mask, kernel, kernel_center,
                                                   percentile_min=0.25, percentile_max=0.75, min_width=1)

        # The distance at which opacity of original decreases to 50%
        half_weighted_distance = settings.composite_difference_threshold * mask_scalar
        converted_mask = converted_mask / half_weighted_distance

        # 1 / (1 + d^contrast) is 1 at distance 0 and 0.5 at the half distance; it is then inverted.
        converted_mask = 1 / (1 + converted_mask ** settings.composite_difference_contrast)
        converted_mask = smootherstep(converted_mask)
        converted_mask = 1 - converted_mask
        converted_mask = 255. * converted_mask
        converted_mask = converted_mask.astype(np.uint8)
        converted_mask = Image.fromarray(converted_mask)
        converted_mask = images.resize_image(2, converted_mask, width, height)
        converted_mask = proc.create_binary_mask(converted_mask, round=False)

        # Remove aliasing artifacts using a gaussian blur.
        converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4))

        # Expand the mask to fit the whole image if needed.
        if paste_to is not None:
            converted_mask = proc.uncrop(converted_mask,
                                         (overlay_image.width, overlay_image.height),
                                         paste_to)

        masks_for_overlay.append(converted_mask)

        # Paste the overlay through the inverted mask onto an empty premultiplied-alpha canvas.
        image_masked = Image.new('RGBa', (overlay_image.width, overlay_image.height))
        image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"),
                           mask=ImageOps.invert(converted_mask.convert('L')))

        overlay_images[i] = image_masked.convert('RGBA')

    return masks_for_overlay
+
+
def apply_masks(
        settings,
        nmask,
        overlay_images,
        width, height,
        paste_to):
    """
    Builds the overlay mask directly from "nmask" and cuts the matching
    region out of each overlay image.

    Unlike apply_adaptive_masks, no latent comparison is made: the same mask,
    derived from nmask[0] and settings.mask_blend_scale, is used for every image.

    Args:
        settings:
            Only settings.mask_blend_scale is read.
        nmask:
            Tensor-like mask; only nmask[0] is read.
        overlay_images (list):
            PIL images; each entry is REPLACED in place with a masked RGBA copy.
        width, height (int):
            Size the mask is resized to before blurring.
        paste_to:
            When not None, the mask is expanded with proc.uncrop to the size
            of each overlay image, as apply_adaptive_masks does.

    Returns:
        (list): One mask image per overlay image, in order.
    """
    import torch
    import modules.processing as proc
    import modules.images as images
    from PIL import Image, ImageOps, ImageFilter

    converted_mask = nmask[0].float()
    converted_mask = torch.clamp(converted_mask, min=0, max=1).pow_(settings.mask_blend_scale / 2)
    converted_mask = 255. * converted_mask
    converted_mask = converted_mask.cpu().numpy().astype(np.uint8)
    converted_mask = Image.fromarray(converted_mask)
    converted_mask = images.resize_image(2, converted_mask, width, height)
    converted_mask = proc.create_binary_mask(converted_mask, round=False)

    # Remove aliasing artifacts using a gaussian blur.
    converted_mask = converted_mask.filter(ImageFilter.GaussianBlur(radius=4))

    masks_for_overlay = []

    for i, overlay_image in enumerate(overlay_images):
        overlay_size = (overlay_image.width, overlay_image.height)

        # Expand the mask to fit the whole image if needed.
        # The target must be the overlay's own size: the mask is used below as the
        # paste mask for that overlay, and the two have to match in size.
        if paste_to is not None:
            mask_for_image = proc.uncrop(converted_mask, overlay_size, paste_to)
        else:
            mask_for_image = converted_mask

        # The list starts empty, so entries have to be appended rather than assigned by index.
        masks_for_overlay.append(mask_for_image)

        image_masked = Image.new('RGBa', overlay_size)
        image_masked.paste(overlay_image.convert("RGBA").convert("RGBa"),
                           mask=ImageOps.invert(mask_for_image.convert('L')))

        overlay_images[i] = image_masked.convert('RGBA')

    return masks_for_overlay
+
+
def weighted_histogram_filter(img, kernel, kernel_center, percentile_min=0.0, percentile_max=1.0, min_width=1.0):
    """
    Generalization convolution filter capable of applying
    weighted mean, median, maximum, and minimum filters
    parametrically using an arbitrary kernel.

    For every pixel, the neighbours covered by the kernel are sorted by value
    and stacked, each occupying a slice as tall as its kernel weight. The
    output is the average of the stack between two percentiles.

    Args:
        img (nparray):
            The image, a 2-D array of floats, to which the filter is being applied.
        kernel (nparray):
            The kernel, a 2-D array of floats.
        kernel_center (nparray):
            The kernel center coordinate, a 1-D array with two elements.
        percentile_min (float):
            The lower bound of the histogram window used by the filter,
            from 0 to 1.
        percentile_max (float):
            The upper bound of the histogram window used by the filter,
            from 0 to 1.
        min_width (float):
            The minimum size of the histogram window bounds, in weight units.
            Must be greater than 0.

    Returns:
        (nparray): A filtered copy of the input image "img", a 2-D array of floats.
    """
    image_extent = np.array(img.shape)
    # Offsets from a pixel to the first and one-past-last pixel the kernel covers.
    reach_before = -kernel_center
    reach_after = np.array(kernel.shape) - kernel_center

    def filter_pixel(position):
        position = np.array(position)
        # Part of the kernel footprint that lies inside the image.
        first = np.maximum(0, position + reach_before)
        last = np.minimum(image_extent, position + reach_after)

        # Pair every covered image value with its kernel weight.
        samples = []
        for offset in np.ndindex(tuple(last - first)):
            pixel = np.array(offset) + first
            tap = pixel - position + kernel_center
            samples.append((img[tuple(pixel)], kernel[tuple(tap)]))

        samples.sort(key=lambda sample: sample[0])

        # Stack the samples: each one spans [bottom, top) where top - bottom is its weight.
        stack = []
        total = 0
        for sample_value, sample_weight in samples:
            bottom = total
            total += sample_weight
            stack.append((sample_value, bottom, total))

        # Slice of the stack to average over.
        lower = total * percentile_min
        upper = total * percentile_max

        # Ensure the window is within the stack and at least a certain size.
        if upper - lower < min_width:
            middle = (lower + upper) / 2
            lower = middle - min_width / 2
            upper = middle + min_width / 2

            if upper > total:
                upper = total
                lower = total - min_width

            if lower < 0:
                lower = 0
                upper = min_width

        # Average the samples touching the window, weighted by how much of each lies inside it.
        weighted_total = 0
        overlap_total = 0
        for sample_value, bottom, top in stack:
            if lower >= top:
                # Entirely below the window.
                continue
            if upper <= bottom:
                # Entirely above the window, and so is everything after it.
                break

            overlap = min(upper, top) - max(lower, bottom)
            weighted_total += sample_value * overlap
            overlap_total += overlap

        return weighted_total / overlap_total if overlap_total != 0 else 0

    filtered = img.copy()

    # Apply the kernel operation over each pixel.
    for position in np.ndindex(img.shape):
        filtered[position] = filter_pixel(position)

    return filtered
+
+
def smoothstep(x):
    """
    Cubic smoothstep, 3x^2 - 2x^3. The caller is expected to clamp x to 0-1.
    Bends the straight line f(x) = x into a sigmoid-like curve.
    """
    squared = x * x
    return squared * (3 - 2 * x)
+
+
def smootherstep(x):
    """
    Quintic smootherstep, 6x^5 - 15x^4 + 10x^3. The caller is expected to clamp x to 0-1.
    Bends the straight line f(x) = x into a sigmoid-like curve.
    """
    cubed = x * x * x
    polynomial = x * (6 * x - 15) + 10
    return cubed * polynomial
+
+
def get_gaussian_kernel(stddev_radius=1.0, max_radius=2):
    """
    Creates a Gaussian-shaped kernel whose edges are tapered to zero.

    The falloff used is exp(-d^2 / stddev_radius^2) for a distance d from the
    center. Since that function never reaches zero, its value at distance
    (max_radius + 1) is subtracted and the result is rescaled, so the kernel
    is exactly zero one pixel beyond max_radius while the center stays at 1.

    Args:
        stddev_radius (float):
            Width parameter of the falloff, in pixels.
        max_radius (int):
            The radius of the filter kernel. The number of pixels is (max_radius*2+1) ** 2.

    Returns:
        (nparray, int): The kernel array (shape: (N, N)) and the index of its
        center, which is the same along both axes and equals max_radius.
    """
    width_squared = stddev_radius * stddev_radius

    # Unnormalized gaussian of a squared distance; equals 1 at the center.
    def falloff(squared_distance):
        return math.exp(-squared_distance / width_squared)

    # Value of the falloff where the kernel is forced down to zero.
    cutoff_distance = max_radius + 1.0
    floor_value = falloff(cutoff_distance * cutoff_distance)
    # Stretches the remaining range back so the center is 1 again.
    rescale = 1 / (1 - floor_value)

    side = max_radius * 2 + 1
    kernel = np.zeros((side, side))

    for row in range(side):
        for column in range(side):
            squared_distance = (row - max_radius) ** 2.0 + (column - max_radius) ** 2.0
            weight = (falloff(squared_distance) - floor_value) * rescale
            kernel[row, column] = max(0.0, weight)

    return kernel, max_radius
+
+
+# ------------------- Constants -------------------
+
+
# Initial slider values.
default = SoftInpaintingSettings(
    mask_blend_power=1,
    mask_blend_scale=0.5,
    inpaint_detail_preservation=4,
    composite_mask_influence=0,
    composite_difference_threshold=0.5,
    composite_difference_contrast=2)

# Label, infotext key and element id of the enabling accordion.
enabled_ui_label = "Soft inpainting"
enabled_gen_param_label = "Soft inpainting enabled"
enabled_el_id = "soft_inpainting_enabled"

# Slider labels shown in the UI.
ui_labels = SoftInpaintingSettings(
    mask_blend_power="Schedule bias",
    mask_blend_scale="Preservation strength",
    inpaint_detail_preservation="Transition contrast boost",
    composite_mask_influence="Mask influence",
    composite_difference_threshold="Difference threshold",
    composite_difference_contrast="Difference contrast")

# One-line descriptions shown with each slider.
ui_info = SoftInpaintingSettings(
    mask_blend_power="Shifts when preservation of original content occurs during denoising.",
    mask_blend_scale="How strongly partially masked content should be preserved.",
    inpaint_detail_preservation="Amplifies the contrast that may be lost in partially masked regions.",
    composite_mask_influence="How strongly the original mask should bias the difference threshold.",
    composite_difference_threshold="How much an image region can change before the original pixels are not blended in anymore.",
    composite_difference_contrast="How sharp the transition should be between blended and not blended.")

# Keys under which the values are stored in the generation parameters.
gen_param_labels = SoftInpaintingSettings(
    mask_blend_power="Soft inpainting schedule bias",
    mask_blend_scale="Soft inpainting preservation strength",
    inpaint_detail_preservation="Soft inpainting transition contrast boost",
    composite_mask_influence="Soft inpainting mask influence",
    composite_difference_threshold="Soft inpainting difference threshold",
    composite_difference_contrast="Soft inpainting difference contrast")

# Element ids of the sliders.
el_ids = SoftInpaintingSettings(
    mask_blend_power="mask_blend_power",
    mask_blend_scale="mask_blend_scale",
    inpaint_detail_preservation="inpaint_detail_preservation",
    composite_mask_influence="composite_mask_influence",
    composite_difference_threshold="composite_difference_threshold",
    composite_difference_contrast="composite_difference_contrast")
+
+
+# ------------------- Script -------------------
+
+
class Script(scripts.Script):
    """
    Always-visible img2img script that adds the "Soft inpainting" controls and
    hooks the mask blending, post-sampling and mask overlay callbacks.
    """

    def __init__(self):
        self.section = "inpaint"
        # Filled in by post_sample() and consumed by postprocess_maskoverlay().
        self.masks_for_overlay = None
        self.overlay_images = None

    def title(self):
        """Returns the display name of the script."""
        return "Soft Inpainting"

    def show(self, is_img2img):
        """Returns scripts.AlwaysVisible for img2img and False otherwise."""
        return scripts.AlwaysVisible if is_img2img else False

    def ui(self, is_img2img):
        """
        Builds the soft inpainting accordion (sliders plus a help section) and
        registers the controls for infotext pasting.

        Returns the enable toggle followed by the six sliders, in the order the
        callbacks below receive them. Returns None when not in img2img.
        """
        if not is_img2img:
            return

        with InputAccordion(False, label=enabled_ui_label, elem_id=enabled_el_id) as soft_inpainting_enabled:
            with gr.Group():
                gr.Markdown(
                    """
                    Soft inpainting allows you to **seamlessly blend original content with inpainted content** according to the mask opacity.
                    **High _Mask blur_** values are recommended!
                    """)

                # Latent blending sliders.
                power = \
                    gr.Slider(label=ui_labels.mask_blend_power,
                              info=ui_info.mask_blend_power,
                              minimum=0,
                              maximum=8,
                              step=0.1,
                              value=default.mask_blend_power,
                              elem_id=el_ids.mask_blend_power)
                scale = \
                    gr.Slider(label=ui_labels.mask_blend_scale,
                              info=ui_info.mask_blend_scale,
                              minimum=0,
                              maximum=8,
                              step=0.05,
                              value=default.mask_blend_scale,
                              elem_id=el_ids.mask_blend_scale)
                detail = \
                    gr.Slider(label=ui_labels.inpaint_detail_preservation,
                              info=ui_info.inpaint_detail_preservation,
                              minimum=1,
                              maximum=32,
                              step=0.5,
                              value=default.inpaint_detail_preservation,
                              elem_id=el_ids.inpaint_detail_preservation)

                gr.Markdown(
                    """
                    ### Pixel Composite Settings
                    """)

                # Pixel compositing sliders.
                mask_inf = \
                    gr.Slider(label=ui_labels.composite_mask_influence,
                              info=ui_info.composite_mask_influence,
                              minimum=0,
                              maximum=1,
                              step=0.05,
                              value=default.composite_mask_influence,
                              elem_id=el_ids.composite_mask_influence)

                dif_thresh = \
                    gr.Slider(label=ui_labels.composite_difference_threshold,
                              info=ui_info.composite_difference_threshold,
                              minimum=0,
                              maximum=8,
                              step=0.25,
                              value=default.composite_difference_threshold,
                              elem_id=el_ids.composite_difference_threshold)

                dif_contr = \
                    gr.Slider(label=ui_labels.composite_difference_contrast,
                              info=ui_info.composite_difference_contrast,
                              minimum=0,
                              maximum=8,
                              step=0.25,
                              value=default.composite_difference_contrast,
                              elem_id=el_ids.composite_difference_contrast)

                # Collapsed long-form explanation of every slider.
                with gr.Accordion("Help", open=False):
                    gr.Markdown(
                        f"""
                        ### {ui_labels.mask_blend_power}

                        The blending strength of original content is scaled proportionally with the decreasing noise level values at each step (sigmas).
                        This ensures that the influence of the denoiser and original content preservation is roughly balanced at each step.
                        This balance can be shifted using this parameter, controlling whether earlier or later steps have stronger preservation.

                        - **Below 1**: Stronger preservation near the end (with low sigma)
                        - **1**: Balanced (proportional to sigma)
                        - **Above 1**: Stronger preservation in the beginning (with high sigma)
                        """)
                    gr.Markdown(
                        f"""
                        ### {ui_labels.mask_blend_scale}

                        Skews whether partially masked image regions should be more likely to preserve the original content or favor inpainted content.
                        This may need to be adjusted depending on the {ui_labels.mask_blend_power}, CFG Scale, prompt and Denoising strength.

                        - **Low values**: Favors generated content.
                        - **High values**: Favors original content.
                        """)
                    gr.Markdown(
                        f"""
                        ### {ui_labels.inpaint_detail_preservation}

                        This parameter controls how the original latent vectors and denoised latent vectors are interpolated.
                        With higher values, the magnitude of the resulting blended vector will be closer to the maximum of the two interpolated vectors.
                        This can prevent the loss of contrast that occurs with linear interpolation.

                        - **Low values**: Softer blending, details may fade.
                        - **High values**: Stronger contrast, may over-saturate colors.
                        """)

                    gr.Markdown(
                        """
                        ## Pixel Composite Settings

                        Masks are generated based on how much a part of the image changed after denoising.
                        These masks are used to blend the original and final images together.
                        If the difference is low, the original pixels are used instead of the pixels returned by the inpainting process.
                        """)

                    gr.Markdown(
                        f"""
                        ### {ui_labels.composite_mask_influence}

                        This parameter controls how much the mask should bias this sensitivity to difference.

                        - **0**: Ignore the mask, only consider differences in image content.
                        - **1**: Follow the mask closely despite image content changes.
                        """)

                    gr.Markdown(
                        f"""
                        ### {ui_labels.composite_difference_threshold}

                        This value represents the difference at which the original pixels will have less than 50% opacity.

                        - **Low values**: Two images patches must be almost the same in order to retain original pixels.
                        - **High values**: Two images patches can be very different and still retain original pixels.
                        """)

                    gr.Markdown(
                        f"""
                        ### {ui_labels.composite_difference_contrast}

                        This value represents the contrast between the opacity of the original and inpainted content.

                        - **Low values**: The blend will be more gradual and have longer transitions, but may cause ghosting.
                        - **High values**: Ghosting will be less common, but transitions may be very sudden.
                        """)

        # Pair every control with the generation parameter label it is stored under.
        self.infotext_fields = [(soft_inpainting_enabled, enabled_gen_param_label),
                                (power, gen_param_labels.mask_blend_power),
                                (scale, gen_param_labels.mask_blend_scale),
                                (detail, gen_param_labels.inpaint_detail_preservation),
                                (mask_inf, gen_param_labels.composite_mask_influence),
                                (dif_thresh, gen_param_labels.composite_difference_threshold),
                                (dif_contr, gen_param_labels.composite_difference_contrast)]

        self.paste_field_names = []
        for _, field_name in self.infotext_fields:
            self.paste_field_names.append(field_name)

        return [soft_inpainting_enabled,
                power,
                scale,
                detail,
                mask_inf,
                dif_thresh,
                dif_contr]

    def process(self, p, enabled, power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr):
        """
        When enabled and "p" uses inpainting, turns off mask rounding on "p"
        and records the settings in p.extra_generation_params.
        """
        if not enabled:
            return

        if not processing_uses_inpainting(p):
            return

        # Shut off the rounding it normally does.
        p.mask_round = False

        settings = SoftInpaintingSettings(power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr)

        # p.extra_generation_params["Mask rounding"] = False
        settings.add_generation_params(p.extra_generation_params)

    def on_mask_blend(self, p, mba: scripts.MaskBlendArgs, enabled, power, scale, detail_preservation, mask_inf,
                      dif_thresh, dif_contr):
        """
        Sets mba.blended_latent. On the final blend the current latent is passed
        through unchanged; otherwise it is the latent_blend() of the initial and
        current latents, weighted by the sigma-adjusted negative mask.
        """
        if not enabled:
            return

        if not processing_uses_inpainting(p):
            return

        if mba.is_final_blend:
            mba.blended_latent = mba.current_latent
            return

        settings = SoftInpaintingSettings(power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr)

        # todo: Why is sigma 2D? Both values are the same.
        mba.blended_latent = latent_blend(settings,
                                          mba.init_latent,
                                          mba.current_latent,
                                          get_modified_nmask(settings, mba.nmask, mba.sigma[0]))

    def post_sample(self, p, ps: scripts.PostSampleArgs, enabled, power, scale, detail_preservation, mask_inf,
                    dif_thresh, dif_contr):
        """
        Rebuilds the overlay images from p.init_images and computes the masks
        used to composite them, storing both on the script instance.

        Does nothing when disabled, when "p" does not use inpainting, or when
        "p" has no nmask.
        """
        if not enabled:
            return

        if not processing_uses_inpainting(p):
            return

        nmask = getattr(p, "nmask", None)
        if nmask is None:
            return

        from modules import images
        from modules.shared import opts

        settings = SoftInpaintingSettings(power, scale, detail_preservation, mask_inf, dif_thresh, dif_contr)

        # since the original code puts holes in the existing overlay images,
        # we have to rebuild them.
        self.overlay_images = []
        for img in p.init_images:

            image = images.flatten(img, opts.img2img_background_color)

            if p.paste_to is None and p.resize_mode != 3:
                image = images.resize_image(p.resize_mode, image, p.width, p.height)

            self.overlay_images.append(image.convert('RGBA'))

        # A single init image is repeated so there is one overlay per batch item.
        if len(p.init_images) == 1:
            self.overlay_images = self.overlay_images * p.batch_size

        # Samples flagged "already_decoded" get the plain nmask-based masks;
        # otherwise the masks are derived from the latent differences.
        if getattr(ps.samples, 'already_decoded', False):
            self.masks_for_overlay = apply_masks(settings=settings,
                                                 nmask=nmask,
                                                 overlay_images=self.overlay_images,
                                                 width=p.width,
                                                 height=p.height,
                                                 paste_to=p.paste_to)
        else:
            self.masks_for_overlay = apply_adaptive_masks(settings=settings,
                                                          nmask=nmask,
                                                          latent_orig=p.init_latent,
                                                          latent_processed=ps.samples,
                                                          overlay_images=self.overlay_images,
                                                          width=p.width,
                                                          height=p.height,
                                                          paste_to=p.paste_to)

    def postprocess_maskoverlay(self, p, ppmo: scripts.PostProcessMaskOverlayArgs, enabled, power, scale,
                                detail_preservation, mask_inf, dif_thresh, dif_contr):
        """
        Replaces the mask and overlay image on "ppmo" with the ones computed by
        post_sample() for index ppmo.index, if any were computed.
        """
        if not enabled:
            return

        if not processing_uses_inpainting(p):
            return

        if self.masks_for_overlay is None:
            return

        if self.overlay_images is None:
            return

        ppmo.mask_for_overlay = self.masks_for_overlay[ppmo.index]
        ppmo.overlay_image = self.overlay_images[ppmo.index]
diff --git a/style.css b/style.css
index 73162022..ee39a57b 100644
--- a/style.css
+++ b/style.css
@@ -462,6 +462,15 @@ div.toprow-compact-tools{
padding: 4px;
}
+#settings > div.tab-nav .settings-category{
+ display: block;
+ margin: 1em 0 0.25em 0;
+ font-weight: bold;
+ text-decoration: underline;
+ cursor: default;
+ user-select: none;
+}
+
#settings_result{
height: 1.4em;
margin: 0 1.2em;
@@ -637,6 +646,8 @@ table.popup-table .link{
margin: auto;
padding: 2em;
z-index: 1001;
+ max-height: 90%;
+ max-width: 90%;
}
/* fullpage image viewer */
@@ -840,8 +851,16 @@ footer {
/* extra networks UI */
-.extra-page .prompt{
- margin: 0 0 0.5em 0;
+.extra-page > div.gap{
+ gap: 0;
+}
+
+.extra-page-prompts{
+ margin-bottom: 0;
+}
+
+.extra-page-prompts.extra-page-prompts-active{
+ margin-bottom: 1em;
}
.extra-network-cards{
diff --git a/webui.sh b/webui.sh
index 08911469..69ca2f88 100755
--- a/webui.sh
+++ b/webui.sh
@@ -89,7 +89,7 @@ delimiter="################################################################"
printf "\n%s\n" "${delimiter}"
printf "\e[1m\e[32mInstall script for stable-diffusion + Web UI\n"
-printf "\e[1m\e[34mTested on Debian 11 (Bullseye)\e[0m"
+printf "\e[1m\e[34mTested on Debian 11 (Bullseye), Fedora 34+ and openSUSE Leap 15.4 or newer.\e[0m"
printf "\n%s\n" "${delimiter}"
# Do not run as root
@@ -133,7 +133,7 @@ case "$gpu_info" in
if [[ $(bc <<< "$pyv <= 3.10") -eq 1 ]]
then
# Navi 1 users get a nightly ROCm 5.6 build of torch; the previous 1.13.1 pin existed because 2.0 did not seem to work.
- export TORCH_COMMAND="pip install torch==1.13.1+rocm5.2 torchvision==0.14.1+rocm5.2 --index-url https://download.pytorch.org/whl/rocm5.2"
+ export TORCH_COMMAND="pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/rocm5.6"
else
printf "\e[1m\e[31mERROR: RX 5000 series GPUs must be using at max python 3.10, aborting...\e[0m"
exit 1
@@ -143,8 +143,7 @@ case "$gpu_info" in
*"Navi 2"*) export HSA_OVERRIDE_GFX_VERSION=10.3.0
;;
*"Navi 3"*) [[ -z "${TORCH_COMMAND}" ]] && \
- export TORCH_COMMAND="pip install torch torchvision --index-url https://download.pytorch.org/whl/test/rocm5.6"
- # Navi 3 needs at least 5.5 which is only on the torch 2.1.0 release candidates right now
+ export TORCH_COMMAND="pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/rocm5.7"
;;
*"Renoir"*) export HSA_OVERRIDE_GFX_VERSION=9.0.0
printf "\n%s\n" "${delimiter}"
@@ -223,7 +222,7 @@ fi
# Try using TCMalloc on Linux
prepare_tcmalloc() {
if [[ "${OSTYPE}" == "linux"* ]] && [[ -z "${NO_TCMALLOC}" ]] && [[ -z "${LD_PRELOAD}" ]]; then
- TCMALLOC="$(PATH=/usr/sbin:$PATH ldconfig -p | grep -Po "libtcmalloc(_minimal|)\.so\.\d" | head -n 1)"
+ TCMALLOC="$(PATH=/sbin:$PATH ldconfig -p | grep -Po "libtcmalloc(_minimal|)\.so\.\d" | head -n 1)"
if [[ ! -z "${TCMALLOC}" ]]; then
echo "Using TCMalloc: ${TCMALLOC}"
export LD_PRELOAD="${TCMALLOC}"