-rw-r--r--  .github/ISSUE_TEMPLATE/bug_report.md  32
-rw-r--r--  .github/ISSUE_TEMPLATE/bug_report.yml  83
-rw-r--r--  .github/ISSUE_TEMPLATE/config.yml  5
-rw-r--r--  .github/ISSUE_TEMPLATE/feature_request.md  20
-rw-r--r--  .github/ISSUE_TEMPLATE/feature_request.yml  40
-rw-r--r--  .gitignore  1
-rw-r--r--  README.md  30
-rw-r--r--  artists.csv  7
-rw-r--r--  javascript/aspectRatioOverlay.js  55
-rw-r--r--  javascript/dragdrop.js  2
-rw-r--r--  javascript/edit-attention.js  35
-rw-r--r--  javascript/hints.js  2
-rw-r--r--  javascript/imageviewer.js  1
-rw-r--r--  javascript/localization.js  146
-rw-r--r--  javascript/progressbar.js  12
-rw-r--r--  javascript/ui.js  7
-rw-r--r--  launch.py  51
-rw-r--r--  localizations/Put localization files here.txt  0
-rw-r--r--  modules/aesthetic_clip.py  241
-rw-r--r--  modules/api/api.py  68
-rw-r--r--  modules/api/processing.py  99
-rw-r--r--  modules/deepbooru.py  3
-rw-r--r--  modules/extras.py  27
-rw-r--r--  modules/generation_parameters_copypaste.py  25
-rw-r--r--  modules/hypernetworks/hypernetwork.py  112
-rw-r--r--  modules/hypernetworks/ui.py  22
-rw-r--r--  modules/img2img.py  4
-rw-r--r--  modules/interrogate.py  12
-rw-r--r--  modules/localization.py  31
-rw-r--r--  modules/processing.py  94
-rw-r--r--  modules/prompt_parser.py  2
-rw-r--r--  modules/scripts.py  15
-rw-r--r--  modules/sd_hijack.py  30
-rw-r--r--  modules/sd_hijack_inpainting.py  331
-rw-r--r--  modules/sd_hijack_optimizations.py  16
-rw-r--r--  modules/sd_models.py  57
-rw-r--r--  modules/sd_samplers.py  167
-rw-r--r--  modules/shared.py  39
-rw-r--r--  modules/styles.py  4
-rw-r--r--  modules/textual_inversion/dataset.py  4
-rw-r--r--  modules/textual_inversion/image_embedding.py  5
-rw-r--r--  modules/textual_inversion/preprocess.py  85
-rw-r--r--  modules/textual_inversion/textual_inversion.py  6
-rw-r--r--  modules/textual_inversion/ui.py  4
-rw-r--r--  modules/txt2img.py  8
-rw-r--r--  modules/ui.py  257
-rw-r--r--  requirements.txt  1
-rw-r--r--  requirements_versions.txt  1
-rw-r--r--  script.js  10
-rw-r--r--  scripts/outpainting_mk_2.py  139
-rw-r--r--  scripts/xy_grid.py  1
-rw-r--r--  style.css  11
-rw-r--r--  webui.bat  2
-rw-r--r--  webui.py  57
-rwxr-xr-x  webui.sh  2
55 files changed, 2057 insertions, 464 deletions
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
deleted file mode 100644
index 50c54210..00000000
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ /dev/null
@@ -1,32 +0,0 @@
----
-name: Bug report
-about: Create a report to help us improve
-title: ''
-labels: bug-report
-assignees: ''
-
----
-
-**Describe the bug**
-A clear and concise description of what the bug is.
-
-**To Reproduce**
-Steps to reproduce the behavior:
-1. Go to '...'
-2. Click on '....'
-3. Scroll down to '....'
-4. See error
-
-**Expected behavior**
-A clear and concise description of what you expected to happen.
-
-**Screenshots**
-If applicable, add screenshots to help explain your problem.
-
-**Desktop (please complete the following information):**
- - OS: [e.g. Windows, Linux]
- - Browser [e.g. chrome, safari]
- - Commit revision [looks like this: e68484500f76a33ba477d5a99340ab30451e557b; can be seen when launching webui.bat, or obtained manually by running `git rev-parse HEAD`]
-
-**Additional context**
-Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 00000000..9c2ff313
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,83 @@
+name: Bug Report
+description: You think something is broken in the UI
+title: "[Bug]: "
+labels: ["bug-report"]
+
+body:
+ - type: checkboxes
+ attributes:
+ label: Is there an existing issue for this?
+ description: Please search to see if an issue already exists for the bug you encountered, and that it hasn't been fixed in a recent build/commit.
+ options:
+ - label: I have searched the existing issues and checked the recent builds/commits
+ required: true
+ - type: markdown
+ attributes:
+ value: |
+ *Please fill this form with as much information as possible; don't forget to fill in "What OS..." and "What browsers", and provide screenshots if possible*
+ - type: textarea
+ id: what-did
+ attributes:
+ label: What happened?
+ description: Tell us what happened in a very clear and simple way
+ validations:
+ required: true
+ - type: textarea
+ id: steps
+ attributes:
+ label: Steps to reproduce the problem
+ description: Please provide us with precise step-by-step information on how to reproduce the bug
+ value: |
+ 1. Go to ....
+ 2. Press ....
+ 3. ...
+ validations:
+ required: true
+ - type: textarea
+ id: what-should
+ attributes:
+ label: What should have happened?
+ description: Tell us what you think the normal behavior should be
+ validations:
+ required: true
+ - type: input
+ id: commit
+ attributes:
+ label: Commit where the problem happens
+ description: Which commit are you running? (copy the **Commit hash** shown in the cmd/terminal when you launch the UI)
+ validations:
+ required: true
+ - type: dropdown
+ id: platforms
+ attributes:
+ label: What platforms do you use to access the UI?
+ multiple: true
+ options:
+ - Windows
+ - Linux
+ - MacOS
+ - iOS
+ - Android
+ - Other/Cloud
+ - type: dropdown
+ id: browsers
+ attributes:
+ label: What browsers do you use to access the UI?
+ multiple: true
+ options:
+ - Mozilla Firefox
+ - Google Chrome
+ - Brave
+ - Apple Safari
+ - Microsoft Edge
+ - type: textarea
+ id: cmdargs
+ attributes:
+ label: Command Line Arguments
+ description: Are you using any launching parameters/command line arguments (modified webui-user.py)? If yes, please write them below.
+ render: Shell
+ - type: textarea
+ id: misc
+ attributes:
+ label: Additional information, context and logs
+ description: Please provide us with any relevant additional info, context or log output.
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 00000000..f58c94a9
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
+blank_issues_enabled: false
+contact_links:
+ - name: WebUI Community Support
+ url: https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions
+ about: Please ask and answer questions here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
deleted file mode 100644
index eda42fa7..00000000
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ /dev/null
@@ -1,20 +0,0 @@
----
-name: Feature request
-about: Suggest an idea for this project
-title: ''
-labels: 'suggestion'
-assignees: ''
-
----
-
-**Is your feature request related to a problem? Please describe.**
-A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
-
-**Describe the solution you'd like**
-A clear and concise description of what you want to happen.
-
-**Describe alternatives you've considered**
-A clear and concise description of any alternative solutions or features you've considered.
-
-**Additional context**
-Add any other context or screenshots about the feature request here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 00000000..8ca6e21f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,40 @@
+name: Feature request
+description: Suggest an idea for this project
+title: "[Feature Request]: "
+labels: ["suggestion"]
+
+body:
+ - type: checkboxes
+ attributes:
+ label: Is there an existing issue for this?
+ description: Please search to see if an issue already exists for the feature you want, and that it's not implemented in a recent build/commit.
+ options:
+ - label: I have searched the existing issues and checked the recent builds/commits
+ required: true
+ - type: markdown
+ attributes:
+ value: |
+ *Please fill this form with as much information as possible, provide screenshots and/or illustrations of the feature if possible*
+ - type: textarea
+ id: feature
+ attributes:
+ label: What would your feature do?
+ description: Tell us about your feature in a very clear and simple way, and what problem it would solve
+ validations:
+ required: true
+ - type: textarea
+ id: workflow
+ attributes:
+ label: Proposed workflow
+ description: Please provide us with step-by-step information on how you'd like the feature to be accessed and used
+ value: |
+ 1. Go to ....
+ 2. Press ....
+ 3. ...
+ validations:
+ required: true
+ - type: textarea
+ id: misc
+ attributes:
+ label: Additional information
+ description: Add any other context or screenshots about the feature request here.
diff --git a/.gitignore b/.gitignore
index 69785b3e..f9c3357c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,3 +27,4 @@ __pycache__
notification.mp3
/SwinIR
/textual_inversion
+.vscode
\ No newline at end of file
diff --git a/README.md b/README.md
index 859a91b6..5b5dc8ba 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
- One click install and run script (but you still must install python and git)
- Outpainting
- Inpainting
+- Color Sketch
- Prompt Matrix
- Stable Diffusion Upscale
- Attention, specify parts of text that the model should pay more attention to
@@ -23,6 +24,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
- have as many embeddings as you want and use any names you like for them
- use multiple embeddings with different numbers of vectors per token
- works with half precision floating point numbers
+ - train embeddings on 8GB (also reports of 6GB working)
- Extras tab with:
- GFPGAN, neural network that fixes faces
- CodeFormer, face restoration tool as an alternative to GFPGAN
@@ -37,14 +39,14 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
- Interrupt processing at any time
- 4GB video card support (also reports of 2GB working)
- Correct seeds for batches
-- Prompt length validation
- - get length of prompt in tokens as you type
- - get a warning after generation if some text was truncated
+- Live prompt token length validation
- Generation parameters
- parameters you used to generate images are saved with that image
- in PNG chunks for PNG, in EXIF for JPEG
- can drag the image to PNG info tab to restore generation parameters and automatically copy them into UI
- can be disabled in settings
+ - drag and drop an image/text-parameters to promptbox
+- Read Generation Parameters Button, loads parameters in promptbox to UI
- Settings page
- Running arbitrary python code from UI (must run with --allow-code to enable)
- Mouseover hints for most UI elements
@@ -59,10 +61,10 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
- CLIP interrogator, a button that tries to guess prompt from an image
- Prompt Editing, a way to change prompt mid-generation, say to start making a watermelon and switch to anime girl midway
- Batch Processing, process a group of files using img2img
-- Img2img Alternative
+- Img2img Alternative, reverse Euler method of cross attention control
- Highres Fix, a convenience option to produce high resolution pictures in one click without usual distortions
- Reloading checkpoints on the fly
-- Checkpoint Merger, a tab that allows you to merge two checkpoints into one
+- Checkpoint Merger, a tab that allows you to merge up to 3 checkpoints into one
- [Custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Custom-Scripts) with many extensions from community
- [Composable-Diffusion](https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/), a way to use multiple prompts at once
- separate prompts using uppercase `AND`
@@ -70,14 +72,26 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
- No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
- DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args)
- [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add --xformers to commandline args)
+- History tab: view, direct and delete images conveniently within the UI
+- Generate forever option
+- Training tab
+ - hypernetworks and embeddings options
+ - Preprocessing images: cropping, mirroring, autotagging using BLIP or deepdanbooru (for anime)
+- Clip skip
+- Use Hypernetworks
+- Use VAEs
+- Estimated completion time in progress bar
+- API
+- Support for dedicated [inpainting model](https://github.com/runwayml/stable-diffusion#inpainting-with-stable-diffusion) by RunwayML.
+- Aesthetic Gradients, a way to generate images with a specific aesthetic by using CLIP image embeds (implementation of [https://github.com/vicgalle/stable-diffusion-aesthetic-gradients](https://github.com/vicgalle/stable-diffusion-aesthetic-gradients))
+
## Installation and Running
Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs.
-Alternatively, use Google Colab:
+Alternatively, use online services (like Google Colab):
-- [Colab, maintained by Akaibu](https://colab.research.google.com/drive/1kw3egmSn-KgWsikYvOMjJkVDsPLjEMzl)
-- [Colab, original by me, outdated](https://colab.research.google.com/drive/1Iy-xW9t1-OQWhb0hNxueGij8phCyluOh).
+- [List of Online Services](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Online-Services)
### Automatic Installation on Windows
1. Install [Python 3.10.6](https://www.python.org/downloads/windows/), checking "Add Python to PATH"
diff --git a/artists.csv b/artists.csv
index 99cdbdc6..1a61ed88 100644
--- a/artists.csv
+++ b/artists.csv
@@ -523,7 +523,6 @@ Affandi,0.7170285,nudity
Diane Arbus,0.655138,digipa-high-impact
Joseph Ducreux,0.65247905,digipa-high-impact
Berthe Morisot,0.7165984,fineart
-Hilma AF Klint,0.71643853,scribbles
Hilma af Klint,0.71643853,scribbles
Filippino Lippi,0.7163017,fineart
Leonid Afremov,0.7163005,fineart
@@ -738,14 +737,12 @@ Abraham Mignon,0.60605425,fineart
Albert Bloch,0.69573116,nudity
Charles Dana Gibson,0.67155975,fineart
Alexandre-Évariste Fragonard,0.6507174,fineart
-Alexandre-Évariste Fragonard,0.6507174,fineart
Ernst Fuchs,0.6953538,nudity
Alfredo Jaar,0.6952965,digipa-high-impact
Judy Chicago,0.6952246,weird
Frans van Mieris the Younger,0.6951849,fineart
Aertgen van Leyden,0.6951305,fineart
Emily Carr,0.69512105,fineart
-Frances Macdonald,0.6950408,scribbles
Frances MacDonald,0.6950408,scribbles
Hannah Höch,0.69495845,scribbles
Gillis Rombouts,0.58770025,fineart
@@ -895,7 +892,6 @@ Richard McGuire,0.6820089,scribbles
Anni Albers,0.65708244,digipa-high-impact
Aleksey Savrasov,0.65207493,fineart
Wayne Barlowe,0.6537874,fineart
-Giorgio De Chirico,0.6815907,fineart
Giorgio de Chirico,0.6815907,fineart
Ernest Procter,0.6815795,fineart
Adriaen Brouwer,0.6815058,fineart
@@ -1241,7 +1237,6 @@ Betty Churcher,0.65387225,fineart
Claes Corneliszoon Moeyaert,0.65386075,fineart
David Bomberg,0.6537477,fineart
Abraham Bosschaert,0.6535562,fineart
-Giuseppe De Nittis,0.65354455,fineart
Giuseppe de Nittis,0.65354455,fineart
John La Farge,0.65342575,fineart
Frits Thaulow,0.65341854,fineart
@@ -1522,7 +1517,6 @@ Gertrude Harvey,0.5903887,fineart
Grant Wood,0.6266253,fineart
Fyodor Vasilyev,0.5234919,digipa-med-impact
Cagnaccio di San Pietro,0.6261671,fineart
-Cagnaccio Di San Pietro,0.6261671,fineart
Doris Boulton-Maude,0.62593174,fineart
Adolf Hirémy-Hirschl,0.5946784,fineart
Harold von Schmidt,0.6256755,fineart
@@ -2411,7 +2405,6 @@ Hermann Feierabend,0.5346168,digipa-high-impact
Antonio Donghi,0.4610982,digipa-low-impact
Adonna Khare,0.4858036,digipa-med-impact
James Stokoe,0.5015107,digipa-med-impact
-Art & Language,0.5341332,digipa-high-impact
Agustín Fernández,0.53403986,fineart
Germán Londoño,0.5338712,fineart
Emmanuelle Moureaux,0.5335641,digipa-high-impact
diff --git a/javascript/aspectRatioOverlay.js b/javascript/aspectRatioOverlay.js
index 96f1c00d..66f26a22 100644
--- a/javascript/aspectRatioOverlay.js
+++ b/javascript/aspectRatioOverlay.js
@@ -3,12 +3,12 @@ let currentWidth = null;
let currentHeight = null;
let arFrameTimeout = setTimeout(function(){},0);
-function dimensionChange(e,dimname){
+function dimensionChange(e, is_width, is_height){
- if(dimname == 'Width'){
+ if(is_width){
currentWidth = e.target.value*1.0
}
- if(dimname == 'Height'){
+ if(is_height){
currentHeight = e.target.value*1.0
}
@@ -18,22 +18,13 @@ function dimensionChange(e,dimname){
return;
}
- var img2imgMode = gradioApp().querySelector('#mode_img2img.tabs > div > button.rounded-t-lg.border-gray-200')
- if(img2imgMode){
- img2imgMode=img2imgMode.innerText
- }else{
- return;
- }
-
- var redrawImage = gradioApp().querySelector('div[data-testid=image] img');
- var inpaintImage = gradioApp().querySelector('#img2maskimg div[data-testid=image] img')
-
var targetElement = null;
- if(img2imgMode=='img2img' && redrawImage){
- targetElement = redrawImage;
- }else if(img2imgMode=='Inpaint' && inpaintImage){
- targetElement = inpaintImage;
+ var tabIndex = get_tab_index('mode_img2img')
+ if(tabIndex == 0){
+ targetElement = gradioApp().querySelector('div[data-testid=image] img');
+ } else if(tabIndex == 1){
+ targetElement = gradioApp().querySelector('#img2maskimg div[data-testid=image] img');
}
if(targetElement){
@@ -98,22 +89,20 @@ onUiUpdate(function(){
var inImg2img = Boolean(gradioApp().querySelector("button.rounded-t-lg.border-gray-200"))
if(inImg2img){
let inputs = gradioApp().querySelectorAll('input');
- inputs.forEach(function(e){
- let parentLabel = e.parentElement.querySelector('label')
- if(parentLabel && parentLabel.innerText){
- if(!e.classList.contains('scrollwatch')){
- if(parentLabel.innerText == 'Width' || parentLabel.innerText == 'Height'){
- e.addEventListener('input', function(e){dimensionChange(e,parentLabel.innerText)} )
- e.classList.add('scrollwatch')
- }
- if(parentLabel.innerText == 'Width'){
- currentWidth = e.value*1.0
- }
- if(parentLabel.innerText == 'Height'){
- currentHeight = e.value*1.0
- }
- }
- }
+ inputs.forEach(function(e){
+ var is_width = e.parentElement.id == "img2img_width"
+ var is_height = e.parentElement.id == "img2img_height"
+
+ if((is_width || is_height) && !e.classList.contains('scrollwatch')){
+ e.addEventListener('input', function(e){dimensionChange(e, is_width, is_height)} )
+ e.classList.add('scrollwatch')
+ }
+ if(is_width){
+ currentWidth = e.value*1.0
+ }
+ if(is_height){
+ currentHeight = e.value*1.0
+ }
})
}
});
diff --git a/javascript/dragdrop.js b/javascript/dragdrop.js
index 070cf255..3ed1cb3c 100644
--- a/javascript/dragdrop.js
+++ b/javascript/dragdrop.js
@@ -43,7 +43,7 @@ function dropReplaceImage( imgWrap, files ) {
window.document.addEventListener('dragover', e => {
const target = e.composedPath()[0];
const imgWrap = target.closest('[data-testid="image"]');
- if ( !imgWrap && target.placeholder.indexOf("Prompt") == -1) {
+ if ( !imgWrap && target.placeholder && target.placeholder.indexOf("Prompt") == -1) {
return;
}
e.stopPropagation();
diff --git a/javascript/edit-attention.js b/javascript/edit-attention.js
index 67084e7a..c0d29a74 100644
--- a/javascript/edit-attention.js
+++ b/javascript/edit-attention.js
@@ -9,9 +9,38 @@ addEventListener('keydown', (event) => {
let minus = "ArrowDown"
if (event.key != plus && event.key != minus) return;
- selectionStart = target.selectionStart;
- selectionEnd = target.selectionEnd;
- if(selectionStart == selectionEnd) return;
+ let selectionStart = target.selectionStart;
+ let selectionEnd = target.selectionEnd;
+ // If the user hasn't selected anything, let's select their current parenthesis block
+ if (selectionStart === selectionEnd) {
+ // Find opening parenthesis around current cursor
+ const before = target.value.substring(0, selectionStart);
+ let beforeParen = before.lastIndexOf("(");
+ if (beforeParen == -1) return;
+ let beforeParenClose = before.lastIndexOf(")");
+ while (beforeParenClose !== -1 && beforeParenClose > beforeParen) {
+ beforeParen = before.lastIndexOf("(", beforeParen - 1);
+ beforeParenClose = before.lastIndexOf(")", beforeParenClose - 1);
+ }
+
+ // Find closing parenthesis around current cursor
+ const after = target.value.substring(selectionStart);
+ let afterParen = after.indexOf(")");
+ if (afterParen == -1) return;
+ let afterParenOpen = after.indexOf("(");
+ while (afterParenOpen !== -1 && afterParen > afterParenOpen) {
+ afterParen = after.indexOf(")", afterParen + 1);
+ afterParenOpen = after.indexOf("(", afterParenOpen + 1);
+ }
+ if (beforeParen === -1 || afterParen === -1) return;
+
+ // Set the selection to the text between the parenthesis
+ const parenContent = target.value.substring(beforeParen + 1, selectionStart + afterParen);
+ const lastColon = parenContent.lastIndexOf(":");
+ selectionStart = beforeParen + 1;
+ selectionEnd = selectionStart + lastColon;
+ target.setSelectionRange(selectionStart, selectionEnd);
+ }
event.preventDefault();
diff --git a/javascript/hints.js b/javascript/hints.js
index b98012f5..a1fcc93b 100644
--- a/javascript/hints.js
+++ b/javascript/hints.js
@@ -91,6 +91,8 @@ titles = {
"Weighted sum": "Result = A * (1 - M) + B * M",
"Add difference": "Result = A + (B - C) * M",
+
+ "Learning rate": "How fast the training should go. Low values take longer to train; high values may fail to converge (not generate accurate results) and/or may break the embedding (this has happened if you see Loss: nan in the training info textbox; if it does, you need to manually restore your embedding from an older, not-broken backup).\n\nYou can set a single numeric value, or multiple learning rates using the syntax:\n\n  rate_1:max_steps_1, rate_2:max_steps_2, ...\n\nE.g.: 0.005:100, 1e-3:1000, 1e-5\n\nwill train at a rate of 0.005 for the first 100 steps, then 1e-3 until step 1000, then 1e-5 for all remaining steps.",
}
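
For illustration, the stepped learning-rate syntax described in the hint above (comma-separated rate:max_steps pairs, with an open-ended final rate) can be read as a small schedule. The sketch below is only a hedged interpretation of the string format, not the repository's actual LearnRateScheduler (modules/textual_inversion/learn_schedule.py):

    def parse_learn_rate_schedule(schedule: str):
        """Yield (rate, end_step) pairs from a string like '0.005:100, 1e-3:1000, 1e-5'.

        end_step is None for the final, open-ended entry."""
        for part in schedule.split(","):
            rate, _, end = part.strip().partition(":")
            yield float(rate), (int(end) if end else None)

    # parse_learn_rate_schedule("0.005:100, 1e-3:1000, 1e-5")
    # -> (0.005, 100), (0.001, 1000), (1e-05, None)
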
diff --git a/javascript/imageviewer.js b/javascript/imageviewer.js
index d4ab6984..9e380c65 100644
--- a/javascript/imageviewer.js
+++ b/javascript/imageviewer.js
@@ -116,6 +116,7 @@ function showGalleryImage() {
e.dataset.modded = true;
if(e && e.parentElement.tagName == 'DIV'){
e.style.cursor='pointer'
+ e.style.userSelect='none'
e.addEventListener('click', function (evt) {
if(!opts.js_modal_lightbox) return;
modalZoomSet(gradioApp().getElementById('modalImage'), opts.js_modal_lightbox_initially_zoomed)
diff --git a/javascript/localization.js b/javascript/localization.js
new file mode 100644
index 00000000..e6644635
--- /dev/null
+++ b/javascript/localization.js
@@ -0,0 +1,146 @@
+
+// localization = {} -- the dict with translations is created by the backend
+
+ignore_ids_for_localization={
+ setting_sd_hypernetwork: 'OPTION',
+ setting_sd_model_checkpoint: 'OPTION',
+ setting_realesrgan_enabled_models: 'OPTION',
+ modelmerger_primary_model_name: 'OPTION',
+ modelmerger_secondary_model_name: 'OPTION',
+ modelmerger_tertiary_model_name: 'OPTION',
+ train_embedding: 'OPTION',
+ train_hypernetwork: 'OPTION',
+ txt2img_style_index: 'OPTION',
+ txt2img_style2_index: 'OPTION',
+ img2img_style_index: 'OPTION',
+ img2img_style2_index: 'OPTION',
+ setting_random_artist_categories: 'SPAN',
+ setting_face_restoration_model: 'SPAN',
+ setting_realesrgan_enabled_models: 'SPAN',
+ extras_upscaler_1: 'SPAN',
+ extras_upscaler_2: 'SPAN',
+}
+
+re_num = /^[\.\d]+$/
+re_emoji = /[\p{Extended_Pictographic}\u{1F3FB}-\u{1F3FF}\u{1F9B0}-\u{1F9B3}]/u
+
+original_lines = {}
+translated_lines = {}
+
+function textNodesUnder(el){
+ var n, a=[], walk=document.createTreeWalker(el,NodeFilter.SHOW_TEXT,null,false);
+ while(n=walk.nextNode()) a.push(n);
+ return a;
+}
+
+function canBeTranslated(node, text){
+ if(! text) return false;
+ if(! node.parentElement) return false;
+
+ parentType = node.parentElement.nodeName
+ if(parentType=='SCRIPT' || parentType=='STYLE' || parentType=='TEXTAREA') return false;
+
+ if (parentType=='OPTION' || parentType=='SPAN'){
+ pnode = node
+ for(var level=0; level<4; level++){
+ pnode = pnode.parentElement
+ if(! pnode) break;
+
+ if(ignore_ids_for_localization[pnode.id] == parentType) return false;
+ }
+ }
+
+ if(re_num.test(text)) return false;
+ if(re_emoji.test(text)) return false;
+ return true
+}
+
+function getTranslation(text){
+ if(! text) return undefined
+
+ if(translated_lines[text] === undefined){
+ original_lines[text] = 1
+ }
+
+ tl = localization[text]
+ if(tl !== undefined){
+ translated_lines[tl] = 1
+ }
+
+ return tl
+}
+
+function processTextNode(node){
+ text = node.textContent.trim()
+
+ if(! canBeTranslated(node, text)) return
+
+ tl = getTranslation(text)
+ if(tl !== undefined){
+ node.textContent = tl
+ }
+}
+
+function processNode(node){
+ if(node.nodeType == 3){
+ processTextNode(node)
+ return
+ }
+
+ if(node.title){
+ tl = getTranslation(node.title)
+ if(tl !== undefined){
+ node.title = tl
+ }
+ }
+
+ if(node.placeholder){
+ tl = getTranslation(node.placeholder)
+ if(tl !== undefined){
+ node.placeholder = tl
+ }
+ }
+
+ textNodesUnder(node).forEach(function(node){
+ processTextNode(node)
+ })
+}
+
+function dumpTranslations(){
+ dumped = {}
+
+ Object.keys(original_lines).forEach(function(text){
+ if(dumped[text] !== undefined) return
+
+ dumped[text] = localization[text] || text
+ })
+
+ return dumped
+}
+
+onUiUpdate(function(m){
+ m.forEach(function(mutation){
+ mutation.addedNodes.forEach(function(node){
+ processNode(node)
+ })
+ });
+})
+
+
+document.addEventListener("DOMContentLoaded", function() {
+ processNode(gradioApp())
+})
+
+function download_localization() {
+ text = JSON.stringify(dumpTranslations(), null, 4)
+
+ var element = document.createElement('a');
+ element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text));
+ element.setAttribute('download', "localization.json");
+ element.style.display = 'none';
+ document.body.appendChild(element);
+
+ element.click();
+
+ document.body.removeChild(element);
+}
diff --git a/javascript/progressbar.js b/javascript/progressbar.js
index c7d0343f..7a05726e 100644
--- a/javascript/progressbar.js
+++ b/javascript/progressbar.js
@@ -72,11 +72,17 @@ function check_gallery(id_gallery){
let galleryButtons = gradioApp().querySelectorAll('#'+id_gallery+' .gallery-item')
let galleryBtnSelected = gradioApp().querySelector('#'+id_gallery+' .gallery-item.\\!ring-2')
if (prevSelectedIndex !== -1 && galleryButtons.length>prevSelectedIndex && !galleryBtnSelected) {
- //automatically re-open previously selected index (if exists)
- activeElement = document.activeElement;
+ // automatically re-open previously selected index (if exists)
+ activeElement = gradioApp().activeElement;
+
galleryButtons[prevSelectedIndex].click();
showGalleryImage();
- if(activeElement) activeElement.focus()
+
+ if(activeElement){
+ // I fought this for about an hour; I don't know why the focus is lost or why this helps recover it
+ // if someone has a better solution, please, by all means
+ setTimeout(function() { activeElement.focus() }, 1);
+ }
}
})
galleryObservers[id_gallery].observe( gallery, { childList:true, subtree:false })
diff --git a/javascript/ui.js b/javascript/ui.js
index 9e1bed4c..cfd0dcd3 100644
--- a/javascript/ui.js
+++ b/javascript/ui.js
@@ -1,5 +1,12 @@
// various functions for interation with ui.py not large enough to warrant putting them in separate files
+function set_theme(theme){
+ gradioURL = window.location.href
+ if (!gradioURL.includes('?__theme=')) {
+ window.location.replace(gradioURL + '?__theme=' + theme);
+ }
+}
+
function selected_gallery_index(){
var buttons = gradioApp().querySelectorAll('[style="display: block;"].tabitem .gallery-item')
var button = gradioApp().querySelector('[style="display: block;"].tabitem .gallery-item.\\!ring-2')
diff --git a/launch.py b/launch.py
index 7520cfee..333f308a 100644
--- a/launch.py
+++ b/launch.py
@@ -86,7 +86,24 @@ def git_clone(url, dir, name, commithash=None):
if commithash is not None:
run(f'"{git}" -C {dir} checkout {commithash}', None, "Couldn't checkout {name}'s hash: {commithash}")
-
+
+def version_check(commit):
+ try:
+ import requests
+ commits = requests.get('https://api.github.com/repos/AUTOMATIC1111/stable-diffusion-webui/branches/master').json()
+ if commit != "<none>" and commits['commit']['sha'] != commit:
+ print("--------------------------------------------------------")
+ print("| You are not up to date with the most recent release. |")
+ print("| Consider running `git pull` to update. |")
+ print("--------------------------------------------------------")
+ elif commits['commit']['sha'] == commit:
+ print("You are up to date with the most recent release.")
+ else:
+ print("Not a git clone, can't perform version check.")
+ except Exception as e:
+ print("version check failed", e)
+
+
def prepare_enviroment():
torch_command = os.environ.get('TORCH_COMMAND', "pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113")
requirements_file = os.environ.get('REQS_FILE', "requirements_versions.txt")
@@ -110,13 +127,14 @@ def prepare_enviroment():
codeformer_commit_hash = os.environ.get('CODEFORMER_COMMIT_HASH', "c5b4593074ba6214284d6acd5f1719b6c5d739af")
blip_commit_hash = os.environ.get('BLIP_COMMIT_HASH', "48211a1594f1321b00f14c9f7a5b4813144b2fb9")
- args = shlex.split(commandline_args)
+ sys.argv += shlex.split(commandline_args)
- args, skip_torch_cuda_test = extract_arg(args, '--skip-torch-cuda-test')
- args, reinstall_xformers = extract_arg(args, '--reinstall-xformers')
- xformers = '--xformers' in args
- deepdanbooru = '--deepdanbooru' in args
- ngrok = '--ngrok' in args
+ sys.argv, skip_torch_cuda_test = extract_arg(sys.argv, '--skip-torch-cuda-test')
+ sys.argv, reinstall_xformers = extract_arg(sys.argv, '--reinstall-xformers')
+ sys.argv, update_check = extract_arg(sys.argv, '--update-check')
+ xformers = '--xformers' in sys.argv
+ deepdanbooru = '--deepdanbooru' in sys.argv
+ ngrok = '--ngrok' in sys.argv
try:
commit = run(f"{git} rev-parse HEAD").strip()
@@ -125,7 +143,7 @@ def prepare_enviroment():
print(f"Python {sys.version}")
print(f"Commit hash: {commit}")
-
+
if not is_installed("torch") or not is_installed("torchvision"):
run(f'"{python}" -m {torch_command}', "Installing torch and torchvision", "Couldn't install torch")
@@ -138,9 +156,15 @@ def prepare_enviroment():
if not is_installed("clip"):
run_pip(f"install {clip_package}", "clip")
- if (not is_installed("xformers") or reinstall_xformers) and xformers and platform.python_version().startswith("3.10"):
+ if (not is_installed("xformers") or reinstall_xformers) and xformers:
if platform.system() == "Windows":
- run_pip(f"install -U -I --no-deps {xformers_windows_package}", "xformers")
+ if platform.python_version().startswith("3.10"):
+ run_pip(f"install -U -I --no-deps {xformers_windows_package}", "xformers")
+ else:
+ print("Installation of xformers is not supported in this version of Python.")
+ print("You can also check this and build manually: https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers#building-xformers-on-windows-by-duckness")
+ if not is_installed("xformers"):
+ exit(0)
elif platform.system() == "Linux":
run_pip("install xformers", "xformers")
@@ -163,9 +187,10 @@ def prepare_enviroment():
run_pip(f"install -r {requirements_file}", "requirements for Web UI")
- sys.argv += args
-
- if "--exit" in args:
+ if update_check:
+ version_check(commit)
+
+ if "--exit" in sys.argv:
print("Exiting because of --exit argument")
exit(0)
diff --git a/localizations/Put localization files here.txt b/localizations/Put localization files here.txt
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/localizations/Put localization files here.txt
diff --git a/modules/aesthetic_clip.py b/modules/aesthetic_clip.py
new file mode 100644
index 00000000..8c828541
--- /dev/null
+++ b/modules/aesthetic_clip.py
@@ -0,0 +1,241 @@
+import copy
+import itertools
+import os
+from pathlib import Path
+import html
+import gc
+
+import gradio as gr
+import torch
+from PIL import Image
+from torch import optim
+
+from modules import shared
+from transformers import CLIPModel, CLIPProcessor, CLIPTokenizer
+from tqdm.auto import tqdm, trange
+from modules.shared import opts, device
+
+
+def get_all_images_in_folder(folder):
+ return [os.path.join(folder, f) for f in os.listdir(folder) if
+ os.path.isfile(os.path.join(folder, f)) and check_is_valid_image_file(f)]
+
+
+def check_is_valid_image_file(filename):
+ return filename.lower().endswith(('.png', '.jpg', '.jpeg', ".gif", ".tiff", ".webp"))
+
+
+def batched(dataset, total, n=1):
+ for ndx in range(0, total, n):
+ yield [dataset.__getitem__(i) for i in range(ndx, min(ndx + n, total))]
+
+
+def iter_to_batched(iterable, n=1):
+ it = iter(iterable)
+ while True:
+ chunk = tuple(itertools.islice(it, n))
+ if not chunk:
+ return
+ yield chunk
+
+
+def create_ui():
+ import modules.ui
+
+ with gr.Group():
+ with gr.Accordion("Open for Clip Aesthetic!", open=False):
+ with gr.Row():
+ aesthetic_weight = gr.Slider(minimum=0, maximum=1, step=0.01, label="Aesthetic weight",
+ value=0.9)
+ aesthetic_steps = gr.Slider(minimum=0, maximum=50, step=1, label="Aesthetic steps", value=5)
+
+ with gr.Row():
+ aesthetic_lr = gr.Textbox(label='Aesthetic learning rate',
+ placeholder="Aesthetic learning rate", value="0.0001")
+ aesthetic_slerp = gr.Checkbox(label="Slerp interpolation", value=False)
+ aesthetic_imgs = gr.Dropdown(sorted(shared.aesthetic_embeddings.keys()),
+ label="Aesthetic imgs embedding",
+ value="None")
+
+ modules.ui.create_refresh_button(aesthetic_imgs, shared.update_aesthetic_embeddings, lambda: {"choices": sorted(shared.aesthetic_embeddings.keys())}, "refresh_aesthetic_embeddings")
+
+ with gr.Row():
+ aesthetic_imgs_text = gr.Textbox(label='Aesthetic text for imgs',
+ placeholder="This text is used to rotate the feature space of the imgs embs",
+ value="")
+ aesthetic_slerp_angle = gr.Slider(label='Slerp angle', minimum=0, maximum=1, step=0.01,
+ value=0.1)
+ aesthetic_text_negative = gr.Checkbox(label="Is negative text", value=False)
+
+ return aesthetic_weight, aesthetic_steps, aesthetic_lr, aesthetic_slerp, aesthetic_imgs, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative
+
+
+aesthetic_clip_model = None
+
+
+def aesthetic_clip():
+ global aesthetic_clip_model
+
+ if aesthetic_clip_model is None or aesthetic_clip_model.name_or_path != shared.sd_model.cond_stage_model.wrapped.transformer.name_or_path:
+ aesthetic_clip_model = CLIPModel.from_pretrained(shared.sd_model.cond_stage_model.wrapped.transformer.name_or_path)
+ aesthetic_clip_model.cpu()
+
+ return aesthetic_clip_model
+
+
+def generate_imgs_embd(name, folder, batch_size):
+ model = aesthetic_clip().to(device)
+ processor = CLIPProcessor.from_pretrained(model.name_or_path)
+
+ with torch.no_grad():
+ embs = []
+ for paths in tqdm(iter_to_batched(get_all_images_in_folder(folder), batch_size),
+ desc=f"Generating embeddings for {name}"):
+ if shared.state.interrupted:
+ break
+ inputs = processor(images=[Image.open(path) for path in paths], return_tensors="pt").to(device)
+ outputs = model.get_image_features(**inputs).cpu()
+ embs.append(torch.clone(outputs))
+ inputs.to("cpu")
+ del inputs, outputs
+
+ embs = torch.cat(embs, dim=0).mean(dim=0, keepdim=True)
+
+ # The generated embedding will be located here
+ path = str(Path(shared.cmd_opts.aesthetic_embeddings_dir) / f"{name}.pt")
+ torch.save(embs, path)
+
+ model.cpu()
+ del processor
+ del embs
+ gc.collect()
+ torch.cuda.empty_cache()
+ res = f"""
+ Done generating embedding for {name}!
+ Aesthetic embedding saved to {html.escape(path)}
+ """
+ shared.update_aesthetic_embeddings()
+ return gr.Dropdown.update(choices=sorted(shared.aesthetic_embeddings.keys()), label="Imgs embedding",
+ value="None"), \
+ gr.Dropdown.update(choices=sorted(shared.aesthetic_embeddings.keys()),
+ label="Imgs embedding",
+ value="None"), res, ""
+
+
+def slerp(low, high, val):
+ low_norm = low / torch.norm(low, dim=1, keepdim=True)
+ high_norm = high / torch.norm(high, dim=1, keepdim=True)
+ omega = torch.acos((low_norm * high_norm).sum(1))
+ so = torch.sin(omega)
+ res = (torch.sin((1.0 - val) * omega) / so).unsqueeze(1) * low + (torch.sin(val * omega) / so).unsqueeze(1) * high
+ return res
+
+
+class AestheticCLIP:
+ def __init__(self):
+ self.skip = False
+ self.aesthetic_steps = 0
+ self.aesthetic_weight = 0
+ self.aesthetic_lr = 0
+ self.slerp = False
+ self.aesthetic_text_negative = ""
+ self.aesthetic_slerp_angle = 0
+ self.aesthetic_imgs_text = ""
+
+ self.image_embs_name = None
+ self.image_embs = None
+ self.load_image_embs(None)
+
+ def set_aesthetic_params(self, p, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, image_embs_name=None,
+ aesthetic_slerp=True, aesthetic_imgs_text="",
+ aesthetic_slerp_angle=0.15,
+ aesthetic_text_negative=False):
+ self.aesthetic_imgs_text = aesthetic_imgs_text
+ self.aesthetic_slerp_angle = aesthetic_slerp_angle
+ self.aesthetic_text_negative = aesthetic_text_negative
+ self.slerp = aesthetic_slerp
+ self.aesthetic_lr = aesthetic_lr
+ self.aesthetic_weight = aesthetic_weight
+ self.aesthetic_steps = aesthetic_steps
+ self.load_image_embs(image_embs_name)
+
+ if self.image_embs_name is not None:
+ p.extra_generation_params.update({
+ "Aesthetic LR": aesthetic_lr,
+ "Aesthetic weight": aesthetic_weight,
+ "Aesthetic steps": aesthetic_steps,
+ "Aesthetic embedding": self.image_embs_name,
+ "Aesthetic slerp": aesthetic_slerp,
+ "Aesthetic text": aesthetic_imgs_text,
+ "Aesthetic text negative": aesthetic_text_negative,
+ "Aesthetic slerp angle": aesthetic_slerp_angle,
+ })
+
+ def set_skip(self, skip):
+ self.skip = skip
+
+ def load_image_embs(self, image_embs_name):
+ if image_embs_name is None or len(image_embs_name) == 0 or image_embs_name == "None":
+ image_embs_name = None
+ self.image_embs_name = None
+ if image_embs_name is not None and self.image_embs_name != image_embs_name:
+ self.image_embs_name = image_embs_name
+ self.image_embs = torch.load(shared.aesthetic_embeddings[self.image_embs_name], map_location=device)
+ self.image_embs /= self.image_embs.norm(dim=-1, keepdim=True)
+ self.image_embs.requires_grad_(False)
+
+ def __call__(self, z, remade_batch_tokens):
+ if not self.skip and self.aesthetic_steps != 0 and self.aesthetic_lr != 0 and self.aesthetic_weight != 0 and self.image_embs_name is not None:
+ tokenizer = shared.sd_model.cond_stage_model.tokenizer
+ if not opts.use_old_emphasis_implementation:
+ remade_batch_tokens = [
+ [tokenizer.bos_token_id] + x[:75] + [tokenizer.eos_token_id] for x in
+ remade_batch_tokens]
+
+ tokens = torch.asarray(remade_batch_tokens).to(device)
+
+ model = copy.deepcopy(aesthetic_clip()).to(device)
+ model.requires_grad_(True)
+ if self.aesthetic_imgs_text is not None and len(self.aesthetic_imgs_text) > 0:
+ text_embs_2 = model.get_text_features(
+ **tokenizer([self.aesthetic_imgs_text], padding=True, return_tensors="pt").to(device))
+ if self.aesthetic_text_negative:
+ text_embs_2 = self.image_embs - text_embs_2
+ text_embs_2 /= text_embs_2.norm(dim=-1, keepdim=True)
+ img_embs = slerp(self.image_embs, text_embs_2, self.aesthetic_slerp_angle)
+ else:
+ img_embs = self.image_embs
+
+ with torch.enable_grad():
+
+ # We optimize the model to maximize the similarity
+ optimizer = optim.Adam(
+ model.text_model.parameters(), lr=self.aesthetic_lr
+ )
+
+ for _ in trange(self.aesthetic_steps, desc="Aesthetic optimization"):
+ text_embs = model.get_text_features(input_ids=tokens)
+ text_embs = text_embs / text_embs.norm(dim=-1, keepdim=True)
+ sim = text_embs @ img_embs.T
+ loss = -sim
+ optimizer.zero_grad()
+ loss.mean().backward()
+ optimizer.step()
+
+ zn = model.text_model(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers)
+ if opts.CLIP_stop_at_last_layers > 1:
+ zn = zn.hidden_states[-opts.CLIP_stop_at_last_layers]
+ zn = model.text_model.final_layer_norm(zn)
+ else:
+ zn = zn.last_hidden_state
+ model.cpu()
+ del model
+ gc.collect()
+ torch.cuda.empty_cache()
+ zn = torch.concat([zn[77 * i:77 * (i + 1)] for i in range(max(z.shape[1] // 77, 1))], 1)
+ if self.slerp:
+ z = slerp(z, zn, self.aesthetic_weight)
+ else:
+ z = z * (1 - self.aesthetic_weight) + zn * self.aesthetic_weight
+
+ return z
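
The slerp() helper defined above interpolates two sets of embeddings along the unit sphere, and aesthetic_weight controls how far the conditioning moves toward the optimized text embedding. As a hedged sanity check only (it assumes the module added in this diff and unit-norm row vectors), the endpoints of that interpolation behave as expected:

    import torch
    from modules.aesthetic_clip import slerp  # module introduced in this diff

    # slerp(low, high, 0.0) should return low; slerp(low, high, 1.0) should return high
    low = torch.nn.functional.normalize(torch.randn(1, 4), dim=1)
    high = torch.nn.functional.normalize(torch.randn(1, 4), dim=1)
    assert torch.allclose(slerp(low, high, 0.0), low, atol=1e-5)
    assert torch.allclose(slerp(low, high, 1.0), high, atol=1e-5)
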
diff --git a/modules/api/api.py b/modules/api/api.py
new file mode 100644
index 00000000..5b0c934e
--- /dev/null
+++ b/modules/api/api.py
@@ -0,0 +1,68 @@
+from modules.api.processing import StableDiffusionProcessingAPI
+from modules.processing import StableDiffusionProcessingTxt2Img, process_images
+from modules.sd_samplers import all_samplers
+from modules.extras import run_pnginfo
+import modules.shared as shared
+import uvicorn
+from fastapi import Body, APIRouter, HTTPException
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field, Json
+import json
+import io
+import base64
+
+sampler_to_index = lambda name: next(filter(lambda row: name.lower() == row[1].name.lower(), enumerate(all_samplers)), None)
+
+class TextToImageResponse(BaseModel):
+ images: list[str] = Field(default=None, title="Image", description="The generated image in base64 format.")
+ parameters: Json
+ info: Json
+
+
+class Api:
+ def __init__(self, app, queue_lock):
+ self.router = APIRouter()
+ self.app = app
+ self.queue_lock = queue_lock
+ self.app.add_api_route("/sdapi/v1/txt2img", self.text2imgapi, methods=["POST"])
+
+ def text2imgapi(self, txt2imgreq: StableDiffusionProcessingAPI ):
+ sampler_index = sampler_to_index(txt2imgreq.sampler_index)
+
+ if sampler_index is None:
+ raise HTTPException(status_code=404, detail="Sampler not found")
+
+ populate = txt2imgreq.copy(update={ # Override __init__ params
+ "sd_model": shared.sd_model,
+ "sampler_index": sampler_index[0],
+ "do_not_save_samples": True,
+ "do_not_save_grid": True
+ }
+ )
+ p = StableDiffusionProcessingTxt2Img(**vars(populate))
+ # Override object param
+ with self.queue_lock:
+ processed = process_images(p)
+
+ b64images = []
+ for i in processed.images:
+ buffer = io.BytesIO()
+ i.save(buffer, format="png")
+ b64images.append(base64.b64encode(buffer.getvalue()))
+
+ return TextToImageResponse(images=b64images, parameters=json.dumps(vars(txt2imgreq)), info=json.dumps(processed.info))
+
+
+
+ def img2imgapi(self):
+ raise NotImplementedError
+
+ def extrasapi(self):
+ raise NotImplementedError
+
+ def pnginfoapi(self):
+ raise NotImplementedError
+
+ def launch(self, server_name, port):
+ self.app.include_router(self.router)
+ uvicorn.run(self.app, host=server_name, port=port)
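
For reference, the new txt2img route can be exercised with a small client. The sketch below is an assumption-laden example rather than documented API usage: the address 127.0.0.1:7860 is assumed to be the web UI's default, and the payload fields (prompt, steps, sampler_index) are taken from the defaults visible in this diff.

    import base64, io

    import requests
    from PIL import Image

    payload = {"prompt": "a photo of a cat", "steps": 20, "sampler_index": "Euler"}
    resp = requests.post("http://127.0.0.1:7860/sdapi/v1/txt2img", json=payload)
    resp.raise_for_status()

    # TextToImageResponse.images is a list of base64-encoded PNGs
    for i, b64 in enumerate(resp.json()["images"]):
        Image.open(io.BytesIO(base64.b64decode(b64))).save(f"txt2img_{i}.png")
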
diff --git a/modules/api/processing.py b/modules/api/processing.py
new file mode 100644
index 00000000..4c541241
--- /dev/null
+++ b/modules/api/processing.py
@@ -0,0 +1,99 @@
+from inflection import underscore
+from typing import Any, Dict, Optional
+from pydantic import BaseModel, Field, create_model
+from modules.processing import StableDiffusionProcessingTxt2Img
+import inspect
+
+
+API_NOT_ALLOWED = [
+ "self",
+ "kwargs",
+ "sd_model",
+ "outpath_samples",
+ "outpath_grids",
+ "sampler_index",
+ "do_not_save_samples",
+ "do_not_save_grid",
+ "extra_generation_params",
+ "overlay_images",
+ "do_not_reload_embeddings",
+ "seed_enable_extras",
+ "prompt_for_display",
+ "sampler_noise_scheduler_override",
+ "ddim_discretize"
+]
+
+class ModelDef(BaseModel):
+ """Helper class for Pydantic dynamic model generation"""
+
+ field: str
+ field_alias: str
+ field_type: Any
+ field_value: Any
+
+
+class PydanticModelGenerator:
+ """
+ Takes in created classes and stubs them out in a way FastAPI/Pydantic is happy about:
+ source_data is a snapshot of the default values produced by the class
+ params are the names of the actual keys required by __init__
+ """
+
+ def __init__(
+ self,
+ model_name: str = None,
+ class_instance = None,
+ additional_fields = None,
+ ):
+ def field_type_generator(k, v):
+ # field_type = str if not overrides.get(k) else overrides[k]["type"]
+ # print(k, v.annotation, v.default)
+ field_type = v.annotation
+
+ return Optional[field_type]
+
+ def merge_class_params(class_):
+ all_classes = list(filter(lambda x: x is not object, inspect.getmro(class_)))
+ parameters = {}
+ for classes in all_classes:
+ parameters = {**parameters, **inspect.signature(classes.__init__).parameters}
+ return parameters
+
+
+ self._model_name = model_name
+ self._class_data = merge_class_params(class_instance)
+ self._model_def = [
+ ModelDef(
+ field=underscore(k),
+ field_alias=k,
+ field_type=field_type_generator(k, v),
+ field_value=v.default
+ )
+ for (k,v) in self._class_data.items() if k not in API_NOT_ALLOWED
+ ]
+
+ for fields in additional_fields:
+ self._model_def.append(ModelDef(
+ field=underscore(fields["key"]),
+ field_alias=fields["key"],
+ field_type=fields["type"],
+ field_value=fields["default"]))
+
+ def generate_model(self):
+ """
+ Creates a pydantic BaseModel
+ from the json and overrides provided at initialization
+ """
+ fields = {
+ d.field: (d.field_type, Field(default=d.field_value, alias=d.field_alias)) for d in self._model_def
+ }
+ DynamicModel = create_model(self._model_name, **fields)
+ DynamicModel.__config__.allow_population_by_field_name = True
+ DynamicModel.__config__.allow_mutation = True
+ return DynamicModel
+
+StableDiffusionProcessingAPI = PydanticModelGenerator(
+ "StableDiffusionProcessingTxt2Img",
+ StableDiffusionProcessingTxt2Img,
+ [{"key": "sampler_index", "type": str, "default": "Euler"}]
+).generate_model()
\ No newline at end of file
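
Once generated, StableDiffusionProcessingAPI behaves like any other pydantic model: omitted fields fall back to the defaults captured from StableDiffusionProcessingTxt2Img.__init__, and population by field name is allowed. A hedged usage sketch (the field names prompt, steps, width, height are assumptions taken from that constructor):

    from modules.api.processing import StableDiffusionProcessingAPI

    req = StableDiffusionProcessingAPI(prompt="a mountain lake", steps=20,
                                       width=512, height=512, sampler_index="Euler")
    print(req.dict()["prompt"])  # validated model; unset fields keep their defaults
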
diff --git a/modules/deepbooru.py b/modules/deepbooru.py
index 4ad334a1..8914662d 100644
--- a/modules/deepbooru.py
+++ b/modules/deepbooru.py
@@ -157,8 +157,7 @@ def get_deepbooru_tags_from_model(model, tags, pil_image, threshold, deepbooru_o
# sort by reverse by likelihood and normal for alpha, and format tag text as requested
unsorted_tags_in_theshold.sort(key=lambda y: y[sort_ndx], reverse=(not alpha_sort))
for weight, tag in unsorted_tags_in_theshold:
- # note: tag_outformat will still have a colon if include_ranks is True
- tag_outformat = tag.replace(':', ' ')
+ tag_outformat = tag
if use_spaces:
tag_outformat = tag_outformat.replace('_', ' ')
if use_escape:
diff --git a/modules/extras.py b/modules/extras.py
index 8dbab240..22c5a1c1 100644
--- a/modules/extras.py
+++ b/modules/extras.py
@@ -39,9 +39,12 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
if input_dir == '':
return outputs, "Please select an input directory.", ''
- image_list = [file for file in [os.path.join(input_dir, x) for x in os.listdir(input_dir)] if os.path.isfile(file)]
+ image_list = [file for file in [os.path.join(input_dir, x) for x in sorted(os.listdir(input_dir))] if os.path.isfile(file)]
for img in image_list:
- image = Image.open(img)
+ try:
+ image = Image.open(img)
+ except Exception:
+ continue
imageArr.append(image)
imageNameArr.append(img)
else:
@@ -91,7 +94,8 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
def upscale(image, scaler_index, resize, mode, resize_w, resize_h, crop):
small = image.crop((image.width // 2, image.height // 2, image.width // 2 + 10, image.height // 2 + 10))
pixels = tuple(np.array(small).flatten().tolist())
- key = (resize, scaler_index, image.width, image.height, gfpgan_visibility, codeformer_visibility, codeformer_weight) + pixels
+ key = (resize, scaler_index, image.width, image.height, gfpgan_visibility, codeformer_visibility, codeformer_weight,
+ resize_mode, upscaling_resize, upscaling_resize_w, upscaling_resize_h, upscaling_crop) + pixels
c = cached_images.get(key)
if c is None:
@@ -117,10 +121,14 @@ def run_extras(extras_mode, resize_mode, image, image_folder, input_dir, output_
while len(cached_images) > 2:
del cached_images[next(iter(cached_images.keys()))]
+
+ if opts.use_original_name_batch and image_name != None:
+ basename = os.path.splitext(os.path.basename(image_name))[0]
+ else:
+ basename = ''
- images.save_image(image, path=outpath, basename="", seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,
- no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo,
- forced_filename=image_name if opts.use_original_name_batch else None)
+ images.save_image(image, path=outpath, basename=basename, seed=None, prompt=None, extension=opts.samples_format, info=info, short_filename=True,
+ no_prompt=True, grid=False, pnginfo_section_name="extras", existing_info=existing_pnginfo, forced_filename=None)
if opts.enable_pnginfo:
image.info = existing_pnginfo
@@ -215,8 +223,11 @@ def run_modelmerger(primary_model_name, secondary_model_name, teritary_model_nam
if theta_func1:
for key in tqdm.tqdm(theta_1.keys()):
if 'model' in key:
- t2 = theta_2.get(key, torch.zeros_like(theta_1[key]))
- theta_1[key] = theta_func1(theta_1[key], t2)
+ if key in theta_2:
+ t2 = theta_2.get(key, torch.zeros_like(theta_1[key]))
+ theta_1[key] = theta_func1(theta_1[key], t2)
+ else:
+ theta_1[key] = torch.zeros_like(theta_1[key])
del theta_2, teritary_model
for key in tqdm.tqdm(theta_0.keys()):
diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index c27826b6..f73647da 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -4,13 +4,22 @@ import gradio as gr
from modules.shared import script_path
from modules import shared
-re_param_code = r"\s*([\w ]+):\s*([^,]+)(?:,|$)"
+re_param_code = r'\s*([\w ]+):\s*("(?:\\|\"|[^\"])+"|[^,]*)(?:,|$)'
re_param = re.compile(re_param_code)
re_params = re.compile(r"^(?:" + re_param_code + "){3,}$")
re_imagesize = re.compile(r"^(\d+)x(\d+)$")
type_of_gr_update = type(gr.update())
+def quote(text):
+ if ',' not in str(text):
+ return text
+
+ text = str(text)
+ text = text.replace('\\', '\\\\')
+ text = text.replace('"', '\\"')
+ return f'"{text}"'
+
def parse_generation_parameters(x: str):
"""parses generation parameters string, the one you see in text field under the picture in UI:
```
@@ -45,11 +54,8 @@ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model
else:
prompt += ("" if prompt == "" else "\n") + line
- if len(prompt) > 0:
- res["Prompt"] = prompt
-
- if len(negative_prompt) > 0:
- res["Negative prompt"] = negative_prompt
+ res["Prompt"] = prompt
+ res["Negative prompt"] = negative_prompt
for k, v in re_param.findall(lastline):
m = re_imagesize.match(v)
@@ -86,7 +92,12 @@ def connect_paste(button, paste_fields, input_comp, js=None):
else:
try:
valtype = type(output.value)
- val = valtype(v)
+
+ if valtype == bool and v == "False":
+ val = False
+ else:
+ val = valtype(v)
+
res.append(gr.update(value=val))
except Exception:
res.append(gr.update())
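
The stricter re_param_code pattern and the new quote() helper above exist so that parameter values containing commas survive the infotext round trip. A hedged, standalone illustration follows; the regex and a copy of quote() are repeated so the snippet runs on its own, and "Aesthetic text" is one of the comma-prone parameters added elsewhere in this diff:

    import re

    re_param = re.compile(r'\s*([\w ]+):\s*("(?:\\|\"|[^\"])+"|[^,]*)(?:,|$)')

    def quote(text):
        # same idea as the quote() added above: escape and wrap values containing commas
        if ',' not in str(text):
            return text
        text = str(text).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{text}"'

    line = 'Steps: 20, Aesthetic text: ' + quote("watercolor, pastel")
    print(re_param.findall(line))
    # [('Steps', '20'), ('Aesthetic text', '"watercolor, pastel"')]
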
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index 4905710e..47d91ea5 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -22,25 +22,67 @@ from modules.textual_inversion.learn_schedule import LearnRateScheduler
class HypernetworkModule(torch.nn.Module):
multiplier = 1.0
- def __init__(self, dim, state_dict=None):
+ def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False, activation_func=None):
super().__init__()
- self.linear1 = torch.nn.Linear(dim, dim * 2)
- self.linear2 = torch.nn.Linear(dim * 2, dim)
+ assert layer_structure is not None, "layer_structure must not be None"
+ assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!"
+ assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!"
+
+ linears = []
+ for i in range(len(layer_structure) - 1):
+ linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1])))
+
+ if activation_func == "relu":
+ linears.append(torch.nn.ReLU())
+ elif activation_func == "leakyrelu":
+ linears.append(torch.nn.LeakyReLU())
+ elif activation_func == 'linear' or activation_func is None:
+ pass
+ else:
+ raise RuntimeError(f'hypernetwork uses an unsupported activation function: {activation_func}')
+
+ if add_layer_norm:
+ linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1])))
+
+ self.linear = torch.nn.Sequential(*linears)
if state_dict is not None:
- self.load_state_dict(state_dict, strict=True)
+ self.fix_old_state_dict(state_dict)
+ self.load_state_dict(state_dict)
else:
-
- self.linear1.weight.data.normal_(mean=0.0, std=0.01)
- self.linear1.bias.data.zero_()
- self.linear2.weight.data.normal_(mean=0.0, std=0.01)
- self.linear2.bias.data.zero_()
+ for layer in self.linear:
+ if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm:
+ layer.weight.data.normal_(mean=0.0, std=0.01)
+ layer.bias.data.zero_()
self.to(devices.device)
+ def fix_old_state_dict(self, state_dict):
+ changes = {
+ 'linear1.bias': 'linear.0.bias',
+ 'linear1.weight': 'linear.0.weight',
+ 'linear2.bias': 'linear.1.bias',
+ 'linear2.weight': 'linear.1.weight',
+ }
+
+ for fr, to in changes.items():
+ x = state_dict.get(fr, None)
+ if x is None:
+ continue
+
+ del state_dict[fr]
+ state_dict[to] = x
+
def forward(self, x):
- return x + (self.linear2(self.linear1(x))) * self.multiplier
+ return x + self.linear(x) * self.multiplier
+
+ def trainables(self):
+ layer_structure = []
+ for layer in self.linear:
+ if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm:
+ layer_structure += [layer.weight, layer.bias]
+ return layer_structure
def apply_strength(value=None):
@@ -51,16 +93,22 @@ class Hypernetwork:
filename = None
name = None
- def __init__(self, name=None, enable_sizes=None):
+ def __init__(self, name=None, enable_sizes=None, layer_structure=None, add_layer_norm=False, activation_func=None):
self.filename = None
self.name = name
self.layers = {}
self.step = 0
self.sd_checkpoint = None
self.sd_checkpoint_name = None
+ self.layer_structure = layer_structure
+ self.add_layer_norm = add_layer_norm
+ self.activation_func = activation_func
for size in enable_sizes or []:
- self.layers[size] = (HypernetworkModule(size), HypernetworkModule(size))
+ self.layers[size] = (
+ HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func),
+ HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func),
+ )
def weights(self):
res = []
@@ -68,7 +116,7 @@ class Hypernetwork:
for k, layers in self.layers.items():
for layer in layers:
layer.train()
- res += [layer.linear1.weight, layer.linear1.bias, layer.linear2.weight, layer.linear2.bias]
+ res += layer.trainables()
return res
@@ -80,6 +128,9 @@ class Hypernetwork:
state_dict['step'] = self.step
state_dict['name'] = self.name
+ state_dict['layer_structure'] = self.layer_structure
+ state_dict['is_layer_norm'] = self.add_layer_norm
+ state_dict['activation_func'] = self.activation_func
state_dict['sd_checkpoint'] = self.sd_checkpoint
state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name
@@ -92,9 +143,16 @@ class Hypernetwork:
state_dict = torch.load(filename, map_location='cpu')
+ self.layer_structure = state_dict.get('layer_structure', [1, 2, 1])
+ self.add_layer_norm = state_dict.get('is_layer_norm', False)
+ self.activation_func = state_dict.get('activation_func', None)
+
for size, sd in state_dict.items():
if type(size) == int:
- self.layers[size] = (HypernetworkModule(size, sd[0]), HypernetworkModule(size, sd[1]))
+ self.layers[size] = (
+ HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm, self.activation_func),
+ HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm, self.activation_func),
+ )
self.name = state_dict.get('name', self.name)
self.step = state_dict.get('step', 0)
@@ -196,7 +254,11 @@ def stack_conds(conds):
return torch.stack(conds)
-def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
+
+def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height):
+ # images allows training previews to have infotext. Importing it at the top causes a circular import problem.
+ from modules import images
+
assert hypernetwork_name, 'hypernetwork not selected'
path = shared.hypernetworks.get(hypernetwork_name, None)
@@ -225,8 +287,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..."
with torch.autocast("cuda"):
- ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=512, height=512, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size)
-
+ ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size)
if unload:
shared.sd_model.cond_stage_model.to(devices.cpu)
shared.sd_model.first_stage_model.to(devices.cpu)
@@ -240,6 +301,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
last_saved_file = "<none>"
last_saved_image = "<none>"
+ forced_filename = "<none>"
ititial_step = hypernetwork.step or 0
if ititial_step > steps:
@@ -261,7 +323,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
with torch.autocast("cuda"):
c = stack_conds([entry.cond for entry in entries]).to(devices.device)
-# c = torch.vstack([entry.cond for entry in entries]).to(devices.device)
+ # c = torch.vstack([entry.cond for entry in entries]).to(devices.device)
x = torch.stack([entry.latent for entry in entries]).to(devices.device)
loss = shared.sd_model(x, c)[0]
del x
@@ -278,7 +340,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
pbar.set_description(f"loss: {mean_loss:.7f}")
if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0:
- last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name}-{hypernetwork.step}.pt')
+ # Before saving, change name to match current checkpoint.
+ hypernetwork.name = f'{hypernetwork_name}-{hypernetwork.step}'
+ last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt')
hypernetwork.save(last_saved_file)
textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), {
@@ -287,7 +351,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
})
if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0:
- last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png')
+ forced_filename = f'{hypernetwork_name}-{hypernetwork.step}'
+ last_saved_image = os.path.join(images_dir, forced_filename)
optimizer.zero_grad()
shared.sd_model.cond_stage_model.to(devices.device)
@@ -323,7 +388,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
if image is not None:
shared.state.current_image = image
- image.save(last_saved_image)
+ last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename)
last_saved_image += f", prompt: {preview_text}"
shared.state.job_no = hypernetwork.step
@@ -333,7 +398,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
Loss: {mean_loss:.7f}<br/>
Step: {hypernetwork.step}<br/>
Last prompt: {html.escape(entries[0].cond_text)}<br/>
-Last saved embedding: {html.escape(last_saved_file)}<br/>
+Last saved hypernetwork: {html.escape(last_saved_file)}<br/>
Last saved image: {html.escape(last_saved_image)}<br/>
</p>
"""
@@ -342,6 +407,9 @@ Last saved image: {html.escape(last_saved_image)}<br/>
hypernetwork.sd_checkpoint = checkpoint.hash
hypernetwork.sd_checkpoint_name = checkpoint.model_name
+ # Before saving for the last time, change name back to the base name (as opposed to the save_hypernetwork_every step-suffixed naming convention).
+ hypernetwork.name = hypernetwork_name
+ filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork.name}.pt')
hypernetwork.save(filename)
return hypernetwork, filename
diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py
index dfa599af..e6f50a1f 100644
--- a/modules/hypernetworks/ui.py
+++ b/modules/hypernetworks/ui.py
@@ -1,5 +1,6 @@
import html
import os
+import re
import gradio as gr
@@ -9,11 +10,24 @@ from modules import sd_hijack, shared, devices
from modules.hypernetworks import hypernetwork
-def create_hypernetwork(name, enable_sizes):
- fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt")
- assert not os.path.exists(fn), f"file {fn} already exists"
+def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, add_layer_norm=False, activation_func=None):
+ # Remove illegal characters from name.
+ name = "".join( x for x in name if (x.isalnum() or x in "._- "))
- hypernet = modules.hypernetworks.hypernetwork.Hypernetwork(name=name, enable_sizes=[int(x) for x in enable_sizes])
+ fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt")
+ if not overwrite_old:
+ assert not os.path.exists(fn), f"file {fn} already exists"
+
+ if type(layer_structure) == str:
+ layer_structure = [float(x.strip()) for x in layer_structure.split(",")]
+
+ hypernet = modules.hypernetworks.hypernetwork.Hypernetwork(
+ name=name,
+ enable_sizes=[int(x) for x in enable_sizes],
+ layer_structure=layer_structure,
+ add_layer_norm=add_layer_norm,
+ activation_func=activation_func,
+ )
hypernet.save(fn)
shared.reload_hypernetworks()
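The two input-normalisation steps added above are easy to test in isolation; a small standalone sketch (the directory path is made up for illustration):

import os

def sanitize_and_parse(name, layer_structure, hypernetwork_dir="models/hypernetworks", overwrite_old=False):
    # Keep only filesystem-safe characters, mirroring the filter in create_hypernetwork.
    name = "".join(x for x in name if x.isalnum() or x in "._- ")
    fn = os.path.join(hypernetwork_dir, f"{name}.pt")
    if not overwrite_old and os.path.exists(fn):
        raise FileExistsError(f"file {fn} already exists")
    # The UI passes layer_structure as a comma-separated string such as "1, 2, 1".
    if isinstance(layer_structure, str):
        layer_structure = [float(x.strip()) for x in layer_structure.split(",")]
    return name, fn, layer_structure

print(sanitize_and_parse("my net: v2?", "1, 2, 1"))
# ('my net v2', 'models/hypernetworks/my net v2.pt', [1.0, 2.0, 1.0])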
diff --git a/modules/img2img.py b/modules/img2img.py
index 24126774..eea5199b 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -56,7 +56,7 @@ def process_batch(p, input_dir, output_dir, args):
processed_image.save(os.path.join(output_dir, filename))
-def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, init_img, init_img_with_mask, init_img_inpaint, init_mask_inpaint, mask_mode, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, *args):
+def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, init_img, init_img_with_mask, init_img_inpaint, init_mask_inpaint, mask_mode, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, resize_mode: int, inpaint_full_res: bool, inpaint_full_res_padding: int, inpainting_mask_invert: int, img2img_batch_input_dir: str, img2img_batch_output_dir: str, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, aesthetic_imgs=None, aesthetic_slerp=False, aesthetic_imgs_text="", aesthetic_slerp_angle=0.15, aesthetic_text_negative=False, *args):
is_inpaint = mode == 1
is_batch = mode == 2
@@ -109,6 +109,8 @@ def img2img(mode: int, prompt: str, negative_prompt: str, prompt_style: str, pro
inpainting_mask_invert=inpainting_mask_invert,
)
+ shared.aesthetic_clip.set_aesthetic_params(p, float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative)
+
if shared.cmd_opts.enable_console_prompts:
print(f"\nimg2img: {prompt}", file=shared.progress_print_out)
diff --git a/modules/interrogate.py b/modules/interrogate.py
index 64b91eb4..65b05d34 100644
--- a/modules/interrogate.py
+++ b/modules/interrogate.py
@@ -28,9 +28,11 @@ class InterrogateModels:
clip_preprocess = None
categories = None
dtype = None
+ running_on_cpu = None
def __init__(self, content_dir):
self.categories = []
+ self.running_on_cpu = devices.device_interrogate == torch.device("cpu")
if os.path.exists(content_dir):
for filename in os.listdir(content_dir):
@@ -53,7 +55,11 @@ class InterrogateModels:
def load_clip_model(self):
import clip
- model, preprocess = clip.load(clip_model_name)
+ if self.running_on_cpu:
+ model, preprocess = clip.load(clip_model_name, device="cpu")
+ else:
+ model, preprocess = clip.load(clip_model_name)
+
model.eval()
model = model.to(devices.device_interrogate)
@@ -62,14 +68,14 @@ class InterrogateModels:
def load(self):
if self.blip_model is None:
self.blip_model = self.load_blip_model()
- if not shared.cmd_opts.no_half:
+ if not shared.cmd_opts.no_half and not self.running_on_cpu:
self.blip_model = self.blip_model.half()
self.blip_model = self.blip_model.to(devices.device_interrogate)
if self.clip_model is None:
self.clip_model, self.clip_preprocess = self.load_clip_model()
- if not shared.cmd_opts.no_half:
+ if not shared.cmd_opts.no_half and not self.running_on_cpu:
self.clip_model = self.clip_model.half()
self.clip_model = self.clip_model.to(devices.device_interrogate)
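The pattern behind the CPU fix is simple enough to show standalone: fp16 conversion is skipped whenever the interrogation device is the CPU, where half precision is typically unsupported or very slow for these models. A minimal sketch:

import torch

def prepare(model, device, no_half=False):
    # Only convert to fp16 when half precision is allowed and we are not on CPU.
    running_on_cpu = device == torch.device("cpu")
    if not no_half and not running_on_cpu:
        model = model.half()
    return model.to(device)

model = prepare(torch.nn.Linear(4, 4), torch.device("cpu"))
print(next(model.parameters()).dtype)  # torch.float32 - unchanged on CPU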
diff --git a/modules/localization.py b/modules/localization.py
new file mode 100644
index 00000000..b1810cda
--- /dev/null
+++ b/modules/localization.py
@@ -0,0 +1,31 @@
+import json
+import os
+import sys
+import traceback
+
+localizations = {}
+
+
+def list_localizations(dirname):
+ localizations.clear()
+
+ for file in os.listdir(dirname):
+ fn, ext = os.path.splitext(file)
+ if ext.lower() != ".json":
+ continue
+
+ localizations[fn] = os.path.join(dirname, file)
+
+
+def localization_js(current_localization_name):
+ fn = localizations.get(current_localization_name, None)
+ data = {}
+ if fn is not None:
+ try:
+ with open(fn, "r", encoding="utf8") as file:
+ data = json.load(file)
+ except Exception:
+ print(f"Error loading localization from {fn}:", file=sys.stderr)
+ print(traceback.format_exc(), file=sys.stderr)
+
+ return f"var localization = {json.dumps(data)}\n"
diff --git a/modules/processing.py b/modules/processing.py
index deb6125e..ff1ec4c9 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -9,9 +9,10 @@ from PIL import Image, ImageFilter, ImageOps
import random
import cv2
from skimage import exposure
+from typing import Any, Dict, List, Optional
import modules.sd_hijack
-from modules import devices, prompt_parser, masking, sd_samplers, lowvram
+from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste
from modules.sd_hijack import model_hijack
from modules.shared import opts, cmd_opts, state
import modules.shared as shared
@@ -51,9 +52,15 @@ def get_correct_sampler(p):
return sd_samplers.samplers
elif isinstance(p, modules.processing.StableDiffusionProcessingImg2Img):
return sd_samplers.samplers_for_img2img
+ elif isinstance(p, modules.api.processing.StableDiffusionProcessingAPI):
+ return sd_samplers.samplers
-class StableDiffusionProcessing:
- def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt="", styles=None, seed=-1, subseed=-1, subseed_strength=0, seed_resize_from_h=-1, seed_resize_from_w=-1, seed_enable_extras=True, sampler_index=0, batch_size=1, n_iter=1, steps=50, cfg_scale=7.0, width=512, height=512, restore_faces=False, tiling=False, do_not_save_samples=False, do_not_save_grid=False, extra_generation_params=None, overlay_images=None, negative_prompt=None, eta=None, do_not_reload_embeddings=False):
+class StableDiffusionProcessing():
+ """
+ The first set of parameters: sd_model -> do_not_reload_embeddings represents the minimum required to create a StableDiffusionProcessing

+
+ """
+ def __init__(self, sd_model=None, outpath_samples=None, outpath_grids=None, prompt: str = "", styles: List[str] = None, seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1, seed_enable_extras: bool = True, sampler_index: int = 0, batch_size: int = 1, n_iter: int = 1, steps: int = 50, cfg_scale: float = 7.0, width: int = 512, height: int = 512, restore_faces: bool = False, tiling: bool = False, do_not_save_samples: bool = False, do_not_save_grid: bool = False, extra_generation_params: Dict[Any, Any] = None, overlay_images: Any = None, negative_prompt: str = None, eta: float = None, do_not_reload_embeddings: bool = False, denoising_strength: float = 0, ddim_discretize: str = "uniform", s_churn: float = 0.0, s_tmax: float = None, s_tmin: float = 0.0, s_noise: float = 1.0):
self.sd_model = sd_model
self.outpath_samples: str = outpath_samples
self.outpath_grids: str = outpath_grids
@@ -86,10 +93,10 @@ class StableDiffusionProcessing:
self.denoising_strength: float = 0
self.sampler_noise_scheduler_override = None
self.ddim_discretize = opts.ddim_discretize
- self.s_churn = opts.s_churn
- self.s_tmin = opts.s_tmin
- self.s_tmax = float('inf') # not representable as a standard ui option
- self.s_noise = opts.s_noise
+ self.s_churn = s_churn or opts.s_churn
+ self.s_tmin = s_tmin or opts.s_tmin
+ self.s_tmax = s_tmax or float('inf') # not representable as a standard ui option
+ self.s_noise = s_noise or opts.s_noise
if not seed_enable_extras:
self.subseed = -1
@@ -97,6 +104,7 @@ class StableDiffusionProcessing:
self.seed_resize_from_h = 0
self.seed_resize_from_w = 0
+
def init(self, all_prompts, all_seeds, all_subseeds):
pass
@@ -296,7 +304,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
"Size": f"{p.width}x{p.height}",
"Model hash": getattr(p, 'sd_model_hash', None if not opts.add_model_hash_to_info or not shared.sd_model.sd_model_hash else shared.sd_model.sd_model_hash),
"Model": (None if not opts.add_model_name_to_info or not shared.sd_model.sd_checkpoint_info.model_name else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')),
- "Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name.replace(',', '').replace(':', '')),
+ "Hypernet": (None if shared.loaded_hypernetwork is None else shared.loaded_hypernetwork.name),
"Batch size": (None if p.batch_size < 2 else p.batch_size),
"Batch pos": (None if p.batch_size < 2 else position_in_batch),
"Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]),
@@ -310,7 +318,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments, iteration
generation_params.update(p.extra_generation_params)
- generation_params_text = ", ".join([k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None])
+ generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None])
negative_prompt_text = "\nNegative prompt: " + p.negative_prompt if p.negative_prompt else ""
@@ -402,12 +410,6 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
with devices.autocast():
samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength)
- if state.interrupted or state.skipped:
-
- # if we are interrupted, sample returns just noise
- # use the image collected previously in sampler loop
- samples_ddim = shared.state.current_latent
-
samples_ddim = samples_ddim.to(devices.dtype_vae)
x_samples_ddim = decode_first_stage(p.sd_model, samples_ddim)
x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
@@ -497,7 +499,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
sampler = None
- def __init__(self, enable_hr=False, denoising_strength=0.75, firstphase_width=0, firstphase_height=0, **kwargs):
+ def __init__(self, enable_hr: bool=False, denoising_strength: float=0.75, firstphase_width: int=0, firstphase_height: int=0, **kwargs):
super().__init__(**kwargs)
self.enable_hr = enable_hr
self.denoising_strength = denoising_strength
@@ -538,17 +540,37 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
self.truncate_x = int(self.firstphase_width - firstphase_width_truncated) // opt_f
self.truncate_y = int(self.firstphase_height - firstphase_height_truncated) // opt_f
+ def create_dummy_mask(self, x, width=None, height=None):
+ if self.sampler.conditioning_key in {'hybrid', 'concat'}:
+ height = height or self.height
+ width = width or self.width
+
+ # The "masked-image" in this case will just be all zeros since the entire image is masked.
+ image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
+ image_conditioning = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image_conditioning))
+
+ # Add the fake full 1s mask to the first dimension.
+ image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
+ image_conditioning = image_conditioning.to(x.dtype)
+
+ else:
+ # Dummy zero conditioning if we're not using inpainting model.
+ # Still takes up a bit of memory, but no encoder call.
+ # Pretty sure we can just make this a 1x1 image since it's not going to be used beyond its batch size.
+ image_conditioning = torch.zeros(x.shape[0], 5, 1, 1, dtype=x.dtype, device=x.device)
+
+ return image_conditioning
def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers, self.sampler_index, self.sd_model)
if not self.enable_hr:
x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
- samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning)
+ samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x))
return samples
x = create_random_tensors([opt_C, self.firstphase_height // opt_f, self.firstphase_width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
- samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning)
+ samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.create_dummy_mask(x, self.firstphase_width, self.firstphase_height))
samples = samples[:, :, self.truncate_y//2:samples.shape[2]-self.truncate_y//2, self.truncate_x//2:samples.shape[3]-self.truncate_x//2]
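A shape-only sketch of what create_dummy_mask produces in each branch, with the VAE encode replaced by a zero tensor: inpainting-style models receive a 5-channel latent-space conditioning (an all-ones mask channel prepended to the encoded all-zero image), while other models just get a tiny placeholder whose only meaningful dimension is the batch size.

import torch
import torch.nn.functional as F

batch, latent_h, latent_w = 2, 64, 64
encoded = torch.zeros(batch, 4, latent_h, latent_w)            # stand-in for get_first_stage_encoding(...)
inpaint_cond = F.pad(encoded, (0, 0, 0, 0, 1, 0), value=1.0)   # prepend the all-ones mask channel
dummy_cond = torch.zeros(batch, 5, 1, 1)                       # non-inpainting branch
print(inpaint_cond.shape, dummy_cond.shape)  # torch.Size([2, 5, 64, 64]) torch.Size([2, 5, 1, 1])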
@@ -585,7 +607,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
x = None
devices.torch_gc()
- samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps)
+ samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.steps, image_conditioning=self.create_dummy_mask(samples))
return samples
@@ -611,6 +633,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
self.inpainting_mask_invert = inpainting_mask_invert
self.mask = None
self.nmask = None
+ self.image_conditioning = None
def init(self, all_prompts, all_seeds, all_subseeds):
self.sampler = sd_samplers.create_sampler_with_index(sd_samplers.samplers_for_img2img, self.sampler_index, self.sd_model)
@@ -712,10 +735,39 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
elif self.inpainting_fill == 3:
self.init_latent = self.init_latent * self.mask
+ if self.sampler.conditioning_key in {'hybrid', 'concat'}:
+ if self.image_mask is not None:
+ conditioning_mask = np.array(self.image_mask.convert("L"))
+ conditioning_mask = conditioning_mask.astype(np.float32) / 255.0
+ conditioning_mask = torch.from_numpy(conditioning_mask[None, None])
+
+ # Inpainting model uses a discretized mask as input, so we round to either 1.0 or 0.0
+ conditioning_mask = torch.round(conditioning_mask)
+ else:
+ conditioning_mask = torch.ones(1, 1, *image.shape[-2:])
+
+ # Create another latent image, this time with a masked version of the original input.
+ conditioning_mask = conditioning_mask.to(image.device)
+ conditioning_image = image * (1.0 - conditioning_mask)
+ conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image))
+
+ # Create the concatenated conditioning tensor to be fed to `c_concat`
+ conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=self.init_latent.shape[-2:])
+ conditioning_mask = conditioning_mask.expand(conditioning_image.shape[0], -1, -1, -1)
+ self.image_conditioning = torch.cat([conditioning_mask, conditioning_image], dim=1)
+ self.image_conditioning = self.image_conditioning.to(shared.device).type(self.sd_model.dtype)
+ else:
+ self.image_conditioning = torch.zeros(
+ self.init_latent.shape[0], 5, 1, 1,
+ dtype=self.init_latent.dtype,
+ device=self.init_latent.device
+ )
+
+
def sample(self, conditioning, unconditional_conditioning, seeds, subseeds, subseed_strength):
x = create_random_tensors([opt_C, self.height // opt_f, self.width // opt_f], seeds=seeds, subseeds=subseeds, subseed_strength=self.subseed_strength, seed_resize_from_h=self.seed_resize_from_h, seed_resize_from_w=self.seed_resize_from_w, p=self)
- samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning)
+ samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)
if self.mask is not None:
samples = samples * self.nmask + self.init_latent * self.mask
@@ -723,4 +775,4 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
del x
devices.torch_gc()
- return samples
+ return samples
\ No newline at end of file
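The img2img counterpart builds a real conditioning tensor: the user mask is binarised, the masked source image is encoded, and both are concatenated along the channel dimension before being handed to the sampler as image_conditioning. A runnable sketch with the VAE encode stubbed out; encode here is an assumption standing in for get_first_stage_encoding(encode_first_stage(...)).

import torch
import torch.nn.functional as F

def make_image_conditioning(image, mask, encode=lambda img: torch.zeros(img.shape[0], 4, img.shape[2] // 8, img.shape[3] // 8)):
    conditioning_mask = torch.round(mask)                   # the inpainting model expects a hard 0/1 mask
    conditioning_image = image * (1.0 - conditioning_mask)  # zero out the region to be repainted
    latent = encode(conditioning_image)                     # stub for the first-stage (VAE) encoding
    mask_small = F.interpolate(conditioning_mask, size=latent.shape[-2:])
    return torch.cat([mask_small.expand(latent.shape[0], -1, -1, -1), latent], dim=1)

cond = make_image_conditioning(torch.randn(1, 3, 512, 512), torch.rand(1, 1, 512, 512))
print(cond.shape)  # torch.Size([1, 5, 64, 64])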
diff --git a/modules/prompt_parser.py b/modules/prompt_parser.py
index 919d5d31..f70872c4 100644
--- a/modules/prompt_parser.py
+++ b/modules/prompt_parser.py
@@ -275,7 +275,7 @@ re_attention = re.compile(r"""
def parse_prompt_attention(text):
"""
- Parses a string with attention tokens and returns a list of pairs: text and its assoicated weight.
+ Parses a string with attention tokens and returns a list of pairs: text and its associated weight.
Accepted tokens are:
(abc) - increases attention to abc by a multiplier of 1.1
(abc:3.12) - increases attention to abc by a multiplier of 3.12
diff --git a/modules/scripts.py b/modules/scripts.py
index ac66d448..1039fa9c 100644
--- a/modules/scripts.py
+++ b/modules/scripts.py
@@ -96,6 +96,7 @@ def wrap_call(func, filename, funcname, *args, default=None, **kwargs):
class ScriptRunner:
def __init__(self):
self.scripts = []
+ self.titles = []
def setup_ui(self, is_img2img):
for script_class, path in scripts_data:
@@ -107,9 +108,10 @@ class ScriptRunner:
self.scripts.append(script)
- titles = [wrap_call(script.title, script.filename, "title") or f"{script.filename} [error]" for script in self.scripts]
+ self.titles = [wrap_call(script.title, script.filename, "title") or f"{script.filename} [error]" for script in self.scripts]
- dropdown = gr.Dropdown(label="Script", choices=["None"] + titles, value="None", type="index")
+ dropdown = gr.Dropdown(label="Script", choices=["None"] + self.titles, value="None", type="index")
+ dropdown.save_to_config = True
inputs = [dropdown]
for script in self.scripts:
@@ -139,6 +141,15 @@ class ScriptRunner:
return [ui.gr_show(True if i == 0 else args_from <= i < args_to) for i in range(len(inputs))]
+ def init_field(title):
+ if title == 'None':
+ return
+ script_index = self.titles.index(title)
+ script = self.scripts[script_index]
+ for i in range(script.args_from, script.args_to):
+ inputs[i].visible = True
+
+ dropdown.init_field = init_field
dropdown.change(
fn=select_script,
inputs=[dropdown],
diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py
index 984b35c4..1f8587d1 100644
--- a/modules/sd_hijack.py
+++ b/modules/sd_hijack.py
@@ -19,6 +19,7 @@ attention_CrossAttention_forward = ldm.modules.attention.CrossAttention.forward
diffusionmodules_model_nonlinearity = ldm.modules.diffusionmodules.model.nonlinearity
diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.AttnBlock.forward
+
def apply_optimizations():
undo_optimizations()
@@ -167,11 +168,11 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
remade_tokens = remade_tokens[:last_comma]
length = len(remade_tokens)
-
+
rem = int(math.ceil(length / 75)) * 75 - length
remade_tokens += [id_end] * rem + reloc_tokens
multipliers = multipliers[:last_comma] + [1.0] * rem + reloc_mults
-
+
if embedding is None:
remade_tokens.append(token)
multipliers.append(weight)
@@ -223,7 +224,6 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
-
def process_text_old(self, text):
id_start = self.wrapped.tokenizer.bos_token_id
id_end = self.wrapped.tokenizer.eos_token_id
@@ -280,7 +280,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
token_count = len(remade_tokens)
remade_tokens = remade_tokens + [id_end] * (maxlen - 2 - len(remade_tokens))
- remade_tokens = [id_start] + remade_tokens[0:maxlen-2] + [id_end]
+ remade_tokens = [id_start] + remade_tokens[0:maxlen - 2] + [id_end]
cache[tuple_tokens] = (remade_tokens, fixes, multipliers)
multipliers = multipliers + [1.0] * (maxlen - 2 - len(multipliers))
@@ -290,7 +290,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
hijack_fixes.append(fixes)
batch_multipliers.append(multipliers)
return batch_multipliers, remade_batch_tokens, used_custom_terms, hijack_comments, hijack_fixes, token_count
-
+
def forward(self, text):
use_old = opts.use_old_emphasis_implementation
if use_old:
@@ -302,11 +302,11 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
if len(used_custom_terms) > 0:
self.hijack.comments.append("Used embeddings: " + ", ".join([f'{word} [{checksum}]' for word, checksum in used_custom_terms]))
-
+
if use_old:
self.hijack.fixes = hijack_fixes
return self.process_tokens(remade_batch_tokens, batch_multipliers)
-
+
z = None
i = 0
while max(map(len, remade_batch_tokens)) != 0:
@@ -320,7 +320,7 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
if fix[0] == i:
fixes.append(fix[1])
self.hijack.fixes.append(fixes)
-
+
tokens = []
multipliers = []
for j in range(len(remade_batch_tokens)):
@@ -332,20 +332,20 @@ class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
multipliers.append([1.0] * 75)
z1 = self.process_tokens(tokens, multipliers)
+ z1 = shared.aesthetic_clip(z1, remade_batch_tokens)
z = z1 if z is None else torch.cat((z, z1), axis=-2)
-
+
remade_batch_tokens = rem_tokens
batch_multipliers = rem_multipliers
i += 1
-
+
return z
-
-
+
def process_tokens(self, remade_batch_tokens, batch_multipliers):
if not opts.use_old_emphasis_implementation:
remade_batch_tokens = [[self.wrapped.tokenizer.bos_token_id] + x[:75] + [self.wrapped.tokenizer.eos_token_id] for x in remade_batch_tokens]
batch_multipliers = [[1.0] + x[:75] + [1.0] for x in batch_multipliers]
-
+
tokens = torch.asarray(remade_batch_tokens).to(device)
outputs = self.wrapped.transformer(input_ids=tokens, output_hidden_states=-opts.CLIP_stop_at_last_layers)
@@ -385,8 +385,8 @@ class EmbeddingsWithFixes(torch.nn.Module):
for fixes, tensor in zip(batch_fixes, inputs_embeds):
for offset, embedding in fixes:
emb = embedding.vec
- emb_len = min(tensor.shape[0]-offset-1, emb.shape[0])
- tensor = torch.cat([tensor[0:offset+1], emb[0:emb_len], tensor[offset+1+emb_len:]])
+ emb_len = min(tensor.shape[0] - offset - 1, emb.shape[0])
+ tensor = torch.cat([tensor[0:offset + 1], emb[0:emb_len], tensor[offset + 1 + emb_len:]])
vecs.append(tensor)
diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py
new file mode 100644
index 00000000..fd92a335
--- /dev/null
+++ b/modules/sd_hijack_inpainting.py
@@ -0,0 +1,331 @@
+import torch
+
+from einops import repeat
+from omegaconf import ListConfig
+
+import ldm.models.diffusion.ddpm
+import ldm.models.diffusion.ddim
+import ldm.models.diffusion.plms
+
+from ldm.models.diffusion.ddpm import LatentDiffusion
+from ldm.models.diffusion.plms import PLMSSampler
+from ldm.models.diffusion.ddim import DDIMSampler, noise_like
+
+# =================================================================================================
+# Monkey patch DDIMSampler methods from RunwayML repo directly.
+# Adapted from:
+# https://github.com/runwayml/stable-diffusion/blob/main/ldm/models/diffusion/ddim.py
+# =================================================================================================
+@torch.no_grad()
+def sample_ddim(self,
+ S,
+ batch_size,
+ shape,
+ conditioning=None,
+ callback=None,
+ normals_sequence=None,
+ img_callback=None,
+ quantize_x0=False,
+ eta=0.,
+ mask=None,
+ x0=None,
+ temperature=1.,
+ noise_dropout=0.,
+ score_corrector=None,
+ corrector_kwargs=None,
+ verbose=True,
+ x_T=None,
+ log_every_t=100,
+ unconditional_guidance_scale=1.,
+ unconditional_conditioning=None,
+ # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ...
+ **kwargs
+ ):
+ if conditioning is not None:
+ if isinstance(conditioning, dict):
+ ctmp = conditioning[list(conditioning.keys())[0]]
+ while isinstance(ctmp, list):
+ ctmp = ctmp[0]
+ cbs = ctmp.shape[0]
+ if cbs != batch_size:
+ print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
+ else:
+ if conditioning.shape[0] != batch_size:
+ print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")
+
+ self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)
+ # sampling
+ C, H, W = shape
+ size = (batch_size, C, H, W)
+ print(f'Data shape for DDIM sampling is {size}, eta {eta}')
+
+ samples, intermediates = self.ddim_sampling(conditioning, size,
+ callback=callback,
+ img_callback=img_callback,
+ quantize_denoised=quantize_x0,
+ mask=mask, x0=x0,
+ ddim_use_original_steps=False,
+ noise_dropout=noise_dropout,
+ temperature=temperature,
+ score_corrector=score_corrector,
+ corrector_kwargs=corrector_kwargs,
+ x_T=x_T,
+ log_every_t=log_every_t,
+ unconditional_guidance_scale=unconditional_guidance_scale,
+ unconditional_conditioning=unconditional_conditioning,
+ )
+ return samples, intermediates
+
+@torch.no_grad()
+def p_sample_ddim(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
+ temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
+ unconditional_guidance_scale=1., unconditional_conditioning=None):
+ b, *_, device = *x.shape, x.device
+
+ if unconditional_conditioning is None or unconditional_guidance_scale == 1.:
+ e_t = self.model.apply_model(x, t, c)
+ else:
+ x_in = torch.cat([x] * 2)
+ t_in = torch.cat([t] * 2)
+ if isinstance(c, dict):
+ assert isinstance(unconditional_conditioning, dict)
+ c_in = dict()
+ for k in c:
+ if isinstance(c[k], list):
+ c_in[k] = [
+ torch.cat([unconditional_conditioning[k][i], c[k][i]])
+ for i in range(len(c[k]))
+ ]
+ else:
+ c_in[k] = torch.cat([unconditional_conditioning[k], c[k]])
+ else:
+ c_in = torch.cat([unconditional_conditioning, c])
+ e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2)
+ e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond)
+
+ if score_corrector is not None:
+ assert self.model.parameterization == "eps"
+ e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs)
+
+ alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas
+ alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev
+ sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas
+ sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas
+ # select parameters corresponding to the currently considered timestep
+ a_t = torch.full((b, 1, 1, 1), alphas[index], device=device)
+ a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device)
+ sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device)
+ sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index],device=device)
+
+ # current prediction for x_0
+ pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
+ if quantize_denoised:
+ pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0)
+ # direction pointing to x_t
+ dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t
+ noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature
+ if noise_dropout > 0.:
+ noise = torch.nn.functional.dropout(noise, p=noise_dropout)
+ x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise
+ return x_prev, pred_x0
+
+
+# =================================================================================================
+# Monkey patch PLMSSampler methods.
+# This one was not actually patched correctly in the RunwayML repo, but we can replicate the changes.
+# Adapted from:
+# https://github.com/CompVis/stable-diffusion/blob/main/ldm/models/diffusion/plms.py
+# =================================================================================================
+@torch.no_grad()
+def sample_plms(self,
+ S,
+ batch_size,
+ shape,
+ conditioning=None,
+ callback=None,
+ normals_sequence=None,
+ img_callback=None,
+ quantize_x0=False,
+ eta=0.,
+ mask=None,
+ x0=None,
+ temperature=1.,
+ noise_dropout=0.,
+ score_corrector=None,
+ corrector_kwargs=None,
+ verbose=True,
+ x_T=None,
+ log_every_t=100,
+ unconditional_guidance_scale=1.,
+ unconditional_conditioning=None,
+ # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ...
+ **kwargs
+ ):
+ if conditioning is not None:
+ if isinstance(conditioning, dict):
+ ctmp = conditioning[list(conditioning.keys())[0]]
+ while isinstance(ctmp, list):
+ ctmp = ctmp[0]
+ cbs = ctmp.shape[0]
+ if cbs != batch_size:
+ print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
+ else:
+ if conditioning.shape[0] != batch_size:
+ print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")
+
+ self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)
+ # sampling
+ C, H, W = shape
+ size = (batch_size, C, H, W)
+ print(f'Data shape for PLMS sampling is {size}')
+
+ samples, intermediates = self.plms_sampling(conditioning, size,
+ callback=callback,
+ img_callback=img_callback,
+ quantize_denoised=quantize_x0,
+ mask=mask, x0=x0,
+ ddim_use_original_steps=False,
+ noise_dropout=noise_dropout,
+ temperature=temperature,
+ score_corrector=score_corrector,
+ corrector_kwargs=corrector_kwargs,
+ x_T=x_T,
+ log_every_t=log_every_t,
+ unconditional_guidance_scale=unconditional_guidance_scale,
+ unconditional_conditioning=unconditional_conditioning,
+ )
+ return samples, intermediates
+
+
+@torch.no_grad()
+def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
+ temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
+ unconditional_guidance_scale=1., unconditional_conditioning=None, old_eps=None, t_next=None):
+ b, *_, device = *x.shape, x.device
+
+ def get_model_output(x, t):
+ if unconditional_conditioning is None or unconditional_guidance_scale == 1.:
+ e_t = self.model.apply_model(x, t, c)
+ else:
+ x_in = torch.cat([x] * 2)
+ t_in = torch.cat([t] * 2)
+
+ if isinstance(c, dict):
+ assert isinstance(unconditional_conditioning, dict)
+ c_in = dict()
+ for k in c:
+ if isinstance(c[k], list):
+ c_in[k] = [
+ torch.cat([unconditional_conditioning[k][i], c[k][i]])
+ for i in range(len(c[k]))
+ ]
+ else:
+ c_in[k] = torch.cat([unconditional_conditioning[k], c[k]])
+ else:
+ c_in = torch.cat([unconditional_conditioning, c])
+
+ e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2)
+ e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond)
+
+ if score_corrector is not None:
+ assert self.model.parameterization == "eps"
+ e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs)
+
+ return e_t
+
+ alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas
+ alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev
+ sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas
+ sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas
+
+ def get_x_prev_and_pred_x0(e_t, index):
+ # select parameters corresponding to the currently considered timestep
+ a_t = torch.full((b, 1, 1, 1), alphas[index], device=device)
+ a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device)
+ sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device)
+ sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index],device=device)
+
+ # current prediction for x_0
+ pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
+ if quantize_denoised:
+ pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0)
+ # direction pointing to x_t
+ dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t
+ noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature
+ if noise_dropout > 0.:
+ noise = torch.nn.functional.dropout(noise, p=noise_dropout)
+ x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise
+ return x_prev, pred_x0
+
+ e_t = get_model_output(x, t)
+ if len(old_eps) == 0:
+ # Pseudo Improved Euler (2nd order)
+ x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t, index)
+ e_t_next = get_model_output(x_prev, t_next)
+ e_t_prime = (e_t + e_t_next) / 2
+ elif len(old_eps) == 1:
+ # 2nd order Pseudo Linear Multistep (Adams-Bashforth)
+ e_t_prime = (3 * e_t - old_eps[-1]) / 2
+ elif len(old_eps) == 2:
+ # 3rd order Pseudo Linear Multistep (Adams-Bashforth)
+ e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12
+ elif len(old_eps) >= 3:
+ # 4th order Pseudo Linear Multistep (Adams-Bashforth)
+ e_t_prime = (55 * e_t - 59 * old_eps[-1] + 37 * old_eps[-2] - 9 * old_eps[-3]) / 24
+
+ x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t_prime, index)
+
+ return x_prev, pred_x0, e_t
+
+# =================================================================================================
+# Monkey patch LatentInpaintDiffusion to load the checkpoint with a proper config.
+# Adapted from:
+# https://github.com/runwayml/stable-diffusion/blob/main/ldm/models/diffusion/ddpm.py
+# =================================================================================================
+
+@torch.no_grad()
+def get_unconditional_conditioning(self, batch_size, null_label=None):
+ if null_label is not None:
+ xc = null_label
+ if isinstance(xc, ListConfig):
+ xc = list(xc)
+ if isinstance(xc, dict) or isinstance(xc, list):
+ c = self.get_learned_conditioning(xc)
+ else:
+ if hasattr(xc, "to"):
+ xc = xc.to(self.device)
+ c = self.get_learned_conditioning(xc)
+ else:
+ # todo: get null label from cond_stage_model
+ raise NotImplementedError()
+ c = repeat(c, "1 ... -> b ...", b=batch_size).to(self.device)
+ return c
+
+
+class LatentInpaintDiffusion(LatentDiffusion):
+ def __init__(
+ self,
+ concat_keys=("mask", "masked_image"),
+ masked_image_key="masked_image",
+ *args,
+ **kwargs,
+ ):
+ super().__init__(*args, **kwargs)
+ self.masked_image_key = masked_image_key
+ assert self.masked_image_key in concat_keys
+ self.concat_keys = concat_keys
+
+
+def should_hijack_inpainting(checkpoint_info):
+ return str(checkpoint_info.filename).endswith("inpainting.ckpt") and not checkpoint_info.config.endswith("inpainting.yaml")
+
+
+def do_inpainting_hijack():
+ ldm.models.diffusion.ddpm.get_unconditional_conditioning = get_unconditional_conditioning
+ ldm.models.diffusion.ddpm.LatentInpaintDiffusion = LatentInpaintDiffusion
+
+ ldm.models.diffusion.ddim.DDIMSampler.p_sample_ddim = p_sample_ddim
+ ldm.models.diffusion.ddim.DDIMSampler.sample = sample_ddim
+
+ ldm.models.diffusion.plms.PLMSSampler.p_sample_plms = p_sample_plms
+ ldm.models.diffusion.plms.PLMSSampler.sample = sample_plms
\ No newline at end of file
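The whole file relies on plain attribute assignment to patch classes shipped by the ldm package; nothing is subclassed or re-imported. A tiny illustration of the mechanism, with Sampler and the function names invented for the demo:

class Sampler:
    def sample(self):
        return "original"

def patched_sample(self):
    return "patched"

def do_hijack():
    # Same idea as DDIMSampler.sample = sample_ddim above: rebinding the attribute on the
    # class affects every instance, including ones created before the hijack ran.
    Sampler.sample = patched_sample

s = Sampler()
do_hijack()
print(s.sample())  # "patched"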
diff --git a/modules/sd_hijack_optimizations.py b/modules/sd_hijack_optimizations.py
index 79405525..98123fbf 100644
--- a/modules/sd_hijack_optimizations.py
+++ b/modules/sd_hijack_optimizations.py
@@ -181,7 +181,7 @@ def einsum_op_cuda(q, k, v):
mem_free_torch = mem_reserved - mem_active
mem_free_total = mem_free_cuda + mem_free_torch
# Divide factor of safety as there's copying and fragmentation
- return self.einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20))
+ return einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20))
def einsum_op(q, k, v):
if q.device.type == 'cuda':
@@ -296,10 +296,16 @@ def xformers_attnblock_forward(self, x):
try:
h_ = x
h_ = self.norm(h_)
- q1 = self.q(h_).contiguous()
- k1 = self.k(h_).contiguous()
- v = self.v(h_).contiguous()
- out = xformers.ops.memory_efficient_attention(q1, k1, v)
+ q = self.q(h_)
+ k = self.k(h_)
+ v = self.v(h_)
+ b, c, h, w = q.shape
+ q, k, v = map(lambda t: rearrange(t, 'b c h w -> b (h w) c'), (q, k, v))
+ q = q.contiguous()
+ k = k.contiguous()
+ v = v.contiguous()
+ out = xformers.ops.memory_efficient_attention(q, k, v)
+ out = rearrange(out, 'b (h w) c -> b c h w', h=h)
out = self.proj_out(out)
return x + out
except NotImplementedError:
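The fix above is about tensor layout: AttnBlock activations are NCHW, while xformers.ops.memory_efficient_attention expects (batch, tokens, channels), so q/k/v are flattened with einops before attention and the result is reshaped back. A shape-only sketch of those two rearranges:

import torch
from einops import rearrange

b, c, h, w = 1, 512, 32, 32
q = torch.randn(b, c, h, w)
tokens = rearrange(q, 'b c h w -> b (h w) c').contiguous()
print(tokens.shape)                                    # torch.Size([1, 1024, 512])
restored = rearrange(tokens, 'b (h w) c -> b c h w', h=h)
print(restored.shape)                                  # torch.Size([1, 512, 32, 32])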
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 3aa21ec1..d99dbce8 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -9,6 +9,7 @@ from ldm.util import instantiate_from_config
from modules import shared, modelloader, devices
from modules.paths import models_path
+from modules.sd_hijack_inpainting import do_inpainting_hijack, should_hijack_inpainting
model_dir = "Stable-diffusion"
model_path = os.path.abspath(os.path.join(models_path, model_dir))
@@ -20,7 +21,7 @@ checkpoints_loaded = collections.OrderedDict()
try:
# this silences the annoying "Some weights of the model checkpoint were not used when initializing..." message at start.
- from transformers import logging
+ from transformers import logging, CLIPModel
logging.set_verbosity_error()
except Exception:
@@ -122,13 +123,41 @@ def select_checkpoint():
return checkpoint_info
+checkpoint_dict_replacements = {
+ 'cond_stage_model.transformer.embeddings.': 'cond_stage_model.transformer.text_model.embeddings.',
+ 'cond_stage_model.transformer.encoder.': 'cond_stage_model.transformer.text_model.encoder.',
+ 'cond_stage_model.transformer.final_layer_norm.': 'cond_stage_model.transformer.text_model.final_layer_norm.',
+}
+
+
+def transform_checkpoint_dict_key(k):
+ for text, replacement in checkpoint_dict_replacements.items():
+ if k.startswith(text):
+ k = replacement + k[len(text):]
+
+ return k
+
+
def get_state_dict_from_checkpoint(pl_sd):
if "state_dict" in pl_sd:
- return pl_sd["state_dict"]
+ pl_sd = pl_sd["state_dict"]
+
+ sd = {}
+ for k, v in pl_sd.items():
+ new_key = transform_checkpoint_dict_key(k)
+
+ if new_key is not None:
+ sd[new_key] = v
+
+ pl_sd.clear()
+ pl_sd.update(sd)
return pl_sd
+vae_ignore_keys = {"model_ema.decay", "model_ema.num_updates"}
+
+
def load_model_weights(model, checkpoint_info):
checkpoint_file = checkpoint_info.filename
sd_model_hash = checkpoint_info.hash
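The key-remapping helper above rewrites checkpoints saved with an older CLIP key layout (no text_model. segment) into the layout the current transformers CLIP text encoder expects, before load_state_dict runs. The mapping is easy to sanity-check in isolation:

checkpoint_dict_replacements = {
    'cond_stage_model.transformer.embeddings.': 'cond_stage_model.transformer.text_model.embeddings.',
    'cond_stage_model.transformer.encoder.': 'cond_stage_model.transformer.text_model.encoder.',
    'cond_stage_model.transformer.final_layer_norm.': 'cond_stage_model.transformer.text_model.final_layer_norm.',
}

def remap_key(k):
    # Rewrite any key that starts with one of the old prefixes; leave everything else alone.
    for old, new in checkpoint_dict_replacements.items():
        if k.startswith(old):
            return new + k[len(old):]
    return k

print(remap_key('cond_stage_model.transformer.encoder.layers.0.mlp.fc1.weight'))
# cond_stage_model.transformer.text_model.encoder.layers.0.mlp.fc1.weight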
@@ -141,7 +170,7 @@ def load_model_weights(model, checkpoint_info):
print(f"Global Step: {pl_sd['global_step']}")
sd = get_state_dict_from_checkpoint(pl_sd)
- model.load_state_dict(sd, strict=False)
+ missing, extra = model.load_state_dict(sd, strict=False)
if shared.cmd_opts.opt_channelslast:
model.to(memory_format=torch.channels_last)
@@ -160,7 +189,7 @@ def load_model_weights(model, checkpoint_info):
if os.path.exists(vae_file):
print(f"Loading VAE weights from: {vae_file}")
vae_ckpt = torch.load(vae_file, map_location=shared.weight_load_location)
- vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss"}
+ vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss" and k not in vae_ignore_keys}
model.first_stage_model.load_state_dict(vae_dict)
model.first_stage_model.to(devices.dtype_vae)
@@ -178,14 +207,26 @@ def load_model_weights(model, checkpoint_info):
model.sd_checkpoint_info = checkpoint_info
-def load_model():
+def load_model(checkpoint_info=None):
from modules import lowvram, sd_hijack
- checkpoint_info = select_checkpoint()
+ checkpoint_info = checkpoint_info or select_checkpoint()
if checkpoint_info.config != shared.cmd_opts.config:
print(f"Loading config from: {checkpoint_info.config}")
sd_config = OmegaConf.load(checkpoint_info.config)
+
+ if should_hijack_inpainting(checkpoint_info):
+ # Hardcoded config for now...
+ sd_config.model.target = "ldm.models.diffusion.ddpm.LatentInpaintDiffusion"
+ sd_config.model.params.use_ema = False
+ sd_config.model.params.conditioning_key = "hybrid"
+ sd_config.model.params.unet_config.params.in_channels = 9
+
+ # Create a "fake" config with a different name so that we know to unload it when switching models.
+ checkpoint_info = checkpoint_info._replace(config=checkpoint_info.config.replace(".yaml", "-inpainting.yaml"))
+
+ do_inpainting_hijack()
sd_model = instantiate_from_config(sd_config.model)
load_model_weights(sd_model, checkpoint_info)
@@ -209,9 +250,9 @@ def reload_model_weights(sd_model, info=None):
if sd_model.sd_model_checkpoint == checkpoint_info.filename:
return
- if sd_model.sd_checkpoint_info.config != checkpoint_info.config:
+ if sd_model.sd_checkpoint_info.config != checkpoint_info.config or should_hijack_inpainting(checkpoint_info) != should_hijack_inpainting(sd_model.sd_checkpoint_info):
checkpoints_loaded.clear()
- shared.sd_model = load_model()
+ shared.sd_model = load_model(checkpoint_info)
return shared.sd_model
if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
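The inpainting hijack amounts to an in-memory edit of the loaded OmegaConf config before instantiate_from_config runs: swap the model class, force hybrid conditioning, and widen the UNet input to 9 channels (4 noised latent channels plus the 5-channel mask/masked-image conditioning built elsewhere in this patch). A sketch of that override on a toy config; the starting values are illustrative, not the real v1-inference.yaml.

from omegaconf import OmegaConf

# Toy stand-in for the loaded config; only the fields touched by the hijack are present.
sd_config = OmegaConf.create({
    "model": {
        "target": "ldm.models.diffusion.ddpm.LatentDiffusion",
        "params": {"conditioning_key": "crossattn", "unet_config": {"params": {"in_channels": 4}}},
    }
})

# The same overrides applied in load_model() when should_hijack_inpainting() is true.
sd_config.model.target = "ldm.models.diffusion.ddpm.LatentInpaintDiffusion"
sd_config.model.params.use_ema = False
sd_config.model.params.conditioning_key = "hybrid"
sd_config.model.params.unet_config.params.in_channels = 9  # 4 latent + 5 conditioning channels

print(OmegaConf.to_yaml(sd_config))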
diff --git a/modules/sd_samplers.py b/modules/sd_samplers.py
index 20309e06..f58a29b9 100644
--- a/modules/sd_samplers.py
+++ b/modules/sd_samplers.py
@@ -98,25 +98,8 @@ def store_latent(decoded):
shared.state.current_image = sample_to_image(decoded)
-
-def extended_tdqm(sequence, *args, desc=None, **kwargs):
- state.sampling_steps = len(sequence)
- state.sampling_step = 0
-
- seq = sequence if cmd_opts.disable_console_progressbars else tqdm.tqdm(sequence, *args, desc=state.job, file=shared.progress_print_out, **kwargs)
-
- for x in seq:
- if state.interrupted or state.skipped:
- break
-
- yield x
-
- state.sampling_step += 1
- shared.total_tqdm.update()
-
-
-ldm.models.diffusion.ddim.tqdm = lambda *args, desc=None, **kwargs: extended_tdqm(*args, desc=desc, **kwargs)
-ldm.models.diffusion.plms.tqdm = lambda *args, desc=None, **kwargs: extended_tdqm(*args, desc=desc, **kwargs)
+class InterruptedException(BaseException):
+ pass
class VanillaStableDiffusionSampler:
@@ -128,14 +111,40 @@ class VanillaStableDiffusionSampler:
self.init_latent = None
self.sampler_noises = None
self.step = 0
+ self.stop_at = None
self.eta = None
self.default_eta = 0.0
self.config = None
+ self.last_latent = None
+
+ self.conditioning_key = sd_model.model.conditioning_key
def number_of_needed_noises(self, p):
return 0
+ def launch_sampling(self, steps, func):
+ state.sampling_steps = steps
+ state.sampling_step = 0
+
+ try:
+ return func()
+ except InterruptedException:
+ return self.last_latent
+
def p_sample_ddim_hook(self, x_dec, cond, ts, unconditional_conditioning, *args, **kwargs):
+ if state.interrupted or state.skipped:
+ raise InterruptedException
+
+ if self.stop_at is not None and self.step > self.stop_at:
+ raise InterruptedException
+
+ # Have to unwrap the inpainting conditioning here to perform pre-processing
+ image_conditioning = None
+ if isinstance(cond, dict):
+ image_conditioning = cond["c_concat"][0]
+ cond = cond["c_crossattn"][0]
+ unconditional_conditioning = unconditional_conditioning["c_crossattn"][0]
+
conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step)
unconditional_conditioning = prompt_parser.reconstruct_cond_batch(unconditional_conditioning, self.step)
@@ -156,14 +165,25 @@ class VanillaStableDiffusionSampler:
img_orig = self.sampler.model.q_sample(self.init_latent, ts)
x_dec = img_orig * self.mask + self.nmask * x_dec
+ # Wrap the image conditioning back up since the DDIM code can accept the dict directly.
+ # Note that they need to be lists because it just concatenates them later.
+ if image_conditioning is not None:
+ cond = {"c_concat": [image_conditioning], "c_crossattn": [cond]}
+ unconditional_conditioning = {"c_concat": [image_conditioning], "c_crossattn": [unconditional_conditioning]}
+
res = self.orig_p_sample_ddim(x_dec, cond, ts, unconditional_conditioning=unconditional_conditioning, *args, **kwargs)
if self.mask is not None:
- store_latent(self.init_latent * self.mask + self.nmask * res[1])
+ self.last_latent = self.init_latent * self.mask + self.nmask * res[1]
else:
- store_latent(res[1])
+ self.last_latent = res[1]
+
+ store_latent(self.last_latent)
self.step += 1
+ state.sampling_step = self.step
+ shared.total_tqdm.update()
+
return res
def initialize(self, p):
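Interrupt and skip handling changed shape in this file: rather than patching the tqdm loops inside ldm and k-diffusion, the per-step hooks now raise InterruptedException (derived from BaseException, presumably so a blanket except Exception elsewhere cannot swallow it) and launch_sampling catches it, returning whatever latent was stored last so the UI still gets a usable image. The control flow in miniature; SamplerSketch and its fields are invented for the demo.

class InterruptedException(BaseException):
    pass

class SamplerSketch:
    def __init__(self):
        self.last_latent = None
        self.interrupted = False

    def launch_sampling(self, steps, func):
        # steps is unused here; the real method records it in shared.state for the progress bar.
        try:
            return func()
        except InterruptedException:
            return self.last_latent

    def run(self, steps):
        def loop():
            for step in range(steps):
                if self.interrupted:
                    raise InterruptedException
                self.last_latent = f"latent@{step}"
                if step == 3:
                    self.interrupted = True  # simulate the user pressing Interrupt
            return "final latent"
        return self.launch_sampling(steps, loop)

print(SamplerSketch().run(10))  # latent@3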
@@ -176,7 +196,7 @@ class VanillaStableDiffusionSampler:
self.mask = p.mask if hasattr(p, 'mask') else None
self.nmask = p.nmask if hasattr(p, 'nmask') else None
- def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None):
+ def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
steps, t_enc = setup_img2img_steps(p, steps)
self.initialize(p)
@@ -190,25 +210,38 @@ class VanillaStableDiffusionSampler:
x1 = self.sampler.stochastic_encode(x, torch.tensor([t_enc] * int(x.shape[0])).to(shared.device), noise=noise)
self.init_latent = x
+ self.last_latent = x
self.step = 0
- samples = self.sampler.decode(x1, conditioning, t_enc, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning)
+ # Wrap the conditioning models with additional image conditioning for inpainting model
+ if image_conditioning is not None:
+ conditioning = {"c_concat": [image_conditioning], "c_crossattn": [conditioning]}
+ unconditional_conditioning = {"c_concat": [image_conditioning], "c_crossattn": [unconditional_conditioning]}
+
+
+ samples = self.launch_sampling(steps, lambda: self.sampler.decode(x1, conditioning, t_enc, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning))
return samples
- def sample(self, p, x, conditioning, unconditional_conditioning, steps=None):
+ def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
self.initialize(p)
self.init_latent = None
+ self.last_latent = x
self.step = 0
steps = steps or p.steps
+ # Wrap the conditioning models with additional image conditioning for inpainting model
+ if image_conditioning is not None:
+ conditioning = {"c_concat": [image_conditioning], "c_crossattn": [conditioning]}
+ unconditional_conditioning = {"c_concat": [image_conditioning], "c_crossattn": [unconditional_conditioning]}
+
# existing code fails with certain step counts, like 9
try:
- samples_ddim, _ = self.sampler.sample(S=steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta)
+ samples_ddim = self.launch_sampling(steps, lambda: self.sampler.sample(S=steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta)[0])
except Exception:
- samples_ddim, _ = self.sampler.sample(S=steps+1, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta)
+ samples_ddim = self.launch_sampling(steps, lambda: self.sampler.sample(S=steps+1, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=p.cfg_scale, unconditional_conditioning=unconditional_conditioning, x_T=x, eta=self.eta)[0])
return samples_ddim
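Both samplers wrap their conditionings the same way when image_conditioning is present: the text conditioning moves under c_crossattn and the image conditioning under c_concat, each in a single-element list because the DDPM code concatenates the list entries per key. The wrapper in isolation:

import torch

def wrap(conditioning, unconditional_conditioning, image_conditioning=None):
    # No inpainting conditioning: pass the tensors through untouched (legacy behaviour).
    if image_conditioning is None:
        return conditioning, unconditional_conditioning
    cond = {"c_concat": [image_conditioning], "c_crossattn": [conditioning]}
    uncond = {"c_concat": [image_conditioning], "c_crossattn": [unconditional_conditioning]}
    return cond, uncond

c, uc = wrap(torch.randn(2, 77, 768), torch.randn(2, 77, 768), torch.zeros(2, 5, 1, 1))
print(sorted(c.keys()))  # ['c_concat', 'c_crossattn']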
@@ -222,7 +255,10 @@ class CFGDenoiser(torch.nn.Module):
self.init_latent = None
self.step = 0
- def forward(self, x, sigma, uncond, cond, cond_scale):
+ def forward(self, x, sigma, uncond, cond, cond_scale, image_cond):
+ if state.interrupted or state.skipped:
+ raise InterruptedException
+
conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step)
uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step)
@@ -230,28 +266,29 @@ class CFGDenoiser(torch.nn.Module):
repeats = [len(conds_list[i]) for i in range(batch_size)]
x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x])
+ image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond])
sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma])
if tensor.shape[1] == uncond.shape[1]:
cond_in = torch.cat([tensor, uncond])
if shared.batch_cond_uncond:
- x_out = self.inner_model(x_in, sigma_in, cond=cond_in)
+ x_out = self.inner_model(x_in, sigma_in, cond={"c_crossattn": [cond_in], "c_concat": [image_cond_in]})
else:
x_out = torch.zeros_like(x_in)
for batch_offset in range(0, x_out.shape[0], batch_size):
a = batch_offset
b = a + batch_size
- x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=cond_in[a:b])
+ x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": [cond_in[a:b]], "c_concat": [image_cond_in[a:b]]})
else:
x_out = torch.zeros_like(x_in)
batch_size = batch_size*2 if shared.batch_cond_uncond else batch_size
for batch_offset in range(0, tensor.shape[0], batch_size):
a = batch_offset
b = min(a + batch_size, tensor.shape[0])
- x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=tensor[a:b])
+ x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": [tensor[a:b]], "c_concat": [image_cond_in[a:b]]})
- x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond=uncond)
+ x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond={"c_crossattn": [uncond], "c_concat": [image_cond_in[-uncond.shape[0]:]]})
denoised_uncond = x_out[-uncond.shape[0]:]
denoised = torch.clone(denoised_uncond)
@@ -268,25 +305,6 @@ class CFGDenoiser(torch.nn.Module):
return denoised
-def extended_trange(sampler, count, *args, **kwargs):
- state.sampling_steps = count
- state.sampling_step = 0
-
- seq = range(count) if cmd_opts.disable_console_progressbars else tqdm.trange(count, *args, desc=state.job, file=shared.progress_print_out, **kwargs)
-
- for x in seq:
- if state.interrupted or state.skipped:
- break
-
- if sampler.stop_at is not None and x > sampler.stop_at:
- break
-
- yield x
-
- state.sampling_step += 1
- shared.total_tqdm.update()
-
-
class TorchHijack:
def __init__(self, kdiff_sampler):
self.kdiff_sampler = kdiff_sampler
@@ -314,9 +332,30 @@ class KDiffusionSampler:
self.eta = None
self.default_eta = 1.0
self.config = None
+ self.last_latent = None
+
+ self.conditioning_key = sd_model.model.conditioning_key
def callback_state(self, d):
- store_latent(d["denoised"])
+ step = d['i']
+ latent = d["denoised"]
+ store_latent(latent)
+ self.last_latent = latent
+
+ if self.stop_at is not None and step > self.stop_at:
+ raise InterruptedException
+
+ state.sampling_step = step
+ shared.total_tqdm.update()
+
+ def launch_sampling(self, steps, func):
+ state.sampling_steps = steps
+ state.sampling_step = 0
+
+ try:
+ return func()
+ except InterruptedException:
+ return self.last_latent
def number_of_needed_noises(self, p):
return p.steps
@@ -339,9 +378,6 @@ class KDiffusionSampler:
self.sampler_noise_index = 0
self.eta = p.eta or opts.eta_ancestral
- if hasattr(k_diffusion.sampling, 'trange'):
- k_diffusion.sampling.trange = lambda *args, **kwargs: extended_trange(self, *args, **kwargs)
-
if self.sampler_noises is not None:
k_diffusion.sampling.torch = TorchHijack(self)
@@ -355,7 +391,7 @@ class KDiffusionSampler:
return extra_params_kwargs
- def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None):
+ def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
steps, t_enc = setup_img2img_steps(p, steps)
if p.sampler_noise_scheduler_override:
@@ -382,11 +418,18 @@ class KDiffusionSampler:
extra_params_kwargs['sigmas'] = sigma_sched
self.model_wrap_cfg.init_latent = x
+ self.last_latent = x
- return self.func(self.model_wrap_cfg, xi, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs)
+ samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, xi, extra_args={
+ 'cond': conditioning,
+ 'image_cond': image_conditioning,
+ 'uncond': unconditional_conditioning,
+ 'cond_scale': p.cfg_scale
+ }, disable=False, callback=self.callback_state, **extra_params_kwargs))
+ return samples
- def sample(self, p, x, conditioning, unconditional_conditioning, steps=None):
+ def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
steps = steps or p.steps
if p.sampler_noise_scheduler_override:
@@ -406,6 +449,14 @@ class KDiffusionSampler:
extra_params_kwargs['n'] = steps
else:
extra_params_kwargs['sigmas'] = sigmas
- samples = self.func(self.model_wrap_cfg, x, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': p.cfg_scale}, disable=False, callback=self.callback_state, **extra_params_kwargs)
+
+ self.last_latent = x
+ samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={
+ 'cond': conditioning,
+ 'image_cond': image_conditioning,
+ 'uncond': unconditional_conditioning,
+ 'cond_scale': p.cfg_scale
+ }, disable=False, callback=self.callback_state, **extra_params_kwargs))
+
return samples
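The removed extended_trange helper is replaced by launch_sampling plus an exception raised from the step callback, so stopping early no longer depends on patching k_diffusion.sampling.trange. A rough sketch of the pattern, assuming InterruptedException is the exception class defined elsewhere in sd_samplers:

    class InterruptedException(Exception):
        pass

    class SamplerSketch:
        def __init__(self):
            self.stop_at = None      # step index at which to abort, or None
            self.last_latent = None  # most recent denoised latent seen by the callback

        def callback_state(self, d):
            # called by k-diffusion after every step with {'i': step, 'denoised': latent, ...}
            self.last_latent = d["denoised"]
            if self.stop_at is not None and d["i"] > self.stop_at:
                raise InterruptedException

        def launch_sampling(self, steps, func):
            # func is a zero-argument lambda wrapping the actual sampler call
            try:
                return func()
            except InterruptedException:
                return self.last_latent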
diff --git a/modules/shared.py b/modules/shared.py
index 4d735414..0dbe360d 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -3,6 +3,7 @@ import datetime
import json
import os
import sys
+from collections import OrderedDict
import gradio as gr
import tqdm
@@ -13,7 +14,7 @@ import modules.memmon
import modules.sd_models
import modules.styles
import modules.devices as devices
-from modules import sd_samplers, sd_models
+from modules import sd_samplers, sd_models, localization
from modules.hypernetworks import hypernetwork
from modules.paths import models_path, script_path, sd_path
@@ -30,7 +31,9 @@ parser.add_argument("--no-half-vae", action='store_true', help="do not switch th
parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware acceleration in browser)")
parser.add_argument("--max-batch-count", type=int, default=16, help="maximum batch count value for the UI")
parser.add_argument("--embeddings-dir", type=str, default=os.path.join(script_path, 'embeddings'), help="embeddings directory for textual inversion (default: embeddings)")
+parser.add_argument("--aesthetic_embeddings-dir", type=str, default=os.path.join(models_path, 'aesthetic_embeddings'), help="aesthetic_embeddings directory(default: aesthetic_embeddings)")
parser.add_argument("--hypernetwork-dir", type=str, default=os.path.join(models_path, 'hypernetworks'), help="hypernetwork directory")
+parser.add_argument("--localizations-dir", type=str, default=os.path.join(script_path, 'localizations'), help="localizations directory")
parser.add_argument("--allow-code", action='store_true', help="allow custom script execution from webui")
parser.add_argument("--medvram", action='store_true', help="enable stable diffusion model optimizations for sacrificing a little speed for low VRM usage")
parser.add_argument("--lowvram", action='store_true', help="enable stable diffusion model optimizations for sacrificing a lot of speed for very low VRM usage")
@@ -69,17 +72,16 @@ parser.add_argument("--gradio-img2img-tool", type=str, help='gradio image upload
parser.add_argument("--opt-channelslast", action='store_true', help="change memory type for stable diffusion to channels last")
parser.add_argument("--styles-file", type=str, help="filename to use for styles", default=os.path.join(script_path, 'styles.csv'))
parser.add_argument("--autolaunch", action='store_true', help="open the webui URL in the system's default browser upon launch", default=False)
+parser.add_argument("--theme", type=str, help="launches the UI with light or dark theme", default=None)
parser.add_argument("--use-textbox-seed", action='store_true', help="use textbox for seeds in UI (no up/down, but possible to input long seeds)", default=False)
parser.add_argument("--disable-console-progressbars", action='store_true', help="do not output progressbars to console", default=False)
parser.add_argument("--enable-console-prompts", action='store_true', help="print prompts to console when generating with txt2img and img2img", default=False)
parser.add_argument('--vae-path', type=str, help='Path to Variational Autoencoders model', default=None)
parser.add_argument("--disable-safe-unpickle", action='store_true', help="disable checking pytorch models for malicious code", default=False)
+parser.add_argument("--api", action='store_true', help="use api=True to launch the api with the webui")
+parser.add_argument("--nowebui", action='store_true', help="use api=True to launch the api instead of the webui")
parser.add_argument("--browse-all-images", action='store_true', help="Allow browsing all images by Image Browser", default=False)
-
-cmd_opts = parser.parse_args()
-
-
cmd_opts = parser.parse_args()
restricted_opts = [
"samples_filename_pattern",
@@ -108,6 +110,20 @@ hypernetworks = hypernetwork.list_hypernetworks(cmd_opts.hypernetwork_dir)
loaded_hypernetwork = None
+os.makedirs(cmd_opts.aesthetic_embeddings_dir, exist_ok=True)
+aesthetic_embeddings = {}
+
+
+def update_aesthetic_embeddings():
+ global aesthetic_embeddings
+ aesthetic_embeddings = {f.replace(".pt", ""): os.path.join(cmd_opts.aesthetic_embeddings_dir, f) for f in
+ os.listdir(cmd_opts.aesthetic_embeddings_dir) if f.endswith(".pt")}
+ aesthetic_embeddings = OrderedDict(**{"None": None}, **aesthetic_embeddings)
+
+
+update_aesthetic_embeddings()
+
+
def reload_hypernetworks():
global hypernetworks
@@ -139,7 +155,7 @@ class State:
self.job_no += 1
self.sampling_step = 0
self.current_image_sampling_step = 0
-
+
def get_job_timestamp(self):
return datetime.datetime.now().strftime("%Y%m%d%H%M%S") # shouldn't this return job_timestamp?
@@ -155,6 +171,8 @@ interrogator = modules.interrogate.InterrogateModels("interrogate")
face_restorers = []
+localization.list_localizations(cmd_opts.localizations_dir)
+
def realesrgan_models_names():
import modules.realesrgan_model
@@ -249,7 +267,7 @@ options_templates.update(options_section(('system', "System"), {
}))
options_templates.update(options_section(('training', "Training"), {
- "unload_models_when_training": OptionInfo(False, "Unload VAE and CLIP from VRAM when training"),
+ "unload_models_when_training": OptionInfo(False, "Move VAE and CLIP to RAM when training hypernetwork. Saves VRAM."),
"dataset_filename_word_regex": OptionInfo("", "Filename word regex"),
"dataset_filename_join_string": OptionInfo(" ", "Filename join string"),
"training_image_repeats_per_epoch": OptionInfo(1, "Number of repeats for a single input image per epoch; used only for displaying epoch number", gr.Number, {"precision": 0}),
@@ -295,11 +313,13 @@ options_templates.update(options_section(('ui', "User interface"), {
"do_not_show_images": OptionInfo(False, "Do not show any images in results for web"),
"add_model_hash_to_info": OptionInfo(True, "Add model hash to generation information"),
"add_model_name_to_info": OptionInfo(False, "Add model name to generation information"),
+ "disable_weights_auto_swap": OptionInfo(False, "When reading generation parameters from text into UI (from PNG info or pasted text), do not change the selected model/checkpoint."),
"font": OptionInfo("", "Font for image grids that have text"),
"js_modal_lightbox": OptionInfo(True, "Enable full page image viewer"),
"js_modal_lightbox_initially_zoomed": OptionInfo(True, "Show images zoomed in by default in full page image viewer"),
"show_progress_in_title": OptionInfo(True, "Show generation progress in window title."),
'quicksettings': OptionInfo("sd_model_checkpoint", "Quicksettings list"),
+ 'localization': OptionInfo("None", "Localization (requires restart)", gr.Dropdown, lambda: {"choices": ["None"] + list(localization.localizations.keys())}, refresh=lambda: localization.list_localizations(cmd_opts.localizations_dir)),
}))
options_templates.update(options_section(('sampler-params', "Sampler parameters"), {
@@ -393,6 +413,11 @@ sd_upscalers = []
sd_model = None
+clip_model = None
+
+from modules.aesthetic_clip import AestheticCLIP
+aesthetic_clip = AestheticCLIP()
+
progress_print_out = sys.stdout
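For context, the aesthetic-embedding listing added to shared.py amounts to scanning a directory for *.pt files and keeping "None" as the first dropdown choice. A self-contained sketch of that logic (names are illustrative; the real code stores the result in shared.aesthetic_embeddings):

    import os
    from collections import OrderedDict

    def list_pt_embeddings(directory: str) -> OrderedDict:
        found = {f[:-len(".pt")]: os.path.join(directory, f)
                 for f in os.listdir(directory) if f.endswith(".pt")}
        # "None" stays first so the UI dropdown can disable the feature
        return OrderedDict(**{"None": None}, **found)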
diff --git a/modules/styles.py b/modules/styles.py
index d44dfc1a..3bf5c5b6 100644
--- a/modules/styles.py
+++ b/modules/styles.py
@@ -45,7 +45,7 @@ class StyleDatabase:
if not os.path.exists(path):
return
- with open(path, "r", encoding="utf8", newline='') as file:
+ with open(path, "r", encoding="utf-8-sig", newline='') as file:
reader = csv.DictReader(file)
for row in reader:
# Support loading old CSV format with "name, text"-columns
@@ -79,7 +79,7 @@ class StyleDatabase:
def save_styles(self, path: str) -> None:
# Write to temporary file first, so we don't nuke the file if something goes wrong
fd, temp_path = tempfile.mkstemp(".csv")
- with os.fdopen(fd, "w", encoding="utf8", newline='') as file:
+ with os.fdopen(fd, "w", encoding="utf-8-sig", newline='') as file:
# _fields is actually part of the public API: typing.NamedTuple is a replacement for collections.NamedTuple,
# and collections.NamedTuple has explicit documentation for accessing _fields. Same goes for _asdict()
writer = csv.DictWriter(file, fieldnames=PromptStyle._fields)
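The styles.py change swaps utf8 for utf-8-sig, which strips a leading byte-order mark on read and writes one on save, so styles.csv round-trips cleanly through BOM-writing editors such as Excel. A small illustration (hypothetical file name):

    # write and read back a CSV with a BOM; the decoded text never shows the marker
    with open("styles_example.csv", "w", encoding="utf-8-sig", newline="") as f:
        f.write("name,prompt,negative_prompt\n")
    with open("styles_example.csv", "r", encoding="utf-8-sig", newline="") as f:
        print(f.read())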
diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py
index 23bb4b6a..5b1c5002 100644
--- a/modules/textual_inversion/dataset.py
+++ b/modules/textual_inversion/dataset.py
@@ -83,7 +83,7 @@ class PersonalizedBase(Dataset):
self.dataset.append(entry)
- assert len(self.dataset) > 1, "No images have been found in the dataset."
+ assert len(self.dataset) > 0, "No images have been found in the dataset."
self.length = len(self.dataset) * repeats // batch_size
self.initial_indexes = np.arange(len(self.dataset))
@@ -91,7 +91,7 @@ class PersonalizedBase(Dataset):
self.shuffle()
def shuffle(self):
- self.indexes = self.initial_indexes[torch.randperm(self.initial_indexes.shape[0])]
+ self.indexes = self.initial_indexes[torch.randperm(self.initial_indexes.shape[0]).numpy()]
def create_text(self, filename_text):
text = random.choice(self.lines)
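The dataset shuffle fix converts the torch permutation to numpy before indexing, since self.initial_indexes is a numpy array and indexing it with a torch LongTensor is not reliable across torch/numpy versions. A minimal reproduction of the corrected pattern:

    import numpy as np
    import torch

    initial_indexes = np.arange(10)
    perm = torch.randperm(initial_indexes.shape[0]).numpy()  # torch tensor -> numpy indices
    shuffled = initial_indexes[perm]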
diff --git a/modules/textual_inversion/image_embedding.py b/modules/textual_inversion/image_embedding.py
index 898ce3b3..ea653806 100644
--- a/modules/textual_inversion/image_embedding.py
+++ b/modules/textual_inversion/image_embedding.py
@@ -5,6 +5,7 @@ import zlib
from PIL import Image, PngImagePlugin, ImageDraw, ImageFont
from fonts.ttf import Roboto
import torch
+from modules.shared import opts
class EmbeddingEncoder(json.JSONEncoder):
@@ -133,7 +134,7 @@ def caption_image_overlay(srcimage, title, footerLeft, footerMid, footerRight, t
from math import cos
image = srcimage.copy()
-
+ fontsize = 32
if textfont is None:
try:
textfont = ImageFont.truetype(opts.font or Roboto, fontsize)
@@ -150,7 +151,7 @@ def caption_image_overlay(srcimage, title, footerLeft, footerMid, footerRight, t
image = Image.alpha_composite(image.convert('RGBA'), gradient.resize(image.size))
draw = ImageDraw.Draw(image)
- fontsize = 32
+
font = ImageFont.truetype(textfont, fontsize)
padding = 10
diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py
index 886cf0c3..33eaddb6 100644
--- a/modules/textual_inversion/preprocess.py
+++ b/modules/textual_inversion/preprocess.py
@@ -1,5 +1,6 @@
import os
from PIL import Image, ImageOps
+import math
import platform
import sys
import tqdm
@@ -11,7 +12,7 @@ if cmd_opts.deepdanbooru:
import modules.deepbooru as deepbooru
-def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False):
+def preprocess(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2):
try:
if process_caption:
shared.interrogator.load()
@@ -21,7 +22,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_
db_opts[deepbooru.OPT_INCLUDE_RANKS] = False
deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts)
- preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru)
+ preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru, split_threshold, overlap_ratio)
finally:
@@ -33,11 +34,13 @@ def preprocess(process_src, process_dst, process_width, process_height, process_
-def preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False):
+def preprocess_work(process_src, process_dst, process_width, process_height, preprocess_txt_action, process_flip, process_split, process_caption, process_caption_deepbooru=False, split_threshold=0.5, overlap_ratio=0.2):
width = process_width
height = process_height
src = os.path.abspath(process_src)
dst = os.path.abspath(process_dst)
+ split_threshold = max(0.0, min(1.0, split_threshold))
+ overlap_ratio = max(0.0, min(0.9, overlap_ratio))
assert src != dst, 'same directory specified as source and destination'
@@ -48,7 +51,7 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pro
shared.state.textinfo = "Preprocessing..."
shared.state.job_count = len(files)
- def save_pic_with_caption(image, index):
+ def save_pic_with_caption(image, index, existing_caption=None):
caption = ""
if process_caption:
@@ -66,17 +69,49 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pro
basename = f"{index:05}-{subindex[0]}-{filename_part}"
image.save(os.path.join(dst, f"{basename}.png"))
+ if preprocess_txt_action == 'prepend' and existing_caption:
+ caption = existing_caption + ' ' + caption
+ elif preprocess_txt_action == 'append' and existing_caption:
+ caption = caption + ' ' + existing_caption
+ elif preprocess_txt_action == 'copy' and existing_caption:
+ caption = existing_caption
+
+ caption = caption.strip()
+
if len(caption) > 0:
with open(os.path.join(dst, f"{basename}.txt"), "w", encoding="utf8") as file:
file.write(caption)
subindex[0] += 1
- def save_pic(image, index):
- save_pic_with_caption(image, index)
+ def save_pic(image, index, existing_caption=None):
+ save_pic_with_caption(image, index, existing_caption=existing_caption)
if process_flip:
- save_pic_with_caption(ImageOps.mirror(image), index)
+ save_pic_with_caption(ImageOps.mirror(image), index, existing_caption=existing_caption)
+
+ def split_pic(image, inverse_xy):
+ if inverse_xy:
+ from_w, from_h = image.height, image.width
+ to_w, to_h = height, width
+ else:
+ from_w, from_h = image.width, image.height
+ to_w, to_h = width, height
+ h = from_h * to_w // from_w
+ if inverse_xy:
+ image = image.resize((h, to_w))
+ else:
+ image = image.resize((to_w, h))
+
+ split_count = math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio)))
+ y_step = (h - to_h) / (split_count - 1)
+ for i in range(split_count):
+ y = int(y_step * i)
+ if inverse_xy:
+ splitted = image.crop((y, 0, y + to_h, to_w))
+ else:
+ splitted = image.crop((0, y, to_w, y + to_h))
+ yield splitted
for index, imagefile in enumerate(tqdm.tqdm(files)):
subindex = [0]
@@ -86,31 +121,27 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pro
except Exception:
continue
+ existing_caption = None
+ existing_caption_filename = os.path.splitext(filename)[0] + '.txt'
+ if os.path.exists(existing_caption_filename):
+ with open(existing_caption_filename, 'r', encoding="utf8") as file:
+ existing_caption = file.read()
+
if shared.state.interrupted:
break
- ratio = img.height / img.width
- is_tall = ratio > 1.35
- is_wide = ratio < 1 / 1.35
-
- if process_split and is_tall:
- img = img.resize((width, height * img.height // img.width))
-
- top = img.crop((0, 0, width, height))
- save_pic(top, index)
-
- bot = img.crop((0, img.height - height, width, img.height))
- save_pic(bot, index)
- elif process_split and is_wide:
- img = img.resize((width * img.width // img.height, height))
-
- left = img.crop((0, 0, width, height))
- save_pic(left, index)
+ if img.height > img.width:
+ ratio = (img.width * height) / (img.height * width)
+ inverse_xy = False
+ else:
+ ratio = (img.height * width) / (img.width * height)
+ inverse_xy = True
- right = img.crop((img.width - width, 0, img.width, height))
- save_pic(right, index)
+ if process_split and ratio < 1.0 and ratio <= split_threshold:
+ for splitted in split_pic(img, inverse_xy):
+ save_pic(splitted, index, existing_caption=existing_caption)
else:
img = images.resize_image(1, img, width, height)
- save_pic(img, index)
+ save_pic(img, index, existing_caption=existing_caption)
shared.state.nextjob()
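The new split_pic replaces the old top/bottom (or left/right) two-crop behaviour with evenly spaced overlapping crops, controlled by split_threshold and overlap_ratio. A worked example of the arithmetic, assuming a 512x1280 portrait source, 512x512 targets and the default overlap of 0.2:

    import math

    to_w, to_h, overlap_ratio = 512, 512, 0.2
    from_w, from_h = 512, 1280
    h = from_h * to_w // from_w                                   # height after resizing width to 512 -> 1280
    split_count = math.ceil((h - to_h * overlap_ratio) / (to_h * (1.0 - overlap_ratio)))  # -> 3 crops
    y_step = (h - to_h) / (split_count - 1)                       # -> 384 px between crop tops
    crop_tops = [int(y_step * i) for i in range(split_count)]     # -> [0, 384, 768]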
diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py
index 3be69562..529ed3e2 100644
--- a/modules/textual_inversion/textual_inversion.py
+++ b/modules/textual_inversion/textual_inversion.py
@@ -153,7 +153,7 @@ class EmbeddingDatabase:
return None, None
-def create_embedding(name, num_vectors_per_token, init_text='*'):
+def create_embedding(name, num_vectors_per_token, overwrite_old, init_text='*'):
cond_model = shared.sd_model.cond_stage_model
embedding_layer = cond_model.wrapped.transformer.text_model.embeddings
@@ -165,7 +165,8 @@ def create_embedding(name, num_vectors_per_token, init_text='*'):
vec[i] = embedded[i * int(embedded.shape[0]) // num_vectors_per_token]
fn = os.path.join(shared.cmd_opts.embeddings_dir, f"{name}.pt")
- assert not os.path.exists(fn), f"file {fn} already exists"
+ if not overwrite_old:
+ assert not os.path.exists(fn), f"file {fn} already exists"
embedding = Embedding(vec, name)
embedding.step = 0
@@ -275,6 +276,7 @@ def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_direc
loss.backward()
optimizer.step()
+
epoch_num = embedding.step // len(ds)
epoch_step = embedding.step - (epoch_num * len(ds)) + 1
diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py
index 36881e7a..e712284d 100644
--- a/modules/textual_inversion/ui.py
+++ b/modules/textual_inversion/ui.py
@@ -7,8 +7,8 @@ import modules.textual_inversion.preprocess
from modules import sd_hijack, shared
-def create_embedding(name, initialization_text, nvpt):
- filename = modules.textual_inversion.textual_inversion.create_embedding(name, nvpt, init_text=initialization_text)
+def create_embedding(name, initialization_text, nvpt, overwrite_old):
+ filename = modules.textual_inversion.textual_inversion.create_embedding(name, nvpt, overwrite_old, init_text=initialization_text)
sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings()
diff --git a/modules/txt2img.py b/modules/txt2img.py
index 2381347f..1761cfa2 100644
--- a/modules/txt2img.py
+++ b/modules/txt2img.py
@@ -1,12 +1,13 @@
import modules.scripts
-from modules.processing import StableDiffusionProcessing, Processed, StableDiffusionProcessingTxt2Img, StableDiffusionProcessingImg2Img, process_images
+from modules.processing import StableDiffusionProcessing, Processed, StableDiffusionProcessingTxt2Img, \
+ StableDiffusionProcessingImg2Img, process_images
from modules.shared import opts, cmd_opts
import modules.shared as shared
import modules.processing as processing
from modules.ui import plaintext_to_html
-def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, steps: int, sampler_index: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, enable_hr: bool, denoising_strength: float, firstphase_width: int, firstphase_height: int, *args):
+def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2: str, steps: int, sampler_index: int, restore_faces: bool, tiling: bool, n_iter: int, batch_size: int, cfg_scale: float, seed: int, subseed: int, subseed_strength: float, seed_resize_from_h: int, seed_resize_from_w: int, seed_enable_extras: bool, height: int, width: int, enable_hr: bool, denoising_strength: float, firstphase_width: int, firstphase_height: int, aesthetic_lr=0, aesthetic_weight=0, aesthetic_steps=0, aesthetic_imgs=None, aesthetic_slerp=False, aesthetic_imgs_text="", aesthetic_slerp_angle=0.15, aesthetic_text_negative=False, *args):
p = StableDiffusionProcessingTxt2Img(
sd_model=shared.sd_model,
outpath_samples=opts.outdir_samples or opts.outdir_txt2img_samples,
@@ -35,6 +36,8 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2:
firstphase_height=firstphase_height if enable_hr else None,
)
+ shared.aesthetic_clip.set_aesthetic_params(p, float(aesthetic_lr), float(aesthetic_weight), int(aesthetic_steps), aesthetic_imgs, aesthetic_slerp, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative)
+
if cmd_opts.enable_console_prompts:
print(f"\ntxt2img: {prompt}", file=shared.progress_print_out)
@@ -53,4 +56,3 @@ def txt2img(prompt: str, negative_prompt: str, prompt_style: str, prompt_style2:
processed.images = []
return processed.images, generation_info_js, plaintext_to_html(processed.info)
-
diff --git a/modules/ui.py b/modules/ui.py
index 88f46659..70a9cf10 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -12,7 +12,7 @@ import time
import traceback
import platform
import subprocess as sp
-from functools import reduce
+from functools import partial, reduce
import numpy as np
import torch
@@ -23,9 +23,11 @@ import gradio as gr
import gradio.utils
import gradio.routes
-from modules import sd_hijack, sd_models
+from modules import sd_hijack, sd_models, localization
from modules.paths import script_path
-from modules.shared import opts, cmd_opts, restricted_opts
+
+from modules.shared import opts, cmd_opts, restricted_opts, aesthetic_embeddings
+
if cmd_opts.deepdanbooru:
from modules.deepbooru import get_deepbooru_tags
import modules.shared as shared
@@ -41,8 +43,11 @@ from modules import prompt_parser
from modules.images import save_image
import modules.textual_inversion.ui
import modules.hypernetworks.ui
+
+import modules.aesthetic_clip as aesthetic_clip
import modules.images_history as img_his
+
# this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI
mimetypes.init()
mimetypes.add_type('application/javascript', '.js')
@@ -261,6 +266,24 @@ def wrap_gradio_call(func, extra_outputs=None):
return f
+def calc_time_left(progress, threshold, label, force_display):
+ if progress == 0:
+ return ""
+ else:
+ time_since_start = time.time() - shared.state.time_start
+ eta = (time_since_start/progress)
+ eta_relative = eta-time_since_start
+ if (eta_relative > threshold and progress > 0.02) or force_display:
+ if eta_relative > 3600:
+ return label + time.strftime('%H:%M:%S', time.gmtime(eta_relative))
+ elif eta_relative > 60:
+ return label + time.strftime('%M:%S', time.gmtime(eta_relative))
+ else:
+ return label + time.strftime('%Ss', time.gmtime(eta_relative))
+ else:
+ return ""
+
+
def check_progress_call(id_part):
if shared.state.job_count == 0:
return "", gr_show(False), gr_show(False), gr_show(False)
@@ -272,11 +295,15 @@ def check_progress_call(id_part):
if shared.state.sampling_steps > 0:
progress += 1 / shared.state.job_count * shared.state.sampling_step / shared.state.sampling_steps
+ time_left = calc_time_left( progress, 1, " ETA: ", shared.state.time_left_force_display )
+ if time_left != "":
+ shared.state.time_left_force_display = True
+
progress = min(progress, 1)
progressbar = ""
if opts.show_progressbar:
- progressbar = f"""<div class='progressDiv'><div class='progress' style="width:{progress * 100}%">{str(int(progress*100))+"%" if progress > 0.01 else ""}</div></div>"""
+ progressbar = f"""<div class='progressDiv'><div class='progress' style="overflow:visible;width:{progress * 100}%;white-space:nowrap;">{"&nbsp;" * 2 + str(int(progress*100))+"%" + time_left if progress > 0.01 else ""}</div></div>"""
image = gr_show(False)
preview_visibility = gr_show(False)
@@ -308,6 +335,8 @@ def check_progress_call_initial(id_part):
shared.state.current_latent = None
shared.state.current_image = None
shared.state.textinfo = None
+ shared.state.time_start = time.time()
+ shared.state.time_left_force_display = False
return check_progress_call(id_part)
@@ -458,14 +487,14 @@ def create_toprow(is_img2img):
with gr.Row():
with gr.Column(scale=80):
with gr.Row():
- prompt = gr.Textbox(label="Prompt", elem_id=f"{id_part}_prompt", show_label=False, lines=2,
+ prompt = gr.Textbox(label="Prompt", elem_id=f"{id_part}_prompt", show_label=False, lines=2,
placeholder="Prompt (press Ctrl+Enter or Alt+Enter to generate)"
)
with gr.Row():
with gr.Column(scale=80):
with gr.Row():
- negative_prompt = gr.Textbox(label="Negative prompt", elem_id=f"{id_part}_neg_prompt", show_label=False, lines=2,
+ negative_prompt = gr.Textbox(label="Negative prompt", elem_id=f"{id_part}_neg_prompt", show_label=False, lines=2,
placeholder="Negative prompt (press Ctrl+Enter or Alt+Enter to generate)"
)
@@ -542,6 +571,10 @@ def apply_setting(key, value):
if value is None:
return gr.update()
+ # don't allow the model to be swapped when a model hash exists in the prompt
+ if key == "sd_model_checkpoint" and opts.disable_weights_auto_swap:
+ return gr.update()
+
if key == "sd_model_checkpoint":
ckpt_info = sd_models.get_closet_checkpoint_match(value)
@@ -564,27 +597,29 @@ def apply_setting(key, value):
return value
-def create_ui(wrap_gradio_gpu_call):
- import modules.img2img
- import modules.txt2img
+def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_id):
+ def refresh():
+ refresh_method()
+ args = refreshed_args() if callable(refreshed_args) else refreshed_args
- def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_id):
- def refresh():
- refresh_method()
- args = refreshed_args() if callable(refreshed_args) else refreshed_args
+ for k, v in args.items():
+ setattr(refresh_component, k, v)
- for k, v in args.items():
- setattr(refresh_component, k, v)
+ return gr.update(**(args or {}))
- return gr.update(**(args or {}))
+ refresh_button = gr.Button(value=refresh_symbol, elem_id=elem_id)
+ refresh_button.click(
+ fn=refresh,
+ inputs=[],
+ outputs=[refresh_component]
+ )
+ return refresh_button
+
+
+def create_ui(wrap_gradio_gpu_call):
+ import modules.img2img
+ import modules.txt2img
- refresh_button = gr.Button(value=refresh_symbol, elem_id=elem_id)
- refresh_button.click(
- fn = refresh,
- inputs = [],
- outputs = [refresh_component]
- )
- return refresh_button
with gr.Blocks(analytics_enabled=False) as txt2img_interface:
txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _, txt2img_prompt_style_apply, txt2img_save_style, txt2img_paste, token_counter, token_button = create_toprow(is_img2img=False)
@@ -627,6 +662,8 @@ def create_ui(wrap_gradio_gpu_call):
seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs()
+ aesthetic_weight, aesthetic_steps, aesthetic_lr, aesthetic_slerp, aesthetic_imgs, aesthetic_imgs_text, aesthetic_slerp_angle, aesthetic_text_negative = aesthetic_clip.create_ui()
+
with gr.Group():
custom_inputs = modules.scripts.scripts_txt2img.setup_ui(is_img2img=False)
@@ -681,7 +718,16 @@ def create_ui(wrap_gradio_gpu_call):
denoising_strength,
firstphase_width,
firstphase_height,
+ aesthetic_lr,
+ aesthetic_weight,
+ aesthetic_steps,
+ aesthetic_imgs,
+ aesthetic_slerp,
+ aesthetic_imgs_text,
+ aesthetic_slerp_angle,
+ aesthetic_text_negative
] + custom_inputs,
+
outputs=[
txt2img_gallery,
generation_info,
@@ -758,6 +804,14 @@ def create_ui(wrap_gradio_gpu_call):
(hr_options, lambda d: gr.Row.update(visible="Denoising strength" in d)),
(firstphase_width, "First pass size-1"),
(firstphase_height, "First pass size-2"),
+ (aesthetic_lr, "Aesthetic LR"),
+ (aesthetic_weight, "Aesthetic weight"),
+ (aesthetic_steps, "Aesthetic steps"),
+ (aesthetic_imgs, "Aesthetic embedding"),
+ (aesthetic_slerp, "Aesthetic slerp"),
+ (aesthetic_imgs_text, "Aesthetic text"),
+ (aesthetic_text_negative, "Aesthetic text negative"),
+ (aesthetic_slerp_angle, "Aesthetic slerp angle"),
]
txt2img_preview_params = [
@@ -825,8 +879,8 @@ def create_ui(wrap_gradio_gpu_call):
sampler_index = gr.Radio(label='Sampling method', choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index")
with gr.Group():
- width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512)
- height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512)
+ width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512, elem_id="img2img_width")
+ height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512, elem_id="img2img_height")
with gr.Row():
restore_faces = gr.Checkbox(label='Restore faces', value=False, visible=len(shared.face_restorers) > 1)
@@ -842,6 +896,8 @@ def create_ui(wrap_gradio_gpu_call):
seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w, seed_checkbox = create_seed_inputs()
+ aesthetic_weight_im, aesthetic_steps_im, aesthetic_lr_im, aesthetic_slerp_im, aesthetic_imgs_im, aesthetic_imgs_text_im, aesthetic_slerp_angle_im, aesthetic_text_negative_im = aesthetic_clip.create_ui()
+
with gr.Group():
custom_inputs = modules.scripts.scripts_img2img.setup_ui(is_img2img=True)
@@ -932,6 +988,14 @@ def create_ui(wrap_gradio_gpu_call):
inpainting_mask_invert,
img2img_batch_input_dir,
img2img_batch_output_dir,
+ aesthetic_lr_im,
+ aesthetic_weight_im,
+ aesthetic_steps_im,
+ aesthetic_imgs_im,
+ aesthetic_slerp_im,
+ aesthetic_imgs_text_im,
+ aesthetic_slerp_angle_im,
+ aesthetic_text_negative_im,
] + custom_inputs,
outputs=[
img2img_gallery,
@@ -1023,6 +1087,14 @@ def create_ui(wrap_gradio_gpu_call):
(seed_resize_from_w, "Seed resize from-1"),
(seed_resize_from_h, "Seed resize from-2"),
(denoising_strength, "Denoising strength"),
+ (aesthetic_lr_im, "Aesthetic LR"),
+ (aesthetic_weight_im, "Aesthetic weight"),
+ (aesthetic_steps_im, "Aesthetic steps"),
+ (aesthetic_imgs_im, "Aesthetic embedding"),
+ (aesthetic_slerp_im, "Aesthetic slerp"),
+ (aesthetic_imgs_text_im, "Aesthetic text"),
+ (aesthetic_text_negative_im, "Aesthetic text negative"),
+ (aesthetic_slerp_angle_im, "Aesthetic slerp angle"),
]
token_button.click(fn=update_token_counter, inputs=[img2img_prompt, steps], outputs=[token_counter])
@@ -1056,10 +1128,10 @@ def create_ui(wrap_gradio_gpu_call):
upscaling_crop = gr.Checkbox(label='Crop to fit', value=True)
with gr.Group():
- extras_upscaler_1 = gr.Radio(label='Upscaler 1', choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index")
+ extras_upscaler_1 = gr.Radio(label='Upscaler 1', elem_id="extras_upscaler_1", choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index")
with gr.Group():
- extras_upscaler_2 = gr.Radio(label='Upscaler 2', choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index")
+ extras_upscaler_2 = gr.Radio(label='Upscaler 2', elem_id="extras_upscaler_2", choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index")
extras_upscaler_2_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="Upscaler 2 visibility", value=1)
with gr.Group():
@@ -1183,6 +1255,7 @@ def create_ui(wrap_gradio_gpu_call):
new_embedding_name = gr.Textbox(label="Name")
initialization_text = gr.Textbox(label="Initialization text", value="*")
nvpt = gr.Slider(label="Number of vectors per token", minimum=1, maximum=75, step=1, value=1)
+ overwrite_old_embedding = gr.Checkbox(value=False, label="Overwrite Old Embedding")
with gr.Row():
with gr.Column(scale=3):
@@ -1191,9 +1264,25 @@ def create_ui(wrap_gradio_gpu_call):
with gr.Column():
create_embedding = gr.Button(value="Create embedding", variant='primary')
+ with gr.Tab(label="Create aesthetic images embedding"):
+
+ new_embedding_name_ae = gr.Textbox(label="Name")
+ process_src_ae = gr.Textbox(label='Source directory')
+ batch_ae = gr.Slider(minimum=1, maximum=1024, step=1, label="Batch size", value=256)
+ with gr.Row():
+ with gr.Column(scale=3):
+ gr.HTML(value="")
+
+ with gr.Column():
+ create_embedding_ae = gr.Button(value="Create images embedding", variant='primary')
+
with gr.Tab(label="Create hypernetwork"):
new_hypernetwork_name = gr.Textbox(label="Name")
new_hypernetwork_sizes = gr.CheckboxGroup(label="Modules", value=["768", "320", "640", "1280"], choices=["768", "320", "640", "1280"])
+ new_hypernetwork_layer_structure = gr.Textbox("1, 2, 1", label="Enter hypernetwork layer structure", placeholder="1st and last digit must be 1, e.g. '1, 2, 1'")
+ new_hypernetwork_add_layer_norm = gr.Checkbox(label="Add layer normalization")
+ overwrite_old_hypernetwork = gr.Checkbox(value=False, label="Overwrite Old Hypernetwork")
+ new_hypernetwork_activation_func = gr.Dropdown(value="relu", label="Select activation function of hypernetwork", choices=["linear", "relu", "leakyrelu"])
with gr.Row():
with gr.Column(scale=3):
@@ -1207,13 +1296,18 @@ def create_ui(wrap_gradio_gpu_call):
process_dst = gr.Textbox(label='Destination directory')
process_width = gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512)
process_height = gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512)
+ preprocess_txt_action = gr.Dropdown(label='Existing Caption txt Action', value="ignore", choices=["ignore", "copy", "prepend", "append"])
with gr.Row():
process_flip = gr.Checkbox(label='Create flipped copies')
- process_split = gr.Checkbox(label='Split oversized images into two')
+ process_split = gr.Checkbox(label='Split oversized images')
process_caption = gr.Checkbox(label='Use BLIP for caption')
process_caption_deepbooru = gr.Checkbox(label='Use deepbooru for caption', visible=True if cmd_opts.deepdanbooru else False)
+ with gr.Row(visible=False) as process_split_extra_row:
+ process_split_threshold = gr.Slider(label='Split image threshold', value=0.5, minimum=0.0, maximum=1.0, step=0.05)
+ process_overlap_ratio = gr.Slider(label='Split image overlap ratio', value=0.2, minimum=0.0, maximum=0.9, step=0.05)
+
with gr.Row():
with gr.Column(scale=3):
gr.HTML(value="")
@@ -1221,15 +1315,24 @@ def create_ui(wrap_gradio_gpu_call):
with gr.Column():
run_preprocess = gr.Button(value="Preprocess", variant='primary')
+ process_split.change(
+ fn=lambda show: gr_show(show),
+ inputs=[process_split],
+ outputs=[process_split_extra_row],
+ )
+
with gr.Tab(label="Train"):
- gr.HTML(value="<p style='margin-bottom: 0.7em'>Train an embedding; must specify a directory with a set of 1:1 ratio images</p>")
+ gr.HTML(value="<p style='margin-bottom: 0.7em'>Train an embedding or Hypernetwork; you must specify a directory with a set of 1:1 ratio images <a href=\"https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Textual-Inversion\" style=\"font-weight:bold;\">[wiki]</a></p>")
with gr.Row():
- train_embedding_name = gr.Dropdown(label='Embedding', choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys()))
+ train_embedding_name = gr.Dropdown(label='Embedding', elem_id="train_embedding", choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys()))
create_refresh_button(train_embedding_name, sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings, lambda: {"choices": sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())}, "refresh_train_embedding_name")
with gr.Row():
- train_hypernetwork_name = gr.Dropdown(label='Hypernetwork', choices=[x for x in shared.hypernetworks.keys()])
+ train_hypernetwork_name = gr.Dropdown(label='Hypernetwork', elem_id="train_hypernetwork", choices=[x for x in shared.hypernetworks.keys()])
create_refresh_button(train_hypernetwork_name, shared.reload_hypernetworks, lambda: {"choices": sorted([x for x in shared.hypernetworks.keys()])}, "refresh_train_hypernetwork_name")
- learn_rate = gr.Textbox(label='Learning rate', placeholder="Learning rate", value="0.005")
+ with gr.Row():
+ embedding_learn_rate = gr.Textbox(label='Embedding Learning rate', placeholder="Embedding Learning rate", value="0.005")
+ hypernetwork_learn_rate = gr.Textbox(label='Hypernetwork Learning rate', placeholder="Hypernetwork Learning rate", value="0.00001")
+
batch_size = gr.Number(label='Batch size', value=1, precision=0)
dataset_directory = gr.Textbox(label='Dataset directory', placeholder="Path to directory with input images")
log_directory = gr.Textbox(label='Log directory', placeholder="Path to directory where to write outputs", value="textual_inversion")
@@ -1263,6 +1366,7 @@ def create_ui(wrap_gradio_gpu_call):
new_embedding_name,
initialization_text,
nvpt,
+ overwrite_old_embedding,
],
outputs=[
train_embedding_name,
@@ -1271,11 +1375,30 @@ def create_ui(wrap_gradio_gpu_call):
]
)
+ create_embedding_ae.click(
+ fn=aesthetic_clip.generate_imgs_embd,
+ inputs=[
+ new_embedding_name_ae,
+ process_src_ae,
+ batch_ae
+ ],
+ outputs=[
+ aesthetic_imgs,
+ aesthetic_imgs_im,
+ ti_output,
+ ti_outcome,
+ ]
+ )
+
create_hypernetwork.click(
fn=modules.hypernetworks.ui.create_hypernetwork,
inputs=[
new_hypernetwork_name,
new_hypernetwork_sizes,
+ overwrite_old_hypernetwork,
+ new_hypernetwork_layer_structure,
+ new_hypernetwork_add_layer_norm,
+ new_hypernetwork_activation_func,
],
outputs=[
train_hypernetwork_name,
@@ -1292,10 +1415,13 @@ def create_ui(wrap_gradio_gpu_call):
process_dst,
process_width,
process_height,
+ preprocess_txt_action,
process_flip,
process_split,
process_caption,
- process_caption_deepbooru
+ process_caption_deepbooru,
+ process_split_threshold,
+ process_overlap_ratio,
],
outputs=[
ti_output,
@@ -1308,7 +1434,7 @@ def create_ui(wrap_gradio_gpu_call):
_js="start_training_textual_inversion",
inputs=[
train_embedding_name,
- learn_rate,
+ embedding_learn_rate,
batch_size,
dataset_directory,
log_directory,
@@ -1333,10 +1459,12 @@ def create_ui(wrap_gradio_gpu_call):
_js="start_training_textual_inversion",
inputs=[
train_hypernetwork_name,
- learn_rate,
+ hypernetwork_learn_rate,
batch_size,
dataset_directory,
log_directory,
+ training_width,
+ training_height,
steps,
create_image_every,
save_embedding_every,
@@ -1376,16 +1504,18 @@ def create_ui(wrap_gradio_gpu_call):
else:
raise Exception(f'bad options item type: {str(t)} for key {key}')
+ elem_id = "setting_"+key
+
if info.refresh is not None:
if is_quicksettings:
- res = comp(label=info.label, value=fun, **(args or {}))
- refresh_button = create_refresh_button(res, info.refresh, info.component_args, "refresh_" + key)
+ res = comp(label=info.label, value=fun, elem_id=elem_id, **(args or {}))
+ create_refresh_button(res, info.refresh, info.component_args, "refresh_" + key)
else:
with gr.Row(variant="compact"):
- res = comp(label=info.label, value=fun, **(args or {}))
- refresh_button = create_refresh_button(res, info.refresh, info.component_args, "refresh_" + key)
+ res = comp(label=info.label, value=fun, elem_id=elem_id, **(args or {}))
+ create_refresh_button(res, info.refresh, info.component_args, "refresh_" + key)
else:
- res = comp(label=info.label, value=fun, **(args or {}))
+ res = comp(label=info.label, value=fun, elem_id=elem_id, **(args or {}))
return res
@@ -1509,6 +1639,9 @@ Requested path was: {f}
with gr.Row():
request_notifications = gr.Button(value='Request browser notifications', elem_id="request_notifications")
+ download_localization = gr.Button(value='Download localization template', elem_id="download_localization")
+
+ with gr.Row():
reload_script_bodies = gr.Button(value='Reload custom script bodies (No ui updates, No restart)', variant='secondary')
restart_gradio = gr.Button(value='Restart Gradio and Refresh components (Custom Scripts, ui.py, js and css only)', variant='primary')
@@ -1519,8 +1652,16 @@ Requested path was: {f}
_js='function(){}'
)
+ download_localization.click(
+ fn=lambda: None,
+ inputs=[],
+ outputs=[],
+ _js='download_localization'
+ )
+
def reload_scripts():
modules.scripts.reload_script_body_only()
+ reload_javascript() # need to refresh the html page
reload_script_bodies.click(
fn=reload_scripts,
@@ -1721,7 +1862,7 @@ Requested path was: {f}
print(traceback.format_exc(), file=sys.stderr)
def loadsave(path, x):
- def apply_field(obj, field, condition=None):
+ def apply_field(obj, field, condition=None, init_field=None):
key = path + "/" + field
if getattr(obj,'custom_script_source',None) is not None:
@@ -1737,6 +1878,8 @@ Requested path was: {f}
print(f'Warning: Bad ui setting value: {key}: {saved_value}; Default value "{getattr(obj, field)}" will be used instead.')
else:
setattr(obj, field, saved_value)
+ if init_field is not None:
+ init_field(saved_value)
if type(x) in [gr.Slider, gr.Radio, gr.Checkbox, gr.Textbox, gr.Number] and x.visible:
apply_field(x, 'visible')
@@ -1762,11 +1905,13 @@ Requested path was: {f}
# Since there are many dropdowns that shouldn't be saved,
# we only mark dropdowns that should be saved.
if type(x) == gr.Dropdown and getattr(x, 'save_to_config', False):
- apply_field(x, 'value', lambda val: val in x.choices)
+ apply_field(x, 'value', lambda val: val in x.choices, getattr(x, 'init_field', None))
+ apply_field(x, 'visible')
visit(txt2img_interface, loadsave, "txt2img")
visit(img2img_interface, loadsave, "img2img")
visit(extras_interface, loadsave, "extras")
+ visit(modelmerger_interface, loadsave, "modelmerger")
if not error_loading and (not os.path.exists(ui_config_file) or settings_count != len(ui_settings)):
with open(ui_config_file, "w", encoding="utf8") as file:
@@ -1775,22 +1920,30 @@ Requested path was: {f}
return demo
-with open(os.path.join(script_path, "script.js"), "r", encoding="utf8") as jsfile:
- javascript = f'<script>{jsfile.read()}</script>'
+def load_javascript(raw_response):
+ with open(os.path.join(script_path, "script.js"), "r", encoding="utf8") as jsfile:
+ javascript = f'<script>{jsfile.read()}</script>'
+
+ jsdir = os.path.join(script_path, "javascript")
+ for filename in sorted(os.listdir(jsdir)):
+ with open(os.path.join(jsdir, filename), "r", encoding="utf8") as jsfile:
+ javascript += f"\n<!-- {filename} --><script>{jsfile.read()}</script>"
-jsdir = os.path.join(script_path, "javascript")
-for filename in sorted(os.listdir(jsdir)):
- with open(os.path.join(jsdir, filename), "r", encoding="utf8") as jsfile:
- javascript += f"\n<script>{jsfile.read()}</script>"
+ if cmd_opts.theme is not None:
+ javascript += f"\n<script>set_theme('{cmd_opts.theme}');</script>\n"
+ javascript += f"\n<script>{localization.localization_js(shared.opts.localization)}</script>"
-if 'gradio_routes_templates_response' not in globals():
def template_response(*args, **kwargs):
- res = gradio_routes_templates_response(*args, **kwargs)
- res.body = res.body.replace(b'</head>', f'{javascript}</head>'.encode("utf8"))
+ res = raw_response(*args, **kwargs)
+ res.body = res.body.replace(
+ b'</head>', f'{javascript}</head>'.encode("utf8"))
res.init_headers()
return res
- gradio_routes_templates_response = gradio.routes.templates.TemplateResponse
gradio.routes.templates.TemplateResponse = template_response
+
+reload_javascript = partial(load_javascript,
+ gradio.routes.templates.TemplateResponse)
+reload_javascript()
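The rewritten ui.py tail turns the one-shot TemplateResponse patch into a reusable load_javascript bound with functools.partial to the original response factory, so restarting Gradio can re-read script.js, the javascript/ directory and the localization file. The general shape of that monkeypatch, with assumed names:

    from functools import partial

    def install_head_injector(original_response, extra_head_html: str):
        # wraps the original response factory and splices extra markup before </head>
        def patched(*args, **kwargs):
            res = original_response(*args, **kwargs)
            res.body = res.body.replace(b"</head>", f"{extra_head_html}</head>".encode("utf8"))
            res.init_headers()
            return res
        return patched

    # hypothetical usage, mirroring the diff:
    # gradio.routes.templates.TemplateResponse = install_head_injector(
    #     gradio.routes.templates.TemplateResponse, "<script>/* injected */</script>")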
diff --git a/requirements.txt b/requirements.txt
index cf583de9..da1969cf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,3 +23,4 @@ resize-right
torchdiffeq
kornia
lark
+inflection
diff --git a/requirements_versions.txt b/requirements_versions.txt
index abadcb58..72ccc5a3 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -22,3 +22,4 @@ resize-right==0.0.2
torchdiffeq==0.2.3
kornia==0.6.7
lark==1.1.2
+inflection==0.5.1
diff --git a/script.js b/script.js
index 88f2c839..8b3b67e3 100644
--- a/script.js
+++ b/script.js
@@ -21,20 +21,20 @@ function onUiTabChange(callback){
uiTabChangeCallbacks.push(callback)
}
-function runCallback(x){
+function runCallback(x, m){
try {
- x()
+ x(m)
} catch (e) {
(console.error || console.log).call(console, e.message, e);
}
}
-function executeCallbacks(queue) {
- queue.forEach(runCallback)
+function executeCallbacks(queue, m) {
+ queue.forEach(function(x){runCallback(x, m)})
}
document.addEventListener("DOMContentLoaded", function() {
var mutationObserver = new MutationObserver(function(m){
- executeCallbacks(uiUpdateCallbacks);
+ executeCallbacks(uiUpdateCallbacks, m);
const newTab = get_uiCurrentTab();
if ( newTab && ( newTab !== uiCurrentTab ) ) {
uiCurrentTab = newTab;
diff --git a/scripts/outpainting_mk_2.py b/scripts/outpainting_mk_2.py
index a6468e09..2afd4aa5 100644
--- a/scripts/outpainting_mk_2.py
+++ b/scripts/outpainting_mk_2.py
@@ -172,54 +172,54 @@ class Script(scripts.Script):
if down > 0:
down = target_h - init_img.height - up
- init_image = p.init_images[0]
-
- state.job_count = (1 if left > 0 else 0) + (1 if right > 0 else 0) + (1 if up > 0 else 0) + (1 if down > 0 else 0)
-
- def expand(init, expand_pixels, is_left=False, is_right=False, is_top=False, is_bottom=False):
+ def expand(init, count, expand_pixels, is_left=False, is_right=False, is_top=False, is_bottom=False):
is_horiz = is_left or is_right
is_vert = is_top or is_bottom
pixels_horiz = expand_pixels if is_horiz else 0
pixels_vert = expand_pixels if is_vert else 0
- res_w = init.width + pixels_horiz
- res_h = init.height + pixels_vert
- process_res_w = math.ceil(res_w / 64) * 64
- process_res_h = math.ceil(res_h / 64) * 64
-
- img = Image.new("RGB", (process_res_w, process_res_h))
- img.paste(init, (pixels_horiz if is_left else 0, pixels_vert if is_top else 0))
- mask = Image.new("RGB", (process_res_w, process_res_h), "white")
- draw = ImageDraw.Draw(mask)
- draw.rectangle((
- expand_pixels + mask_blur if is_left else 0,
- expand_pixels + mask_blur if is_top else 0,
- mask.width - expand_pixels - mask_blur if is_right else res_w,
- mask.height - expand_pixels - mask_blur if is_bottom else res_h,
- ), fill="black")
-
- np_image = (np.asarray(img) / 255.0).astype(np.float64)
- np_mask = (np.asarray(mask) / 255.0).astype(np.float64)
- noised = get_matched_noise(np_image, np_mask, noise_q, color_variation)
- out = Image.fromarray(np.clip(noised * 255., 0., 255.).astype(np.uint8), mode="RGB")
-
- target_width = min(process_width, init.width + pixels_horiz) if is_horiz else img.width
- target_height = min(process_height, init.height + pixels_vert) if is_vert else img.height
-
- crop_region = (
- 0 if is_left else out.width - target_width,
- 0 if is_top else out.height - target_height,
- target_width if is_left else out.width,
- target_height if is_top else out.height,
- )
-
- image_to_process = out.crop(crop_region)
- mask = mask.crop(crop_region)
-
- p.width = target_width if is_horiz else img.width
- p.height = target_height if is_vert else img.height
- p.init_images = [image_to_process]
- p.image_mask = mask
+ images_to_process = []
+ output_images = []
+ for n in range(count):
+ res_w = init[n].width + pixels_horiz
+ res_h = init[n].height + pixels_vert
+ process_res_w = math.ceil(res_w / 64) * 64
+ process_res_h = math.ceil(res_h / 64) * 64
+
+ img = Image.new("RGB", (process_res_w, process_res_h))
+ img.paste(init[n], (pixels_horiz if is_left else 0, pixels_vert if is_top else 0))
+ mask = Image.new("RGB", (process_res_w, process_res_h), "white")
+ draw = ImageDraw.Draw(mask)
+ draw.rectangle((
+ expand_pixels + mask_blur if is_left else 0,
+ expand_pixels + mask_blur if is_top else 0,
+ mask.width - expand_pixels - mask_blur if is_right else res_w,
+ mask.height - expand_pixels - mask_blur if is_bottom else res_h,
+ ), fill="black")
+
+ np_image = (np.asarray(img) / 255.0).astype(np.float64)
+ np_mask = (np.asarray(mask) / 255.0).astype(np.float64)
+ noised = get_matched_noise(np_image, np_mask, noise_q, color_variation)
+ output_images.append(Image.fromarray(np.clip(noised * 255., 0., 255.).astype(np.uint8), mode="RGB"))
+
+ target_width = min(process_width, init[n].width + pixels_horiz) if is_horiz else img.width
+ target_height = min(process_height, init[n].height + pixels_vert) if is_vert else img.height
+ p.width = target_width if is_horiz else img.width
+ p.height = target_height if is_vert else img.height
+
+ crop_region = (
+ 0 if is_left else output_images[n].width - target_width,
+ 0 if is_top else output_images[n].height - target_height,
+ target_width if is_left else output_images[n].width,
+ target_height if is_top else output_images[n].height,
+ )
+ mask = mask.crop(crop_region)
+ p.image_mask = mask
+
+ image_to_process = output_images[n].crop(crop_region)
+ images_to_process.append(image_to_process)
+
+ p.init_images = images_to_process
latent_mask = Image.new("RGB", (p.width, p.height), "white")
draw = ImageDraw.Draw(latent_mask)
@@ -232,31 +232,52 @@ class Script(scripts.Script):
p.latent_mask = latent_mask
proc = process_images(p)
- proc_img = proc.images[0]
if initial_seed_and_info[0] is None:
initial_seed_and_info[0] = proc.seed
initial_seed_and_info[1] = proc.info
- out.paste(proc_img, (0 if is_left else out.width - proc_img.width, 0 if is_top else out.height - proc_img.height))
- out = out.crop((0, 0, res_w, res_h))
- return out
+ for n in range(count):
+ output_images[n].paste(proc.images[n], (0 if is_left else output_images[n].width - proc.images[n].width, 0 if is_top else output_images[n].height - proc.images[n].height))
+ output_images[n] = output_images[n].crop((0, 0, res_w, res_h))
- img = init_image
+ return output_images
- if left > 0:
- img = expand(img, left, is_left=True)
- if right > 0:
- img = expand(img, right, is_right=True)
- if up > 0:
- img = expand(img, up, is_top=True)
- if down > 0:
- img = expand(img, down, is_bottom=True)
+ batch_count = p.n_iter
+ batch_size = p.batch_size
+ p.n_iter = 1
+ state.job_count = batch_count * ((1 if left > 0 else 0) + (1 if right > 0 else 0) + (1 if up > 0 else 0) + (1 if down > 0 else 0))
+ all_processed_images = []
+
+ for i in range(batch_count):
+ imgs = [init_img] * batch_size
+ state.job = f"Batch {i + 1} out of {batch_count}"
+
+ if left > 0:
+ imgs = expand(imgs, batch_size, left, is_left=True)
+ if right > 0:
+ imgs = expand(imgs, batch_size, right, is_right=True)
+ if up > 0:
+ imgs = expand(imgs, batch_size, up, is_top=True)
+ if down > 0:
+ imgs = expand(imgs, batch_size, down, is_bottom=True)
- res = Processed(p, [img], initial_seed_and_info[0], initial_seed_and_info[1])
+ all_processed_images += imgs
+
+ all_images = all_processed_images
+
+ combined_grid_image = images.image_grid(all_processed_images)
+ unwanted_grid_because_of_img_count = len(all_processed_images) < 2 and opts.grid_only_if_multiple
+ if opts.return_grid and not unwanted_grid_because_of_img_count:
+ all_images = [combined_grid_image] + all_processed_images
+
+ res = Processed(p, all_images, initial_seed_and_info[0], initial_seed_and_info[1])
if opts.samples_save:
- images.save_image(img, p.outpath_samples, "", res.seed, p.prompt, opts.grid_format, info=res.info, p=p)
+ for img in all_processed_images:
+ images.save_image(img, p.outpath_samples, "", res.seed, p.prompt, opts.grid_format, info=res.info, p=p)
- return res
+ if opts.grid_save and not unwanted_grid_because_of_img_count:
+ images.save_image(combined_grid_image, p.outpath_grids, "grid", res.seed, p.prompt, opts.grid_format, info=res.info, short_filename=not opts.grid_extended_filename, grid=True, p=p)
+ return res
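The outpainting_mk_2 rewrite makes expand operate on a list of images so the script can drive batching itself: p.n_iter is forced to 1 and the script loops over batch_count, collecting every expanded image before building the optional grid. An outline of that control flow (names mirror the diff, but this is a sketch rather than the full script):

    def run_outpaint_batches(p, init_img, expand_all_sides, batch_count, batch_size):
        # expand_all_sides applies the left/right/up/down expansions to a batch of images
        p.n_iter = 1
        all_processed_images = []
        for i in range(batch_count):
            imgs = [init_img] * batch_size
            all_processed_images += expand_all_sides(imgs)
        return all_processed_images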
diff --git a/scripts/xy_grid.py b/scripts/xy_grid.py
index 5cca168a..eff0c942 100644
--- a/scripts/xy_grid.py
+++ b/scripts/xy_grid.py
@@ -89,6 +89,7 @@ def apply_checkpoint(p, x, xs):
if info is None:
raise RuntimeError(f"Unknown checkpoint: {x}")
modules.sd_models.reload_model_weights(shared.sd_model, info)
+ p.sd_model = shared.sd_model
def confirm_checkpoints(p, xs):
diff --git a/style.css b/style.css
index 71eb4d20..5d2bacc9 100644
--- a/style.css
+++ b/style.css
@@ -34,9 +34,10 @@
.performance {
font-size: 0.85em;
color: #444;
- display: flex;
- justify-content: space-between;
- white-space: nowrap;
+}
+
+.performance p{
+ display: inline-block;
}
.performance .time {
@@ -44,8 +45,6 @@
}
.performance .vram {
- margin-left: 0;
- text-align: right;
}
#txt2img_generate, #img2img_generate {
@@ -478,7 +477,7 @@ input[type="range"]{
padding: 0;
}
-#refresh_sd_model_checkpoint, #refresh_sd_hypernetwork, #refresh_train_hypernetwork_name, #refresh_train_embedding_name{
+#refresh_sd_model_checkpoint, #refresh_sd_hypernetwork, #refresh_train_hypernetwork_name, #refresh_train_embedding_name, #refresh_localization, #refresh_aesthetic_embeddings{
max-width: 2.5em;
min-width: 2.5em;
height: 2.4em;
diff --git a/webui.bat b/webui.bat
index 3f1d03f6..a38a28bb 100644
--- a/webui.bat
+++ b/webui.bat
@@ -33,7 +33,7 @@ goto :launch
:skip_venv
:launch
-%PYTHON% launch.py
+%PYTHON% launch.py %*
pause
exit /b
diff --git a/webui.py b/webui.py
index fe0ce321..87589064 100644
--- a/webui.py
+++ b/webui.py
@@ -4,7 +4,7 @@ import time
import importlib
import signal
import threading
-
+from fastapi import FastAPI
from fastapi.middleware.gzip import GZipMiddleware
from modules.paths import script_path
@@ -31,7 +31,6 @@ from modules.paths import script_path
from modules.shared import cmd_opts
import modules.hypernetworks.hypernetwork
-
queue_lock = threading.Lock()
@@ -87,10 +86,6 @@ def initialize():
shared.opts.onchange("sd_hypernetwork", wrap_queued_call(lambda: modules.hypernetworks.hypernetwork.load_hypernetwork(shared.opts.sd_hypernetwork)))
shared.opts.onchange("sd_hypernetwork_strength", modules.hypernetworks.hypernetwork.apply_strength)
-
-def webui():
- initialize()
-
# make the program just exit at ctrl+c without waiting for anything
def sigint_handler(sig, frame):
print(f'Interrupted with signal {sig} in {frame}')
@@ -98,10 +93,38 @@ def webui():
signal.signal(signal.SIGINT, sigint_handler)
+
+def create_api(app):
+ from modules.api.api import Api
+ api = Api(app, queue_lock)
+ return api
+
+def wait_on_server(demo=None):
while 1:
+ time.sleep(0.5)
+ if demo and getattr(demo, 'do_restart', False):
+ time.sleep(0.5)
+ demo.close()
+ time.sleep(0.5)
+ break
+
+def api_only():
+ initialize()
+
+ app = FastAPI()
+ app.add_middleware(GZipMiddleware, minimum_size=1000)
+ api = create_api(app)
+ api.launch(server_name="0.0.0.0" if cmd_opts.listen else "127.0.0.1", port=cmd_opts.port if cmd_opts.port else 7861)
+
+
+def webui():
+ launch_api = cmd_opts.api
+ initialize()
+
+ while 1:
demo = modules.ui.create_ui(wrap_gradio_gpu_call=wrap_gradio_gpu_call)
-
+
app, local_url, share_url = demo.launch(
share=cmd_opts.share,
server_name="0.0.0.0" if cmd_opts.listen else None,
@@ -111,16 +134,13 @@ def webui():
inbrowser=cmd_opts.autolaunch,
prevent_thread_lock=True
)
-
+
app.add_middleware(GZipMiddleware, minimum_size=1000)
- while 1:
- time.sleep(0.5)
- if getattr(demo, 'do_restart', False):
- time.sleep(0.5)
- demo.close()
- time.sleep(0.5)
- break
+ if launch_api:
+ create_api(app)
+
+ wait_on_server(demo)
sd_samplers.set_samplers()
@@ -133,5 +153,10 @@ def webui():
print('Restarting Gradio')
+
+task = []
if __name__ == "__main__":
- webui()
+ if cmd_opts.nowebui:
+ api_only()
+ else:
+ webui()
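webui.py now splits start-up into api_only (a bare FastAPI app, used with --nowebui) and webui (the Gradio UI, with the API attached when --api is set), and the restart polling is factored into wait_on_server. A condensed sketch of the wait loop:

    import time

    def wait_for_restart(demo=None):
        # idle until the UI flags a restart request, then shut the Gradio server down
        while True:
            time.sleep(0.5)
            if demo is not None and getattr(demo, "do_restart", False):
                demo.close()
                break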
diff --git a/webui.sh b/webui.sh
index 980c0aaf..a9f85d89 100755
--- a/webui.sh
+++ b/webui.sh
@@ -138,4 +138,4 @@ fi
printf "\n%s\n" "${delimiter}"
printf "Launching launch.py..."
printf "\n%s\n" "${delimiter}"
-"${python_cmd}" "${LAUNCH_SCRIPT}"
+"${python_cmd}" "${LAUNCH_SCRIPT}" "$@"