From 820f1dc96b1979d7e92170c161db281ee8bd988b Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 2 Oct 2022 15:03:39 +0300 Subject: initial support for training textual inversion --- modules/textual_inversion/dataset.py | 76 ++++++++ modules/textual_inversion/textual_inversion.py | 258 +++++++++++++++++++++++++ modules/textual_inversion/ui.py | 32 +++ 3 files changed, 366 insertions(+) create mode 100644 modules/textual_inversion/dataset.py create mode 100644 modules/textual_inversion/textual_inversion.py create mode 100644 modules/textual_inversion/ui.py (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py new file mode 100644 index 00000000..7e134a08 --- /dev/null +++ b/modules/textual_inversion/dataset.py @@ -0,0 +1,76 @@ +import os +import numpy as np +import PIL +import torch +from PIL import Image +from torch.utils.data import Dataset +from torchvision import transforms + +import random +import tqdm + + +class PersonalizedBase(Dataset): + def __init__(self, data_root, size=None, repeats=100, flip_p=0.5, placeholder_token="*", width=512, height=512, model=None, device=None, template_file=None): + + self.placeholder_token = placeholder_token + + self.size = size + self.width = width + self.height = height + self.flip = transforms.RandomHorizontalFlip(p=flip_p) + + self.dataset = [] + + with open(template_file, "r") as file: + lines = [x.strip() for x in file.readlines()] + + self.lines = lines + + assert data_root, 'dataset directory not specified' + + self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)] + print("Preparing dataset...") + for path in tqdm.tqdm(self.image_paths): + image = Image.open(path) + image = image.convert('RGB') + image = image.resize((self.width, self.height), PIL.Image.BICUBIC) + + filename = os.path.basename(path) + filename_tokens = os.path.splitext(filename)[0].replace('_', '-').replace(' ', '-').split('-') + filename_tokens = [token for token in filename_tokens if token.isalpha()] + + npimage = np.array(image).astype(np.uint8) + npimage = (npimage / 127.5 - 1.0).astype(np.float32) + + torchdata = torch.from_numpy(npimage).to(device=device, dtype=torch.float32) + torchdata = torch.moveaxis(torchdata, 2, 0) + + init_latent = model.get_first_stage_encoding(model.encode_first_stage(torchdata.unsqueeze(dim=0))).squeeze() + + self.dataset.append((init_latent, filename_tokens)) + + self.length = len(self.dataset) * repeats + + self.initial_indexes = np.arange(self.length) % len(self.dataset) + self.indexes = None + self.shuffle() + + def shuffle(self): + self.indexes = self.initial_indexes[torch.randperm(self.initial_indexes.shape[0])] + + def __len__(self): + return self.length + + def __getitem__(self, i): + if i % len(self.dataset) == 0: + self.shuffle() + + index = self.indexes[i % len(self.indexes)] + x, filename_tokens = self.dataset[index] + + text = random.choice(self.lines) + text = text.replace("[name]", self.placeholder_token) + text = text.replace("[filewords]", ' '.join(filename_tokens)) + + return x, text diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py new file mode 100644 index 00000000..c0baaace --- /dev/null +++ b/modules/textual_inversion/textual_inversion.py @@ -0,0 +1,258 @@ +import os +import sys +import traceback + +import torch +import tqdm +import html +import datetime + +from modules import shared, devices, sd_hijack, processing +import modules.textual_inversion.dataset + + +class Embedding: + def __init__(self, vec, name, step=None): + self.vec = vec + self.name = name + self.step = step + self.cached_checksum = None + + def save(self, filename): + embedding_data = { + "string_to_token": {"*": 265}, + "string_to_param": {"*": self.vec}, + "name": self.name, + "step": self.step, + } + + torch.save(embedding_data, filename) + + def checksum(self): + if self.cached_checksum is not None: + return self.cached_checksum + + def const_hash(a): + r = 0 + for v in a: + r = (r * 281 ^ int(v) * 997) & 0xFFFFFFFF + return r + + self.cached_checksum = f'{const_hash(self.vec.reshape(-1) * 100) & 0xffff:04x}' + return self.cached_checksum + +class EmbeddingDatabase: + def __init__(self, embeddings_dir): + self.ids_lookup = {} + self.word_embeddings = {} + self.dir_mtime = None + self.embeddings_dir = embeddings_dir + + def register_embedding(self, embedding, model): + + self.word_embeddings[embedding.name] = embedding + + ids = model.cond_stage_model.tokenizer([embedding.name], add_special_tokens=False)['input_ids'][0] + + first_id = ids[0] + if first_id not in self.ids_lookup: + self.ids_lookup[first_id] = [] + self.ids_lookup[first_id].append((ids, embedding)) + + return embedding + + def load_textual_inversion_embeddings(self): + mt = os.path.getmtime(self.embeddings_dir) + if self.dir_mtime is not None and mt <= self.dir_mtime: + return + + self.dir_mtime = mt + self.ids_lookup.clear() + self.word_embeddings.clear() + + def process_file(path, filename): + name = os.path.splitext(filename)[0] + + data = torch.load(path, map_location="cpu") + + # textual inversion embeddings + if 'string_to_param' in data: + param_dict = data['string_to_param'] + if hasattr(param_dict, '_parameters'): + param_dict = getattr(param_dict, '_parameters') # fix for torch 1.12.1 loading saved file from torch 1.11 + assert len(param_dict) == 1, 'embedding file has multiple terms in it' + emb = next(iter(param_dict.items()))[1] + # diffuser concepts + elif type(data) == dict and type(next(iter(data.values()))) == torch.Tensor: + assert len(data.keys()) == 1, 'embedding file has multiple terms in it' + + emb = next(iter(data.values())) + if len(emb.shape) == 1: + emb = emb.unsqueeze(0) + else: + raise Exception(f"Couldn't identify {filename} as neither textual inversion embedding nor diffuser concept.") + + vec = emb.detach().to(devices.device, dtype=torch.float32) + embedding = Embedding(vec, name) + embedding.step = data.get('step', None) + self.register_embedding(embedding, shared.sd_model) + + for fn in os.listdir(self.embeddings_dir): + try: + fullfn = os.path.join(self.embeddings_dir, fn) + + if os.stat(fullfn).st_size == 0: + continue + + process_file(fullfn, fn) + except Exception: + print(f"Error loading emedding {fn}:", file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + continue + + print(f"Loaded a total of {len(self.word_embeddings)} textual inversion embeddings.") + + def find_embedding_at_position(self, tokens, offset): + token = tokens[offset] + possible_matches = self.ids_lookup.get(token, None) + + if possible_matches is None: + return None + + for ids, embedding in possible_matches: + if tokens[offset:offset + len(ids)] == ids: + return embedding + + return None + + + +def create_embedding(name, num_vectors_per_token): + init_text = '*' + + cond_model = shared.sd_model.cond_stage_model + embedding_layer = cond_model.wrapped.transformer.text_model.embeddings + + ids = cond_model.tokenizer(init_text, max_length=num_vectors_per_token, return_tensors="pt", add_special_tokens=False)["input_ids"] + embedded = embedding_layer(ids.to(devices.device)).squeeze(0) + vec = torch.zeros((num_vectors_per_token, embedded.shape[1]), device=devices.device) + + for i in range(num_vectors_per_token): + vec[i] = embedded[i * int(embedded.shape[0]) // num_vectors_per_token] + + fn = os.path.join(shared.cmd_opts.embeddings_dir, f"{name}.pt") + assert not os.path.exists(fn), f"file {fn} already exists" + + embedding = Embedding(vec, name) + embedding.step = 0 + embedding.save(fn) + + return fn + + +def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, create_image_every, save_embedding_every, template_file): + assert embedding_name, 'embedding not selected' + + shared.state.textinfo = "Initializing textual inversion training..." + shared.state.job_count = steps + + filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt') + + log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%d-%m"), embedding_name) + + if save_embedding_every > 0: + embedding_dir = os.path.join(log_directory, "embeddings") + os.makedirs(embedding_dir, exist_ok=True) + else: + embedding_dir = None + + if create_image_every > 0: + images_dir = os.path.join(log_directory, "images") + os.makedirs(images_dir, exist_ok=True) + else: + images_dir = None + + cond_model = shared.sd_model.cond_stage_model + + shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." + with torch.autocast("cuda"): + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, size=512, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) + + hijack = sd_hijack.model_hijack + + embedding = hijack.embedding_db.word_embeddings[embedding_name] + embedding.vec.requires_grad = True + + optimizer = torch.optim.AdamW([embedding.vec], lr=learn_rate) + + losses = torch.zeros((32,)) + + last_saved_file = "" + last_saved_image = "" + + ititial_step = embedding.step or 0 + if ititial_step > steps: + return embedding, filename + + pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) + for i, (x, text) in pbar: + embedding.step = i + ititial_step + + if embedding.step > steps: + break + + if shared.state.interrupted: + break + + with torch.autocast("cuda"): + c = cond_model([text]) + loss = shared.sd_model(x.unsqueeze(0), c)[0] + + losses[embedding.step % losses.shape[0]] = loss.item() + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + pbar.set_description(f"loss: {losses.mean():.7f}") + + if embedding.step > 0 and embedding_dir is not None and embedding.step % save_embedding_every == 0: + last_saved_file = os.path.join(embedding_dir, f'{embedding_name}-{embedding.step}.pt') + embedding.save(last_saved_file) + + if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0: + last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png') + + p = processing.StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + prompt=text, + steps=20, + do_not_save_grid=True, + do_not_save_samples=True, + ) + + processed = processing.process_images(p) + image = processed.images[0] + + shared.state.current_image = image + image.save(last_saved_image) + + last_saved_image += f", prompt: {text}" + + shared.state.job_no = embedding.step + + shared.state.textinfo = f""" +

+Loss: {losses.mean():.7f}
+Step: {embedding.step}
+Last prompt: {html.escape(text)}
+Last saved embedding: {html.escape(last_saved_file)}
+Last saved image: {html.escape(last_saved_image)}
+

+""" + + embedding.cached_checksum = None + embedding.save(filename) + + return embedding, filename + diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py new file mode 100644 index 00000000..ce3677a9 --- /dev/null +++ b/modules/textual_inversion/ui.py @@ -0,0 +1,32 @@ +import html + +import gradio as gr + +import modules.textual_inversion.textual_inversion as ti +from modules import sd_hijack, shared + + +def create_embedding(name, nvpt): + filename = ti.create_embedding(name, nvpt) + + sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings() + + return gr.Dropdown.update(choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())), f"Created: {filename}", "" + + +def train_embedding(*args): + + try: + sd_hijack.undo_optimizations() + + embedding, filename = ti.train_embedding(*args) + + res = f""" +Training {'interrupted' if shared.state.interrupted else 'finished'} after {embedding.step} steps. +Embedding saved to {html.escape(filename)} +""" + return res, "" + except Exception: + raise + finally: + sd_hijack.apply_optimizations() -- cgit v1.2.1 From 88ec0cf5571883d84abd09196652b3679e359f2e Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 2 Oct 2022 19:40:51 +0300 Subject: fix for incorrect embedding token length calculation (will break seeds that use embeddings, you're welcome!) add option to input initialization text for embeddings --- modules/textual_inversion/textual_inversion.py | 13 +++++-------- modules/textual_inversion/ui.py | 4 ++-- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index c0baaace..0c50161d 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -117,24 +117,21 @@ class EmbeddingDatabase: possible_matches = self.ids_lookup.get(token, None) if possible_matches is None: - return None + return None, None for ids, embedding in possible_matches: if tokens[offset:offset + len(ids)] == ids: - return embedding + return embedding, len(ids) - return None + return None, None - -def create_embedding(name, num_vectors_per_token): - init_text = '*' - +def create_embedding(name, num_vectors_per_token, init_text='*'): cond_model = shared.sd_model.cond_stage_model embedding_layer = cond_model.wrapped.transformer.text_model.embeddings ids = cond_model.tokenizer(init_text, max_length=num_vectors_per_token, return_tensors="pt", add_special_tokens=False)["input_ids"] - embedded = embedding_layer(ids.to(devices.device)).squeeze(0) + embedded = embedding_layer.token_embedding.wrapped(ids.to(devices.device)).squeeze(0) vec = torch.zeros((num_vectors_per_token, embedded.shape[1]), device=devices.device) for i in range(num_vectors_per_token): diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py index ce3677a9..66c43ffb 100644 --- a/modules/textual_inversion/ui.py +++ b/modules/textual_inversion/ui.py @@ -6,8 +6,8 @@ import modules.textual_inversion.textual_inversion as ti from modules import sd_hijack, shared -def create_embedding(name, nvpt): - filename = ti.create_embedding(name, nvpt) +def create_embedding(name, initialization_text, nvpt): + filename = ti.create_embedding(name, nvpt, init_text=initialization_text) sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings() -- cgit v1.2.1 From 71fe7fa49f5eb1a2c89932a9d217ed153c12fc8b Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 2 Oct 2022 19:56:37 +0300 Subject: fix using aaaa-100 embedding when the prompt has aaaa-10000 and you have both aaaa-100 and aaaa-10000 in the directory with embeddings. --- modules/textual_inversion/textual_inversion.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 0c50161d..9d2241ce 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -57,7 +57,8 @@ class EmbeddingDatabase: first_id = ids[0] if first_id not in self.ids_lookup: self.ids_lookup[first_id] = [] - self.ids_lookup[first_id].append((ids, embedding)) + + self.ids_lookup[first_id] = sorted(self.ids_lookup[first_id] + [(ids, embedding)], key=lambda x: len(x[0]), reverse=True) return embedding -- cgit v1.2.1 From 4ec4af6e0b7addeee5221a03f32d117ccdc875d9 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 2 Oct 2022 20:15:25 +0300 Subject: add checkpoint info to saved embeddings --- modules/textual_inversion/textual_inversion.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 9d2241ce..1183aab7 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,7 +7,7 @@ import tqdm import html import datetime -from modules import shared, devices, sd_hijack, processing +from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset @@ -17,6 +17,8 @@ class Embedding: self.name = name self.step = step self.cached_checksum = None + self.sd_checkpoint = None + self.sd_checkpoint_name = None def save(self, filename): embedding_data = { @@ -24,6 +26,8 @@ class Embedding: "string_to_param": {"*": self.vec}, "name": self.name, "step": self.step, + "sd_checkpoint": self.sd_checkpoint, + "sd_checkpoint_name": self.sd_checkpoint_name, } torch.save(embedding_data, filename) @@ -41,6 +45,7 @@ class Embedding: self.cached_checksum = f'{const_hash(self.vec.reshape(-1) * 100) & 0xffff:04x}' return self.cached_checksum + class EmbeddingDatabase: def __init__(self, embeddings_dir): self.ids_lookup = {} @@ -96,6 +101,8 @@ class EmbeddingDatabase: vec = emb.detach().to(devices.device, dtype=torch.float32) embedding = Embedding(vec, name) embedding.step = data.get('step', None) + embedding.sd_checkpoint = data.get('hash', None) + embedding.sd_checkpoint_name = data.get('sd_checkpoint_name', None) self.register_embedding(embedding, shared.sd_model) for fn in os.listdir(self.embeddings_dir): @@ -249,6 +256,10 @@ Last saved image: {html.escape(last_saved_image)}

""" + checkpoint = sd_models.select_checkpoint() + + embedding.sd_checkpoint = checkpoint.hash + embedding.sd_checkpoint_name = checkpoint.model_name embedding.cached_checksum = None embedding.save(filename) -- cgit v1.2.1 From a1cde7e6468f80584030525a1b07cbf0f4ee42eb Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 2 Oct 2022 21:09:10 +0300 Subject: disabled SD model download after multiple complaints --- modules/textual_inversion/ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py index 66c43ffb..633037d8 100644 --- a/modules/textual_inversion/ui.py +++ b/modules/textual_inversion/ui.py @@ -22,7 +22,7 @@ def train_embedding(*args): embedding, filename = ti.train_embedding(*args) res = f""" -Training {'interrupted' if shared.state.interrupted else 'finished'} after {embedding.step} steps. +Training {'interrupted' if shared.state.interrupted else 'finished'} at {embedding.step} steps. Embedding saved to {html.escape(filename)} """ return res, "" -- cgit v1.2.1 From c7543d4940da672d970124ae8f2fec9de7bdc1da Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 2 Oct 2022 22:41:21 +0300 Subject: preprocessing for textual inversion added --- modules/textual_inversion/preprocess.py | 75 ++++++++++++++++++++++++++ modules/textual_inversion/textual_inversion.py | 1 + modules/textual_inversion/ui.py | 14 +++-- 3 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 modules/textual_inversion/preprocess.py (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py new file mode 100644 index 00000000..209e928f --- /dev/null +++ b/modules/textual_inversion/preprocess.py @@ -0,0 +1,75 @@ +import os +from PIL import Image, ImageOps +import tqdm + +from modules import shared, images + + +def preprocess(process_src, process_dst, process_flip, process_split, process_caption): + size = 512 + src = os.path.abspath(process_src) + dst = os.path.abspath(process_dst) + + assert src != dst, 'same directory specified as source and desitnation' + + os.makedirs(dst, exist_ok=True) + + files = os.listdir(src) + + shared.state.textinfo = "Preprocessing..." + shared.state.job_count = len(files) + + if process_caption: + shared.interrogator.load() + + def save_pic_with_caption(image, index): + if process_caption: + caption = "-" + shared.interrogator.generate_caption(image) + else: + caption = "" + + image.save(os.path.join(dst, f"{index:05}-{subindex[0]}{caption}.png")) + subindex[0] += 1 + + def save_pic(image, index): + save_pic_with_caption(image, index) + + if process_flip: + save_pic_with_caption(ImageOps.mirror(image), index) + + for index, imagefile in enumerate(tqdm.tqdm(files)): + subindex = [0] + filename = os.path.join(src, imagefile) + img = Image.open(filename).convert("RGB") + + if shared.state.interrupted: + break + + ratio = img.height / img.width + is_tall = ratio > 1.35 + is_wide = ratio < 1 / 1.35 + + if process_split and is_tall: + img = img.resize((size, size * img.height // img.width)) + + top = img.crop((0, 0, size, size)) + save_pic(top, index) + + bot = img.crop((0, img.height - size, size, img.height)) + save_pic(bot, index) + elif process_split and is_wide: + img = img.resize((size * img.width // img.height, size)) + + left = img.crop((0, 0, size, size)) + save_pic(left, index) + + right = img.crop((img.width - size, 0, img.width, size)) + save_pic(right, index) + else: + img = images.resize_image(1, img, size, size) + save_pic(img, index) + + shared.state.nextjob() + + if process_caption: + shared.interrogator.send_blip_to_ram() diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 1183aab7..d4e250d8 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,6 +7,7 @@ import tqdm import html import datetime + from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py index 633037d8..f19ac5e0 100644 --- a/modules/textual_inversion/ui.py +++ b/modules/textual_inversion/ui.py @@ -2,24 +2,31 @@ import html import gradio as gr -import modules.textual_inversion.textual_inversion as ti +import modules.textual_inversion.textual_inversion +import modules.textual_inversion.preprocess from modules import sd_hijack, shared def create_embedding(name, initialization_text, nvpt): - filename = ti.create_embedding(name, nvpt, init_text=initialization_text) + filename = modules.textual_inversion.textual_inversion.create_embedding(name, nvpt, init_text=initialization_text) sd_hijack.model_hijack.embedding_db.load_textual_inversion_embeddings() return gr.Dropdown.update(choices=sorted(sd_hijack.model_hijack.embedding_db.word_embeddings.keys())), f"Created: {filename}", "" +def preprocess(*args): + modules.textual_inversion.preprocess.preprocess(*args) + + return "Preprocessing finished.", "" + + def train_embedding(*args): try: sd_hijack.undo_optimizations() - embedding, filename = ti.train_embedding(*args) + embedding, filename = modules.textual_inversion.textual_inversion.train_embedding(*args) res = f""" Training {'interrupted' if shared.state.interrupted else 'finished'} at {embedding.step} steps. @@ -30,3 +37,4 @@ Embedding saved to {html.escape(filename)} raise finally: sd_hijack.apply_optimizations() + -- cgit v1.2.1 From 6785331e22d6a488fbf5905fab56d7fec867e038 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sun, 2 Oct 2022 22:59:01 +0300 Subject: keep textual inversion dataset latents in CPU memory to save a bit of VRAM --- modules/textual_inversion/dataset.py | 2 ++ modules/textual_inversion/textual_inversion.py | 3 +++ 2 files changed, 5 insertions(+) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 7e134a08..e8394ff6 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -8,6 +8,7 @@ from torchvision import transforms import random import tqdm +from modules import devices class PersonalizedBase(Dataset): @@ -47,6 +48,7 @@ class PersonalizedBase(Dataset): torchdata = torch.moveaxis(torchdata, 2, 0) init_latent = model.get_first_stage_encoding(model.encode_first_stage(torchdata.unsqueeze(dim=0))).squeeze() + init_latent = init_latent.to(devices.cpu) self.dataset.append((init_latent, filename_tokens)) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index d4e250d8..8686f534 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -212,7 +212,10 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, with torch.autocast("cuda"): c = cond_model([text]) + + x = x.to(devices.device) loss = shared.sd_model(x.unsqueeze(0), c)[0] + del x losses[embedding.step % losses.shape[0]] = loss.item() -- cgit v1.2.1 From 2865ef4b9ab16d56326cc805541bebcf01d099bc Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Mon, 3 Oct 2022 13:10:03 +0300 Subject: fix broken date in TI --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 8686f534..cd9f3498 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -164,7 +164,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt') - log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%d-%m"), embedding_name) + log_directory = os.path.join(log_directory, datetime.datetime.now().strftime("%Y-%m-%d"), embedding_name) if save_embedding_every > 0: embedding_dir = os.path.join(log_directory, "embeddings") -- cgit v1.2.1 From 5ef0baf5eaec7f21a1666af424405cbee19f3764 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 4 Oct 2022 08:52:11 +0300 Subject: add support for gelbooru tags in filenames for textual inversion --- modules/textual_inversion/dataset.py | 7 +++++-- modules/textual_inversion/preprocess.py | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index e8394ff6..7c44ea5b 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -9,6 +9,9 @@ from torchvision import transforms import random import tqdm from modules import devices +import re + +re_tag = re.compile(r"[a-zA-Z][_\w\d()]+") class PersonalizedBase(Dataset): @@ -38,8 +41,8 @@ class PersonalizedBase(Dataset): image = image.resize((self.width, self.height), PIL.Image.BICUBIC) filename = os.path.basename(path) - filename_tokens = os.path.splitext(filename)[0].replace('_', '-').replace(' ', '-').split('-') - filename_tokens = [token for token in filename_tokens if token.isalpha()] + filename_tokens = os.path.splitext(filename)[0] + filename_tokens = re_tag.findall(filename_tokens) npimage = np.array(image).astype(np.uint8) npimage = (npimage / 127.5 - 1.0).astype(np.float32) diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 209e928f..f545a993 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -26,7 +26,9 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca if process_caption: caption = "-" + shared.interrogator.generate_caption(image) else: - caption = "" + caption = filename + caption = os.path.splitext(caption)[0] + caption = os.path.basename(caption) image.save(os.path.join(dst, f"{index:05}-{subindex[0]}{caption}.png")) subindex[0] += 1 -- cgit v1.2.1 From 2499fb4e1910d31ff12c24110f161b20641b8835 Mon Sep 17 00:00:00 2001 From: Raphael Stoeckli Date: Wed, 5 Oct 2022 21:57:18 +0200 Subject: Add sanitizer for captions in Textual inversion --- modules/textual_inversion/preprocess.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index f545a993..4f3df4bd 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -1,5 +1,8 @@ +from cmath import log import os from PIL import Image, ImageOps +import platform +import sys import tqdm from modules import shared, images @@ -25,6 +28,7 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca def save_pic_with_caption(image, index): if process_caption: caption = "-" + shared.interrogator.generate_caption(image) + caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") else: caption = filename caption = os.path.splitext(caption)[0] @@ -75,3 +79,27 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca if process_caption: shared.interrogator.send_blip_to_ram() + +def sanitize_caption(base_path, original_caption, suffix): + operating_system = platform.system().lower() + if (operating_system == "windows"): + invalid_path_characters = "\\/:*?\"<>|" + max_path_length = 259 + else: + invalid_path_characters = "/" #linux/macos + max_path_length = 1023 + caption = original_caption + for invalid_character in invalid_path_characters: + caption = caption.replace(invalid_character, "") + fixed_path_length = len(base_path) + len(suffix) + if fixed_path_length + len(caption) <= max_path_length: + return caption + caption_tokens = caption.split() + new_caption = "" + for token in caption_tokens: + last_caption = new_caption + new_caption = new_caption + token + " " + if (len(new_caption) + fixed_path_length - 1 > max_path_length): + break + print(f"\nPath will be too long. Truncated caption: {original_caption}\nto: {last_caption}", file=sys.stderr) + return last_caption.strip() -- cgit v1.2.1 From 4288e53fc2ea25fa49715bf5b7f14603553c9e38 Mon Sep 17 00:00:00 2001 From: Raphael Stoeckli Date: Wed, 5 Oct 2022 23:11:32 +0200 Subject: removed unused import, fixed typo --- modules/textual_inversion/preprocess.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 4f3df4bd..f1c002a2 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -1,4 +1,3 @@ -from cmath import log import os from PIL import Image, ImageOps import platform @@ -13,7 +12,7 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) - assert src != dst, 'same directory specified as source and desitnation' + assert src != dst, 'same directory specified as source and destination' os.makedirs(dst, exist_ok=True) -- cgit v1.2.1 From 12c4d5c6b5bf9dd50d0601c36af4f99b65316d58 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 7 Oct 2022 23:22:22 +0300 Subject: hypernetwork training mk1 --- modules/textual_inversion/ui.py | 1 - 1 file changed, 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py index f19ac5e0..c57de1f9 100644 --- a/modules/textual_inversion/ui.py +++ b/modules/textual_inversion/ui.py @@ -22,7 +22,6 @@ def preprocess(*args): def train_embedding(*args): - try: sd_hijack.undo_optimizations() -- cgit v1.2.1 From 5841990b0df04906da7321beef6f7f7902b7d57b Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 05:38:38 +0100 Subject: Update textual_inversion.py --- modules/textual_inversion/textual_inversion.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index cd9f3498..f6316020 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,6 +7,9 @@ import tqdm import html import datetime +from PIL import Image, PngImagePlugin +import base64 +from io import BytesIO from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset @@ -80,7 +83,15 @@ class EmbeddingDatabase: def process_file(path, filename): name = os.path.splitext(filename)[0] - data = torch.load(path, map_location="cpu") + data = [] + + if filename.upper().endswith('.PNG'): + embed_image = Image.open(path) + if 'sd-embedding' in embed_image.text: + embeddingData = base64.b64decode(embed_image.text['sd-embedding']) + data = torch.load(BytesIO(embeddingData), map_location="cpu") + else: + data = torch.load(path, map_location="cpu") # textual inversion embeddings if 'string_to_param' in data: @@ -156,7 +167,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, create_image_every, save_embedding_every, template_file): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -244,7 +255,15 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, image = processed.images[0] shared.state.current_image = image - image.save(last_saved_image) + + if save_image_with_stored_embedding: + info = PngImagePlugin.PngInfo() + info.add_text("sd-embedding", base64.b64encode(open(last_saved_file,'rb').read())) + image.save(last_saved_image, "PNG", pnginfo=info) + else: + image.save(last_saved_image) + + last_saved_image += f", prompt: {text}" -- cgit v1.2.1 From 03694e1f9915e34cf7d9a31073f1a1a9def2909f Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 21:58:14 +0100 Subject: add embedding load and save from b64 json --- modules/textual_inversion/textual_inversion.py | 30 ++++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index f6316020..1b7f8906 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,9 +7,11 @@ import tqdm import html import datetime -from PIL import Image, PngImagePlugin +from PIL import Image,PngImagePlugin +from ..images import captionImge +import numpy as np import base64 -from io import BytesIO +import json from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset @@ -87,9 +89,9 @@ class EmbeddingDatabase: if filename.upper().endswith('.PNG'): embed_image = Image.open(path) - if 'sd-embedding' in embed_image.text: - embeddingData = base64.b64decode(embed_image.text['sd-embedding']) - data = torch.load(BytesIO(embeddingData), map_location="cpu") + if 'sd-ti-embedding' in embed_image.text: + data = embeddingFromB64(embed_image.text['sd-ti-embedding']) + name = data.get('name',name) else: data = torch.load(path, map_location="cpu") @@ -258,13 +260,23 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, if save_image_with_stored_embedding: info = PngImagePlugin.PngInfo() - info.add_text("sd-embedding", base64.b64encode(open(last_saved_file,'rb').read())) - image.save(last_saved_image, "PNG", pnginfo=info) + data = torch.load(last_saved_file) + info.add_text("sd-ti-embedding", embeddingToB64(data)) + + pre_lines = [((255, 207, 175),"<{}>".format(data.get('name','???')))] + + caption_checkpoint_hash = data.get('sd_checkpoint','UNK') + caption_checkpoint_hash = caption_checkpoint_hash.upper() if caption_checkpoint_hash else 'UNK' + caption_stepcount = data.get('step',0) + caption_stepcount = caption_stepcount if caption_stepcount else 0 + + post_lines = [((240, 223, 175),"Trained against checkpoint [{}] for {} steps".format(caption_checkpoint_hash, + caption_stepcount))] + captioned_image = captionImge(image,prelines=pre_lines,postlines=post_lines) + captioned_image.save(last_saved_image, "PNG", pnginfo=info) else: image.save(last_saved_image) - - last_saved_image += f", prompt: {text}" shared.state.job_no = embedding.step -- cgit v1.2.1 From 969bd8256e5b4f1007d3cc653723d4ad50a92528 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:02:28 +0100 Subject: add alternate checkpoint hash source --- modules/textual_inversion/textual_inversion.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 1b7f8906..d7813084 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -265,8 +265,11 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, pre_lines = [((255, 207, 175),"<{}>".format(data.get('name','???')))] - caption_checkpoint_hash = data.get('sd_checkpoint','UNK') - caption_checkpoint_hash = caption_checkpoint_hash.upper() if caption_checkpoint_hash else 'UNK' + caption_checkpoint_hash = data.get('sd_checkpoint') + if caption_checkpoint_hash is None: + caption_checkpoint_hash = data.get('hash') + caption_checkpoint_hash = caption_checkpoint_hash.upper() if caption_checkpoint_hash else 'UNKNOWN' + caption_stepcount = data.get('step',0) caption_stepcount = caption_stepcount if caption_stepcount else 0 -- cgit v1.2.1 From 5d12ec82d3e13f5ff4c55db2930e4e10aed7015a Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:05:09 +0100 Subject: add encoder and decoder classes --- modules/textual_inversion/textual_inversion.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index d7813084..44d4e08b 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -16,6 +16,27 @@ import json from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset +class EmbeddingEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, torch.Tensor): + return {'EMBEDDINGTENSOR':obj.cpu().detach().numpy().tolist()} + return json.JSONEncoder.default(self, o) + +class EmbeddingDecoder(json.JSONDecoder): + def __init__(self, *args, **kwargs): + json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) + def object_hook(self, d): + if 'EMBEDDINGTENSOR' in d: + return torch.from_numpy(np.array(d['EMBEDDINGTENSOR'])) + return d + +def embeddingToB64(data): + d = json.dumps(data,cls=EmbeddingEncoder) + return base64.b64encode(d.encode()) + +def EmbeddingFromB64(data): + d = base64.b64decode(data) + return json.loads(d,cls=EmbeddingDecoder) class Embedding: def __init__(self, vec, name, step=None): -- cgit v1.2.1 From d0184b8f76ce492da699f1926f34b57cd095242e Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:06:12 +0100 Subject: change json tensor key name --- modules/textual_inversion/textual_inversion.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 44d4e08b..ae8d207d 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -19,15 +19,15 @@ import modules.textual_inversion.dataset class EmbeddingEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, torch.Tensor): - return {'EMBEDDINGTENSOR':obj.cpu().detach().numpy().tolist()} + return {'TORCHTENSOR':obj.cpu().detach().numpy().tolist()} return json.JSONEncoder.default(self, o) class EmbeddingDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) def object_hook(self, d): - if 'EMBEDDINGTENSOR' in d: - return torch.from_numpy(np.array(d['EMBEDDINGTENSOR'])) + if 'TORCHTENSOR' in d: + return torch.from_numpy(np.array(d['TORCHTENSOR'])) return d def embeddingToB64(data): -- cgit v1.2.1 From 66846105103cfc282434d0dc2102910160b7a633 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:06:42 +0100 Subject: correct case on embeddingFromB64 --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index ae8d207d..d2b95fa3 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -34,7 +34,7 @@ def embeddingToB64(data): d = json.dumps(data,cls=EmbeddingEncoder) return base64.b64encode(d.encode()) -def EmbeddingFromB64(data): +def embeddingFromB64(data): d = base64.b64decode(data) return json.loads(d,cls=EmbeddingDecoder) -- cgit v1.2.1 From 96f1e6be59316ec640cab2435fa95b3688194906 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:14:50 +0100 Subject: source checkpoint hash from current checkpoint --- modules/textual_inversion/textual_inversion.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index d2b95fa3..b16fa84e 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -286,10 +286,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, pre_lines = [((255, 207, 175),"<{}>".format(data.get('name','???')))] - caption_checkpoint_hash = data.get('sd_checkpoint') - if caption_checkpoint_hash is None: - caption_checkpoint_hash = data.get('hash') - caption_checkpoint_hash = caption_checkpoint_hash.upper() if caption_checkpoint_hash else 'UNKNOWN' + checkpoint = sd_models.select_checkpoint() + caption_checkpoint_hash = checkpoint.hash caption_stepcount = data.get('step',0) caption_stepcount = caption_stepcount if caption_stepcount else 0 -- cgit v1.2.1 From 01fd9cf0d28d8b71a113ab1aa62accfe7f0d9c51 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 9 Oct 2022 22:17:02 +0100 Subject: change source of step count --- modules/textual_inversion/textual_inversion.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index b16fa84e..e4f339b8 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -285,15 +285,9 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, info.add_text("sd-ti-embedding", embeddingToB64(data)) pre_lines = [((255, 207, 175),"<{}>".format(data.get('name','???')))] - checkpoint = sd_models.select_checkpoint() - caption_checkpoint_hash = checkpoint.hash - - caption_stepcount = data.get('step',0) - caption_stepcount = caption_stepcount if caption_stepcount else 0 - - post_lines = [((240, 223, 175),"Trained against checkpoint [{}] for {} steps".format(caption_checkpoint_hash, - caption_stepcount))] + post_lines = [((240, 223, 175),"Trained against checkpoint [{}] for {} steps".format(checkpoint.hash, + embedding.step))] captioned_image = captionImge(image,prelines=pre_lines,postlines=post_lines) captioned_image.save(last_saved_image, "PNG", pnginfo=info) else: -- cgit v1.2.1 From d6a599ef9ba18a66ae79b50f2945af5788fdda8f Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 00:07:52 +0100 Subject: change caption method --- modules/textual_inversion/textual_inversion.py | 30 ++++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index e4f339b8..21596e78 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -8,7 +8,7 @@ import html import datetime from PIL import Image,PngImagePlugin -from ..images import captionImge +from ..images import captionImageOverlay import numpy as np import base64 import json @@ -212,6 +212,12 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, else: images_dir = None + if create_image_every > 0 and save_image_with_stored_embedding: + images_embeds_dir = os.path.join(log_directory, "image_embeddings") + os.makedirs(images_embeds_dir, exist_ok=True) + else: + images_embeds_dir = None + cond_model = shared.sd_model.cond_stage_model shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." @@ -279,19 +285,25 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, shared.state.current_image = image - if save_image_with_stored_embedding: + if save_image_with_stored_embedding and os.path.exists(last_saved_file): + + last_saved_image_chunks = os.path.join(images_embeds_dir, f'{embedding_name}-{embedding.step}.png') + info = PngImagePlugin.PngInfo() data = torch.load(last_saved_file) info.add_text("sd-ti-embedding", embeddingToB64(data)) - pre_lines = [((255, 207, 175),"<{}>".format(data.get('name','???')))] + title = "<{}>".format(data.get('name','???')) checkpoint = sd_models.select_checkpoint() - post_lines = [((240, 223, 175),"Trained against checkpoint [{}] for {} steps".format(checkpoint.hash, - embedding.step))] - captioned_image = captionImge(image,prelines=pre_lines,postlines=post_lines) - captioned_image.save(last_saved_image, "PNG", pnginfo=info) - else: - image.save(last_saved_image) + footer_left = checkpoint.model_name + footer_mid = '[{}]'.format(checkpoint.hash) + footer_right = '[{}]'.format(embedding.step) + + captioned_image = captionImageOverlay(image,title,footer_left,footer_mid,footer_right) + + captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info) + + image.save(last_saved_image) last_saved_image += f", prompt: {text}" -- cgit v1.2.1 From e2c2925eb4d634b186de2c76798162ec56e2f869 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 00:12:53 +0100 Subject: remove braces from steps --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 21596e78..9a18ee5c 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -297,7 +297,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, checkpoint = sd_models.select_checkpoint() footer_left = checkpoint.model_name footer_mid = '[{}]'.format(checkpoint.hash) - footer_right = '[{}]'.format(embedding.step) + footer_right = '{}'.format(embedding.step) captioned_image = captionImageOverlay(image,title,footer_left,footer_mid,footer_right) -- cgit v1.2.1 From 1f92336be768d235c18a82acb2195b7135101ae7 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Sun, 9 Oct 2022 23:58:18 -0500 Subject: refactored the deepbooru module to improve speed on running multiple interogations in a row. Added the option to generate deepbooru tags for textual inversion preproccessing. --- modules/textual_inversion/preprocess.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index f1c002a2..9f63c9a4 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -3,11 +3,14 @@ from PIL import Image, ImageOps import platform import sys import tqdm +import time from modules import shared, images +from modules.shared import opts, cmd_opts +if cmd_opts.deepdanbooru: + import modules.deepbooru as deepbooru - -def preprocess(process_src, process_dst, process_flip, process_split, process_caption): +def preprocess(process_src, process_dst, process_flip, process_split, process_caption, process_caption_deepbooru=False): size = 512 src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) @@ -24,10 +27,21 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca if process_caption: shared.interrogator.load() + if process_caption_deepbooru: + deepbooru.create_deepbooru_process() + def save_pic_with_caption(image, index): if process_caption: caption = "-" + shared.interrogator.generate_caption(image) caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") + elif process_caption_deepbooru: + shared.deepbooru_process_return["value"] = -1 + shared.deepbooru_process_queue.put(image) + while shared.deepbooru_process_return["value"] == -1: + time.sleep(0.2) + caption = "-" + shared.deepbooru_process_return["value"] + caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") + shared.deepbooru_process_return["value"] = -1 else: caption = filename caption = os.path.splitext(caption)[0] @@ -79,6 +93,10 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca if process_caption: shared.interrogator.send_blip_to_ram() + if process_caption_deepbooru: + deepbooru.release_process() + + def sanitize_caption(base_path, original_caption, suffix): operating_system = platform.system().lower() if (operating_system == "windows"): -- cgit v1.2.1 From 3110f895b2718a3a25aae419fdf5c87c177ec9f4 Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Mon, 10 Oct 2022 17:07:46 +0900 Subject: Textual Inversion: Added custom training image size and number of repeats per input image in a single epoch --- modules/textual_inversion/dataset.py | 6 +++--- modules/textual_inversion/preprocess.py | 4 ++-- modules/textual_inversion/textual_inversion.py | 15 ++++++++++++--- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 7c44ea5b..acc4ce59 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -15,13 +15,13 @@ re_tag = re.compile(r"[a-zA-Z][_\w\d()]+") class PersonalizedBase(Dataset): - def __init__(self, data_root, size=None, repeats=100, flip_p=0.5, placeholder_token="*", width=512, height=512, model=None, device=None, template_file=None): + def __init__(self, data_root, size, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None): self.placeholder_token = placeholder_token self.size = size - self.width = width - self.height = height + self.width = size + self.height = size self.flip = transforms.RandomHorizontalFlip(p=flip_p) self.dataset = [] diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index f1c002a2..b3de6fd7 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -7,8 +7,8 @@ import tqdm from modules import shared, images -def preprocess(process_src, process_dst, process_flip, process_split, process_caption): - size = 512 +def preprocess(process_src, process_dst, process_size, process_flip, process_split, process_caption): + size = process_size src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index cd9f3498..e34dc2e8 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -6,6 +6,7 @@ import torch import tqdm import html import datetime +import math from modules import shared, devices, sd_hijack, processing, sd_models @@ -156,7 +157,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, create_image_every, save_embedding_every, template_file): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_size, steps, num_repeats, create_image_every, save_embedding_every, template_file): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -182,7 +183,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, size=512, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, size=training_size, repeats=num_repeats, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) hijack = sd_hijack.model_hijack @@ -200,6 +201,9 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, if ititial_step > steps: return embedding, filename + tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]) + epoch_len = (tr_img_len * num_repeats) + tr_img_len + pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) for i, (x, text) in pbar: embedding.step = i + ititial_step @@ -223,7 +227,10 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, loss.backward() optimizer.step() - pbar.set_description(f"loss: {losses.mean():.7f}") + epoch_num = math.floor(embedding.step / epoch_len) + epoch_step = embedding.step - (epoch_num * epoch_len) + + pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{epoch_len}]loss: {losses.mean():.7f}") if embedding.step > 0 and embedding_dir is not None and embedding.step % save_embedding_every == 0: last_saved_file = os.path.join(embedding_dir, f'{embedding_name}-{embedding.step}.pt') @@ -236,6 +243,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, sd_model=shared.sd_model, prompt=text, steps=20, + height=training_size, + width=training_size, do_not_save_grid=True, do_not_save_samples=True, ) -- cgit v1.2.1 From 4ee7519fc2e459ce8eff1f61f1655afba393357c Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Mon, 10 Oct 2022 17:31:33 +0900 Subject: Fixed progress bar output for epoch --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index e34dc2e8..769682ea 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -228,7 +228,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini optimizer.step() epoch_num = math.floor(embedding.step / epoch_len) - epoch_step = embedding.step - (epoch_num * epoch_len) + epoch_step = embedding.step - (epoch_num * epoch_len) + 1 pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{epoch_len}]loss: {losses.mean():.7f}") -- cgit v1.2.1 From 04c745ea4f81518999927fee5f78500560c25e29 Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Mon, 10 Oct 2022 22:35:35 +0900 Subject: Custom Width and Height --- modules/textual_inversion/dataset.py | 7 +++---- modules/textual_inversion/preprocess.py | 19 ++++++++++--------- modules/textual_inversion/textual_inversion.py | 11 +++++------ 3 files changed, 18 insertions(+), 19 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index acc4ce59..bcf772d2 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -15,13 +15,12 @@ re_tag = re.compile(r"[a-zA-Z][_\w\d()]+") class PersonalizedBase(Dataset): - def __init__(self, data_root, size, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None): + def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None): self.placeholder_token = placeholder_token - self.size = size - self.width = size - self.height = size + self.width = width + self.height = height self.flip = transforms.RandomHorizontalFlip(p=flip_p) self.dataset = [] diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index b3de6fd7..d7efdef2 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -7,8 +7,9 @@ import tqdm from modules import shared, images -def preprocess(process_src, process_dst, process_size, process_flip, process_split, process_caption): - size = process_size +def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption): + width = process_width + height = process_height src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) @@ -55,23 +56,23 @@ def preprocess(process_src, process_dst, process_size, process_flip, process_spl is_wide = ratio < 1 / 1.35 if process_split and is_tall: - img = img.resize((size, size * img.height // img.width)) + img = img.resize((width, height * img.height // img.width)) - top = img.crop((0, 0, size, size)) + top = img.crop((0, 0, width, height)) save_pic(top, index) - bot = img.crop((0, img.height - size, size, img.height)) + bot = img.crop((0, img.height - height, width, img.height)) save_pic(bot, index) elif process_split and is_wide: - img = img.resize((size * img.width // img.height, size)) + img = img.resize((width * img.width // img.height, height)) - left = img.crop((0, 0, size, size)) + left = img.crop((0, 0, width, height)) save_pic(left, index) - right = img.crop((img.width - size, 0, img.width, size)) + right = img.crop((img.width - width, 0, img.width, height)) save_pic(right, index) else: - img = images.resize_image(1, img, size, size) + img = images.resize_image(1, img, width, height) save_pic(img, index) shared.state.nextjob() diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 769682ea..5965c5a0 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -6,7 +6,6 @@ import torch import tqdm import html import datetime -import math from modules import shared, devices, sd_hijack, processing, sd_models @@ -157,7 +156,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_size, steps, num_repeats, create_image_every, save_embedding_every, template_file): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -183,7 +182,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, size=training_size, repeats=num_repeats, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=num_repeats, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) hijack = sd_hijack.model_hijack @@ -227,7 +226,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini loss.backward() optimizer.step() - epoch_num = math.floor(embedding.step / epoch_len) + epoch_num = embedding.step // epoch_len epoch_step = embedding.step - (epoch_num * epoch_len) + 1 pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{epoch_len}]loss: {losses.mean():.7f}") @@ -243,8 +242,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini sd_model=shared.sd_model, prompt=text, steps=20, - height=training_size, - width=training_size, + height=training_height, + width=training_width, do_not_save_grid=True, do_not_save_samples=True, ) -- cgit v1.2.1 From ea00c1624bbb0dcb5be07f59c9509061baddf5b1 Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Mon, 10 Oct 2022 17:07:46 +0900 Subject: Textual Inversion: Added custom training image size and number of repeats per input image in a single epoch --- modules/textual_inversion/dataset.py | 6 +++--- modules/textual_inversion/preprocess.py | 4 ++-- modules/textual_inversion/textual_inversion.py | 15 ++++++++++++--- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 7c44ea5b..acc4ce59 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -15,13 +15,13 @@ re_tag = re.compile(r"[a-zA-Z][_\w\d()]+") class PersonalizedBase(Dataset): - def __init__(self, data_root, size=None, repeats=100, flip_p=0.5, placeholder_token="*", width=512, height=512, model=None, device=None, template_file=None): + def __init__(self, data_root, size, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None): self.placeholder_token = placeholder_token self.size = size - self.width = width - self.height = height + self.width = size + self.height = size self.flip = transforms.RandomHorizontalFlip(p=flip_p) self.dataset = [] diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index f1c002a2..b3de6fd7 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -7,8 +7,8 @@ import tqdm from modules import shared, images -def preprocess(process_src, process_dst, process_flip, process_split, process_caption): - size = 512 +def preprocess(process_src, process_dst, process_size, process_flip, process_split, process_caption): + size = process_size src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index cd9f3498..e34dc2e8 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -6,6 +6,7 @@ import torch import tqdm import html import datetime +import math from modules import shared, devices, sd_hijack, processing, sd_models @@ -156,7 +157,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, create_image_every, save_embedding_every, template_file): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_size, steps, num_repeats, create_image_every, save_embedding_every, template_file): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -182,7 +183,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, size=512, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, size=training_size, repeats=num_repeats, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) hijack = sd_hijack.model_hijack @@ -200,6 +201,9 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, if ititial_step > steps: return embedding, filename + tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]) + epoch_len = (tr_img_len * num_repeats) + tr_img_len + pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) for i, (x, text) in pbar: embedding.step = i + ititial_step @@ -223,7 +227,10 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, loss.backward() optimizer.step() - pbar.set_description(f"loss: {losses.mean():.7f}") + epoch_num = math.floor(embedding.step / epoch_len) + epoch_step = embedding.step - (epoch_num * epoch_len) + + pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{epoch_len}]loss: {losses.mean():.7f}") if embedding.step > 0 and embedding_dir is not None and embedding.step % save_embedding_every == 0: last_saved_file = os.path.join(embedding_dir, f'{embedding_name}-{embedding.step}.pt') @@ -236,6 +243,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, steps, sd_model=shared.sd_model, prompt=text, steps=20, + height=training_size, + width=training_size, do_not_save_grid=True, do_not_save_samples=True, ) -- cgit v1.2.1 From 6ad3a53e368d36535de1a4fca73b3bb78fd40654 Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Mon, 10 Oct 2022 17:31:33 +0900 Subject: Fixed progress bar output for epoch --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index e34dc2e8..769682ea 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -228,7 +228,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini optimizer.step() epoch_num = math.floor(embedding.step / epoch_len) - epoch_step = embedding.step - (epoch_num * epoch_len) + epoch_step = embedding.step - (epoch_num * epoch_len) + 1 pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{epoch_len}]loss: {losses.mean():.7f}") -- cgit v1.2.1 From 7a20f914eddfdf09c0ccced157ec108205bc3d0f Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Mon, 10 Oct 2022 22:35:35 +0900 Subject: Custom Width and Height --- modules/textual_inversion/dataset.py | 7 +++---- modules/textual_inversion/preprocess.py | 19 ++++++++++--------- modules/textual_inversion/textual_inversion.py | 11 +++++------ 3 files changed, 18 insertions(+), 19 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index acc4ce59..bcf772d2 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -15,13 +15,12 @@ re_tag = re.compile(r"[a-zA-Z][_\w\d()]+") class PersonalizedBase(Dataset): - def __init__(self, data_root, size, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None): + def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None): self.placeholder_token = placeholder_token - self.size = size - self.width = size - self.height = size + self.width = width + self.height = height self.flip = transforms.RandomHorizontalFlip(p=flip_p) self.dataset = [] diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index b3de6fd7..d7efdef2 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -7,8 +7,9 @@ import tqdm from modules import shared, images -def preprocess(process_src, process_dst, process_size, process_flip, process_split, process_caption): - size = process_size +def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption): + width = process_width + height = process_height src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) @@ -55,23 +56,23 @@ def preprocess(process_src, process_dst, process_size, process_flip, process_spl is_wide = ratio < 1 / 1.35 if process_split and is_tall: - img = img.resize((size, size * img.height // img.width)) + img = img.resize((width, height * img.height // img.width)) - top = img.crop((0, 0, size, size)) + top = img.crop((0, 0, width, height)) save_pic(top, index) - bot = img.crop((0, img.height - size, size, img.height)) + bot = img.crop((0, img.height - height, width, img.height)) save_pic(bot, index) elif process_split and is_wide: - img = img.resize((size * img.width // img.height, size)) + img = img.resize((width * img.width // img.height, height)) - left = img.crop((0, 0, size, size)) + left = img.crop((0, 0, width, height)) save_pic(left, index) - right = img.crop((img.width - size, 0, img.width, size)) + right = img.crop((img.width - width, 0, img.width, height)) save_pic(right, index) else: - img = images.resize_image(1, img, size, size) + img = images.resize_image(1, img, width, height) save_pic(img, index) shared.state.nextjob() diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 769682ea..5965c5a0 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -6,7 +6,6 @@ import torch import tqdm import html import datetime -import math from modules import shared, devices, sd_hijack, processing, sd_models @@ -157,7 +156,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_size, steps, num_repeats, create_image_every, save_embedding_every, template_file): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -183,7 +182,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, size=training_size, repeats=num_repeats, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=num_repeats, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) hijack = sd_hijack.model_hijack @@ -227,7 +226,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini loss.backward() optimizer.step() - epoch_num = math.floor(embedding.step / epoch_len) + epoch_num = embedding.step // epoch_len epoch_step = embedding.step - (epoch_num * epoch_len) + 1 pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{epoch_len}]loss: {losses.mean():.7f}") @@ -243,8 +242,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini sd_model=shared.sd_model, prompt=text, steps=20, - height=training_size, - width=training_size, + height=training_height, + width=training_width, do_not_save_grid=True, do_not_save_samples=True, ) -- cgit v1.2.1 From 707a431100362645e914042bb344d08439f48ac8 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 15:34:49 +0100 Subject: add pixel data footer --- modules/textual_inversion/textual_inversion.py | 48 ++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 7a24192e..6fb64691 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -12,6 +12,7 @@ from ..images import captionImageOverlay import numpy as np import base64 import json +import zlib from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset @@ -20,7 +21,7 @@ class EmbeddingEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, torch.Tensor): return {'TORCHTENSOR':obj.cpu().detach().numpy().tolist()} - return json.JSONEncoder.default(self, o) + return json.JSONEncoder.default(self, obj) class EmbeddingDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): @@ -38,6 +39,45 @@ def embeddingFromB64(data): d = base64.b64decode(data) return json.loads(d,cls=EmbeddingDecoder) +def appendImageDataFooter(image,data): + d = 3 + data_compressed = zlib.compress( json.dumps(data,cls=EmbeddingEncoder).encode(),level=9) + dnp = np.frombuffer(data_compressed,np.uint8).copy() + w = image.size[0] + next_size = dnp.shape[0] + (w-(dnp.shape[0]%w)) + next_size = next_size + ((w*d)-(next_size%(w*d))) + dnp.resize(next_size) + dnp = dnp.reshape((-1,w,d)) + print(dnp.shape) + im = Image.fromarray(dnp,mode='RGB') + background = Image.new('RGB',(image.size[0],image.size[1]+im.size[1]+1),(0,0,0)) + background.paste(image,(0,0)) + background.paste(im,(0,image.size[1]+1)) + return background + +def crop_black(img,tol=0): + mask = (img>tol).all(2) + mask0,mask1 = mask.any(0),mask.any(1) + col_start,col_end = mask0.argmax(),mask.shape[1]-mask0[::-1].argmax() + row_start,row_end = mask1.argmax(),mask.shape[0]-mask1[::-1].argmax() + return img[row_start:row_end,col_start:col_end] + +def extractImageDataFooter(image): + d=3 + outarr = crop_black(np.array(image.getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) + lastRow = np.where( np.sum(outarr, axis=(1,2))==0) + if lastRow[0].shape[0] == 0: + print('Image data block not found.') + return None + lastRow = lastRow[0] + + lastRow = lastRow.max() + + dataBlock = outarr[lastRow+1::].astype(np.uint8).flatten().tobytes() + print(lastRow) + data = zlib.decompress(dataBlock) + return json.loads(data,cls=EmbeddingDecoder) + class Embedding: def __init__(self, vec, name, step=None): self.vec = vec @@ -113,6 +153,9 @@ class EmbeddingDatabase: if 'sd-ti-embedding' in embed_image.text: data = embeddingFromB64(embed_image.text['sd-ti-embedding']) name = data.get('name',name) + else: + data = extractImageDataFooter(embed_image) + name = data.get('name',name) else: data = torch.load(path, map_location="cpu") @@ -190,7 +233,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -308,6 +351,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini footer_right = '{}'.format(embedding.step) captioned_image = captionImageOverlay(image,title,footer_left,footer_mid,footer_right) + captioned_image = appendImageDataFooter(captioned_image,data) captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info) -- cgit v1.2.1 From df6d0d9286279c41c4c67460c3158fa268697524 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 15:43:09 +0100 Subject: convert back to rgb as some hosts add alpha --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 6fb64691..667a7cf2 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -64,7 +64,7 @@ def crop_black(img,tol=0): def extractImageDataFooter(image): d=3 - outarr = crop_black(np.array(image.getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) + outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) lastRow = np.where( np.sum(outarr, axis=(1,2))==0) if lastRow[0].shape[0] == 0: print('Image data block not found.') -- cgit v1.2.1 From bc3e183b739913e7be91213a256f038b10eb71e9 Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Tue, 11 Oct 2022 04:30:13 +0900 Subject: Textual Inversion: Preprocess and Training will only pick-up image files --- modules/textual_inversion/dataset.py | 3 ++- modules/textual_inversion/preprocess.py | 3 ++- modules/textual_inversion/textual_inversion.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index bcf772d2..d4baf066 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -22,6 +22,7 @@ class PersonalizedBase(Dataset): self.width = width self.height = height self.flip = transforms.RandomHorizontalFlip(p=flip_p) + self.extns = [".jpg",".jpeg",".png"] self.dataset = [] @@ -32,7 +33,7 @@ class PersonalizedBase(Dataset): assert data_root, 'dataset directory not specified' - self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)] + self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root) if os.path.splitext(file_path.casefold())[1] in self.extns] print("Preparing dataset...") for path in tqdm.tqdm(self.image_paths): image = Image.open(path) diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index d7efdef2..b6c78cf8 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -12,12 +12,13 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ height = process_height src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) + extns = [".jpg",".jpeg",".png"] assert src != dst, 'same directory specified as source and destination' os.makedirs(dst, exist_ok=True) - files = os.listdir(src) + files = [i for i in os.listdir(src) if os.path.splitext(i.casefold())[1] in extns] shared.state.textinfo = "Preprocessing..." shared.state.job_count = len(files) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 5965c5a0..45397be9 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -161,6 +161,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = "Initializing textual inversion training..." shared.state.job_count = steps + extns = [".jpg",".jpeg",".png"] filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt') @@ -200,7 +201,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if ititial_step > steps: return embedding, filename - tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]) + tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root) if os.path.splitext(file_path.casefold())[1] in extns]) epoch_len = (tr_img_len * num_repeats) + tr_img_len pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) -- cgit v1.2.1 From 2536ecbb1790da2af0d61b6a26f38732cba665cd Mon Sep 17 00:00:00 2001 From: Fampai <> Date: Mon, 10 Oct 2022 17:10:29 -0400 Subject: Refactored learning rate code --- modules/textual_inversion/textual_inversion.py | 51 ++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 4 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 5965c5a0..c64a4598 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -189,8 +189,6 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini embedding = hijack.embedding_db.word_embeddings[embedding_name] embedding.vec.requires_grad = True - optimizer = torch.optim.AdamW([embedding.vec], lr=learn_rate) - losses = torch.zeros((32,)) last_saved_file = "" @@ -203,12 +201,24 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]) epoch_len = (tr_img_len * num_repeats) + tr_img_len + scheduleIter = iter(LearnSchedule(learn_rate, steps, ititial_step)) + (learn_rate, end_step) = next(scheduleIter) + print(f'Training at rate of {learn_rate} until step {end_step}') + + optimizer = torch.optim.AdamW([embedding.vec], lr=learn_rate) + pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) for i, (x, text) in pbar: embedding.step = i + ititial_step - if embedding.step > steps: - break + if embedding.step > end_step: + try: + (learn_rate, end_step) = next(scheduleIter) + except: + break + tqdm.tqdm.write(f'Training at rate of {learn_rate} until step {end_step}') + for pg in optimizer.param_groups: + pg['lr'] = learn_rate if shared.state.interrupted: break @@ -277,3 +287,36 @@ Last saved image: {html.escape(last_saved_image)}
return embedding, filename +class LearnSchedule: + def __init__(self, learn_rate, max_steps, cur_step=0): + pairs = learn_rate.split(',') + self.rates = [] + self.it = 0 + self.maxit = 0 + for i, pair in enumerate(pairs): + tmp = pair.split(':') + if len(tmp) == 2: + step = int(tmp[1]) + if step > cur_step: + self.rates.append((float(tmp[0]), min(step, max_steps))) + self.maxit += 1 + if step > max_steps: + return + elif step == -1: + self.rates.append((float(tmp[0]), max_steps)) + self.maxit += 1 + return + else: + self.rates.append((float(tmp[0]), max_steps)) + self.maxit += 1 + return + + def __iter__(self): + return self + + def __next__(self): + if self.it < self.maxit: + self.it += 1 + return self.rates[self.it - 1] + else: + raise StopIteration -- cgit v1.2.1 From 907a88b2d0be320575c2129d8d6a1d4f3a68f9eb Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Tue, 11 Oct 2022 06:33:08 +0900 Subject: Added .webp .bmp --- modules/textual_inversion/dataset.py | 2 +- modules/textual_inversion/preprocess.py | 2 +- modules/textual_inversion/textual_inversion.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index d4baf066..0dc54fb7 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -22,7 +22,7 @@ class PersonalizedBase(Dataset): self.width = width self.height = height self.flip = transforms.RandomHorizontalFlip(p=flip_p) - self.extns = [".jpg",".jpeg",".png"] + self.extns = [".jpg",".jpeg",".png",".webp",".bmp"] self.dataset = [] diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index b6c78cf8..8290abe8 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -12,7 +12,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ height = process_height src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) - extns = [".jpg",".jpeg",".png"] + extns = [".jpg",".jpeg",".png",".webp",".bmp"] assert src != dst, 'same directory specified as source and destination' diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index a03b299c..33c923d1 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -161,7 +161,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = "Initializing textual inversion training..." shared.state.job_count = steps - extns = [".jpg",".jpeg",".png"] + extns = [".jpg",".jpeg",".png",".webp",".bmp"] filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt') -- cgit v1.2.1 From 315d5a8ed975c88f670bc484f40a23fbf3a77b63 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 23:14:44 +0100 Subject: update data dis[play style --- modules/textual_inversion/textual_inversion.py | 88 +++++++++++++++++++------- 1 file changed, 65 insertions(+), 23 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 667a7cf2..95eebea7 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -39,20 +39,59 @@ def embeddingFromB64(data): d = base64.b64decode(data) return json.loads(d,cls=EmbeddingDecoder) -def appendImageDataFooter(image,data): +def xorBlock(block): + return np.bitwise_xor(block.astype(np.uint8), + ((np.random.RandomState(0xDEADBEEF).random(block.shape)*255).astype(np.uint8)) & 0x0F ) + +def styleBlock(block,sequence): + im = Image.new('RGB',(block.shape[1],block.shape[0])) + draw = ImageDraw.Draw(im) + i=0 + for x in range(-6,im.size[0],8): + for yi,y in enumerate(range(-6,im.size[1],8)): + offset=0 + if yi%2==0: + offset=4 + shade = sequence[i%len(sequence)] + i+=1 + draw.ellipse((x+offset, y, x+6+offset, y+6), fill =(shade,shade,shade) ) + + fg = np.array(im).astype(np.uint8) & 0xF0 + return block ^ fg + +def insertImageDataEmbed(image,data): d = 3 data_compressed = zlib.compress( json.dumps(data,cls=EmbeddingEncoder).encode(),level=9) dnp = np.frombuffer(data_compressed,np.uint8).copy() - w = image.size[0] - next_size = dnp.shape[0] + (w-(dnp.shape[0]%w)) - next_size = next_size + ((w*d)-(next_size%(w*d))) - dnp.resize(next_size) - dnp = dnp.reshape((-1,w,d)) - print(dnp.shape) - im = Image.fromarray(dnp,mode='RGB') - background = Image.new('RGB',(image.size[0],image.size[1]+im.size[1]+1),(0,0,0)) - background.paste(image,(0,0)) - background.paste(im,(0,image.size[1]+1)) + dnphigh = dnp >> 4 + dnplow = dnp & 0x0F + + h = image.size[1] + next_size = dnplow.shape[0] + (h-(dnplow.shape[0]%h)) + next_size = next_size + ((h*d)-(next_size%(h*d))) + + dnplow.resize(next_size) + dnplow = dnplow.reshape((h,-1,d)) + + dnphigh.resize(next_size) + dnphigh = dnphigh.reshape((h,-1,d)) + + edgeStyleWeights = list(data['string_to_param'].values())[0].cpu().detach().numpy().tolist()[0][:1024] + edgeStyleWeights = (np.abs(edgeStyleWeights)/np.max(np.abs(edgeStyleWeights))*255).astype(np.uint8) + + dnplow = styleBlock(dnplow,sequence=edgeStyleWeights) + dnplow = xorBlock(dnplow) + dnphigh = styleBlock(dnphigh,sequence=edgeStyleWeights[::-1]) + dnphigh = xorBlock(dnphigh) + + imlow = Image.fromarray(dnplow,mode='RGB') + imhigh = Image.fromarray(dnphigh,mode='RGB') + + background = Image.new('RGB',(image.size[0]+imlow.size[0]+imhigh.size[0]+2,image.size[1]),(0,0,0)) + background.paste(imlow,(0,0)) + background.paste(image,(imlow.size[0]+1,0)) + background.paste(imhigh,(imlow.size[0]+1+image.size[0]+1,0)) + return background def crop_black(img,tol=0): @@ -62,19 +101,22 @@ def crop_black(img,tol=0): row_start,row_end = mask1.argmax(),mask.shape[0]-mask1[::-1].argmax() return img[row_start:row_end,col_start:col_end] -def extractImageDataFooter(image): +def extractImageDataEmbed(image): d=3 - outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) - lastRow = np.where( np.sum(outarr, axis=(1,2))==0) - if lastRow[0].shape[0] == 0: - print('Image data block not found.') + outarr = crop_black(np.array(image.getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F + blackCols = np.where( np.sum(outarr, axis=(0,2))==0) + if blackCols[0].shape[0] < 2: + print('No Image data blocks found.') return None - lastRow = lastRow[0] - - lastRow = lastRow.max() - dataBlock = outarr[lastRow+1::].astype(np.uint8).flatten().tobytes() - print(lastRow) + dataBlocklower = outarr[:,:blackCols[0].min(),:].astype(np.uint8) + dataBlockupper = outarr[:,blackCols[0].max()+1:,:].astype(np.uint8) + + dataBlocklower = xorBlock(dataBlocklower) + dataBlockupper = xorBlock(dataBlockupper) + + dataBlock = (dataBlockupper << 4) | (dataBlocklower) + dataBlock = dataBlock.flatten().tobytes() data = zlib.decompress(dataBlock) return json.loads(data,cls=EmbeddingDecoder) @@ -154,7 +196,7 @@ class EmbeddingDatabase: data = embeddingFromB64(embed_image.text['sd-ti-embedding']) name = data.get('name',name) else: - data = extractImageDataFooter(embed_image) + data = extractImageDataEmbed(embed_image) name = data.get('name',name) else: data = torch.load(path, map_location="cpu") @@ -351,7 +393,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini footer_right = '{}'.format(embedding.step) captioned_image = captionImageOverlay(image,title,footer_left,footer_mid,footer_right) - captioned_image = appendImageDataFooter(captioned_image,data) + captioned_image = insertImageDataEmbed(captioned_image,data) captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info) -- cgit v1.2.1 From 767202a4c324f9b49f63ab4dabbb5736fe9df6e5 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 23:20:52 +0100 Subject: add dependency --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 95eebea7..f3cacaa0 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,7 +7,7 @@ import tqdm import html import datetime -from PIL import Image,PngImagePlugin +from PIL import Image,PngImagePlugin,ImageDraw from ..images import captionImageOverlay import numpy as np import base64 -- cgit v1.2.1 From e0fbe6d27e7b4505766c8cb5a4264e1114cf3721 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Mon, 10 Oct 2022 23:26:24 +0100 Subject: colour depth conversion fix --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index f3cacaa0..ae807268 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -103,7 +103,7 @@ def crop_black(img,tol=0): def extractImageDataEmbed(image): d=3 - outarr = crop_black(np.array(image.getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F + outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F blackCols = np.where( np.sum(outarr, axis=(0,2))==0) if blackCols[0].shape[0] < 2: print('No Image data blocks found.') -- cgit v1.2.1 From bb932dbf9faf43ba918daa4791873078797b2a48 Mon Sep 17 00:00:00 2001 From: JC_Array Date: Mon, 10 Oct 2022 18:37:52 -0500 Subject: added alpha sort and threshold variables to create process method in preprocessing --- modules/textual_inversion/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 4a2194da..c0af729b 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -29,7 +29,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.interrogator.load() if process_caption_deepbooru: - deepbooru.create_deepbooru_process() + deepbooru.create_deepbooru_process(opts.deepbooru_threshold, opts.deepbooru_sort_alpha) def save_pic_with_caption(image, index): if process_caption: -- cgit v1.2.1 From 7aa8fcac1e45c3ad9c6a40df0e44a346afcd5032 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 04:17:36 +0100 Subject: use simple lcg in xor --- modules/textual_inversion/textual_inversion.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index ae807268..13416a08 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -39,9 +39,15 @@ def embeddingFromB64(data): d = base64.b64decode(data) return json.loads(d,cls=EmbeddingDecoder) +def lcg(m=2**32, a=1664525, c=1013904223, seed=0): + while True: + seed = (a * seed + c) % m + yield seed + def xorBlock(block): - return np.bitwise_xor(block.astype(np.uint8), - ((np.random.RandomState(0xDEADBEEF).random(block.shape)*255).astype(np.uint8)) & 0x0F ) + g = lcg() + randblock = np.array([next(g) for _ in range(np.product(block.shape))]).astype(np.uint8).reshape(block.shape) + return np.bitwise_xor(block.astype(np.uint8),randblock & 0x0F) def styleBlock(block,sequence): im = Image.new('RGB',(block.shape[1],block.shape[0])) -- cgit v1.2.1 From b2368a3bce663f19a7209d9cb38617e635ca6e3c Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Tue, 11 Oct 2022 17:32:46 +0900 Subject: Switched to exception handling --- modules/textual_inversion/dataset.py | 10 +++++----- modules/textual_inversion/preprocess.py | 8 +++++--- modules/textual_inversion/textual_inversion.py | 18 ++++++++---------- 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 0dc54fb7..4d006366 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -22,7 +22,6 @@ class PersonalizedBase(Dataset): self.width = width self.height = height self.flip = transforms.RandomHorizontalFlip(p=flip_p) - self.extns = [".jpg",".jpeg",".png",".webp",".bmp"] self.dataset = [] @@ -33,12 +32,13 @@ class PersonalizedBase(Dataset): assert data_root, 'dataset directory not specified' - self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root) if os.path.splitext(file_path.casefold())[1] in self.extns] + self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)] print("Preparing dataset...") for path in tqdm.tqdm(self.image_paths): - image = Image.open(path) - image = image.convert('RGB') - image = image.resize((self.width, self.height), PIL.Image.BICUBIC) + try: + image = Image.open(path).convert('RGB').resize((self.width, self.height), PIL.Image.BICUBIC) + except Exception: + continue filename = os.path.basename(path) filename_tokens = os.path.splitext(filename)[0] diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 8290abe8..1a672725 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -12,13 +12,12 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ height = process_height src = os.path.abspath(process_src) dst = os.path.abspath(process_dst) - extns = [".jpg",".jpeg",".png",".webp",".bmp"] assert src != dst, 'same directory specified as source and destination' os.makedirs(dst, exist_ok=True) - files = [i for i in os.listdir(src) if os.path.splitext(i.casefold())[1] in extns] + files = os.listdir(src) shared.state.textinfo = "Preprocessing..." shared.state.job_count = len(files) @@ -47,7 +46,10 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ for index, imagefile in enumerate(tqdm.tqdm(files)): subindex = [0] filename = os.path.join(src, imagefile) - img = Image.open(filename).convert("RGB") + try: + img = Image.open(filename).convert("RGB") + except Exception: + continue if shared.state.interrupted: break diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 33c923d1..91cde04b 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -161,7 +161,6 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = "Initializing textual inversion training..." shared.state.job_count = steps - extns = [".jpg",".jpeg",".png",".webp",".bmp"] filename = os.path.join(shared.cmd_opts.embeddings_dir, f'{embedding_name}.pt') @@ -201,10 +200,6 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if ititial_step > steps: return embedding, filename - tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root) if os.path.splitext(file_path.casefold())[1] in extns]) - - epoch_len = (tr_img_len * num_repeats) + tr_img_len - pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) for i, (x, text) in pbar: embedding.step = i + ititial_step @@ -228,10 +223,10 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini loss.backward() optimizer.step() - epoch_num = embedding.step // epoch_len - epoch_step = embedding.step - (epoch_num * epoch_len) + 1 + epoch_num = embedding.step // len(ds) + epoch_step = embedding.step - (epoch_num * len(ds)) + 1 - pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{epoch_len}]loss: {losses.mean():.7f}") + pbar.set_description(f"[Epoch {epoch_num}: {epoch_step}/{len(ds)}]loss: {losses.mean():.7f}") if embedding.step > 0 and embedding_dir is not None and embedding.step % save_embedding_every == 0: last_saved_file = os.path.join(embedding_dir, f'{embedding_name}-{embedding.step}.pt') @@ -243,9 +238,12 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini p = processing.StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, prompt=text, - steps=20, - height=training_height, + steps=28, + height=768, width=training_width, + negative_prompt="lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts,signature, watermark, username, blurry, artist name", + cfg_scale=7.0, + sampler_index=0, do_not_save_grid=True, do_not_save_samples=True, ) -- cgit v1.2.1 From 8bacbca0a1ab9aabcb0ad0cbf070e0006991e98a Mon Sep 17 00:00:00 2001 From: alg-wiki Date: Tue, 11 Oct 2022 17:35:09 +0900 Subject: Removed my local edits to checkpoint image generation --- modules/textual_inversion/textual_inversion.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 91cde04b..e9ff80c2 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -238,12 +238,9 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini p = processing.StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, prompt=text, - steps=28, - height=768, + steps=20, + height=training_height, width=training_width, - negative_prompt="lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts,signature, watermark, username, blurry, artist name", - cfg_scale=7.0, - sampler_index=0, do_not_save_grid=True, do_not_save_samples=True, ) -- cgit v1.2.1 From 530103b586109c11fd068eb70ef09503ec6a4caf Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 14:53:02 +0300 Subject: fixes related to merge --- modules/textual_inversion/textual_inversion.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 5965c5a0..d6977950 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -156,7 +156,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file, preview_image_prompt): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -238,12 +238,14 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0: last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png') + preview_text = text if preview_image_prompt == "" else preview_image_prompt + p = processing.StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, - prompt=text, + prompt=preview_text, steps=20, - height=training_height, - width=training_width, + height=training_height, + width=training_width, do_not_save_grid=True, do_not_save_samples=True, ) @@ -254,7 +256,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.current_image = image image.save(last_saved_image) - last_saved_image += f", prompt: {text}" + last_saved_image += f", prompt: {preview_text}" shared.state.job_no = embedding.step -- cgit v1.2.1 From 6d09b8d1df3a96e1380bb1650f5961781630af96 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 18:33:57 +0300 Subject: produce error when training with medvram/lowvram enabled --- modules/textual_inversion/ui.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py index c57de1f9..70f47343 100644 --- a/modules/textual_inversion/ui.py +++ b/modules/textual_inversion/ui.py @@ -22,6 +22,9 @@ def preprocess(*args): def train_embedding(*args): + + assert not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram, 'Training models with lowvram or medvram is not possible' + try: sd_hijack.undo_optimizations() -- cgit v1.2.1 From d4ea5f4d8631f778d11efcde397e4a5b8801d43b Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 19:03:08 +0300 Subject: add an option to unload models during hypernetwork training to save VRAM --- modules/textual_inversion/dataset.py | 29 ++++++++++++++++++-------- modules/textual_inversion/textual_inversion.py | 2 +- 2 files changed, 21 insertions(+), 10 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 4d006366..f61f40d3 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -8,14 +8,14 @@ from torchvision import transforms import random import tqdm -from modules import devices +from modules import devices, shared import re re_tag = re.compile(r"[a-zA-Z][_\w\d()]+") class PersonalizedBase(Dataset): - def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None): + def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None, include_cond=False): self.placeholder_token = placeholder_token @@ -32,6 +32,8 @@ class PersonalizedBase(Dataset): assert data_root, 'dataset directory not specified' + cond_model = shared.sd_model.cond_stage_model + self.image_paths = [os.path.join(data_root, file_path) for file_path in os.listdir(data_root)] print("Preparing dataset...") for path in tqdm.tqdm(self.image_paths): @@ -53,7 +55,13 @@ class PersonalizedBase(Dataset): init_latent = model.get_first_stage_encoding(model.encode_first_stage(torchdata.unsqueeze(dim=0))).squeeze() init_latent = init_latent.to(devices.cpu) - self.dataset.append((init_latent, filename_tokens)) + if include_cond: + text = self.create_text(filename_tokens) + cond = cond_model([text]).to(devices.cpu) + else: + cond = None + + self.dataset.append((init_latent, filename_tokens, cond)) self.length = len(self.dataset) * repeats @@ -64,6 +72,12 @@ class PersonalizedBase(Dataset): def shuffle(self): self.indexes = self.initial_indexes[torch.randperm(self.initial_indexes.shape[0])] + def create_text(self, filename_tokens): + text = random.choice(self.lines) + text = text.replace("[name]", self.placeholder_token) + text = text.replace("[filewords]", ' '.join(filename_tokens)) + return text + def __len__(self): return self.length @@ -72,10 +86,7 @@ class PersonalizedBase(Dataset): self.shuffle() index = self.indexes[i % len(self.indexes)] - x, filename_tokens = self.dataset[index] - - text = random.choice(self.lines) - text = text.replace("[name]", self.placeholder_token) - text = text.replace("[filewords]", ' '.join(filename_tokens)) + x, filename_tokens, cond = self.dataset[index] - return x, text + text = self.create_text(filename_tokens) + return x, text, cond diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index bb05cdc6..35f4bd9e 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -201,7 +201,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini return embedding, filename pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) - for i, (x, text) in pbar: + for i, (x, text, _) in pbar: embedding.step = i + ititial_step if embedding.step > steps: -- cgit v1.2.1 From c080f52ceae73b893155eff7de577aaf1a982a2f Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 19:37:58 +0100 Subject: move embedding logic to separate file --- modules/textual_inversion/image_embedding.py | 234 +++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 modules/textual_inversion/image_embedding.py (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/image_embedding.py b/modules/textual_inversion/image_embedding.py new file mode 100644 index 00000000..6ad39602 --- /dev/null +++ b/modules/textual_inversion/image_embedding.py @@ -0,0 +1,234 @@ +import base64 +import json +import numpy as np +import zlib +from PIL import Image,PngImagePlugin,ImageDraw,ImageFont +from fonts.ttf import Roboto +import torch + +class EmbeddingEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, torch.Tensor): + return {'TORCHTENSOR':obj.cpu().detach().numpy().tolist()} + return json.JSONEncoder.default(self, obj) + +class EmbeddingDecoder(json.JSONDecoder): + def __init__(self, *args, **kwargs): + json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) + def object_hook(self, d): + if 'TORCHTENSOR' in d: + return torch.from_numpy(np.array(d['TORCHTENSOR'])) + return d + +def embedding_to_b64(data): + d = json.dumps(data,cls=EmbeddingEncoder) + return base64.b64encode(d.encode()) + +def embedding_from_b64(data): + d = base64.b64decode(data) + return json.loads(d,cls=EmbeddingDecoder) + +def lcg(m=2**32, a=1664525, c=1013904223, seed=0): + while True: + seed = (a * seed + c) % m + yield seed%255 + +def xor_block(block): + g = lcg() + randblock = np.array([next(g) for _ in range(np.product(block.shape))]).astype(np.uint8).reshape(block.shape) + return np.bitwise_xor(block.astype(np.uint8),randblock & 0x0F) + +def style_block(block,sequence): + im = Image.new('RGB',(block.shape[1],block.shape[0])) + draw = ImageDraw.Draw(im) + i=0 + for x in range(-6,im.size[0],8): + for yi,y in enumerate(range(-6,im.size[1],8)): + offset=0 + if yi%2==0: + offset=4 + shade = sequence[i%len(sequence)] + i+=1 + draw.ellipse((x+offset, y, x+6+offset, y+6), fill =(shade,shade,shade) ) + + fg = np.array(im).astype(np.uint8) & 0xF0 + + return block ^ fg + +def insert_image_data_embed(image,data): + d = 3 + data_compressed = zlib.compress( json.dumps(data,cls=EmbeddingEncoder).encode(),level=9) + data_np_ = np.frombuffer(data_compressed,np.uint8).copy() + data_np_high = data_np_ >> 4 + data_np_low = data_np_ & 0x0F + + h = image.size[1] + next_size = data_np_low.shape[0] + (h-(data_np_low.shape[0]%h)) + next_size = next_size + ((h*d)-(next_size%(h*d))) + + data_np_low.resize(next_size) + data_np_low = data_np_low.reshape((h,-1,d)) + + data_np_high.resize(next_size) + data_np_high = data_np_high.reshape((h,-1,d)) + + edge_style = list(data['string_to_param'].values())[0].cpu().detach().numpy().tolist()[0][:1024] + edge_style = (np.abs(edge_style)/np.max(np.abs(edge_style))*255).astype(np.uint8) + + data_np_low = style_block(data_np_low,sequence=edge_style) + data_np_low = xor_block(data_np_low) + data_np_high = style_block(data_np_high,sequence=edge_style[::-1]) + data_np_high = xor_block(data_np_high) + + im_low = Image.fromarray(data_np_low,mode='RGB') + im_high = Image.fromarray(data_np_high,mode='RGB') + + background = Image.new('RGB',(image.size[0]+im_low.size[0]+im_high.size[0]+2,image.size[1]),(0,0,0)) + background.paste(im_low,(0,0)) + background.paste(image,(im_low.size[0]+1,0)) + background.paste(im_high,(im_low.size[0]+1+image.size[0]+1,0)) + + return background + +def crop_black(img,tol=0): + mask = (img>tol).all(2) + mask0,mask1 = mask.any(0),mask.any(1) + col_start,col_end = mask0.argmax(),mask.shape[1]-mask0[::-1].argmax() + row_start,row_end = mask1.argmax(),mask.shape[0]-mask1[::-1].argmax() + return img[row_start:row_end,col_start:col_end] + +def extract_image_data_embed(image): + d=3 + outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F + black_cols = np.where( np.sum(outarr, axis=(0,2))==0) + if black_cols[0].shape[0] < 2: + print('No Image data blocks found.') + return None + + data_block_lower = outarr[:,:black_cols[0].min(),:].astype(np.uint8) + data_block_upper = outarr[:,black_cols[0].max()+1:,:].astype(np.uint8) + + data_block_lower = xor_block(data_block_lower) + data_block_upper = xor_block(data_block_upper) + + data_block = (data_block_upper << 4) | (data_block_lower) + data_block = data_block.flatten().tobytes() + + data = zlib.decompress(data_block) + return json.loads(data,cls=EmbeddingDecoder) + +def addCaptionLines(lines,image,initialx,textfont): + draw = ImageDraw.Draw(image) + hstart =initialx + for fill,line in lines: + fontsize = 32 + font = ImageFont.truetype(textfont, fontsize) + _,_,w, h = draw.textbbox((0,0),line,font=font) + fontsize = min( int(fontsize * ((image.size[0]-35)/w) ), 28) + font = ImageFont.truetype(textfont, fontsize) + _,_,w,h = draw.textbbox((0,0),line,font=font) + draw.text(((image.size[0]-w)/2,hstart), line, font=font, fill=fill) + hstart += h + return hstart + +def caption_image(image,prelines,postlines,background=(51, 51, 51),font=None): + if font is None: + try: + font = ImageFont.truetype(opts.font or Roboto, fontsize) + font = opts.font or Roboto + except Exception: + font = Roboto + + sample_image = image + background = Image.new("RGBA", (sample_image.size[0],sample_image.size[1]+1024), background) + hoffset = addCaptionLines(prelines,background,5,font)+16 + background.paste(sample_image,(0,hoffset)) + hoffset = hoffset+sample_image.size[1]+8 + hoffset = addCaptionLines(postlines,background,hoffset,font) + background = background.crop((0,0,sample_image.size[0],hoffset+8)) + return background + +def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfont=None): + from math import cos + + image = srcimage.copy() + + if textfont is None: + try: + textfont = ImageFont.truetype(opts.font or Roboto, fontsize) + textfont = opts.font or Roboto + except Exception: + textfont = Roboto + + factor = 1.5 + gradient = Image.new('RGBA', (1,image.size[1]), color=(0,0,0,0)) + for y in range(image.size[1]): + mag = 1-cos(y/image.size[1]*factor) + mag = max(mag,1-cos((image.size[1]-y)/image.size[1]*factor*1.1)) + gradient.putpixel((0, y), (0,0,0,int(mag*255))) + image = Image.alpha_composite(image.convert('RGBA'), gradient.resize(image.size)) + + draw = ImageDraw.Draw(image) + fontsize = 32 + font = ImageFont.truetype(textfont, fontsize) + padding = 10 + + _,_,w, h = draw.textbbox((0,0),title,font=font) + fontsize = min( int(fontsize * (((image.size[0]*0.75)-(padding*4))/w) ), 72) + font = ImageFont.truetype(textfont, fontsize) + _,_,w,h = draw.textbbox((0,0),title,font=font) + draw.text((padding,padding), title, anchor='lt', font=font, fill=(255,255,255,230)) + + _,_,w, h = draw.textbbox((0,0),footerLeft,font=font) + fontsize_left = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) + _,_,w, h = draw.textbbox((0,0),footerMid,font=font) + fontsize_mid = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) + _,_,w, h = draw.textbbox((0,0),footerRight,font=font) + fontsize_right = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) + + font = ImageFont.truetype(textfont, min(fontsize_left,fontsize_mid,fontsize_right)) + + draw.text((padding,image.size[1]-padding), footerLeft, anchor='ls', font=font, fill=(255,255,255,230)) + draw.text((image.size[0]/2,image.size[1]-padding), footerMid, anchor='ms', font=font, fill=(255,255,255,230)) + draw.text((image.size[0]-padding,image.size[1]-padding), footerRight, anchor='rs', font=font, fill=(255,255,255,230)) + + return image + +if __name__ == '__main__': + + image = Image.new('RGBA',(512,512),(255,255,200,255)) + caption_image(image,[((255,255,255),'line a'),((255,255,255),'line b')], + [((255,255,255),'line c'),((255,255,255),'line d')]) + + image = Image.new('RGBA',(512,512),(255,255,200,255)) + cap_image = caption_image_overlay(image, 'title', 'footerLeft', 'footerMid', 'footerRight') + + test_embed = {'string_to_param':{'*':torch.from_numpy(np.random.random((2, 4096)))}} + + embedded_image = insert_image_data_embed(cap_image, test_embed) + + retrived_embed = extract_image_data_embed(embedded_image) + + assert str(retrived_embed) == str(test_embed) + + embedded_image2 = insert_image_data_embed(cap_image, retrived_embed) + + assert embedded_image == embedded_image2 + + g = lcg() + shared_random = np.array([next(g) for _ in range(100)]).astype(np.uint8).tolist() + + reference_random = [253, 242, 127, 44, 157, 27, 239, 133, 38, 79, 167, 4, 177, + 95, 130, 79, 78, 14, 52, 215, 220, 194, 126, 28, 240, 179, + 160, 153, 149, 50, 105, 14, 21, 218, 199, 18, 54, 198, 193, + 38, 128, 19, 53, 195, 124, 75, 205, 12, 6, 145, 0, 28, + 30, 148, 8, 45, 218, 171, 55, 249, 97, 166, 12, 35, 0, + 41, 221, 122, 215, 170, 31, 113, 186, 97, 119, 31, 23, 185, + 66, 140, 30, 41, 37, 63, 137, 109, 216, 55, 159, 145, 82, + 204, 86, 73, 222, 44, 198, 118, 240, 97] + + assert shared_random == reference_random + + hunna_kay_random_sum = sum(np.array([next(g) for _ in range(100000)]).astype(np.uint8).tolist()) + + assert 12731374 == hunna_kay_random_sum \ No newline at end of file -- cgit v1.2.1 From 61788c0538415fa9ca1dd1b306519c116b18bd2c Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 19:50:50 +0100 Subject: shift embedding logic out of textual_inversion --- modules/textual_inversion/textual_inversion.py | 125 ++----------------------- 1 file changed, 6 insertions(+), 119 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 8c66aeb5..22b4ae7f 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,124 +7,11 @@ import tqdm import html import datetime -from PIL import Image,PngImagePlugin,ImageDraw -from ..images import captionImageOverlay -import numpy as np -import base64 -import json -import zlib +from PIL import Image,PngImagePlugin from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset -class EmbeddingEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, torch.Tensor): - return {'TORCHTENSOR':obj.cpu().detach().numpy().tolist()} - return json.JSONEncoder.default(self, obj) - -class EmbeddingDecoder(json.JSONDecoder): - def __init__(self, *args, **kwargs): - json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) - def object_hook(self, d): - if 'TORCHTENSOR' in d: - return torch.from_numpy(np.array(d['TORCHTENSOR'])) - return d - -def embeddingToB64(data): - d = json.dumps(data,cls=EmbeddingEncoder) - return base64.b64encode(d.encode()) - -def embeddingFromB64(data): - d = base64.b64decode(data) - return json.loads(d,cls=EmbeddingDecoder) - -def lcg(m=2**32, a=1664525, c=1013904223, seed=0): - while True: - seed = (a * seed + c) % m - yield seed - -def xorBlock(block): - g = lcg() - randblock = np.array([next(g) for _ in range(np.product(block.shape))]).astype(np.uint8).reshape(block.shape) - return np.bitwise_xor(block.astype(np.uint8),randblock & 0x0F) - -def styleBlock(block,sequence): - im = Image.new('RGB',(block.shape[1],block.shape[0])) - draw = ImageDraw.Draw(im) - i=0 - for x in range(-6,im.size[0],8): - for yi,y in enumerate(range(-6,im.size[1],8)): - offset=0 - if yi%2==0: - offset=4 - shade = sequence[i%len(sequence)] - i+=1 - draw.ellipse((x+offset, y, x+6+offset, y+6), fill =(shade,shade,shade) ) - - fg = np.array(im).astype(np.uint8) & 0xF0 - return block ^ fg - -def insertImageDataEmbed(image,data): - d = 3 - data_compressed = zlib.compress( json.dumps(data,cls=EmbeddingEncoder).encode(),level=9) - dnp = np.frombuffer(data_compressed,np.uint8).copy() - dnphigh = dnp >> 4 - dnplow = dnp & 0x0F - - h = image.size[1] - next_size = dnplow.shape[0] + (h-(dnplow.shape[0]%h)) - next_size = next_size + ((h*d)-(next_size%(h*d))) - - dnplow.resize(next_size) - dnplow = dnplow.reshape((h,-1,d)) - - dnphigh.resize(next_size) - dnphigh = dnphigh.reshape((h,-1,d)) - - edgeStyleWeights = list(data['string_to_param'].values())[0].cpu().detach().numpy().tolist()[0][:1024] - edgeStyleWeights = (np.abs(edgeStyleWeights)/np.max(np.abs(edgeStyleWeights))*255).astype(np.uint8) - - dnplow = styleBlock(dnplow,sequence=edgeStyleWeights) - dnplow = xorBlock(dnplow) - dnphigh = styleBlock(dnphigh,sequence=edgeStyleWeights[::-1]) - dnphigh = xorBlock(dnphigh) - - imlow = Image.fromarray(dnplow,mode='RGB') - imhigh = Image.fromarray(dnphigh,mode='RGB') - - background = Image.new('RGB',(image.size[0]+imlow.size[0]+imhigh.size[0]+2,image.size[1]),(0,0,0)) - background.paste(imlow,(0,0)) - background.paste(image,(imlow.size[0]+1,0)) - background.paste(imhigh,(imlow.size[0]+1+image.size[0]+1,0)) - - return background - -def crop_black(img,tol=0): - mask = (img>tol).all(2) - mask0,mask1 = mask.any(0),mask.any(1) - col_start,col_end = mask0.argmax(),mask.shape[1]-mask0[::-1].argmax() - row_start,row_end = mask1.argmax(),mask.shape[0]-mask1[::-1].argmax() - return img[row_start:row_end,col_start:col_end] - -def extractImageDataEmbed(image): - d=3 - outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F - blackCols = np.where( np.sum(outarr, axis=(0,2))==0) - if blackCols[0].shape[0] < 2: - print('No Image data blocks found.') - return None - - dataBlocklower = outarr[:,:blackCols[0].min(),:].astype(np.uint8) - dataBlockupper = outarr[:,blackCols[0].max()+1:,:].astype(np.uint8) - - dataBlocklower = xorBlock(dataBlocklower) - dataBlockupper = xorBlock(dataBlockupper) - - dataBlock = (dataBlockupper << 4) | (dataBlocklower) - dataBlock = dataBlock.flatten().tobytes() - data = zlib.decompress(dataBlock) - return json.loads(data,cls=EmbeddingDecoder) class Embedding: def __init__(self, vec, name, step=None): @@ -199,10 +86,10 @@ class EmbeddingDatabase: if filename.upper().endswith('.PNG'): embed_image = Image.open(path) if 'sd-ti-embedding' in embed_image.text: - data = embeddingFromB64(embed_image.text['sd-ti-embedding']) + data = embedding_from_b64(embed_image.text['sd-ti-embedding']) name = data.get('name',name) else: - data = extractImageDataEmbed(embed_image) + data = extract_image_data_embed(embed_image) name = data.get('name',name) else: data = torch.load(path, map_location="cpu") @@ -393,7 +280,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini info = PngImagePlugin.PngInfo() data = torch.load(last_saved_file) - info.add_text("sd-ti-embedding", embeddingToB64(data)) + info.add_text("sd-ti-embedding", embedding_to_b64(data)) title = "<{}>".format(data.get('name','???')) checkpoint = sd_models.select_checkpoint() @@ -401,8 +288,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini footer_mid = '[{}]'.format(checkpoint.hash) footer_right = '{}'.format(embedding.step) - captioned_image = captionImageOverlay(image,title,footer_left,footer_mid,footer_right) - captioned_image = insertImageDataEmbed(captioned_image,data) + captioned_image = caption_image_overlay(image,title,footer_left,footer_mid,footer_right) + captioned_image = insert_image_data_embed(captioned_image,data) captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info) -- cgit v1.2.1 From db71290d2659d3b58ff9b57a82e4721a9eab9229 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 19:55:54 +0100 Subject: remove old caption method --- modules/textual_inversion/image_embedding.py | 39 ++-------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/image_embedding.py b/modules/textual_inversion/image_embedding.py index 6ad39602..c67028a5 100644 --- a/modules/textual_inversion/image_embedding.py +++ b/modules/textual_inversion/image_embedding.py @@ -117,37 +117,6 @@ def extract_image_data_embed(image): data = zlib.decompress(data_block) return json.loads(data,cls=EmbeddingDecoder) -def addCaptionLines(lines,image,initialx,textfont): - draw = ImageDraw.Draw(image) - hstart =initialx - for fill,line in lines: - fontsize = 32 - font = ImageFont.truetype(textfont, fontsize) - _,_,w, h = draw.textbbox((0,0),line,font=font) - fontsize = min( int(fontsize * ((image.size[0]-35)/w) ), 28) - font = ImageFont.truetype(textfont, fontsize) - _,_,w,h = draw.textbbox((0,0),line,font=font) - draw.text(((image.size[0]-w)/2,hstart), line, font=font, fill=fill) - hstart += h - return hstart - -def caption_image(image,prelines,postlines,background=(51, 51, 51),font=None): - if font is None: - try: - font = ImageFont.truetype(opts.font or Roboto, fontsize) - font = opts.font or Roboto - except Exception: - font = Roboto - - sample_image = image - background = Image.new("RGBA", (sample_image.size[0],sample_image.size[1]+1024), background) - hoffset = addCaptionLines(prelines,background,5,font)+16 - background.paste(sample_image,(0,hoffset)) - hoffset = hoffset+sample_image.size[1]+8 - hoffset = addCaptionLines(postlines,background,hoffset,font) - background = background.crop((0,0,sample_image.size[0],hoffset+8)) - return background - def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfont=None): from math import cos @@ -195,11 +164,7 @@ def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfo return image if __name__ == '__main__': - - image = Image.new('RGBA',(512,512),(255,255,200,255)) - caption_image(image,[((255,255,255),'line a'),((255,255,255),'line b')], - [((255,255,255),'line c'),((255,255,255),'line d')]) - + image = Image.new('RGBA',(512,512),(255,255,200,255)) cap_image = caption_image_overlay(image, 'title', 'footerLeft', 'footerMid', 'footerRight') @@ -231,4 +196,4 @@ if __name__ == '__main__': hunna_kay_random_sum = sum(np.array([next(g) for _ in range(100000)]).astype(np.uint8).tolist()) - assert 12731374 == hunna_kay_random_sum \ No newline at end of file + assert 12731374 == hunna_kay_random_sum -- cgit v1.2.1 From d6fcc6b87bc00fcdecea276fe5b7c7945f7a8b14 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 22:03:05 +0300 Subject: apply lr schedule to hypernets --- modules/textual_inversion/learn_schedule.py | 34 ++++++++++++++++++++ modules/textual_inversion/textual_inversion.py | 44 +++----------------------- 2 files changed, 38 insertions(+), 40 deletions(-) create mode 100644 modules/textual_inversion/learn_schedule.py (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/learn_schedule.py b/modules/textual_inversion/learn_schedule.py new file mode 100644 index 00000000..db720271 --- /dev/null +++ b/modules/textual_inversion/learn_schedule.py @@ -0,0 +1,34 @@ + +class LearnSchedule: + def __init__(self, learn_rate, max_steps, cur_step=0): + pairs = learn_rate.split(',') + self.rates = [] + self.it = 0 + self.maxit = 0 + for i, pair in enumerate(pairs): + tmp = pair.split(':') + if len(tmp) == 2: + step = int(tmp[1]) + if step > cur_step: + self.rates.append((float(tmp[0]), min(step, max_steps))) + self.maxit += 1 + if step > max_steps: + return + elif step == -1: + self.rates.append((float(tmp[0]), max_steps)) + self.maxit += 1 + return + else: + self.rates.append((float(tmp[0]), max_steps)) + self.maxit += 1 + return + + def __iter__(self): + return self + + def __next__(self): + if self.it < self.maxit: + self.it += 1 + return self.rates[self.it - 1] + else: + raise StopIteration diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 47a27faf..7717837d 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -10,6 +10,7 @@ import datetime from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset +from modules.textual_inversion.learn_schedule import LearnSchedule class Embedding: @@ -198,11 +199,8 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if ititial_step > steps: return embedding, filename - tr_img_len = len([os.path.join(data_root, file_path) for file_path in os.listdir(data_root)]) - epoch_len = (tr_img_len * num_repeats) + tr_img_len - - scheduleIter = iter(LearnSchedule(learn_rate, steps, ititial_step)) - (learn_rate, end_step) = next(scheduleIter) + schedules = iter(LearnSchedule(learn_rate, steps, ititial_step)) + (learn_rate, end_step) = next(schedules) print(f'Training at rate of {learn_rate} until step {end_step}') optimizer = torch.optim.AdamW([embedding.vec], lr=learn_rate) @@ -213,7 +211,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if embedding.step > end_step: try: - (learn_rate, end_step) = next(scheduleIter) + (learn_rate, end_step) = next(schedules) except: break tqdm.tqdm.write(f'Training at rate of {learn_rate} until step {end_step}') @@ -288,37 +286,3 @@ Last saved image: {html.escape(last_saved_image)}
embedding.save(filename) return embedding, filename - -class LearnSchedule: - def __init__(self, learn_rate, max_steps, cur_step=0): - pairs = learn_rate.split(',') - self.rates = [] - self.it = 0 - self.maxit = 0 - for i, pair in enumerate(pairs): - tmp = pair.split(':') - if len(tmp) == 2: - step = int(tmp[1]) - if step > cur_step: - self.rates.append((float(tmp[0]), min(step, max_steps))) - self.maxit += 1 - if step > max_steps: - return - elif step == -1: - self.rates.append((float(tmp[0]), max_steps)) - self.maxit += 1 - return - else: - self.rates.append((float(tmp[0]), max_steps)) - self.maxit += 1 - return - - def __iter__(self): - return self - - def __next__(self): - if self.it < self.maxit: - self.it += 1 - return self.rates[self.it - 1] - else: - raise StopIteration -- cgit v1.2.1 From aa75d5cfe8c84768b0f5d16f977ddba298677379 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 20:06:13 +0100 Subject: correct conflict resolution typo --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 22b4ae7f..789383ce 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -169,7 +169,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt) +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." -- cgit v1.2.1 From 91d7ee0d097a7ea203d261b570cd2b834837d9e2 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 20:09:10 +0100 Subject: update imports --- modules/textual_inversion/textual_inversion.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 789383ce..ff0a62b3 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -12,6 +12,9 @@ from PIL import Image,PngImagePlugin from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset +from modules.textual_inversion.image_embedding import( embedding_to_b64,embedding_from_b64, + insert_image_data_embed,extract_image_data_embed, + caption_image_overlay ) class Embedding: def __init__(self, vec, name, step=None): -- cgit v1.2.1 From 5f3317376bb7952bc5145f05f16c1bbd466efc85 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 20:09:49 +0100 Subject: spacing --- modules/textual_inversion/textual_inversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index ff0a62b3..485ef46c 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -12,7 +12,7 @@ from PIL import Image,PngImagePlugin from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset -from modules.textual_inversion.image_embedding import( embedding_to_b64,embedding_from_b64, +from modules.textual_inversion.image_embedding import (embedding_to_b64,embedding_from_b64, insert_image_data_embed,extract_image_data_embed, caption_image_overlay ) -- cgit v1.2.1 From 7e6a6e00ad6f3b7ef43c8120db9ecac6e8d6bea5 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 20:20:46 +0100 Subject: Add files via upload --- modules/textual_inversion/test_embedding.png | Bin 0 -> 489220 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 modules/textual_inversion/test_embedding.png (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/test_embedding.png b/modules/textual_inversion/test_embedding.png new file mode 100644 index 00000000..07e2d9af Binary files /dev/null and b/modules/textual_inversion/test_embedding.png differ -- cgit v1.2.1 From 66ec505975aaa305a217fc27281ce368cbaef281 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Tue, 11 Oct 2022 20:21:30 +0100 Subject: add file based test --- modules/textual_inversion/image_embedding.py | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/image_embedding.py b/modules/textual_inversion/image_embedding.py index c67028a5..1224fb42 100644 --- a/modules/textual_inversion/image_embedding.py +++ b/modules/textual_inversion/image_embedding.py @@ -164,6 +164,14 @@ def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfo return image if __name__ == '__main__': + + testEmbed = Image.open('test_embedding.png') + + data = extract_image_data_embed(testEmbed) + assert data is not None + + data = embedding_from_b64(testEmbed.text['sd-ti-embedding']) + assert data is not None image = Image.new('RGBA',(512,512),(255,255,200,255)) cap_image = caption_image_overlay(image, 'title', 'footerLeft', 'footerMid', 'footerRight') -- cgit v1.2.1 From 6be32b31d181e42c639dad3451229aa7b9cfd1cf Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Tue, 11 Oct 2022 23:07:09 +0300 Subject: reports that training with medvram is possible. --- modules/textual_inversion/ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/ui.py b/modules/textual_inversion/ui.py index 70f47343..36881e7a 100644 --- a/modules/textual_inversion/ui.py +++ b/modules/textual_inversion/ui.py @@ -23,7 +23,7 @@ def preprocess(*args): def train_embedding(*args): - assert not shared.cmd_opts.lowvram and not shared.cmd_opts.medvram, 'Training models with lowvram or medvram is not possible' + assert not shared.cmd_opts.lowvram, 'Training models with lowvram not possible' try: sd_hijack.undo_optimizations() -- cgit v1.2.1 From f53f703aebc801c4204182d52bb1e0bef9808e1f Mon Sep 17 00:00:00 2001 From: JC_Array Date: Tue, 11 Oct 2022 18:12:12 -0500 Subject: resolved conflicts, moved settings under interrogate section, settings only show if deepbooru flag is enabled --- modules/textual_inversion/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index a96388d6..113cecf1 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -29,7 +29,7 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.interrogator.load() if process_caption_deepbooru: - deepbooru.create_deepbooru_process(opts.deepbooru_threshold, opts.deepbooru_sort_alpha) + deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, opts.deepbooru_sort_alpha) def save_pic_with_caption(image, index): if process_caption: -- cgit v1.2.1 From 50be33e953be93c40814262c6dbce36e66004528 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Wed, 12 Oct 2022 13:13:25 +0100 Subject: formatting --- modules/textual_inversion/image_embedding.py | 170 ++++++++++++++------------- 1 file changed, 91 insertions(+), 79 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/image_embedding.py b/modules/textual_inversion/image_embedding.py index 1224fb42..898ce3b3 100644 --- a/modules/textual_inversion/image_embedding.py +++ b/modules/textual_inversion/image_embedding.py @@ -2,122 +2,134 @@ import base64 import json import numpy as np import zlib -from PIL import Image,PngImagePlugin,ImageDraw,ImageFont +from PIL import Image, PngImagePlugin, ImageDraw, ImageFont from fonts.ttf import Roboto import torch + class EmbeddingEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, torch.Tensor): - return {'TORCHTENSOR':obj.cpu().detach().numpy().tolist()} + return {'TORCHTENSOR': obj.cpu().detach().numpy().tolist()} return json.JSONEncoder.default(self, obj) + class EmbeddingDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) + def object_hook(self, d): if 'TORCHTENSOR' in d: return torch.from_numpy(np.array(d['TORCHTENSOR'])) return d + def embedding_to_b64(data): - d = json.dumps(data,cls=EmbeddingEncoder) + d = json.dumps(data, cls=EmbeddingEncoder) return base64.b64encode(d.encode()) + def embedding_from_b64(data): d = base64.b64decode(data) - return json.loads(d,cls=EmbeddingDecoder) + return json.loads(d, cls=EmbeddingDecoder) + def lcg(m=2**32, a=1664525, c=1013904223, seed=0): while True: seed = (a * seed + c) % m - yield seed%255 + yield seed % 255 + def xor_block(block): g = lcg() randblock = np.array([next(g) for _ in range(np.product(block.shape))]).astype(np.uint8).reshape(block.shape) - return np.bitwise_xor(block.astype(np.uint8),randblock & 0x0F) + return np.bitwise_xor(block.astype(np.uint8), randblock & 0x0F) -def style_block(block,sequence): - im = Image.new('RGB',(block.shape[1],block.shape[0])) + +def style_block(block, sequence): + im = Image.new('RGB', (block.shape[1], block.shape[0])) draw = ImageDraw.Draw(im) - i=0 - for x in range(-6,im.size[0],8): - for yi,y in enumerate(range(-6,im.size[1],8)): - offset=0 - if yi%2==0: - offset=4 - shade = sequence[i%len(sequence)] - i+=1 - draw.ellipse((x+offset, y, x+6+offset, y+6), fill =(shade,shade,shade) ) + i = 0 + for x in range(-6, im.size[0], 8): + for yi, y in enumerate(range(-6, im.size[1], 8)): + offset = 0 + if yi % 2 == 0: + offset = 4 + shade = sequence[i % len(sequence)] + i += 1 + draw.ellipse((x+offset, y, x+6+offset, y+6), fill=(shade, shade, shade)) fg = np.array(im).astype(np.uint8) & 0xF0 return block ^ fg -def insert_image_data_embed(image,data): + +def insert_image_data_embed(image, data): d = 3 - data_compressed = zlib.compress( json.dumps(data,cls=EmbeddingEncoder).encode(),level=9) - data_np_ = np.frombuffer(data_compressed,np.uint8).copy() + data_compressed = zlib.compress(json.dumps(data, cls=EmbeddingEncoder).encode(), level=9) + data_np_ = np.frombuffer(data_compressed, np.uint8).copy() data_np_high = data_np_ >> 4 - data_np_low = data_np_ & 0x0F - + data_np_low = data_np_ & 0x0F + h = image.size[1] - next_size = data_np_low.shape[0] + (h-(data_np_low.shape[0]%h)) - next_size = next_size + ((h*d)-(next_size%(h*d))) + next_size = data_np_low.shape[0] + (h-(data_np_low.shape[0] % h)) + next_size = next_size + ((h*d)-(next_size % (h*d))) data_np_low.resize(next_size) - data_np_low = data_np_low.reshape((h,-1,d)) + data_np_low = data_np_low.reshape((h, -1, d)) data_np_high.resize(next_size) - data_np_high = data_np_high.reshape((h,-1,d)) + data_np_high = data_np_high.reshape((h, -1, d)) edge_style = list(data['string_to_param'].values())[0].cpu().detach().numpy().tolist()[0][:1024] edge_style = (np.abs(edge_style)/np.max(np.abs(edge_style))*255).astype(np.uint8) - data_np_low = style_block(data_np_low,sequence=edge_style) - data_np_low = xor_block(data_np_low) - data_np_high = style_block(data_np_high,sequence=edge_style[::-1]) - data_np_high = xor_block(data_np_high) + data_np_low = style_block(data_np_low, sequence=edge_style) + data_np_low = xor_block(data_np_low) + data_np_high = style_block(data_np_high, sequence=edge_style[::-1]) + data_np_high = xor_block(data_np_high) - im_low = Image.fromarray(data_np_low,mode='RGB') - im_high = Image.fromarray(data_np_high,mode='RGB') + im_low = Image.fromarray(data_np_low, mode='RGB') + im_high = Image.fromarray(data_np_high, mode='RGB') - background = Image.new('RGB',(image.size[0]+im_low.size[0]+im_high.size[0]+2,image.size[1]),(0,0,0)) - background.paste(im_low,(0,0)) - background.paste(image,(im_low.size[0]+1,0)) - background.paste(im_high,(im_low.size[0]+1+image.size[0]+1,0)) + background = Image.new('RGB', (image.size[0]+im_low.size[0]+im_high.size[0]+2, image.size[1]), (0, 0, 0)) + background.paste(im_low, (0, 0)) + background.paste(image, (im_low.size[0]+1, 0)) + background.paste(im_high, (im_low.size[0]+1+image.size[0]+1, 0)) return background -def crop_black(img,tol=0): - mask = (img>tol).all(2) - mask0,mask1 = mask.any(0),mask.any(1) - col_start,col_end = mask0.argmax(),mask.shape[1]-mask0[::-1].argmax() - row_start,row_end = mask1.argmax(),mask.shape[0]-mask1[::-1].argmax() - return img[row_start:row_end,col_start:col_end] + +def crop_black(img, tol=0): + mask = (img > tol).all(2) + mask0, mask1 = mask.any(0), mask.any(1) + col_start, col_end = mask0.argmax(), mask.shape[1]-mask0[::-1].argmax() + row_start, row_end = mask1.argmax(), mask.shape[0]-mask1[::-1].argmax() + return img[row_start:row_end, col_start:col_end] + def extract_image_data_embed(image): - d=3 - outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1],image.size[0],d ).astype(np.uint8) ) & 0x0F - black_cols = np.where( np.sum(outarr, axis=(0,2))==0) + d = 3 + outarr = crop_black(np.array(image.convert('RGB').getdata()).reshape(image.size[1], image.size[0], d).astype(np.uint8)) & 0x0F + black_cols = np.where(np.sum(outarr, axis=(0, 2)) == 0) if black_cols[0].shape[0] < 2: print('No Image data blocks found.') return None - data_block_lower = outarr[:,:black_cols[0].min(),:].astype(np.uint8) - data_block_upper = outarr[:,black_cols[0].max()+1:,:].astype(np.uint8) + data_block_lower = outarr[:, :black_cols[0].min(), :].astype(np.uint8) + data_block_upper = outarr[:, black_cols[0].max()+1:, :].astype(np.uint8) data_block_lower = xor_block(data_block_lower) data_block_upper = xor_block(data_block_upper) - + data_block = (data_block_upper << 4) | (data_block_lower) data_block = data_block.flatten().tobytes() data = zlib.decompress(data_block) - return json.loads(data,cls=EmbeddingDecoder) + return json.loads(data, cls=EmbeddingDecoder) + -def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfont=None): +def caption_image_overlay(srcimage, title, footerLeft, footerMid, footerRight, textfont=None): from math import cos image = srcimage.copy() @@ -130,11 +142,11 @@ def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfo textfont = Roboto factor = 1.5 - gradient = Image.new('RGBA', (1,image.size[1]), color=(0,0,0,0)) + gradient = Image.new('RGBA', (1, image.size[1]), color=(0, 0, 0, 0)) for y in range(image.size[1]): mag = 1-cos(y/image.size[1]*factor) - mag = max(mag,1-cos((image.size[1]-y)/image.size[1]*factor*1.1)) - gradient.putpixel((0, y), (0,0,0,int(mag*255))) + mag = max(mag, 1-cos((image.size[1]-y)/image.size[1]*factor*1.1)) + gradient.putpixel((0, y), (0, 0, 0, int(mag*255))) image = Image.alpha_composite(image.convert('RGBA'), gradient.resize(image.size)) draw = ImageDraw.Draw(image) @@ -142,41 +154,41 @@ def caption_image_overlay(srcimage,title,footerLeft,footerMid,footerRight,textfo font = ImageFont.truetype(textfont, fontsize) padding = 10 - _,_,w, h = draw.textbbox((0,0),title,font=font) - fontsize = min( int(fontsize * (((image.size[0]*0.75)-(padding*4))/w) ), 72) + _, _, w, h = draw.textbbox((0, 0), title, font=font) + fontsize = min(int(fontsize * (((image.size[0]*0.75)-(padding*4))/w)), 72) font = ImageFont.truetype(textfont, fontsize) - _,_,w,h = draw.textbbox((0,0),title,font=font) - draw.text((padding,padding), title, anchor='lt', font=font, fill=(255,255,255,230)) + _, _, w, h = draw.textbbox((0, 0), title, font=font) + draw.text((padding, padding), title, anchor='lt', font=font, fill=(255, 255, 255, 230)) - _,_,w, h = draw.textbbox((0,0),footerLeft,font=font) - fontsize_left = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) - _,_,w, h = draw.textbbox((0,0),footerMid,font=font) - fontsize_mid = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) - _,_,w, h = draw.textbbox((0,0),footerRight,font=font) - fontsize_right = min( int(fontsize * (((image.size[0]/3)-(padding))/w) ), 72) + _, _, w, h = draw.textbbox((0, 0), footerLeft, font=font) + fontsize_left = min(int(fontsize * (((image.size[0]/3)-(padding))/w)), 72) + _, _, w, h = draw.textbbox((0, 0), footerMid, font=font) + fontsize_mid = min(int(fontsize * (((image.size[0]/3)-(padding))/w)), 72) + _, _, w, h = draw.textbbox((0, 0), footerRight, font=font) + fontsize_right = min(int(fontsize * (((image.size[0]/3)-(padding))/w)), 72) - font = ImageFont.truetype(textfont, min(fontsize_left,fontsize_mid,fontsize_right)) + font = ImageFont.truetype(textfont, min(fontsize_left, fontsize_mid, fontsize_right)) - draw.text((padding,image.size[1]-padding), footerLeft, anchor='ls', font=font, fill=(255,255,255,230)) - draw.text((image.size[0]/2,image.size[1]-padding), footerMid, anchor='ms', font=font, fill=(255,255,255,230)) - draw.text((image.size[0]-padding,image.size[1]-padding), footerRight, anchor='rs', font=font, fill=(255,255,255,230)) + draw.text((padding, image.size[1]-padding), footerLeft, anchor='ls', font=font, fill=(255, 255, 255, 230)) + draw.text((image.size[0]/2, image.size[1]-padding), footerMid, anchor='ms', font=font, fill=(255, 255, 255, 230)) + draw.text((image.size[0]-padding, image.size[1]-padding), footerRight, anchor='rs', font=font, fill=(255, 255, 255, 230)) return image + if __name__ == '__main__': testEmbed = Image.open('test_embedding.png') - data = extract_image_data_embed(testEmbed) assert data is not None data = embedding_from_b64(testEmbed.text['sd-ti-embedding']) assert data is not None - - image = Image.new('RGBA',(512,512),(255,255,200,255)) + + image = Image.new('RGBA', (512, 512), (255, 255, 200, 255)) cap_image = caption_image_overlay(image, 'title', 'footerLeft', 'footerMid', 'footerRight') - test_embed = {'string_to_param':{'*':torch.from_numpy(np.random.random((2, 4096)))}} + test_embed = {'string_to_param': {'*': torch.from_numpy(np.random.random((2, 4096)))}} embedded_image = insert_image_data_embed(cap_image, test_embed) @@ -191,16 +203,16 @@ if __name__ == '__main__': g = lcg() shared_random = np.array([next(g) for _ in range(100)]).astype(np.uint8).tolist() - reference_random = [253, 242, 127, 44, 157, 27, 239, 133, 38, 79, 167, 4, 177, - 95, 130, 79, 78, 14, 52, 215, 220, 194, 126, 28, 240, 179, - 160, 153, 149, 50, 105, 14, 21, 218, 199, 18, 54, 198, 193, - 38, 128, 19, 53, 195, 124, 75, 205, 12, 6, 145, 0, 28, - 30, 148, 8, 45, 218, 171, 55, 249, 97, 166, 12, 35, 0, - 41, 221, 122, 215, 170, 31, 113, 186, 97, 119, 31, 23, 185, - 66, 140, 30, 41, 37, 63, 137, 109, 216, 55, 159, 145, 82, + reference_random = [253, 242, 127, 44, 157, 27, 239, 133, 38, 79, 167, 4, 177, + 95, 130, 79, 78, 14, 52, 215, 220, 194, 126, 28, 240, 179, + 160, 153, 149, 50, 105, 14, 21, 218, 199, 18, 54, 198, 193, + 38, 128, 19, 53, 195, 124, 75, 205, 12, 6, 145, 0, 28, + 30, 148, 8, 45, 218, 171, 55, 249, 97, 166, 12, 35, 0, + 41, 221, 122, 215, 170, 31, 113, 186, 97, 119, 31, 23, 185, + 66, 140, 30, 41, 37, 63, 137, 109, 216, 55, 159, 145, 82, 204, 86, 73, 222, 44, 198, 118, 240, 97] - assert shared_random == reference_random + assert shared_random == reference_random hunna_kay_random_sum = sum(np.array([next(g) for _ in range(100000)]).astype(np.uint8).tolist()) -- cgit v1.2.1 From 10a2de644f8ea4cfade88e85d768da3480f4c9f0 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Wed, 12 Oct 2022 13:15:35 +0100 Subject: formatting --- modules/textual_inversion/textual_inversion.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 485ef46c..b072d745 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -7,14 +7,14 @@ import tqdm import html import datetime -from PIL import Image,PngImagePlugin +from PIL import Image, PngImagePlugin from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset -from modules.textual_inversion.image_embedding import (embedding_to_b64,embedding_from_b64, - insert_image_data_embed,extract_image_data_embed, - caption_image_overlay ) +from modules.textual_inversion.image_embedding import (embedding_to_b64, embedding_from_b64, + insert_image_data_embed, extract_image_data_embed, + caption_image_overlay) class Embedding: def __init__(self, vec, name, step=None): @@ -90,10 +90,10 @@ class EmbeddingDatabase: embed_image = Image.open(path) if 'sd-ti-embedding' in embed_image.text: data = embedding_from_b64(embed_image.text['sd-ti-embedding']) - name = data.get('name',name) + name = data.get('name', name) else: data = extract_image_data_embed(embed_image) - name = data.get('name',name) + name = data.get('name', name) else: data = torch.load(path, map_location="cpu") @@ -278,24 +278,24 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.current_image = image if save_image_with_stored_embedding and os.path.exists(last_saved_file): - + last_saved_image_chunks = os.path.join(images_embeds_dir, f'{embedding_name}-{embedding.step}.png') info = PngImagePlugin.PngInfo() data = torch.load(last_saved_file) info.add_text("sd-ti-embedding", embedding_to_b64(data)) - title = "<{}>".format(data.get('name','???')) + title = "<{}>".format(data.get('name', '???')) checkpoint = sd_models.select_checkpoint() footer_left = checkpoint.model_name footer_mid = '[{}]'.format(checkpoint.hash) footer_right = '{}'.format(embedding.step) - captioned_image = caption_image_overlay(image,title,footer_left,footer_mid,footer_right) - captioned_image = insert_image_data_embed(captioned_image,data) + captioned_image = caption_image_overlay(image, title, footer_left, footer_mid, footer_right) + captioned_image = insert_image_data_embed(captioned_image, data) captioned_image.save(last_saved_image_chunks, "PNG", pnginfo=info) - + image.save(last_saved_image) last_saved_image += f", prompt: {preview_text}" -- cgit v1.2.1 From c3c8eef9fd5a0c8b26319e32ca4a19b56204e6df Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 20:49:47 +0300 Subject: train: change filename processing to be more simple and configurable train: make it possible to make text files with prompts train: rework scheduler so that there's less repeating code in textual inversion and hypernets train: move epochs setting to options --- modules/textual_inversion/dataset.py | 47 +++++++++++++++++++------- modules/textual_inversion/learn_schedule.py | 37 +++++++++++++++++++- modules/textual_inversion/textual_inversion.py | 35 +++++++------------ 3 files changed, 83 insertions(+), 36 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index f61f40d3..67e90afe 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -11,11 +11,21 @@ import tqdm from modules import devices, shared import re -re_tag = re.compile(r"[a-zA-Z][_\w\d()]+") +re_numbers_at_start = re.compile(r"^[-\d]+\s*") + + +class DatasetEntry: + def __init__(self, filename=None, latent=None, filename_text=None): + self.filename = filename + self.latent = latent + self.filename_text = filename_text + self.cond = None + self.cond_text = None class PersonalizedBase(Dataset): def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None, include_cond=False): + re_word = re.compile(shared.opts.dataset_filename_word_regex) if len(shared.opts.dataset_filename_word_regex)>0 else None self.placeholder_token = placeholder_token @@ -42,9 +52,18 @@ class PersonalizedBase(Dataset): except Exception: continue + text_filename = os.path.splitext(path)[0] + ".txt" filename = os.path.basename(path) - filename_tokens = os.path.splitext(filename)[0] - filename_tokens = re_tag.findall(filename_tokens) + + if os.path.exists(text_filename): + with open(text_filename, "r", encoding="utf8") as file: + filename_text = file.read() + else: + filename_text = os.path.splitext(filename)[0] + filename_text = re.sub(re_numbers_at_start, '', filename_text) + if re_word: + tokens = re_word.findall(filename_text) + filename_text = (shared.opts.dataset_filename_join_string or "").join(tokens) npimage = np.array(image).astype(np.uint8) npimage = (npimage / 127.5 - 1.0).astype(np.float32) @@ -55,13 +74,13 @@ class PersonalizedBase(Dataset): init_latent = model.get_first_stage_encoding(model.encode_first_stage(torchdata.unsqueeze(dim=0))).squeeze() init_latent = init_latent.to(devices.cpu) + entry = DatasetEntry(filename=path, filename_text=filename_text, latent=init_latent) + if include_cond: - text = self.create_text(filename_tokens) - cond = cond_model([text]).to(devices.cpu) - else: - cond = None + entry.cond_text = self.create_text(filename_text) + entry.cond = cond_model([entry.cond_text]).to(devices.cpu) - self.dataset.append((init_latent, filename_tokens, cond)) + self.dataset.append(entry) self.length = len(self.dataset) * repeats @@ -72,10 +91,10 @@ class PersonalizedBase(Dataset): def shuffle(self): self.indexes = self.initial_indexes[torch.randperm(self.initial_indexes.shape[0])] - def create_text(self, filename_tokens): + def create_text(self, filename_text): text = random.choice(self.lines) text = text.replace("[name]", self.placeholder_token) - text = text.replace("[filewords]", ' '.join(filename_tokens)) + text = text.replace("[filewords]", filename_text) return text def __len__(self): @@ -86,7 +105,9 @@ class PersonalizedBase(Dataset): self.shuffle() index = self.indexes[i % len(self.indexes)] - x, filename_tokens, cond = self.dataset[index] + entry = self.dataset[index] + + if entry.cond is None: + entry.cond_text = self.create_text(entry.filename_text) - text = self.create_text(filename_tokens) - return x, text, cond + return entry diff --git a/modules/textual_inversion/learn_schedule.py b/modules/textual_inversion/learn_schedule.py index db720271..2062726a 100644 --- a/modules/textual_inversion/learn_schedule.py +++ b/modules/textual_inversion/learn_schedule.py @@ -1,6 +1,12 @@ +import tqdm -class LearnSchedule: + +class LearnScheduleIterator: def __init__(self, learn_rate, max_steps, cur_step=0): + """ + specify learn_rate as "0.001:100, 0.00001:1000, 1e-5:10000" to have lr of 0.001 until step 100, 0.00001 until 1000, 1e-5:10000 until 10000 + """ + pairs = learn_rate.split(',') self.rates = [] self.it = 0 @@ -32,3 +38,32 @@ class LearnSchedule: return self.rates[self.it - 1] else: raise StopIteration + + +class LearnRateScheduler: + def __init__(self, learn_rate, max_steps, cur_step=0, verbose=True): + self.schedules = LearnScheduleIterator(learn_rate, max_steps, cur_step) + (self.learn_rate, self.end_step) = next(self.schedules) + self.verbose = verbose + + if self.verbose: + print(f'Training at rate of {self.learn_rate} until step {self.end_step}') + + self.finished = False + + def apply(self, optimizer, step_number): + if step_number <= self.end_step: + return + + try: + (self.learn_rate, self.end_step) = next(self.schedules) + except Exception: + self.finished = True + return + + if self.verbose: + tqdm.tqdm.write(f'Training at rate of {self.learn_rate} until step {self.end_step}') + + for pg in optimizer.param_groups: + pg['lr'] = self.learn_rate + diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index c5153e4a..fa0e33a2 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -11,7 +11,7 @@ from PIL import Image, PngImagePlugin from modules import shared, devices, sd_hijack, processing, sd_models import modules.textual_inversion.dataset -from modules.textual_inversion.learn_schedule import LearnSchedule +from modules.textual_inversion.learn_schedule import LearnRateScheduler from modules.textual_inversion.image_embedding import (embedding_to_b64, embedding_from_b64, insert_image_data_embed, extract_image_data_embed, @@ -172,8 +172,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn - -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, num_repeats, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -205,7 +204,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=num_repeats, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) hijack = sd_hijack.model_hijack @@ -221,32 +220,24 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if ititial_step > steps: return embedding, filename - schedules = iter(LearnSchedule(learn_rate, steps, ititial_step)) - (learn_rate, end_step) = next(schedules) - print(f'Training at rate of {learn_rate} until step {end_step}') - - optimizer = torch.optim.AdamW([embedding.vec], lr=learn_rate) + scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) + optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) - for i, (x, text, _) in pbar: + for i, entry in pbar: embedding.step = i + ititial_step - if embedding.step > end_step: - try: - (learn_rate, end_step) = next(schedules) - except: - break - tqdm.tqdm.write(f'Training at rate of {learn_rate} until step {end_step}') - for pg in optimizer.param_groups: - pg['lr'] = learn_rate + scheduler.apply(optimizer, embedding.step) + if scheduler.finished: + break if shared.state.interrupted: break with torch.autocast("cuda"): - c = cond_model([text]) + c = cond_model([entry.cond_text]) - x = x.to(devices.device) + x = entry.latent.to(devices.device) loss = shared.sd_model(x.unsqueeze(0), c)[0] del x @@ -268,7 +259,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0: last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png') - preview_text = text if preview_image_prompt == "" else preview_image_prompt + preview_text = entry.cond_text if preview_image_prompt == "" else preview_image_prompt p = processing.StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, @@ -314,7 +305,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini

Loss: {losses.mean():.7f}
Step: {embedding.step}
-Last prompt: {html.escape(text)}
+Last prompt: {html.escape(entry.cond_text)}
Last saved embedding: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}

-- cgit v1.2.1 From 698d303b04e293635bfb49c525409f3bcf671dce Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 12 Oct 2022 21:55:43 +0300 Subject: deepbooru: added option to use spaces or underscores deepbooru: added option to quote (\) in tags deepbooru/BLIP: write caption to file instead of image filename deepbooru/BLIP: now possible to use both for captions deepbooru: process is stopped even if an exception occurs --- modules/textual_inversion/preprocess.py | 92 ++++++++++++++------------------- 1 file changed, 40 insertions(+), 52 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 113cecf1..3047bede 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -10,7 +10,28 @@ from modules.shared import opts, cmd_opts if cmd_opts.deepdanbooru: import modules.deepbooru as deepbooru + def preprocess(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): + try: + if process_caption: + shared.interrogator.load() + + if process_caption_deepbooru: + deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, deepbooru.create_deepbooru_opts()) + + preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru) + + finally: + + if process_caption: + shared.interrogator.send_blip_to_ram() + + if process_caption_deepbooru: + deepbooru.release_process() + + + +def preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru=False): width = process_width height = process_height src = os.path.abspath(process_src) @@ -25,30 +46,28 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.state.textinfo = "Preprocessing..." shared.state.job_count = len(files) - if process_caption: - shared.interrogator.load() - - if process_caption_deepbooru: - deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, opts.deepbooru_sort_alpha) - def save_pic_with_caption(image, index): + caption = "" + if process_caption: - caption = "-" + shared.interrogator.generate_caption(image) - caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") - elif process_caption_deepbooru: - shared.deepbooru_process_return["value"] = -1 - shared.deepbooru_process_queue.put(image) - while shared.deepbooru_process_return["value"] == -1: - time.sleep(0.2) - caption = "-" + shared.deepbooru_process_return["value"] - caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png") - shared.deepbooru_process_return["value"] = -1 - else: - caption = filename - caption = os.path.splitext(caption)[0] - caption = os.path.basename(caption) + caption += shared.interrogator.generate_caption(image) + + if process_caption_deepbooru: + if len(caption) > 0: + caption += ", " + caption += deepbooru.get_tags_from_process(image) + + filename_part = filename + filename_part = os.path.splitext(filename_part)[0] + filename_part = os.path.basename(filename_part) + + basename = f"{index:05}-{subindex[0]}-{filename_part}" + image.save(os.path.join(dst, f"{basename}.png")) + + if len(caption) > 0: + with open(os.path.join(dst, f"{basename}.txt"), "w", encoding="utf8") as file: + file.write(caption) - image.save(os.path.join(dst, f"{index:05}-{subindex[0]}{caption}.png")) subindex[0] += 1 def save_pic(image, index): @@ -93,34 +112,3 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ save_pic(img, index) shared.state.nextjob() - - if process_caption: - shared.interrogator.send_blip_to_ram() - - if process_caption_deepbooru: - deepbooru.release_process() - - -def sanitize_caption(base_path, original_caption, suffix): - operating_system = platform.system().lower() - if (operating_system == "windows"): - invalid_path_characters = "\\/:*?\"<>|" - max_path_length = 259 - else: - invalid_path_characters = "/" #linux/macos - max_path_length = 1023 - caption = original_caption - for invalid_character in invalid_path_characters: - caption = caption.replace(invalid_character, "") - fixed_path_length = len(base_path) + len(suffix) - if fixed_path_length + len(caption) <= max_path_length: - return caption - caption_tokens = caption.split() - new_caption = "" - for token in caption_tokens: - last_caption = new_caption - new_caption = new_caption + token + " " - if (len(new_caption) + fixed_path_length - 1 > max_path_length): - break - print(f"\nPath will be too long. Truncated caption: {original_caption}\nto: {last_caption}", file=sys.stderr) - return last_caption.strip() -- cgit v1.2.1 From f776254b12361b5bae16f6629bcdcb47b450c48d Mon Sep 17 00:00:00 2001 From: Greg Fuller Date: Wed, 12 Oct 2022 13:08:06 -0700 Subject: [2/?] [wip] ignore OPT_INCLUDE_RANKS for training filenames --- modules/textual_inversion/preprocess.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 3047bede..886cf0c3 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -17,7 +17,9 @@ def preprocess(process_src, process_dst, process_width, process_height, process_ shared.interrogator.load() if process_caption_deepbooru: - deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, deepbooru.create_deepbooru_opts()) + db_opts = deepbooru.create_deepbooru_opts() + db_opts[deepbooru.OPT_INCLUDE_RANKS] = False + deepbooru.create_deepbooru_process(opts.interrogate_deepbooru_score_threshold, db_opts) preprocess_work(process_src, process_dst, process_width, process_height, process_flip, process_split, process_caption, process_caption_deepbooru) -- cgit v1.2.1 From 1cfc2a18981ee56bdb69a2de7b463a11ad05e329 Mon Sep 17 00:00:00 2001 From: Melan Date: Wed, 12 Oct 2022 23:36:29 +0200 Subject: Save a csv containing the loss while training --- modules/textual_inversion/textual_inversion.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index fa0e33a2..25038a89 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -6,6 +6,7 @@ import torch import tqdm import html import datetime +import csv from PIL import Image, PngImagePlugin @@ -172,7 +173,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, write_csv_every, template_file, save_image_with_stored_embedding, preview_image_prompt): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -256,6 +257,20 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini last_saved_file = os.path.join(embedding_dir, f'{embedding_name}-{embedding.step}.pt') embedding.save(last_saved_file) + if write_csv_every > 0 and log_directory is not None and embedding.step % write_csv_every == 0: + write_csv_header = False if os.path.exists(os.path.join(log_directory, "textual_inversion_loss.csv")) else True + + with open(os.path.join(log_directory, "textual_inversion_loss.csv"), "a+") as fout: + + csv_writer = csv.DictWriter(fout, fieldnames=["epoch", "epoch_step", "loss"]) + + if write_csv_header: + csv_writer.writeheader() + + csv_writer.writerow({"epoch": epoch_num + 1, + "epoch_step": epoch_step - 1, + "loss": f"{losses.mean():.7f}"}) + if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0: last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png') -- cgit v1.2.1 From 8636b50aea83f9c743f005722d9f3f8ee9303e00 Mon Sep 17 00:00:00 2001 From: Melan Date: Thu, 13 Oct 2022 12:37:58 +0200 Subject: Add learn_rate to csv and removed a left-over debug statement --- modules/textual_inversion/textual_inversion.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 25038a89..b83df079 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -262,14 +262,15 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini with open(os.path.join(log_directory, "textual_inversion_loss.csv"), "a+") as fout: - csv_writer = csv.DictWriter(fout, fieldnames=["epoch", "epoch_step", "loss"]) + csv_writer = csv.DictWriter(fout, fieldnames=["epoch", "epoch_step", "loss", "learn_rate"]) if write_csv_header: csv_writer.writeheader() csv_writer.writerow({"epoch": epoch_num + 1, "epoch_step": epoch_step - 1, - "loss": f"{losses.mean():.7f}"}) + "loss": f"{losses.mean():.7f}", + "learn_rate": scheduler.learn_rate}) if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0: last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png') -- cgit v1.2.1 From c344ba3b325459abbf9b0df2c1b18f7bf99805b2 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 14 Oct 2022 20:31:49 +0300 Subject: add option to read generation params for learning previews from txt2img --- modules/textual_inversion/textual_inversion.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index fa0e33a2..3d835358 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -172,7 +172,7 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_image_prompt): +def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -259,18 +259,29 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0: last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png') - preview_text = entry.cond_text if preview_image_prompt == "" else preview_image_prompt - p = processing.StableDiffusionProcessingTxt2Img( sd_model=shared.sd_model, - prompt=preview_text, - steps=20, - height=training_height, - width=training_width, do_not_save_grid=True, do_not_save_samples=True, ) + if preview_from_txt2img: + p.prompt = preview_prompt + p.negative_prompt = preview_negative_prompt + p.steps = preview_steps + p.sampler_index = preview_sampler_index + p.cfg_scale = preview_cfg_scale + p.seed = preview_seed + p.width = preview_width + p.height = preview_height + else: + p.prompt = entry.cond_text + p.steps = 20 + p.width = training_width + p.height = training_height + + preview_text = p.prompt + processed = processing.process_images(p) image = processed.images[0] -- cgit v1.2.1 From 03d62538aebeff51713619fe808c953bdb70193d Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 14 Oct 2022 22:43:55 +0300 Subject: remove duplicate code for log loss, add step, make it read from options rather than gradio input --- modules/textual_inversion/textual_inversion.py | 44 ++++++++++++++++++-------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index 1f5ace6f..da0d77a0 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -173,6 +173,32 @@ def create_embedding(name, num_vectors_per_token, init_text='*'): return fn +def write_loss(log_directory, filename, step, epoch_len, values): + if shared.opts.training_write_csv_every == 0: + return + + if step % shared.opts.training_write_csv_every != 0: + return + + write_csv_header = False if os.path.exists(os.path.join(log_directory, filename)) else True + + with open(os.path.join(log_directory, filename), "a+", newline='') as fout: + csv_writer = csv.DictWriter(fout, fieldnames=["step", "epoch", "epoch_step", *(values.keys())]) + + if write_csv_header: + csv_writer.writeheader() + + epoch = step // epoch_len + epoch_step = step - epoch * epoch_len + + csv_writer.writerow({ + "step": step + 1, + "epoch": epoch + 1, + "epoch_step": epoch_step + 1, + **values, + }) + + def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): assert embedding_name, 'embedding not selected' @@ -257,20 +283,10 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini last_saved_file = os.path.join(embedding_dir, f'{embedding_name}-{embedding.step}.pt') embedding.save(last_saved_file) - if write_csv_every > 0 and log_directory is not None and embedding.step % write_csv_every == 0: - write_csv_header = False if os.path.exists(os.path.join(log_directory, "textual_inversion_loss.csv")) else True - - with open(os.path.join(log_directory, "textual_inversion_loss.csv"), "a+") as fout: - - csv_writer = csv.DictWriter(fout, fieldnames=["epoch", "epoch_step", "loss", "learn_rate"]) - - if write_csv_header: - csv_writer.writeheader() - - csv_writer.writerow({"epoch": epoch_num + 1, - "epoch_step": epoch_step - 1, - "loss": f"{losses.mean():.7f}", - "learn_rate": scheduler.learn_rate}) + write_loss(log_directory, "textual_inversion_loss.csv", embedding.step, len(ds), { + "loss": f"{losses.mean():.7f}", + "learn_rate": scheduler.learn_rate + }) if embedding.step > 0 and images_dir is not None and embedding.step % create_image_every == 0: last_saved_image = os.path.join(images_dir, f'{embedding_name}-{embedding.step}.png') -- cgit v1.2.1 From 4d19f3b7d461fe0f63e7ccff936909b0ce0c6126 Mon Sep 17 00:00:00 2001 From: Melan Date: Fri, 14 Oct 2022 22:45:26 +0200 Subject: Raise an assertion error if no training images have been found. --- modules/textual_inversion/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 67e90afe..12e2f43b 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -81,7 +81,8 @@ class PersonalizedBase(Dataset): entry.cond = cond_model([entry.cond_text]).to(devices.cpu) self.dataset.append(entry) - + + assert len(self.dataset) > 1, "No images have been found in the dataset." self.length = len(self.dataset) * repeats self.initial_indexes = np.arange(self.length) % len(self.dataset) -- cgit v1.2.1 From c7a86f7fe9c0b8967a87e8d709f507d2f44400d8 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 15 Oct 2022 09:24:59 +0300 Subject: add option to use batch size for training --- modules/textual_inversion/dataset.py | 31 ++++++++++++++++---------- modules/textual_inversion/textual_inversion.py | 17 +++++++------- 2 files changed, 27 insertions(+), 21 deletions(-) (limited to 'modules/textual_inversion') diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index 67e90afe..bd99c0cb 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -24,11 +24,12 @@ class DatasetEntry: class PersonalizedBase(Dataset): - def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None, include_cond=False): - re_word = re.compile(shared.opts.dataset_filename_word_regex) if len(shared.opts.dataset_filename_word_regex)>0 else None + def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, device=None, template_file=None, include_cond=False, batch_size=1): + re_word = re.compile(shared.opts.dataset_filename_word_regex) if len(shared.opts.dataset_filename_word_regex) > 0 else None self.placeholder_token = placeholder_token + self.batch_size = batch_size self.width = width self.height = height self.flip = transforms.RandomHorizontalFlip(p=flip_p) @@ -78,13 +79,13 @@ class PersonalizedBase(Dataset): if include_cond: entry.cond_text = self.create_text(filename_text) - entry.cond = cond_model([entry.cond_text]).to(devices.cpu) + entry.cond = cond_model([entry.cond_text]).to(devices.cpu).squeeze(0) self.dataset.append(entry) - self.length = len(self.dataset) * repeats + self.length = len(self.dataset) * repeats // batch_size - self.initial_indexes = np.arange(self.length) % len(self.dataset) + self.initial_indexes = np.arange(len(self.dataset)) self.indexes = None self.shuffle() @@ -101,13 +102,19 @@ class PersonalizedBase(Dataset): return self.length def __getitem__(self, i): - if i % len(self.dataset) == 0: - self.shuffle() + res = [] - index = self.indexes[i % len(self.indexes)] - entry = self.dataset[index] + for j in range(self.batch_size): + position = i * self.batch_size + j + if position % len(self.indexes) == 0: + self.shuffle() - if entry.cond is None: - entry.cond_text = self.create_text(entry.filename_text) + index = self.indexes[position % len(self.indexes)] + entry = self.dataset[index] - return entry + if entry.cond is None: + entry.cond_text = self.create_text(entry.filename_text) + + res.append(entry) + + return res diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index da0d77a0..e754747e 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -199,7 +199,7 @@ def write_loss(log_directory, filename, step, epoch_len, values): }) -def train_embedding(embedding_name, learn_rate, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): +def train_embedding(embedding_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_embedding_every, template_file, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): assert embedding_name, 'embedding not selected' shared.state.textinfo = "Initializing textual inversion training..." @@ -231,7 +231,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, device=devices.device, template_file=template_file, batch_size=batch_size) hijack = sd_hijack.model_hijack @@ -251,7 +251,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini optimizer = torch.optim.AdamW([embedding.vec], lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps-ititial_step) - for i, entry in pbar: + for i, entries in pbar: embedding.step = i + ititial_step scheduler.apply(optimizer, embedding.step) @@ -262,10 +262,9 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini break with torch.autocast("cuda"): - c = cond_model([entry.cond_text]) - - x = entry.latent.to(devices.device) - loss = shared.sd_model(x.unsqueeze(0), c)[0] + c = cond_model([entry.cond_text for entry in entries]) + x = torch.stack([entry.latent for entry in entries]).to(devices.device) + loss = shared.sd_model(x, c)[0] del x losses[embedding.step % losses.shape[0]] = loss.item() @@ -307,7 +306,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini p.width = preview_width p.height = preview_height else: - p.prompt = entry.cond_text + p.prompt = entries[0].cond_text p.steps = 20 p.width = training_width p.height = training_height @@ -348,7 +347,7 @@ def train_embedding(embedding_name, learn_rate, data_root, log_directory, traini

Loss: {losses.mean():.7f}
Step: {embedding.step}
-Last prompt: {html.escape(entry.cond_text)}
+Last prompt: {html.escape(entries[0].cond_text)}
Last saved embedding: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}

-- cgit v1.2.1