From 6021f7a75f7b5208a2be15cda5526028152f922d Mon Sep 17 00:00:00 2001 From: discus0434 Date: Wed, 19 Oct 2022 00:51:36 +0900 Subject: add options to custom hypernetwork layer structure --- modules/hypernetworks/hypernetwork.py | 88 ++++++++++++++++++++++++++--------- 1 file changed, 67 insertions(+), 21 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 4905710e..cadb9911 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -1,52 +1,98 @@ +import csv import datetime import glob import html import os import sys import traceback -import tqdm -import csv +import modules.textual_inversion.dataset import torch - -from ldm.util import default -from modules import devices, shared, processing, sd_models -import torch -from torch import einsum +import tqdm from einops import rearrange, repeat -import modules.textual_inversion.dataset +from ldm.util import default +from modules import devices, processing, sd_models, shared from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler +from torch import einsum + + +def parse_layer_structure(dim, state_dict): + i = 0 + res = [1] + while (key := "linear.{}.weight".format(i)) in state_dict: + weight = state_dict[key] + res.append(len(weight) // dim) + i += 1 + return res class HypernetworkModule(torch.nn.Module): multiplier = 1.0 + layer_structure = None + add_layer_norm = False def __init__(self, dim, state_dict=None): super().__init__() + if (state_dict is None or 'linear.0.weight' not in state_dict) and self.layer_structure is None: + layer_structure = (1, 2, 1) + else: + if self.layer_structure is not None: + assert self.layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" + assert self.layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" + layer_structure = self.layer_structure + else: + layer_structure = parse_layer_structure(dim, state_dict) + + linears = [] + for i in range(len(layer_structure) - 1): + linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) + if self.add_layer_norm: + linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) - self.linear1 = torch.nn.Linear(dim, dim * 2) - self.linear2 = torch.nn.Linear(dim * 2, dim) + self.linear = torch.nn.Sequential(*linears) if state_dict is not None: - self.load_state_dict(state_dict, strict=True) + try: + self.load_state_dict(state_dict) + except RuntimeError: + self.try_load_previous(state_dict) else: - - self.linear1.weight.data.normal_(mean=0.0, std=0.01) - self.linear1.bias.data.zero_() - self.linear2.weight.data.normal_(mean=0.0, std=0.01) - self.linear2.bias.data.zero_() + for layer in self.linear: + layer.weight.data.normal_(mean = 0.0, std = 0.01) + layer.bias.data.zero_() self.to(devices.device) + def try_load_previous(self, state_dict): + states = self.state_dict() + states['linear.0.bias'].copy_(state_dict['linear1.bias']) + states['linear.0.weight'].copy_(state_dict['linear1.weight']) + states['linear.1.bias'].copy_(state_dict['linear2.bias']) + states['linear.1.weight'].copy_(state_dict['linear2.weight']) + def forward(self, x): - return x + (self.linear2(self.linear1(x))) * self.multiplier + return x + self.linear(x) * self.multiplier + + def trainables(self): + res = [] + for layer in self.linear: + res += [layer.weight, layer.bias] + return res def apply_strength(value=None): HypernetworkModule.multiplier = value if value is not None else shared.opts.sd_hypernetwork_strength +def apply_layer_structure(value=None): + HypernetworkModule.layer_structure = value if value is not None else shared.opts.sd_hypernetwork_layer_structure + + +def apply_layer_norm(value=None): + HypernetworkModule.add_layer_norm = value if value is not None else shared.opts.sd_hypernetwork_add_layer_norm + + class Hypernetwork: filename = None name = None @@ -68,7 +114,7 @@ class Hypernetwork: for k, layers in self.layers.items(): for layer in layers: layer.train() - res += [layer.linear1.weight, layer.linear1.bias, layer.linear2.weight, layer.linear2.bias] + res += layer.trainables() return res @@ -226,7 +272,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log shared.state.textinfo = f"Preparing dataset from {html.escape(data_root)}..." with torch.autocast("cuda"): ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=512, height=512, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size) - + assert ds.length > 1, "Dataset should contain more than 1 images" if unload: shared.sd_model.cond_stage_model.to(devices.cpu) shared.sd_model.first_stage_model.to(devices.cpu) @@ -261,7 +307,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log with torch.autocast("cuda"): c = stack_conds([entry.cond for entry in entries]).to(devices.device) -# c = torch.vstack([entry.cond for entry in entries]).to(devices.device) + c = torch.vstack([entry.cond for entry in entries]).to(devices.device) x = torch.stack([entry.latent for entry in entries]).to(devices.device) loss = shared.sd_model(x, c)[0] del x @@ -283,7 +329,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { "loss": f"{mean_loss:.7f}", - "learn_rate": scheduler.learn_rate + "learn_rate": f"{scheduler.learn_rate:.7f}" }) if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: -- cgit v1.2.1 From a5611ea5026bd8e12d8e84023384c369d0511dda Mon Sep 17 00:00:00 2001 From: discus0434 Date: Wed, 19 Oct 2022 01:00:01 +0900 Subject: update --- modules/hypernetworks/hypernetwork.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index cadb9911..c5835bce 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -1,20 +1,22 @@ -import csv import datetime import glob import html import os import sys import traceback +import tqdm +import csv -import modules.textual_inversion.dataset import torch -import tqdm -from einops import rearrange, repeat + from ldm.util import default -from modules import devices, processing, sd_models, shared +from modules import devices, shared, processing, sd_models +import torch +from torch import einsum +from einops import rearrange, repeat +import modules.textual_inversion.dataset from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler -from torch import einsum def parse_layer_structure(dim, state_dict): -- cgit v1.2.1 From e40ba281f1b419cf99552962ea01d87d699840a5 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Wed, 19 Oct 2022 01:03:58 +0900 Subject: update --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index c5835bce..082165f4 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -309,7 +309,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log with torch.autocast("cuda"): c = stack_conds([entry.cond for entry in entries]).to(devices.device) - c = torch.vstack([entry.cond for entry in entries]).to(devices.device) + # c = torch.vstack([entry.cond for entry in entries]).to(devices.device) x = torch.stack([entry.latent for entry in entries]).to(devices.device) loss = shared.sd_model(x, c)[0] del x @@ -331,7 +331,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { "loss": f"{mean_loss:.7f}", - "learn_rate": f"{scheduler.learn_rate:.7f}" + "learn_rate": scheduler.learn_rate }) if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: -- cgit v1.2.1 From 42fbda83bb9830af18187fddb50c1bedd01da502 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Wed, 19 Oct 2022 14:30:33 +0000 Subject: layer options moves into create hnet ui --- modules/hypernetworks/hypernetwork.py | 64 +++++++++++++++++------------------ modules/hypernetworks/ui.py | 9 +++-- 2 files changed, 39 insertions(+), 34 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 583ada31..7d519cd9 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -19,37 +19,21 @@ from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler -def parse_layer_structure(dim, state_dict): - i = 0 - res = [1] - while (key := "linear.{}.weight".format(i)) in state_dict: - weight = state_dict[key] - res.append(len(weight) // dim) - i += 1 - return res - - class HypernetworkModule(torch.nn.Module): multiplier = 1.0 - layer_structure = None - add_layer_norm = False - def __init__(self, dim, state_dict=None): + def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False): super().__init__() - if (state_dict is None or 'linear.0.weight' not in state_dict) and self.layer_structure is None: - layer_structure = (1, 2, 1) + if layer_structure is not None: + assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" + assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" else: - if self.layer_structure is not None: - assert self.layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" - assert self.layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" - layer_structure = self.layer_structure - else: - layer_structure = parse_layer_structure(dim, state_dict) + layer_structure = parse_layer_structure(dim, state_dict) linears = [] for i in range(len(layer_structure) - 1): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) - if self.add_layer_norm: + if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) self.linear = torch.nn.Sequential(*linears) @@ -77,38 +61,47 @@ class HypernetworkModule(torch.nn.Module): return x + self.linear(x) * self.multiplier def trainables(self): - res = [] + layer_structure = [] for layer in self.linear: - res += [layer.weight, layer.bias] - return res + layer_structure += [layer.weight, layer.bias] + return layer_structure def apply_strength(value=None): HypernetworkModule.multiplier = value if value is not None else shared.opts.sd_hypernetwork_strength -def apply_layer_structure(value=None): - HypernetworkModule.layer_structure = value if value is not None else shared.opts.sd_hypernetwork_layer_structure +def parse_layer_structure(dim, state_dict): + i = 0 + layer_structure = [1] + while (key := "linear.{}.weight".format(i)) in state_dict: + weight = state_dict[key] + layer_structure.append(len(weight) // dim) + i += 1 -def apply_layer_norm(value=None): - HypernetworkModule.add_layer_norm = value if value is not None else shared.opts.sd_hypernetwork_add_layer_norm + return layer_structure class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, add_layer_norm=False): self.filename = None self.name = name self.layers = {} self.step = 0 self.sd_checkpoint = None self.sd_checkpoint_name = None + self.layer_structure = layer_structure + self.add_layer_norm = add_layer_norm for size in enable_sizes or []: - self.layers[size] = (HypernetworkModule(size), HypernetworkModule(size)) + self.layers[size] = ( + HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm), + HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm), + ) def weights(self): res = [] @@ -128,6 +121,8 @@ class Hypernetwork: state_dict['step'] = self.step state_dict['name'] = self.name + state_dict['layer_structure'] = self.layer_structure + state_dict['is_layer_norm'] = self.add_layer_norm state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name @@ -142,10 +137,15 @@ class Hypernetwork: for size, sd in state_dict.items(): if type(size) == int: - self.layers[size] = (HypernetworkModule(size, sd[0]), HypernetworkModule(size, sd[1])) + self.layers[size] = ( + HypernetworkModule(size, sd[0], state_dict["layer_structure"], state_dict["is_layer_norm"]), + HypernetworkModule(size, sd[1], state_dict["layer_structure"], state_dict["is_layer_norm"]), + ) self.name = state_dict.get('name', self.name) self.step = state_dict.get('step', 0) + self.layer_structure = state_dict.get('layer_structure', None) + self.add_layer_norm = state_dict.get('is_layer_norm', False) self.sd_checkpoint = state_dict.get('sd_checkpoint', None) self.sd_checkpoint_name = state_dict.get('sd_checkpoint_name', None) diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index dfa599af..7e8ea95e 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -9,11 +9,16 @@ from modules import sd_hijack, shared, devices from modules.hypernetworks import hypernetwork -def create_hypernetwork(name, enable_sizes): +def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm=False): fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt") assert not os.path.exists(fn), f"file {fn} already exists" - hypernet = modules.hypernetworks.hypernetwork.Hypernetwork(name=name, enable_sizes=[int(x) for x in enable_sizes]) + hypernet = modules.hypernetworks.hypernetwork.Hypernetwork( + name=name, + enable_sizes=[int(x) for x in enable_sizes], + layer_structure=layer_structure, + add_layer_norm=add_layer_norm, + ) hypernet.save(fn) shared.reload_hypernetworks() -- cgit v1.2.1 From 3770b8d2fa62066d472a04739c7b84bce8538832 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Wed, 19 Oct 2022 15:28:42 +0000 Subject: enable to write layer structure of hn himself --- modules/hypernetworks/ui.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index 7e8ea95e..08f75f15 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -1,5 +1,6 @@ import html import os +import re import gradio as gr @@ -13,6 +14,9 @@ def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt") assert not os.path.exists(fn), f"file {fn} already exists" + if type(layer_structure) == str: + layer_structure = tuple(map(int, re.sub(r'\D', '', layer_structure))) + hypernet = modules.hypernetworks.hypernetwork.Hypernetwork( name=name, enable_sizes=[int(x) for x in enable_sizes], -- cgit v1.2.1 From c6e9fed5003631c87d548e74d6e359678959a453 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Wed, 19 Oct 2022 19:21:16 +0300 Subject: fix for #3086 failing to load any previous hypernet --- modules/hypernetworks/hypernetwork.py | 60 ++++++++++++++++------------------- 1 file changed, 28 insertions(+), 32 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 7d519cd9..74300122 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -24,11 +24,10 @@ class HypernetworkModule(torch.nn.Module): def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False): super().__init__() - if layer_structure is not None: - assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" - assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" - else: - layer_structure = parse_layer_structure(dim, state_dict) + + assert layer_structure is not None, "layer_structure mut not be None" + assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" + assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" linears = [] for i in range(len(layer_structure) - 1): @@ -39,23 +38,30 @@ class HypernetworkModule(torch.nn.Module): self.linear = torch.nn.Sequential(*linears) if state_dict is not None: - try: - self.load_state_dict(state_dict) - except RuntimeError: - self.try_load_previous(state_dict) + self.fix_old_state_dict(state_dict) + self.load_state_dict(state_dict) else: for layer in self.linear: - layer.weight.data.normal_(mean = 0.0, std = 0.01) + layer.weight.data.normal_(mean=0.0, std=0.01) layer.bias.data.zero_() self.to(devices.device) - def try_load_previous(self, state_dict): - states = self.state_dict() - states['linear.0.bias'].copy_(state_dict['linear1.bias']) - states['linear.0.weight'].copy_(state_dict['linear1.weight']) - states['linear.1.bias'].copy_(state_dict['linear2.bias']) - states['linear.1.weight'].copy_(state_dict['linear2.weight']) + def fix_old_state_dict(self, state_dict): + changes = { + 'linear1.bias': 'linear.0.bias', + 'linear1.weight': 'linear.0.weight', + 'linear2.bias': 'linear.1.bias', + 'linear2.weight': 'linear.1.weight', + } + + for fr, to in changes.items(): + x = state_dict.get(fr, None) + if x is None: + continue + + del state_dict[fr] + state_dict[to] = x def forward(self, x): return x + self.linear(x) * self.multiplier @@ -71,18 +77,6 @@ def apply_strength(value=None): HypernetworkModule.multiplier = value if value is not None else shared.opts.sd_hypernetwork_strength -def parse_layer_structure(dim, state_dict): - i = 0 - layer_structure = [1] - - while (key := "linear.{}.weight".format(i)) in state_dict: - weight = state_dict[key] - layer_structure.append(len(weight) // dim) - i += 1 - - return layer_structure - - class Hypernetwork: filename = None name = None @@ -135,17 +129,18 @@ class Hypernetwork: state_dict = torch.load(filename, map_location='cpu') + self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) + self.add_layer_norm = state_dict.get('is_layer_norm', False) + for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], state_dict["layer_structure"], state_dict["is_layer_norm"]), - HypernetworkModule(size, sd[1], state_dict["layer_structure"], state_dict["is_layer_norm"]), + HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm), + HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm), ) self.name = state_dict.get('name', self.name) self.step = state_dict.get('step', 0) - self.layer_structure = state_dict.get('layer_structure', None) - self.add_layer_norm = state_dict.get('is_layer_norm', False) self.sd_checkpoint = state_dict.get('sd_checkpoint', None) self.sd_checkpoint_name = state_dict.get('sd_checkpoint_name', None) @@ -244,6 +239,7 @@ def stack_conds(conds): return torch.stack(conds) + def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): assert hypernetwork_name, 'hypernetwork not selected' -- cgit v1.2.1 From 2ce52d32e41fb523d1494f45073fd18496e52d35 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Wed, 19 Oct 2022 16:31:12 +0000 Subject: fix for #3086 failing to load any previous hypernet --- modules/hypernetworks/hypernetwork.py | 60 ++++++++++++++++------------------- 1 file changed, 28 insertions(+), 32 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 7d519cd9..74300122 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -24,11 +24,10 @@ class HypernetworkModule(torch.nn.Module): def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False): super().__init__() - if layer_structure is not None: - assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" - assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" - else: - layer_structure = parse_layer_structure(dim, state_dict) + + assert layer_structure is not None, "layer_structure mut not be None" + assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" + assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" linears = [] for i in range(len(layer_structure) - 1): @@ -39,23 +38,30 @@ class HypernetworkModule(torch.nn.Module): self.linear = torch.nn.Sequential(*linears) if state_dict is not None: - try: - self.load_state_dict(state_dict) - except RuntimeError: - self.try_load_previous(state_dict) + self.fix_old_state_dict(state_dict) + self.load_state_dict(state_dict) else: for layer in self.linear: - layer.weight.data.normal_(mean = 0.0, std = 0.01) + layer.weight.data.normal_(mean=0.0, std=0.01) layer.bias.data.zero_() self.to(devices.device) - def try_load_previous(self, state_dict): - states = self.state_dict() - states['linear.0.bias'].copy_(state_dict['linear1.bias']) - states['linear.0.weight'].copy_(state_dict['linear1.weight']) - states['linear.1.bias'].copy_(state_dict['linear2.bias']) - states['linear.1.weight'].copy_(state_dict['linear2.weight']) + def fix_old_state_dict(self, state_dict): + changes = { + 'linear1.bias': 'linear.0.bias', + 'linear1.weight': 'linear.0.weight', + 'linear2.bias': 'linear.1.bias', + 'linear2.weight': 'linear.1.weight', + } + + for fr, to in changes.items(): + x = state_dict.get(fr, None) + if x is None: + continue + + del state_dict[fr] + state_dict[to] = x def forward(self, x): return x + self.linear(x) * self.multiplier @@ -71,18 +77,6 @@ def apply_strength(value=None): HypernetworkModule.multiplier = value if value is not None else shared.opts.sd_hypernetwork_strength -def parse_layer_structure(dim, state_dict): - i = 0 - layer_structure = [1] - - while (key := "linear.{}.weight".format(i)) in state_dict: - weight = state_dict[key] - layer_structure.append(len(weight) // dim) - i += 1 - - return layer_structure - - class Hypernetwork: filename = None name = None @@ -135,17 +129,18 @@ class Hypernetwork: state_dict = torch.load(filename, map_location='cpu') + self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) + self.add_layer_norm = state_dict.get('is_layer_norm', False) + for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], state_dict["layer_structure"], state_dict["is_layer_norm"]), - HypernetworkModule(size, sd[1], state_dict["layer_structure"], state_dict["is_layer_norm"]), + HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm), + HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm), ) self.name = state_dict.get('name', self.name) self.step = state_dict.get('step', 0) - self.layer_structure = state_dict.get('layer_structure', None) - self.add_layer_norm = state_dict.get('is_layer_norm', False) self.sd_checkpoint = state_dict.get('sd_checkpoint', None) self.sd_checkpoint_name = state_dict.get('sd_checkpoint_name', None) @@ -244,6 +239,7 @@ def stack_conds(conds): return torch.stack(conds) + def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): assert hypernetwork_name, 'hypernetwork not selected' -- cgit v1.2.1 From d6ea5841374a28f3f6deb73abc251c8f0bcb240f Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Thu, 20 Oct 2022 00:07:57 +0100 Subject: change html output --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 7d519cd9..73c1cb80 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -380,7 +380,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log Loss: {mean_loss:.7f}
Step: {hypernetwork.step}
Last prompt: {html.escape(entries[0].cond_text)}
-Last saved embedding: {html.escape(last_saved_file)}
+Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}

""" -- cgit v1.2.1 From 166be3919b817cee5e702fd01c34afe9081b952c Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Thu, 20 Oct 2022 00:09:40 +0100 Subject: allow overwrite old hn --- modules/hypernetworks/ui.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index 08f75f15..f45345ea 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -10,9 +10,10 @@ from modules import sd_hijack, shared, devices from modules.hypernetworks import hypernetwork -def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm=False): +def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, add_layer_norm=False): fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt") - assert not os.path.exists(fn), f"file {fn} already exists" + if not overwrite_old: + assert not os.path.exists(fn), f"file {fn} already exists" if type(layer_structure) == str: layer_structure = tuple(map(int, re.sub(r'\D', '', layer_structure))) -- cgit v1.2.1 From 6f98e89486f55b0e4657e96ce640cf1c4675d187 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Thu, 20 Oct 2022 00:10:45 +0000 Subject: update --- modules/hypernetworks/hypernetwork.py | 29 +++++++++++++++++++---------- modules/hypernetworks/ui.py | 3 ++- 2 files changed, 21 insertions(+), 11 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 74300122..7d617680 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -22,16 +22,20 @@ from modules.textual_inversion.learn_schedule import LearnRateScheduler class HypernetworkModule(torch.nn.Module): multiplier = 1.0 - def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False): + def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False, activation_func=None): super().__init__() - assert layer_structure is not None, "layer_structure mut not be None" + assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" linears = [] for i in range(len(layer_structure) - 1): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) + if activation_func == "relu": + linears.append(torch.nn.ReLU()) + if activation_func == "leakyrelu": + linears.append(torch.nn.LeakyReLU()) if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) @@ -42,8 +46,9 @@ class HypernetworkModule(torch.nn.Module): self.load_state_dict(state_dict) else: for layer in self.linear: - layer.weight.data.normal_(mean=0.0, std=0.01) - layer.bias.data.zero_() + if not "ReLU" in layer.__str__(): + layer.weight.data.normal_(mean=0.0, std=0.01) + layer.bias.data.zero_() self.to(devices.device) @@ -69,7 +74,8 @@ class HypernetworkModule(torch.nn.Module): def trainables(self): layer_structure = [] for layer in self.linear: - layer_structure += [layer.weight, layer.bias] + if not "ReLU" in layer.__str__(): + layer_structure += [layer.weight, layer.bias] return layer_structure @@ -81,7 +87,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, add_layer_norm=False): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, add_layer_norm=False, activation_func=None): self.filename = None self.name = name self.layers = {} @@ -90,11 +96,12 @@ class Hypernetwork: self.sd_checkpoint_name = None self.layer_structure = layer_structure self.add_layer_norm = add_layer_norm + self.activation_func = activation_func for size in enable_sizes or []: self.layers[size] = ( - HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm), - HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm), + HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func), + HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func), ) def weights(self): @@ -117,6 +124,7 @@ class Hypernetwork: state_dict['name'] = self.name state_dict['layer_structure'] = self.layer_structure state_dict['is_layer_norm'] = self.add_layer_norm + state_dict['activation_func'] = self.activation_func state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name @@ -131,12 +139,13 @@ class Hypernetwork: self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) self.add_layer_norm = state_dict.get('is_layer_norm', False) + self.activation_func = state_dict.get('activation_func', None) for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm), - HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm), + HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm, self.activation_func), + HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm, self.activation_func), ) self.name = state_dict.get('name', self.name) diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index 08f75f15..83f9547b 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -10,7 +10,7 @@ from modules import sd_hijack, shared, devices from modules.hypernetworks import hypernetwork -def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm=False): +def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm=False, activation_func=None): fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt") assert not os.path.exists(fn), f"file {fn} already exists" @@ -22,6 +22,7 @@ def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm enable_sizes=[int(x) for x in enable_sizes], layer_structure=layer_structure, add_layer_norm=add_layer_norm, + activation_func=activation_func, ) hypernet.save(fn) -- cgit v1.2.1 From 930b4c64f7dbce6918894d53538003e5959fd022 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Thu, 20 Oct 2022 08:18:02 +0300 Subject: allow float sizes for hypernet's layer_structure --- modules/hypernetworks/ui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index 08f75f15..e0741d08 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -15,7 +15,7 @@ def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm assert not os.path.exists(fn), f"file {fn} already exists" if type(layer_structure) == str: - layer_structure = tuple(map(int, re.sub(r'\D', '', layer_structure))) + layer_structure = [float(x.strip()) for x in layer_structure.split(",")] hypernet = modules.hypernetworks.hypernetwork.Hypernetwork( name=name, -- cgit v1.2.1 From d8acd34f66ab35a91f10d66330bcc95a83bfcac6 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Thu, 20 Oct 2022 23:43:03 +0900 Subject: generalized some functions and option for ignoring first layer --- modules/hypernetworks/hypernetwork.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 7d617680..3a44b377 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -21,21 +21,27 @@ from modules.textual_inversion.learn_schedule import LearnRateScheduler class HypernetworkModule(torch.nn.Module): multiplier = 1.0 - + activation_dict = {"relu": torch.nn.ReLU, "leakyrelu": torch.nn.LeakyReLU, "elu": torch.nn.ELU, + "swish": torch.nn.Hardswish} + def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False, activation_func=None): super().__init__() assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" - + linears = [] for i in range(len(layer_structure) - 1): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) - if activation_func == "relu": - linears.append(torch.nn.ReLU()) - if activation_func == "leakyrelu": - linears.append(torch.nn.LeakyReLU()) + # if skip_first_layer because first parameters potentially contain negative values + if i < 1: continue + if activation_func in HypernetworkModule.activation_dict: + linears.append(HypernetworkModule.activation_dict[activation_func]()) + else: + print("Invalid key {} encountered as activation function!".format(activation_func)) + # if use_dropout: + linears.append(torch.nn.Dropout(p=0.3)) if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) @@ -46,7 +52,7 @@ class HypernetworkModule(torch.nn.Module): self.load_state_dict(state_dict) else: for layer in self.linear: - if not "ReLU" in layer.__str__(): + if isinstance(layer, torch.nn.Linear): layer.weight.data.normal_(mean=0.0, std=0.01) layer.bias.data.zero_() @@ -298,7 +304,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log return hypernetwork, filename scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) - optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) + # if optimizer == "Adam": or else Adam / AdamW / etc... + optimizer = torch.optim.Adam(weights, lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, entries in pbar: -- cgit v1.2.1 From a71e0212363979c7cbbb797c9fbd5f8cd03b29d3 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Thu, 20 Oct 2022 23:48:52 +0900 Subject: only linear --- modules/hypernetworks/hypernetwork.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3a44b377..905cbeef 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -35,13 +35,13 @@ class HypernetworkModule(torch.nn.Module): for i in range(len(layer_structure) - 1): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) # if skip_first_layer because first parameters potentially contain negative values - if i < 1: continue + # if i < 1: continue if activation_func in HypernetworkModule.activation_dict: linears.append(HypernetworkModule.activation_dict[activation_func]()) else: print("Invalid key {} encountered as activation function!".format(activation_func)) # if use_dropout: - linears.append(torch.nn.Dropout(p=0.3)) + # linears.append(torch.nn.Dropout(p=0.3)) if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) @@ -80,7 +80,7 @@ class HypernetworkModule(torch.nn.Module): def trainables(self): layer_structure = [] for layer in self.linear: - if not "ReLU" in layer.__str__(): + if isinstance(layer, torch.nn.Linear): layer_structure += [layer.weight, layer.bias] return layer_structure @@ -304,8 +304,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log return hypernetwork, filename scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) - # if optimizer == "Adam": or else Adam / AdamW / etc... - optimizer = torch.optim.Adam(weights, lr=scheduler.learn_rate) + # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc... + optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, entries in pbar: -- cgit v1.2.1 From 108be15500aac590b4e00420635d7b61fccfa530 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Fri, 21 Oct 2022 01:00:41 +0900 Subject: fix bugs and optimizations --- modules/hypernetworks/hypernetwork.py | 105 +++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 46 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 905cbeef..893ba110 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -36,14 +36,14 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) # if skip_first_layer because first parameters potentially contain negative values # if i < 1: continue + if add_layer_norm: + linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) if activation_func in HypernetworkModule.activation_dict: linears.append(HypernetworkModule.activation_dict[activation_func]()) else: print("Invalid key {} encountered as activation function!".format(activation_func)) # if use_dropout: # linears.append(torch.nn.Dropout(p=0.3)) - if add_layer_norm: - linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) self.linear = torch.nn.Sequential(*linears) @@ -115,11 +115,24 @@ class Hypernetwork: for k, layers in self.layers.items(): for layer in layers: - layer.train() res += layer.trainables() return res + def eval(self): + for k, layers in self.layers.items(): + for layer in layers: + layer.eval() + for items in self.weights(): + items.requires_grad = False + + def train(self): + for k, layers in self.layers.items(): + for layer in layers: + layer.train() + for items in self.weights(): + items.requires_grad = True + def save(self, filename): state_dict = {} @@ -290,10 +303,6 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log shared.sd_model.first_stage_model.to(devices.cpu) hypernetwork = shared.loaded_hypernetwork - weights = hypernetwork.weights() - for weight in weights: - weight.requires_grad = True - losses = torch.zeros((32,)) last_saved_file = "" @@ -304,10 +313,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log return hypernetwork, filename scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) - # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc... - optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) + optimizer = torch.optim.AdamW(hypernetwork.weights(), lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) + hypernetwork.train() for i, entries in pbar: hypernetwork.step = i + ititial_step @@ -328,8 +337,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log losses[hypernetwork.step % losses.shape[0]] = loss.item() - optimizer.zero_grad() + optimizer.zero_grad(set_to_none=True) loss.backward() + del loss optimizer.step() mean_loss = losses.mean() if torch.isnan(mean_loss): @@ -346,44 +356,47 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log }) if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: + torch.cuda.empty_cache() last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png') + with torch.no_grad(): + hypernetwork.eval() + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + + p = processing.StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + do_not_save_grid=True, + do_not_save_samples=True, + ) - optimizer.zero_grad() - shared.sd_model.cond_stage_model.to(devices.device) - shared.sd_model.first_stage_model.to(devices.device) - - p = processing.StableDiffusionProcessingTxt2Img( - sd_model=shared.sd_model, - do_not_save_grid=True, - do_not_save_samples=True, - ) - - if preview_from_txt2img: - p.prompt = preview_prompt - p.negative_prompt = preview_negative_prompt - p.steps = preview_steps - p.sampler_index = preview_sampler_index - p.cfg_scale = preview_cfg_scale - p.seed = preview_seed - p.width = preview_width - p.height = preview_height - else: - p.prompt = entries[0].cond_text - p.steps = 20 - - preview_text = p.prompt - - processed = processing.process_images(p) - image = processed.images[0] if len(processed.images)>0 else None - - if unload: - shared.sd_model.cond_stage_model.to(devices.cpu) - shared.sd_model.first_stage_model.to(devices.cpu) - - if image is not None: - shared.state.current_image = image - image.save(last_saved_image) - last_saved_image += f", prompt: {preview_text}" + if preview_from_txt2img: + p.prompt = preview_prompt + p.negative_prompt = preview_negative_prompt + p.steps = preview_steps + p.sampler_index = preview_sampler_index + p.cfg_scale = preview_cfg_scale + p.seed = preview_seed + p.width = preview_width + p.height = preview_height + else: + p.prompt = entries[0].cond_text + p.steps = 20 + + preview_text = p.prompt + + processed = processing.process_images(p) + image = processed.images[0] if len(processed.images)>0 else None + + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) + + if image is not None: + shared.state.current_image = image + image.save(last_saved_image) + last_saved_image += f", prompt: {preview_text}" + + hypernetwork.train() shared.state.job_no = hypernetwork.step -- cgit v1.2.1 From f89829ec3a0baceb445451ad98d4fb4323e922aa Mon Sep 17 00:00:00 2001 From: aria1th <35677394+aria1th@users.noreply.github.com> Date: Fri, 21 Oct 2022 01:37:11 +0900 Subject: Revert "fix bugs and optimizations" This reverts commit 108be15500aac590b4e00420635d7b61fccfa530. --- modules/hypernetworks/hypernetwork.py | 105 +++++++++++++++------------------- 1 file changed, 46 insertions(+), 59 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 893ba110..905cbeef 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -36,14 +36,14 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) # if skip_first_layer because first parameters potentially contain negative values # if i < 1: continue - if add_layer_norm: - linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) if activation_func in HypernetworkModule.activation_dict: linears.append(HypernetworkModule.activation_dict[activation_func]()) else: print("Invalid key {} encountered as activation function!".format(activation_func)) # if use_dropout: # linears.append(torch.nn.Dropout(p=0.3)) + if add_layer_norm: + linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) self.linear = torch.nn.Sequential(*linears) @@ -115,24 +115,11 @@ class Hypernetwork: for k, layers in self.layers.items(): for layer in layers: + layer.train() res += layer.trainables() return res - def eval(self): - for k, layers in self.layers.items(): - for layer in layers: - layer.eval() - for items in self.weights(): - items.requires_grad = False - - def train(self): - for k, layers in self.layers.items(): - for layer in layers: - layer.train() - for items in self.weights(): - items.requires_grad = True - def save(self, filename): state_dict = {} @@ -303,6 +290,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log shared.sd_model.first_stage_model.to(devices.cpu) hypernetwork = shared.loaded_hypernetwork + weights = hypernetwork.weights() + for weight in weights: + weight.requires_grad = True + losses = torch.zeros((32,)) last_saved_file = "" @@ -313,10 +304,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log return hypernetwork, filename scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) - optimizer = torch.optim.AdamW(hypernetwork.weights(), lr=scheduler.learn_rate) + # if optimizer == "AdamW": or else Adam / AdamW / SGD, etc... + optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) - hypernetwork.train() for i, entries in pbar: hypernetwork.step = i + ititial_step @@ -337,9 +328,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log losses[hypernetwork.step % losses.shape[0]] = loss.item() - optimizer.zero_grad(set_to_none=True) + optimizer.zero_grad() loss.backward() - del loss optimizer.step() mean_loss = losses.mean() if torch.isnan(mean_loss): @@ -356,47 +346,44 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log }) if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: - torch.cuda.empty_cache() last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png') - with torch.no_grad(): - hypernetwork.eval() - shared.sd_model.cond_stage_model.to(devices.device) - shared.sd_model.first_stage_model.to(devices.device) - - p = processing.StableDiffusionProcessingTxt2Img( - sd_model=shared.sd_model, - do_not_save_grid=True, - do_not_save_samples=True, - ) - if preview_from_txt2img: - p.prompt = preview_prompt - p.negative_prompt = preview_negative_prompt - p.steps = preview_steps - p.sampler_index = preview_sampler_index - p.cfg_scale = preview_cfg_scale - p.seed = preview_seed - p.width = preview_width - p.height = preview_height - else: - p.prompt = entries[0].cond_text - p.steps = 20 - - preview_text = p.prompt - - processed = processing.process_images(p) - image = processed.images[0] if len(processed.images)>0 else None - - if unload: - shared.sd_model.cond_stage_model.to(devices.cpu) - shared.sd_model.first_stage_model.to(devices.cpu) - - if image is not None: - shared.state.current_image = image - image.save(last_saved_image) - last_saved_image += f", prompt: {preview_text}" - - hypernetwork.train() + optimizer.zero_grad() + shared.sd_model.cond_stage_model.to(devices.device) + shared.sd_model.first_stage_model.to(devices.device) + + p = processing.StableDiffusionProcessingTxt2Img( + sd_model=shared.sd_model, + do_not_save_grid=True, + do_not_save_samples=True, + ) + + if preview_from_txt2img: + p.prompt = preview_prompt + p.negative_prompt = preview_negative_prompt + p.steps = preview_steps + p.sampler_index = preview_sampler_index + p.cfg_scale = preview_cfg_scale + p.seed = preview_seed + p.width = preview_width + p.height = preview_height + else: + p.prompt = entries[0].cond_text + p.steps = 20 + + preview_text = p.prompt + + processed = processing.process_images(p) + image = processed.images[0] if len(processed.images)>0 else None + + if unload: + shared.sd_model.cond_stage_model.to(devices.cpu) + shared.sd_model.first_stage_model.to(devices.cpu) + + if image is not None: + shared.state.current_image = image + image.save(last_saved_image) + last_saved_image += f", prompt: {preview_text}" shared.state.job_no = hypernetwork.step -- cgit v1.2.1 From c23f666dba2b484d521d2dc4be91cf9e09312647 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 21 Oct 2022 09:47:43 +0300 Subject: a more strict check for activation type and a more reasonable check for type of layer in hypernets --- modules/hypernetworks/hypernetwork.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 7d617680..84e7e350 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -32,10 +32,16 @@ class HypernetworkModule(torch.nn.Module): linears = [] for i in range(len(layer_structure) - 1): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) + if activation_func == "relu": linears.append(torch.nn.ReLU()) - if activation_func == "leakyrelu": + elif activation_func == "leakyrelu": linears.append(torch.nn.LeakyReLU()) + elif activation_func == 'linear' or activation_func is None: + pass + else: + raise RuntimeError(f'hypernetwork uses an unsupported activation function: {activation_func}') + if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) @@ -46,7 +52,7 @@ class HypernetworkModule(torch.nn.Module): self.load_state_dict(state_dict) else: for layer in self.linear: - if not "ReLU" in layer.__str__(): + if type(layer) == torch.nn.Linear: layer.weight.data.normal_(mean=0.0, std=0.01) layer.bias.data.zero_() @@ -74,7 +80,7 @@ class HypernetworkModule(torch.nn.Module): def trainables(self): layer_structure = [] for layer in self.linear: - if not "ReLU" in layer.__str__(): + if type(layer) == torch.nn.Linear: layer_structure += [layer.weight, layer.bias] return layer_structure -- cgit v1.2.1 From 5245c7a4935f67b677da0f5a1fc2b74c074aa0e2 Mon Sep 17 00:00:00 2001 From: timntorres Date: Wed, 19 Oct 2022 12:21:32 -0700 Subject: Issue #2921-Give PNG info to Hypernet previews. --- modules/hypernetworks/hypernetwork.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 84e7e350..68c8f26d 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -256,6 +256,9 @@ def stack_conds(conds): def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): + # images is required here to give training previews their infotext. Importing this at the very top causes a circular dependency. + from modules import images + assert hypernetwork_name, 'hypernetwork not selected' path = shared.hypernetworks.get(hypernetwork_name, None) @@ -298,6 +301,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log last_saved_file = "" last_saved_image = "" + forced_filename = "" ititial_step = hypernetwork.step or 0 if ititial_step > steps: @@ -345,7 +349,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log }) if hypernetwork.step > 0 and images_dir is not None and hypernetwork.step % create_image_every == 0: - last_saved_image = os.path.join(images_dir, f'{hypernetwork_name}-{hypernetwork.step}.png') + forced_filename = f'{hypernetwork_name}-{hypernetwork.step}' + last_saved_image = os.path.join(images_dir, forced_filename) optimizer.zero_grad() shared.sd_model.cond_stage_model.to(devices.device) @@ -381,7 +386,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if image is not None: shared.state.current_image = image - image.save(last_saved_image) + last_saved_image, last_text_info = images.save_image(image, images_dir, "", p.seed, p.prompt, shared.opts.samples_format, processed.infotexts[0], p=p, forced_filename=forced_filename) last_saved_image += f", prompt: {preview_text}" shared.state.job_no = hypernetwork.step -- cgit v1.2.1 From 4ff274e1e35bb642687253ce744d2cfa738ab293 Mon Sep 17 00:00:00 2001 From: timntorres Date: Wed, 19 Oct 2022 12:32:22 -0700 Subject: Revise comments. --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 68c8f26d..3f96361c 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -256,7 +256,7 @@ def stack_conds(conds): def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): - # images is required here to give training previews their infotext. Importing this at the very top causes a circular dependency. + # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images assert hypernetwork_name, 'hypernetwork not selected' -- cgit v1.2.1 From 03a1e288c4973dd2dff57a97469b40f146b6fccf Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Fri, 21 Oct 2022 10:13:24 +0300 Subject: turns out LayerNorm also has weight and bias and needs to be pre-multiplied and trained for hypernets --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3274a802..b1a5d0c7 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -52,7 +52,7 @@ class HypernetworkModule(torch.nn.Module): self.load_state_dict(state_dict) else: for layer in self.linear: - if type(layer) == torch.nn.Linear: + if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm: layer.weight.data.normal_(mean=0.0, std=0.01) layer.bias.data.zero_() @@ -80,7 +80,7 @@ class HypernetworkModule(torch.nn.Module): def trainables(self): layer_structure = [] for layer in self.linear: - if type(layer) == torch.nn.Linear: + if type(layer) == torch.nn.Linear or type(layer) == torch.nn.LayerNorm: layer_structure += [layer.weight, layer.bias] return layer_structure -- cgit v1.2.1 From 51e3dc9ccad157d7161b697a246e26c868d46a7c Mon Sep 17 00:00:00 2001 From: timntorres Date: Fri, 21 Oct 2022 02:11:12 -0700 Subject: Sanitize hypernet name input. --- modules/hypernetworks/ui.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index 266f04f6..e6f50a1f 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -11,6 +11,9 @@ from modules.hypernetworks import hypernetwork def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, add_layer_norm=False, activation_func=None): + # Remove illegal characters from name. + name = "".join( x for x in name if (x.isalnum() or x in "._- ")) + fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt") if not overwrite_old: assert not os.path.exists(fn), f"file {fn} already exists" -- cgit v1.2.1 From 19818f023cfafc472c6c241cab0b72896a168481 Mon Sep 17 00:00:00 2001 From: timntorres Date: Fri, 21 Oct 2022 02:14:02 -0700 Subject: Match hypernet name with filename in all cases. --- modules/hypernetworks/hypernetwork.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index b1a5d0c7..6d392be4 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -340,7 +340,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log pbar.set_description(f"loss: {mean_loss:.7f}") if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0: - last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name}-{hypernetwork.step}.pt') + temp = hypernetwork.name + # Before saving, change name to match current checkpoint. + hypernetwork.name = f'{hypernetwork_name}-{hypernetwork.step}' + last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt') hypernetwork.save(last_saved_file) textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { @@ -405,6 +408,9 @@ Last saved image: {html.escape(last_saved_image)}
hypernetwork.sd_checkpoint = checkpoint.hash hypernetwork.sd_checkpoint_name = checkpoint.model_name + # Before saving for the last time, change name back to the base name (as opposed to the save_hypernetwork_every step-suffixed naming convention). + hypernetwork.name = hypernetwork_name + filename = os.path.join(shared.cmd_opts.hypernetwork_dir, f'{hypernetwork.name}.pt') hypernetwork.save(filename) return hypernetwork, filename -- cgit v1.2.1 From 272fa527bbe93143668ffc16838107b7dca35b40 Mon Sep 17 00:00:00 2001 From: timntorres Date: Fri, 21 Oct 2022 02:41:55 -0700 Subject: Remove unused variable. --- modules/hypernetworks/hypernetwork.py | 1 - 1 file changed, 1 deletion(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 6d392be4..47d91ea5 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -340,7 +340,6 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log pbar.set_description(f"loss: {mean_loss:.7f}") if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0: - temp = hypernetwork.name # Before saving, change name to match current checkpoint. hypernetwork.name = f'{hypernetwork_name}-{hypernetwork.step}' last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork.name}.pt') -- cgit v1.2.1 From 0e8ca8e7af05be22d7d2c07a47c3c7febe0f0ab6 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Sat, 22 Oct 2022 11:07:00 +0000 Subject: add dropout --- modules/hypernetworks/hypernetwork.py | 68 +++++++++++++++++++++-------------- modules/hypernetworks/ui.py | 10 +++--- 2 files changed, 47 insertions(+), 31 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 905cbeef..e493f366 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -1,47 +1,60 @@ +import csv import datetime import glob import html import os import sys import traceback -import tqdm -import csv +import modules.textual_inversion.dataset import torch - -from ldm.util import default -from modules import devices, shared, processing, sd_models -import torch -from torch import einsum +import tqdm from einops import rearrange, repeat -import modules.textual_inversion.dataset +from ldm.util import default +from modules import devices, processing, sd_models, shared from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler +from torch import einsum class HypernetworkModule(torch.nn.Module): multiplier = 1.0 - activation_dict = {"relu": torch.nn.ReLU, "leakyrelu": torch.nn.LeakyReLU, "elu": torch.nn.ELU, - "swish": torch.nn.Hardswish} - - def __init__(self, dim, state_dict=None, layer_structure=None, add_layer_norm=False, activation_func=None): + activation_dict = { + "relu": torch.nn.ReLU, + "leakyrelu": torch.nn.LeakyReLU, + "elu": torch.nn.ELU, + "swish": torch.nn.Hardswish, + } + + def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): super().__init__() assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" - + assert activation_func not in self.activation_dict.keys() + "linear", f"Valid activation funcs: 'linear', 'relu', 'leakyrelu', 'elu', 'swish'" + linears = [] for i in range(len(layer_structure) - 1): + + # Add a fully-connected layer linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) - # if skip_first_layer because first parameters potentially contain negative values - # if i < 1: continue - if activation_func in HypernetworkModule.activation_dict: - linears.append(HypernetworkModule.activation_dict[activation_func]()) + + # Add an activation func + if activation_func == "linear": + pass + elif activation_func in self.activation_dict: + linears.append(self.activation_dict[activation_func]()) else: - print("Invalid key {} encountered as activation function!".format(activation_func)) - # if use_dropout: - # linears.append(torch.nn.Dropout(p=0.3)) + raise NotImplementedError( + "Valid activation funcs: 'linear', 'relu', 'leakyrelu', 'elu', 'swish'" + ) + + # Add dropout + if use_dropout: + linears.append(torch.nn.Dropout(p=0.3)) + + # Add layer normalization if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) @@ -93,7 +106,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, add_layer_norm=False, activation_func=None): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): self.filename = None self.name = name self.layers = {} @@ -101,13 +114,14 @@ class Hypernetwork: self.sd_checkpoint = None self.sd_checkpoint_name = None self.layer_structure = layer_structure - self.add_layer_norm = add_layer_norm self.activation_func = activation_func + self.add_layer_norm = add_layer_norm + self.use_dropout = use_dropout for size in enable_sizes or []: self.layers[size] = ( - HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func), - HypernetworkModule(size, None, self.layer_structure, self.add_layer_norm, self.activation_func), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), ) def weights(self): @@ -129,8 +143,9 @@ class Hypernetwork: state_dict['step'] = self.step state_dict['name'] = self.name state_dict['layer_structure'] = self.layer_structure - state_dict['is_layer_norm'] = self.add_layer_norm state_dict['activation_func'] = self.activation_func + state_dict['is_layer_norm'] = self.add_layer_norm + state_dict['use_dropout'] = self.use_dropout state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name @@ -144,8 +159,9 @@ class Hypernetwork: state_dict = torch.load(filename, map_location='cpu') self.layer_structure = state_dict.get('layer_structure', [1, 2, 1]) - self.add_layer_norm = state_dict.get('is_layer_norm', False) self.activation_func = state_dict.get('activation_func', None) + self.add_layer_norm = state_dict.get('is_layer_norm', False) + self.use_dropout = state_dict.get('use_dropout', False) for size, sd in state_dict.items(): if type(size) == int: diff --git a/modules/hypernetworks/ui.py b/modules/hypernetworks/ui.py index 1a5a27d8..5f6f17b6 100644 --- a/modules/hypernetworks/ui.py +++ b/modules/hypernetworks/ui.py @@ -3,14 +3,13 @@ import os import re import gradio as gr - -import modules.textual_inversion.textual_inversion import modules.textual_inversion.preprocess -from modules import sd_hijack, shared, devices +import modules.textual_inversion.textual_inversion +from modules import devices, sd_hijack, shared from modules.hypernetworks import hypernetwork -def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm=False, activation_func=None): +def create_hypernetwork(name, enable_sizes, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): fn = os.path.join(shared.cmd_opts.hypernetwork_dir, f"{name}.pt") assert not os.path.exists(fn), f"file {fn} already exists" @@ -21,8 +20,9 @@ def create_hypernetwork(name, enable_sizes, layer_structure=None, add_layer_norm name=name, enable_sizes=[int(x) for x in enable_sizes], layer_structure=layer_structure, - add_layer_norm=add_layer_norm, activation_func=activation_func, + add_layer_norm=add_layer_norm, + use_dropout=use_dropout, ) hypernet.save(fn) -- cgit v1.2.1 From 7fd90128eb6d1820045bfe2c2c1269661023a712 Mon Sep 17 00:00:00 2001 From: AUTOMATIC <16777216c@gmail.com> Date: Sat, 22 Oct 2022 14:48:43 +0300 Subject: added a guard for hypernet training that will stop early if weights are getting no gradients --- modules/hypernetworks/hypernetwork.py | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 47d91ea5..46039a49 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -310,6 +310,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log scheduler = LearnRateScheduler(learn_rate, steps, ititial_step) optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate) + steps_without_grad = 0 + pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, entries in pbar: hypernetwork.step = i + ititial_step @@ -332,8 +334,17 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log losses[hypernetwork.step % losses.shape[0]] = loss.item() optimizer.zero_grad() + weights[0].grad = None loss.backward() + + if weights[0].grad is None: + steps_without_grad += 1 + else: + steps_without_grad = 0 + assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue' + optimizer.step() + mean_loss = losses.mean() if torch.isnan(mean_loss): raise RuntimeError("Loss diverged.") -- cgit v1.2.1 From fccba4729db341a299db3343e3264fecd9459a07 Mon Sep 17 00:00:00 2001 From: discus0434 Date: Sat, 22 Oct 2022 12:02:41 +0000 Subject: add an option to avoid dying relu --- modules/hypernetworks/hypernetwork.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index b7a04038..3132a56c 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -32,7 +32,6 @@ class HypernetworkModule(torch.nn.Module): assert layer_structure is not None, "layer_structure must not be None" assert layer_structure[0] == 1, "Multiplier Sequence should start with size 1!" assert layer_structure[-1] == 1, "Multiplier Sequence should end with size 1!" - assert activation_func not in self.activation_dict.keys() + "linear", f"Valid activation funcs: 'linear', 'relu', 'leakyrelu', 'elu', 'swish'" linears = [] for i in range(len(layer_structure) - 1): @@ -43,12 +42,13 @@ class HypernetworkModule(torch.nn.Module): # Add an activation func if activation_func == "linear" or activation_func is None: pass + # If ReLU, Skip adding it to the first layer to avoid dying ReLU + elif activation_func == "relu" and i < 1: + pass elif activation_func in self.activation_dict: linears.append(self.activation_dict[activation_func]()) else: - raise RuntimeError( - "Valid activation funcs: 'linear', 'relu', 'leakyrelu', 'elu', 'swish'" - ) + raise RuntimeError(f'hypernetwork uses an unsupported activation function: {activation_func}') # Add dropout if use_dropout: @@ -166,8 +166,8 @@ class Hypernetwork: for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], self.layer_structure, self.add_layer_norm, self.activation_func), - HypernetworkModule(size, sd[1], self.layer_structure, self.add_layer_norm, self.activation_func), + HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), ) self.name = state_dict.get('name', self.name) -- cgit v1.2.1 From 7912acef725832debef58c4c7bf8ec22fb446c0b Mon Sep 17 00:00:00 2001 From: discus0434 Date: Sat, 22 Oct 2022 13:00:44 +0000 Subject: small fix --- modules/hypernetworks/hypernetwork.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3132a56c..7d12e0ff 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -42,22 +42,20 @@ class HypernetworkModule(torch.nn.Module): # Add an activation func if activation_func == "linear" or activation_func is None: pass - # If ReLU, Skip adding it to the first layer to avoid dying ReLU - elif activation_func == "relu" and i < 1: - pass elif activation_func in self.activation_dict: linears.append(self.activation_dict[activation_func]()) else: raise RuntimeError(f'hypernetwork uses an unsupported activation function: {activation_func}') - # Add dropout - if use_dropout: - linears.append(torch.nn.Dropout(p=0.3)) - # Add layer normalization if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) + # Add dropout + if use_dropout: + p = 0.5 if 0 <= i <= len(layer_structure) - 3 else 0.2 + linears.append(torch.nn.Dropout(p=p)) + self.linear = torch.nn.Sequential(*linears) if state_dict is not None: -- cgit v1.2.1 From 6a4fa73a38935a18779ce1809892730fd1572bee Mon Sep 17 00:00:00 2001 From: discus0434 Date: Sat, 22 Oct 2022 13:44:39 +0000 Subject: small fix --- modules/hypernetworks/hypernetwork.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3372aae2..3bc71ee5 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -51,10 +51,9 @@ class HypernetworkModule(torch.nn.Module): if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) - # Add dropout - if use_dropout: - p = 0.5 if 0 <= i <= len(layer_structure) - 3 else 0.2 - linears.append(torch.nn.Dropout(p=p)) + # Add dropout expect last layer + if use_dropout and i < len(layer_structure) - 3: + linears.append(torch.nn.Dropout(p=0.3)) self.linear = torch.nn.Sequential(*linears) -- cgit v1.2.1 From 24694e5983d0944b901892cb101878e6dec89a20 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 01:57:58 +0900 Subject: Update hypernetwork.py --- modules/hypernetworks/hypernetwork.py | 55 ++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 11 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3bc71ee5..81132be4 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -16,6 +16,7 @@ from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler from torch import einsum +from statistics import stdev, mean class HypernetworkModule(torch.nn.Module): multiplier = 1.0 @@ -268,6 +269,32 @@ def stack_conds(conds): return torch.stack(conds) +def log_statistics(loss_info:dict, key, value): + if key not in loss_info: + loss_info[key] = [value] + else: + loss_info[key].append(value) + if len(loss_info) > 1024: + loss_info.pop(0) + + +def statistics(data): + total_information = f"loss:{mean(data):.3f}"+u"\u00B1"+f"({stdev(data)/ (len(data)**0.5):.3f})" + recent_data = data[-32:] + recent_information = f"recent 32 loss:{mean(recent_data):.3f}"+u"\u00B1"+f"({stdev(recent_data)/ (len(recent_data)**0.5):.3f})" + return total_information, recent_information + + +def report_statistics(loss_info:dict): + keys = sorted(loss_info.keys(), key=lambda x: sum(loss_info[x]) / len(loss_info[x])) + for key in keys: + info, recent = statistics(loss_info[key]) + print("Loss statistics for file " + key) + print(info) + print(recent) + + + def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log_directory, training_width, training_height, steps, create_image_every, save_hypernetwork_every, template_file, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images @@ -310,7 +337,11 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log for weight in weights: weight.requires_grad = True - losses = torch.zeros((32,)) + size = len(ds.indexes) + loss_dict = {} + losses = torch.zeros((size,)) + previous_mean_loss = 0 + print("Mean loss of {} elements".format(size)) last_saved_file = "" last_saved_image = "" @@ -329,7 +360,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, entries in pbar: hypernetwork.step = i + ititial_step - + if loss_dict and i % size == 0: + previous_mean_loss = sum(i[-1] for i in loss_dict.values()) / len(loss_dict) + scheduler.apply(optimizer, hypernetwork.step) if scheduler.finished: break @@ -346,7 +379,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log del c losses[hypernetwork.step % losses.shape[0]] = loss.item() - + for entry in entries: + log_statistics(loss_dict, entry.filename, loss.item()) + optimizer.zero_grad() weights[0].grad = None loss.backward() @@ -359,10 +394,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log optimizer.step() - mean_loss = losses.mean() - if torch.isnan(mean_loss): + if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): raise RuntimeError("Loss diverged.") - pbar.set_description(f"loss: {mean_loss:.7f}") + pbar.set_description(f"dataset loss: {previous_mean_loss:.7f}") if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0: # Before saving, change name to match current checkpoint. @@ -371,7 +405,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log hypernetwork.save(last_saved_file) textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), { - "loss": f"{mean_loss:.7f}", + "loss": f"{previous_mean_loss:.7f}", "learn_rate": scheduler.learn_rate }) @@ -420,14 +454,15 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log shared.state.textinfo = f"""

-Loss: {mean_loss:.7f}
+Loss: {previous_mean_loss:.7f}
Step: {hypernetwork.step}
Last prompt: {html.escape(entries[0].cond_text)}
Last saved hypernetwork: {html.escape(last_saved_file)}
Last saved image: {html.escape(last_saved_image)}

""" - + + report_statistics(loss_dict) checkpoint = sd_models.select_checkpoint() hypernetwork.sd_checkpoint = checkpoint.hash @@ -438,5 +473,3 @@ Last saved image: {html.escape(last_saved_image)}
hypernetwork.save(filename) return hypernetwork, filename - - -- cgit v1.2.1 From 48dbf99e84045ee7af55bc5b1b86492a240e631e Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 04:17:16 +0900 Subject: Allow tracking real-time loss Someone had 6000 images in their dataset, and it was shown as 0, which was confusing. This will allow tracking real time dataset-average loss for registered objects. --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 81132be4..99fd0f8f 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -360,7 +360,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step) for i, entries in pbar: hypernetwork.step = i + ititial_step - if loss_dict and i % size == 0: + if len(loss_dict) > 0: previous_mean_loss = sum(i[-1] for i in loss_dict.values()) / len(loss_dict) scheduler.apply(optimizer, hypernetwork.step) -- cgit v1.2.1 From 1fbfc052eb529d8cf8ce5baf578bcf93d0280c29 Mon Sep 17 00:00:00 2001 From: DepFA <35278260+dfaker@users.noreply.github.com> Date: Sun, 23 Oct 2022 05:43:34 +0100 Subject: Update hypernetwork.py --- modules/hypernetworks/hypernetwork.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 99fd0f8f..98a7b62e 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -288,10 +288,13 @@ def statistics(data): def report_statistics(loss_info:dict): keys = sorted(loss_info.keys(), key=lambda x: sum(loss_info[x]) / len(loss_info[x])) for key in keys: - info, recent = statistics(loss_info[key]) - print("Loss statistics for file " + key) - print(info) - print(recent) + try: + print("Loss statistics for file " + key) + info, recent = statistics(loss_info[key]) + print(info) + print(recent) + except Exception as e: + print(e) -- cgit v1.2.1 From b297cc3324979ec78d69b2d11dd18030dfad7bcc Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 20:06:42 +0900 Subject: Hypernetworks - fix KeyError in statistics caching Statistics logging has changed to {filename : list[losses]}, so it has to use loss_info[key].pop() --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 98a7b62e..33827210 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -274,8 +274,8 @@ def log_statistics(loss_info:dict, key, value): loss_info[key] = [value] else: loss_info[key].append(value) - if len(loss_info) > 1024: - loss_info.pop(0) + if len(loss_info[key]) > 1024: + loss_info[key].pop(0) def statistics(data): -- cgit v1.2.1 From 40b56c9289bf9458ae5ef3c1990ccea851c6c3e2 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 21:07:07 +0900 Subject: cleanup some code --- modules/hypernetworks/hypernetwork.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 33827210..4072bf54 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -16,6 +16,7 @@ from modules.textual_inversion import textual_inversion from modules.textual_inversion.learn_schedule import LearnRateScheduler from torch import einsum +from collections import defaultdict, deque from statistics import stdev, mean class HypernetworkModule(torch.nn.Module): @@ -269,15 +270,6 @@ def stack_conds(conds): return torch.stack(conds) -def log_statistics(loss_info:dict, key, value): - if key not in loss_info: - loss_info[key] = [value] - else: - loss_info[key].append(value) - if len(loss_info[key]) > 1024: - loss_info[key].pop(0) - - def statistics(data): total_information = f"loss:{mean(data):.3f}"+u"\u00B1"+f"({stdev(data)/ (len(data)**0.5):.3f})" recent_data = data[-32:] @@ -341,7 +333,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log weight.requires_grad = True size = len(ds.indexes) - loss_dict = {} + loss_dict = defaultdict(lambda : deque(maxlen = 1024)) losses = torch.zeros((size,)) previous_mean_loss = 0 print("Mean loss of {} elements".format(size)) @@ -383,7 +375,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log losses[hypernetwork.step % losses.shape[0]] = loss.item() for entry in entries: - log_statistics(loss_dict, entry.filename, loss.item()) + loss_dict[entry.filename].append(loss.item()) optimizer.zero_grad() weights[0].grad = None -- cgit v1.2.1 From 348f89c8d40397c1875cff4a7331018785f9c3b8 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 21:29:53 +0900 Subject: statistics for pbar --- modules/hypernetworks/hypernetwork.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 4072bf54..48b56029 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -335,6 +335,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log size = len(ds.indexes) loss_dict = defaultdict(lambda : deque(maxlen = 1024)) losses = torch.zeros((size,)) + previous_mean_losses = [0] previous_mean_loss = 0 print("Mean loss of {} elements".format(size)) @@ -356,7 +357,8 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log for i, entries in pbar: hypernetwork.step = i + ititial_step if len(loss_dict) > 0: - previous_mean_loss = sum(i[-1] for i in loss_dict.values()) / len(loss_dict) + previous_mean_losses = [i[-1] for i in loss_dict.values()] + previous_mean_loss = mean(previous_mean_losses) scheduler.apply(optimizer, hypernetwork.step) if scheduler.finished: @@ -391,7 +393,13 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log if torch.isnan(losses[hypernetwork.step % losses.shape[0]]): raise RuntimeError("Loss diverged.") - pbar.set_description(f"dataset loss: {previous_mean_loss:.7f}") + + if len(previous_mean_losses) > 1: + std = stdev(previous_mean_losses) + else: + std = 0 + dataset_loss_info = f"dataset loss:{mean(previous_mean_losses):.3f}" + u"\u00B1" + f"({std / (len(previous_mean_losses) ** 0.5):.3f})" + pbar.set_description(dataset_loss_info) if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0: # Before saving, change name to match current checkpoint. -- cgit v1.2.1 From 0d2e1dac407a0e2f5b148d314715f0457b2525b7 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 21:41:39 +0900 Subject: convert deque -> list I don't feel this being efficient --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 48b56029..fb510fa7 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -282,7 +282,7 @@ def report_statistics(loss_info:dict): for key in keys: try: print("Loss statistics for file " + key) - info, recent = statistics(loss_info[key]) + info, recent = statistics(list(loss_info[key])) print(info) print(recent) except Exception as e: -- cgit v1.2.1 From e9a410b5357612f63528015c5533c2185dcff92e Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Sun, 23 Oct 2022 21:47:39 +0900 Subject: check length for variance --- modules/hypernetworks/hypernetwork.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index fb510fa7..d647ea55 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -271,9 +271,17 @@ def stack_conds(conds): def statistics(data): - total_information = f"loss:{mean(data):.3f}"+u"\u00B1"+f"({stdev(data)/ (len(data)**0.5):.3f})" + if len(data) < 2: + std = 0 + else: + std = stdev(data) + total_information = f"loss:{mean(data):.3f}" + u"\u00B1" + f"({std/ (len(data) ** 0.5):.3f})" recent_data = data[-32:] - recent_information = f"recent 32 loss:{mean(recent_data):.3f}"+u"\u00B1"+f"({stdev(recent_data)/ (len(recent_data)**0.5):.3f})" + if len(recent_data) < 2: + std = 0 + else: + std = stdev(recent_data) + recent_information = f"recent 32 loss:{mean(recent_data):.3f}" + u"\u00B1" + f"({std / (len(recent_data) ** 0.5):.3f})" return total_information, recent_information -- cgit v1.2.1