From 2f4c91894d4c0a055c1069b2fda0e4da8fcda188 Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 12:10:30 +0800 Subject: Remove activation from final layer of HNs --- modules/hypernetworks/hypernetwork.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index d647ea55..54346b64 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -41,8 +41,8 @@ class HypernetworkModule(torch.nn.Module): # Add a fully-connected layer linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) - # Add an activation func - if activation_func == "linear" or activation_func is None: + # Add an activation func except last layer + if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 3: pass elif activation_func in self.activation_dict: linears.append(self.activation_dict[activation_func]()) @@ -53,7 +53,7 @@ class HypernetworkModule(torch.nn.Module): if add_layer_norm: linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) - # Add dropout expect last layer + # Add dropout except last layer if use_dropout and i < len(layer_structure) - 3: linears.append(torch.nn.Dropout(p=0.3)) -- cgit v1.2.1 From c702d4d0df21790199d199818f25c449213ffe0f Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 13:43:04 +0800 Subject: Fix off-by-one --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 54346b64..3ce85bb5 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -42,7 +42,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) # Add an activation func except last layer - if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 3: + if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 2: pass elif activation_func in self.activation_dict: linears.append(self.activation_dict[activation_func]()) @@ -54,7 +54,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) # Add dropout except last layer - if use_dropout and i < len(layer_structure) - 3: + if use_dropout and i < len(layer_structure) - 2: linears.append(torch.nn.Dropout(p=0.3)) self.linear = torch.nn.Sequential(*linears) -- cgit v1.2.1 From 877d94f97ca5491d8779440769b191e0dcd32c8e Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 14:50:58 +0800 Subject: Back compatibility --- modules/hypernetworks/hypernetwork.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 3ce85bb5..dd317085 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -28,7 +28,7 @@ class HypernetworkModule(torch.nn.Module): "swish": torch.nn.Hardswish, } - def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): + def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False, activate_output=False): super().__init__() assert layer_structure is not None, "layer_structure must not be None" @@ -42,7 +42,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.Linear(int(dim * layer_structure[i]), int(dim * layer_structure[i+1]))) # Add an activation func except last layer - if activation_func == "linear" or activation_func is None or i >= len(layer_structure) - 2: + if activation_func == "linear" or activation_func is None or (i >= len(layer_structure) - 2 and not activate_output): pass elif activation_func in self.activation_dict: linears.append(self.activation_dict[activation_func]()) @@ -105,7 +105,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, add_layer_norm=False, use_dropout=False, activate_output=False): self.filename = None self.name = name self.layers = {} @@ -116,11 +116,12 @@ class Hypernetwork: self.activation_func = activation_func self.add_layer_norm = add_layer_norm self.use_dropout = use_dropout + self.activate_output = activate_output for size in enable_sizes or []: self.layers[size] = ( - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output), ) def weights(self): @@ -147,6 +148,7 @@ class Hypernetwork: state_dict['use_dropout'] = self.use_dropout state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name + state_dict['activate_output'] = self.activate_output torch.save(state_dict, filename) @@ -161,12 +163,13 @@ class Hypernetwork: self.activation_func = state_dict.get('activation_func', None) self.add_layer_norm = state_dict.get('is_layer_norm', False) self.use_dropout = state_dict.get('use_dropout', False) + self.activate_output = state_dict.get('activate_output', True) for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), - HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout), + HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output), + HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.add_layer_norm, self.use_dropout, self.activate_output), ) self.name = state_dict.get('name', self.name) -- cgit v1.2.1 From 91bb35b1e6842b30ce7553009c8ecea3643de8d2 Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 15:00:03 +0800 Subject: Merge fix --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index eab8b32f..bd171793 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -190,7 +190,7 @@ class Hypernetwork: print(f"Weight initialization is {self.weight_init}") self.add_layer_norm = state_dict.get('is_layer_norm', False) print(f"Layer norm is set to {self.add_layer_norm}") - self.use_dropout = state_dict.get('use_dropout', False + self.use_dropout = state_dict.get('use_dropout', False) print(f"Dropout usage is set to {self.use_dropout}" ) self.activate_output = state_dict.get('activate_output', True) -- cgit v1.2.1 From b6a8bb123bd519736306417399f6441e504f1e8b Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 15:15:19 +0800 Subject: Fix merge --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index bd171793..2997cead 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -60,7 +60,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) # Add dropout except last layer - if use_dropout and i < len(layer_structure) - 2: + if use_dropout and i < len(layer_structure) - 3: linears.append(torch.nn.Dropout(p=0.3)) self.linear = torch.nn.Sequential(*linears) @@ -126,7 +126,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False) + def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False): self.filename = None self.name = name self.layers = {} -- cgit v1.2.1 From 85fcccc105aa50f1d78de559233eaa9f384608b5 Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Wed, 26 Oct 2022 22:24:33 +0900 Subject: Squashed commit of fixing dropout silently fix dropouts for future hypernetworks add kwargs for Hypernetwork class hypernet UI for gradio input add recommended options remove as options revert adding options in ui --- modules/hypernetworks/hypernetwork.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 2997cead..dd921153 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -34,7 +34,8 @@ class HypernetworkModule(torch.nn.Module): } activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) - def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', add_layer_norm=False, use_dropout=False, activate_output=False): + def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', + add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs): super().__init__() assert layer_structure is not None, "layer_structure must not be None" @@ -60,7 +61,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) # Add dropout except last layer - if use_dropout and i < len(layer_structure) - 3: + if 'last_layer_dropout' in kwargs and kwargs['last_layer_dropout'] and use_dropout and i < len(layer_structure) - 2: linears.append(torch.nn.Dropout(p=0.3)) self.linear = torch.nn.Sequential(*linears) @@ -126,7 +127,7 @@ class Hypernetwork: filename = None name = None - def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False): + def __init__(self, name=None, enable_sizes=None, layer_structure=None, activation_func=None, weight_init=None, add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs): self.filename = None self.name = name self.layers = {} @@ -139,11 +140,14 @@ class Hypernetwork: self.add_layer_norm = add_layer_norm self.use_dropout = use_dropout self.activate_output = activate_output + self.last_layer_dropout = kwargs['last_layer_dropout'] if 'last_layer_dropout' in kwargs else True for size in enable_sizes or []: self.layers[size] = ( - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output), - HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), + HypernetworkModule(size, None, self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), ) def weights(self): @@ -172,7 +176,8 @@ class Hypernetwork: state_dict['sd_checkpoint'] = self.sd_checkpoint state_dict['sd_checkpoint_name'] = self.sd_checkpoint_name state_dict['activate_output'] = self.activate_output - + state_dict['last_layer_dropout'] = self.last_layer_dropout + torch.save(state_dict, filename) def load(self, filename): @@ -193,12 +198,16 @@ class Hypernetwork: self.use_dropout = state_dict.get('use_dropout', False) print(f"Dropout usage is set to {self.use_dropout}" ) self.activate_output = state_dict.get('activate_output', True) + print(f"Activate last layer is set to {self.activate_output}") + self.last_layer_dropout = state_dict.get('last_layer_dropout', False) for size, sd in state_dict.items(): if type(size) == int: self.layers[size] = ( - HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output), - HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, self.add_layer_norm, self.use_dropout, self.activate_output), + HypernetworkModule(size, sd[0], self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), + HypernetworkModule(size, sd[1], self.layer_structure, self.activation_func, self.weight_init, + self.add_layer_norm, self.use_dropout, self.activate_output, last_layer_dropout=self.last_layer_dropout), ) self.name = state_dict.get('name', self.name) -- cgit v1.2.1 From cc56df996e95c2c82295ab7b9928da2544791220 Mon Sep 17 00:00:00 2001 From: guaneec Date: Wed, 26 Oct 2022 23:51:51 +0800 Subject: Fix dropout logic --- modules/hypernetworks/hypernetwork.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index dd921153..b17598fe 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -35,7 +35,7 @@ class HypernetworkModule(torch.nn.Module): activation_dict.update({cls_name.lower(): cls_obj for cls_name, cls_obj in inspect.getmembers(torch.nn.modules.activation) if inspect.isclass(cls_obj) and cls_obj.__module__ == 'torch.nn.modules.activation'}) def __init__(self, dim, state_dict=None, layer_structure=None, activation_func=None, weight_init='Normal', - add_layer_norm=False, use_dropout=False, activate_output=False, **kwargs): + add_layer_norm=False, use_dropout=False, activate_output=False, last_layer_dropout=True): super().__init__() assert layer_structure is not None, "layer_structure must not be None" @@ -61,7 +61,7 @@ class HypernetworkModule(torch.nn.Module): linears.append(torch.nn.LayerNorm(int(dim * layer_structure[i+1]))) # Add dropout except last layer - if 'last_layer_dropout' in kwargs and kwargs['last_layer_dropout'] and use_dropout and i < len(layer_structure) - 2: + if use_dropout and (i < len(layer_structure) - 3 or last_layer_dropout and i < len(layer_structure) - 2): linears.append(torch.nn.Dropout(p=0.3)) self.linear = torch.nn.Sequential(*linears) -- cgit v1.2.1 From 029d7c75436558f1e884bb127caed73caaecb83a Mon Sep 17 00:00:00 2001 From: AngelBottomless <35677394+aria1th@users.noreply.github.com> Date: Thu, 27 Oct 2022 14:44:53 +0900 Subject: Revert unresolved changes in Bias initialization it should be zeros_ or parameterized in future properly. --- modules/hypernetworks/hypernetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'modules/hypernetworks') diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index b17598fe..25427a37 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -75,7 +75,7 @@ class HypernetworkModule(torch.nn.Module): w, b = layer.weight.data, layer.bias.data if weight_init == "Normal" or type(layer) == torch.nn.LayerNorm: normal_(w, mean=0.0, std=0.01) - normal_(b, mean=0.0, std=0.005) + normal_(b, mean=0.0, std=0) elif weight_init == 'XavierUniform': xavier_uniform_(w) zeros_(b) -- cgit v1.2.1