path: root/modules/hypernetworks/hypernetwork.py
author    Muhammad Rizqi Nur <rizqinur2010@gmail.com>  2022-10-28 17:16:23 +0700
committer Muhammad Rizqi Nur <rizqinur2010@gmail.com>  2022-10-28 17:16:23 +0700
commit    16451ca573220e49f2eaaab97580b6b91287c8c4 (patch)
tree      5f80fa2082fbbc6469c5807020273af75b5f1a21 /modules/hypernetworks/hypernetwork.py
parent    1618df41bad092e068c61bf510b1e20856821ad5 (diff)
Learning rate sched syntax support for grad clipping
Diffstat (limited to 'modules/hypernetworks/hypernetwork.py')
-rw-r--r--  modules/hypernetworks/hypernetwork.py  13
1 file changed, 10 insertions(+), 3 deletions(-)
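
The commit subject refers to reusing the learn-rate schedule syntax for the gradient-clipping threshold. As a rough sketch only, assuming the schedule string is a comma-separated list of "value:step" pairs such as "5e-3:100, 1e-3:1000, 1e-5" (this format is an assumption, not confirmed by the diff, and the helpers below are hypothetical, not the webui's LearnRateScheduler):

def parse_schedule(schedule: str, total_steps: int):
    # Parse "5e-3:100, 1e-3:1000, 1e-5" into [(5e-3, 100), (1e-3, 1000), (1e-5, total_steps)].
    pairs = []
    for chunk in schedule.split(","):
        chunk = chunk.strip()
        if ":" in chunk:
            value, end_step = chunk.split(":")
            pairs.append((float(value), int(end_step)))
        else:
            pairs.append((float(chunk), total_steps))  # bare value runs to the end of training
    return pairs

def value_at(step: int, pairs):
    # Return the scheduled value in effect at `step`.
    for value, end_step in pairs:
        if step < end_step:
            return value
    return pairs[-1][0]

With that reading, value_at(50, parse_schedule("5e-3:100, 1e-3:1000", 2000)) returns 5e-3, and steps past the last boundary fall back to the final entry.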
diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py
index c5d60654..86532063 100644
--- a/modules/hypernetworks/hypernetwork.py
+++ b/modules/hypernetworks/hypernetwork.py
@@ -383,11 +383,15 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
ititial_step = hypernetwork.step or 0
if ititial_step > steps:
return hypernetwork, filename
-
+
clip_grad_mode_value = clip_grad_mode == "value"
clip_grad_mode_norm = clip_grad_mode == "norm"
+ clip_grad_enabled = clip_grad_mode_value or clip_grad_mode_norm
+ if clip_grad_enabled:
+ clip_grad_sched = LearnRateScheduler(clip_grad_value, steps, ititial_step, verbose=False)
scheduler = LearnRateScheduler(learn_rate, steps, ititial_step)
+
# if optimizer == "AdamW": or else Adam / AdamW / SGD, etc...
optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate)
@@ -407,6 +411,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
if shared.state.interrupted:
break
+ if clip_grad_enabled:
+ clip_grad_sched.step(hypernetwork.step)
+
with torch.autocast("cuda"):
c = stack_conds([entry.cond for entry in entries]).to(devices.device)
# c = torch.vstack([entry.cond for entry in entries]).to(devices.device)
@@ -430,9 +437,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
assert steps_without_grad < 10, 'no gradient found for the trained weight after backward() for 10 steps in a row; this is a bug; training cannot continue'
if clip_grad_mode_value:
- torch.nn.utils.clip_grad_value_(weights, clip_value=clip_grad_value)
+ torch.nn.utils.clip_grad_value_(weights, clip_value=clip_grad_sched.learn_rate)
elif clip_grad_mode_norm:
- torch.nn.utils.clip_grad_norm_(weights, max_norm=clip_grad_value)
+ torch.nn.utils.clip_grad_norm_(weights, max_norm=clip_grad_sched.learn_rate)
optimizer.step()
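
For context, a minimal self-contained sketch of the technique the patch applies: evaluate a scheduled clip threshold each step and pass it to torch.nn.utils.clip_grad_value_ or clip_grad_norm_ before optimizer.step(). The model, loss, batch, and schedule below are placeholders for illustration, not the webui's actual training loop.

import torch

# Hypothetical schedule: clip at 0.1 for the first 500 steps, then at 0.01.
clip_schedule = [(0.1, 500), (0.01, 10**9)]

def scheduled_clip_value(step, schedule):
    # Return the clip threshold in effect at `step` (plays the role of clip_grad_sched.learn_rate).
    for value, end_step in schedule:
        if step < end_step:
            return value
    return schedule[-1][0]

def training_step(model, optimizer, batch, step, clip_grad_mode="value"):
    optimizer.zero_grad()
    x, y = batch
    loss = torch.nn.functional.mse_loss(model(x), y)
    loss.backward()

    clip_value = scheduled_clip_value(step, clip_schedule)
    if clip_grad_mode == "value":
        torch.nn.utils.clip_grad_value_(model.parameters(), clip_value=clip_value)
    elif clip_grad_mode == "norm":
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=clip_value)

    optimizer.step()
    return loss.item()

The patch takes the same shape: clip_grad_sched is stepped alongside the learn-rate scheduler each iteration, and its current value replaces the fixed clip_grad_value previously passed to the clipping calls.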