ESRGAN support

author: AUTOMATIC <16777216c@gmail.com> 2022-09-04 18:54:12 +0300
committer: AUTOMATIC <16777216c@gmail.com> 2022-09-04 18:54:12 +0300
commit: f299645aeeb65fcddde2d136fd550b6b01ffebb3 (patch)
tree: 5e00d526688ce928b68f023ac91a4bf227aaf092 /modules
parent: 78278ce695beffbf59c7320bb0441922d66b1c0e (diff)
7 files changed, 288 insertions, 17 deletions
diff --git a/modules/esrgam_model_arch.py b/modules/esrgam_model_arch.py
new file mode 100644
index 00000000..e413d36e
--- /dev/null
+++ b/modules/esrgam_model_arch.py
@@ -0,0 +1,80 @@
+# this file is taken from https://github.com/xinntao/ESRGAN
+
+import functools
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def make_layer(block, n_layers):
+    layers = []
+    for _ in range(n_layers):
+        layers.append(block())
+    return nn.Sequential(*layers)
+
+
+class ResidualDenseBlock_5C(nn.Module):
+    def __init__(self, nf=64, gc=32, bias=True):
+        super(ResidualDenseBlock_5C, self).__init__()
+        # gc: growth channel, i.e. intermediate channels
+        self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias)
+        self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias)
+        self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias)
+        self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias)
+        self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias)
+        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
+
+        # initialization
+        # mutil.initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)
+
+    def forward(self, x):
+        x1 = self.lrelu(self.conv1(x))
+        x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1)))
+        x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1)))
+        x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1)))
+        x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
+        return x5 * 0.2 + x
+
+
+class RRDB(nn.Module):
+    '''Residual in Residual Dense Block'''
+
+    def __init__(self, nf, gc=32):
+        super(RRDB, self).__init__()
+        self.RDB1 = ResidualDenseBlock_5C(nf, gc)
+        self.RDB2 = ResidualDenseBlock_5C(nf, gc)
+        self.RDB3 = ResidualDenseBlock_5C(nf, gc)
+
+    def forward(self, x):
+        out = self.RDB1(x)
+        out = self.RDB2(out)
+        out = self.RDB3(out)
+        return out * 0.2 + x
+
+
+class RRDBNet(nn.Module):
+    def __init__(self, in_nc, out_nc, nf, nb, gc=32):
+        super(RRDBNet, self).__init__()
+        RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc)
+
+        self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True)
+        self.RRDB_trunk = make_layer(RRDB_block_f, nb)
+        self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
+        #### upsampling
+        self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
+        self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
+        self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
+        self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True)
+
+        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
+
+    def forward(self, x):
+        fea = self.conv_first(x)
+        trunk = self.trunk_conv(self.RRDB_trunk(fea))
+        fea = fea + trunk
+
+        fea = self.lrelu(self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest')))
+        fea = self.lrelu(self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest')))
+        out = self.conv_last(self.lrelu(self.HRconv(fea)))
+
+        return out
diff --git a/modules/esrgan_model.py b/modules/esrgan_model.py
new file mode 100644
index 00000000..3dcef5a6
--- /dev/null
+++ b/modules/esrgan_model.py
@@ -0,0 +1,134 @@
+import os
+import sys
+import traceback
+
+import numpy as np
+import torch
+from PIL import Image
+
+import modules.esrgam_model_arch as arch
+from modules import shared
+from modules.shared import opts
+import modules.images
+
+
+def load_model(filename):
+    # this code is adapted from https://github.com/xinntao/ESRGAN
+
+    pretrained_net = torch.load(filename)
+    crt_model = arch.RRDBNet(3, 3, 64, 23, gc=32)
+
+    if 'conv_first.weight' in pretrained_net:
+        crt_model.load_state_dict(pretrained_net)
+        return crt_model
+
+    crt_net = crt_model.state_dict()
+    load_net_clean = {}
+    for k, v in pretrained_net.items():
+        if k.startswith('module.'):
+            load_net_clean[k[7:]] = v
+        else:
+            load_net_clean[k] = v
+    pretrained_net = load_net_clean
+
+    tbd = []
+    for k, v in crt_net.items():
+        tbd.append(k)
+
+    # directly copy
+    for k, v in crt_net.items():
+        if k in pretrained_net and pretrained_net[k].size() == v.size():
+            crt_net[k] = pretrained_net[k]
+            tbd.remove(k)
+
+    crt_net['conv_first.weight'] = pretrained_net['model.0.weight']
+    crt_net['conv_first.bias'] = pretrained_net['model.0.bias']
+
+    for k in tbd.copy():
+        if 'RDB' in k:
+            ori_k = k.replace('RRDB_trunk.', 'model.1.sub.')
+            if '.weight' in k:
+                ori_k = ori_k.replace('.weight', '.0.weight')
+            elif '.bias' in k:
+                ori_k = ori_k.replace('.bias', '.0.bias')
+            crt_net[k] = pretrained_net[ori_k]
+            tbd.remove(k)
+
+    crt_net['trunk_conv.weight'] = pretrained_net['model.1.sub.23.weight']
+    crt_net['trunk_conv.bias'] = pretrained_net['model.1.sub.23.bias']
+    crt_net['upconv1.weight'] = pretrained_net['model.3.weight']
+    crt_net['upconv1.bias'] = pretrained_net['model.3.bias']
+    crt_net['upconv2.weight'] = pretrained_net['model.6.weight']
+    crt_net['upconv2.bias'] = pretrained_net['model.6.bias']
+    crt_net['HRconv.weight'] = pretrained_net['model.8.weight']
+    crt_net['HRconv.bias'] = pretrained_net['model.8.bias']
+    crt_net['conv_last.weight'] = pretrained_net['model.10.weight']
+    crt_net['conv_last.bias'] = pretrained_net['model.10.bias']
+
+    crt_model.load_state_dict(crt_net)
+    crt_model.eval()
+    return crt_model
+
+def upscale_without_tiling(model, img):
+    img = np.array(img)
+    img = img[:, :, ::-1]
+    img = np.moveaxis(img, 2, 0) / 255
+    img = torch.from_numpy(img).float()
+    img = img.unsqueeze(0).to(shared.device)
+    with torch.no_grad():
+        output = model(img)
+    output = output.squeeze().float().cpu().clamp_(0, 1).numpy()
+    output = 255. * np.moveaxis(output, 0, 2)
+    output = output.astype(np.uint8)
+    output = output[:, :, ::-1]
+    return Image.fromarray(output, 'RGB')
+
+
+def esrgan_upscale(model, img):
+    if opts.ESRGAN_tile == 0:
+        return upscale_without_tiling(model, img)
+
+    grid = modules.images.split_grid(img, opts.ESRGAN_tile, opts.ESRGAN_tile, opts.ESRGAN_tile_overlap)
+    newtiles = []
+    scale_factor = 1
+
+    for y, h, row in grid.tiles:
+        newrow = []
+        for tiledata in row:
+            x, w, tile = tiledata
+
+            output = upscale_without_tiling(model, tile)
+            scale_factor = output.width // tile.width
+
+            newrow.append([x * scale_factor, w * scale_factor, output])
+        newtiles.append([y * scale_factor, h * scale_factor, newrow])
+
+    newgrid = modules.images.Grid(newtiles, grid.tile_w * scale_factor, grid.tile_h * scale_factor, grid.image_w * scale_factor, grid.image_h * scale_factor, grid.overlap * scale_factor)
+    output = modules.images.combine_grid(newgrid)
+    return output
+
+
+class UpscalerESRGAN(modules.images.Upscaler):
+    def __init__(self, filename, title):
+        self.name = title
+        self.model = load_model(filename)
+
+    def do_upscale(self, img):
+        model = self.model.to(shared.device)
+        img = esrgan_upscale(model, img)
+        return img
+
+
+def load_models(dirname):
+    for file in os.listdir(dirname):
+        path = os.path.join(dirname, file)
+        model_name, extension = os.path.splitext(file)
+
+        if extension != '.pt' and extension != '.pth':
+            continue
+
+        try:
+            modules.shared.sd_upscalers.append(UpscalerESRGAN(path, model_name))
+        except Exception:
+            print(f"Error loading ESRGAN model: {path}", file=sys.stderr)
+            print(traceback.format_exc(), file=sys.stderr)
diff --git a/modules/images.py b/modules/images.py
index 4b9667d2..4226db00 100644
--- a/modules/images.py
+++ b/modules/images.py
@@ -6,6 +6,7 @@ import re
 import numpy as np
 from PIL import Image, ImageFont, ImageDraw, PngImagePlugin
 
+import modules.shared
 from modules.shared import opts
 
 LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS)
@@ -45,20 +46,20 @@ def split_grid(image, tile_w=512, tile_h=512, overlap=64):
     cols = math.ceil((w - overlap) / non_overlap_width)
     rows = math.ceil((h - overlap) / non_overlap_height)
 
-    dx = (w - tile_w) // (cols-1) if cols > 1 else 0
-    dy = (h - tile_h) // (rows-1) if rows > 1 else 0
+    dx = (w - tile_w) / (cols-1) if cols > 1 else 0
+    dy = (h - tile_h) / (rows-1) if rows > 1 else 0
 
     grid = Grid([], tile_w, tile_h, w, h, overlap)
     for row in range(rows):
         row_images = []
 
-        y = row * dy
+        y = int(row * dy)
 
         if y + tile_h >= h:
             y = h - tile_h
 
         for col in range(cols):
-            x = col * dx
+            x = int(col * dx)
 
             if x+tile_w >= w:
                 x = w - tile_w
@@ -291,3 +292,32 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i
         with open(f"{fullfn_without_extension}.txt", "w", encoding="utf8") as file:
             file.write(info + "\n")
 
+
+class Upscaler:
+    name = "Lanczos"
+
+    def do_upscale(self, img):
+        return img
+
+    def upscale(self, img, w, h):
+        for i in range(3):
+            if img.width >= w and img.height >= h:
+                break
+
+            img = self.do_upscale(img)
+
+        if img.width != w or img.height != h:
+            img = img.resize((w, h), resample=LANCZOS)
+
+        return img
+
+
+class UpscalerNone(Upscaler):
+    name = "None"
+
+    def upscale(self, img, w, h):
+        return img
+
+
+modules.shared.sd_upscalers.append(UpscalerNone())
+modules.shared.sd_upscalers.append(Upscaler())
diff --git a/modules/img2img.py b/modules/img2img.py
index d5787dd3..b1ef1326 100644
--- a/modules/img2img.py
+++ b/modules/img2img.py
@@ -9,7 +9,7 @@ from modules.ui import plaintext_to_html
 import modules.images as images
 import modules.scripts
 
-def img2img(prompt: str, init_img, init_img_with_mask, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, use_GFPGAN: bool, mode: int, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, height: int, width: int, resize_mode: int, upscaler_name: str, upscale_overlap: int, inpaint_full_res: bool, inpainting_mask_invert: int, *args):
+def img2img(prompt: str, init_img, init_img_with_mask, steps: int, sampler_index: int, mask_blur: int, inpainting_fill: int, use_GFPGAN: bool, mode: int, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, height: int, width: int, resize_mode: int, upscaler_index: str, upscale_overlap: int, inpaint_full_res: bool, inpainting_mask_invert: int, *args):
     is_inpaint = mode == 1
     is_loopback = mode == 2
     is_upscale = mode == 3
@@ -81,8 +81,8 @@ def img2img(prompt: str, init_img, init_img_with_mask, steps: int, sampler_index
         initial_seed = None
         initial_info = None
 
-        upscaler = shared.sd_upscalers.get(upscaler_name, next(iter(shared.sd_upscalers.values())))
-        img = upscaler(init_img)
+        upscaler = shared.sd_upscalers[upscaler_index]
+        img = upscaler.upscale(init_img, init_img.width * 2, init_img.height * 2)
 
         processing.torch_gc()
 
diff --git a/modules/realesrgan_model.py b/modules/realesrgan_model.py
index 5a6666a3..e480887f 100644
--- a/modules/realesrgan_model.py
+++ b/modules/realesrgan_model.py
@@ -4,6 +4,7 @@ from collections import namedtuple
 import numpy as np
 from PIL import Image
 
+import modules.images
 from modules.shared import cmd_opts
 
 RealesrganModelInfo = namedtuple("RealesrganModelInfo", ["name", "location", "model", "netscale"])
@@ -12,6 +13,17 @@ realesrgan_models = []
 have_realesrgan = False
 RealESRGANer_constructor = None
 
+
+class UpscalerRealESRGAN(modules.images.Upscaler):
+    def __init__(self, upscaling, model_index):
+        self.upscaling = upscaling
+        self.model_index = model_index
+        self.name = realesrgan_models[model_index].name
+
+    def do_upscale(self, img):
+        return upscale_with_realesrgan(img, self.upscaling, self.model_index)
+
+
 def setup_realesrgan():
     global realesrgan_models
     global have_realesrgan
@@ -42,6 +54,9 @@ def setup_realesrgan():
         have_realesrgan = True
         RealESRGANer_constructor = RealESRGANer
 
+        for i, model in enumerate(realesrgan_models):
+            modules.shared.sd_upscalers.append(UpscalerRealESRGAN(model.netscale, i))
+
     except Exception:
         print("Error importing Real-ESRGAN:", file=sys.stderr)
         print(traceback.format_exc(), file=sys.stderr)
diff --git a/modules/shared.py b/modules/shared.py
index c8c2749a..72e92eb9 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -28,6 +28,7 @@ parser.add_argument("--always-batch-cond-uncond", action='store_true', help="a w
 parser.add_argument("--unload-gfpgan", action='store_true', help="unload GFPGAN every time after processing images. Warning: seems to cause memory leaks")
 parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "autocast"], default="autocast")
 parser.add_argument("--share", action='store_true', help="use share=True for gradio and make the UI accessible through their site (doesn't work for me but you might have better luck)")
+parser.add_argument("--esrgan-models-path", type=str, help="path to directory with ESRGAN models", default=os.path.join(script_path, 'ESRGAN'))
 cmd_opts = parser.parse_args()
 
 cpu = torch.device("cpu")
@@ -79,7 +80,8 @@ class Options:
         "font": OptionInfo("arial.ttf", "Font for image grids  that have text"),
         "enable_emphasis": OptionInfo(True, "Use (text) to make model pay more attention to text text and [text] to make it pay less attention"),
         "save_txt": OptionInfo(False, "Create a text file next to every image with generation parameters."),
-
+        "ESRGAN_tile": OptionInfo(192, "Tile size for ESRGAN upscaling. 0 = no tiling.", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}),
+        "ESRGAN_tile_overlap": OptionInfo(8, "Tile overlap, in pixels for ESRGAN upscaling. Low values = visible seam.", gr.Slider, {"minimum": 0, "maximum": 48, "step": 1}),
     }
 
     def __init__(self):
@@ -115,7 +117,6 @@ opts = Options()
 if os.path.exists(config_filename):
     opts.load(config_filename)
 
-
-sd_upscalers = {}
+sd_upscalers = []
 
 sd_model = None
diff --git a/modules/ui.py b/modules/ui.py
index d6b39c2f..4119369e 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -256,10 +256,10 @@ def create_ui(txt2img, img2img, run_extras, run_pnginfo):
 
                 with gr.Row():
                     use_gfpgan = gr.Checkbox(label='GFPGAN', value=False, visible=gfpgan.have_gfpgan)
+                    sd_upscale_overlap = gr.Slider(minimum=0, maximum=256, step=16, label='Tile overlap', value=64, visible=False)
 
                 with gr.Row():
-                    sd_upscale_upscaler_name = gr.Radio(label='Upscaler', choices=list(shared.sd_upscalers.keys()), value=list(shared.sd_upscalers.keys())[0], visible=False)
-                    sd_upscale_overlap = gr.Slider(minimum=0, maximum=256, step=16, label='Tile overlap', value=64, visible=False)
+                    sd_upscale_upscaler_name = gr.Radio(label='Upscaler', choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index", visible=False)
 
                 with gr.Row():
                     batch_count = gr.Slider(minimum=1, maximum=cmd_opts.max_batch_count, step=1, label='Batch count', value=1)
@@ -401,9 +401,18 @@ def create_ui(txt2img, img2img, run_extras, run_pnginfo):
             with gr.Column(variant='panel'):
                 with gr.Group():
                     image = gr.Image(label="Source", source="upload", interactive=True, type="pil")
-                    gfpgan_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="GFPGAN strength", value=1, interactive=gfpgan.have_gfpgan)
-                    realesrgan_resize = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Real-ESRGAN upscaling", value=2, interactive=realesrgan.have_realesrgan)
-                    realesrgan_model = gr.Radio(label='Real-ESRGAN model', choices=[x.name for x in realesrgan.realesrgan_models], value=realesrgan.realesrgan_models[0].name, type="index", interactive=realesrgan.have_realesrgan)
+
+                upscaling_resize = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Resize", value=2)
+
+                with gr.Group():
+                    extras_upscaler_1 = gr.Radio(label='Upscaler 1', choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index")
+
+                with gr.Group():
+                    extras_upscaler_2 = gr.Radio(label='Upscaler 2', choices=[x.name for x in shared.sd_upscalers], value=shared.sd_upscalers[0].name, type="index")
+                    extras_upscaler_2_visibility = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="Upscaler 2 visibility", value=1)
+
+                with gr.Group():
+                    gfpgan_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="GFPGAN strength", value=0, interactive=gfpgan.have_gfpgan)
 
                 submit = gr.Button('Generate', elem_id="extras_generate", variant='primary')
 
@@ -417,8 +426,10 @@ def create_ui(txt2img, img2img, run_extras, run_pnginfo):
             inputs=[
                 image,
                 gfpgan_strength,
-                realesrgan_resize,
-                realesrgan_model,
+                upscaling_resize,
+                extras_upscaler_1,
+                extras_upscaler_2,
+                extras_upscaler_2_visibility,
             ],
             outputs=[
                 result_image,
author	AUTOMATIC <16777216c@gmail.com>	2022-09-04 18:54:12 +0300
committer	AUTOMATIC <16777216c@gmail.com>	2022-09-04 18:54:12 +0300
commit	f299645aeeb65fcddde2d136fd550b6b01ffebb3 (patch)
tree	5e00d526688ce928b68f023ac91a4bf227aaf092 /modules
parent	78278ce695beffbf59c7320bb0441922d66b1c0e (diff)