support DIY models/archs; test benchmarks

ryanxingql · Sep 10, 2024 · 1e0a17c · 1e0a17c
1 parent aed540d
commit 1e0a17c
Show file tree

Hide file tree

Showing 17 changed files with 212 additions and 56 deletions.
diff --git a/doc.md b/doc.md
@@ -125,19 +125,12 @@ powerqe
 ## Training
 
 ```bash
-conda activate pqe
-
-#CUDA_VISIBLE_DEVICES=0 python powerqe/train.py -opt options/train/ESRGAN/RRDBNet_DIV2K_LMDB_G1.yml --auto_resume
-CUDA_VISIBLE_DEVICES=<gpus> python powerqe/train.py -opt <cfg_path> [--auto_resume] [--debug] [--force_yml <key>=<value>]
+#chmod +x scripts/train.sh
 
-# or
-#CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --nproc_per_node=1 --master_port=4321 powerqe/train.py -opt options/train/ESRGAN/RRDBNet_DIV2K_LMDB_G1.yml --launcher pytorch --auto_resume
-CUDA_VISIBLE_DEVICES=<gpus> python -m torch.distributed.launch --nproc_per_node=<num_gpus> --master_port=<master_port> powerqe/train.py -opt <cfg_path> --launcher pytorch [--auto_resume] [--debug] [--force_yml <key>=<value>]
+conda activate pqe
 
-# or
-#chmod +x scripts/dist_train.sh
-#CUDA_VISIBLE_DEVICES=0 scripts/dist_train.sh 1 options/train/ESRGAN/RRDBNet_DIV2K_LMDB_G1.yml --auto_resume
-CUDA_VISIBLE_DEVICES=<gpus> [PORT=<master_port>] scripts/dist_train.sh <num_gpus> <cfg_path> [--auto_resume] [--debug] [--force_yml <key>=<value>]
+#CUDA_VISIBLE_DEVICES=0 scripts/train.sh 1 options/train/ESRGAN/RRDBNet_DIV2K_LMDB_G1.yml --auto_resume
+CUDA_VISIBLE_DEVICES=<gpus> [PORT=<master_port>] scripts/train.sh <num_gpus> <cfg_path> [--auto_resume] [--debug] [--force_yml <key>=<value>]
 ```
 
 - `auto_resume`: Automatically resume from the latest existing checkpoint.
@@ -147,19 +140,12 @@ CUDA_VISIBLE_DEVICES=<gpus> [PORT=<master_port>] scripts/dist_train.sh <num_gpus
 ## Testing
 
 ```bash
-conda activate pqe
-
-#CUDA_VISIBLE_DEVICES=0 python powerqe/test.py -opt options/test/ESRGAN/RRDBNet_DIV2K_LMDB_G1_latest.yml --force_yml path:pretrain_network_g=experiments/train_ESRGAN_RRDBNet_DIV2K_LMDB_G1/models/net_g_600000.pth
-CUDA_VISIBLE_DEVICES=<gpus> python powerqe/test.py -opt <cfg_path> [--force_yml <key>=<value>]
+#chmod +x scripts/test.sh
 
-# or
-#CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --nproc_per_node=1 --master_port=4321 powerqe/test.py -opt options/test/ESRGAN/RRDBNet_DIV2K_LMDB_G1_latest.yml --launcher pytorch --force_yml path:pretrain_network_g=experiments/train_ESRGAN_RRDBNet_DIV2K_LMDB_G1/models/net_g_600000.pth
-CUDA_VISIBLE_DEVICES=<gpus> python -m torch.distributed.launch --nproc_per_node=<num_gpus> --master_port=<master_port> powerqe/test.py -opt <cfg_path> --launcher pytorch [--force_yml <key>=<value>]
+conda activate pqe
 
-# or
-#chmod +x scripts/dist_test.sh
-#CUDA_VISIBLE_DEVICES=0 scripts/dist_test.sh 1 options/test/ESRGAN/RRDBNet_DIV2K_LMDB_G1_latest.yml --force_yml path:pretrain_network_g=experiments/train_ESRGAN_RRDBNet_DIV2K_LMDB_G1/models/net_g_600000.pth
-CUDA_VISIBLE_DEVICES=<gpus> [PORT=<master_port>] scripts/dist_test.sh <num_gpus> <cfg_path> [--force_yml <key>=<value>]
+#CUDA_VISIBLE_DEVICES=0 scripts/test.sh 1 options/test/ESRGAN/RRDBNet_DIV2K_LMDB_G1_latest.yml --force_yml path:pretrain_network_g=experiments/train_ESRGAN_RRDBNet_DIV2K_LMDB_G1/models/net_g_600000.pth
+CUDA_VISIBLE_DEVICES=<gpus> [PORT=<master_port>] scripts/test.sh <num_gpus> <cfg_path> [--force_yml <key>=<value>]
 ```
 
 - Most models support only single-GPU testing, even when multi-GPU testing is requested.

diff --git a/options/test/identity/DIV2K_G1.yml b/options/test/identity/DIV2K_G1.yml
@@ -0,0 +1,47 @@
+# general settings
+name: test_identity_DIV2K_G1
+model_type: QEModel
+scale: 1
+num_gpu: 1  # set num_gpu: 0 for cpu mode
+manual_seed: 0
+
+# dataset settings
+datasets:
+  test:  # multiple test datasets are acceptable
+    name: DIV2K
+    type: PairedImageDataset
+    dataroot_gt: datasets/DIV2K/valid
+    dataroot_lq: datasets/DIV2K/valid_BPG_QP37
+    io_backend:
+      type: disk
+
+# network structures
+network_g:
+  type: IdentityNet
+  scale: 1  # IdentityNet defaults to scale=1
+
+# path
+path:
+  pretrain_network_g: ~
+  strict_load_g: ~
+
+# validation settings
+val:
+  save_img: true  # save img -> tensor -> img version, which is lossy
+  suffix: ~  # add suffix to saved images, if None, use exp name
+
+  metrics:
+    psnr:
+      type: calculate_psnr
+      crop_border: 0
+      test_y_channel: false
+    ssim:
+      type: calculate_ssim
+      crop_border: 0
+      test_y_channel: false
+    fid:
+      type: pyiqa
+      better: lower
+    lpips:
+      type: pyiqa
+      better: lower
diff --git a/powerqe/__init__.py b/powerqe/__init__.py
diff --git a/powerqe/archs/__init__.py b/powerqe/archs/__init__.py
@@ -0,0 +1,5 @@
+from .builder import build_network
+from .registry import ARCH_REGISTRY
+from .identitynet_arch import IdentityNet
+
+__all__ = ["build_network", "ARCH_REGISTRY", "IdentityNet"]
diff --git a/powerqe/archs/builder.py b/powerqe/archs/builder.py
@@ -0,0 +1,14 @@
+from copy import deepcopy
+
+from basicsr.utils import get_root_logger
+
+from .registry import ARCH_REGISTRY
+
+
+def build_network(opt):  # build the network named by opt["type"] from ARCH_REGISTRY
+    opt = deepcopy(opt)  # copy first: the pop below must not alter the caller's dict
+    network_type = opt.pop("type")  # "type" selects the class and is not a constructor argument
+    net = ARCH_REGISTRY.get(network_type)(**opt)  # all remaining keys are passed as keyword arguments
+    logger = get_root_logger()
+    logger.info(f"Network [{net.__class__.__name__}] is created.")
+    return net
diff --git a/powerqe/archs/identitynet_arch.py b/powerqe/archs/identitynet_arch.py
@@ -0,0 +1,19 @@
+from torch import nn as nn
+from torch.nn import functional as F
+
+from .registry import ARCH_REGISTRY
+
+
+@ARCH_REGISTRY.register()
+class IdentityNet(nn.Module):
+    """Identity network for testing benchmarks (in tensors): returns its input, up-scaled when scale != 1."""
+
+    def __init__(self, scale=1, upscale_mode="nearest"):  # upscale_mode is forwarded to F.interpolate as `mode`
+        super(IdentityNet, self).__init__()
+        self.scale = scale
+        self.upscale_mode = upscale_mode
+
+    def forward(self, x):
+        if self.scale != 1:  # with scale == 1 the input is returned untouched
+            x = F.interpolate(x, scale_factor=self.scale, mode=self.upscale_mode)
+        return x
diff --git a/powerqe/archs/registry.py b/powerqe/archs/registry.py
@@ -0,0 +1,3 @@
+from basicsr.utils.registry import ARCH_REGISTRY as ARCH_REGISTRY_BASICSR
+
+ARCH_REGISTRY = ARCH_REGISTRY_BASICSR
diff --git a/powerqe/models/__init__.py b/powerqe/models/__init__.py
@@ -0,0 +1,5 @@
+from .builder import build_model
+from .registry import MODEL_REGISTRY
+from .qe_model import QEModel
+
+__all__ = ["build_model", "MODEL_REGISTRY", "QEModel"]
diff --git a/powerqe/models/builder.py b/powerqe/models/builder.py
@@ -0,0 +1,19 @@
+from copy import deepcopy
+
+from basicsr.utils import get_root_logger
+
+from .registry import MODEL_REGISTRY
+
+
+def build_model(opt):
+    """Build model from options.
+
+    Args:
+        opt (dict): Configuration. It must contain:
+            model_type (str): Name under which the model class is looked up in MODEL_REGISTRY.
+    """
+    opt = deepcopy(opt)  # the model is constructed from its own copy of the options
+    model = MODEL_REGISTRY.get(opt["model_type"])(opt)  # the whole option dict is the only constructor argument
+    logger = get_root_logger()
+    logger.info(f"Model [{model.__class__.__name__}] is created.")
+    return model
diff --git a/powerqe/models/qe_model.py b/powerqe/models/qe_model.py
@@ -0,0 +1,32 @@
+from basicsr.models.sr_model import SRModel
+
+from powerqe.archs import build_network
+
+from .registry import MODEL_REGISTRY
+
+
+@MODEL_REGISTRY.register()
+class QEModel(SRModel):
+    """Base QE model for single image quality enhancement."""
+
+    def __init__(self, opt):
+        super(SRModel, self).__init__(opt)  # starts the MRO lookup after SRModel, so SRModel.__init__ is skipped; NOTE(review): presumably so net_g comes from powerqe's build_network — confirm
+
+        # define network
+        self.net_g = build_network(opt["network_g"])  # build_network imported from powerqe.archs
+        self.net_g = self.model_to_device(self.net_g)
+        self.print_network(self.net_g)
+
+        # load pretrained models
+        load_path = self.opt["path"].get("pretrain_network_g", None)
+        if load_path is not None:  # nothing is loaded when the path is absent or null
+            param_key = self.opt["path"].get("param_key_g", "params")
+            self.load_network(
+                self.net_g,
+                load_path,
+                self.opt["path"].get("strict_load_g", True),  # True only when the key is absent; an explicit null is passed through as None
+                param_key,
+            )
+
+        if self.is_train:
+            self.init_training_settings()
diff --git a/powerqe/models/registry.py b/powerqe/models/registry.py
@@ -0,0 +1,4 @@
+from basicsr.utils.registry import MODEL_REGISTRY as MODEL_REGISTRY_BASICSR
+
+
+MODEL_REGISTRY = MODEL_REGISTRY_BASICSR
diff --git a/powerqe/test.py b/powerqe/test.py
@@ -3,10 +3,13 @@
 from os import path as osp
 
 from basicsr.data import build_dataloader, build_dataset
-from basicsr.models import build_model
+
+# from basicsr.models import build_model
 from basicsr.utils import get_env_info, get_root_logger, get_time_str, make_exp_dirs
 from basicsr.utils.options import dict2str, parse_options
 
+from powerqe.models import build_model
+
 
 def test_pipeline(root_path):
     # parse options, set distributed setting, set random seed

diff --git a/powerqe/train.py b/powerqe/train.py
@@ -8,7 +8,8 @@
 from basicsr.data import build_dataloader, build_dataset
 from basicsr.data.data_sampler import EnlargedSampler
 from basicsr.data.prefetch_dataloader import CPUPrefetcher, CUDAPrefetcher
-from basicsr.models import build_model
+
+# from basicsr.models import build_model
 from basicsr.utils import (
     AvgTimer,
     MessageLogger,
@@ -24,6 +25,8 @@
 )
 from basicsr.utils.options import copy_opt_file, dict2str, parse_options
 
+from powerqe.models import build_model
+
 
 def init_tb_loggers(opt):
     # initialize wandb logger before tensorboard logger to allow proper sync

diff --git a/scripts/dist_test.sh b/scripts/dist_test.sh
diff --git a/scripts/dist_train.sh b/scripts/dist_train.sh
diff --git a/scripts/test.sh b/scripts/test.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+GPUS=$1  # number of GPUs; 1 selects the plain (non-distributed) launch
+CONFIG=$2  # path to the option file
+PORT=${PORT:-4321}  # master port for the distributed launch; override via the PORT env variable
+
+# usage: both positional arguments are required; misuse exits with a non-zero status
+if [ $# -lt 2 ] ;then
+    echo "usage:"
+    echo "./scripts/test.sh [number of gpu] [path to option file]"
+    exit 1
+fi
+
+# check if GPUS is 1 for single-GPU, otherwise run multi-GPU
+if [ "$GPUS" -eq 1 ]; then
+    # if only one GPU, run the simple version; expansions are quoted so paths and extra args keep their boundaries
+    PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH}" \
+    python powerqe/test.py -opt "$CONFIG" "${@:3}"
+else
+    # if multiple GPUs, run the distributed version; arguments after the second are forwarded unchanged
+    PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH}" \
+    python -m torch.distributed.launch --nproc_per_node="$GPUS" --master_port="$PORT" \
+    powerqe/test.py -opt "$CONFIG" --launcher pytorch "${@:3}"
+fi
diff --git a/scripts/train.sh b/scripts/train.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+GPUS=$1  # number of GPUs; 1 selects the plain (non-distributed) launch
+CONFIG=$2  # path to the option file
+PORT=${PORT:-4321}  # master port for the distributed launch; override via the PORT env variable
+
+# usage: both positional arguments are required; misuse exits with a non-zero status
+if [ $# -lt 2 ] ;then
+    echo "usage:"
+    echo "./scripts/train.sh [number of gpu] [path to option file]"
+    exit 1
+fi
+
+# check if GPUS is 1 for single-GPU, otherwise run multi-GPU
+if [ "$GPUS" -eq 1 ]; then
+    # single GPU version; expansions are quoted so paths and extra args keep their boundaries
+    PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH}" \
+    python powerqe/train.py -opt "$CONFIG" "${@:3}"
+else
+    # multi-GPU version; arguments after the second are forwarded unchanged
+    PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH}" \
+    python -m torch.distributed.launch --nproc_per_node="$GPUS" --master_port="$PORT" \
+    powerqe/train.py -opt "$CONFIG" --launcher pytorch "${@:3}"
+fi
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from basicsr.utils.registry import ARCH_REGISTRY as ARCH_REGISTRY_BASICSR

		ARCH_REGISTRY = ARCH_REGISTRY_BASICSR
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,4 @@
		from basicsr.utils.registry import MODEL_REGISTRY as MODEL_REGISTRY_BASICSR


		MODEL_REGISTRY = MODEL_REGISTRY_BASICSR