change resolution #1

Merged · 7 commits · Jan 15, 2020
4 changes: 4 additions & 0 deletions code/cfg/train.yml
@@ -13,6 +13,10 @@ TREE:
   BRANCH_NUM: 3

 TRAIN:
+  IMG_SIZE: 256
+  CROP_IMG_SIZE: 252
+  RECP_FIELD_SIZE: 70
+  PATCH_STRIDE_SIZE: 8
   FLAG: True
   NET_G: '' # Specify the generator path to resume training
   NET_D: '' # Specify the discriminator path to resume training
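For orientation, the original 128-px settings satisfied an exact coverage identity: a 24 x 24 grid of discriminator patches, each with a 34-px receptive field placed every 4 px, spans 34 + 4*(24-1) = 126, which is why the crop was 126 rather than 128 (see the comment in datasets.py below). A minimal sketch to check that identity for a given config; the relation itself is an assumption carried over from that comment, and note that 70 + 8*(24-1) = 254, not 252:

def patch_coverage(recp_field, patch_stride, n_out):
    # Width of the input region spanned by an n_out x n_out grid of
    # patches with the given receptive field and stride.
    return recp_field + patch_stride * (n_out - 1)

print(patch_coverage(34, 4, 24))   # 126 -> the old crop width
print(patch_coverage(70, 8, 24))   # 254 -> vs. the new CROP_IMG_SIZE of 252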
3 changes: 2 additions & 1 deletion code/datasets.py
@@ -69,7 +69,7 @@ def get_imgs(img_path, imsize, bbox=None,
     # We resize the full image to be 126 X 126 (instead of 128 X 128) for the full coverage of the input (full) image by
     # the receptive fields of the final convolution layer of background discriminator

-    my_crop_width = 126
+    my_crop_width = cfg.TRAIN.CROP_IMG_SIZE
     re_fimg = transforms.Scale(int(my_crop_width * 76 / 64))(fimg)
     re_width, re_height = re_fimg.size

@@ -114,6 +114,7 @@ def get_imgs(img_path, imsize, bbox=None,

 class Dataset(data.Dataset):
     def __init__(self, data_dir, base_size=64, transform = None):
+        base_size = base_size * (cfg.TRAIN.IMG_SIZE//128)

         self.transform = transform
         self.norm = transforms.Compose([
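The added line doubles the effective base size when IMG_SIZE doubles from the original 128. Because the scaling uses integer division, it only behaves as intended when IMG_SIZE is a multiple of 128; a quick illustration of the arithmetic:

# How base_size scales with cfg.TRAIN.IMG_SIZE (integer division by 128):
for img_size in (128, 192, 256, 512):
    print(img_size, 64 * (img_size // 128))   # -> 64, 64, 128, 256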
4 changes: 4 additions & 0 deletions code/miscc/config.py
@@ -34,6 +34,10 @@

 # Training options
 __C.TRAIN = edict()
+__C.TRAIN.IMG_SIZE = 256
+__C.TRAIN.CROP_IMG_SIZE = 252
+__C.TRAIN.RECP_FIELD_SIZE = 70
+__C.TRAIN.PATCH_STRIDE_SIZE = 8
 __C.TRAIN.BATCH_SIZE = 64
 __C.TRAIN.BG_LOSS_WT = 10
 __C.TRAIN.VIS_COUNT = 64
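These defaults mirror the new train.yml keys; the yml values are merged over __C at startup, so the two files must agree on key spelling. A rough sketch of the StackGAN-style merge this codebase inherits (merge_cfg is a simplified stand-in for the repo's actual config loader, not its API):

import yaml
from easydict import EasyDict as edict

def merge_cfg(yml_path, base_cfg):
    # Simplified illustration: every key in the yml file must already
    # exist in the defaults, otherwise the override is rejected.
    with open(yml_path) as f:
        overrides = edict(yaml.safe_load(f))
    for section, values in overrides.items():
        for key, value in values.items():
            if key not in base_cfg[section]:
                raise KeyError('{}.{} is not a valid config key'.format(section, key))
            base_cfg[section][key] = value
    return base_cfg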
33 changes: 19 additions & 14 deletions code/model.py
@@ -114,7 +114,8 @@ def define_module(self):
         self.upsample2 = upBlock(ngf // 2, ngf // 4)
         self.upsample3 = upBlock(ngf // 4, ngf // 8)
         self.upsample4 = upBlock(ngf // 8, ngf // 16)
-        self.upsample5 = upBlock(ngf // 16, ngf // 16)
+        self.upsample5 = upBlock(ngf // 16, ngf // 32)
+        self.upsample6 = upBlock(ngf // 32, ngf // 32)


     def forward(self, z_code, code):
@@ -127,6 +128,7 @@ def forward(self, z_code, code):
         out_code = self.upsample3(out_code)
         out_code = self.upsample4(out_code)
         out_code = self.upsample5(out_code)
+        out_code = self.upsample6(out_code)

         return out_code
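Each upBlock doubles the spatial size, so the extra upsample6 stage takes the generator from five doublings (128 x 128) to six (256 x 256). A quick check of the ladder, assuming the usual 4 x 4 seed produced by the fc layer of INIT_STAGE_G:

# Spatial size after each upBlock, starting from an assumed 4x4 seed:
size = 4
for stage in range(1, 7):
    size *= 2
    print('upsample{}: {}x{}'.format(stage, size, size))   # ends at 256x256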

@@ -203,16 +205,16 @@ def __init__(self):
         self.gf_dim = cfg.GAN.GF_DIM
         self.define_module()
         self.upsampling = Upsample(scale_factor = 2, mode = 'bilinear')
-        self.scale_fimg = nn.UpsamplingBilinear2d(size = [126, 126])
+        self.scale_fimg = nn.UpsamplingBilinear2d(size=[cfg.TRAIN.CROP_IMG_SIZE, cfg.TRAIN.CROP_IMG_SIZE])

     def define_module(self):

         #Background stage
-        self.h_net1_bg = INIT_STAGE_G(self.gf_dim * 16, 2)
+        self.h_net1_bg = INIT_STAGE_G(self.gf_dim * 16 * (cfg.TRAIN.IMG_SIZE//128), 2)
         self.img_net1_bg = GET_IMAGE_G(self.gf_dim) # Background generation network

         # Parent stage networks
-        self.h_net1 = INIT_STAGE_G(self.gf_dim * 16, 1)
+        self.h_net1 = INIT_STAGE_G(self.gf_dim * 16 * (cfg.TRAIN.IMG_SIZE//128), 1)
         self.h_net2 = NEXT_STAGE_G(self.gf_dim, use_hrc = 1)
         self.img_net2 = GET_IMAGE_G(self.gf_dim // 2) # Parent foreground generation network
         self.img_net2_mask = GET_MASK_G(self.gf_dim // 2) # Parent mask generation network
@@ -294,15 +296,18 @@ def downBlock(in_planes, out_planes):

 def encode_parent_and_child_img(ndf): # Defines the encoder network used for parent and child image
     encode_img = nn.Sequential(
-        nn.Conv2d(3, ndf, 4, 2, 1, bias=False),
+        nn.Conv2d(3, ndf, 4, 2, 1, bias=False), # ((256+2)-4)/2 + 1 = 128
         nn.LeakyReLU(0.2, inplace=True),
-        nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
+        nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), # ((128+2)-4)/2 + 1 = 64
         nn.BatchNorm2d(ndf * 2),
         nn.LeakyReLU(0.2, inplace=True),
-        nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
+        nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), # ((64+2)-4)/2 + 1 = 32
         nn.BatchNorm2d(ndf * 4),
         nn.LeakyReLU(0.2, inplace=True),
-        nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
+        nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), # ((32+2)-4)/2 + 1 = 16
         nn.BatchNorm2d(ndf * 8),
-        nn.LeakyReLU(0.2, inplace=True)
+        nn.LeakyReLU(0.2, inplace=True),
+        nn.Conv2d(ndf * 8, ndf * 8, 4, 2, 1, bias=False), # ((16+2)-4)/2 + 1 = 8
+        nn.BatchNorm2d(ndf * 8),
+        nn.LeakyReLU(0.2, inplace=True)
     )
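The per-layer comments follow the standard convolution output-size formula, floor((W + 2P - K)/S) + 1, with K = 4, S = 2, P = 1. A quick check of the whole chain for a 256-px input:

def conv_out(size, kernel=4, stride=2, pad=1):
    # floor((size + 2*pad - kernel) / stride) + 1
    return (size + 2 * pad - kernel) // stride + 1

size = 256
for _ in range(5):           # five stride-2 convs in the encoder above
    size = conv_out(size)
    print(size)              # 128, 64, 32, 16, 8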
@@ -379,12 +384,12 @@ def forward(self, x_var):
             return [classi_score, rf_score]

         elif self.stg_no > 0:
-            x_code = self.img_code_s16(x_var)
-            x_code = self.img_code_s32(x_code)
-            x_code = self.img_code_s32_1(x_code)
-            h_c_code = self.jointConv(x_code)
-            code_pred = self.logits(h_c_code) # Predicts the parent code and child code in parent and child stage respectively
-            rf_score = self.uncond_logits(x_code) # This score is not used in parent stage while training
+            x_code = self.img_code_s16(x_var)     # ([batch, 512, 4, 4])
+            x_code = self.img_code_s32(x_code)    # ([batch, 1024, 4, 4])
+            x_code = self.img_code_s32_1(x_code)  # ([batch, 512, 4, 4])
+            h_c_code = self.jointConv(x_code)     # ([batch, 512, 4, 4])
+            code_pred = self.logits(h_c_code)     # ([batch, 20, 1, 1]) Predicts the parent code and child code in parent and child stage respectively
+            rf_score = self.uncond_logits(x_code) # ([batch, 1, 1, 1]) This score is not used in parent stage while training
             return [code_pred.view(-1, self.ef_dim), rf_score.view(-1)]


9 changes: 5 additions & 4 deletions code/trainer.py
@@ -253,9 +253,9 @@ def train_Dnet(self, idx, count):
                y2 = self.warped_bbox[3][i]

                a1 = max(torch.tensor(0).float().cuda(), torch.ceil((x1 - self.recp_field)/self.patch_stride))
-               a2 = min(torch.tensor(self.n_out - 1).float().cuda(), torch.floor((self.n_out - 1) - ((126 - self.recp_field) - x2)/self.patch_stride)) + 1
+               a2 = min(torch.tensor(self.n_out - 1).float().cuda(), torch.floor((self.n_out - 1) - ((cfg.TRAIN.CROP_IMG_SIZE - self.recp_field) - x2)/self.patch_stride)) + 1
                b1 = max(torch.tensor(0).float().cuda(), torch.ceil((y1 - self.recp_field)/self.patch_stride))
-               b2 = min(torch.tensor(self.n_out - 1).float().cuda(), torch.floor((self.n_out - 1) - ((126 - self.recp_field) - y2)/self.patch_stride)) + 1
+               b2 = min(torch.tensor(self.n_out - 1).float().cuda(), torch.floor((self.n_out - 1) - ((cfg.TRAIN.CROP_IMG_SIZE - self.recp_field) - y2)/self.patch_stride)) + 1

                if (x1 != x2 and y1 != y2):
                    weights_real[i, :, a1.type(torch.int) : a2.type(torch.int) , b1.type(torch.int) : b2.type(torch.int)] = 0.0
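The a1/a2 and b1/b2 bounds select the discriminator grid cells whose receptive fields overlap the warped bounding box, so the background real/fake loss is not computed on foreground pixels. A plain-Python paraphrase of that index arithmetic (masked_range is a hypothetical helper mirroring the torch expressions above, not a function in the repo):

import math

def masked_range(lo_px, hi_px, crop, recp, stride, n_out):
    # Cell k sees input pixels [k*stride, k*stride + recp); zero out every
    # cell whose receptive field overlaps the bbox extent [lo_px, hi_px].
    lo = max(0, math.ceil((lo_px - recp) / stride))
    hi = min(n_out - 1, math.floor((n_out - 1) - ((crop - recp) - hi_px) / stride)) + 1
    return lo, hi

# With the original settings (crop 126, recp 34, stride 4, n_out 24):
print(masked_range(10, 60, 126, 34, 4, 24))   # (0, 16)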
@@ -378,9 +378,10 @@ def train(self):
         hard_noise = \
             Variable(torch.FloatTensor(self.batch_size, nz).normal_(0, 1)).cuda()

-        self.patch_stride = float(4) # Receptive field stride given the current discriminator architecture for background stage
+        self.patch_stride = float(cfg.TRAIN.PATCH_STRIDE_SIZE) # Receptive field stride given the current discriminator architecture for background stage
         self.n_out = 24 # Output size of the discriminator at the background stage; N X N where N = 24
-        self.recp_field = 34 # Receptive field of each of the member of N X N
+        # see: https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/39#issuecomment-368239697
+        self.recp_field = cfg.TRAIN.RECP_FIELD_SIZE # Receptive field of each member of the N X N output


         if cfg.CUDA:
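The new 70 / 8 pair matches the receptive field and effective stride of a 70 x 70 PatchGAN-style stack (three stride-2 4 x 4 convs followed by two stride-1 4 x 4 convs), which is what the linked issue derives. A sketch of that recursion; the layer list is an assumption about the background discriminator's architecture:

# Receptive field (rf) and effective stride (jump) of a conv stack:
layers = [(4, 2), (4, 2), (4, 2), (4, 1), (4, 1)]   # (kernel, stride)
rf, jump = 1, 1
for k, s in layers:
    rf += (k - 1) * jump
    jump *= s
print(rf, jump)   # 70 8 -> RECP_FIELD_SIZE, PATCH_STRIDE_SIZE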