change resolution #1

Merged · 7 commits · Jan 15, 2020
4 changes: 4 additions & 0 deletions code/cfg/train.yml
@@ -13,6 +13,10 @@ TREE:
   BRANCH_NUM: 3

 TRAIN:
+  IMG_SIZE: 256
+  CROP_IMG_SIZE: 252
+  RECP_FIELD_SIZE: 70
+  PATCH_STRIDE_SIZE: 8
   FLAG: True
   NET_G: '' # Specify the generator path to resume training
   NET_D: '' # Specify the discriminator path to resume training
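For orientation, the original 128-px settings satisfied an exact coverage identity: a 24 x 24 grid of discriminator patches, each with a 34-px receptive field placed every 4 px, spans 34 + 4*(24-1) = 126, which is why the crop was 126 rather than 128 (see the comment in datasets.py below). A minimal sketch to check that identity for a given config; the relation itself is an assumption carried over from that comment, and note that 70 + 8*(24-1) = 254, not 252:

def patch_coverage(recp_field, patch_stride, n_out):
    # Width of the input region spanned by an n_out x n_out grid of
    # patches with the given receptive field and stride.
    return recp_field + patch_stride * (n_out - 1)

print(patch_coverage(34, 4, 24))   # 126 -> the old crop width
print(patch_coverage(70, 8, 24))   # 254 -> vs. the new CROP_IMG_SIZE of 252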
3 changes: 2 additions & 1 deletion code/datasets.py
@@ -69,7 +69,7 @@ def get_imgs(img_path, imsize, bbox=None,
     # We resize the full image to be 126 X 126 (instead of 128 X 128) for the full coverage of the input (full) image by
     # the receptive fields of the final convolution layer of background discriminator

-    my_crop_width = 126
+    my_crop_width = cfg.TRAIN.CROP_IMG_SIZE
     re_fimg = transforms.Scale(int(my_crop_width * 76 / 64))(fimg)
     re_width, re_height = re_fimg.size

@@ -114,6 +114,7 @@ def get_imgs(img_path, imsize, bbox=None,

 class Dataset(data.Dataset):
     def __init__(self, data_dir, base_size=64, transform = None):
+        base_size = base_size * (cfg.TRAIN.IMG_SIZE//128)

         self.transform = transform
         self.norm = transforms.Compose([
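The added line doubles the effective base size when IMG_SIZE doubles from the original 128. Because the scaling uses integer division, it only behaves as intended when IMG_SIZE is a multiple of 128; a quick illustration of the arithmetic:

# How base_size scales with cfg.TRAIN.IMG_SIZE (integer division by 128):
for img_size in (128, 192, 256, 512):
    print(img_size, 64 * (img_size // 128))   # -> 64, 64, 128, 256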
4 changes: 4 additions & 0 deletions code/miscc/config.py
@@ -34,6 +34,10 @@

 # Training options
 __C.TRAIN = edict()
+__C.TRAIN.IMG_SIZE = 256
+__C.TRAIN.CROP_IMG_SIZE = 252
+__C.TRAIN.RECP_FIELD_SIZE = 70
+__C.TRAIN.PATCH_STRIDE_SIZE = 8
 __C.TRAIN.BATCH_SIZE = 64
 __C.TRAIN.BG_LOSS_WT = 10
 __C.TRAIN.VIS_COUNT = 64
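These defaults mirror the new train.yml keys; the yml values are merged over __C at startup, so the two files must agree on key spelling. A rough sketch of the StackGAN-style merge this codebase inherits (merge_cfg is a simplified stand-in for the repo's actual config loader, not its API):

import yaml
from easydict import EasyDict as edict

def merge_cfg(yml_path, base_cfg):
    # Simplified illustration: every key in the yml file must already
    # exist in the defaults, otherwise the override is rejected.
    with open(yml_path) as f:
        overrides = edict(yaml.safe_load(f))
    for section, values in overrides.items():
        for key, value in values.items():
            if key not in base_cfg[section]:
                raise KeyError('{}.{} is not a valid config key'.format(section, key))
            base_cfg[section][key] = value
    return base_cfg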
33 changes: 19 additions & 14 deletions code/model.py
@@ -114,7 +114,8 @@ def define_module(self):
         self.upsample2 = upBlock(ngf // 2, ngf // 4)
         self.upsample3 = upBlock(ngf // 4, ngf // 8)
         self.upsample4 = upBlock(ngf // 8, ngf // 16)
-        self.upsample5 = upBlock(ngf // 16, ngf // 16)
+        self.upsample5 = upBlock(ngf // 16, ngf // 32)
+        self.upsample6 = upBlock(ngf // 32, ngf // 32)


     def forward(self, z_code, code):
@@ -127,6 +128,7 @@ def forward(self, z_code, code):
         out_code = self.upsample3(out_code)
         out_code = self.upsample4(out_code)
         out_code = self.upsample5(out_code)
+        out_code = self.upsample6(out_code)

         return out_code
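Each upBlock doubles the spatial size, so the extra upsample6 stage takes the generator from five doublings (128 x 128) to six (256 x 256). A quick check of the ladder, assuming the usual 4 x 4 seed produced by the fc layer of INIT_STAGE_G:

# Spatial size after each upBlock, starting from an assumed 4x4 seed:
size = 4
for stage in range(1, 7):
    size *= 2
    print('upsample{}: {}x{}'.format(stage, size, size))   # ends at 256x256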

@@ -203,16 +205,16 @@ def __init__(self):
         self.gf_dim = cfg.GAN.GF_DIM
         self.define_module()
         self.upsampling = Upsample(scale_factor = 2, mode = 'bilinear')
-        self.scale_fimg = nn.UpsamplingBilinear2d(size = [126, 126])
+        self.scale_fimg = nn.UpsamplingBilinear2d(size=[cfg.TRAIN.CROP_IMG_SIZE, cfg.TRAIN.CROP_IMG_SIZE])

     def define_module(self):

         #Background stage
-        self.h_net1_bg = INIT_STAGE_G(self.gf_dim * 16, 2)
+        self.h_net1_bg = INIT_STAGE_G(self.gf_dim * 16 * (cfg.TRAIN.IMG_SIZE//128), 2)
         self.img_net1_bg = GET_IMAGE_G(self.gf_dim) # Background generation network

         # Parent stage networks
-        self.h_net1 = INIT_STAGE_G(self.gf_dim * 16, 1)
+        self.h_net1 = INIT_STAGE_G(self.gf_dim * 16 * (cfg.TRAIN.IMG_SIZE//128), 1)
         self.h_net2 = NEXT_STAGE_G(self.gf_dim, use_hrc = 1)
         self.img_net2 = GET_IMAGE_G(self.gf_dim // 2) # Parent foreground generation network
         self.img_net2_mask = GET_MASK_G(self.gf_dim // 2) # Parent mask generation network
@@ -294,15 +296,18 @@ def downBlock(in_planes, out_planes):

 def encode_parent_and_child_img(ndf): # Defines the encoder network used for parent and child image
     encode_img = nn.Sequential(
-        nn.Conv2d(3, ndf, 4, 2, 1, bias=False),
+        nn.Conv2d(3, ndf, 4, 2, 1, bias=False), # ((256+2)-4)/2 + 1 = 128
         nn.LeakyReLU(0.2, inplace=True),
-        nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
+        nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), # ((128+2)-4)/2 + 1 = 64
         nn.BatchNorm2d(ndf * 2),
         nn.LeakyReLU(0.2, inplace=True),
-        nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
+        nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), # ((64+2)-4)/2 + 1 = 32
         nn.BatchNorm2d(ndf * 4),
         nn.LeakyReLU(0.2, inplace=True),
-        nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
+        nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), # ((32+2)-4)/2 + 1 = 16
         nn.BatchNorm2d(ndf * 8),
-        nn.LeakyReLU(0.2, inplace=True)
+        nn.LeakyReLU(0.2, inplace=True),
+        nn.Conv2d(ndf * 8, ndf * 8, 4, 2, 1, bias=False), # ((16+2)-4)/2 + 1 = 8
+        nn.BatchNorm2d(ndf * 8),
+        nn.LeakyReLU(0.2, inplace=True)
     )
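The per-layer comments follow the standard convolution output-size formula, floor((W + 2P - K)/S) + 1, with K = 4, S = 2, P = 1. A quick check of the whole chain for a 256-px input:

def conv_out(size, kernel=4, stride=2, pad=1):
    # floor((size + 2*pad - kernel) / stride) + 1
    return (size + 2 * pad - kernel) // stride + 1

size = 256
for _ in range(5):           # five stride-2 convs in the encoder above
    size = conv_out(size)
    print(size)              # 128, 64, 32, 16, 8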
@@ -379,12 +384,12 @@ def forward(self, x_var):
             return [classi_score, rf_score]

         elif self.stg_no > 0:
-            x_code = self.img_code_s16(x_var)
-            x_code = self.img_code_s32(x_code)
-            x_code = self.img_code_s32_1(x_code)
-            h_c_code = self.jointConv(x_code)
-            code_pred = self.logits(h_c_code) # Predicts the parent code and child code in parent and child stage respectively
-            rf_score = self.uncond_logits(x_code) # This score is not used in parent stage while training
+            x_code = self.img_code_s16(x_var)     # ([batch, 512, 4, 4])
+            x_code = self.img_code_s32(x_code)    # ([batch, 1024, 4, 4])
+            x_code = self.img_code_s32_1(x_code)  # ([batch, 512, 4, 4])
+            h_c_code = self.jointConv(x_code)     # ([batch, 512, 4, 4])
+            code_pred = self.logits(h_c_code)     # ([batch, 20, 1, 1]) Predicts the parent code and child code in parent and child stage respectively
+            rf_score = self.uncond_logits(x_code) # ([batch, 1, 1, 1]) This score is not used in parent stage while training
             return [code_pred.view(-1, self.ef_dim), rf_score.view(-1)]


9 changes: 5 additions & 4 deletions code/trainer.py
@@ -253,9 +253,9 @@ def train_Dnet(self, idx, count):
                y2 = self.warped_bbox[3][i]

                a1 = max(torch.tensor(0).float().cuda(), torch.ceil((x1 - self.recp_field)/self.patch_stride))
-               a2 = min(torch.tensor(self.n_out - 1).float().cuda(), torch.floor((self.n_out - 1) - ((126 - self.recp_field) - x2)/self.patch_stride)) + 1
+               a2 = min(torch.tensor(self.n_out - 1).float().cuda(), torch.floor((self.n_out - 1) - ((cfg.TRAIN.CROP_IMG_SIZE - self.recp_field) - x2)/self.patch_stride)) + 1
                b1 = max(torch.tensor(0).float().cuda(), torch.ceil((y1 - self.recp_field)/self.patch_stride))
-               b2 = min(torch.tensor(self.n_out - 1).float().cuda(), torch.floor((self.n_out - 1) - ((126 - self.recp_field) - y2)/self.patch_stride)) + 1
+               b2 = min(torch.tensor(self.n_out - 1).float().cuda(), torch.floor((self.n_out - 1) - ((cfg.TRAIN.CROP_IMG_SIZE - self.recp_field) - y2)/self.patch_stride)) + 1

                if (x1 != x2 and y1 != y2):
                    weights_real[i, :, a1.type(torch.int) : a2.type(torch.int) , b1.type(torch.int) : b2.type(torch.int)] = 0.0
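The a1/a2 and b1/b2 bounds select the discriminator grid cells whose receptive fields overlap the warped bounding box, so the background real/fake loss is not computed on foreground pixels. A plain-Python paraphrase of that index arithmetic (masked_range is a hypothetical helper mirroring the torch expressions above, not a function in the repo):

import math

def masked_range(lo_px, hi_px, crop, recp, stride, n_out):
    # Cell k sees input pixels [k*stride, k*stride + recp); zero out every
    # cell whose receptive field overlaps the bbox extent [lo_px, hi_px].
    lo = max(0, math.ceil((lo_px - recp) / stride))
    hi = min(n_out - 1, math.floor((n_out - 1) - ((crop - recp) - hi_px) / stride)) + 1
    return lo, hi

# With the original settings (crop 126, recp 34, stride 4, n_out 24):
print(masked_range(10, 60, 126, 34, 4, 24))   # (0, 16)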
@@ -378,9 +378,10 @@ def train(self):
         hard_noise = \
             Variable(torch.FloatTensor(self.batch_size, nz).normal_(0, 1)).cuda()

-        self.patch_stride = float(4) # Receptive field stride given the current discriminator architecture for background stage
+        self.patch_stride = float(cfg.TRAIN.PATCH_STRIDE_SIZE) # Receptive field stride given the current discriminator architecture for background stage
         self.n_out = 24 # Output size of the discriminator at the background stage; N X N where N = 24
-        self.recp_field = 34 # Receptive field of each of the member of N X N
+        # see: https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/39#issuecomment-368239697
+        self.recp_field = cfg.TRAIN.RECP_FIELD_SIZE # Receptive field of each member of the N X N output


         if cfg.CUDA:
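The new 70 / 8 pair matches the receptive field and effective stride of a 70 x 70 PatchGAN-style stack (three stride-2 4 x 4 convs followed by two stride-1 4 x 4 convs), which is what the linked issue derives. A sketch of that recursion; the layer list is an assumption about the background discriminator's architecture:

# Receptive field (rf) and effective stride (jump) of a conv stack:
layers = [(4, 2), (4, 2), (4, 2), (4, 1), (4, 1)]   # (kernel, stride)
rf, jump = 1, 1
for k, s in layers:
    rf += (k - 1) * jump
    jump *= s
print(rf, jump)   # 70 8 -> RECP_FIELD_SIZE, PATCH_STRIDE_SIZE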