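Clean up arguments: rename VAL/TEST `suffix` to `checkpoint`, drop empty `weights_encoder`/`weights_decoder` from the YAML configs, use `torch.utils.data` instead of `lib.utils.data`, and resume training from `TRAIN.start_epoch`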

hangzhaomit · hangzhaomit · commit d91f3d5e30a0 · 2019-07-29T18:25:15.000-04:00
diff --git a/config/ade20k-mobilenetv2dilated-c1_deepsup.yaml b/config/ade20k-mobilenetv2dilated-c1_deepsup.yaml
@@ -12,12 +12,10 @@ DATASET:
 MODEL:
   arch_encoder: "mobilenetv2dilated"
   arch_decoder: "c1_deepsup"
-  weights_encoder: ""
-  weights_decoder: ""
   fc_dim: 320
 
 TRAIN:
-  batch_size_per_gpu: 2
+  batch_size_per_gpu: 3
   num_epoch: 20
   start_epoch: 0
   epoch_iters: 5000
@@ -35,10 +33,10 @@ TRAIN:
 
 VAL:
   visualize: False
-  suffix: "_epoch_20.pth"
+  checkpoint: "epoch_20.pth"
 
 TEST:
-  suffix: "_epoch_20.pth"
+  checkpoint: "epoch_20.pth"
   result: "./"
 
 DIR: "ckpt/ade20k-mobilenetv2dilated-c1_deepsup"
diff --git a/config/ade20k-resnet101-upernet.yaml b/config/ade20k-resnet101-upernet.yaml
@@ -12,8 +12,6 @@ DATASET:
 MODEL:
   arch_encoder: "resnet101"
   arch_decoder: "upernet"
-  weights_encoder: ""
-  weights_decoder: ""
   fc_dim: 2048
 
 TRAIN:
@@ -35,10 +33,10 @@ TRAIN:
 
 VAL:
   visualize: False
-  suffix: "_epoch_40.pth"
+  checkpoint: "epoch_20.pth"
 
 TEST:
-  suffix: "_epoch_40.pth"
+  checkpoint: "epoch_20.pth"
   result: "./"
 
 DIR: "ckpt/ade20k-resnet101-upernet"
diff --git a/config/ade20k-resnet101dilated-ppm_deepsup.yaml b/config/ade20k-resnet101dilated-ppm_deepsup.yaml
@@ -12,8 +12,6 @@ DATASET:
 MODEL:
   arch_encoder: "resnet50dilated"
   arch_decoder: "ppm_deepsup"
-  weights_encoder: ""
-  weights_decoder: ""
   fc_dim: 2048
 
 TRAIN:
@@ -35,10 +33,10 @@ TRAIN:
 
 VAL:
   visualize: False
-  suffix: "_epoch_20.pth"
+  checkpoint: "epoch_20.pth"
 
 TEST:
-  suffix: "_epoch_20.pth"
+  checkpoint: "epoch_20.pth"
   result: "./"
 
 DIR: "ckpt/ade20k-resnet50dilated-ppm_deepsup"
diff --git a/config/ade20k-resnet18dilated-ppm_deepsup.yaml b/config/ade20k-resnet18dilated-ppm_deepsup.yaml
@@ -12,8 +12,6 @@ DATASET:
 MODEL:
   arch_encoder: "resnet18dilated"
   arch_decoder: "ppm_deepsup"
-  weights_encoder: ""
-  weights_decoder: ""
   fc_dim: 512
 
 TRAIN:
@@ -35,10 +33,10 @@ TRAIN:
 
 VAL:
   visualize: False
-  suffix: "_epoch_20.pth"
+  checkpoint: "epoch_20.pth"
 
 TEST:
-  suffix: "_epoch_20.pth"
+  checkpoint: "epoch_20.pth"
   result: "./"
 
 DIR: "ckpt/ade20k-resnet18dilated-ppm_deepsup"
diff --git a/config/ade20k-resnet50dilated-ppm_deepsup.yaml b/config/ade20k-resnet50dilated-ppm_deepsup.yaml
@@ -12,8 +12,6 @@ DATASET:
 MODEL:
   arch_encoder: "resnet50dilated"
   arch_decoder: "ppm_deepsup"
-  weights_encoder: ""
-  weights_decoder: ""
   fc_dim: 2048
 
 TRAIN:
@@ -35,10 +33,10 @@ TRAIN:
 
 VAL:
   visualize: False
-  suffix: "_epoch_20.pth"
+  checkpoint: "epoch_20.pth"
 
 TEST:
-  suffix: "_epoch_20.pth"
+  checkpoint: "epoch_20.pth"
   result: "./"
 
 DIR: "ckpt/ade20k-resnet50dilated-ppm_deepsup"
diff --git a/config/defaults.py b/config/defaults.py
@@ -49,7 +49,7 @@
 # epochs to train for
 _C.TRAIN.num_epoch = 20
 # epoch to start training. useful if continue from a checkpoint
-_C.TRAIN.start_epoch = 1
+_C.TRAIN.start_epoch = 0
 # iterations of each epoch (irrelevant to batch size)
 _C.TRAIN.epoch_iters = 5000
 
@@ -83,7 +83,7 @@
 # output visualization during validation
 _C.VAL.visualize = False
 # the checkpoint to evaluate on
-_C.VAL.suffix = "_epoch_20.pth"
+_C.VAL.checkpoint = "epoch_20.pth"
 
 # -----------------------------------------------------------------------------
 # Testing
@@ -92,6 +92,6 @@
 # currently only supports 1
 _C.TEST.batch_size = 1
 # the checkpoint to test on
-_C.TEST.suffix = "_epoch_20.pth"
+_C.TEST.checkpoint = "epoch_20.pth"
 # folder to output visualization results
 _C.TEST.result = "./"
diff --git a/dataset.py b/dataset.py
@@ -1,7 +1,6 @@
 import os
 import json
 import torch
-import lib.utils.data as torchdata
 import cv2
 from torchvision import transforms
 import numpy as np
@@ -23,7 +22,7 @@ def imresize(im, size, interp='bilinear'):
     )
 
 
-class BaseDataset(torchdata.Dataset):
+class BaseDataset(torch.utils.data.Dataset):
     def __init__(self, odgt, opt, **kwargs):
         # parse options
         self.imgSizes = opt.imgSizes
@@ -110,6 +109,7 @@ def _get_sub_batch(self):
     def __getitem__(self, index):
         # NOTE: random shuffle for the first time. shuffle in __init__ is useless
         if not self.if_shuffled:
+            np.random.seed(index)
             np.random.shuffle(self.list_sample)
             self.if_shuffled = True
 
diff --git a/eval.py b/eval.py
@@ -15,7 +15,6 @@
 from utils import AverageMeter, colorEncode, accuracy, intersectionAndUnion, setup_logger
 from lib.nn import user_scattered_collate, async_copy_to
 from lib.utils import as_numpy
-import lib.utils.data as torchdata
 import cv2
 from tqdm import tqdm
 
@@ -133,7 +132,7 @@ def main(cfg, gpu):
         cfg.DATASET.root_dataset,
         cfg.DATASET.list_val,
         cfg.DATASET)
-    loader_val = torchdata.DataLoader(
+    loader_val = torch.utils.data.DataLoader(
         dataset_val,
         batch_size=cfg.VAL.batch_size,
         shuffle=False,
@@ -186,10 +185,9 @@ def main(cfg, gpu):
 
     # absolute paths of model weights
     cfg.MODEL.weights_encoder = os.path.join(
-        cfg.DIR, 'encoder' + cfg.VAL.suffix)
+        cfg.DIR, 'encoder_' + cfg.VAL.checkpoint)
     cfg.MODEL.weights_decoder = os.path.join(
-        cfg.DIR, 'decoder' + cfg.VAL.suffix)
-
+        cfg.DIR, 'decoder_' + cfg.VAL.checkpoint)
     assert os.path.exists(cfg.MODEL.weights_encoder) and \
         os.path.exists(cfg.MODEL.weights_decoder), "checkpoint does not exitst!"
 
diff --git a/eval_multipro.py b/eval_multipro.py
@@ -16,7 +16,6 @@
 from utils import AverageMeter, colorEncode, accuracy, intersectionAndUnion, parse_devices, setup_logger
 from lib.nn import user_scattered_collate, async_copy_to
 from lib.utils import as_numpy
-import lib.utils.data as torchdata
 import cv2
 from tqdm import tqdm
 
@@ -94,7 +93,7 @@ def worker(cfg, gpu_id, start_idx, end_idx, result_queue):
         cfg.DATASET.list_val,
         cfg.DATASET,
         start_idx=start_idx, end_idx=end_idx)
-    loader_val = torchdata.DataLoader(
+    loader_val = torch.utils.data.DataLoader(
         dataset_val,
         batch_size=cfg.VAL.batch_size,
         shuffle=False,
@@ -211,10 +210,9 @@ def main(cfg, gpus):
 
     # absolute paths of model weights
     cfg.MODEL.weights_encoder = os.path.join(
-        cfg.DIR, 'encoder' + cfg.VAL.suffix)
+        cfg.DIR, 'encoder_' + cfg.VAL.checkpoint)
     cfg.MODEL.weights_decoder = os.path.join(
-        cfg.DIR, 'decoder' + cfg.VAL.suffix)
-
+        cfg.DIR, 'decoder_' + cfg.VAL.checkpoint)
     assert os.path.exists(cfg.MODEL.weights_encoder) and \
         os.path.exists(cfg.MODEL.weights_decoder), "checkpoint does not exitst!"
 
diff --git a/test.py b/test.py
@@ -14,7 +14,6 @@
 from utils import colorEncode, find_recursive, setup_logger
 from lib.nn import user_scattered_collate, async_copy_to
 from lib.utils import as_numpy
-import lib.utils.data as torchdata
 import cv2
 from tqdm import tqdm
 from config import cfg
@@ -116,7 +115,7 @@ def main(cfg, gpu):
     dataset_test = TestDataset(
         cfg.list_test,
         cfg.DATASET)
-    loader_test = torchdata.DataLoader(
+    loader_test = torch.utils.data.DataLoader(
         dataset_test,
         batch_size=cfg.TEST.batch_size,
         shuffle=False,
@@ -179,9 +178,9 @@ def main(cfg, gpu):
 
     # absolute paths of model weights
     cfg.MODEL.weights_encoder = os.path.join(
-        cfg.DIR, 'encoder' + cfg.TEST.suffix)
+        cfg.DIR, 'encoder_' + cfg.TEST.checkpoint)
     cfg.MODEL.weights_decoder = os.path.join(
-        cfg.DIR, 'decoder' + cfg.TEST.suffix)
+        cfg.DIR, 'decoder_' + cfg.TEST.checkpoint)
 
     assert os.path.exists(cfg.MODEL.weights_encoder) and \
         os.path.exists(cfg.MODEL.weights_decoder), "checkpoint does not exitst!"
diff --git a/train.py b/train.py
@@ -14,7 +14,6 @@
 from models import ModelBuilder, SegmentationModule
 from utils import AverageMeter, parse_devices, setup_logger
 from lib.nn import UserScatteredDataParallel, user_scattered_collate, patch_replication_callback
-import lib.utils.data as torchdata
 
 
 # train one epoch
@@ -31,7 +30,6 @@ def train(segmentation_module, iterator, optimizers, history, epoch, cfg):
     for i in range(cfg.TRAIN.epoch_iters):
         batch_data = next(iterator)
         data_time.update(time.time() - tic)
-
         segmentation_module.zero_grad()
 
         # forward pass
@@ -72,7 +70,7 @@ def train(segmentation_module, iterator, optimizers, history, epoch, cfg):
         adjust_learning_rate(optimizers, cur_iter, cfg)
 
 
-def checkpoint(nets, history, cfg, epoch_num):
+def checkpoint(nets, history, cfg, epoch):
     print('Saving checkpoints...')
     (net_encoder, net_decoder, crit) = nets
 
@@ -81,13 +79,13 @@ def checkpoint(nets, history, cfg, epoch_num):
 
     torch.save(
         history,
-        '{}/history_epoch_{}.pth'.format(cfg.DIR, epoch_num))
+        '{}/history_epoch_{}.pth'.format(cfg.DIR, epoch))
     torch.save(
         dict_encoder,
-       '{}/encoder_epoch_{}.pth'.format(cfg.DIR, epoch_num))
+        '{}/encoder_epoch_{}.pth'.format(cfg.DIR, epoch))
     torch.save(
         dict_decoder,
-       '{}/decoder_epoch_{}.pth'.format(cfg.DIR, epoch_num))
+        '{}/decoder_epoch_{}.pth'.format(cfg.DIR, epoch))
 
 
 def group_weight(module):
@@ -169,7 +167,7 @@ def main(cfg, gpus):
         cfg.DATASET,
         batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)
 
-    loader_train = torchdata.DataLoader(
+    loader_train = torch.utils.data.DataLoader(
         dataset_train,
         batch_size=len(gpus),  # we have modified data_parallel
         shuffle=False,  # we do not use this param
@@ -242,12 +240,22 @@ def main(cfg, gpus):
     logger.info("Loaded configuration file {}".format(args.cfg))
     logger.info("Running with config:\n{}".format(cfg))
 
+    # Output directory
     if not os.path.isdir(cfg.DIR):
         os.makedirs(cfg.DIR)
     logger.info("Outputing checkpoints to: {}".format(cfg.DIR))
     with open(os.path.join(cfg.DIR, 'config.yaml'), 'w') as f:
         f.write("{}".format(cfg))
 
+    # Start from checkpoint
+    if cfg.TRAIN.start_epoch > 0:
+        cfg.MODEL.weights_encoder = os.path.join(
+            cfg.DIR, 'encoder_epoch_{}.pth'.format(cfg.TRAIN.start_epoch))
+        cfg.MODEL.weights_decoder = os.path.join(
+            cfg.DIR, 'decoder_epoch_{}.pth'.format(cfg.TRAIN.start_epoch))
+        assert os.path.exists(cfg.MODEL.weights_encoder) and \
+            os.path.exists(cfg.MODEL.weights_decoder), "checkpoint does not exitst!"
+
     # Parse gpu ids
     gpus = parse_devices(args.gpus)
     gpus = [x.replace('gpu', '') for x in gpus]