New to RTMDet - Train with custom seg dataset #987
Replies: 1 comment 1 reply
-
I didn’t look at your config file but if I were you I would use mmdetection not mmyolo, and choose a model that already supports the task you want to do. I think they support instance segmentation with RTM-Det, and other models support semantic segmentation. Either way if you have to change the dataset (and/or modify the model) it will take a lot of time reviewing docs and stepping through code to figure it out. You could also try the Ultralytics repo, they seem to be very responsive in trying to help folks out and also support RTM-Det. I view mmdetection and mmyolo more like research projects where they expect you to figure it out on your own. You’ll learn a lot but it’s a lot of work. Whatever you choose, good luck! |
Beta Was this translation helpful? Give feedback.
-
Hello, I am trying to train a model with a custom dataset, but I dont understand how to make a proper config file. Is there a tutorial video for segmentation or something?
Tried with this:
import copy
import os.path as osp
from typing import List, Union
from mmengine.fileio import get_local_path
from mmseg.datasets.basesegdataset import BaseSegDataset, BaseCDDataset
from mmseg.registry import DATASETS
@DATASETS.register_module()
class ColelapSMDataset(BaseSegDataset):
"""Dataset for Colelap SM."""
GCC 9.3
C++ Version: 201703
Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
OpenMP 201511 (a.k.a. OpenMP 4.5)
LAPACK is enabled (usually provided by MKL)
NNPACK is enabled
CPU capability usage: AVX2
Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_DISABLE_GPU_ASSERTS=ON, TORCH_VERSION=2.1.2, USE_CUDA=0, USE_CUDNN=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=OFF, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
TorchVision: 0.16.2+cpu
OpenCV: 4.9.0
MMEngine: 0.10.3
Runtime environment:
cudnn_benchmark: True
dist_cfg: {'backend': 'nccl'}
mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}
seed: 1312745982
Distributed launcher: none
Distributed training: False
GPU number: 1
04/07 19:37:48 - mmengine - INFO - Config:
data = dict(
train=dict(
data_prefix=dict(
img_path='/kaggle/input/colelap-sm/dataset_13/images/train',
seg_map_path='/kaggle/input/colelap-sm/dataset_13/train.coco.json'
),
data_root='/kaggle/input/colelap-sm/dataset_13',
pipeline=[
dict(type='LoadSingleRSImageFromFile'),
dict(type='ConvertTo8Bit'),
dict(reduce_zero_label=False, type='LoadAnnotations'),
dict(type='ConvertToGrayScaleMask'),
dict(
backend='cv2',
keep_ratio=False,
scale=(
512,
512,
),
type='Resize'),
dict(prob=0.5, type='RandomFlip'),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs'),
],
type='PascalVOCDataset'),
val=dict(
data_prefix=dict(
img_path='/kaggle/input/colelap-sm/dataset_13/images/validation',
seg_map_path='/kaggle/input/colelap-sm/dataset_13/val.coco.json'),
data_root='/kaggle/input/colelap-sm/dataset_13',
pipeline=[
dict(type='LoadSingleRSImageFromFile'),
dict(
backend='cv2',
keep_ratio=False,
scale=(
512,
512,
),
type='Resize'),
dict(reduce_zero_label=False, type='LoadAnnotations'),
dict(type='ConvertToGrayScaleMask'),
dict(type='PackSegInputs'),
],
type='PascalVOCDataset'))
data_preprocessor = dict(
scope='mmseg',
bgr_to_rgb=True,
mean=[
123.675,
116.28,
103.53,
],
pad_val=0,
seg_pad_val=255,
size=(
512,
512,
),
std=[
58.395,
57.12,
57.375,
],
type='SegDataPreProcessor')
data_root = '/kaggle/input/colelap-sm/dataset_13'
dataset_type = 'PascalVOCDataset'
default_hooks = dict(
checkpoint=dict(
scope='mmseg', by_epoch=False, interval=2000, type='CheckpointHook'),
logger=dict(
scope='mmseg',
interval=50,
log_metric_by_epoch=False,
type='LoggerHook'),
param_scheduler=dict(scope='mmseg', type='ParamSchedulerHook'),
sampler_seed=dict(scope='mmseg', type='DistSamplerSeedHook'),
timer=dict(scope='mmseg', type='IterTimerHook'),
visualization=dict(scope='mmseg', type='SegVisualizationHook'))
default_scope = 'mmseg'
env_cfg = dict(
cudnn_benchmark=True,
dist_cfg=dict(backend='nccl'),
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
img_scale = (
512,
512,
)
launcher = 'none'
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=False)
model = dict(
scope='mmseg',
auxiliary_head=dict(
align_corners=False,
channels=64,
concat_input=False,
dropout_ratio=0.1,
in_channels=128,
in_index=3,
loss_decode=dict(
loss_weight=0.4, type='CrossEntropyLoss', use_sigmoid=True),
norm_cfg=dict(requires_grad=True, type='SyncBN'),
num_classes=2,
num_convs=1,
out_channels=1,
type='FCNHead'),
backbone=dict(
act_cfg=dict(type='ReLU'),
base_channels=64,
conv_cfg=None,
dec_dilations=(
1,
1,
1,
1,
),
dec_num_convs=(
2,
2,
2,
2,
),
downsamples=(
True,
True,
True,
True,
),
enc_dilations=(
1,
1,
1,
1,
1,
),
enc_num_convs=(
2,
2,
2,
2,
2,
),
in_channels=3,
norm_cfg=dict(requires_grad=True, type='SyncBN'),
norm_eval=False,
num_stages=5,
strides=(
1,
1,
1,
1,
1,
),
type='UNet',
upsample_cfg=dict(type='InterpConv'),
with_cp=False),
data_preprocessor=dict(
bgr_to_rgb=True,
mean=[
123.675,
116.28,
103.53,
],
pad_val=0,
seg_pad_val=255,
size=(
512,
512,
),
std=[
58.395,
57.12,
57.375,
],
type='SegDataPreProcessor'),
decode_head=dict(
align_corners=False,
channels=64,
concat_input=False,
dropout_ratio=0.1,
in_channels=64,
in_index=4,
loss_decode=dict(
avg_non_ignore=True,
loss_weight=1.0,
type='CrossEntropyLoss',
use_sigmoid=True),
norm_cfg=dict(requires_grad=True, type='SyncBN'),
num_classes=2,
num_convs=1,
out_channels=1,
type='FCNHead'),
pretrained=None,
test_cfg=dict(mode='whole'),
train_cfg=dict(),
type='EncoderDecoder')
norm_cfg = dict(scope='mmseg', requires_grad=True, type='SyncBN')
optim_wrapper = dict(
scope='mmseg',
clip_grad=None,
optimizer=dict(lr=0.001, momentum=0.9, type='SGD', weight_decay=0.0005),
type='OptimWrapper')
optimizer = dict(
scope='mmseg', lr=0.001, momentum=0.9, type='SGD', weight_decay=0.0005)
param_scheduler = [
dict(
scope='mmseg',
begin=0,
by_epoch=False,
end=20000,
eta_min=0.0001,
power=0.9,
type='PolyLR'),
]
resume = False
test_cfg = dict(scope='mmseg', type='TestLoop')
test_dataloader = dict(
batch_size=1,
dataset=dict(
data_prefix=dict(
img_path='/kaggle/input/colelap-sm/dataset_13/images/validation',
seg_map_path='/kaggle/input/colelap-sm/dataset_13/val.coco.json'),
data_root='/kaggle/input/colelap-sm/dataset_13',
pipeline=[
dict(type='LoadSingleRSImageFromFile'),
dict(
backend='cv2',
keep_ratio=False,
scale=(
512,
512,
),
type='Resize'),
dict(reduce_zero_label=False, type='LoadAnnotations'),
dict(type='ConvertToGrayScaleMask'),
dict(type='PackSegInputs'),
],
type='PascalVOCDataset'),
num_workers=9,
persistent_workers=True,
sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
iou_metrics=[
'mIoU',
'mDice',
], type='IoUMetric')
test_pipeline = [
dict(type='LoadSingleRSImageFromFile'),
dict(backend='cv2', keep_ratio=False, scale=(
512,
512,
), type='Resize'),
dict(reduce_zero_label=False, type='LoadAnnotations'),
dict(type='ConvertToGrayScaleMask'),
dict(type='PackSegInputs'),
]
train_cfg = dict(max_iters=20000, val_interval=200)
train_dataloader = dict(
batch_size=4,
dataset=dict(
data_prefix=dict(
img_path='/kaggle/input/colelap-sm/dataset_13/images/train',
seg_map_path='/kaggle/input/colelap-sm/dataset_13/train.coco.json'
),
data_root='/kaggle/input/colelap-sm/dataset_13',
pipeline=[
dict(type='LoadSingleRSImageFromFile'),
dict(type='ConvertTo8Bit'),
dict(reduce_zero_label=False, type='LoadAnnotations'),
dict(type='ConvertToGrayScaleMask'),
dict(
backend='cv2',
keep_ratio=False,
scale=(
512,
512,
),
type='Resize'),
dict(prob=0.5, type='RandomFlip'),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs'),
],
type='PascalVOCDataset'),
num_workers=9,
persistent_workers=True,
sampler=dict(shuffle=True, type='InfiniteSampler'))
train_pipeline = [
dict(type='LoadSingleRSImageFromFile'),
dict(type='ConvertTo8Bit'),
dict(reduce_zero_label=False, type='LoadAnnotations'),
dict(type='ConvertToGrayScaleMask'),
dict(backend='cv2', keep_ratio=False, scale=(
512,
512,
), type='Resize'),
dict(prob=0.5, type='RandomFlip'),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs'),
]
tta_model = dict(scope='mmseg', type='SegTTAModel')
val_cfg = dict(scope='mmseg', type='ValLoop')
val_dataloader = dict(
batch_size=1,
dataset=dict(
data_prefix=dict(
img_path='/kaggle/input/colelap-sm/dataset_13/images/validation',
seg_map_path='/kaggle/input/colelap-sm/dataset_13/val.coco.json'),
data_root='/kaggle/input/colelap-sm/dataset_13',
pipeline=[
dict(type='LoadSingleRSImageFromFile'),
dict(
backend='cv2',
keep_ratio=False,
scale=(
512,
512,
),
type='Resize'),
dict(reduce_zero_label=False, type='LoadAnnotations'),
dict(type='ConvertToGrayScaleMask'),
dict(type='PackSegInputs'),
],
type='PascalVOCDataset'),
num_workers=9,
persistent_workers=True,
sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
iou_metrics=[
'mIoU',
'mDice',
], type='IoUMetric')
vis_backends = None
visualizer = dict(
scope='mmseg',
name='visualizer',
type='SegLocalVisualizer',
vis_backends=None)
work_dir = '/kaggle/working/mmsegmentation/work_dirs/config_cira2'
/kaggle/working/mmsegmentation/mmseg/models/decode_heads/decode_head.py:136: UserWarning: threshold is not defined for binary, and defaultsto 0.3
warnings.warn('threshold is not defined for binary, and defaults'
/kaggle/working/mmsegmentation/mmseg/models/losses/cross_entropy_loss.py:250: UserWarning: Default
avg_non_ignore
is False, if you would like to ignore the certain label and average loss over non-ignore labels, which is the same with PyTorch official cross_entropy, setavg_non_ignore=True
.warnings.warn(
04/07 19:37:50 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
/kaggle/working/mmsegmentation/mmseg/engine/hooks/visualization_hook.py:60: UserWarning: The draw is False, it means that the hook for visualization will not take effect. The results will NOT be visualized or stored.
warnings.warn('The draw is False, it means that the '
04/07 19:37:50 - mmengine - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH ) RuntimeInfoHook
(BELOW_NORMAL) LoggerHook
before_train:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(VERY_LOW ) CheckpointHook
before_train_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) DistSamplerSeedHook
before_train_iter:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
after_train_iter:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
after_train_epoch:
(NORMAL ) IterTimerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
before_val:
(VERY_HIGH ) RuntimeInfoHook
before_val_epoch:
(NORMAL ) IterTimerHook
before_val_iter:
(NORMAL ) IterTimerHook
after_val_iter:
(NORMAL ) IterTimerHook
(NORMAL ) SegVisualizationHook
(BELOW_NORMAL) LoggerHook
after_val_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
after_val:
(VERY_HIGH ) RuntimeInfoHook
after_train:
(VERY_HIGH ) RuntimeInfoHook
(VERY_LOW ) CheckpointHook
before_test:
(VERY_HIGH ) RuntimeInfoHook
before_test_epoch:
(NORMAL ) IterTimerHook
before_test_iter:
(NORMAL ) IterTimerHook
after_test_iter:
(NORMAL ) IterTimerHook
(NORMAL ) SegVisualizationHook
(BELOW_NORMAL) LoggerHook
after_test_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
after_test:
(VERY_HIGH ) RuntimeInfoHook
after_run:
(BELOW_NORMAL) LoggerHook
Traceback (most recent call last):
File "/kaggle/working/mmsegmentation/tools/train.py", line 104, in
main()
File "/kaggle/working/mmsegmentation/tools/train.py", line 100, in main
runner.train()
File "/opt/conda/lib/python3.10/site-packages/mmengine/runner/runner.py", line 1728, in train
self._train_loop = self.build_train_loop(
File "/opt/conda/lib/python3.10/site-packages/mmengine/runner/runner.py", line 1530, in build_train_loop
loop = IterBasedTrainLoop(
File "/opt/conda/lib/python3.10/site-packages/mmengine/runner/loops.py", line 219, in init
super().init(runner, dataloader)
File "/opt/conda/lib/python3.10/site-packages/mmengine/runner/base_loop.py", line 26, in init
self.dataloader = runner.build_dataloader(
File "/opt/conda/lib/python3.10/site-packages/mmengine/runner/runner.py", line 1370, in build_dataloader
dataset = DATASETS.build(dataset_cfg)
File "/opt/conda/lib/python3.10/site-packages/mmengine/registry/registry.py", line 570, in build
return self.build_func(cfg, *args, **kwargs, registry=self)
File "/opt/conda/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
obj = obj_cls(**args) # type: ignore
TypeError: PascalVOCDataset.init() missing 1 required positional argument: 'ann_file'
btw I am using Kaggle and tried some other ways too like trying to inherit from the base folder and so on. I used RTMDet for objectd detection but no luck with segementation.
Any help or tutorial would be appreciated
Beta Was this translation helpful? Give feedback.
All reactions