inference_tsm_enn.py
# model settings
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNetTSM',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False,
        shift_div=8),
    cls_head=dict(
        type='TSMHead',
        loss_cls=dict(type='EvidenceLoss',
                      num_classes=101,
                      evidence='exp',
                      loss_type='log',
                      annealing_method='exp'),
        num_classes=101,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.5,
        init_std=0.001,
        is_shift=True))
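# Note: EvidenceLoss replaces the usual cross-entropy on this TSM head, making it
# an evidential (EDL) classifier over the 101 closed-set classes: evidence='exp'
# maps raw class scores to non-negative evidence, loss_type='log' selects the
# log-form EDL objective, and annealing_method='exp' sets the schedule used to
# ramp up the KL regulariser during training.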
# model training and testing settings
evidence = 'exp'  # only used for EDL
test_cfg = dict(average_clips='score')
# dataset settings
dataset_type = 'VideoDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
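# The test pipeline below reads each video with OpenCV, densely samples 8
# single-frame segments, resizes the short side to 256, takes a 224x224 centre
# crop (no flipping), normalises with the ImageNet statistics above (kept in
# RGB order), and packs the frames as NCHW tensors together with the label.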
test_pipeline = [
    dict(type='OpenCVInit', num_threads=1),
    dict(
        type='DenseSampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='OpenCVDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=4,
    test=dict(
        type=dataset_type,
        ann_file=None,
        data_prefix=None,
        start_index=0,
        pipeline=test_pipeline))
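
For reference, here is a minimal sketch (not part of this config) of how outputs from a head trained with the EvidenceLoss settings above are typically post-processed at inference time: with evidence='exp', the raw class scores are exponentiated into non-negative evidence that parameterises a Dirichlet distribution, whose mean gives the class probabilities and whose total strength gives a per-video uncertainty score. The function name, clamping range, and tensor shapes are illustrative assumptions rather than code from this repository.

import torch

def evidential_prediction(logits: torch.Tensor):
    """Map raw head outputs of shape (batch, num_classes) to Dirichlet-based
    class probabilities and a predictive-uncertainty score (standard EDL)."""
    evidence = torch.exp(torch.clamp(logits, max=10))  # evidence='exp'; clamp assumed for stability
    alpha = evidence + 1.0                             # Dirichlet concentration parameters
    strength = alpha.sum(dim=-1, keepdim=True)         # total evidence S = sum_k alpha_k
    probs = alpha / strength                           # expected class probabilities
    num_classes = logits.shape[-1]                     # 101 for this config
    uncertainty = num_classes / strength               # u = K / S; larger means less evidence
    return probs, uncertainty.squeeze(-1)

In an mmaction2-style workflow, this config is passed to the test or demo scripts, with ann_file and data_prefix filled in for the target dataset before running inference.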