open-mmlab · ly015 · Oct 12, 2023 · Jul 20, 2023 · Jul 20, 2023 · Aug 8, 2023
diff --git a/.circleci/test.yml b/.circleci/test.yml
@@ -66,12 +66,17 @@ jobs:
             mim install 'mmcv >= 2.0.0'
             pip install git+https://git@github.com/open-mmlab/mmdetection.git@dev-3.x
             pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+            pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
             pip install git+https://github.com/open-mmlab/mmpose.git@dev-1.x
             pip install -r requirements.txt
       - run:
           name: Install timm
           command: |
             pip install timm
+      - run:
+          name: Install transformers
+          command: |
+            pip install transformers
       - when:
           condition:
             equal: [ "0.10.0", << parameters.torchvision >> ]
@@ -117,6 +122,10 @@ jobs:
           command: |
             docker exec mmaction pip install timm
             docker exec mmaction python -m pip install pytorchvideo
+      - run:
+          name: Install transformers
+          command: |
+            docker exec mmaction pip install transformers
       - run:
           name: Install mmaction dependencies
           command: |
@@ -126,6 +135,7 @@ jobs:
             docker exec mmaction pip install git+https://git@github.com/open-mmlab/mmdetection.git@dev-3.x
             docker exec mmaction pip install git+https://git@github.com/open-mmlab/mmclassification.git@dev-1.x
             docker exec mmaction pip install git+https://github.com/open-mmlab/mmpose.git@dev-1.x
+            docker exec mmaction pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
             docker exec mmaction pip install -r requirements.txt
       - run:
           name: Build and install

diff --git a/.github/workflows/merge_stage_test.yml b/.github/workflows/merge_stage_test.yml
@@ -60,13 +60,17 @@ jobs:
         run: pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
       - name: Install MMCls
         run: pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+      - name: Install MMPretrain
+        run: pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
       - name: Install MMPose
         run: pip install git+https://github.com/open-mmlab/mmpose.git@dev-1.x
       - name: Install PytorchVideo
         run: pip install pytorchvideo
         if: ${{matrix.torchvision == '0.10.0'}}
       - name: Install timm
         run: pip install timm
+      - name: Install transformers
+        run: pip install transformers
       - name: Build and install
         run: rm -rf .eggs && pip install -e .
       - name: Run unittests and generate coverage report
@@ -108,6 +112,8 @@ jobs:
         run: pip install lmdb
       - name: Install timm
         run: pip install timm
+      - name: Install transformers
+        run: pip install transformers
       - name: Install TurboJpeg lib
         run: sudo apt-get install -y libturbojpeg
       - name: Install PyTorch
@@ -122,6 +128,8 @@ jobs:
         run: pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
       - name: Install MMCls
         run: pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+      - name: Install MMPretrain
+        run: pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
       - name: Install MMPose
         run: pip install git+https://github.com/open-mmlab/mmpose.git@dev-1.x
       - name: Install unittest dependencies
@@ -179,13 +187,16 @@ jobs:
         run: pip install librosa soundfile
       - name: Install lmdb
         run: pip install lmdb
+      - name: Install transformers
+        run: pip install transformers
       - name: Install mmaction dependencies
         run: |
           pip install git+https://github.com/open-mmlab/mmengine.git@main
           pip install -U openmim
           mim install 'mmcv >= 2.0.0'
           pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
           pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+          pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
           pip install git+https://github.com/open-mmlab/mmpose.git@dev-1.x
           pip install -r requirements.txt
       - name: Install PytorchVideo
@@ -228,12 +239,15 @@ jobs:
           mim install 'mmcv >= 2.0.0'
           pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
           pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+          pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
           pip install git+https://github.com/open-mmlab/mmpose.git@dev-1.x
           pip install -r requirements.txt
       - name: Install PytorchVideo
         run: python -m pip install pytorchvideo
       - name: Install timm
         run: python -m pip install timm
+      - name: Install transformers
+        run: python -m pip install transformers
       - name: Build and install
         run: |
           pip install -e . -v

diff --git a/.github/workflows/pr_stage_test.yml b/.github/workflows/pr_stage_test.yml
@@ -51,6 +51,8 @@ jobs:
         run: pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
       - name: Install MMCls
         run: pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+      - name: Install MMPretrain
+        run: pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
       - name: Install MMPose
         run: pip install git+https://github.com/open-mmlab/mmpose.git@dev-1.x
       - name: Install unittest dependencies
@@ -119,6 +121,7 @@ jobs:
           mim install 'mmcv >= 2.0.0'
           pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
           pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+          pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
           pip install git+https://github.com/open-mmlab/mmpose.git@dev-1.x
           pip install -r requirements.txt
       - name: Install PytorchVideo
@@ -168,6 +171,7 @@ jobs:
           mim install 'mmcv >= 2.0.0'
           pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
           pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x
+          pip install git+https://github.com/open-mmlab/mmpretrain.git@dev
           pip install git+https://github.com/open-mmlab/mmpose.git@dev-1.x
           pip install -r requirements.txt
       - name: Install PytorchVideo

diff --git a/README.md b/README.md
@@ -75,14 +75,13 @@ English | [简体中文](/README_zh-CN.md)
 
 **The default branch has been switched to `main`(previous `1.x`) from `master`(current `0.x`), and we encourage users to migrate to the latest version with more supported models, stronger pre-training checkpoints and simpler coding. Please refer to [Migration Guide](https://mmaction2.readthedocs.io/en/latest/migration.html) for more details.**
 
-**Release (2023.07.04)**: v1.1.0 with the following new features:
-
-- Support CLIP-based multi-modality models: ActionCLIP(Arxiv'2021) and CLIP4clip(ArXiv'2022)
-- Support rich projects: gesture recognition, spatio-temporal action detection tutorial, and knowledge distillation
-- Support HACS-segments dataset(ICCV'2019), MultiSports dataset(ICCV'2021), Kinetics-710 dataset(Arxiv'2022)
-- Support VideoMAE V2(CVPR'2023), and VideoMAE(NeurIPS'2022) on action detection
-- Support TCANet(CVPR'2021)
-- Support [Pure Python style Configuration File](https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#a-pure-python-style-configuration-file-beta) and downloading datasets by MIM with one command
+**Release (2023.10.12)**: v1.2.0 with the following new features:
+
+- Support VindLU multi-modality algorithm and the training of ActionCLIP
+- Support lightweight model MobileOne TSN/TSM
+- Support video retrieval dataset MSVD
+- Support SlowOnly K700 feature to train localization models
+- Support Video and Audio Demos
 
 ## 📖 Introduction [🔝](#-table-of-contents)
 

diff --git a/README_zh-CN.md b/README_zh-CN.md
@@ -26,7 +26,7 @@
 [![Percentage of issues still open](https://isitmaintained.com/badge/open/open-mmlab/mmaction2.svg)](https://github.com/open-mmlab/mmaction2/issues)
 
 [📘中文文档](https://mmaction2.readthedocs.io/zh_CN/latest/index.html) |
-[🛠️安装指南](https://mmaction2.readthedocs.io/zh_CN/get_started/installation.html) |
+[🛠️安装指南](https://mmaction2.readthedocs.io/zh_CN/latest/get_started/installation.html) |
 [👀模型库](https://mmaction2.readthedocs.io/zh_CN/latest/modelzoo_statistics.html) |
 [🆕更新日志](https://mmaction2.readthedocs.io/en/latest/notes/changelog.html) |
 [🚀进行中项目](https://github.com/open-mmlab/mmaction2/projects) |
@@ -380,10 +380,10 @@ MMAction2 是一款由来自不同高校和企业的研发人员共同参与贡
 
 ## ❤️ 欢迎加入 OpenMMLab 社区 [🔝](#-table-of-contents)
 
-扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab)，加入 OpenMMLab 团队的 [官方交流 QQ 群](https://jq.qq.com/?_wv=1027&k=aCvMxdr3) 或联络 OpenMMLab 官方微信小助手
+扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab)，扫描下方微信二维码添加喵喵好友，进入 MMAction2 微信交流社群。【加好友申请格式：研究方向+地区+学校/公司+姓名】
 
 <div align="center">
-<img src="./resources/zhihu_qrcode.jpg" height="400"/> <img src="./resources/xiaozhushou_weixin_qrcode.jpeg" height="400"/>
+<img src="./resources/zhihu_qrcode.jpg" height="400"/> <img src="./resources/miaomiao_qrcode.jpg" height="400"/>
 </div>
 
 我们会在 OpenMMLab 社区为大家

diff --git a/configs/_base_/models/tsm_mobileone_s4.py b/configs/_base_/models/tsm_mobileone_s4.py
@@ -0,0 +1,31 @@
+# model settings: Recognizer2D with a MobileOneTSM (arch 's4') backbone
+preprocess_cfg = dict(  # unpacked into data_preprocessor at the end of `model`
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
+
+checkpoint = ('https://download.openmmlab.com/mmclassification/'
+              'v0/mobileone/mobileone-s4_8xb32_in1k_20221110-28d888cb.pth')
+model = dict(
+    type='Recognizer2D',
+    backbone=dict(
+        type='MobileOneTSM',
+        arch='s4',
+        shift_div=8,
+        num_segments=8,  # same value as cls_head.num_segments below
+        is_shift=True,
+        init_cfg=dict(  # initialise from the `checkpoint` URL defined above
+            type='Pretrained', checkpoint=checkpoint, prefix='backbone')),
+    cls_head=dict(
+        type='TSMHead',
+        num_segments=8,  # same value as backbone.num_segments above
+        num_classes=400,  # presumably Kinetics-400 - TODO confirm per dataset
+        in_channels=2048,
+        spatial_type='avg',
+        consensus=dict(type='AvgConsensus', dim=1),
+        dropout_ratio=0.5,
+        init_std=0.001,
+        is_shift=True,
+        average_clips='prob'),
+    # data preprocessing, then training/testing settings (both unset here)
+    data_preprocessor=dict(type='ActionDataPreprocessor', **preprocess_cfg),
+    train_cfg=None,
+    test_cfg=None)
diff --git a/configs/_base_/models/tsn_mobileone_s0.py b/configs/_base_/models/tsn_mobileone_s0.py
@@ -0,0 +1,26 @@
+checkpoint = ('https://download.openmmlab.com/mmclassification/'
+              'v0/mobileone/mobileone-s0_8xb32_in1k_20221110-0bc94952.pth')
+model = dict(  # Recognizer2D with a MobileOne (arch 's0') backbone + TSNHead
+    type='Recognizer2D',
+    backbone=dict(
+        type='mmpretrain.MobileOne',  # presumably needs mmpretrain installed
+        arch='s0',
+        init_cfg=dict(  # initialise from the `checkpoint` URL defined above
+            type='Pretrained', checkpoint=checkpoint, prefix='backbone'),
+        norm_eval=False),
+    cls_head=dict(
+        type='TSNHead',
+        num_classes=400,  # presumably Kinetics-400 - TODO confirm per dataset
+        in_channels=1024,
+        spatial_type='avg',
+        consensus=dict(type='AvgConsensus', dim=1),
+        dropout_ratio=0.4,
+        init_std=0.01,
+        average_clips='prob'),
+    data_preprocessor=dict(
+        type='ActionDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        format_shape='NCHW'),
+    train_cfg=None,  # no training-time options set in this base config
+    test_cfg=None)  # no test-time options set in this base config
diff --git a/configs/_base_/models/tsn_r18_audio.py b/configs/_base_/models/tsn_r18_audio.py
diff --git a/...etection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb.py b/...etection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava21-rgb.py
@@ -46,6 +46,7 @@
         shared_head=dict(type='ACRNHead', in_channels=4608, out_channels=2304),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2304,
             num_classes=81,
             multilabel=True,
@@ -88,9 +89,6 @@
 proposal_file_val = f'{anno_root}/ava_dense_proposals_val.FAIR.recall_93.9.pkl'
 
 file_client_args = dict(io_backend='disk')
-file_client_args = dict(
-    io_backend='petrel',
-    path_mapping=dict({'data/ava': 's254:s3://openmmlab/datasets/action/ava'}))
 train_pipeline = [
     dict(type='SampleAVAFrames', clip_len=32, frame_interval=2),
     dict(type='RawFrameDecode', **file_client_args),

diff --git a/...etection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb.py b/...etection/acrn/slowfast-acrn_kinetics400-pretrained-r50_8xb8-8x8x1-cosine-10e_ava22-rgb.py
@@ -46,6 +46,7 @@
         shared_head=dict(type='ACRNHead', in_channels=4608, out_channels=2304),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2304,
             num_classes=81,
             multilabel=True,

diff --git a/configs/detection/lfb/slowonly-lfb-infer_r50_ava21-rgb.py b/configs/detection/lfb/slowonly-lfb-infer_r50_ava21-rgb.py
@@ -34,6 +34,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2048,
             num_classes=81,
             multilabel=True,

diff --git a/...gs/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py b/...gs/detection/lfb/slowonly-lfb-nl_kinetics400-pretrained-r50_8xb12-4x16x1-20e_ava21-rgb.py
@@ -37,6 +37,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2560,
             num_classes=81,
             multilabel=True,

diff --git a/configs/detection/lfb/slowonly-lfb_ava-pretrained-r50_infer-4x16x1_ava21-rgb.py b/configs/detection/lfb/slowonly-lfb_ava-pretrained-r50_infer-4x16x1_ava21-rgb.py
@@ -34,6 +34,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2048,
             num_classes=81,
             multilabel=True,

diff --git a/configs/detection/slowfast/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py b/configs/detection/slowfast/slowfast_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
@@ -44,6 +44,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2304,
             num_classes=81,
             multilabel=True,

diff --git a/...detection/slowfast/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb.py b/...detection/slowfast/slowfast_kinetics400-pretrained-r50_8xb6-8x8x1-cosine-10e_ava22-rgb.py
@@ -45,6 +45,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2304,
             num_classes=81,
             multilabel=True,

diff --git a/configs/detection/slowfast/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb.py b/configs/detection/slowfast/slowfast_kinetics400-pretrained-r50_8xb8-8x8x1-20e_ava21-rgb.py
@@ -45,6 +45,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2304,
             num_classes=81,
             multilabel=True,

diff --git a/configs/detection/slowonly/slowonly_k400-pre-r50_8xb8-4x16x1-10e_ava-kinetics-rgb.py b/configs/detection/slowonly/slowonly_k400-pre-r50_8xb8-4x16x1-10e_ava-kinetics-rgb.py
@@ -29,6 +29,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2048,
             num_classes=81,
             multilabel=True,

diff --git a/configs/detection/slowonly/slowonly_k400-pre-r50_8xb8-8x8x1-10e_ava-kinetics-rgb.py b/configs/detection/slowonly/slowonly_k400-pre-r50_8xb8-8x8x1-10e_ava-kinetics-rgb.py
@@ -29,6 +29,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2048,
             num_classes=81,
             multilabel=True,

diff --git a/configs/detection/slowonly/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb.py b/configs/detection/slowonly/slowonly_kinetics400-pretrained-r101_8xb16-8x8x1-20e_ava21-rgb.py
@@ -28,6 +28,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2048,
             num_classes=81,
             multilabel=True,

diff --git a/...s/detection/slowonly/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb.py b/...s/detection/slowonly/slowonly_kinetics400-pretrained-r50-nl_8xb16-4x16x1-20e_ava21-rgb.py
@@ -36,6 +36,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2048,
             num_classes=81,
             multilabel=True,

diff --git a/...gs/detection/slowonly/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb.py b/...gs/detection/slowonly/slowonly_kinetics400-pretrained-r50-nl_8xb16-8x8x1-20e_ava21-rgb.py
@@ -36,6 +36,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2048,
             num_classes=81,
             multilabel=True,

diff --git a/configs/detection/slowonly/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py b/configs/detection/slowonly/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
@@ -29,6 +29,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2048,
             num_classes=81,
             multilabel=True,

diff --git a/...detection/slowonly/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-8e_multisports-rgb.py b/...detection/slowonly/slowonly_kinetics400-pretrained-r50_8xb16-4x16x1-8e_multisports-rgb.py
@@ -30,6 +30,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=False,
             in_channels=2048,
             num_classes=num_classes,
             multilabel=False,

diff --git a/configs/detection/slowonly/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py b/configs/detection/slowonly/slowonly_kinetics700-pretrained-r50_8xb16-4x16x1-20e_ava21-rgb.py
@@ -29,6 +29,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=2048,
             num_classes=81,
             multilabel=True,

diff --git a/...tection/videomae/vit-base-p16_videomae-k400-pre_8xb8-16x4x1-20e-adamw_ava-kinetics-rgb.py b/...tection/videomae/vit-base-p16_videomae-k400-pre_8xb8-16x4x1-20e-adamw_ava-kinetics-rgb.py
@@ -31,6 +31,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=768,
             num_classes=81,
             multilabel=True,

diff --git a/...ection/videomae/vit-large-p16_videomae-k400-pre_8xb8-16x4x1-20e-adamw_ava-kinetics-rgb.py b/...ection/videomae/vit-large-p16_videomae-k400-pre_8xb8-16x4x1-20e-adamw_ava-kinetics-rgb.py
@@ -32,6 +32,7 @@
             with_temporal_pool=True),
         bbox_head=dict(
             type='BBoxHeadAVA',
+            background_class=True,
             in_channels=1024,
             num_classes=81,
             multilabel=True,