-
Notifications
You must be signed in to change notification settings - Fork 1
121 lines (107 loc) · 4.79 KB
/
large.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Large-data CI workflow. It has no automatic triggers: it runs only when
# started manually (workflow_dispatch).
name: Large data CI
on:
  workflow_dispatch:
jobs:
########################################################################################
  # Empties the runner workspace, checks out the repository with all
  # submodules, then fetches full history for the igraph submodule and
  # lists its tags.
  checkout:
    name: Checkout
    runs-on: [self-hosted, vclust, x64_linux, large]
    steps:
      - name: clean
        # Path is quoted so a workspace directory containing spaces is handled;
        # the glob stays outside the quotes so it still expands.
        run: rm -rf "${{ github.workspace }}"/*
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Get tags
        # Run steps use `bash -e` by default, so a failing `git fetch` would
        # abort the script before the exit code is printed or the tags are
        # listed. Capture the code explicitly, always list the tags, and then
        # report the fetch status as the step result.
        run: |
          cd ./3rd_party/clusty/libs/igraph
          rc=0
          git fetch --prune --unshallow || rc=$?
          echo "exit code ${rc}"
          git tag --list
          exit "${rc}"
        # Best-effort step: a fetch failure must not fail the job.
        continue-on-error: true
########################################################################################
  # Despite its name, this job currently builds the binaries with gmake and
  # prints `vclust.py info`; the release-download steps are commented out.
  # It has no checkout step of its own, so it presumably relies on the
  # workspace left on the runner by the `checkout` job — confirm.
  download-release:
    name: Download release
    needs: checkout
    strategy:
      matrix:
        # GCC major version; used as the suffix of g++-N / gcc-N below.
        compiler:
          - 14
    runs-on:
      - self-hosted
      - vclust
      - x64_linux
      - large
    steps:
      # Disabled release-download steps, kept for reference.
      # NOTE(review): the `uses:` reference below looks garbled (e-mail
      # obfuscation); restore the real `owner/repo@version` before re-enabling.
      # - name: clean
      #   run: rm -rf ${{ github.workspace }}/*
      # - uses: robinraju/[email protected]
      #   with:
      #     latest: true
      #     tarBall: true
      #     extract: true
      #     token: ${{ secrets.MY_TOKEN }}
      # - name: download
      #   run: ./.github/workflows/github-release-downloader.sh refresh-bio vclust-dev "x64_linux.tar.gz"
      - name: make
        # Folded scalar: the lines below are joined with single spaces into
        # one command line.
        run: >-
          gmake -j32
          CXX=g++-${{matrix.compiler}}
          CC=gcc-${{matrix.compiler}}
          PLATFORM=avx2
          LEIDEN=true
          STATIC_LINK=true
      - name: print info
        run: python3 vclust.py info
########################################################################################
  # For every dataset variant: runs `vclust.py prefilter`, then
  # `vclust.py align` on the resulting filter, and prints md5 checksums of
  # the produced files.
  ani:
    name: ANI calculation
    needs: download-release
    strategy:
      fail-fast: false
      matrix:
        # All datasets configured under `include` are listed explicitly, so
        # no job depends on `include` implicitly creating a new combination,
        # and the list matches the one used by the `clustering` job.
        dataset: [ICTV, IMGVR, IMGVR_HQ]
        include:
          # Each entry attaches a variant name and the extra command-line
          # arguments for the prefilter and align steps to its dataset.
          - dataset: ICTV
            variant_name: full
            prefilter_args: '-k 25 --min-ident 0.7 --min-kmers 20'
            align_args: '--out-tani 0.70'
          - dataset: IMGVR_HQ
            variant_name: full
            prefilter_args: '-k 25 --min-ident 0.95 --min-kmers 20 --batch-size 1000000'
            align_args: '--out-ani 0.95 --out-qcov 0.85'
          - dataset: IMGVR
            variant_name: fraction_02
            prefilter_args: '-k 25 --min-ident 0.95 --min-kmers 4 --kmers-fraction 0.2 --batch-size 2000000'
            align_args: '--out-ani 0.95 --out-qcov 0.85'
    env:
      # Relative paths; presumably resolved from the step working directory
      # on the self-hosted runner — confirm.
      INPUT_DIR: ../../../../vclust/input
      TEMP_DIR: ../../../../vclust/temp
    runs-on: [self-hosted, vclust, x64_linux, large]
    steps:
      # The `run` values below are folded scalars: their lines are joined
      # with single spaces into one command line.
      - name: prefilter
        run: >-
          /usr/bin/time -v ./vclust.py prefilter -t 32
          -i ${INPUT_DIR}/${{ matrix.dataset }}.fna.gz
          -o ${TEMP_DIR}/${{ matrix.dataset }}.${{ matrix.variant_name }}.filter
          ${{ matrix.prefilter_args }}
      - name: prefilter md5
        run: md5sum ${TEMP_DIR}/${{ matrix.dataset }}.${{ matrix.variant_name }}.filter
      - name: align
        run: >-
          /usr/bin/time -v ./vclust.py align -t 32
          -i ${INPUT_DIR}/${{ matrix.dataset }}.fna.gz
          -o ${TEMP_DIR}/${{ matrix.dataset }}.${{ matrix.variant_name }}.ani.tsv
          --filter ${TEMP_DIR}/${{ matrix.dataset }}.${{ matrix.variant_name }}.filter
          ${{ matrix.align_args }}
      - name: align md5
        run: >-
          md5sum
          ${TEMP_DIR}/${{ matrix.dataset }}.${{ matrix.variant_name }}.ani.tsv
          ${TEMP_DIR}/${{ matrix.dataset }}.${{ matrix.variant_name }}.ani.ids.tsv
########################################################################################
  # Runs `vclust.py cluster` for every dataset x algorithm combination on the
  # ANI tables written by the `ani` job, then prints the md5 checksum of each
  # clustering output.
  clustering:
    name: clustering
    needs: ani
    strategy:
      fail-fast: false
      matrix:
        dataset:
          - ICTV
          - IMGVR
          - IMGVR_HQ
        algo_name:
          - single
          - complete
          - set-cover
          - uclust
          - cd-hit
          - leiden_07
          - leiden_10
        include:
          # Per-dataset additions: which ANI variant to read and the metric
          # arguments appended to the cluster command.
          - dataset: ICTV
            variant_name: full
            args: '--metric tani --tani 0.95'
          - dataset: IMGVR
            variant_name: fraction_02
            args: '--metric ani --ani 0.95 --qcov 0.85'
          - dataset: IMGVR_HQ
            variant_name: full
            args: '--metric ani --ani 0.95 --qcov 0.85'
          # Per-algorithm additions: the value passed after `--algorithm`.
          - algo_name: single
            algo_cmd: single
          - algo_name: complete
            algo_cmd: complete
          - algo_name: set-cover
            algo_cmd: set-cover
          - algo_name: uclust
            algo_cmd: uclust
          - algo_name: cd-hit
            algo_cmd: cd-hit
          - algo_name: leiden_07
            algo_cmd: 'leiden --leiden-resolution 0.7'
          - algo_name: leiden_10
            algo_cmd: 'leiden --leiden-resolution 1.0'
    env:
      INPUT_DIR: ../../../../vclust/input
      TEMP_DIR: ../../../../vclust/temp
    runs-on:
      - self-hosted
      - vclust
      - x64_linux
      - large
    steps:
      - name: cluster
        # Folded scalar: the lines below are joined with single spaces into
        # one command line.
        run: >-
          /usr/bin/time -v ./vclust.py cluster
          -i ${TEMP_DIR}/${{ matrix.dataset }}.${{ matrix.variant_name }}.ani.tsv
          --ids ${TEMP_DIR}/${{ matrix.dataset }}.${{ matrix.variant_name }}.ani.ids.tsv
          -o ${TEMP_DIR}/${{ matrix.dataset }}.${{ matrix.variant_name }}.${{ matrix.algo_name }}.clusty
          --algorithm ${{ matrix.algo_cmd }}
          ${{ matrix.args }}
      - name: md5
        run: md5sum ${TEMP_DIR}/${{ matrix.dataset }}.${{ matrix.variant_name }}.${{ matrix.algo_name }}.clusty