-
Notifications
You must be signed in to change notification settings - Fork 0
/
start.sh
110 lines (70 loc) · 2.67 KB
/
start.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/bin/bash
# Locations used throughout the script. Adjust them to match the volumes
# that hold the input data, the results and the germline files.
INPUT_DIR="/INPUT"
OUTPUT_DIR="/OUTPUT"
GERMLINE_DIR="/GERMLINE"
GERMLINE_BUILD_DIR="/GERMLINE_BUILD_DIR"

# Make sure the germline build directory exists (no error if it already does).
mkdir -p "${GERMLINE_BUILD_DIR}"
# Print the usage instructions for this script to stdout.
#
# The text is read from /mixcrep/tools/correct_usage.txt. That file is a text
# file, so it has to be printed with cat; naming it as a command (as was done
# before) makes the shell try to execute it. When the file cannot be read, a
# minimal built-in usage line is printed instead so the caller always gets
# some guidance.
#
# Arguments: none
# Returns:   the status of cat, or 0 when the fallback line is printed
correct_usage()
{
  local usage_file=/mixcrep/tools/correct_usage.txt

  if [ -r "$usage_file" ]; then
    cat -- "$usage_file"
  else
    # Fallback derived from the two positional arguments the script checks.
    echo "Usage: ${0##*/} SPECIES RECEPTOR"
  fi
}
# Both positional arguments are mandatory. When one is missing, a message is
# printed, the usage text is shown and the script stops with exit status 2.

# First argument: the species.
if [ -n "$1" ]; then
  SPECIES=$1
else
  echo "SPECIES not provided. Will exit."
  correct_usage
  exit 2
fi

# Second argument: the receptor.
if [ -n "$2" ]; then
  RECEPTOR=$2
else
  echo "RECEPTOR not provided. Will exit."
  correct_usage
  exit 2
fi
# Collect the regular files below a directory whose names match a pattern.
#
# Arguments:
#   $1 - directory to search (find descends into sub-directories)
#   $2 - output location; accepted for interface compatibility, not used
#   $3 - file-name pattern passed to find -name (quote it at the call site
#        so the shell does not expand it first)
# Globals written:
#   FQ_LIST - newline-separated paths of the matching files (empty if none)
#   size    - character length of FQ_LIST, i.e. 0 when nothing matched
collect_fastas()
{
  local the_path=$1
  local the_pattern=$3

  # Command substitution actually runs find; a single-quoted string would
  # only store the command text. The predicate for regular files is -type f.
  FQ_LIST=$(find "$the_path" -type f -name "$the_pattern")
  size=${#FQ_LIST}
}
# ---------------------------------------------------------------------------
# Main pipeline:
#   1. copy the vdj germline folder into the build directory,
#   2. gather the files of the requested receptor and rename them to
#      d.fasta / j.fasta / v.fasta,
#   3. build a repseqio library (lib2.json) from them,
#   4. align sample.fasta with mixcr against that library and export a TSV,
#   5. run finalOutputConvert.py and copy the results to OUTPUT_DIR.
# ---------------------------------------------------------------------------

# Rename the one germline file of a gene segment to a fixed name.
#   $1 - segment letter (D, J or V)
#   $2 - new file name
# Looks in the current directory for a name containing the upper-cased
# receptor followed by the segment letter. Exits when there is not exactly
# one match, because mv cannot rename zero or several files to one name.
rename_segment_fasta()
{
  local segment=$1
  local target=$2
  local -a matches=()

  mapfile -t matches < <(find . -maxdepth 1 -name "*${RECEPTOR^^}${segment}*" -print)
  if (( ${#matches[@]} != 1 )); then
    echo "Expected exactly one ${RECEPTOR^^}${segment} germline file, found ${#matches[@]}. Will exit." >&2
    exit 1
  fi
  mv -- "${matches[0]}" "$target"
}

# Sets FQ_LIST and size; the steps below do not consume them yet.
collect_fastas "$INPUT_DIR" "$INPUT_DIR" "*.fasta"

# Work on a copy of the vdj folder; files are moved out of the copy below.
# TODO: change so that only the needed part of the vdj folder is copied?
cp -R "$GERMLINE_DIR/vdj" "$GERMLINE_BUILD_DIR"

# Directory that receives the germline files of the requested receptor.
# -p keeps a rerun from failing when the directory already exists.
receptor_dir=$GERMLINE_BUILD_DIR/$RECEPTOR
mkdir -p "$receptor_dir"

# Move every file whose content mentions the receptor (case-insensitive)
# into the receptor directory. The cd is checked: running the recursive
# grep/mv from any other directory would move unrelated files.
# NUL-delimited names survive spaces; xargs -r skips mv when nothing matched.
cd "$GERMLINE_BUILD_DIR/vdj" || exit 1
grep -lirZ -e "$RECEPTOR" . | xargs -0 -r mv -t "$receptor_dir" --

# Rename the files so they can be used by repseqio.
cd "$receptor_dir" || exit 1
rename_segment_fasta D d.fasta
rename_segment_fasta J j.fasta
rename_segment_fasta V v.fasta

# Carry out the repseqio steps.
cd /mixcrep || exit 1

# Prefix of the generated library files. Lower-casing the receptor keeps the
# names that were produced before for IGH (ighd.fasta, ...) and guarantees
# that the copy step below looks for exactly the files written here.
lib_prefix=${RECEPTOR,,}

# TODO: taxon id 39442 is hard-coded here and in the mixcr call below; the
# SPECIES argument is not used yet.
./repseqio-1.3.4/repseqio fromPaddedFasta -t 39442 -c "$RECEPTOR" --name-index 1 -g D --gene-feature DRegion "$receptor_dir/d.fasta" "${lib_prefix}d.fasta" "${lib_prefix}d.d.json"
./repseqio-1.3.4/repseqio fromPaddedFasta -t 39442 -c "$RECEPTOR" --name-index 1 -g J --gene-feature JRegion "$receptor_dir/j.fasta" "${lib_prefix}j.fasta" "${lib_prefix}j.j.json"
./repseqio-1.3.4/repseqio fromPaddedFasta -t 39442 -c "$RECEPTOR" --name-index 1 -g V --gene-feature VRegion "$receptor_dir/v.fasta" "${lib_prefix}v.fasta" "${lib_prefix}v.v.json"
./repseqio-1.3.4/repseqio merge "${lib_prefix}d.d.json" "${lib_prefix}j.j.json" "${lib_prefix}v.v.json" lib2.json -f
./repseqio-1.3.4/repseqio inferPoints -g VRegion -g JRegion -g DRegion -f lib2.json lib2.json

# Put the library and the sequence files next to each other in the mixcr
# libraries folder.
cp "/mixcrep/${lib_prefix}d.fasta" /mixcrep/mixcr-3.0.13/libraries
cp "/mixcrep/${lib_prefix}v.fasta" /mixcrep/mixcr-3.0.13/libraries
cp "/mixcrep/${lib_prefix}j.fasta" /mixcrep/mixcr-3.0.13/libraries
cp /mixcrep/lib2.json /mixcrep/mixcr-3.0.13/libraries

# mixcr alignment
cd /mixcrep/mixcr-3.0.13 || exit 1

# TODO: only sample.fasta is processed; extend to all files in INPUT_DIR.
mixcr align -f --write-all --library lib2 --species 39442 -O saveOriginalReads=true "$INPUT_DIR/sample.fasta" output.vdjca
mixcr exportAlignments -descrsR1 -vHit -dHit -jHit -aaFeature CDR3 output.vdjca output.tsv -f
cp /mixcrep/mixcr-3.0.13/output.tsv "$OUTPUT_DIR"

# Post-process the export. finalMixcrep.tsv is presumably written to the
# current directory by the converter - verify against finalOutputConvert.py.
python3 /mixcrep/tools/finalOutputConvert.py "$INPUT_DIR/sample.fasta" "$OUTPUT_DIR/output.tsv"
cp finalMixcrep.tsv "$OUTPUT_DIR"