-
Notifications
You must be signed in to change notification settings - Fork 0
/
ppl_vCall_ref_Hr4Rm47Prc4.pbs
160 lines (126 loc) · 5.03 KB
/
ppl_vCall_ref_Hr4Rm47Prc4.pbs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Request 1 processors on 1 node
#PBS -l nodes=1:ppn=5
#Request x number of hours of walltime
#PBS -l walltime=18:00:00
#Request 47 gigabyte of memory per process
#PBS -l mem=47gb
#Request that regular output and terminal output go to the same file
#PBS -j oe
#PBS -m abe
ulimit -Ss unlimited
export JOB_SCRATCH=/scratch/${PBS_JOBCOOKIE}
echo "Will use scratch dir ${JOB_SCRATCH} on ${HOSTNAME}"
mkdir -p ${JOB_SCRATCH}/tmp
cd ${JOB_SCRATCH}
echo "------------------------------------------------------"
echo "${sample}_${sample}"
echo "start time: $(date)"
module load samtools
ref=${ref} #broad-hg38.masked
refdirName=${refdirName} #masked_hg38
ftpjob=${ftpjob}
study=${study}
sample=${sample}
dirPath0="/mnt/nap2/gbVol1/transRegVar/exrna/vCall"
mkdir -p ${dirPath0}/c/${sample}/vCall_${ref}
mkdir -p ${study}/${sample}/vCall_${ref}
cp -v ${dirPath0}/../fastq/${ftpjob}/${sample}.fastq.gz ${study}/${sample}/${sample}.fastq.gz
cp -Rv ${dirPath0}/../refGenome/${refdirName}_4pbs/ ${refdirName}/
echo "cp time: $(date)"
echo
echo "cp time: $(date)"
echo
cd ${refdirName}
tar -zxvf ${refdirName}.tar.gz
tar -zxvf STARind_${ref}.tar.gz
cd ..
dirPath=$(pwd)
echo $dirPath
refGenomePath="${dirPath}/${refdirName}/STARind_${ref}"
echo "decompress time: $(date)"
echo
module load STAR
module load gatk
module load picard
module load samtools
module load BEDTools
echo
echo "study:${study}"
echo "sample:${sample}"
#
cd $dirPath/${study}/${sample}/vCall_${ref}
echo "------------------------------------------------------"
# Map fast to personal reference genome
STAR \
--genomeDir ${refGenomePath} \
--sjdbGTFfile ${refGenomePath}/../${ref}.gtf \
--runThreadN 4pl \
--twopassMode Basic \
--twopass1readsN -1 \
--limitSjdbInsertNsj 3000000 \
--outSAMtype BAM SortedByCoordinate \
--outSAMattrRGline ID:$sample SM:$sample PL:$sample \
--readFilesIn ${dirPath}/${study}/${sample}/${sample}.fastq.gz \
--readFilesCommand zcat \
--outFileNamePrefix ${sample}. > ConsoleOutput.STAR.${sample}.out
echo "STAR time: $(date)"
echo "------------------------------------------------------"
# Mark duplicates
java -Xmx47G -Djava.io.tmpdir=./ -jar /opt/picard/2.25/picard.jar AddOrReplaceReadGroups \
I=${sample}.Aligned.sortedByCoord.out.bam \
O=${sample}.tmp \
RGID=${sample} RGLB=${sample} RGPL=${sample} RGPU=${sample} RGSM=${sample} && \
java -Xmx47G -Djava.io.tmpdir=./ -jar /opt/picard/2.25/picard.jar MarkDuplicates \
I=${sample}.tmp \
O=${sample}.markdup.bam \
CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=${sample}.metrics > ConsoleOutput.markDup.${sample}.out
echo "MarkDuplicate time: $(date)"
echo "------------------------------------------------------"
# cigarN
gatk --java-options '-Xmx47G -DGATK_STACKTRACE_ON_USER_EXCEPTION=true' SplitNCigarReads \
-R $refGenomePath/../${ref}.fa \
-I ${sample}.markdup.bam \
-O ${sample}.split.bam
echo "CiGar time: $(date)"
echo "------------------------------------------------------"
## BQSR
gatk --java-options '-Xmx47G -DGATK_STACKTRACE_ON_USER_EXCEPTION=true' BaseRecalibrator \
-R $refGenomePath/../${ref}.fa \
-I ${sample}.split.bam \
--known-sites ${refGenomePath}/../resources-broad-hg38-v0-1000G_phase1.snps.high_confidence.hg38.vcf.gz \
--known-sites ${refGenomePath}/../resources-broad-hg38-v0-Mills_and_1000G_gold_standard.indels.hg38.vcf.gz \
-O ${sample}.recalibrated.bam.grp \
&& gatk --java-options '-Xmx47G -DGATK_STACKTRACE_ON_USER_EXCEPTION=true' ApplyBQSR \
-bqsr ${sample}.recalibrated.bam.grp \
-R $refGenomePath/../${ref}.fa \
-I ${sample}.split.bam \
-O ${sample}.recalibrated.bam
echo "BQSR time: $(date)"
echo "------------------------------------------------------"
# haplotypeCaller
gatk --java-options '-Xmx47G -DGATK_STACKTRACE_ON_USER_EXCEPTION=true' HaplotypeCaller \
-R $refGenomePath/../${ref}.fa \
--standard-min-confidence-threshold-for-calling 20 \
--dont-use-soft-clipped-bases \
-I ${sample}.split.bam \
-O ${sample}.vcf.gz
echo "haplotypeCaller time: $(date)"
echo "------------------------------------------------------"
# variantFilter
gatk --java-options '-Xmx47G -DGATK_STACKTRACE_ON_USER_EXCEPTION=true' VariantFiltration \
-R $refGenomePath/../${ref}.fa \
-V ${sample}.vcf.gz \
-O ${sample}.filtered.vcf.gz \
--window 35 --cluster 3 --filter-name "FS" \
--filter "FS>30.0" --filter-name "QD" --filter "QD<2.0" > ConsoleOutput.VariantFiltration.${sample}.out
echo "variantFilter time: $(date)"
echo "------------------------------------------------------"
if [ -f ${sample}.filtered.vcf.gz ] ; then
tar -cvzf ${sample}.tar.gz ConsoleOutput.* *.sortedByCoord.out.bam *.Log.* *.out.tab *.vcf.gz *.vcf.gz.tbi
cp ${sample}.tar.gz ${dirPath0}/${study}/${sample}/vCall_${ref}/${sample}.tar.gz ; else
tar -cvzf ${sample}.bam.tar.gz ConsoleOutput.* *.sortedByCoord.out.bam *.Log.* *.out.tab
cp ${sample}.bam.tar.gz ${dirPath0}/${study}/${sample}/vCall_${ref}/${sample}.bam.tar.gz
echo "not ready" ; fi
echo "end time: $(date)"
qstat -f ${PBS_JOBID}
rm -rf ${JOB_SCRATCH}