diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml index 035d1cc84..9ac974da2 100644 --- a/.github/.dockstore.yml +++ b/.github/.dockstore.yml @@ -141,6 +141,7 @@ workflows: filters: branches: - main + - kj_sv_cleanvcf tags: - /.*/ diff --git a/dockerfiles/sv-pipeline/Dockerfile b/dockerfiles/sv-pipeline/Dockerfile index d4f9aa687..5d9c759c7 100644 --- a/dockerfiles/sv-pipeline/Dockerfile +++ b/dockerfiles/sv-pipeline/Dockerfile @@ -70,13 +70,9 @@ RUN plink2 || true # -Compile StitchFragmentedCNVs Java program # -Compile StitchFragmentedCNVs unit tests # -Compile VCFParser unit tests -# -Compile and test CleanVCFPart1 Java program -# -Compile and test CleanVCFPart1 unit tests ENV STITCH_JAR="/opt/sv-pipeline/java/build/StitchFragmentedCNVs.jar" ARG STITCH_UNIT_TEST_JAR="/opt/sv-pipeline/java/build/StitchFragmentedCNVsUnitTest.jar" ARG VCF_PARSER_UNIT_TEST_JAR="/opt/sv-pipeline/java/build/VCFParserUnitTest.jar" -ENV CLEAN_VCF_PART_1_JAR="/opt/sv-pipeline/java/build/CleanVCFPart1.jar" -ARG CLEAN_VCF_PART_1_UNIT_TEST_JAR="/opt/sv-pipeline/java/build/CleanVCFPart1UnitTest.jar" ARG BUILD_DEPS="openjdk-8-jdk" ARG DEBIAN_FRONTEND=noninteractive RUN export APT_TRANSIENT_PACKAGES=$(diff_of_lists.sh "$BUILD_DEPS" $APT_REQUIRED_PACKAGES) && \ @@ -97,14 +93,6 @@ RUN export APT_TRANSIENT_PACKAGES=$(diff_of_lists.sh "$BUILD_DEPS" $APT_REQUIRED echo "Running VCFParserUnitTest..." && \ java -enableassertions -jar $VCF_PARSER_UNIT_TEST_JAR && \ rm -rf build/classes/* $VCF_PARSER_UNIT_TEST_JAR && \ - javac -d build/classes org/broadinstitute/svpipeline/CleanVCFPart1.java org/broadinstitute/svpipeline/VCFParser.java && \ - jar cfe build/CleanVCFPart1.jar "org.broadinstitute.svpipeline.CleanVCFPart1" -C build/classes . && \ - rm -rf build/classes/* && \ - javac -d build/classes org/broadinstitute/svpipeline/CleanVCFPart1UnitTest.java org/broadinstitute/svpipeline/CleanVCFPart1.java org/broadinstitute/svpipeline/VCFParser.java && \ - jar cfe build/CleanVCFPart1UnitTest.jar "org.broadinstitute.svpipeline.CleanVCFPart1UnitTest" -C build/classes . && \ - echo "Running CleanVCFPart1UnitTest..." 
&& \ - java -enableassertions -jar $CLEAN_VCF_PART_1_UNIT_TEST_JAR && \ - rm -rf build/classes/* $CLEAN_VCF_PART_1_UNIT_TEST_JAR && \ apt-get -qqy remove --purge $APT_TRANSIENT_PACKAGES && \ apt-get -qqy autoremove --purge && \ apt-get -qqy clean && \ diff --git a/inputs/values/dockers.json b/inputs/values/dockers.json index b68084b91..0b92b966f 100644 --- a/inputs/values/dockers.json +++ b/inputs/values/dockers.json @@ -12,8 +12,8 @@ "samtools_cloud_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/samtools-cloud:2024-10-25-v0.29-beta-5ea22a52", "sv_base_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base:2024-10-25-v0.29-beta-5ea22a52", "sv_base_mini_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base-mini:2024-10-25-v0.29-beta-5ea22a52", - "sv_pipeline_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-11-15-v1.0-488d7cb0", - "sv_pipeline_qc_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-11-15-v1.0-488d7cb0", + "sv_pipeline_docker": "us-central1-docker.pkg.dev/talkowski-training/kj-development/sv-pipeline:kj-clean-vcf-170ec7c38b004f02639b5f82270113b74162fe79", + "sv_pipeline_qc_docker": "us-central1-docker.pkg.dev/talkowski-training/kj-development/sv-pipeline:kj-clean-vcf-170ec7c38b004f02639b5f82270113b74162fe79", "wham_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/wham:2024-10-25-v0.29-beta-5ea22a52", "igv_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/igv:mw-xz-fixes-2-b1be6a9", "duphold_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/duphold:mw-xz-fixes-2-b1be6a9", @@ -28,5 +28,5 @@ "sv_utils_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-utils:2024-10-25-v0.29-beta-5ea22a52", "gq_recalibrator_docker": "us.gcr.io/broad-dsde-methods/markw/gatk:mw-tb-form-sv-filter-training-data-899360a", "str": "us.gcr.io/broad-dsde-methods/gatk-sv/str:2023-05-23-v0.27.3-beta-e537bdd6", - "denovo": "us.gcr.io/broad-dsde-methods/gatk-sv/denovo:2024-11-15-v1.0-488d7cb0" + "denovo": "us-central1-docker.pkg.dev/talkowski-training/kj-development/denovo:kj-clean-vcf-170ec7c38b004f02639b5f82270113b74162fe79" } \ No newline at end of file diff --git a/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1b_build_dict.py b/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1b_build_dict.py deleted file mode 100644 index b7da153cb..000000000 --- a/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1b_build_dict.py +++ /dev/null @@ -1,154 +0,0 @@ -""" -Remove CNVs that are improperly genotyped by depth because they are nested -within a real CNV -""" - -import logging -import pybedtools -import pysam -import sys -import json - -from collections import defaultdict - -SVTYPE = "SVTYPE" -BLANK_SAMPLES = "blanksample" - - -class SVType: - DUP = "DUP" - DEL = "DEL" - - -class VariantFormatTypes: - # Predicted copy state - RD_CN = "RD_CN" - # Classes of evidence supporting final genotype - EV = "EV" - - -class VCFReviser: - def __init__(self): - self.rd_cn = {} - self.sample_indices_dict = {} - self.sample_list = [] - - def _update_rd_cn(self, variant, sample_indices): - self.rd_cn[variant.id] = {s: variant.samples[s][VariantFormatTypes.RD_CN] for s in sample_indices} - - @staticmethod - def get_wider(f): - # f[1] : first interval start - # f[2] : first interval end - # f[7] : second interval start - # f[8] : second interval end - if int(f[2]) - int(f[1]) >= int(f[8]) - int(f[7]): - return f[0:6], f[6:12] - else: - return f[6:12], f[0:6] - - @staticmethod - def get_coverage(wider, narrower): - n_start = int(narrower[1]) - n_stop = int(narrower[2]) - w_start = 
int(wider[1]) - w_stop = int(wider[2]) - - coverage = 0 - if w_start <= n_stop and n_start <= w_stop: - intersection_size = min(n_stop, w_stop) - max(n_start, w_start) - coverage = intersection_size / (n_stop - n_start) - return coverage - - def get_geno_normal_revise(self, vcf_file, bed_file): - overlap_test_text = defaultdict(dict) - with pysam.VariantFile(vcf_file, "r") as f: - header = f.header - i = -1 - for sample in header.samples: - i += 1 - self.sample_indices_dict[sample] = i - self.sample_list.append(sample) - - logging.info("Filtering intersect results") - bed = pybedtools.BedTool(bed_file) - for interval in bed.intervals: - wider, narrower = self.get_wider(interval.fields) - # wider and narrower are lists/tuples with the following fields: - # [0] : contig - # [1] : start position - # [2] : end position - # [3] : variant ID - # [4] : SV type - # [5] : comma-delimited sample lists, or BLANK_SAMPLES if none - if wider[5] == BLANK_SAMPLES: - continue - - coverage = self.get_coverage(wider, narrower) - if coverage >= 0.5: - wider_samples = set(wider[5].split(",")) - narrower_samples = set(narrower[5].split(",")) - non_common_samples = [self.sample_indices_dict[s] for s in wider_samples - narrower_samples] - for x in non_common_samples: - vid = narrower[3] - overlap_test_text[vid][x] = (wider[3], wider[4]) - - # Determine for which vid/sample pairs we need RD_CN - # Substantially reduces memory - logging.info('Getting revised variant IDs') - revise_vids = defaultdict(set) - for var_id, samples_dict in overlap_test_text.items(): - for sample_index, v in samples_dict.items(): - # v[0] : variant ID - # v[1] : SV type - if v[1] == SVType.DUP or v[1] == SVType.DEL: - revise_vids[var_id].add(sample_index) - revise_vids[v[0]].add(sample_index) - - logging.info('Getting RD_CN/EV') - for variant in f: - if variant.id in revise_vids: - sample_indices = revise_vids[variant.id] - self._update_rd_cn(variant, sample_indices) - - logging.info('Generating geno_normal_revise_dict') - geno_normal_revise_dict = {} - for var_id, samples_dict in overlap_test_text.items(): - for sample_index, v in samples_dict.items(): - # v[0] : variant ID - # v[1] : SV type - new_val = None - if sample_index not in revise_vids[v[0]]: - sys.stderr.write("{} {}\n".format(sample_index, v[0])) - if v[1] == SVType.DUP and \ - self.rd_cn[var_id][sample_index] == 2 and \ - self.rd_cn[v[0]][sample_index] == 3: - new_val = 1 - elif v[1] == SVType.DEL and \ - self.rd_cn[var_id][sample_index] == 2 \ - and self.rd_cn[v[0]][sample_index] == 1: - new_val = 3 - - if new_val: - if var_id not in geno_normal_revise_dict: - geno_normal_revise_dict[var_id] = {} - sample_id = self.sample_list[sample_index] - geno_normal_revise_dict[var_id][sample_id] = new_val - - return geno_normal_revise_dict - - -def main(args): - logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) - logging.info('Starting script') - reviser = VCFReviser() - filtered_vcf = args[1] - intersected_bed = args[2] - geno_normal_revise_dict = reviser.get_geno_normal_revise(filtered_vcf, intersected_bed) - logging.info('Dumping dictionary') - sys.stdout.write(json.dumps(geno_normal_revise_dict)) - logging.info('Done') - - -if __name__ == '__main__': - main(sys.argv) diff --git a/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1b_filter.py b/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1b_filter.py deleted file mode 100644 index e63b890cd..000000000 --- a/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1b_filter.py 
+++ /dev/null @@ -1,82 +0,0 @@ -""" -Remove CNVs that are improperly genotyped by depth because they are nested -within a real CNV -""" - -import os -import logging -import pysam -import sys -from pathlib import Path -import json -import gzip - -SVTYPE = "SVTYPE" -BLANK_SAMPLES = "B" - - -class SVType: - DUP = "DUP" - DEL = "DEL" - - -class VariantFormatTypes: - # Predicted copy state - RD_CN = "RD_CN" - # Classes of evidence supporting final genotype - EV = "EV" - - -def modify_variants(dict_file_gz, vcf, multi_cnvs): - logging.info('Loading dictionary') - with gzip.open(dict_file_gz, 'rt') as f: - geno_normal_revise_dict = json.load(f) - - logging.info('Filtering variants') - with pysam.VariantFile(vcf, "r") as f_in: - header = f_in.header - sys.stdout.write(str(header)) - with open(multi_cnvs, "w") as multi_cnvs_f: - variants = f_in.fetch() - for variant in variants: - if variant.id in geno_normal_revise_dict: - for sample_id in geno_normal_revise_dict[variant.id]: - o = variant.samples[sample_id] - o.update({"GT": (0, 1)}) - o.update({"GQ": o["RD_GQ"]}) - - if variant.stop - variant.start >= 1000: - if variant.info[SVTYPE] in [SVType.DEL, SVType.DUP]: - is_del = variant.info[SVTYPE] == SVType.DEL - for k, v in variant.samples.items(): - rd_cn = v[VariantFormatTypes.RD_CN] - if rd_cn is None: - continue - if (is_del and rd_cn > 3) or \ - (not is_del and (rd_cn < 1 or rd_cn > 4)): - multi_cnvs_f.write(variant.id + "\n") - break - - sys.stdout.write(str(variant)) - - -def ensure_file(filename): - filename = os.path.join(".", filename) - filename = Path(filename) - if filename.exists(): - os.remove(filename) - return filename.name - - -def main(args): - logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) - logging.info('Starting script') - multi_cnvs_filename = ensure_file("multi.cnvs.txt") - dict_file_gz = args[1] - vcf_file = args[2] - modify_variants(dict_file_gz, vcf_file, multi_cnvs_filename) - logging.info('Done') - - -if __name__ == '__main__': - main(sys.argv) diff --git a/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part2.sh b/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part2.sh deleted file mode 100755 index 5d4827fa5..000000000 --- a/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part2.sh +++ /dev/null @@ -1,171 +0,0 @@ -#!/bin/bash -# -# clean_vcf_part2.sh -# - -set -euo pipefail - -##bgzipped combined vcf from clean vcf part1b.sh## -normal_revise_vcf_gz=$1 -##whitelist of split ids for parallelization## -whitelist=$2 -##list of multiallelic CNVs from 1b## -multi_cnv=$3 -##output filename## -outputfile=$4 - -export LC_ALL=C - -##subset vcf to whitelist samples## -bcftools view $normal_revise_vcf_gz -S $whitelist --no-update \ - | gzip \ - > subset.vcf.gz - - -##create new bed with updated genotypes### -zcat subset.vcf.gz \ - | awk 'BEGIN{FS=OFS="\t"}{if (substr($1,1,1)=="#" || $5=="" || $5=="") print}' \ - | svtk vcf2bed stdin stdout \ - | sed 1d \ - | sort -k4,4 \ - | gzip \ - > int.afternormalfix.bed.gz - -##Find overlapping depth based variants and reassign depth based; note this is necessary because depth call >5kb genotypes are 100% driven by depth ## -##generate a sample list based on depth for depth overlap check below. 
Necessary because genotype is capped at 1/1 and by direction (i.e no dels in dups)## -##grab all samples per variant with a non normal copy state## -zcat subset.vcf.gz \ - | awk 'BEGIN{FS=OFS="\t"}{if (substr($1,1,1)=="#") print; else if ($5=="" || $5=="") {$1=$3; print}}' \ - | vcftools --vcf - --stdout --extract-FORMAT-info RD_CN \ - | awk 'BEGIN{FS=OFS="\t"} NR==1{for (i=3;i<=NF;i++) header[i]=$i} NR>1{for(j=3;j<=NF;j++) print $1,header[j],$j }' \ - | sort -k1,1 \ - | awk 'BEGIN{FS=OFS="\t"} \ - {if (($1~"DEL" && $3<2 && $3!=".") || ($1~"DUP" && $3>2 && $3!=".")) a[$1]=a[$1]?a[$1]","$2:$2} \ - END{for (i in a) print i,a[i]}' \ - | sort -k1,1 \ - > afternormal.combined.RD_CN.list.txt - -##get a list of samples for actual variants not just those with aberrant copy states## -zcat int.afternormalfix.bed.gz \ - | awk 'BEGIN{FS=OFS="\t"} $6!="" {split($6,samples,","); for (i in samples) print $4"@"samples[i]}' \ - | sort -k1,1 \ - > fullvar.afternormal.list.txt - -##create bed with anything that has abnormal copy state## -##do not compress all.bed because of bedtools bug: https://github.com/arq5x/bedtools2/issues/643## -zcat int.afternormalfix.bed.gz \ - | cut -f1-5 \ - | awk 'BEGIN{FS=OFS="\t"}{if($3-$2>=5000)print $0}' \ - | join -1 4 -2 1 -t ' ' - afternormal.combined.RD_CN.list.txt \ - | awk 'BEGIN {FS=OFS="\t"} \ - $6!=""{split($6,samples,","); \ - for (i in samples) {s = samples[i]; print $2"_"s,$3,$4,$1,$5,s,$1"@"s}}' \ - | sort -k7,7 \ - | join -t ' ' -a 1 -1 7 -2 1 -e "NA" -o 1.1,1.2,1.3,1.4,1.5,1.6,1.7,2.1 - fullvar.afternormal.list.txt \ - > all.bed - -##intersect variants and always set larger to left## -bedtools intersect -wa -wb -a all.bed -b all.bed \ - | awk 'BEGIN{FS=OFS="\t"} \ - {if ($8=="NA") { if ($16!="NA") print $9,$10,$11,$12,$13,$14,$15,$1,$2,$3,$4,$5,$6,$7; } \ - else if ($4!=$12) { - if ($3-$2>=$11-$10) print $1,$2,$3,$4,$5,$6,$7,$9,$10,$11,$12,$13,$14,$15; \ - else if ($16!="NA") print $9,$10,$11,$12,$13,$14,$15,$1,$2,$3,$4,$5,$6,$7; \ - else print $1,$2,$3,$4,$5,$6,$7,$9,$10,$11,$12,$13,$14,$15;}}' \ - | sort -k7,7 \ - | uniq \ - | gzip \ - > bed.overlap.txt.gz - -zcat subset.vcf.gz \ - | grep '^#' \ - > vcf_header -zcat bed.overlap.txt.gz \ - | awk '{print $4; print $11}' \ - | sort -u \ - > overlap.events -echo '~~~' >> overlap.events -zcat subset.vcf.gz \ - | grep -v '^#' \ - | sort -k3,3 \ - | join -t ' ' -1 1 -2 3 overlap.events - \ - | awk 'BEGIN{FS=OFS="\t"}{tmp=$2;$2=$3;$3=tmp;print}' \ - | sort -k2n,2 \ - > overlap.events.vcf - -##get info for each variant## -zcat bed.overlap.txt.gz \ - | awk 'BEGIN{FS=OFS="\t"}{print $7; print $14;}' \ - | sort -u \ - > combined.bed -for var in EV RD_CN GT -do - echo '~~~' >> combined.bed - cat vcf_header overlap.events.vcf \ - | vcftools --vcf - --stdout --extract-FORMAT-info ${var} \ - | awk 'BEGIN{FS=OFS="\t"} \ - NR==1{for (i=3;i<=NF;i++) header[i]=$i} \ - NR>1 {for (j=3;j<=NF;j++) print $1"@"header[j],$j}' \ - | sort -k1,1 \ - | join -t ' ' -1 1 -2 1 combined.bed - \ - > combined.bed.tmp - mv combined.bed.tmp combined.bed -done - -zcat bed.overlap.txt.gz \ - | join -t ' ' -1 7 -2 1 - combined.bed \ - | cut -f2- \ - | sort -k13,13 \ - | join -t ' ' -1 13 -2 1 - combined.bed \ - | cut -f2- \ - | awk 'BEGIN{FS=OFS="\t"}{print $3-$2,$9-$8,$0}' \ - | sort -k1nr,1 -k2nr,2 \ - | cut -f3- \ - | gzip \ - > all.combined.bed.gz - -zcat all.combined.bed.gz \ - | awk -v multiCNVFile=$multi_cnv ' \ - function makeRevision( id, val ) { reviseCN[id] = val; if ( val == 2 ) wasRevisedToNormal[id] = 1; } \ - BEGIN \ - {FS=OFS="\t"; \ 
- while ( getline < multiCNVFile ) multiCNV[$0] = 1; \ - close(multiCNVFile)} \ - {chr_sample1 = $1; start1 = $2; stop1 = $3; ev1 = $4; svtype1 = $5; sample1 = $6; \ - chr_sample2 = $7; start2 = $8; stop2 = $9; ev2 = $10; svtype2 = $11; sample2 = $12; \ - support1 = $13; RD_CN1 = $14; GT1 = $15; support2 = $16; RD_CN2 = $17; GT2 = $18; \ - id1 = ev1"@"sample1; id2 = ev2"@"sample2; \ - length1 = stop1 - start1; length2 = stop2 - start2; \ - if ( id1 in wasRevisedToNormal ) next; \ - if ( id1 in reviseCN ) RD_CN1 = reviseCN[id1]; \ - if ( id2 in reviseCN ) RD_CN2 = reviseCN[id2]; \ - overlap = (stop1 < stop2 ? stop1 : stop2) - (start1 > start2 ? start1 : start2); \ - smallOverlap50 = overlap/length2 > .5; \ - largeOverlap50 = overlap/length1 > .5; \ - ##Call where smaller depth call is being driven by larger## \ - if ( support1 ~ /RD/ && support1 != "RD" && support2 == "RD" && smallOverlap50 && !(ev1 in multiCNV) ) { \ - if ( RD_CN1 == 0 ) makeRevision(id2, RD_CN2 + 2); \ - else if ( RD_CN1 == 1 ) makeRevision(id2, RD_CN2 + RD_CN1); \ - else if ( RD_CN1 > 1 ) { newCN = RD_CN2 - RD_CN1 + 2; if ( newCN < 0 ) newCN = 0; makeRevision(id2, newCN); } } \ - ##Smaller CNV driving larger CNV genotype## \ - else if ( support1 == "RD" && support2 ~ /RD/ && support2 != "RD" && smallOverlap50 && !(ev2 in multiCNV) && GT2 != "0/0" && largeOverlap50 ) { \ - if ( RD_CN2 == 0 ) makeRevision(id1, RD_CN1 + 2); \ - else if ( RD_CN2 == 1 ) makeRevision(id1, RD_CN1 + RD_CN2); \ - else if ( RD_CN2 > 1 ) { newCN = RD_CN1 - RD_CN2 + 2; if ( newCN < 0 ) newCN = 0; makeRevision(id1, newCN); } } \ - ##Depth only calls where smaller call is being driven by larger## \ - else if ( support1 == "RD" && support2 == "RD" && smallOverlap50 && svtype1 == svtype2 && !(ev1 in multiCNV) ) { \ - if ( RD_CN1 == 0 && RD_CN1 != RD_CN2 ) makeRevision(id2, RD_CN2 + 2); \ - else if ( RD_CN1 == 1 && RD_CN1 > RD_CN2 ) makeRevision(id2, 1); \ - else if ( RD_CN1 > 1 && RD_CN1 < RD_CN2 ) { newCN = RD_CN2 - RD_CN1 + 2; if ( newCN < 0 ) newCN = 0; makeRevision(id2, newCN); } \ - else makeRevision(id2, 2); } \ - ##Any other time a larger call is driving a smaller call## \ - else if ( support1 ~ /RD/ && smallOverlap50 && length2 > 5000 && !(ev1 in multiCNV) ) { \ - if ( RD_CN1 == 0 ) makeRevision(id2, RD_CN2 + 2); \ - else if ( RD_CN1 == 1 ) makeRevision(id2, RD_CN2 + RD_CN1); \ - else if ( RD_CN1 > 1 ) { newCN = RD_CN2 - RD_CN1 + 2; if ( newCN < 0 ) newCN = 0; makeRevision(id2, newCN); } } } \ - END \ - {for ( id in reviseCN ) print id,reviseCN[id]; }' \ - | sed 's/@/ /' \ - | sort \ - | awk 'BEGIN{FS=OFS="\t"}{if ($3<0) $3=0; print $0}' \ - > $outputfile diff --git a/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part5_find_redundant_multiallelics.py b/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part5_find_redundant_multiallelics.py deleted file mode 100755 index ad2b744a5..000000000 --- a/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part5_find_redundant_multiallelics.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python - -import argparse -import sys -import svtk.utils as svu - - -def process_features_for_size1(features_for_size1, redundant_multiallelics): - for intersection in sorted(features_for_size1, key=lambda x: int(x[9]) - int(x[8]), reverse=True): - b_len = int(intersection.fields[9]) - int(intersection.fields[8]) - overlap = int(intersection.fields[14]) - small_coverage = overlap / b_len - if small_coverage > 0.50: - if intersection.fields[3] not in redundant_multiallelics: - 
redundant_multiallelics.add(intersection.fields[10]) - - -def main(): - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('multiallelic_filename') - parser.add_argument('fout') - args = parser.parse_args() - - print("finding redundant overlapping sites", file=sys.stderr) - multiallelic_bed = svu.vcf2bedtool(args.multiallelic_filename, include_filters=True) - - redundant_multiallelics = set() - # feature fields: - # [1] : first interval start - # [2] : first interval end - # [3] : first interval variant ID - # [8] : second interval start - # [9] : second interval end - # [10] : second interval variant ID - self_inter = multiallelic_bed.intersect(multiallelic_bed, wo=True)\ - .filter(lambda feature: feature[3] != feature[10]) \ - .filter(lambda feature: (int(feature[2]) - int(feature[1])) >= (int(feature[9]) - int(feature[8]))) \ - .sort(sizeD=True) - current_size1 = -1 - features_for_size1 = [] - for feature in self_inter: - size1 = int(feature[2]) - int(feature[1]) - if size1 != current_size1: - process_features_for_size1(features_for_size1, redundant_multiallelics) - features_for_size1 = [] - - current_size1 = size1 - features_for_size1.append(feature) - - process_features_for_size1(features_for_size1, redundant_multiallelics) - print("identified {} redundant multiallelic sites".format(len(redundant_multiallelics)), file=sys.stderr) - with open(args.fout, "w") as list_file: - for vid in redundant_multiallelics: - print(vid, file=list_file) - - -if __name__ == '__main__': - main() diff --git a/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part5_update_records.py b/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part5_update_records.py deleted file mode 100755 index a5bf42982..000000000 --- a/src/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part5_update_records.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python - -import argparse -from collections import Counter -import gzip -import pysam -import sys -import svtk.utils as svu - - -def main(): - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('revise_vcf_lines', type=argparse.FileType('r')) - parser.add_argument('normal_revise_vcf') - parser.add_argument('famfile', type=argparse.FileType('r')) - parser.add_argument('sexchr_revise') - parser.add_argument('multi_geno_ids_txt') - parser.add_argument('outlier_samples_list', type=argparse.FileType('r')) - parser.add_argument('out_prefix') - parser.add_argument('--threads_per_file', required=False, default=2, type=int) - args = parser.parse_args() - - # load the revised lines and index by ID - with pysam.VariantFile(args.revise_vcf_lines, threads=args.threads_per_file) as revise_vcf: - header2 = revise_vcf.header - revised_lines_by_id = {record.id: record for record in revise_vcf} - print("loaded {} revised lines".format(len(revised_lines_by_id)), file=sys.stderr) - - outlier_samples = set([line.rstrip() for line in args.outlier_samples_list if not line.isspace()]) - print("loaded {} outlier samples".format(len(outlier_samples)), file=sys.stderr) - - male_samples = set() - for line in args.famfile: - if line.isspace(): - continue - fields = line.rstrip().split("\t") - if fields[4] == '1': - male_samples.add(fields[1]) - print("identified {} male samples".format(len(male_samples)), file=sys.stderr) - - if args.sexchr_revise.endswith(".gz"): - sexchr_revise = {line.rstrip() for line in 
gzip.open(args.sexchr_revise, 'rt')} - else: - sexchr_revise = {line.rstrip() for line in open(args.sexchr_revise, 'rt')} - print("{} sites to revise on sex chromosomes".format(len(sexchr_revise)), file=sys.stderr) - - if args.multi_geno_ids_txt.endswith(".gz"): - multi_geno_ids = {line.rstrip() for line in gzip.open(args.multi_geno_ids_txt, 'rt')} - else: - multi_geno_ids = {line.rstrip() for line in open(args.multi_geno_ids_txt, 'rt')} - print("{} multiallelic sites".format(len(multi_geno_ids)), file=sys.stderr) - - NEW_HEADER_LINES = ['##ALT=', - '##FORMAT=', - '##FORMAT=', - '##INFO=', - '##FILTER='] - - with pysam.VariantFile(args.normal_revise_vcf) as normal_vcf: - - # # Add metadata lines for annotations - header1 = normal_vcf.header - - for f in NEW_HEADER_LINES: - header1.add_line(f) - header2.add_line(f) - - non_outlier_samples = {s for s in header1.samples if s not in outlier_samples} - vf_1 = max(len(non_outlier_samples) * 0.01, 2) - - biallelic_gts = {(1, 1), (0, 0), (0, 1), (None, None)} - - print("reformatting records", file=sys.stderr) - cleangq_filename = args.out_prefix + ".cleanGQ.vcf.gz" - multiallelic_filename = args.out_prefix + ".multiallelic.vcf.gz" - no_variant_samples_list_file = args.out_prefix + ".no_called_samples.list" - - with pysam.VariantFile(cleangq_filename, 'w', header=normal_vcf.header, threads=args.threads_per_file) as cleanqg_out, \ - pysam.VariantFile(multiallelic_filename, 'w', header=normal_vcf.header) as multiallelic_out, \ - open(no_variant_samples_list_file, 'w') as no_variant_samples_out: - for idx, record in enumerate(normal_vcf): - multi_del = False - multi_dup = False - gt4_copystate = False - gt5kb_dup = False - gt5kb_del = False - if (idx - 1) % 1000 == 0: - print("processed {} records".format(idx), file=sys.stderr) - if record.id in revised_lines_by_id: - record = revised_lines_by_id[record.id] - if record.info.get('SVTYPE', None) == 'DEL': - if abs(record.stop - record.pos) >= 1000: - sample_cn_map = {s: record.samples[s]['RD_CN'] for s in non_outlier_samples} - if len([s for s in sample_cn_map if (sample_cn_map[s] is not None and sample_cn_map[s] > 3)]) > vf_1: - multi_del = True - gts = [record.samples[s]['GT'] for s in non_outlier_samples] - if any(gt not in biallelic_gts for gt in gts): - gt5kb_del = True - if abs(record.stop - record.pos) >= 5000: - if not multi_del: - gt5kb_del = True - - if record.info.get('SVTYPE', None) == 'DUP': - if abs(record.stop - record.pos) >= 1000: - sample_cn_map = {s: record.samples[s]['RD_CN'] for s in non_outlier_samples} - if sum(1 for s in sample_cn_map if sample_cn_map[s] is not None and sample_cn_map[s] > 4) > vf_1: - multi_dup = True - if sum(1 for x in Counter(sample_cn_map.values()) if x is not None and (x < 1 or x > 4)) > 4: - gt4_copystate = True - if sum(1 for s in sample_cn_map if sample_cn_map[s] is not None and - (sample_cn_map[s] < 1 or sample_cn_map[s] > 4) and gt4_copystate) > vf_1: - multi_dup = True - gts = [record.samples[s]['GT'] for s in non_outlier_samples] - if any(gt not in biallelic_gts for gt in gts): - gt5kb_dup = True - if abs(record.stop - record.pos) >= 5000: - if not multi_dup: - gt5kb_dup = True - - if gt5kb_del: - for sample_obj in record.samples.itervalues(): - # Leave no-calls - if sample_obj['GT'] == (None, None): - continue - if not sample_obj['GQ'] is None and \ - (sample_obj['RD_CN'] is not None and sample_obj['RD_CN'] >= 2): - sample_obj['GT'] = (0, 0) - elif not sample_obj['GQ'] is None and \ - (sample_obj['RD_CN'] is not None and sample_obj['RD_CN'] == 1): - 
sample_obj['GT'] = (0, 1) - elif not sample_obj['GQ'] is None: - sample_obj['GT'] = (1, 1) # RD_CN 0 DEL - - if gt5kb_dup: - for sample_obj in record.samples.itervalues(): - # Leave no-calls - also causes bug that skips multiallelic genotypes for a biallelic variant - if sample_obj['GT'] == (None, None): - continue - if not sample_obj['GQ'] is None and \ - (sample_obj['RD_CN'] is not None and sample_obj['RD_CN'] <= 2): - sample_obj['GT'] = (0, 0) - elif not sample_obj['GQ'] is None and \ - (sample_obj['RD_CN'] is not None and sample_obj['RD_CN'] == 3): - sample_obj['GT'] = (0, 1) - elif not sample_obj['GQ'] is None: - sample_obj['GT'] = (1, 1) # RD_CN > 3 DUP - - if record.id in multi_geno_ids: - record.info['PESR_GT_OVERDISPERSION'] = True - - if multi_del or multi_dup: - record.filter.add('MULTIALLELIC') - for j, sample in enumerate(record.samples): - record.samples[sample]['GT'] = None - record.samples[sample]['GQ'] = None - record.samples[sample]['CN'] = record.samples[sample]['RD_CN'] - record.samples[sample]['CNQ'] = record.samples[sample]['RD_GQ'] - - if len(record.filter) > 1 and 'PASS' in record.filter: - del record.filter['PASS'] - - if 'MULTIALLELIC' in record.filter and ('' in record.alts or '' in record.alts): - record.alts = ('',) - record.info['SVTYPE'] = 'CNV' - - if record.id in sexchr_revise: - for sample in record.samples: - if sample in male_samples: - cn = record.samples[sample]['RD_CN'] - if cn is not None and int(cn) > 0: - cn = int(cn) - record.samples[sample]['RD_CN'] = cn - 1 - if 'CN' in record.samples[sample]: - record.samples[sample]['CN'] = cn - 1 # the old script didn't do this but I think it should - - cleanqg_out.write(record) - - if 'MULTIALLELIC' in record.filter: - multiallelic_out.write(record) - - if len(svu.get_called_samples(record)) == 0: - print(record.id, file=no_variant_samples_out) - - print("done", file=sys.stderr) - - -if __name__ == '__main__': - main() diff --git a/src/sv-pipeline/04_variant_resolution/scripts/cleanvcf_preprocess.py b/src/sv-pipeline/04_variant_resolution/scripts/cleanvcf_preprocess.py new file mode 100644 index 000000000..d04e0a781 --- /dev/null +++ b/src/sv-pipeline/04_variant_resolution/scripts/cleanvcf_preprocess.py @@ -0,0 +1,101 @@ +#!/bin/python + +import argparse +import pysam +import gzip + + +VAR_GQ = 'varGQ' +MULTIALLELIC = 'MULTIALLELIC' +UNRESOLVED = 'UNRESOLVED' +HIGH_SR_BACKGROUND = 'HIGH_SR_BACKGROUND' +BOTHSIDES_SUPPORT = 'BOTHSIDES_SUPPORT' +REVISED_EVENT = 'REVISED_EVENT' +EV_VALUES = ['SR', 'PE', 'SR,PE', 'RD', 'BAF', 'RD,BAF'] + + +def read_last_column(file_path): + result_set = set() + with open(file_path, 'r') as f: + for line in f: + if line.strip(): + columns = line.strip().split() + result_set.add(columns[-1]) + return result_set + + +def process_record(record, fail_set, pass_set): + record = process_varGQ(record) + record = process_multiallelic(record) + record = process_unresolved(record) + record = process_noisy(record, fail_set) + record = process_bothsides_support(record, pass_set) + return record + + +def process_varGQ(record): + if VAR_GQ in record.info: + var_gq = record.info[VAR_GQ] + if isinstance(var_gq, list): + var_gq = var_gq[0] + del record.info[VAR_GQ] + record.qual = var_gq + return record + + +def process_multiallelic(record): + if MULTIALLELIC in record.info: + del record.info[MULTIALLELIC] + return record + + +def process_unresolved(record): + if UNRESOLVED in record.info: + del record.info[UNRESOLVED] + record.filter.add(UNRESOLVED) + return record + + +def process_noisy(record, 
fail_set): + if record.id in fail_set: + record.info[HIGH_SR_BACKGROUND] = True + return record + + +def process_bothsides_support(record, pass_set): + if record.id in pass_set: + record.info[BOTHSIDES_SUPPORT] = True + return record + + +if __name__ == '__main__': + # Parse arguments + parser = argparse.ArgumentParser(description='CleanVcf preprocessing.') + parser.add_argument('-V', '--input', dest='input_vcf', required=True, help='Input VCF file') + parser.add_argument('-O', '--output', dest='output_vcf', required=True, help='Output VCF file') + parser.add_argument('--fail-list', required=True, help='File with variants failing the background test') + parser.add_argument('--pass-list', required=True, help='File with variants passing both sides') + args = parser.parse_args() + + # Read input files + fail_set = read_last_column(args.fail_list) + pass_set = read_last_column(args.pass_list) + if args.input_vcf.endswith('.gz'): + vcf_in = pysam.VariantFile(gzip.open(args.input_vcf, 'rt')) + else: + vcf_in = pysam.VariantFile(args.input_vcf) + + # Open output file + if args.output_vcf.endswith('.gz'): + vcf_out = pysam.VariantFile(args.output_vcf, 'wz', header=vcf_in.header) + else: + vcf_out = pysam.VariantFile(args.output_vcf, 'w', header=vcf_in.header.copy()) + + # Process records + for record in vcf_in: + record = process_record(record, fail_set, pass_set) + vcf_out.write(record) + + # Close files + vcf_in.close() + vcf_out.close() diff --git a/src/sv-pipeline/04_variant_resolution/scripts/replace_ev_numeric_code_with_string.py b/src/sv-pipeline/04_variant_resolution/scripts/replace_ev_numeric_code_with_string.py index b7d611d41..69c7d16b6 100755 --- a/src/sv-pipeline/04_variant_resolution/scripts/replace_ev_numeric_code_with_string.py +++ b/src/sv-pipeline/04_variant_resolution/scripts/replace_ev_numeric_code_with_string.py @@ -61,7 +61,10 @@ def main(): if args.fout in '- stdout'.split(): fout = sys.stdout else: - fout = open(args.fout, 'w') + if args.fout.endswith(".gz"): + fout = gzip.open(args.fout, 'wt') + else: + fout = open(args.fout, 'w') while True: line = vcf.readline().rstrip() diff --git a/src/sv-pipeline/java/org/broadinstitute/svpipeline/CleanVCFPart1.java b/src/sv-pipeline/java/org/broadinstitute/svpipeline/CleanVCFPart1.java deleted file mode 100644 index 5637154bb..000000000 --- a/src/sv-pipeline/java/org/broadinstitute/svpipeline/CleanVCFPart1.java +++ /dev/null @@ -1,316 +0,0 @@ -package org.broadinstitute.svpipeline; - -import java.io.*; -import java.nio.charset.StandardCharsets; -import java.util.*; -import java.util.regex.Pattern; -import org.broadinstitute.svpipeline.VCFParser.*; - -public class CleanVCFPart1 { - private static final ByteSequence[] EV_VALS = { - null, - new ByteSequence("RD"), - new ByteSequence("PE"), - new ByteSequence("RD,PE"), - new ByteSequence("SR"), - new ByteSequence("RD,SR"), - new ByteSequence("PE,SR"), - new ByteSequence("RD,PE,SR") - }; - private static final ByteSequence FORMAT_LINE = new ByteSequence("FORMAT"); - private static final ByteSequence ID_KEY = new ByteSequence("ID"); - private static final ByteSequence EV_VALUE = new ByteSequence("EV"); - private static final ByteSequence TYPE_KEY = new ByteSequence("Type"); - private static final ByteSequence STRING_VALUE = new ByteSequence("String"); - private static final ByteSequence NUMBER_KEY = new ByteSequence("Number"); - private static final ByteSequence SVTYPE_KEY = new ByteSequence("SVTYPE"); - private static final ByteSequence ME_VALUE = new ByteSequence(":ME"); - private static 
final ByteSequence LT_VALUE = new ByteSequence("<"); - private static final ByteSequence GT_VALUE = new ByteSequence(">"); - private static final ByteSequence N_VALUE = new ByteSequence("N"); - private static final ByteSequence END_KEY = new ByteSequence("END"); - private static final ByteSequence VARGQ_KEY = new ByteSequence("varGQ"); - private static final ByteSequence MULTIALLELIC_KEY = new ByteSequence("MULTIALLELIC"); - private static final ByteSequence UNRESOLVED_KEY = new ByteSequence("UNRESOLVED"); - private static final ByteSequence HIGH_SR_BACKGROUND_KEY = new ByteSequence("HIGH_SR_BACKGROUND"); - private static final ByteSequence BOTHSIDES_SUPPORT_KEY = new ByteSequence("BOTHSIDES_SUPPORT"); - private static final ByteSequence DEL_VALUE = new ByteSequence("DEL"); - private static final ByteSequence DUP_VALUE = new ByteSequence("DUP"); - private static final ByteSequence RDCN_VALUE = new ByteSequence("RD_CN"); - private static final ByteSequence MISSING_VALUE = new ByteSequence("."); - private static final ByteSequence MISSING_GENOTYPE = new ByteSequence("./."); - private static final ByteSequence GT_REF_REF = new ByteSequence("0/0"); - private static final ByteSequence GT_REF_ALT = new ByteSequence("0/1"); - private static final ByteSequence GT_ALT_ALT = new ByteSequence("1/1"); - - private static final int MIN_ALLOSOME_EVENT_SIZE = 5000; - - public static void main( final String[] args ) { - if ( args.length != 8 ) { - System.err.println("Usage: java org.broadinstitute.svpipeline.CleanVCFPart1 " + - "INPUTVCFFILE PEDIGREES XCHR YCHR NOISYEVENTS BOTHSIDES SAMPLESOUT REVISEDEVENTSOUT"); - System.exit(1); - } - final VCFParser parser = new VCFParser(args[0]); - final ByteSequence xChrName = new ByteSequence(args[2]); - final ByteSequence yChrName = new ByteSequence(args[3]); - final Set noisyEvents = readLastColumn(args[4]); - final Set bothsidesSupportEvents = readLastColumn(args[5]); - try ( final OutputStream os - = new BufferedOutputStream(new FileOutputStream(FileDescriptor.out)); - final OutputStream osSamples = new BufferedOutputStream(new FileOutputStream(args[6])); - final OutputStream osRevEvents = new BufferedOutputStream(new FileOutputStream(args[7])) ) { - int[] sexForSample = null; - while ( parser.hasMetadata() ) { - final Metadata metadata = parser.nextMetaData(); - if ( metadata instanceof ColumnHeaderMetadata ) { - final ColumnHeaderMetadata cols = ((ColumnHeaderMetadata)metadata); - final List colNames = cols.getValue(); - final int nCols = colNames.size(); - for ( int idx = 9; idx < nCols; ++idx ) { - colNames.get(idx).write(osSamples); - osSamples.write('\n'); - } - sexForSample = readPedFile(args[1], cols.getValue()); - os.write(("##INFO=\n") - .getBytes(StandardCharsets.UTF_8)); - os.write("##FILTER=\n" - .getBytes(StandardCharsets.UTF_8)); - os.write(("##INFO=\n") - .getBytes(StandardCharsets.UTF_8)); - } else if ( metadata instanceof KeyAttributesMetadata ) { - final KeyAttributesMetadata keyAttrs = (KeyAttributesMetadata)metadata; - if ( keyAttrs.getKey().equals(FORMAT_LINE) ) { - final List kvs = keyAttrs.getValue(); - final int nKVs = kvs.size(); - if ( nKVs > 2 ) { - final KeyValue kv0 = kvs.get(0); - final KeyValue kv1 = kvs.get(1); - final KeyValue kv2 = kvs.get(2); - if ( kv0.getKey().equals(ID_KEY) && kv0.getValue().equals(EV_VALUE) ) { - if ( kv1.getKey().equals(NUMBER_KEY) ) { - kvs.set(1, new KeyValue(NUMBER_KEY, MISSING_VALUE)); - } - if ( kv2.getKey().equals(TYPE_KEY) ) { - kvs.set(2, new KeyValue(TYPE_KEY, STRING_VALUE)); - } - } - } - } - } 
- metadata.write(os); - } - if ( sexForSample == null ) { - throw new RuntimeException("header line with sample names is missing."); - } - while ( parser.hasRecord() ) { - final Record record = parser.nextRecord(); - - // replace the numeric EV value with a text value - final int evIdx = record.getFormat().indexOf(EV_VALUE); - if ( evIdx >= 0 ) { - for ( final CompoundField genotypeVals : record.getGenotypes() ) { - genotypeVals.set(evIdx, EV_VALS[genotypeVals.get(evIdx).asInt()]); - } - } - - // move the SVTYPE to the ALT field (except for MEs) - final InfoField info = record.getInfo(); - final ByteSequence svType = info.get(SVTYPE_KEY); - if ( !record.getAlt().contains(ME_VALUE) ) { - if ( svType != null ) { - record.setAlt(new ByteSequence(LT_VALUE, svType, GT_VALUE)); - } - } - record.setRef(N_VALUE); - - // move varGQ info field to quality column - final ByteSequence varGQ = info.get(VARGQ_KEY); - if ( varGQ != null ) { - record.setQuality(varGQ); - info.remove(VARGQ_KEY); - } - - // remove MULTIALLELIC flag, if present - info.remove(MULTIALLELIC_KEY); - - // remove UNRESOLVED flag and add it as a filter - if ( info.containsKey(UNRESOLVED_KEY) ) { - record.getFilter().add(UNRESOLVED_KEY); - info.remove(UNRESOLVED_KEY); - } - - // mark noisy events - if ( noisyEvents.contains(record.getID()) ) { - record.getInfo().put(HIGH_SR_BACKGROUND_KEY, null); - } - - // mark bothsides support - if ( bothsidesSupportEvents.contains(record.getID()) ) { - record.getInfo().put(BOTHSIDES_SUPPORT_KEY, null); - } - - // fix genotypes on allosomes - final boolean isY; - if ( (isY = yChrName.equals(record.getChromosome())) || - xChrName.equals(record.getChromosome())) { - final List genotypes = record.getGenotypes(); - final int rdCNIndex = record.getFormat().indexOf(RDCN_VALUE); - final ByteSequence end = info.get(END_KEY); - boolean adjustMale = false; - final boolean isDel; - if ( ((isDel = DEL_VALUE.equals(svType)) || DUP_VALUE.equals(svType)) && rdCNIndex >= 0 && end != null && - end.asInt() + 1 - record.getPosition() > MIN_ALLOSOME_EVENT_SIZE ) { - adjustMale = isRevisableEvent(genotypes, rdCNIndex, sexForSample, isY); - if ( adjustMale ) { - record.getID().write(osRevEvents); - osRevEvents.write('\n'); - } - } - CompoundField emptyGenotype = null; - final int nSamples = genotypes.size(); - for ( int sampleIdx = 0; sampleIdx < nSamples; ++sampleIdx ) { - final int sampleSex = sexForSample[sampleIdx]; - final CompoundField genotype = genotypes.get(sampleIdx); - if ( sampleSex == 1 ) { - if ( adjustMale ) { - final ByteSequence rdCN = genotype.get(rdCNIndex); - if ( rdCN.equals(MISSING_VALUE) ) { - continue; - } - final int rdCNVal = rdCN.asInt(); - genotype.set(rdCNIndex, new ByteSequence(Integer.toString(rdCNVal + 1))); - if ( isDel ) { - if ( rdCNVal >= 1 ) genotype.set(0, GT_REF_REF); - else if ( rdCNVal == 0 ) genotype.set(0, GT_REF_ALT); - } else { - if ( rdCNVal <= 1 ) genotype.set(0, GT_REF_REF); - else if ( rdCNVal == 2 ) genotype.set(0, GT_REF_ALT); - else genotype.set(0, GT_ALT_ALT); - } - } - } else if ( sampleSex == 2 ) { - if ( isY ) { - if ( emptyGenotype == null ) { - emptyGenotype = new CompoundField(MISSING_GENOTYPE, ':'); - int nFields = genotype.size(); - while ( --nFields > 0 ) { - emptyGenotype.add(MISSING_VALUE); - } - emptyGenotype.getValue(); // performance hack to put the pieces together - } - genotypes.set(sampleIdx, emptyGenotype); - } - } else { - genotype.set(0, MISSING_GENOTYPE); - } - } - } - - record.write(os); - } - } catch ( final IOException ioe ) { - throw new 
RuntimeException("Can't write to stdout", ioe); - } - } - - private static boolean isRevisableEvent( final List genotypes, - final int rdCNIndex, - final int[] sexForColumn, - final boolean isY ) { - // We're going to calculate the median rdCN values for males and females. - // We only care if the median is 0, 1, 2, or something larger, so we'll use 4 bins to - // sum up the counts: all values >2 go into the last bucket. - final int[] maleCounts = new int[4]; - final int[] femaleCounts = new int[4]; - final int nSamples = genotypes.size(); - for ( int sampleIdx = 0; sampleIdx < nSamples; ++sampleIdx ) { - final ByteSequence rdCN = genotypes.get(sampleIdx).get(rdCNIndex); - if ( MISSING_VALUE.equals(rdCN) ) { - continue; - } - int rdCNVal = rdCN.asInt(); - if ( rdCNVal > 2 ) { - rdCNVal = 3; - } - final int sampleSex = sexForColumn[sampleIdx]; - if ( sampleSex == 1 ) { - maleCounts[rdCNVal] += 1; - } else if ( sampleSex == 2 ) { - femaleCounts[rdCNVal] += 1; - } - } - final double maleMedian = calcMedian(maleCounts); - double femaleMedian = calcMedian(femaleCounts); - return maleMedian == 1. && (isY ? femaleMedian == 0. : femaleMedian == 2.); - } - - // visible for testing - static double calcMedian( final int[] counts ) { - final double target = (counts[0] + counts[1] + counts[2] + counts[3]) / 2.; - if ( target == 0. ) { - return Double.NaN; - } - int total = 0; - for ( int iii = 0; iii < 4; ++iii ) { - total += counts[iii]; - if ( total == target ) { - return iii + .5; - } else if ( total > target ) { - return (double)iii; - } - } - throw new IllegalStateException("we should never reach this statement"); - } - - private static Set readLastColumn( final String filename ) { - final Set values = new HashSet<>(); - try { - final BufferedReader neRdr = - new BufferedReader(new InputStreamReader(new FileInputStream(filename))); - String line; - while ( (line = neRdr.readLine()) != null ) { - final String lastCol = line.substring(line.lastIndexOf('\t') + 1); - values.add(new ByteSequence(lastCol)); - } - } catch ( final IOException ioe ) { - throw new RuntimeException("can't read table file " + filename); - } - return values; - } - - private static int[] readPedFile( final String pedFilename, List sampleNames ) { - final int nCols = sampleNames.size() - 9; - final Map sexForSampleMap = new HashMap<>(2*nCols); - final int[] sexForSample = new int[nCols]; - try { - final BufferedReader pedRdr = - new BufferedReader(new InputStreamReader(new FileInputStream(pedFilename))); - final Pattern tabPattern = Pattern.compile("\\t"); - String line; - while ( (line = pedRdr.readLine()) != null ) { - if ( line.startsWith("#") ) continue; - final Scanner scanner = new Scanner(line).useDelimiter(tabPattern); - scanner.next(); // family ignored - final String sampleName = scanner.next(); - scanner.next(); // mom ignored - scanner.next(); // pop ignored - final int sex = scanner.nextInt(); - sexForSampleMap.put(new ByteSequence(sampleName), sex); - } - } catch ( final IOException ioe ) { - throw new RuntimeException("can't read " + pedFilename, ioe); - } - for ( int col = 0; col < nCols; ++col ) { - final ByteSequence sampleName = sampleNames.get(col + 9); - final Integer sex = sexForSampleMap.get(sampleName); - if ( sex == null ) { - throw new RuntimeException("can't determine sex for sample " + sampleName); - } - sexForSample[col] = sex; - } - return sexForSample; - } -} diff --git a/src/sv-pipeline/java/org/broadinstitute/svpipeline/CleanVCFPart1UnitTest.java 
b/src/sv-pipeline/java/org/broadinstitute/svpipeline/CleanVCFPart1UnitTest.java deleted file mode 100644 index 77a6b5658..000000000 --- a/src/sv-pipeline/java/org/broadinstitute/svpipeline/CleanVCFPart1UnitTest.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.broadinstitute.svpipeline; - -public class CleanVCFPart1UnitTest { - public static void main( final String[] args ) { - testAsserts(); - testMedianCalculation(); - System.out.println("OK"); - } - - public static void testAsserts() { - boolean caughtIt = false; - try { - assert(false); - } catch ( final AssertionError ae ) { - caughtIt = true; - } - if ( !caughtIt ) { - throw new AssertionError("assertions aren't turned on, so you're not testing anything."); - } - } - - public static void testMedianCalculation() { - final int[] counts = new int[4]; - assert(Double.isNaN(CleanVCFPart1.calcMedian(counts))); - counts[0] = 1; - assert(CleanVCFPart1.calcMedian(counts) == 0.0); - counts[1] = 1; - assert(CleanVCFPart1.calcMedian(counts) == 0.5); - counts[2] = 1; - assert(CleanVCFPart1.calcMedian(counts) == 1.0); - counts[3] = 1; - assert(CleanVCFPart1.calcMedian(counts) == 1.5); - counts[2] = 2; - assert(CleanVCFPart1.calcMedian(counts) == 2.0); - counts[3] = 4; - assert(CleanVCFPart1.calcMedian(counts) == 2.5); - counts[3] = 5; - assert(CleanVCFPart1.calcMedian(counts) == 3.0); - } -} diff --git a/wdl/CalcAF.wdl b/wdl/CalcAF.wdl index cbc124e2a..064c3b28a 100644 --- a/wdl/CalcAF.wdl +++ b/wdl/CalcAF.wdl @@ -1,7 +1,6 @@ version 1.0 import "Structs.wdl" -import "CleanVcf5.wdl" as cleanvcf5 import "TasksMakeCohortVcf.wdl" as tmc workflow CalcAF { diff --git a/wdl/CleanVcf.wdl b/wdl/CleanVcf.wdl index ab228ccf7..42974547b 100644 --- a/wdl/CleanVcf.wdl +++ b/wdl/CleanVcf.wdl @@ -21,8 +21,6 @@ workflow CleanVcf { Int min_records_per_shard_step1 Int samples_per_step2_shard Int? max_samples_per_shard_step3 - Int clean_vcf1b_records_per_shard - Int clean_vcf5_records_per_shard File HERVK_reference File LINE1_reference @@ -47,6 +45,7 @@ workflow CleanVcf { File? resolve_complex_merged_vcf File? genotype_complex_merged_vcf + String gatk_docker String linux_docker String sv_base_mini_docker String sv_pipeline_docker @@ -60,28 +59,16 @@ workflow CleanVcf { # overrides for CleanVcfContig RuntimeAttr? runtime_override_clean_vcf_1a + RuntimeAttr? runtime_override_clean_vcf_1b RuntimeAttr? runtime_override_clean_vcf_2 RuntimeAttr? runtime_override_clean_vcf_3 RuntimeAttr? runtime_override_clean_vcf_4 - RuntimeAttr? runtime_override_clean_vcf_5_scatter - RuntimeAttr? runtime_override_clean_vcf_5_make_cleangq - RuntimeAttr? runtime_override_clean_vcf_5_find_redundant_multiallelics - RuntimeAttr? runtime_override_clean_vcf_5_polish + RuntimeAttr? runtime_override_clean_vcf_5 RuntimeAttr? runtime_override_stitch_fragmented_cnvs RuntimeAttr? runtime_override_final_cleanup RuntimeAttr? runtime_attr_format RuntimeAttr? runtime_override_rescue_me_dels - # Clean vcf 1b - RuntimeAttr? runtime_attr_override_subset_large_cnvs_1b - RuntimeAttr? runtime_attr_override_sort_bed_1b - RuntimeAttr? runtime_attr_override_intersect_bed_1b - RuntimeAttr? runtime_attr_override_build_dict_1b - RuntimeAttr? runtime_attr_override_scatter_1b - RuntimeAttr? runtime_attr_override_filter_vcf_1b - RuntimeAttr? runtime_override_concat_vcfs_1b - RuntimeAttr? runtime_override_cat_multi_cnvs_1b - RuntimeAttr? runtime_override_preconcat_step1 RuntimeAttr? runtime_override_hail_merge_step1 RuntimeAttr? runtime_override_fix_header_step1 @@ -91,11 +78,8 @@ workflow CleanVcf { RuntimeAttr? 
runtime_override_fix_header_drc RuntimeAttr? runtime_override_split_vcf_to_clean - RuntimeAttr? runtime_override_combine_step_1_sex_chr_revisions RuntimeAttr? runtime_override_split_include_list RuntimeAttr? runtime_override_combine_clean_vcf_2 - RuntimeAttr? runtime_override_combine_revised_4 - RuntimeAttr? runtime_override_combine_multi_ids_4 RuntimeAttr? runtime_override_drop_redundant_cnvs RuntimeAttr? runtime_override_combine_step_1_vcfs RuntimeAttr? runtime_override_sort_drop_redundant_cnvs @@ -134,32 +118,26 @@ workflow CleanVcf { outlier_samples_list=outlier_samples_list, use_hail=use_hail, gcs_project=gcs_project, - clean_vcf1b_records_per_shard=clean_vcf1b_records_per_shard, - clean_vcf5_records_per_shard=clean_vcf5_records_per_shard, ploidy_table=CreatePloidyTableFromPed.out, HERVK_reference=HERVK_reference, LINE1_reference=LINE1_reference, chr_x=chr_x, chr_y=chr_y, + gatk_docker=gatk_docker, linux_docker=linux_docker, sv_base_mini_docker=sv_base_mini_docker, sv_pipeline_docker=sv_pipeline_docker, runtime_override_clean_vcf_1a=runtime_override_clean_vcf_1a, + runtime_override_clean_vcf_1b=runtime_override_clean_vcf_1b, runtime_override_clean_vcf_2=runtime_override_clean_vcf_2, runtime_override_clean_vcf_3=runtime_override_clean_vcf_3, runtime_override_clean_vcf_4=runtime_override_clean_vcf_4, - runtime_override_clean_vcf_5_scatter=runtime_override_clean_vcf_5_scatter, - runtime_override_clean_vcf_5_make_cleangq=runtime_override_clean_vcf_5_make_cleangq, - runtime_override_clean_vcf_5_find_redundant_multiallelics=runtime_override_clean_vcf_5_find_redundant_multiallelics, - runtime_override_clean_vcf_5_polish=runtime_override_clean_vcf_5_polish, + runtime_override_clean_vcf_5=runtime_override_clean_vcf_5, runtime_override_stitch_fragmented_cnvs=runtime_override_stitch_fragmented_cnvs, runtime_override_final_cleanup=runtime_override_final_cleanup, runtime_override_split_vcf_to_clean=runtime_override_split_vcf_to_clean, - runtime_override_combine_step_1_sex_chr_revisions=runtime_override_combine_step_1_sex_chr_revisions, runtime_override_split_include_list=runtime_override_split_include_list, runtime_override_combine_clean_vcf_2=runtime_override_combine_clean_vcf_2, - runtime_override_combine_revised_4=runtime_override_combine_revised_4, - runtime_override_combine_multi_ids_4=runtime_override_combine_multi_ids_4, runtime_override_preconcat_step1=runtime_override_preconcat_step1, runtime_override_hail_merge_step1=runtime_override_hail_merge_step1, runtime_override_fix_header_step1=runtime_override_fix_header_step1, @@ -167,14 +145,8 @@ workflow CleanVcf { runtime_override_hail_merge_drc=runtime_override_hail_merge_drc, runtime_override_fix_header_drc=runtime_override_fix_header_drc, runtime_override_drop_redundant_cnvs=runtime_override_drop_redundant_cnvs, - runtime_attr_override_subset_large_cnvs_1b=runtime_attr_override_subset_large_cnvs_1b, - runtime_attr_override_sort_bed_1b=runtime_attr_override_sort_bed_1b, - runtime_attr_override_intersect_bed_1b=runtime_attr_override_intersect_bed_1b, - runtime_attr_override_build_dict_1b=runtime_attr_override_build_dict_1b, - runtime_attr_override_scatter_1b=runtime_attr_override_scatter_1b, - runtime_attr_override_filter_vcf_1b=runtime_attr_override_filter_vcf_1b, - runtime_override_concat_vcfs_1b=runtime_override_concat_vcfs_1b, - runtime_override_cat_multi_cnvs_1b=runtime_override_cat_multi_cnvs_1b, + runtime_override_combine_step_1_vcfs=runtime_override_combine_step_1_vcfs, + 
runtime_override_sort_drop_redundant_cnvs=runtime_override_sort_drop_redundant_cnvs, runtime_attr_format=runtime_attr_format, runtime_override_rescue_me_dels=runtime_override_rescue_me_dels } diff --git a/wdl/CleanVcf1b.wdl b/wdl/CleanVcf1b.wdl deleted file mode 100644 index 691d0591c..000000000 --- a/wdl/CleanVcf1b.wdl +++ /dev/null @@ -1,353 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "CleanVcf5.wdl" as CleanVcf5 -import "TasksMakeCohortVcf.wdl" as MiniTasks - -workflow CleanVcf1b { - input { - File intermediate_vcf - String prefix - Int records_per_shard - - String sv_pipeline_docker - String sv_base_mini_docker - - RuntimeAttr? runtime_attr_override_subset_large_cnvs - RuntimeAttr? runtime_attr_override_sort_bed - RuntimeAttr? runtime_attr_override_intersect_bed - RuntimeAttr? runtime_attr_override_build_dict - RuntimeAttr? runtime_attr_override_scatter - RuntimeAttr? runtime_attr_override_filter_vcf - RuntimeAttr? runtime_override_concat_vcfs - RuntimeAttr? runtime_override_cat_multi_cnvs - } - - call SubsetLargeCNVs { - input: - vcf=intermediate_vcf, - prefix="~{prefix}.subset_large_cnvs", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_override_subset_large_cnvs - } - - call Vcf2Bed { - input: - vcf=SubsetLargeCNVs.out, - prefix="~{prefix}.subset_large_cnvs", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_override_subset_large_cnvs - } - - call SortBed { - input: - bed=Vcf2Bed.out, - prefix="~{prefix}.subset_large_cnvs.sorted", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_override_sort_bed - } - - call BedtoolsIntersect { - input: - bed=SortBed.out, - prefix="~{prefix}.bedtools_intersect", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_override_intersect_bed - } - - call BuildGenoNormalReviseDictionary { - input: - filtered_vcf=SubsetLargeCNVs.out, - intersect_bed=BedtoolsIntersect.out, - prefix="~{prefix}.geno_normal_revise", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_override_build_dict - } - - call MiniTasks.ScatterVcf { - input: - vcf=intermediate_vcf, - records_per_shard=records_per_shard, - prefix="~{prefix}.scatter_vcf", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_override_scatter - } - - scatter ( i in range(length(ScatterVcf.shards)) ) { - call FilterVcf { - input: - intermediate_vcf=ScatterVcf.shards[i], - dictionary_json_gz=BuildGenoNormalReviseDictionary.out, - prefix="~{prefix}.filter_vcf.shard_~{i}", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_override_filter_vcf - } - } - - call MiniTasks.ConcatVcfs as ConcatCleanVcf1bShards { - input: - vcfs=FilterVcf.out, - naive=true, - sort_vcf_list=true, - outfile_prefix="~{prefix}.concat_vcfs", - sv_base_mini_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_concat_vcfs - } - - call MiniTasks.CatUncompressedFiles as ConcatMultiCnvs { - input: - shards=FilterVcf.multi_cnvs, - outfile_name="~{prefix}.multi.cnvs.txt", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_cat_multi_cnvs - } - - output { - File normal = ConcatCleanVcf1bShards.concat_vcf - File multi = ConcatMultiCnvs.outfile - } -} - -task SubsetLargeCNVs { - input { - File vcf - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - Float input_size = size(vcf, "GB") - RuntimeAttr runtime_default = object { - mem_gb: 3.75, - disk_gb: ceil(10.0 + input_size * 2.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - bcftools view --no-version \ - -i '(INFO/SVTYPE=="DEL" || INFO/SVTYPE=="DUP") && INFO/SVLEN>=5000' \ - ~{vcf} \ - | bgzip \ - > ~{prefix}.vcf.gz - >>> - output { - File out = "~{prefix}.vcf.gz" - } -} - -task Vcf2Bed { - input { - File vcf - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - Float input_size = size(vcf, "GB") - RuntimeAttr runtime_default = object { - mem_gb: 3.75, - disk_gb: ceil(10.0 + input_size * 2.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - svtk vcf2bed --no-header ~{vcf} stdout \ - | awk -F'\t' -v OFS='\t' '{if ($6=="") $6="blanksample";print $0}' \ - | gzip -1 \ - > ~{prefix}.bed.gz - >>> - output { - File out = "~{prefix}.bed.gz" - } -} - -task SortBed { - input { - File bed - String prefix - String sv_base_mini_docker - RuntimeAttr? 
runtime_attr_override - } - - Float input_size = size(bed, "GB") - RuntimeAttr runtime_default = object { - mem_gb: 3.75, - disk_gb: ceil(10.0 + input_size * 10.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_base_mini_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - mkdir tmp - zcat ~{bed} \ - | sort -T tmp -k1,1 -k2,2n \ - | gzip -1 \ - > ~{prefix}.bed.gz - >>> - output { - File out = "~{prefix}.bed.gz" - } -} - -task BedtoolsIntersect { - input { - File bed - String prefix - String sv_base_mini_docker - RuntimeAttr? runtime_attr_override - } - - Float input_size = size(bed, "GB") - RuntimeAttr runtime_default = object { - mem_gb: 3.75, - disk_gb: ceil(10.0 + input_size * 10.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_base_mini_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - bedtools intersect -sorted -wa -wb -a <(zcat ~{bed}) -b <(zcat ~{bed}) \ - | awk -F'\t' -v OFS='\t' '$4!=$10 && $5!=$11' \ - | gzip -1 \ - > ~{prefix}.bed.gz - >>> - output { - File out = "~{prefix}.bed.gz" - } -} - -task BuildGenoNormalReviseDictionary { - input { - File filtered_vcf - File intersect_bed - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - Float input_size = size([filtered_vcf, intersect_bed], "GB") - RuntimeAttr runtime_default = object { - mem_gb: 3.75, - disk_gb: ceil(10.0 + input_size * 2.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - python /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1b_build_dict.py ~{filtered_vcf} ~{intersect_bed} \ - | gzip -1 \ - > ~{prefix}.json.gz - >>> - output { - File out = "~{prefix}.json.gz" - } -} - -task FilterVcf { - input { - File intermediate_vcf - File dictionary_json_gz - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - Float input_size = size([intermediate_vcf, dictionary_json_gz], "GB") - RuntimeAttr runtime_default = object { - mem_gb: 3.75, - disk_gb: ceil(10.0 + input_size * 2.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - python /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1b_filter.py ~{dictionary_json_gz} ~{intermediate_vcf} \ - | bgzip \ - > ~{prefix}.vcf.gz - mv multi.cnvs.txt ~{prefix}.multi.cnvs.txt - >>> - output { - File out = "~{prefix}.vcf.gz" - File multi_cnvs = "~{prefix}.multi.cnvs.txt" - } -} diff --git a/wdl/CleanVcf5.wdl b/wdl/CleanVcf5.wdl deleted file mode 100644 index f9be79029..000000000 --- a/wdl/CleanVcf5.wdl +++ /dev/null @@ -1,273 +0,0 @@ -version 1.0 - -import "Structs.wdl" -import "TasksMakeCohortVcf.wdl" as tasks - -workflow CleanVcf5 { - input { - File normal_revise_vcf - File revise_vcf_lines - File ped_file - File sex_chr_revise - File multi_ids - File? outlier_samples_list - - String prefix - String contig - Int records_per_shard - - File? make_clean_gq_script - File? find_redundant_sites_script - - String sv_base_mini_docker - String sv_pipeline_docker - - Int? threads_per_task - RuntimeAttr? runtime_attr_override_scatter - RuntimeAttr? runtime_attr_override_make_cleangq - RuntimeAttr? runtime_attr_override_find_redundant_multiallelics - RuntimeAttr? 
runtime_attr_override_polish - } - - call tasks.ScatterVcf { - input: - vcf=normal_revise_vcf, - records_per_shard = records_per_shard, - prefix = "~{prefix}.scatter_vcf", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_override_scatter - } - - scatter ( i in range(length(ScatterVcf.shards)) ) { - call MakeCleanGQ { - input: - revise_vcf_lines=revise_vcf_lines, - normal_revise_vcf=ScatterVcf.shards[i], - ped_file=ped_file, - sex_chr_revise=sex_chr_revise, - multi_ids=multi_ids, - outlier_samples_list=outlier_samples_list, - make_clean_gq_script=make_clean_gq_script, - prefix="~{prefix}.make_clean_gq.shard_~{i}", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_override_make_cleangq - } - } - - call FindRedundantMultiallelics { - input: - multiallelic_vcfs=MakeCleanGQ.multiallelic_vcf, - find_redundant_sites_script=find_redundant_sites_script, - prefix="~{prefix}.find_redundant_multiallelics", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_override_find_redundant_multiallelics - } - - call Polish { - input: - clean_gq_vcfs=MakeCleanGQ.clean_gq_vcf, - no_sample_lists=MakeCleanGQ.no_sample_list, - redundant_multiallelics_list=FindRedundantMultiallelics.redundant_multiallelics_list, - prefix="~{prefix}.polish", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_override_polish - } - - output { - File polished=Polish.polished - } -} - -task MakeCleanGQ { - input { - File revise_vcf_lines - File normal_revise_vcf - File ped_file - File sex_chr_revise - File multi_ids - File? outlier_samples_list - File? make_clean_gq_script - String prefix - Int? threads = 2 - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size( - select_all([revise_vcf_lines, normal_revise_vcf, ped_file, sex_chr_revise, multi_ids, outlier_samples_list]), - "GB") - Float base_disk_gb = 10.0 - - RuntimeAttr runtime_default = object { - mem_gb: 16, - disk_gb: ceil(base_disk_gb + input_size * 5.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu -o pipefail - - ~{if defined(outlier_samples_list) then "ln ~{outlier_samples_list} outliers.txt" else "touch outliers.txt"} - - # put the revise lines into a normal VCF format - bcftools view -h ~{normal_revise_vcf} > header.txt - cat header.txt <(zcat ~{revise_vcf_lines} | grep . 
| tr " " "\t") | bgzip -c > revise.vcf.lines.vcf.gz - - python3 ~{default="/opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part5_update_records.py" make_clean_gq_script} \ - --threads_per_file ~{threads} \ - revise.vcf.lines.vcf.gz \ - ~{normal_revise_vcf} \ - ~{ped_file} \ - ~{sex_chr_revise} \ - ~{multi_ids} \ - outliers.txt \ - ~{prefix} - - bcftools view -G -O z ~{prefix}.multiallelic.vcf.gz > ~{prefix}.multiallelic.sites.vcf.gz - tabix ~{prefix}.cleanGQ.vcf.gz - >>> - - output { - File clean_gq_vcf=prefix + ".cleanGQ.vcf.gz" - File clean_gq_vcf_idx=prefix + ".cleanGQ.vcf.gz.tbi" - File multiallelic_vcf=prefix + ".multiallelic.sites.vcf.gz" - File no_sample_list = prefix + ".no_called_samples.list" - } -} - -task FindRedundantMultiallelics { - input { - Array[File] multiallelic_vcfs - File? find_redundant_sites_script - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size(multiallelic_vcfs, "GB") - Float base_disk_gb = 10.0 - - RuntimeAttr runtime_default = object { - mem_gb: 16, - disk_gb: ceil(base_disk_gb + input_size * 5.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - VCFS="~{write_lines(multiallelic_vcfs)}" - cat $VCFS | awk -F '/' '{print $NF"\t"$0}' | sort -k1,1V | awk '{print $2}' > vcfs_sorted.list - bcftools concat --no-version --output-type z --file-list vcfs_sorted.list --output multiallelic.vcf.gz - - python3 ~{default="/opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part5_find_redundant_multiallelics.py" find_redundant_sites_script} \ - multiallelic.vcf.gz \ - ~{prefix}.list - - >>> - - output { - File redundant_multiallelics_list="~{prefix}.list" - } -} - - -task Polish { - input { - Array[File] clean_gq_vcfs - Array[File] no_sample_lists - File redundant_multiallelics_list - String prefix - String sv_pipeline_docker - Int threads = 2 - RuntimeAttr? 
runtime_attr_override - } - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size(clean_gq_vcfs, "GB") - Float base_disk_gb = 10.0 - - RuntimeAttr runtime_default = object { - mem_gb: 16, - disk_gb: ceil(base_disk_gb + input_size * 5.0), - cpu_cores: 4, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - - VCFS="~{write_lines(clean_gq_vcfs)}" - cat $VCFS | awk -F '/' '{print $NF"\t"$0}' | sort -k1,1V | awk '{print $2}' > vcfs_sorted.list - cat ~{redundant_multiallelics_list} ~{sep=" " no_sample_lists} > ids_to_remove.list - bcftools concat --no-version --output-type u --file-list vcfs_sorted.list | \ - bcftools view --no-version \ - --exclude 'ID=@ids_to_remove.list' \ - --output-type z -o polished.need_reheader.vcf.gz --threads ~{threads} - - # replace multiallelic genotypes for CNVs with homref - bcftools +setGT polished.need_reheader.vcf.gz -- \ - -t q \ - -n c:'1/1' \ - -i '(INFO/SVTYPE="DEL" | INFO/SVTYPE="DUP") & (FMT/GT~"[2-9]" | FMT/GT~"[1-9][0-9]+") & FMT/RD_CN>3' > polished.need_reheader.regenotyped.vcf - - bgzip polished.need_reheader.regenotyped.vcf - - # do the last bit of header cleanup - bcftools view -h polished.need_reheader.regenotyped.vcf.gz > original_header.vcf - cat original_header.vcf | fgrep '##fileformat' > new_header.vcf - cat original_header.vcf \ - | egrep -v "CIPOS|CIEND|RMSSTD|EVENT|INFO=> new_header.vcf - # Don't sort contigs lexicographically, which would result in incorrect chr1, chr10, chr11, ... ordering - cat original_header.vcf | fgrep '##contig' >> new_header.vcf - cat original_header.vcf | fgrep '#CHROM' >> new_header.vcf - bcftools reheader polished.need_reheader.regenotyped.vcf.gz -h new_header.vcf -o ~{prefix}.vcf.gz - >>> - - output { - File polished="~{prefix}.vcf.gz" - } -} diff --git a/wdl/CleanVcfChromosome.wdl b/wdl/CleanVcfChromosome.wdl index 5800be909..69a1e068e 100644 --- a/wdl/CleanVcfChromosome.wdl +++ b/wdl/CleanVcfChromosome.wdl @@ -3,294 +3,182 @@ version 1.0 import "Structs.wdl" import "TasksMakeCohortVcf.wdl" as MiniTasks import "FormatVcfForGatk.wdl" as fvcf -import "CleanVcf1b.wdl" as c1b -import "CleanVcf5.wdl" as c5 import "HailMerge.wdl" as HailMerge workflow CleanVcfChromosome { - input { - File vcf - String contig - File background_list - File ped_file - File allosome_fai - String prefix - Int max_shards_per_chrom_step1 - File bothsides_pass_list - Int min_records_per_shard_step1 - Int samples_per_step2_shard - Int clean_vcf1b_records_per_shard - Int clean_vcf5_records_per_shard - Int? clean_vcf5_threads_per_task - File? outlier_samples_list - Int? max_samples_per_shard_step3 - - File HERVK_reference - File LINE1_reference - - File ploidy_table - String chr_x - String chr_y - - File? 
svtk_to_gatk_script # For debugging - - Boolean use_hail - String? gcs_project - - String linux_docker - String sv_base_mini_docker - String sv_pipeline_docker - - # overrides for local tasks - RuntimeAttr? runtime_override_clean_vcf_1a - RuntimeAttr? runtime_override_clean_vcf_2 - RuntimeAttr? runtime_override_clean_vcf_3 - RuntimeAttr? runtime_override_clean_vcf_4 - RuntimeAttr? runtime_override_clean_vcf_5_scatter - RuntimeAttr? runtime_override_clean_vcf_5_make_cleangq - RuntimeAttr? runtime_override_clean_vcf_5_find_redundant_multiallelics - RuntimeAttr? runtime_override_clean_vcf_5_polish - RuntimeAttr? runtime_override_stitch_fragmented_cnvs - RuntimeAttr? runtime_override_final_cleanup - RuntimeAttr? runtime_override_rescue_me_dels - RuntimeAttr? runtime_attr_add_high_fp_rate_filters - - # Clean vcf 1b - RuntimeAttr? runtime_attr_override_subset_large_cnvs_1b - RuntimeAttr? runtime_attr_override_sort_bed_1b - RuntimeAttr? runtime_attr_override_intersect_bed_1b - RuntimeAttr? runtime_attr_override_build_dict_1b - RuntimeAttr? runtime_attr_override_scatter_1b - RuntimeAttr? runtime_attr_override_filter_vcf_1b - RuntimeAttr? runtime_override_concat_vcfs_1b - RuntimeAttr? runtime_override_cat_multi_cnvs_1b - - RuntimeAttr? runtime_override_preconcat_step1 - RuntimeAttr? runtime_override_hail_merge_step1 - RuntimeAttr? runtime_override_fix_header_step1 - - RuntimeAttr? runtime_override_preconcat_drc - RuntimeAttr? runtime_override_hail_merge_drc - RuntimeAttr? runtime_override_fix_header_drc - - # overrides for MiniTasks - RuntimeAttr? runtime_override_split_vcf_to_clean - RuntimeAttr? runtime_override_combine_step_1_sex_chr_revisions - RuntimeAttr? runtime_override_split_include_list - RuntimeAttr? runtime_override_combine_clean_vcf_2 - RuntimeAttr? runtime_override_combine_revised_4 - RuntimeAttr? runtime_override_combine_multi_ids_4 - RuntimeAttr? runtime_override_drop_redundant_cnvs - RuntimeAttr? runtime_override_combine_step_1_vcfs - RuntimeAttr? runtime_override_sort_drop_redundant_cnvs - RuntimeAttr? 
runtime_attr_format - - } - - call MiniTasks.SplitVcf as SplitVcfToClean { - input: - vcf=vcf, - contig=contig, - prefix="~{prefix}.shard_", - n_shards=max_shards_per_chrom_step1, - min_vars_per_shard=min_records_per_shard_step1, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_split_vcf_to_clean - } - - scatter ( i in range(length(SplitVcfToClean.vcf_shards)) ) { - call CleanVcf1a { - input: - vcf=SplitVcfToClean.vcf_shards[i], - prefix="~{prefix}.clean_vcf_1.shard_~{i}", - background_fail_list=background_list, - bothsides_pass_list=bothsides_pass_list, - ped_file=ped_file, - allosome_fai=allosome_fai, - chr_x=chr_x, - chr_y=chr_y, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_clean_vcf_1a - } - } - - if (use_hail) { - call HailMerge.HailMerge as CombineStep1VcfsHail { - input: - vcfs=CleanVcf1a.intermediate_vcf, - prefix="~{prefix}.combine_step_1_vcfs", - gcs_project=gcs_project, - sv_base_mini_docker=sv_base_mini_docker, - sv_pipeline_docker=sv_pipeline_docker, - runtime_override_preconcat=runtime_override_preconcat_step1, - runtime_override_hail_merge=runtime_override_hail_merge_step1, - runtime_override_fix_header=runtime_override_fix_header_step1 - } - } - if (!use_hail) { - call MiniTasks.ConcatVcfs as CombineStep1Vcfs { - input: - vcfs=CleanVcf1a.intermediate_vcf, - vcfs_idx=CleanVcf1a.intermediate_vcf_idx, - naive=true, - generate_index=false, - outfile_prefix="~{prefix}.combine_step_1_vcfs", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_combine_step_1_vcfs - } - } - - call MiniTasks.CatUncompressedFiles as CombineStep1SexChrRevisions { - input: - shards=CleanVcf1a.sex, - outfile_name="~{prefix}.combine_step_1_sex_chr_revisions.txt", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_combine_step_1_sex_chr_revisions - } - - call c1b.CleanVcf1b { - input: - intermediate_vcf=select_first([CombineStep1Vcfs.concat_vcf, CombineStep1VcfsHail.merged_vcf]), - prefix="~{prefix}.clean_vcf_1b", - records_per_shard=clean_vcf1b_records_per_shard, - sv_pipeline_docker=sv_pipeline_docker, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override_subset_large_cnvs=runtime_attr_override_subset_large_cnvs_1b, - runtime_attr_override_sort_bed=runtime_attr_override_sort_bed_1b, - runtime_attr_override_intersect_bed=runtime_attr_override_intersect_bed_1b, - runtime_attr_override_build_dict=runtime_attr_override_build_dict_1b, - runtime_attr_override_scatter=runtime_attr_override_scatter_1b, - runtime_attr_override_filter_vcf=runtime_attr_override_filter_vcf_1b, - runtime_override_concat_vcfs=runtime_override_concat_vcfs_1b, - runtime_override_cat_multi_cnvs=runtime_override_cat_multi_cnvs_1b - } - - call MiniTasks.SplitUncompressed as SplitIncludeList { - input: - whole_file=CleanVcf1a.include_list[0], - lines_per_shard=samples_per_step2_shard, - shard_prefix="~{prefix}.split_include_list.", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_split_include_list - } - - scatter ( i in range(length(SplitIncludeList.shards)) ){ - call CleanVcf2 { - input: - normal_revise_vcf=CleanVcf1b.normal, - prefix="~{prefix}.clean_vcf_2.shard_~{i}", - include_list=SplitIncludeList.shards[i], - multi_cnvs=CleanVcf1b.multi, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_clean_vcf_2 - } - } - - call MiniTasks.CatUncompressedFiles as CombineCleanVcf2 { - input: - shards=CleanVcf2.out, - 
outfile_name="~{prefix}.combine_clean_vcf_2.txt", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_combine_clean_vcf_2 - } - - call CleanVcf3 { - input: - rd_cn_revise=CombineCleanVcf2.outfile, - max_samples_shard = max_samples_per_shard_step3, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_clean_vcf_3 - } - - scatter ( i in range(length(CleanVcf3.shards)) ){ - call CleanVcf4 { - input: - rd_cn_revise=CleanVcf3.shards[i], - normal_revise_vcf=CleanVcf1b.normal, - prefix="~{prefix}.clean_vcf_4.shard_~{i}", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_clean_vcf_4 - } - } - - call MiniTasks.CatUncompressedFiles as CombineRevised4 { - input: - shards=CleanVcf4.out, - outfile_name="~{prefix}.combine_revised_4.txt.gz", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_combine_revised_4 - } - - call MiniTasks.CatUncompressedFiles as CombineMultiIds4 { - input: - shards=CleanVcf4.multi_ids, - outfile_name="~{prefix}.combine_multi_ids_4.txt.gz", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_combine_multi_ids_4 - } - - call c5.CleanVcf5 { - input: - revise_vcf_lines=CombineRevised4.outfile, - normal_revise_vcf=CleanVcf1b.normal, - ped_file=ped_file, - sex_chr_revise=CombineStep1SexChrRevisions.outfile, - multi_ids=CombineMultiIds4.outfile, - outlier_samples_list=outlier_samples_list, - contig=contig, - prefix="~{prefix}.clean_vcf_5", - records_per_shard=clean_vcf5_records_per_shard, - threads_per_task=clean_vcf5_threads_per_task, - sv_pipeline_docker=sv_pipeline_docker, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override_scatter=runtime_override_clean_vcf_5_scatter, - runtime_attr_override_make_cleangq=runtime_override_clean_vcf_5_make_cleangq, - runtime_attr_override_find_redundant_multiallelics=runtime_override_clean_vcf_5_find_redundant_multiallelics, - runtime_attr_override_polish=runtime_override_clean_vcf_5_polish - } - - call DropRedundantCnvs { - input: - vcf=CleanVcf5.polished, - prefix="~{prefix}.drop_redundant_cnvs", - contig=contig, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_drop_redundant_cnvs - } - - if (use_hail) { - call HailMerge.HailMerge as SortDropRedundantCnvsHail { - input: - vcfs=[DropRedundantCnvs.out], - prefix="~{prefix}.drop_redundant_cnvs.sorted", - gcs_project=gcs_project, - reset_cnv_gts=true, - sv_base_mini_docker=sv_base_mini_docker, - sv_pipeline_docker=sv_pipeline_docker, - runtime_override_preconcat=runtime_override_preconcat_drc, - runtime_override_hail_merge=runtime_override_hail_merge_drc, - runtime_override_fix_header=runtime_override_fix_header_drc - } - } - if (!use_hail) { - call MiniTasks.SortVcf as SortDropRedundantCnvs { - input: - vcf=DropRedundantCnvs.out, - outfile_prefix="~{prefix}.drop_redundant_cnvs.sorted", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_sort_drop_redundant_cnvs - } - } - - call StitchFragmentedCnvs { - input: - vcf=select_first([SortDropRedundantCnvs.out, SortDropRedundantCnvsHail.merged_vcf]), - prefix="~{prefix}.stitch_fragmented_cnvs", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_stitch_fragmented_cnvs - } - - call RescueMobileElementDeletions { + input { + File vcf + String contig + File background_list + File bothsides_pass_list + File? 
outlier_samples_list + File ped_file + File allosome_fai + String prefix + + File HERVK_reference + File LINE1_reference + + File ploidy_table + String chr_x + String chr_y + + Boolean use_hail + String? gcs_project + + String gatk_docker + String linux_docker + String sv_base_mini_docker + String sv_pipeline_docker + + # overrides for local tasks + RuntimeAttr? runtime_attr_preprocess + RuntimeAttr? runtime_attr_revise_overlapping_cnv_gts + RuntimeAttr? runtime_attr_revise_overlapping_cnv_cns + RuntimeAttr? runtime_attr_revise_large_cnvs + RuntimeAttr? runtime_attr_revise_abnormal_allosomes + RuntimeAttr? runtime_attr_revise_multiallelics + RuntimeAttr? runtime_attr_postprocess + + RuntimeAttr? runtime_override_clean_vcf_1a + RuntimeAttr? runtime_override_clean_vcf_1b + RuntimeAttr? runtime_override_clean_vcf_2 + RuntimeAttr? runtime_override_clean_vcf_3 + RuntimeAttr? runtime_override_clean_vcf_4 + RuntimeAttr? runtime_override_clean_vcf_5 + RuntimeAttr? runtime_override_stitch_fragmented_cnvs + RuntimeAttr? runtime_override_final_cleanup + RuntimeAttr? runtime_override_rescue_me_dels + RuntimeAttr? runtime_attr_add_high_fp_rate_filters + + RuntimeAttr? runtime_override_preconcat_step1 + RuntimeAttr? runtime_override_hail_merge_step1 + RuntimeAttr? runtime_override_fix_header_step1 + + RuntimeAttr? runtime_override_preconcat_drc + RuntimeAttr? runtime_override_hail_merge_drc + RuntimeAttr? runtime_override_fix_header_drc + + # overrides for MiniTasks + RuntimeAttr? runtime_override_split_vcf_to_clean + RuntimeAttr? runtime_override_split_include_list + RuntimeAttr? runtime_override_combine_clean_vcf_2 + RuntimeAttr? runtime_override_drop_redundant_cnvs + RuntimeAttr? runtime_override_combine_step_1_vcfs + RuntimeAttr? runtime_override_sort_drop_redundant_cnvs + RuntimeAttr? 
runtime_attr_format + } + + call fvcf.FormatVcf as FormatVcfToClean { + input: + vcf=vcf, + ploidy_table=ploidy_table, + output_prefix="~{prefix}.formatted", + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_attr_format + } + + call CleanVcfPreprocess { + input: + vcf=FormatVcfToClean.out, + background_list=background_list, + bothsides_pass_list=bothsides_pass_list, + prefix="~{prefix}.preprocess", + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_attr_preprocess + } + + call CleanVcfReviseOverlappingCnvGts { + input: + vcf=CleanVcfPreprocess.out, + prefix="~{prefix}.revise_overlapping_cnv_gts", + gatk_docker=gatk_docker, + runtime_attr_override=runtime_attr_revise_overlapping_cnv_gts + } + + call CleanVcfReviseOverlappingCnvCns { + input: + vcf=CleanVcfReviseOverlappingCnvGts.out, + prefix="~{prefix}.revise_overlapping_cnv_cns", + gatk_docker=gatk_docker, + runtime_attr_override=runtime_attr_revise_overlapping_cnv_cns + } + + call CleanVcfReviseLargeCnvs { + input: + vcf=CleanVcfReviseOverlappingCnvGts.out, + outlier_samples_list=outlier_samples_list, + prefix="~{prefix}.revise_large_cnvs", + gatk_docker=gatk_docker, + runtime_attr_override=runtime_attr_revise_large_cnvs + } + + call CleanVcfReviseAbnormalAllosomes { + input: + vcf=CleanVcfReviseLargeCnvs.out, + prefix="~{prefix}.revise_abnormal_allosomes", + gatk_docker=gatk_docker, + runtime_attr_override=runtime_attr_revise_abnormal_allosomes + } + + call CleanVcfReviseMultiallelicCnvs { + input: + vcf=CleanVcfReviseAbnormalAllosomes.out, + prefix="~{prefix}.revise_multiallelic_cnvs", + gatk_docker=gatk_docker, + runtime_attr_override=runtime_attr_revise_multiallelics + } + + call CleanVcfPostprocess { + input: + vcf=CleanVcfReviseMultiallelicCnvs.out, + prefix="~{prefix}.postprocess", + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_attr_postprocess + } + + call DropRedundantCnvs { + input: + vcf=CleanVcfPostprocess.out, + prefix="~{prefix}.drop_redundant_cnvs", + contig=contig, + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_override_drop_redundant_cnvs + } + + if (use_hail) { + call HailMerge.HailMerge as SortDropRedundantCnvsHail { + input: + vcfs=[DropRedundantCnvs.out], + prefix="~{prefix}.drop_redundant_cnvs.sorted", + gcs_project=gcs_project, + reset_cnv_gts=true, + sv_base_mini_docker=sv_base_mini_docker, + sv_pipeline_docker=sv_pipeline_docker, + runtime_override_preconcat=runtime_override_preconcat_drc, + runtime_override_hail_merge=runtime_override_hail_merge_drc, + runtime_override_fix_header=runtime_override_fix_header_drc + } + } + if (!use_hail) { + call MiniTasks.SortVcf as SortDropRedundantCnvs { + input: + vcf=DropRedundantCnvs.out, + outfile_prefix="~{prefix}.drop_redundant_cnvs.sorted", + sv_base_mini_docker=sv_base_mini_docker, + runtime_attr_override=runtime_override_sort_drop_redundant_cnvs + } + } + + call StitchFragmentedCnvs { + input: + vcf=select_first([SortDropRedundantCnvs.out, SortDropRedundantCnvsHail.merged_vcf]), + prefix="~{prefix}.stitch_fragmented_cnvs", + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_override_stitch_fragmented_cnvs + } + + call RescueMobileElementDeletions { input: vcf = StitchFragmentedCnvs.stitched_vcf_shard, prefix = "~{prefix}.rescue_me_dels", @@ -317,495 +205,577 @@ workflow CleanVcfChromosome { runtime_attr_override=runtime_override_final_cleanup } - call fvcf.FormatVcf { + call fvcf.FormatVcf as FormatVcfToOutput { input: 
vcf=FinalCleanup.final_cleaned_shard, ploidy_table=ploidy_table, args="--scale-down-gq", output_prefix="~{prefix}.final_format", - script=svtk_to_gatk_script, sv_pipeline_docker=sv_pipeline_docker, runtime_attr_override=runtime_attr_format } - - output { - File out = FormatVcf.out - File out_idx = FormatVcf.out_index - } + + output { + File out = FormatVcfToOutput.out + File out_idx = FormatVcfToOutput.out_index + } } - -task CleanVcf1a { - input { - File vcf - String prefix - File background_fail_list - File bothsides_pass_list - File ped_file - File allosome_fai - String chr_x - String chr_y - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - Float input_size = size([vcf, background_fail_list, bothsides_pass_list], "GB") - RuntimeAttr runtime_default = object { - mem_gb: 3.75, - disk_gb: ceil(10.0 + input_size * 2), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - - touch ~{prefix}.includelist.txt - touch ~{prefix}.sexchr.revise.txt - - # outputs - # includelist.txt: the names of all the samples in the input vcf - # sexchr.revise.txt: the names of the events where genotypes got tweaked on allosomes - # stdout: a revised vcf - java -jar $CLEAN_VCF_PART_1_JAR \ - ~{vcf} \ - ~{ped_file} \ - ~{chr_x} \ - ~{chr_y} \ - ~{background_fail_list} \ - ~{bothsides_pass_list} \ - ~{prefix}.includelist.txt \ - ~{prefix}.sexchr.revise.txt \ - | bgzip \ - > ~{prefix}.vcf.gz - tabix ~{prefix}.vcf.gz - >>> - - output { - File include_list="~{prefix}.includelist.txt" - File sex="~{prefix}.sexchr.revise.txt" - File intermediate_vcf="~{prefix}.vcf.gz" - File intermediate_vcf_idx="~{prefix}.vcf.gz.tbi" - } +task CleanVcfPreprocess { + input { + File vcf + File background_list + File bothsides_pass_list + String prefix + String sv_pipeline_docker + RuntimeAttr? 
runtime_attr_override + } + + RuntimeAttr runtime_default = object { + mem_gb: 3.75, + disk_gb: ceil(10.0 + size(vcf, "GB") * 2), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + Int java_mem_mb = ceil(select_first([runtime_override.mem_gb, runtime_default.mem_gb]) * 1000 * 0.7) + String output_vcf = "~{prefix}.vcf.gz" + + command <<< + set -euo pipefail + + if [ ! -f "~{vcf}.tbi" ]; then + tabix -p vcf ~{vcf} + fi + + python /opt/sv-pipeline/04_variant_resolution/scripts/replace_ev_numeric_code_with_string.py \ + ~{vcf} \ + processed.vcf.gz + + zgrep '^##' processed.vcf.gz > header.txt + + cat <> header.txt + ##FILTER= + ##INFO= + ##INFO= + ##INFO= + EOF + + zgrep '^#CHROM' processed.vcf.gz >> header.txt + + bcftools view processed.vcf.gz | bcftools reheader -h header.txt | bgzip -c > processed.reheader.vcf.gz + + rm header.txt + + python /opt/sv-pipeline/04_variant_resolution/scripts/cleanvcf_preprocess.py \ + -V processed.reheader.vcf.gz \ + -O ~{output_vcf} \ + --fail-list ~{background_list} \ + --pass-list ~{bothsides_pass_list} + + tabix -p vcf ~{output_vcf} + >>> + + output { + File out="~{output_vcf}" + File out_idx="~{output_vcf}.tbi" + } } -task CleanVcf2 { - input { - File normal_revise_vcf - String prefix - File include_list - File multi_cnvs - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size([normal_revise_vcf, include_list, multi_cnvs], "GB") - Float base_disk_gb = 10.0 - Float input_disk_scale = 3.0 - RuntimeAttr runtime_default = object { - mem_gb: 2.0, - disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu -o pipefail - - bcftools index ~{normal_revise_vcf} - /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part2.sh \ - ~{normal_revise_vcf} \ - ~{include_list} \ - ~{multi_cnvs} \ - "~{prefix}.txt" - >>> - - output { - File out="~{prefix}.txt" - } +task CleanVcfReviseOverlappingCnvGts { + input { + File vcf + String prefix + String gatk_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr runtime_default = object { + mem_gb: 3.75, + disk_gb: ceil(10.0 + size(vcf, "GB") * 2), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: gatk_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + Int java_mem_mb = ceil(select_first([runtime_override.mem_gb, runtime_default.mem_gb]) * 1000 * 0.7) + String output_vcf = "~{prefix}.vcf.gz" + + command <<< + set -euo pipefail + + if [ ! -f "~{vcf}.tbi" ]; then + tabix -p vcf ~{vcf} + fi + + gatk --java-options "-Xmx~{java_mem_mb}m" SVReviseOverlappingCnvGts \ + -V ~{vcf} \ + -O ~{output_vcf} + >>> + + output { + File out="~{output_vcf}" + File out_idx="~{output_vcf}.tbi" + } } - -task CleanVcf3 { - input { - File rd_cn_revise - Int? max_samples_shard - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - Int max_samples_shard_ = select_first([max_samples_shard, 7000]) - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size(rd_cn_revise, "GB") - RuntimeAttr runtime_default = object { - mem_gb: 3.75, - disk_gb: ceil(10.0 + input_size * 2.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - python /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part3.py ~{rd_cn_revise} -s ~{max_samples_shard_} - # Ensure there is at least one shard - touch shards/out.0_0.txt - >>> - - output { - Array[File] shards = glob("shards/*") - } +task CleanVcfReviseOverlappingCnvCns { + input { + File vcf + String prefix + String gatk_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr runtime_default = object { + mem_gb: 3.75, + disk_gb: ceil(10.0 + size(vcf, "GB") * 2), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: gatk_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + Int java_mem_mb = ceil(select_first([runtime_override.mem_gb, runtime_default.mem_gb]) * 1000 * 0.7) + String output_vcf = "~{prefix}.vcf.gz" + + command <<< + set -euo pipefail + + if [ ! -f "~{vcf}.tbi" ]; then + tabix -p vcf ~{vcf} + fi + + gatk --java-options "-Xmx~{java_mem_mb}m" SVReviseOverlappingCnvCns \ + -V ~{vcf} \ + -O ~{output_vcf} + >>> + + output { + File out="~{output_vcf}" + File out_idx="~{output_vcf}.tbi" + } } +task CleanVcfReviseLargeCnvs { + input { + File vcf + File? outlier_samples_list + String prefix + String gatk_docker + RuntimeAttr? 
runtime_attr_override + } + + RuntimeAttr runtime_default = object { + mem_gb: 3.75, + disk_gb: ceil(10.0 + size(vcf, "GB") * 2), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: gatk_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + Int java_mem_mb = ceil(select_first([runtime_override.mem_gb, runtime_default.mem_gb]) * 1000 * 0.7) + String output_vcf = "~{prefix}.vcf.gz" + + command <<< + set -euo pipefail + + if [ ! -f "~{vcf}.tbi" ]; then + tabix -p vcf ~{vcf} + fi + + gatk --java-options "-Xmx~{java_mem_mb}m" SVReviseLargeCnvs \ + -V ~{vcf} \ + -O ~{output_vcf} \ + ~{if defined(outlier_samples_list) then "--outlier-samples ~{outlier_samples_list}" else "" } + >>> + + output { + File out="~{output_vcf}" + File out_idx="~{output_vcf}.tbi" + } +} -task CleanVcf4 { - input { - File rd_cn_revise - File normal_revise_vcf - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - Float input_size = size([rd_cn_revise, normal_revise_vcf], "GB") - RuntimeAttr runtime_default = object { - mem_gb: 2.0, - disk_gb: 50, - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } +task CleanVcfReviseAbnormalAllosomes { + input { + File vcf + String prefix + String gatk_docker + RuntimeAttr? 
runtime_attr_override + } + + RuntimeAttr runtime_default = object { + mem_gb: 3.75, + disk_gb: ceil(10.0 + size(vcf, "GB") * 2), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: gatk_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + Int java_mem_mb = ceil(select_first([runtime_override.mem_gb, runtime_default.mem_gb]) * 1000 * 0.7) + String output_vcf = "~{prefix}.vcf.gz" + + command <<< + set -euo pipefail + + if [ ! -f "~{vcf}.tbi" ]; then + tabix -p vcf ~{vcf} + fi + + gatk --java-options "-Xmx~{java_mem_mb}m" SVReviseAbnormalAllosomes \ + -V ~{vcf} \ + -O ~{output_vcf} + >>> + + output { + File out="~{output_vcf}" + File out_idx="~{output_vcf}.tbi" + } +} - command <<< - set -euo pipefail - python3 < record_end: - break - num_gt_over_2 = 0 - for sid in record.samples: - s = record.samples[sid] - # Pick best GT - if s['PE_GT'] is None: - continue - elif s['SR_GT'] is None: - gt = s['PE_GT'] - elif s['PE_GT'] > 0 and s['SR_GT'] == 0: - gt = s['PE_GT'] - elif s['PE_GT'] == 0: - gt = s['SR_GT'] - elif s['PE_GQ'] >= s['SR_GQ']: - gt = s['PE_GT'] - else: - gt = s['SR_GT'] - if gt > 2: - num_gt_over_2 += 1 - if num_gt_over_2 > max_vf: - multi_geno_ids.add(record.id) - vcf.close() - - multi_geno_ids = sorted(list(multi_geno_ids)) - with open("~{prefix}.multi_geno_ids.txt", "w") as f: - for vid in multi_geno_ids: - f.write(vid + "\n") - CODE - - bgzip ~{prefix}.revise_vcf_lines.txt - gzip ~{prefix}.multi_geno_ids.txt - >>> +task CleanVcfReviseMultiallelicCnvs { + input { + File vcf + String prefix + String gatk_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr runtime_default = object { + mem_gb: 3.75, + disk_gb: ceil(10.0 + size(vcf, "GB") * 2), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: gatk_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + Int java_mem_mb = ceil(select_first([runtime_override.mem_gb, runtime_default.mem_gb]) * 1000 * 0.7) + String output_vcf = "~{prefix}.vcf.gz" + + command <<< + set -euo pipefail + + if [ ! 
-f "~{vcf}.tbi" ]; then + tabix -p vcf ~{vcf} + fi + + gatk --java-options "-Xmx~{java_mem_mb}m" SVReviseMultiallelicCnvs \ + -V ~{vcf} \ + -O ~{output_vcf} + >>> + + output { + File out="~{output_vcf}" + File out_idx="~{output_vcf}.tbi" + } +} - output { - File out="~{prefix}.revise_vcf_lines.txt.gz" - File multi_ids="~{prefix}.multi_geno_ids.txt.gz" - } +task CleanVcfPostprocess { + input { + File vcf + String prefix + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr runtime_default = object { + mem_gb: 3.75, + disk_gb: ceil(10.0 + size(vcf, "GB") * 2), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + Int java_mem_mb = ceil(select_first([runtime_override.mem_gb, runtime_default.mem_gb]) * 1000 * 0.7) + String output_vcf = "~{prefix}.vcf.gz" + + command <<< + set -euo pipefail + + if [ ! -f "~{vcf}.tbi" ]; then + tabix -p vcf ~{vcf} + fi + + bcftools annotate -x INFO/MULTIALLELIC,INFO/UNRESOLVED,INFO/EVENT,INFO/REVISED_EVENT,INFO/MULTI_CNV,INFO/varGQ ~{vcf} -o processed.vcf.gz -O z + + bcftools view -h processed.vcf.gz | grep -v -E "CIPOS|CIEND|RMSSTD|source|bcftools|GATKCommandLine|##FORMAT=|##ALT=|##INFO= header.txt + + bcftools reheader -h header.txt processed.vcf.gz -o ~{output_vcf} + + tabix -p vcf ~{output_vcf} + >>> + + output { + File out="~{output_vcf}" + File out_idx="~{output_vcf}.tbi" + } } task RescueMobileElementDeletions { - input { - File vcf - String prefix - File LINE1 - File HERVK - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - Float input_size = size(vcf, "GiB") - RuntimeAttr runtime_default = object { - mem_gb: 3.75 + input_size * 1.5, - disk_gb: ceil(100.0 + input_size * 3.0), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - - python <.5) print}' | cut -f4 | sed -e 's/$/\tDEL\tPASS\toverlap_LINE1/' > manual_revise.MEI_DEL_from_BND.SVID_SVTYPE_FILTER_INFO.tsv - bedtools coverage -wo -a ~{prefix}.bnd_del.bed.gz -b ~{HERVK} | awk '{if ($NF>.5) print}' | cut -f4 | sed -e 's/$/\tDEL\tPASS\toverlap_HERVK/' >> manual_revise.MEI_DEL_from_BND.SVID_SVTYPE_FILTER_INFO.tsv + bedtools coverage -wo -a ~{prefix}.bnd_del.bed.gz -b ~{LINE1} | awk '{if ($NF>.5) print}' | cut -f4 | sed -e 's/$/\tDEL\tPASS\toverlap_LINE1/' > manual_revise.MEI_DEL_from_BND.SVID_SVTYPE_FILTER_INFO.tsv + bedtools coverage -wo -a ~{prefix}.bnd_del.bed.gz -b ~{HERVK} | awk '{if ($NF>.5) print}' | cut -f4 | sed -e 's/$/\tDEL\tPASS\toverlap_HERVK/' >> manual_revise.MEI_DEL_from_BND.SVID_SVTYPE_FILTER_INFO.tsv - python <',) - if hash_MEI_DEL_reset[record.id] == 'overlap_HERVK': - record.alts = ('',) - fo.write(record) + if record.id in hash_MEI_DEL_reset.keys(): + del record.filter['UNRESOLVED'] + record.info['SVTYPE'] = 'DEL' + record.info['SVLEN'] = record.info['END2'] - record.start + record.stop = record.info['END2'] + record.info.pop("CHR2") + record.info.pop("END2") + record.info.pop("UNRESOLVED_TYPE") + if hash_MEI_DEL_reset[record.id] == 'overlap_LINE1': + record.alts = ('',) + if hash_MEI_DEL_reset[record.id] == 'overlap_HERVK': + record.alts = ('',) + fo.write(record) fin.close() fo.close() CODE - >>> + >>> - output { - File out = "~{prefix}.vcf.gz" - } + output { + File out = "~{prefix}.vcf.gz" + } } # Remove CNVs that are redundant with CPX events or other CNVs task DropRedundantCnvs { - input { - File vcf - String prefix - String contig - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - Float input_size = size(vcf, "GiB") - # disk is cheap, read/write speed is proportional to disk size, and disk IO is a significant time factor: - # in tests on large VCFs, memory usage is ~1.0 * input VCF size - # the biggest disk usage is at the end of the task, with input + output VCF on disk - Int cpu_cores = 2 # speed up compression / decompression of VCFs - RuntimeAttr runtime_default = object { - mem_gb: 3.75 + input_size * 1.5, - disk_gb: ceil(100.0 + input_size * 2.0), - cpu_cores: cpu_cores, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - /opt/sv-pipeline/04_variant_resolution/scripts/resolve_cpx_cnv_redundancies.py \ - ~{vcf} ~{prefix}.vcf.gz --temp-dir ./tmp - >>> - - output { - File out = "~{prefix}.vcf.gz" - } + input { + File vcf + String prefix + String contig + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + Float input_size = size(vcf, "GiB") + # disk is cheap, read/write speed is proportional to disk size, and disk IO is a significant time factor: + # in tests on large VCFs, memory usage is ~1.0 * input VCF size + # the biggest disk usage is at the end of the task, with input + output VCF on disk + Int cpu_cores = 2 # speed up compression / decompression of VCFs + RuntimeAttr runtime_default = object { + mem_gb: 3.75 + input_size * 1.5, + disk_gb: ceil(100.0 + input_size * 2.0), + cpu_cores: cpu_cores, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -euo pipefail + /opt/sv-pipeline/04_variant_resolution/scripts/resolve_cpx_cnv_redundancies.py \ + ~{vcf} ~{prefix}.vcf.gz --temp-dir ./tmp + >>> + + output { + File out = "~{prefix}.vcf.gz" + } } # Stitch fragmented RD-only calls found in 100% of the same samples task StitchFragmentedCnvs { - input { - File vcf - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - Float input_size = size(vcf, "GB") - RuntimeAttr runtime_default = object { - mem_gb: 7.5, - disk_gb: ceil(10.0 + input_size * 2), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - Float mem_gb = select_first([runtime_override.mem_gb, runtime_default.mem_gb]) - Int java_mem_mb = ceil(mem_gb * 1000 * 0.8) - - runtime { - memory: "~{mem_gb} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - echo "First pass..." - java -Xmx~{java_mem_mb}M -jar ${STITCH_JAR} 0.2 200000 0.2 ~{vcf} \ - | bgzip \ - > tmp.vcf.gz - rm ~{vcf} - echo "Second pass..." - java -Xmx~{java_mem_mb}M -jar ${STITCH_JAR} 0.2 200000 0.2 tmp.vcf.gz \ - | bgzip \ - > ~{prefix}.vcf.gz - >>> - - output { - File stitched_vcf_shard = "~{prefix}.vcf.gz" - } + input { + File vcf + String prefix + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + Float input_size = size(vcf, "GB") + RuntimeAttr runtime_default = object { + mem_gb: 7.5, + disk_gb: ceil(10.0 + input_size * 2), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + Float mem_gb = select_first([runtime_override.mem_gb, runtime_default.mem_gb]) + Int java_mem_mb = ceil(mem_gb * 1000 * 0.8) + + runtime { + memory: "~{mem_gb} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -euo pipefail + echo "First pass..." + java -Xmx~{java_mem_mb}M -jar ${STITCH_JAR} 0.2 200000 0.2 ~{vcf} \ + | bgzip \ + > tmp.vcf.gz + rm ~{vcf} + echo "Second pass..." + java -Xmx~{java_mem_mb}M -jar ${STITCH_JAR} 0.2 200000 0.2 tmp.vcf.gz \ + | bgzip \ + > ~{prefix}.vcf.gz + >>> + + output { + File stitched_vcf_shard = "~{prefix}.vcf.gz" + } } # Add FILTER status for pockets of variants with high FP rate: wham-only DELs and Scramble-only SVAs with HIGH_SR_BACKGROUND @@ -863,53 +833,53 @@ CODE # Final VCF cleanup task FinalCleanup { - input { - File vcf - String contig - String prefix - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size(vcf, "GB") - Float base_disk_gb = 10.0 - Float base_mem_gb = 2.0 - Float input_mem_scale = 3.0 - Float input_disk_scale = 5.0 - RuntimeAttr runtime_default = object { - mem_gb: base_mem_gb + input_size * input_mem_scale, - disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu -o pipefail - - /opt/sv-pipeline/04_variant_resolution/scripts/rename_after_vcfcluster.py \ - --chrom ~{contig} \ - --prefix ~{prefix} \ - ~{vcf} stdout \ - | bcftools annotate --no-version -e 'SVTYPE=="CNV" && SVLEN<5000' -x INFO/MEMBERS -Oz -o ~{prefix}.vcf.gz - tabix ~{prefix}.vcf.gz - >>> - - output { - File final_cleaned_shard = "~{prefix}.vcf.gz" - File final_cleaned_shard_idx = "~{prefix}.vcf.gz.tbi" - } + input { + File vcf + String contig + String prefix + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed + # generally assume working memory is ~3 * inputs + Float input_size = size(vcf, "GB") + Float base_disk_gb = 10.0 + Float base_mem_gb = 2.0 + Float input_mem_scale = 3.0 + Float input_disk_scale = 5.0 + RuntimeAttr runtime_default = object { + mem_gb: base_mem_gb + input_size * input_mem_scale, + disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -eu -o pipefail + + /opt/sv-pipeline/04_variant_resolution/scripts/rename_after_vcfcluster.py \ + --chrom ~{contig} \ + --prefix ~{prefix} \ + ~{vcf} stdout \ + | bcftools annotate --no-version -e 'SVTYPE=="CNV" && SVLEN<5000' -x INFO/MEMBERS -Oz -o ~{prefix}.vcf.gz + tabix ~{prefix}.vcf.gz + >>> + + output { + File final_cleaned_shard = "~{prefix}.vcf.gz" + File final_cleaned_shard_idx = "~{prefix}.vcf.gz.tbi" + } } \ No newline at end of file
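
Note (not part of the patch): the deleted CleanVcf4 heredoc above carries the per-sample "best genotype" rule that decides which evidence type (paired-end or split-read) a sample's genotype is taken from before counting multiallelic carriers. A minimal Python sketch of that rule follows, assuming the FORMAT keys shown in the diff (PE_GT, PE_GQ, SR_GT, SR_GQ); the function names and the dict-per-sample representation are illustrative, and the max variant-frequency cutoff the count is compared against is defined earlier in the heredoc, which is elided here.

from typing import Optional


def pick_best_gt(pe_gt: Optional[int], pe_gq: Optional[int],
                 sr_gt: Optional[int], sr_gq: Optional[int]) -> Optional[int]:
    """Return the genotype to use for a sample, or None if there is no PE genotype."""
    if pe_gt is None:
        return None                  # no PE genotype: sample is skipped
    if sr_gt is None:
        return pe_gt                 # only PE evidence available
    if pe_gt > 0 and sr_gt == 0:
        return pe_gt                 # PE non-ref while SR is ref: trust PE
    if pe_gt == 0:
        return sr_gt                 # PE is ref: fall back to SR
    return pe_gt if pe_gq >= sr_gq else sr_gt   # both non-ref: higher GQ wins


def count_multiallelic_samples(samples) -> int:
    """Count samples whose selected genotype exceeds 2, the cutoff used in the deleted script.

    `samples` is an iterable of dicts holding the four FORMAT keys; the original script
    compares this count against a max variant-frequency threshold before flagging the record.
    """
    count = 0
    for fmt in samples:
        gt = pick_best_gt(fmt.get("PE_GT"), fmt.get("PE_GQ"),
                          fmt.get("SR_GT"), fmt.get("SR_GQ"))
        if gt is not None and gt > 2:
            count += 1
    return count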
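
Note (not part of the patch): the RescueMobileElementDeletions task above rescues unresolved BND records as mobile-element deletions when more than 50% of the spanned interval lies within a LINE1 or HERVK reference interval (the awk `$NF>.5` filter on `bedtools coverage` output), then relabels the record as a DEL. The sketch below restates that rule in Python under stated assumptions: the interval-overlap helper stands in for bedtools and assumes non-overlapping reference intervals, `record` is a pysam-style VariantRecord as used in the task, and the exact symbolic ALT written by the task is elided in this diff.

def covered_fraction(start: int, end: int, ref_intervals) -> float:
    """Fraction of [start, end) covered by reference ME intervals (assumed non-overlapping)."""
    length = max(end - start, 1)
    covered = 0
    for r_start, r_end in ref_intervals:
        overlap = min(end, r_end) - max(start, r_start)
        if overlap > 0:
            covered += overlap
    return covered / length


def rescue_me_deletion(record, line1_intervals, hervk_intervals) -> bool:
    """Relabel a BND as a mobile-element DEL if >50% of it lies in LINE1 or HERVK,
    mirroring the filter/INFO edits made in the task above."""
    start, end2 = record.start, record.info["END2"]
    for intervals in (line1_intervals, hervk_intervals):
        if covered_fraction(start, end2, intervals) > 0.5:
            del record.filter["UNRESOLVED"]
            record.info["SVTYPE"] = "DEL"
            record.info["SVLEN"] = end2 - start
            record.stop = end2
            for key in ("CHR2", "END2", "UNRESOLVED_TYPE"):
                record.info.pop(key)
            # The task also rewrites record.alts to a mobile-element deletion allele
            # (LINE1 vs HERVK); the exact ALT string is not recoverable from this diff.
            return True
    return False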
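
Note (not part of the patch): every task in this diff resolves its runtime settings with the same pattern, `select_first([runtime_attr_override, runtime_default])` followed by a per-field `select_first([runtime_override.x, runtime_default.x])`, so a caller-supplied override wins only for the fields it actually sets. A small Python analogue of that resolution follows; the RuntimeAttr field names match the WDL struct, while the `resolve` helper is purely illustrative.

from dataclasses import dataclass, fields
from typing import Optional


@dataclass
class RuntimeAttr:
    mem_gb: Optional[float] = None
    disk_gb: Optional[int] = None
    cpu_cores: Optional[int] = None
    preemptible_tries: Optional[int] = None
    max_retries: Optional[int] = None
    boot_disk_gb: Optional[int] = None


def resolve(default: RuntimeAttr, override: Optional[RuntimeAttr]) -> RuntimeAttr:
    """Field-wise equivalent of select_first([runtime_override.x, runtime_default.x])."""
    chosen = override if override is not None else default
    resolved = {}
    for f in fields(RuntimeAttr):
        value = getattr(chosen, f.name)
        resolved[f.name] = value if value is not None else getattr(default, f.name)
    return RuntimeAttr(**resolved)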