diff --git a/src/main/java/org/seqdoop/hadoop_bam/AnySAMInputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/AnySAMInputFormat.java index 8d871e6..6da74df 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/AnySAMInputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/AnySAMInputFormat.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -229,6 +230,9 @@ else if (split instanceof FileVirtualSplit) final List origSplits = BAMInputFormat.removeIndexFiles(super.getSplits(job)); + final List sortedSplits = new ArrayList<>(origSplits); + sortedSplits.sort(Comparator.comparing(split -> ((FileSplit) split).getPath())); + // We have to partition the splits by input format and hand them over to // the *InputFormats for any further handling. // @@ -236,11 +240,11 @@ else if (split instanceof FileVirtualSplit) // just extract the BAM and CRAM ones and leave the rest as they are. final List - bamOrigSplits = new ArrayList(origSplits.size()), - cramOrigSplits = new ArrayList(origSplits.size()), - newSplits = new ArrayList(origSplits.size()); + bamOrigSplits = new ArrayList(sortedSplits.size()), + cramOrigSplits = new ArrayList(sortedSplits.size()), + newSplits = new ArrayList(sortedSplits.size()); - for (final InputSplit iSplit : origSplits) { + for (final InputSplit iSplit : sortedSplits) { final FileSplit split = (FileSplit)iSplit; if (SAMFormat.BAM.equals(getFormat(split.getPath())))