Wow

GMOD · Nov 11, 2024 · 11bcd2f · 11bcd2f
1 parent 36b9dd4
commit 11bcd2f
Show file tree

Hide file tree

Showing 2 changed files with 54 additions and 46 deletions.
diff --git a/src/bai.ts b/src/bai.ts
@@ -1,9 +1,8 @@
-import Long from 'long'
 import VirtualOffset, { fromBytes } from './virtualOffset'
 import Chunk from './chunk'
 
+import { optimizeChunks, parsePseudoBin, findFirstData, BaseOpts } from './util'
 import IndexFile from './indexFile'
-import { optimizeChunks, BaseOpts, parsePseudoBin } from './util'
 
 const BAI_MAGIC = 21578050 // BAI\1
 
@@ -27,8 +26,6 @@ function reg2bins(beg: number, end: number) {
 }
 
 export default class BAI extends IndexFile {
-  baiP?: Promise<Buffer>
-
   async lineCount(refId: number, opts?: BaseOpts) {
     const indexData = await this.parse(opts)
     return indexData.indices[refId]?.stats?.lineCount || 0
@@ -37,67 +34,83 @@ export default class BAI extends IndexFile {
   // fetch and parse the index
   async _parse(opts?: BaseOpts) {
     const bytes = (await this.filehandle.readFile(opts)) as Buffer
-    const data: { [key: string]: any } = { bai: true, maxBlockSize: 1 << 16 }
 
     // check BAI magic numbers
     if (bytes.readUInt32LE(0) !== BAI_MAGIC) {
       throw new Error('Not a BAI file')
     }
 
-    data.refCount = bytes.readInt32LE(4)
+    const refCount = bytes.readInt32LE(4)
     const depth = 5
     const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7
 
     // read the indexes for each reference sequence
-    data.indices = new Array(data.refCount)
-    let currOffset = 8
-    for (let i = 0; i < data.refCount; i += 1) {
+    let curr = 8
+    let firstDataLine: VirtualOffset | undefined
+
+    type BinIndex = Record<string, Chunk[]>
+    type LinearIndex = VirtualOffset[]
+    const indices = new Array<{
+      binIndex: BinIndex
+      linearIndex: LinearIndex
+      stats?: { lineCount: number }
+    }>(refCount)
+    for (let i = 0; i < refCount; i++) {
       // the binning index
-      const binCount = bytes.readInt32LE(currOffset)
+      const binCount = bytes.readInt32LE(curr)
       let stats
 
-      currOffset += 4
-      const binIndex: { [key: number]: Chunk[] } = {}
+      curr += 4
+      const binIndex: Record<number, Chunk[]> = {}
+
       for (let j = 0; j < binCount; j += 1) {
-        const bin = bytes.readUInt32LE(currOffset)
-        currOffset += 4
+        const bin = bytes.readUInt32LE(curr)
+        curr += 4
         if (bin === binLimit + 1) {
-          currOffset += 4
-          stats = parsePseudoBin(bytes, currOffset)
-          currOffset += 32
+          curr += 4
+          stats = parsePseudoBin(bytes, curr + 16)
+          curr += 32
         } else if (bin > binLimit + 1) {
           throw new Error('bai index contains too many bins, please use CSI')
         } else {
-          const chunkCount = bytes.readInt32LE(currOffset)
-          currOffset += 4
-          const chunks = new Array(chunkCount)
-          for (let k = 0; k < chunkCount; k += 1) {
-            const u = fromBytes(bytes, currOffset)
-            const v = fromBytes(bytes, currOffset + 8)
-            currOffset += 16
-            this._findFirstData(data, u)
+          const chunkCount = bytes.readInt32LE(curr)
+          curr += 4
+          const chunks = new Array<Chunk>(chunkCount)
+          for (let k = 0; k < chunkCount; k++) {
+            const u = fromBytes(bytes, curr)
+            curr += 8
+            const v = fromBytes(bytes, curr)
+            curr += 8
+            firstDataLine = findFirstData(firstDataLine, u)
             chunks[k] = new Chunk(u, v, bin)
           }
           binIndex[bin] = chunks
         }
       }
 
-      const linearCount = bytes.readInt32LE(currOffset)
-      currOffset += 4
-      // as we're going through the linear index, figure out
-      // the smallest virtual offset in the indexes, which
-      // tells us where the BAM header ends
-      const linearIndex = new Array(linearCount)
-      for (let k = 0; k < linearCount; k += 1) {
-        linearIndex[k] = fromBytes(bytes, currOffset)
-        currOffset += 8
-        this._findFirstData(data, linearIndex[k])
+      const linearCount = bytes.readInt32LE(curr)
+      curr += 4
+      // as we're going through the linear index, figure out the smallest
+      // virtual offset in the indexes, which tells us where the BAM header
+      // ends
+      const linearIndex = new Array<VirtualOffset>(linearCount)
+      for (let j = 0; j < linearCount; j++) {
+        const offset = fromBytes(bytes, curr)
+        curr += 8
+        firstDataLine = findFirstData(firstDataLine, offset)
+        linearIndex[j] = offset
       }
 
-      data.indices[i] = { binIndex, linearIndex, stats }
+      indices[i] = { binIndex, linearIndex, stats }
     }
 
-    return data
+    return {
+      bai: true,
+      firstDataLine,
+      maxBlockSize: 1 << 16,
+      indices,
+      refCount,
+    }
   }
 
   async indexCov(
@@ -123,7 +136,6 @@ export default class BAI extends IndexFile {
     const depths = range
       ? new Array((e - s) / v)
       : new Array(linearIndex.length - 1)
-
     const totalSize = linearIndex[linearIndex.length - 1].blockPosition
     if (e > (linearIndex.length - 1) * v) {
       throw new Error('query outside of range of linear index')
@@ -154,12 +166,10 @@ export default class BAI extends IndexFile {
     }
 
     const indexData = await this.parse(opts)
-    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
     if (!indexData) {
       return []
     }
     const ba = indexData.indices[refId]
-    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
     if (!ba) {
       return []
     }
@@ -171,12 +181,10 @@ export default class BAI extends IndexFile {
     // Find chunks in overlapping bins.  Leaf bins (< 4681) are not pruned
     for (const [start, end] of overlappingBins) {
       for (let bin = start; bin <= end; bin++) {
-        // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
         if (ba.binIndex[bin]) {
           const binChunks = ba.binIndex[bin]
-
-          for (const binChunk of binChunks) {
-            chunks.push(binChunk)
+          for (const chunk of binChunks) {
+            chunks.push(new Chunk(chunk.minv, chunk.maxv, bin))
           }
         }
       }
@@ -190,7 +198,7 @@ export default class BAI extends IndexFile {
     const maxLin = Math.min(max >> 14, nintv - 1)
     for (let i = minLin; i <= maxLin; ++i) {
       const vp = ba.linearIndex[i]
-      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
+
       if (vp && (!lowest || vp.compareTo(lowest) < 0)) {
         lowest = vp
       }

diff --git a/src/util.ts b/src/util.ts
@@ -105,7 +105,7 @@ export function optimizeChunks(chunks: Chunk[], lowest?: VirtualOffset) {
 export function parsePseudoBin(bytes: Buffer, offset: number) {
   return {
     lineCount: Long.fromBytesLE(
-      Array.prototype.slice.call(bytes, offset + 16, offset + 24),
+      Array.prototype.slice.call(bytes, offset, offset + 8),
       true,
     ).toNumber(),
   }