Skip to content

Commit

Permalink
Wow
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Nov 11, 2024
1 parent 36b9dd4 commit 11bcd2f
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 46 deletions.
98 changes: 53 additions & 45 deletions src/bai.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import Long from 'long'
import VirtualOffset, { fromBytes } from './virtualOffset'
import Chunk from './chunk'

import { optimizeChunks, parsePseudoBin, findFirstData, BaseOpts } from './util'
import IndexFile from './indexFile'
import { optimizeChunks, BaseOpts, parsePseudoBin } from './util'

const BAI_MAGIC = 21578050 // BAI\1

Expand All @@ -27,8 +26,6 @@ function reg2bins(beg: number, end: number) {
}

export default class BAI extends IndexFile {
baiP?: Promise<Buffer>

async lineCount(refId: number, opts?: BaseOpts) {
const indexData = await this.parse(opts)
return indexData.indices[refId]?.stats?.lineCount || 0
Expand All @@ -37,67 +34,83 @@ export default class BAI extends IndexFile {
// fetch and parse the index
async _parse(opts?: BaseOpts) {
const bytes = (await this.filehandle.readFile(opts)) as Buffer
const data: { [key: string]: any } = { bai: true, maxBlockSize: 1 << 16 }

// check BAI magic numbers
if (bytes.readUInt32LE(0) !== BAI_MAGIC) {
throw new Error('Not a BAI file')
}

data.refCount = bytes.readInt32LE(4)
const refCount = bytes.readInt32LE(4)
const depth = 5
const binLimit = ((1 << ((depth + 1) * 3)) - 1) / 7

// read the indexes for each reference sequence
data.indices = new Array(data.refCount)
let currOffset = 8
for (let i = 0; i < data.refCount; i += 1) {
let curr = 8
let firstDataLine: VirtualOffset | undefined

type BinIndex = Record<string, Chunk[]>
type LinearIndex = VirtualOffset[]
const indices = new Array<{
binIndex: BinIndex
linearIndex: LinearIndex
stats?: { lineCount: number }
}>(refCount)
for (let i = 0; i < refCount; i++) {
// the binning index
const binCount = bytes.readInt32LE(currOffset)
const binCount = bytes.readInt32LE(curr)
let stats

currOffset += 4
const binIndex: { [key: number]: Chunk[] } = {}
curr += 4
const binIndex: Record<number, Chunk[]> = {}

for (let j = 0; j < binCount; j += 1) {
const bin = bytes.readUInt32LE(currOffset)
currOffset += 4
const bin = bytes.readUInt32LE(curr)
curr += 4
if (bin === binLimit + 1) {
currOffset += 4
stats = parsePseudoBin(bytes, currOffset)
currOffset += 32
curr += 4
stats = parsePseudoBin(bytes, curr + 16)
curr += 32
} else if (bin > binLimit + 1) {
throw new Error('bai index contains too many bins, please use CSI')
} else {
const chunkCount = bytes.readInt32LE(currOffset)
currOffset += 4
const chunks = new Array(chunkCount)
for (let k = 0; k < chunkCount; k += 1) {
const u = fromBytes(bytes, currOffset)
const v = fromBytes(bytes, currOffset + 8)
currOffset += 16
this._findFirstData(data, u)
const chunkCount = bytes.readInt32LE(curr)
curr += 4
const chunks = new Array<Chunk>(chunkCount)
for (let k = 0; k < chunkCount; k++) {
const u = fromBytes(bytes, curr)
curr += 8
const v = fromBytes(bytes, curr)
curr += 8
firstDataLine = findFirstData(firstDataLine, u)
chunks[k] = new Chunk(u, v, bin)
}
binIndex[bin] = chunks
}
}

const linearCount = bytes.readInt32LE(currOffset)
currOffset += 4
// as we're going through the linear index, figure out
// the smallest virtual offset in the indexes, which
// tells us where the BAM header ends
const linearIndex = new Array(linearCount)
for (let k = 0; k < linearCount; k += 1) {
linearIndex[k] = fromBytes(bytes, currOffset)
currOffset += 8
this._findFirstData(data, linearIndex[k])
const linearCount = bytes.readInt32LE(curr)
curr += 4
// as we're going through the linear index, figure out the smallest
// virtual offset in the indexes, which tells us where the BAM header
// ends
const linearIndex = new Array<VirtualOffset>(linearCount)
for (let j = 0; j < linearCount; j++) {
const offset = fromBytes(bytes, curr)
curr += 8
firstDataLine = findFirstData(firstDataLine, offset)
linearIndex[j] = offset
}

data.indices[i] = { binIndex, linearIndex, stats }
indices[i] = { binIndex, linearIndex, stats }
}

return data
return {
bai: true,
firstDataLine,
maxBlockSize: 1 << 16,
indices,
refCount,
}
}

async indexCov(
Expand All @@ -123,7 +136,6 @@ export default class BAI extends IndexFile {
const depths = range
? new Array((e - s) / v)
: new Array(linearIndex.length - 1)

const totalSize = linearIndex[linearIndex.length - 1].blockPosition
if (e > (linearIndex.length - 1) * v) {
throw new Error('query outside of range of linear index')
Expand Down Expand Up @@ -154,12 +166,10 @@ export default class BAI extends IndexFile {
}

const indexData = await this.parse(opts)
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (!indexData) {
return []
}
const ba = indexData.indices[refId]
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (!ba) {
return []
}
Expand All @@ -171,12 +181,10 @@ export default class BAI extends IndexFile {
// Find chunks in overlapping bins. Leaf bins (< 4681) are not pruned
for (const [start, end] of overlappingBins) {
for (let bin = start; bin <= end; bin++) {
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (ba.binIndex[bin]) {
const binChunks = ba.binIndex[bin]

for (const binChunk of binChunks) {
chunks.push(binChunk)
for (const chunk of binChunks) {
chunks.push(new Chunk(chunk.minv, chunk.maxv, bin))
}
}
}
Expand All @@ -190,7 +198,7 @@ export default class BAI extends IndexFile {
const maxLin = Math.min(max >> 14, nintv - 1)
for (let i = minLin; i <= maxLin; ++i) {
const vp = ba.linearIndex[i]
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition

if (vp && (!lowest || vp.compareTo(lowest) < 0)) {
lowest = vp
}
Expand Down
2 changes: 1 addition & 1 deletion src/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ export function optimizeChunks(chunks: Chunk[], lowest?: VirtualOffset) {
export function parsePseudoBin(bytes: Buffer, offset: number) {
return {
lineCount: Long.fromBytesLE(
Array.prototype.slice.call(bytes, offset + 16, offset + 24),
Array.prototype.slice.call(bytes, offset, offset + 8),
true,
).toNumber(),
}
Expand Down

0 comments on commit 11bcd2f

Please sign in to comment.