Skip to content

Commit

Permalink
Still working
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Nov 11, 2024
1 parent 542f813 commit 7d4f2cb
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 419 deletions.
115 changes: 44 additions & 71 deletions src/bai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { fromBytes } from './virtualOffset'
import Chunk from './chunk'

import IndexFile from './indexFile'
import { optimizeChunks, BaseOpts } from './util'
import { optimizeChunks, BaseOpts, parsePseudoBin } from './util'

const BAI_MAGIC = 21578050 // BAI\1

Expand All @@ -14,44 +14,30 @@ function roundUp(n: number, multiple: number) {
return n - (n % multiple) + multiple
}

function reg2bins(beg: number, end: number) {
end -= 1
return [
[0, 0],
[1 + (beg >> 26), 1 + (end >> 26)],
[9 + (beg >> 23), 9 + (end >> 23)],
[73 + (beg >> 20), 73 + (end >> 20)],
[585 + (beg >> 17), 585 + (end >> 17)],
[4681 + (beg >> 14), 4681 + (end >> 14)],
] as const
}

export default class BAI extends IndexFile {
baiP?: Promise<Buffer>

async hasRefSeq() {
return true
}
parsePseudoBin(bytes: Buffer, offset: number) {
const lineCount = Long.fromBytesLE(
Array.prototype.slice.call(bytes, offset + 16, offset + 24),
true,
).toNumber()
return { lineCount }
}

async lineCount(refId: number, opts: BaseOpts = {}) {
const prom = await this.parse()
const index = prom.indices[refId]
if (!index) {
return -1
}
const ret = index.stats || {}
return ret.lineCount === undefined ? -1 : ret.lineCount
}

fetchBai(opts: BaseOpts = {}) {
if (!this.baiP) {
this.baiP = this.filehandle.readFile(opts).catch(e => {
this.baiP = undefined
throw e
}) as Promise<Buffer>
}
return this.baiP
async lineCount(refId: number, opts?: BaseOpts) {
const indexData = await this.parse(opts)
return indexData.indices[refId]?.stats?.lineCount || 0
}

// fetch and parse the index
async _parse() {
async _parse(opts?: BaseOpts) {
const bytes = (await this.filehandle.readFile(opts)) as Buffer
const data: { [key: string]: any } = { bai: true, maxBlockSize: 1 << 16 }
const bytes = await this.fetchBai()

// check BAI magic numbers
if (bytes.readUInt32LE(0) !== BAI_MAGIC) {
Expand All @@ -77,7 +63,7 @@ export default class BAI extends IndexFile {
currOffset += 4
if (bin === binLimit + 1) {
currOffset += 4
stats = this.parsePseudoBin(bytes, currOffset)
stats = parsePseudoBin(bytes, currOffset)
currOffset += 32
} else if (bin > binLimit + 1) {
throw new Error('bai index contains too many bins, please use CSI')
Expand Down Expand Up @@ -122,23 +108,22 @@ export default class BAI extends IndexFile {
): Promise<{ start: number; end: number; score: number }[]> {
const v = 16384
const range = start !== undefined
const indexData = await this.parse()
const indexData = await this.parse(opts)
const seqIdx = indexData.indices[seqId]
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (!seqIdx) {
return []
}
const { linearIndex = [], stats } = seqIdx
if (!linearIndex.length) {
if (linearIndex.length === 0) {
return []
}
const e = end !== undefined ? roundUp(end, v) : (linearIndex.length - 1) * v
const s = start !== undefined ? roundDown(start, v) : 0
let depths
if (range) {
depths = new Array((e - s) / v)
} else {
depths = new Array(linearIndex.length - 1)
}
const e = end === undefined ? (linearIndex.length - 1) * v : roundUp(end, v)
const s = start === undefined ? 0 : roundDown(start, v)
const depths = range
? new Array((e - s) / v)
: new Array(linearIndex.length - 1)

const totalSize = linearIndex[linearIndex.length - 1].blockPosition
if (e > (linearIndex.length - 1) * v) {
throw new Error('query outside of range of linear index')
Expand All @@ -152,25 +137,10 @@ export default class BAI extends IndexFile {
}
currentPos = linearIndex[i + 1].blockPosition
}
return depths.map(d => {
return { ...d, score: (d.score * stats.lineCount) / totalSize }
})
}

/**
* calculate the list of bins that may overlap with region [beg,end) (zero-based half-open)
* @returns {Array[number]}
*/
reg2bins(beg: number, end: number) {
end -= 1
return [
[0, 0],
[1 + (beg >> 26), 1 + (end >> 26)],
[9 + (beg >> 23), 9 + (end >> 23)],
[73 + (beg >> 20), 73 + (end >> 20)],
[585 + (beg >> 17), 585 + (end >> 17)],
[4681 + (beg >> 14), 4681 + (end >> 14)],
]
return depths.map(d => ({
...d,
score: (d.score * (stats?.lineCount || 0)) / totalSize,
}))
}

async blocksForRange(
Expand All @@ -183,26 +153,30 @@ export default class BAI extends IndexFile {
min = 0
}

const indexData = await this.parse()
const indexData = await this.parse(opts)
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (!indexData) {
return []
}
const ba = indexData.indices[refId]
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (!ba) {
return []
}

// List of bin #s that overlap min, max
const overlappingBins = this.reg2bins(min, max)
const overlappingBins = reg2bins(min, max)
const chunks: Chunk[] = []

// Find chunks in overlapping bins. Leaf bins (< 4681) are not pruned
for (const [start, end] of overlappingBins) {
for (let bin = start; bin <= end; bin++) {
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (ba.binIndex[bin]) {
const binChunks = ba.binIndex[bin]
for (let c = 0; c < binChunks.length; ++c) {
chunks.push(new Chunk(binChunks[c].minv, binChunks[c].maxv, bin))

for (const binChunk of binChunks) {
chunks.push(binChunk)
}
}
}
Expand All @@ -211,15 +185,14 @@ export default class BAI extends IndexFile {
// Use the linear index to find minimum file position of chunks that could
// contain alignments in the region
const nintv = ba.linearIndex.length
let lowest = null
let lowest: VirtualOffset | undefined
const minLin = Math.min(min >> 14, nintv - 1)
const maxLin = Math.min(max >> 14, nintv - 1)
for (let i = minLin; i <= maxLin; ++i) {
const vp = ba.linearIndex[i]
if (vp) {
if (!lowest || vp.compareTo(lowest) < 0) {
lowest = vp
}
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (vp && (!lowest || vp.compareTo(lowest) < 0)) {
lowest = vp
}
}

Expand Down
Loading

0 comments on commit 7d4f2cb

Please sign in to comment.