Skip to content

Commit

Permalink
Misc
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Mar 18, 2024
1 parent 8db17b9 commit d5805ad
Show file tree
Hide file tree
Showing 4 changed files with 1,539 additions and 2,006 deletions.
3 changes: 0 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,8 @@
"dependencies": {
"@gmod/binary-parser": "^1.3.5",
"@jkbonfield/htscodecs": "^0.5.1",
"abortable-promise-cache": "^1.2.0",
"buffer-crc32": "^1.0.0",
"bzip2": "^0.1.1",
"long": "^4.0.0",
"md5": "^2.2.1",
"pako": "^1.0.4",
"quick-lru": "^4.0.1",
Expand All @@ -58,7 +56,6 @@
"@babel/preset-typescript": "^7.17.12",
"@gmod/indexedfasta": "^2.1.0",
"@types/jest": "^29.5.12",
"@types/long": "^4.0.2",
"@types/md5": "^2.3.2",
"@types/pako": "^1.0.3",
"@typescript-eslint/eslint-plugin": "^7.0.2",
Expand Down
107 changes: 53 additions & 54 deletions src/craiIndex.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import AbortablePromiseCache from 'abortable-promise-cache'
import QuickLRU from 'quick-lru'
import { unzip } from './unzip'
import { open } from './io'
import { CramMalformedError } from './errors'
Expand Down Expand Up @@ -34,6 +32,13 @@ function addRecordToIndex(index: ParsedIndex, record: number[]) {
})
}

/**
 * Passes `data` through `unzip` when its first two bytes are 31 and 139
 * (the gzip magic number); otherwise hands the very same buffer back.
 *
 * @param data - raw bytes as read from the index file
 * @returns the result of `unzip(data)` for gzip-looking input, else `data`
 */
function maybeUnzip(data: Buffer) {
  const looksGzipped = data[0] === 31 && data[1] === 139
  return looksGzipped ? unzip(data) : data
}

export default class CraiIndex {
// A CRAM index (.crai) is a gzipped, tab-delimited file containing the following columns:
// 1. Sequence id
Expand All @@ -43,7 +48,7 @@ export default class CraiIndex {
// 5. Slice start byte position in the container data (‘blocks’)
// 6. Slice size in bytes
// Each line represents a slice in the CRAM file. Please note that all slices must be listed in the index file.
private _parseCache: AbortablePromiseCache<unknown, ParsedIndex>
private parseIndexP?: Promise<ParsedIndex>
private filehandle: Filehandle

/**
Expand All @@ -55,57 +60,45 @@ export default class CraiIndex {
*/
constructor(args: CramFileSource) {
this.filehandle = open(args.url, args.path, args.filehandle)
this._parseCache = new AbortablePromiseCache<unknown, ParsedIndex>({
cache: new QuickLRU({ maxSize: 1 }),
fill: (_data, _signal) => this.parseIndex(),
})
}

parseIndex() {
async parseIndex(opts: { signal?: AbortSignal } = {}) {
const index: ParsedIndex = {}
return this.filehandle
.readFile()
.then(data => {
if (data[0] === 31 && data[1] === 139) {
return unzip(data)
}
return data
})
.then(uncompressedBuffer => {
if (
uncompressedBuffer.length > 4 &&
uncompressedBuffer.readUInt32LE(0) === BAI_MAGIC
) {
throw new CramMalformedError(
'invalid .crai index file. note: file appears to be a .bai index. this is technically legal but please open a github issue if you need support',
)
}
// interpret the text as regular ascii, since it is
// supposed to be only digits and whitespace characters
// this is written in a deliberately low-level fashion for performance,
// because some .crai files can be pretty large.
let currentRecord: number[] = []
let currentString = ''
const uncompressedBuffer = maybeUnzip(await this.filehandle.readFile(opts))
if (
uncompressedBuffer.length > 4 &&
uncompressedBuffer.readUInt32LE(0) === BAI_MAGIC
) {
throw new CramMalformedError(
'invalid .crai index file. note: file appears to be a .bai index. this is technically legal but please open a github issue if you need support',
)
}
// interpret the text as regular ascii, since it is
// supposed to be only digits and whitespace characters
// this is written in a deliberately low-level fashion for performance,
// because some .crai files can be pretty large.
let currentRecord: number[] = []
let currentString = ''
for (const charCode of uncompressedBuffer) {
if (
(charCode >= 48 && charCode <= 57) /* 0-9 */ ||
(!currentString && charCode === 45) /* leading - */
) {
currentString += String.fromCharCode(charCode)
} else if (charCode === 9 /* \t */) {
currentRecord.push(Number.parseInt(currentString, 10))
currentString = ''
} else if (charCode === 10 /* \n */) {
currentRecord.push(Number.parseInt(currentString, 10))
currentString = ''
addRecordToIndex(index, currentRecord)
currentRecord = []
} else if (charCode !== 13 /* \r */ && charCode !== 32 /* space */) {
// if there are other characters in the file besides
// space and \r, something is wrong.
throw new CramMalformedError('invalid .crai index file')
}
}
if (
(charCode >= 48 && charCode <= 57) /* 0-9 */ ||
(!currentString && charCode === 45) /* leading - */
) {
currentString += String.fromCharCode(charCode)
} else if (charCode === 9 /* \t */) {
currentRecord.push(Number.parseInt(currentString, 10))
currentString = ''
} else if (charCode === 10 /* \n */) {
currentRecord.push(Number.parseInt(currentString, 10))
currentString = ''
addRecordToIndex(index, currentRecord)
currentRecord = []
} else if (charCode !== 13 /* \r */ && charCode !== 32 /* space */) {
// if there are other characters in the file besides
// space and \r, something is wrong.
throw new CramMalformedError('invalid .crai index file')
}
}

// if the file ends without a \n, we need to flush our buffers
if (currentString) {
Expand All @@ -125,14 +118,20 @@ export default class CraiIndex {
})
}

getIndex(opts: { signal?: AbortSignal } = {}) {
return this._parseCache.get('index', null, opts.signal)
/**
 * Returns the parsed index, kicking off `parseIndex` on first use and
 * handing every later caller the same memoized promise.
 *
 * `opts` is only forwarded by the call that actually starts the parse;
 * callers that find a promise already stored in `parseIndexP` share it and
 * their own `opts` are not consulted.
 *
 * @param opts - optional settings passed through to `parseIndex`
 * @returns the promise stored in `parseIndexP`
 */
getIndex(opts?: { signal?: AbortSignal }) {
  if (this.parseIndexP) {
    return this.parseIndexP
  }
  const pending = this.parseIndex(opts).catch(e => {
    // forget the failed attempt so the next call starts a fresh parse
    // instead of replaying the stored rejection
    this.parseIndexP = undefined
    throw e
  })
  this.parseIndexP = pending
  return pending
}

/**
* @param {number} seqId
* @returns {Promise} true if the index contains entries for
* the given reference sequence ID, false otherwise
* @returns true if the index contains entries for the given reference
* sequence ID, false otherwise
*/
async hasDataForReferenceSequence(seqId: number) {
return !!(await this.getIndex())[seqId]
Expand Down
11 changes: 5 additions & 6 deletions src/cramFile/slice/decodeRecord.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import Long from 'long'
import { CramMalformedError } from '../../errors'
import {
BamFlagsDecoder,
Expand Down Expand Up @@ -31,7 +30,7 @@ function readNullTerminatedString(buffer: Uint8Array) {
* parse a BAM tag's array value from a binary buffer
* @private
*/
function parseTagValueArray(buffer: Buffer) {
function parseTagValueArray(buffer: Uint8Array) {
const arrayType = String.fromCharCode(buffer[0])
const length = Int32Array.from(buffer.slice(1))[0]

Expand Down Expand Up @@ -80,15 +79,15 @@ function parseTagValueArray(buffer: Buffer) {
return array
}

function parseTagData(tagType: string, buffer: any) {
function parseTagData(tagType: string, buffer: Uint8Array) {
if (tagType === 'Z') {
return readNullTerminatedString(buffer)
}
if (tagType === 'A') {
return String.fromCharCode(buffer[0])
}
if (tagType === 'I') {
return Long.fromBytesLE(buffer).toNumber()
return new Uint32Array(buffer.buffer)[0]
}
if (tagType === 'i') {
return new Int32Array(buffer.buffer)[0]
Expand Down Expand Up @@ -259,8 +258,8 @@ export default function decodeRecord(
let mateRecordNumber
// mate record
if (CramFlagsDecoder.isDetached(cramFlags)) {
// note: the MF is a byte in 1.0, int32 in 2+, but once again this doesn't matter for javascript
// const mate: any = {}
// note: the MF is a byte in 1.0, int32 in 2+,
// but once again this doesn't matter for javascript
const mateFlags = decodeDataSeries('MF')
let mateReadName
if (!compressionScheme.readNamesIncluded) {
Expand Down
Loading

0 comments on commit d5805ad

Please sign in to comment.