From 947bb77e6a9d85df99fd4cf1c1d49c4b929e7049 Mon Sep 17 00:00:00 2001 From: Colin Date: Sat, 14 Dec 2024 16:00:08 -0500 Subject: [PATCH] Updates --- eslint.config.mjs | 9 ++- src/cramFile/file.ts | 5 +- src/cramFile/util.ts | 17 ---- src/htscodecs/arith_gen.ts | 26 ++---- src/htscodecs/arith_sh.ts | 2 + src/htscodecs/byte_model.ts | 3 + src/htscodecs/fqzcomp.ts | 9 ++- src/htscodecs/index.ts | 20 ++--- src/htscodecs/iostream.ts | 157 ++++++------------------------------ src/htscodecs/rans.ts | 7 +- src/htscodecs/rans4x16.ts | 10 ++- src/htscodecs/tok3.ts | 21 +---- src/util.ts | 16 ++++ 13 files changed, 93 insertions(+), 209 deletions(-) create mode 100644 src/util.ts diff --git a/eslint.config.mjs b/eslint.config.mjs index c4c1490d..63069c0e 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -5,7 +5,14 @@ import tseslint from 'typescript-eslint' export default tseslint.config( { - ignores: ['esm/**/*', 'dist/**/*', '*.js', '*.mjs', 'example/*'], + ignores: [ + 'esm/**/*', + 'dist/**/*', + '*.js', + '*.mjs', + 'example/*', + 'src/htscodecs', + ], }, { languageOptions: { diff --git a/src/cramFile/file.ts b/src/cramFile/file.ts index d4df1e92..a3089f32 100644 --- a/src/cramFile/file.ts +++ b/src/cramFile/file.ts @@ -4,11 +4,12 @@ import QuickLRU from 'quick-lru' import { XzReadableStream } from 'xz-decompress' import { CramMalformedError, CramUnimplementedError } from '../errors' -import htscodecs from '../htscodecs' +import * as htscodecs from '../htscodecs' import { open } from '../io' import ransuncompress from '../rans' import { parseHeaderText } from '../sam' import { unzip } from '../unzip' +import { concatUint8Array } from '../util' import CramContainer from './container' import CramRecord from './record' import { @@ -17,7 +18,7 @@ import { cramFileDefinition, getSectionParsers, } from './sectionParsers' -import { concatUint8Array, parseItem, tinyMemoize } from './util' +import { parseItem, tinyMemoize } from './util' import type { GenericFilehandle } from 'generic-filehandle2' diff --git a/src/cramFile/util.ts b/src/cramFile/util.ts index 00872bf5..46420ff3 100644 --- a/src/cramFile/util.ts +++ b/src/cramFile/util.ts @@ -179,20 +179,3 @@ export function tinyMemoize(_class: any, methodName: any) { export function sequenceMD5(seq: string) { return md5(seq.toUpperCase().replaceAll(/[^\u0021-\u007e]/g, '')) } - -export function sum(array: Uint8Array[]) { - let sum = 0 - for (const entry of array) { - sum += entry.length - } - return sum -} -export function concatUint8Array(args: Uint8Array[]) { - const mergedArray = new Uint8Array(sum(args)) - let offset = 0 - for (const entry of args) { - mergedArray.set(entry, offset) - offset += entry.length - } - return mergedArray -} diff --git a/src/htscodecs/arith_gen.ts b/src/htscodecs/arith_gen.ts index 16182cc7..16abe39e 100644 --- a/src/htscodecs/arith_gen.ts +++ b/src/htscodecs/arith_gen.ts @@ -1,3 +1,6 @@ +/* eslint-disable no-var */ +// @ts-nocheck + /* * Copyright (c) 2019,2020 Genome Research Ltd. * Author(s): James Bonfield @@ -36,23 +39,7 @@ import bzip2 from 'bzip2' import RangeCoder from './arith_sh' import ByteModel from './byte_model' import IOStream from './iostream' - -function sum(array) { - let sum = 0 - for (const entry of array) { - sum += entry.length - } - return sum -} -function concatUint8Array(args) { - const mergedArray = new Uint8Array(sum(args)) - let offset = 0 - for (const entry of args) { - mergedArray.set(entry, offset) - offset += entry.length - } - return mergedArray -} +import { concatUint8Array } from '../util' const ARITH_ORDER = 1 const ARITH_EXT = 4 @@ -63,12 +50,13 @@ const ARITH_RLE = 64 const ARITH_PACK = 128 export default class RangeCoderGen { - decode(src) { + stream: IOStream + decode(src: Uint8Array) { this.stream = new IOStream(src) return this.decodeStream(this.stream) } - decodeStream(stream, n_out = 0) { + decodeStream(stream: IOStream, n_out = 0) { const flags = this.stream.ReadByte() if (!(flags & ARITH_NOSIZE)) { n_out = this.stream.ReadUint7() diff --git a/src/htscodecs/arith_sh.ts b/src/htscodecs/arith_sh.ts index ccfaeace..6d6253c4 100644 --- a/src/htscodecs/arith_sh.ts +++ b/src/htscodecs/arith_sh.ts @@ -1,3 +1,5 @@ +// @ts-nocheck + /* * Copyright (c) 2019 Genome Research Ltd. * Author(s): James Bonfield diff --git a/src/htscodecs/byte_model.ts b/src/htscodecs/byte_model.ts index 926c9701..4c94f5e2 100644 --- a/src/htscodecs/byte_model.ts +++ b/src/htscodecs/byte_model.ts @@ -1,3 +1,6 @@ +/* eslint-disable no-var */ +// @ts-nocheck + /* * Copyright (c) 2019 Genome Research Ltd. * Author(s): James Bonfield diff --git a/src/htscodecs/fqzcomp.ts b/src/htscodecs/fqzcomp.ts index db18899b..a2fdd1eb 100644 --- a/src/htscodecs/fqzcomp.ts +++ b/src/htscodecs/fqzcomp.ts @@ -1,3 +1,6 @@ +/* eslint-disable no-var */ +// @ts-nocheck + /* * Copyright (c) 2019 Genome Research Ltd. * Author(s): James Bonfield @@ -293,7 +296,7 @@ function decode_fqz_new_record(src, rc, gparams, model, state, rev) { state.rec++ } -function decode_fqz(src, q_lens) { +function decode_fqz(src: IOStream, q_lens: number) { // Decode parameter block const n_out = src.ReadUint7() const gparams = decode_fqz_params(src) @@ -384,10 +387,8 @@ function reverse_qualities(qual, qual_len, rev, len) { } } -export function decode(src, q_lens) { +export function decode(src: Uint8Array, q_lens) { const stream = new IOStream(src) - // var n_out = stream.ReadUint32(); stream.ReadUint32(); // move to main - return decode_fqz(stream, q_lens) } diff --git a/src/htscodecs/index.ts b/src/htscodecs/index.ts index 97e83c74..8e33d7fc 100644 --- a/src/htscodecs/index.ts +++ b/src/htscodecs/index.ts @@ -1,3 +1,5 @@ +// @ts-nocheck +// /* * Copyright (c) 2020 Genome Research Ltd. * Author(s): James Bonfield @@ -43,35 +45,27 @@ import * as r4x8 from './rans' import * as r4x16 from './rans4x16' import * as tok3 from './tok3' -function r4x8_uncompress(inputBuffer, outputBuffer) { +export function r4x8_uncompress(inputBuffer: Uint8Array) { return r4x8.decode(inputBuffer) } -function r4x16_uncompress(inputBuffer) { +export function r4x16_uncompress(inputBuffer: Uint8Array) { return r4x16.decode(inputBuffer) } -function arith_uncompress(inputBuffer) { +export function arith_uncompress(inputBuffer: Uint8Array) { // fix by @cmdcolin for CRAM 3.1 // xref https://github.com/jkbonfield/htscodecs/pull/1/files return new arith().decode(inputBuffer) } -function fqzcomp_uncompress(inputBuffer) { +export function fqzcomp_uncompress(inputBuffer: Uint8Array) { const q_lens = [] return fqzcomp.decode(inputBuffer, q_lens) } -function tok3_uncompress(inputBuffer) { +export function tok3_uncompress(inputBuffer: Uint8Array) { // Returns in string form instead of buffer const out = tok3.decode(inputBuffer, 0, '\0') return Uint8Array.from(Array.from(out).map(letter => letter.charCodeAt(0))) } - -module.exports = { - arith_uncompress: arith_uncompress, - fqzcomp_uncompress: fqzcomp_uncompress, - r4x16_uncompress: r4x16_uncompress, - r4x8_uncompress: r4x8_uncompress, - tok3_uncompress: tok3_uncompress, -} diff --git a/src/htscodecs/iostream.ts b/src/htscodecs/iostream.ts index 7e22d925..4605f614 100644 --- a/src/htscodecs/iostream.ts +++ b/src/htscodecs/iostream.ts @@ -1,3 +1,5 @@ +// @ts-nocheck + /* * Copyright (c) 2019 Genome Research Ltd. * Author(s): James Bonfield @@ -34,8 +36,13 @@ // Turn a buffer into a fake stream with get / put commands. // This enables up to closely match the published pseudocode. export default class IOStream { - constructor(buf, start_pos = 0, size = 0) { - if (size != 0) { + buf: Uint8Array + length: number + pos: number + dataView: DataView + + constructor(buf: Uint8Array, start_pos = 0, size = 0) { + if (size !== 0) { this.buf = new Uint8Array(size) this.length = size } else { @@ -52,20 +59,20 @@ export default class IOStream { return this.pos >= this.length } - ReadData(len) { + ReadData(len: number) { const A = this.buf.slice(this.pos, this.pos + len) this.pos += len return A } ReadByte() { - const b = this.buf[this.pos] + const b = this.buf[this.pos]! this.pos++ return b } ReadChar() { - const b = this.buf[this.pos] + const b = this.buf[this.pos]! this.pos++ return String.fromCharCode(b) } @@ -85,8 +92,9 @@ export default class IOStream { // nul terminated string ReadString() { let s = '' + let b: number do { - var b = this.buf[this.pos++] + b = this.buf[this.pos++]! if (b) { s += String.fromCharCode(b) } @@ -94,24 +102,12 @@ export default class IOStream { return s } - // ReadUint7() { - // // Variable sized unsigned integers - // var i = 0; - // var s = 0; - // do { - // var c = this.ReadByte(); - // i = i | ((c & 0x7f)< +4 bytes i = (i & 0x0f) << 28 i += - (this.buf[this.pos + 0] << 20) + - (this.buf[this.pos + 1] << 12) + - (this.buf[this.pos + 2] << 4) + - (this.buf[this.pos + 3] >> 4) + (this.buf[this.pos + 0]! << 20) + + (this.buf[this.pos + 1]! << 12) + + (this.buf[this.pos + 2]! << 4) + + (this.buf[this.pos + 3]! >> 4) this.pos += 4 // process.stderr.write(" 4i="+i+"\n"); } else if (i >= 0xe0) { // 1110xxxx => +3 bytes i = (i & 0x0f) << 24 i += - (this.buf[this.pos + 0] << 16) + - (this.buf[this.pos + 1] << 8) + - (this.buf[this.pos + 2] << 0) + (this.buf[this.pos + 0]! << 16) + + (this.buf[this.pos + 1]! << 8) + + (this.buf[this.pos + 2]! << 0) this.pos += 3 // process.stderr.write(" 3i="+i+"\n"); } else if (i >= 0xc0) { // 110xxxxx => +2 bytes i = (i & 0x1f) << 16 - i += (this.buf[this.pos + 0] << 8) + (this.buf[this.pos + 1] << 0) + i += (this.buf[this.pos + 0]! << 8) + (this.buf[this.pos + 1]! << 0) this.pos += 2 // process.stderr.write(" 2i="+i+"\n"); } else if (i >= 0x80) { // 10xxxxxx => +1 bytes i = (i & 0x3f) << 8 - i += this.buf[this.pos] + i += this.buf[this.pos]! this.pos++ - // process.stderr.write(" 1i="+i+"\n"); } else { // 0xxxxxxx => +0 bytes } return i } - - // ---------- - // Writing - WriteByte(b) { - this.buf[this.pos++] = b - } - - WriteChar(b) { - this.buf[this.pos++] = b.charCodeAt(0) - } - - WriteString(str) { - for (let i = 0; i < str.length; i++) { - this.buf[this.pos++] = str.charCodeAt(i) - } - this.buf[this.pos++] = 0 - } - - WriteData(buf, len) { - for (let i = 0; i < len; i++) { - this.buf[this.pos++] = buf[i] - } - } - - WriteStream(stream) { - this.WriteData(stream.buf, stream.pos) - } - - WriteUint16(u) { - // this.buf.writeInt16LE(u, this.pos); - this.WriteByte(u & 0xff) - this.WriteByte((u >> 8) & 0xff) - } - - WriteUint32(u) { - this.buf.writeInt32LE(u, this.pos) - this.pos += 4 - } - - // WriteUint7(i) { - // do { - // this.WriteByte((i & 0x7f) | ((i > 0x80) << 7)); - // i >>= 7; - // } while (i > 0); - // } - - WriteUint7(i) { - let s = 0 - let X = i - do { - s += 7 - X >>= 7 - } while (X > 0) - - do { - s -= 7 - this.WriteByte(((i >> s) & 0x7f) + ((s > 0) << 7)) - } while (s > 0) - } - - WriteITF8(i) { - // Horrid, ITF8 is unsigned, but we still write signed into it - if (i < 0) { - i = (1 << 32) + i - } - - if (i <= 0x0000007f) { - // 1 byte - this.buf[this.pos++] = i - } else if (i <= 0x00003fff) { - // 2 bytes - this.buf[this.pos++] = 0x80 | Math.floor(i / 256) - this.buf[this.pos++] = i & 0xff - } else if (i < 0x0001ffff) { - // 3 bytes - this.buf[this.pos++] = 0xc0 | Math.floor(i / 65536) - this.buf[this.pos++] = Math.floor(i / 256) & 0xff - this.buf[this.pos++] = i & 0xff - } else if (i < 0x0fffffff) { - // 4 bytes - this.buf[this.pos++] = 0xe0 | Math.floor(i / 16777216) - this.buf[this.pos++] = Math.floor(i / 65536) & 0xff - this.buf[this.pos++] = Math.floor(i / 256) & 0xff - this.buf[this.pos++] = i & 0xff - } else { - // 5 bytes; oddly using 4.5 bytes - this.buf[this.pos++] = 0xf0 | Math.floor(i / 268435456) - this.buf[this.pos++] = Math.floor(i / 1048576) & 0xff - this.buf[this.pos++] = Math.floor(i / 4096) & 0xff - this.buf[this.pos++] = Math.floor(i / 4) & 0xff - this.buf[this.pos++] = i & 0x0f - } - } - - // ---------- - // Writing from end of buffer going backwards. - // Needed by rANS codec. - WriteByteNeg(b) { - this.buf[--this.pos] = b - } } diff --git a/src/htscodecs/rans.ts b/src/htscodecs/rans.ts index f750d4c4..24c5cf5f 100644 --- a/src/htscodecs/rans.ts +++ b/src/htscodecs/rans.ts @@ -1,3 +1,6 @@ +/* eslint-disable no-var */ +// @ts-nocheck + /* * Copyright (c) 2019-2020 Genome Research Ltd. * Author(s): James Bonfield @@ -80,7 +83,7 @@ function RansRenorm(src, R) { // ---------------------------------------------------------------------- // Main rANS entry function: decodes a compressed src and // returns the uncompressed buffer. -function decode(src) { +export function decode(src) { const stream = new IOStream(src) const order = stream.ReadByte() const n_in = stream.ReadUint32() @@ -245,5 +248,3 @@ function RansDecode1(src, nbytes) { return output } - -module.exports = { decode } diff --git a/src/htscodecs/rans4x16.ts b/src/htscodecs/rans4x16.ts index e5b49d86..cda01ebd 100644 --- a/src/htscodecs/rans4x16.ts +++ b/src/htscodecs/rans4x16.ts @@ -1,3 +1,6 @@ +/* eslint-disable no-var */ +// @ts-nocheck + /* * Copyright (c) 2019,2020 Genome Research Ltd. * Author(s): James Bonfield @@ -468,8 +471,11 @@ function RansDecode1(src, nbytes, N) { // Fast lookup to avoid slow RansGetSymbolFromFreq const C2S = new Array(256) - for (var i = 0; i < 256; i++) // Could do only for symbols in alphabet? - { + for ( + var i = 0; + i < 256; + i++ // Could do only for symbols in alphabet? + ) { C2S[i] = RansBuildC2S(C[i], shift) } diff --git a/src/htscodecs/tok3.ts b/src/htscodecs/tok3.ts index 23ea00b7..458a9331 100644 --- a/src/htscodecs/tok3.ts +++ b/src/htscodecs/tok3.ts @@ -1,3 +1,6 @@ +/* eslint-disable no-var */ +// @ts-nocheck + /* * Copyright (c) 2019 Genome Research Ltd. * Author(s): James Bonfield @@ -37,27 +40,11 @@ // written specification as closely as possible. It is *NOT* // an efficient implementation, but see comments below. +import { concatUint8Array } from '../util' import arith_gen from './arith_gen' import IOStream from './iostream' import * as rans from './rans4x16' -function sum(array) { - let sum = 0 - for (const entry of array) { - sum += entry.length - } - return sum -} -function concatUint8Array(args) { - const mergedArray = new Uint8Array(sum(args)) - let offset = 0 - for (const entry of args) { - mergedArray.set(entry, offset) - offset += entry.length - } - return mergedArray -} - const arith = new arith_gen() const TOK_TYPE = 0 diff --git a/src/util.ts b/src/util.ts new file mode 100644 index 00000000..135fadfa --- /dev/null +++ b/src/util.ts @@ -0,0 +1,16 @@ +function sum(array: Uint8Array[]) { + let sum = 0 + for (const entry of array) { + sum += entry.length + } + return sum +} +export function concatUint8Array(args: Uint8Array[]) { + const mergedArray = new Uint8Array(sum(args)) + let offset = 0 + for (const entry of args) { + mergedArray.set(entry, offset) + offset += entry.length + } + return mergedArray +}