Skip to content

Commit

Permalink
Updates
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Dec 14, 2024
1 parent 9864902 commit 1382a6f
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 203 deletions.
5 changes: 3 additions & 2 deletions src/cramFile/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ import QuickLRU from 'quick-lru'
import { XzReadableStream } from 'xz-decompress'

import { CramMalformedError, CramUnimplementedError } from '../errors'
import htscodecs from '../htscodecs'
import * as htscodecs from '../htscodecs'
import { open } from '../io'
import ransuncompress from '../rans'
import { parseHeaderText } from '../sam'
import { unzip } from '../unzip'
import { concatUint8Array } from '../util'
import CramContainer from './container'
import CramRecord from './record'
import {
Expand All @@ -17,7 +18,7 @@ import {
cramFileDefinition,
getSectionParsers,
} from './sectionParsers'
import { concatUint8Array, parseItem, tinyMemoize } from './util'
import { parseItem, tinyMemoize } from './util'

import type { GenericFilehandle } from 'generic-filehandle2'

Expand Down
17 changes: 0 additions & 17 deletions src/cramFile/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -179,20 +179,3 @@ export function tinyMemoize(_class: any, methodName: any) {
/**
 * Upper-cases `seq`, removes every character outside the range
 * U+0021..U+007E, and returns the result of passing that string to `md5`.
 */
export function sequenceMD5(seq: string) {
  const upperCased = seq.toUpperCase()
  const printableOnly = upperCased.replaceAll(/[^\u0021-\u007e]/g, '')
  return md5(printableOnly)
}

/** Returns the combined length of all the given byte arrays. */
export function sum(array: Uint8Array[]) {
  return array.reduce((total, chunk) => total + chunk.length, 0)
}
/**
 * Joins the given byte arrays, in order, into one newly allocated
 * Uint8Array whose length is the total of the input lengths.
 */
export function concatUint8Array(args: Uint8Array[]) {
  const joined = new Uint8Array(sum(args))
  args.reduce((writeAt, chunk) => {
    joined.set(chunk, writeAt)
    return writeAt + chunk.length
  }, 0)
  return joined
}
23 changes: 4 additions & 19 deletions src/htscodecs/arith_gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,7 @@ import bzip2 from 'bzip2'
import RangeCoder from './arith_sh'
import ByteModel from './byte_model'
import IOStream from './iostream'

/**
 * Returns the combined length of every entry in `array`.
 *
 * @param array - list of chunks; only each entry's `length` is read
 * @returns the sum of all entry lengths (0 for an empty list)
 */
function sum(array: readonly { length: number }[]): number {
  let total = 0
  for (const entry of array) {
    total += entry.length
  }
  return total
}
/**
 * Joins the given chunks, in order, into one newly allocated Uint8Array.
 *
 * @param args - list of numeric array-likes (e.g. Uint8Array) to join
 * @returns a new Uint8Array sized by `sum(args)` holding each chunk's
 *   contents back to back
 */
function concatUint8Array(args: readonly ArrayLike<number>[]) {
  const mergedArray = new Uint8Array(sum(args))
  // offset is where the next chunk starts inside mergedArray
  let offset = 0
  for (const entry of args) {
    mergedArray.set(entry, offset)
    offset += entry.length
  }
  return mergedArray
}
import { concatUint8Array } from '../util'

const ARITH_ORDER = 1
const ARITH_EXT = 4
Expand All @@ -63,12 +47,13 @@ const ARITH_RLE = 64
const ARITH_PACK = 128

export default class RangeCoderGen {
decode(src) {
stream: IOStream
decode(src: Uint8Array) {
this.stream = new IOStream(src)
return this.decodeStream(this.stream)
}

decodeStream(stream, n_out = 0) {
decodeStream(stream: IOStream, n_out = 0) {
const flags = this.stream.ReadByte()
if (!(flags & ARITH_NOSIZE)) {
n_out = this.stream.ReadUint7()
Expand Down
6 changes: 2 additions & 4 deletions src/htscodecs/fqzcomp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ function decode_fqz_new_record(src, rc, gparams, model, state, rev) {
state.rec++
}

function decode_fqz(src, q_lens) {
function decode_fqz(src: IOStream, q_lens: number) {
// Decode parameter block
const n_out = src.ReadUint7()
const gparams = decode_fqz_params(src)
Expand Down Expand Up @@ -384,10 +384,8 @@ function reverse_qualities(qual, qual_len, rev, len) {
}
}

export function decode(src, q_lens) {
export function decode(src: Uint8Array, q_lens) {
const stream = new IOStream(src)

// var n_out = stream.ReadUint32(); stream.ReadUint32(); // move to main

return decode_fqz(stream, q_lens)
}
18 changes: 5 additions & 13 deletions src/htscodecs/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,35 +43,27 @@ import * as r4x8 from './rans'
import * as r4x16 from './rans4x16'
import * as tok3 from './tok3'

function r4x8_uncompress(inputBuffer, outputBuffer) {
export function r4x8_uncompress(inputBuffer: Uint8Array) {
return r4x8.decode(inputBuffer)
}

function r4x16_uncompress(inputBuffer) {
export function r4x16_uncompress(inputBuffer: Uint8Array) {
return r4x16.decode(inputBuffer)
}

function arith_uncompress(inputBuffer) {
export function arith_uncompress(inputBuffer: Uint8Array) {
// fix by @cmdcolin for CRAM 3.1
// xref https://github.com/jkbonfield/htscodecs/pull/1/files
return new arith().decode(inputBuffer)
}

function fqzcomp_uncompress(inputBuffer) {
export function fqzcomp_uncompress(inputBuffer: Uint8Array) {
const q_lens = []
return fqzcomp.decode(inputBuffer, q_lens)
}

function tok3_uncompress(inputBuffer) {
export function tok3_uncompress(inputBuffer: Uint8Array) {
// Returns in string form instead of buffer
const out = tok3.decode(inputBuffer, 0, '\0')
return Uint8Array.from(Array.from(out).map(letter => letter.charCodeAt(0)))
}

module.exports = {
arith_uncompress: arith_uncompress,
fqzcomp_uncompress: fqzcomp_uncompress,
r4x16_uncompress: r4x16_uncompress,
r4x8_uncompress: r4x8_uncompress,
tok3_uncompress: tok3_uncompress,
}
155 changes: 24 additions & 131 deletions src/htscodecs/iostream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,13 @@
// Turn a buffer into a fake stream with get / put commands.
// This enables us to closely match the published pseudocode.
export default class IOStream {
constructor(buf, start_pos = 0, size = 0) {
if (size != 0) {
buf: Uint8Array
length: number
pos: number
dataView: DataView

constructor(buf: Uint8Array, start_pos = 0, size = 0) {
if (size !== 0) {
this.buf = new Uint8Array(size)
this.length = size
} else {
Expand All @@ -52,20 +57,20 @@ export default class IOStream {
return this.pos >= this.length
}

ReadData(len) {
ReadData(len: number) {
const A = this.buf.slice(this.pos, this.pos + len)
this.pos += len
return A
}

ReadByte() {
const b = this.buf[this.pos]
const b = this.buf[this.pos]!
this.pos++
return b
}

ReadChar() {
const b = this.buf[this.pos]
const b = this.buf[this.pos]!
this.pos++
return String.fromCharCode(b)
}
Expand All @@ -85,41 +90,30 @@ export default class IOStream {
// nul terminated string
ReadString() {
let s = ''
let b: number
do {
var b = this.buf[this.pos++]
b = this.buf[this.pos++]!
if (b) {
s += String.fromCharCode(b)
}
} while (b)
return s
}

// ReadUint7() {
// // Variable sized unsigned integers
// var i = 0;
// var s = 0;
// do {
// var c = this.ReadByte();
// i = i | ((c & 0x7f)<<s);
// s += 7;
// } while ((c & 0x80))
//
// return i;
// }

ReadUint7() {
// Variable sized unsigned integers
let i = 0
let c: number
do {
var c = this.ReadByte()
c = this.ReadByte()
i = (i << 7) | (c & 0x7f)
} while (c & 0x80)

return i
}

ReadITF8() {
let i = this.buf[this.pos]
let i = this.buf[this.pos]!
this.pos++

// process.stderr.write("i="+i+"\n");
Expand All @@ -128,137 +122,36 @@ export default class IOStream {
// 1111xxxx => +4 bytes
i = (i & 0x0f) << 28
i +=
(this.buf[this.pos + 0] << 20) +
(this.buf[this.pos + 1] << 12) +
(this.buf[this.pos + 2] << 4) +
(this.buf[this.pos + 3] >> 4)
(this.buf[this.pos + 0]! << 20) +
(this.buf[this.pos + 1]! << 12) +
(this.buf[this.pos + 2]! << 4) +
(this.buf[this.pos + 3]! >> 4)
this.pos += 4
// process.stderr.write(" 4i="+i+"\n");
} else if (i >= 0xe0) {
// 1110xxxx => +3 bytes
i = (i & 0x0f) << 24
i +=
(this.buf[this.pos + 0] << 16) +
(this.buf[this.pos + 1] << 8) +
(this.buf[this.pos + 2] << 0)
(this.buf[this.pos + 0]! << 16) +
(this.buf[this.pos + 1]! << 8) +
(this.buf[this.pos + 2]! << 0)
this.pos += 3
// process.stderr.write(" 3i="+i+"\n");
} else if (i >= 0xc0) {
// 110xxxxx => +2 bytes
i = (i & 0x1f) << 16
i += (this.buf[this.pos + 0] << 8) + (this.buf[this.pos + 1] << 0)
i += (this.buf[this.pos + 0]! << 8) + (this.buf[this.pos + 1]! << 0)
this.pos += 2
// process.stderr.write(" 2i="+i+"\n");
} else if (i >= 0x80) {
// 10xxxxxx => +1 bytes
i = (i & 0x3f) << 8
i += this.buf[this.pos]
i += this.buf[this.pos]!
this.pos++
// process.stderr.write(" 1i="+i+"\n");
} else {
// 0xxxxxxx => +0 bytes
}

return i
}

// ----------
// Writing
// Stores `b` at the current position, then advances the position by one.
WriteByte(b) {
  const at = this.pos++
  this.buf[at] = b
}

// Stores the character code at index 0 of `b` at the current position,
// then advances the position by one.
WriteChar(b) {
  const at = this.pos++
  this.buf[at] = b.charCodeAt(0)
}

// Writes the character code of each position in `str` to the buffer,
// followed by a single 0 terminator.
WriteString(str) {
  let idx = 0
  while (idx < str.length) {
    const at = this.pos++
    this.buf[at] = str.charCodeAt(idx)
    idx += 1
  }
  const end = this.pos++
  this.buf[end] = 0
}

// Copies the first `len` entries of `buf` into this stream's buffer,
// advancing the position by one per entry.
WriteData(buf, len) {
  let copied = 0
  while (copied < len) {
    const at = this.pos++
    this.buf[at] = buf[copied]
    copied += 1
  }
}

// Appends the entries of `stream.buf` from index 0 up to `stream.pos`.
WriteStream(stream) {
  const { buf, pos } = stream
  this.WriteData(buf, pos)
}

// Writes the low 16 bits of `u` as two bytes, least-significant byte first.
WriteUint16(u) {
  const lowByte = u & 0xff
  const highByte = (u >> 8) & 0xff
  this.WriteByte(lowByte)
  this.WriteByte(highByte)
}

// Writes the low 32 bits of `u` as four bytes, least-significant byte first,
// and advances the position by four.
//
// The bytes are stored directly instead of through Node's
// Buffer#writeInt32LE: `buf` is a Uint8Array (the constructor allocates a
// plain `new Uint8Array(size)` when given a size), which has no such method,
// and the signed Buffer variant rejects values above 0x7fffffff.
WriteUint32(u) {
  this.buf[this.pos++] = u & 0xff
  this.buf[this.pos++] = (u >>> 8) & 0xff
  this.buf[this.pos++] = (u >>> 16) & 0xff
  this.buf[this.pos++] = (u >>> 24) & 0xff
}

// WriteUint7(i) {
// do {
// this.WriteByte((i & 0x7f) | ((i > 0x80) << 7));
// i >>= 7;
// } while (i > 0);
// }

// Writes `i` as a variable-length integer: 7 bits per byte, most significant
// group first, with the top bit set on every byte except the last one.
WriteUint7(i) {
  // Find the number of bits (a multiple of 7) needed to hold i.
  let s = 0
  let X = i
  do {
    s += 7
    X >>= 7
  } while (X > 0)

  // Emit the 7-bit groups from most to least significant.
  do {
    s -= 7
    // Continuation flag as an explicit number; shifting the boolean `s > 0`
    // is not valid TypeScript.
    const more = s > 0 ? 0x80 : 0
    this.WriteByte(((i >> s) & 0x7f) + more)
  } while (s > 0)
}

// Writes `i` in ITF8 form: 1 to 5 bytes, where the leading bits of the first
// byte say how many bytes follow. Advances the position by the bytes written.
WriteITF8(i) {
  // Horrid, ITF8 is unsigned, but we still write signed into it: map a
  // negative value onto its unsigned 32-bit equivalent. `1 << 32` cannot be
  // used for this because JavaScript takes the shift count modulo 32, so it
  // evaluates to 1 rather than 2^32.
  if (i < 0) {
    i = 0x100000000 + i
  }

  if (i <= 0x0000007f) {
    // 1 byte
    this.buf[this.pos++] = i
  } else if (i <= 0x00003fff) {
    // 2 bytes
    this.buf[this.pos++] = 0x80 | Math.floor(i / 256)
    this.buf[this.pos++] = i & 0xff
  } else if (i < 0x0001ffff) {
    // 3 bytes
    this.buf[this.pos++] = 0xc0 | Math.floor(i / 65536)
    this.buf[this.pos++] = Math.floor(i / 256) & 0xff
    this.buf[this.pos++] = i & 0xff
  } else if (i < 0x0fffffff) {
    // 4 bytes
    this.buf[this.pos++] = 0xe0 | Math.floor(i / 16777216)
    this.buf[this.pos++] = Math.floor(i / 65536) & 0xff
    this.buf[this.pos++] = Math.floor(i / 256) & 0xff
    this.buf[this.pos++] = i & 0xff
  } else {
    // 5 bytes; oddly using 4.5 bytes: the top 4 bits go in the first byte,
    // the next 24 bits in three full bytes, and the low 4 bits in the last.
    this.buf[this.pos++] = 0xf0 | Math.floor(i / 268435456)
    this.buf[this.pos++] = Math.floor(i / 1048576) & 0xff
    this.buf[this.pos++] = Math.floor(i / 4096) & 0xff
    // bits 4..11, hence the divide by 16 (2^4)
    this.buf[this.pos++] = Math.floor(i / 16) & 0xff
    this.buf[this.pos++] = i & 0x0f
  }
}

// ----------
// Writing from end of buffer going backwards.
// Needed by rANS codec.
// Moves the position back by one, then stores `b` at the new position.
WriteByteNeg(b) {
  this.pos -= 1
  this.buf[this.pos] = b
}
}
Loading

0 comments on commit 1382a6f

Please sign in to comment.