Skip to content

Commit

Permalink
Merge pull request #2 from patrickhousley/fix/parser-control-chars
Browse files Browse the repository at this point in the history
fix(parser): parse control characters in string
  • Loading branch information
patrickhousley authored Jul 8, 2024
2 parents 3d0e840 + 51022cb commit 6e9dbcd
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 23 deletions.
1 change: 1 addition & 0 deletions packages/parser/src/lib/charset.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export enum Token {

// String tokens
DOUBLE_QUOTE = 0x22, // "
BACKWARD_SLASH = 0x5c, // \
}

export enum NumberValueLiteralToken {
Expand Down
115 changes: 112 additions & 3 deletions packages/parser/src/lib/memory-parser.spec.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { faker } from '@faker-js/faker'
import { MemoryParser } from './memory-parser'
import { AllowedWhitespaceToken } from './charset'
import { AllowedWhitespaceToken, Token } from './charset'
import { ParserError } from './parser-error'

it('should stop processing when passed true in subsequent call to next', () => {
Expand Down Expand Up @@ -133,7 +133,7 @@ describe('JSON opening', () => {
})
})

describe('string literal', () => {
describe('strings', () => {
it('should properly close a string literal', () => {
const parser = new MemoryParser('""')
const iterator = parser.read()
Expand Down Expand Up @@ -230,6 +230,34 @@ describe('string literal', () => {
expect((err as ParserError).message).toEqual(`Unterminated string in JSON at position 4`)
}
})

// Characters that act as structural tokens outside of a string ([ ] { } : ,),
// plus the quote and backslash that JSON.stringify escapes, must all surface as
// plain CHARACTER events while the parser is inside a string.
// The title carries no `%s`: this is a plain `it`, not `it.each`, so the
// placeholder would be printed literally.
it('should support control characters in strings', () => {
  const input = '[]{}:,"\\'
  // Char codes of the serialized string body `[]{}:,\"\\`. The escaping
  // backslashes are expected as their own CHARACTER events.
  const expected = [91, 93, 123, 125, 58, 44, 92, 34, 92, 92]
  const parser = new MemoryParser(JSON.stringify(input))
  const iterator = parser.read()

  expect(iterator.next()).toEqual({
    done: false,
    value: { event: 'STRING_START' },
  })
  for (const charCode of expected) {
    expect(iterator.next()).toEqual({
      done: false,
      value: { event: 'CHARACTER', charCode },
    })
  }
  expect(iterator.next()).toEqual({
    done: false,
    value: { event: 'STRING_END' },
  })
  expect(iterator.next()).toEqual({
    done: true,
  })
})
})

describe('value literals', () => {
Expand Down Expand Up @@ -742,12 +770,13 @@ describe('arrays', () => {
iterator.next()
iterator.next()
iterator.next()
iterator.next()

try {
expect(iterator.next()).toThrow()
} catch (err) {
expect(err).toBeInstanceOf(ParserError)
expect((err as ParserError).message).toEqual('Unterminated string in JSON at position 5')
expect((err as ParserError).message).toEqual('Unterminated string in JSON at position 6')
}
})

Expand All @@ -769,6 +798,42 @@ describe('arrays', () => {
expect((err as ParserError).message).toEqual(`Unexpected token '${input}' at position 1`)
}
})

// A string element containing structural characters ([ ] { } : ,) and escaped
// quote/backslash must be emitted as CHARACTER events without closing or
// re-opening the surrounding array.
// The title carries no `%s`: this is a plain `it`, not `it.each`, so the
// placeholder would be printed literally.
it('should support control characters in strings', () => {
  const input = '[]{}:,"\\'
  // Char codes of the serialized string body `[]{}:,\"\\`. The escaping
  // backslashes are expected as their own CHARACTER events.
  const expected = [91, 93, 123, 125, 58, 44, 92, 34, 92, 92]
  const parser = new MemoryParser(JSON.stringify([input]))
  const iterator = parser.read()

  expect(iterator.next()).toEqual({
    done: false,
    value: { event: 'ARRAY_START' },
  })
  expect(iterator.next()).toEqual({
    done: false,
    value: { event: 'STRING_START' },
  })
  for (const charCode of expected) {
    expect(iterator.next()).toEqual({
      done: false,
      value: { event: 'CHARACTER', charCode },
    })
  }
  expect(iterator.next()).toEqual({
    done: false,
    value: { event: 'STRING_END' },
  })
  expect(iterator.next()).toEqual({
    done: false,
    value: { event: 'ARRAY_END' },
  })
  expect(iterator.next()).toEqual({
    done: true,
  })
})
})

describe('objects', () => {
Expand Down Expand Up @@ -1083,4 +1148,48 @@ describe('objects', () => {
}
},
)

// A property value containing structural characters ([ ] { } : ,) and escaped
// quote/backslash must be emitted as CHARACTER events without disturbing the
// surrounding object.
// The title carries no `%s`: this is a plain `it`, not `it.each`, so the
// placeholder would be printed literally.
it('should support control characters in strings', () => {
  const input = '[]{}:,"\\'
  // Char codes of the serialized string body `[]{}:,\"\\`. The escaping
  // backslashes are expected as their own CHARACTER events.
  const expected = [91, 93, 123, 125, 58, 44, 92, 34, 92, 92]
  const parser = new MemoryParser(JSON.stringify({ foo: input }))
  const iterator = parser.read()

  expect(iterator.next()).toEqual({
    done: false,
    value: { event: 'OBJECT_START' },
  })

  // Skip the six events that precede the value string. Presumably these are the
  // `"foo"` key (string start, three characters, string end) followed by the
  // key/value split; they are not under test here.
  const eventsBeforeValue = 6
  for (let skipped = 0; skipped < eventsBeforeValue; skipped++) {
    iterator.next()
  }

  expect(iterator.next()).toEqual({
    done: false,
    value: { event: 'STRING_START' },
  })
  for (const charCode of expected) {
    expect(iterator.next()).toEqual({
      done: false,
      value: { event: 'CHARACTER', charCode },
    })
  }
  expect(iterator.next()).toEqual({
    done: false,
    value: { event: 'STRING_END' },
  })
  expect(iterator.next()).toEqual({
    done: false,
    value: { event: 'OBJECT_END' },
  })
  expect(iterator.next()).toEqual({
    done: true,
  })
})
})
39 changes: 19 additions & 20 deletions packages/parser/src/lib/memory-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export class MemoryParser extends Parser {
*read(): Generator<ParserEvent, void, boolean> {
const eventStack: ParserEvent[] = []
let openingEvent: ParserEvent | undefined | void
let stringEscapeCharacterSeen = false
let done = false
let inputIndex = 0
let buffer = this.input.slice(0, this.options.bufferSize)
Expand Down Expand Up @@ -81,7 +82,7 @@ export class MemoryParser extends Parser {
continue
}

if (charCode === Token.COMMA) {
if (charCode === Token.COMMA && !this.#isInString(eventStack)) {
if (eventStack[eventStack.length - 1]?.event === 'VALUE_LITERAL_START') {
const event: ValueLiteralEndParserEvent = { event: 'VALUE_LITERAL_END' }
eventStack.pop()
Expand All @@ -97,21 +98,21 @@ export class MemoryParser extends Parser {
continue
}

if (charCode === Token.COLON) {
if (charCode === Token.COLON && !this.#isInString(eventStack)) {
const event: KeyValueSplitParserEvent = { event: 'KEY_VALUE_SPLIT' }
eventStack.push(event)
done = yield event
continue
}

if (charCode === Token.LEFT_SQUARE_BRACKET) {
if (charCode === Token.LEFT_SQUARE_BRACKET && !this.#isInString(eventStack)) {
const event: ArrayStartParserEvent = { event: 'ARRAY_START' }
eventStack.push(event)
done = yield event
continue
}

if (charCode === Token.RIGHT_SQUARE_BRACKET) {
if (charCode === Token.RIGHT_SQUARE_BRACKET && !this.#isInString(eventStack)) {
if (eventStack[eventStack.length - 1]?.event === 'VALUE_LITERAL_START') {
const event: ValueLiteralEndParserEvent = { event: 'VALUE_LITERAL_END' }
eventStack.pop()
Expand All @@ -129,14 +130,14 @@ export class MemoryParser extends Parser {
continue
}

if (charCode === Token.LEFT_CURLY_BRACKET) {
if (charCode === Token.LEFT_CURLY_BRACKET && !this.#isInString(eventStack)) {
const event: ObjectStartParserEvent = { event: 'OBJECT_START' }
eventStack.push(event)
done = yield event
continue
}

if (charCode === Token.RIGHT_CURLY_BRACKET) {
if (charCode === Token.RIGHT_CURLY_BRACKET && !this.#isInString(eventStack)) {
if (eventStack[eventStack.length - 1]?.event === 'VALUE_LITERAL_START') {
const event: ValueLiteralEndParserEvent = { event: 'VALUE_LITERAL_END' }
eventStack.pop()
Expand Down Expand Up @@ -170,7 +171,7 @@ export class MemoryParser extends Parser {
continue
}

if (charCode === Token.DOUBLE_QUOTE) {
if (charCode === Token.DOUBLE_QUOTE && (!this.#isInString(eventStack) || !stringEscapeCharacterSeen)) {
if (
eventStack[eventStack.length - 1]?.event === 'KEY_VALUE_SPLIT' &&
eventStack[eventStack.length - 2]?.event === 'OBJECT_START'
Expand All @@ -179,6 +180,7 @@ export class MemoryParser extends Parser {
eventStack.pop()
}
if (eventStack[eventStack.length - 1]?.event === 'STRING_START') {
stringEscapeCharacterSeen = false
eventStack.pop()
done = yield { event: 'STRING_END' }
continue
Expand All @@ -204,7 +206,11 @@ export class MemoryParser extends Parser {
}

// If processing gets to this point, just return character events
// Most likely processing values: true, false, null
if (charCode === Token.BACKWARD_SLASH && this.#isInString(eventStack) && !stringEscapeCharacterSeen) {
stringEscapeCharacterSeen = true
} else {
stringEscapeCharacterSeen = false
}
done = yield { event: 'CHARACTER', charCode }
}

Expand Down Expand Up @@ -237,17 +243,6 @@ export class MemoryParser extends Parser {
throw new ParserError('Unexpected end of JSON input')
}
}

// if (eventStack[eventStack.length - 1]?.event === 'STRING_START') {
// throw new ParserError('String never closed.')
// } else if (eventStack[eventStack.length - 1]?.event === 'ARRAY_START') {
// throw new ParserError('Array never closed.')
// } else if (
// eventStack[eventStack.length - 1]?.event === 'NUMBER_START' ||
// eventStack[eventStack.length - 1]?.event === 'NUMBER_FLOAT_SPLITTER_CHARACTER'
// ) {
// yield { event: 'NUMBER_END' }
// }
}
}

Expand Down Expand Up @@ -292,7 +287,7 @@ export class MemoryParser extends Parser {
}

#checkForInvalidToken(charCode: number, eventStack: ParserEvent[]): void {
if (eventStack.length === 0) {
if (eventStack.length === 0 || this.#isInString(eventStack)) {
// Let the parser logic process
return
}
Expand Down Expand Up @@ -353,4 +348,8 @@ export class MemoryParser extends Parser {
throw new SyntaxError("Expected property name or '}' in JSON")
}
}

/**
 * Reports whether the parser is currently inside a string, i.e. whether an
 * unclosed `STRING_START` event is present anywhere on the event stack.
 *
 * Uses `some` rather than `map(...).includes(...)` so no intermediate array is
 * allocated and the scan stops at the first match; this check runs several
 * times for every input character.
 *
 * @param eventStack - Stack of currently open parser events.
 * @returns `true` when any stacked event is a `STRING_START`.
 */
#isInString(eventStack: ParserEvent[]): boolean {
  return eventStack.some((stackedEvent) => stackedEvent.event === 'STRING_START')
}
}

0 comments on commit 6e9dbcd

Please sign in to comment.