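// Lenient ("dirty") JSON parsing: tokenizer -> error-recovering parser -> AST evaluation.

// ---------------------------------------------------------------------------
// AST node types
// ---------------------------------------------------------------------------
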
/**
 * A node of the parsed document tree.
 *
 * The literal `type` tag discriminates the union, so a `switch (node.type)`
 * narrows to the matching interface.
 */
type ASTNode = ObjectNode | ArrayNode | LiteralNode;

/** An object: a list of key/value properties. */
interface ObjectNode {
  type: 'Object';
  /** The object's entries, one PropertyNode per `key: value` pair. */
  properties: PropertyNode[];
}

/** A single `key: value` entry of an ObjectNode. */
interface PropertyNode {
  key: string;
  value: ASTNode;
}

/** An array: an ordered list of child nodes. */
interface ArrayNode {
  type: 'Array';
  elements: ASTNode[];
}

/** A scalar leaf: string, number, boolean or null. */
interface LiteralNode {
  type: 'Literal';
  value: string | number | boolean | null;
}
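
// ---------------------------------------------------------------------------
// Tokens and tokenizer
// ---------------------------------------------------------------------------
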
/** Kinds of lexical token produced by the Tokenizer. */
enum TokenType {
  LBrace, RBrace, LBracket, RBracket,
  Colon, Comma,
  String, Number, Ident,
  EOF
}

/** A lexical token together with its source text and start offset. */
interface Token {
  type: TokenType;
  /** Exact source text of the token; string tokens keep their surrounding quotes. */
  value: string;
  /** Offset of the token's first character in the input (input length for EOF). */
  pos: number;
}

/**
 * Splits the input into tokens once, at construction time, and then serves
 * them through a peek/next cursor.
 *
 * Recognised tokens: single- or double-quoted strings, numbers, the six
 * punctuation characters `{ } [ ] : ,` and bare identifiers. Whitespace and
 * any character that fits none of these are dropped without a token, so the
 * tokenizer never throws on malformed input.
 */
class Tokenizer {
  /** Punctuation character -> token type. */
  private static readonly PUNCTUATION: ReadonlyMap<string, TokenType> = new Map([
    ['{', TokenType.LBrace],
    ['}', TokenType.RBrace],
    ['[', TokenType.LBracket],
    [']', TokenType.RBracket],
    [':', TokenType.Colon],
    [',', TokenType.Comma],
  ]);

  /** Index of the next token to hand out (a cursor into `tokens`, not an input offset). */
  private pos = 0;
  private tokens: Token[] = [];
  /** The terminating token; always the last element of `tokens`. */
  private readonly eof: Token;

  /** @param input Text to tokenize. */
  constructor(private input: string) {
    this.tokenize();
    // EOF is positioned just past the last input character. (Using the token
    // cursor here would always report position 0.)
    this.eof = { type: TokenType.EOF, value: '', pos: this.input.length };
    this.tokens.push(this.eof);
  }

  /** Scans the whole input and fills `tokens` (without the EOF token). */
  private tokenize(): void {
    // Sticky + global: each match must start exactly where the previous one
    // ended. Capture groups, in order: string, number, punctuation,
    // identifier, any other single character. Whitespace matches with no group.
    const re = /\s+|("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*')|(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)|([{}[\]:,])|([A-Za-z_]\w*)|(.)/gy;
    let m: RegExpExecArray | null;
    while ((m = re.exec(this.input)) !== null) {
      const [, str, num, punct, ident] = m;
      let type: TokenType;
      let value: string;
      if (str) {
        type = TokenType.String;
        value = str;
      } else if (num) {
        type = TokenType.Number;
        value = num;
      } else if (punct) {
        const punctType = Tokenizer.PUNCTUATION.get(punct);
        // Compare against undefined: TokenType.LBrace is 0 and therefore falsy.
        if (punctType === undefined) continue;
        type = punctType;
        value = punct;
      } else if (ident) {
        type = TokenType.Ident;
        value = ident;
      } else {
        // Whitespace, or a character that cannot start any token: skip it.
        continue;
      }
      this.tokens.push({ type, value, pos: m.index });
    }
  }

  /**
   * Returns the token `offset` places ahead of the cursor without consuming it.
   * Any index outside the token list yields the EOF token.
   */
  peek(offset = 0): Token {
    return this.tokens[this.pos + offset] ?? this.eof;
  }

  /**
   * Returns the current token and advances the cursor. Once EOF is reached the
   * cursor stays there, so further calls keep returning the EOF token.
   */
  next(): Token {
    const tok = this.peek();
    if (tok !== this.eof) {
      this.pos++;
    }
    return tok;
  }
}
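
// ---------------------------------------------------------------------------
// Parser
// ---------------------------------------------------------------------------
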
/**
 * Error-tolerant recursive-descent parser that turns a token stream into an
 * ASTNode tree.
 *
 * Malformed input never throws: each problem is recorded as a warning, a
 * recovery step is taken (skip the token, insert `null`, assume a missing
 * delimiter) and parsing continues. All warnings collected during `parse()`
 * are written to `console.warn` in a single message.
 */
class Parser {
  /** Escape letter -> decoded character for the single-character escapes. */
  private static readonly SIMPLE_ESCAPES: ReadonlyMap<string, string> = new Map([
    ['"', '"'],
    ["'", "'"],
    ['\\', '\\'],
    ['/', '/'],
    ['b', '\b'],
    ['f', '\f'],
    ['n', '\n'],
    ['r', '\r'],
    ['t', '\t'],
  ]);

  private warnings: string[] = [];

  /** @param tz Token source; the parser consumes it through `peek()` / `next()`. */
  constructor(private tz: Tokenizer) {}

  /**
   * Parses one value from the token stream.
   *
   * Tokens left over after that value are reported as "Extra data" and ignored.
   *
   * @returns The root node of the parsed value.
   */
  parse(): ASTNode {
    const node = this.parseValue();
    const trailing = this.tz.peek();
    if (trailing.type !== TokenType.EOF) {
      this.warnings.push(`Extra data at pos ${trailing.pos}`);
    }
    if (this.warnings.length) {
      console.warn('Parse warnings:\n ' + this.warnings.join('\n '));
    }
    return node;
  }

  /** Dispatches on the next token; an unexpected token is consumed and replaced by `null`. */
  private parseValue(): ASTNode {
    const tok = this.tz.peek();
    switch (tok.type) {
      case TokenType.LBrace: return this.parseObject();
      case TokenType.LBracket: return this.parseArray();
      case TokenType.String: return this.parseString();
      case TokenType.Number: return this.parseNumber();
      case TokenType.Ident: return this.parseIdent();
      default:
        this.warnings.push(`Unexpected token '${tok.value}' at pos ${tok.pos}, inserting null`);
        // Consume the offending token so callers looping on parseValue() always make progress.
        this.tz.next();
        return { type: 'Literal', value: null };
    }
  }

  /**
   * Parses `{ key: value, ... }`. Keys may be quoted strings or bare
   * identifiers; stray commas, a missing ':' and a missing closing '}' are
   * tolerated with a warning.
   */
  private parseObject(): ObjectNode {
    this.tz.next(); // consume '{'
    const props: PropertyNode[] = [];
    while (this.tz.peek().type !== TokenType.RBrace &&
           this.tz.peek().type !== TokenType.EOF) {
      // Leading or repeated commas are skipped silently.
      if (this.tz.peek().type === TokenType.Comma) {
        this.tz.next();
        continue;
      }

      const keyTok = this.tz.peek();
      let key: string;
      if (keyTok.type === TokenType.String) {
        key = this.unquote(this.tz.next().value);
      } else if (keyTok.type === TokenType.Ident) {
        key = this.tz.next().value;
      } else {
        this.warnings.push(`Expected property name at pos ${keyTok.pos}, skipping token`);
        this.tz.next();
        continue;
      }

      if (this.tz.peek().type === TokenType.Colon) {
        this.tz.next();
      } else {
        this.warnings.push(`Missing ':' after key "${key}" at pos ${keyTok.pos}`);
      }

      const valueTok = this.tz.peek();
      let val: ASTNode;
      if (valueTok.type === TokenType.RBrace ||
          valueTok.type === TokenType.Comma ||
          valueTok.type === TokenType.EOF) {
        // The value is missing. Insert null WITHOUT consuming the delimiter:
        // swallowing a '}' here would leave this object "unclosed" and feed
        // the tokens that follow it to the wrong container.
        this.warnings.push(`Missing value for key "${key}" at pos ${valueTok.pos}, inserting null`);
        val = { type: 'Literal', value: null };
      } else {
        val = this.parseValue();
      }
      props.push({ key, value: val });

      if (this.tz.peek().type === TokenType.Comma) {
        this.tz.next();
      }
    }
    if (this.tz.peek().type === TokenType.RBrace) {
      this.tz.next();
    } else {
      this.warnings.push(`Unclosed '{'`);
    }
    return { type: 'Object', properties: props };
  }

  /** Parses `[ value, ... ]`; commas are optional separators and a missing ']' only warns. */
  private parseArray(): ArrayNode {
    this.tz.next(); // consume '['
    const elems: ASTNode[] = [];
    while (this.tz.peek().type !== TokenType.RBracket &&
           this.tz.peek().type !== TokenType.EOF) {
      if (this.tz.peek().type === TokenType.Comma) {
        this.tz.next();
        continue;
      }
      elems.push(this.parseValue());
    }
    if (this.tz.peek().type === TokenType.RBracket) {
      this.tz.next();
    } else {
      this.warnings.push(`Unclosed '['`);
    }
    return { type: 'Array', elements: elems };
  }

  /** Consumes a string token and decodes it into a string literal node. */
  private parseString(): LiteralNode {
    const raw = this.tz.next().value;
    return { type: 'Literal', value: this.unquote(raw) };
  }

  /** Consumes a number token and converts its text to a number literal node. */
  private parseNumber(): LiteralNode {
    // Number() covers integer, fractional and exponent forms in one call.
    return { type: 'Literal', value: Number(this.tz.next().value) };
  }

  /**
   * Consumes an identifier token. `true`, `false` and `null` are recognised
   * case-insensitively; any other bare word becomes a string literal with its
   * original spelling preserved.
   */
  private parseIdent(): LiteralNode {
    const text = this.tz.next().value;
    switch (text.toLowerCase()) {
      case 'true': return { type: 'Literal', value: true };
      case 'false': return { type: 'Literal', value: false };
      case 'null': return { type: 'Literal', value: null };
      default: return { type: 'Literal', value: text };
    }
  }

  /**
   * Decodes the source text of a quoted string.
   *
   * The first and last characters (the quotes) are dropped. Backslash escapes
   * are decoded: `\" \' \\ \/ \b \f \n \r \t` and `\uXXXX`. An unrecognised
   * escape such as `\q` is kept verbatim, backslash included, so no input
   * text is lost. Every other character, raw control characters included, is
   * copied through unchanged.
   *
   * @param str Quoted string text, including its surrounding quotes.
   * @returns The decoded string content.
   */
  private unquote(str: string): string {
    return str
      .slice(1, -1)
      .replace(/\\(?:u([0-9a-fA-F]{4})|([\s\S]))/g, (whole: string, hex?: string, ch?: string) => {
        if (hex !== undefined) {
          // One UTF-16 code unit per escape; surrogate pairs arrive as two
          // consecutive escapes and recombine by plain concatenation.
          return String.fromCharCode(parseInt(hex, 16));
        }
        const decoded = ch === undefined ? undefined : Parser.SIMPLE_ESCAPES.get(ch);
        return decoded ?? whole;
      });
  }
}
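
// ---------------------------------------------------------------------------
// AST evaluation
// ---------------------------------------------------------------------------
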
/**
 * Converts an AST into the plain JavaScript value it describes.
 *
 * - Literal nodes yield their stored value.
 * - Array nodes yield a new array of their evaluated elements.
 * - Object nodes yield a new plain object; when a key repeats, the last
 *   occurrence wins.
 *
 * @param node Root of the (sub)tree to evaluate.
 * @returns The evaluated value.
 */
function evalAST(node: ASTNode): any {
  switch (node.type) {
    case 'Literal':
      return node.value;
    case 'Array':
      return node.elements.map(element => evalAST(element));
    case 'Object': {
      const obj: Record<string, unknown> = {};
      for (const { key, value } of node.properties) {
        // defineProperty instead of `obj[key] = ...`: assigning to the key
        // "__proto__" would replace the object's prototype rather than create
        // a property. This always creates an ordinary own data property, as
        // JSON.parse does.
        Object.defineProperty(obj, key, {
          value: evalAST(value),
          writable: true,
          enumerable: true,
          configurable: true,
        });
      }
      return obj;
    }
  }
}
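
// ---------------------------------------------------------------------------
// Public entry point
// ---------------------------------------------------------------------------
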
/**
 * Parses JSON-like text leniently and returns the resulting JavaScript value.
 *
 * The text is tokenized, parsed into an AST by the error-recovering parser
 * and then evaluated.
 *
 * Before tokenizing, raw control characters (U+0000..U+001F) that occur
 * *inside quoted string literals* are rewritten as `\uXXXX` escapes. The
 * rewrite is deliberately limited to string literals: applied to the whole
 * input it would turn the newlines and tabs between tokens into `\u000a`-style
 * text, which the tokenizer would then read as stray identifiers.
 *
 * @param raw JSON-like source text.
 * @returns The parsed value (object, array, string, number, boolean or null).
 */
export function parseDirtyJSON(raw: string): any {
  const toUnicodeEscape = (c: string): string =>
    `\\u${c.charCodeAt(0).toString(16).padStart(4, '0')}`;

  // Same string-literal grammar as the tokenizer, so both agree on where
  // strings begin and end.
  const stringLiteral = /"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'/g;

  const sanitized = raw.replace(stringLiteral, literal =>
    // Walk the literal escape-pair by escape-pair so that a backslash is never
    // separated from the character it escapes.
    literal.replace(/\\([\s\S])|[\u0000-\u001F]/g, (match: string, escaped?: string) => {
      if (escaped === undefined) {
        return toUnicodeEscape(match); // bare control character
      }
      // Backslash followed by a raw control character: emit an escaped
      // backslash plus the escaped control character, so the new `\u` is not
      // swallowed by the preceding backslash.
      return /[\u0000-\u001F]/.test(escaped) ? `\\\\${toUnicodeEscape(escaped)}` : match;
    })
  );

  const tz = new Tokenizer(sanitized);
  const parser = new Parser(tz);
  const ast = parser.parse();
  return evalAST(ast);
}