/*! Copyright (c) 2011, Lloyd Hilaiel, ISC License */ /* * This is the JSONSelect reference implementation, in javascript. This * code is designed to run under node.js or in a browser. In the former * case, the "public API" is exposed as properties on the `export` object, * in the latter, as properties on `window.JSONSelect`. That API is thus: * * Selector formating and parameter escaping: * * Anywhere where a string selector is selected, it may be followed by an * optional array of values. When provided, they will be escaped and * inserted into the selector string properly escaped. i.e.: * * .match(':has(?)', [ 'foo' ], {}) * * would result in the seclector ':has("foo")' being matched against {}. * * This feature makes dynamically generated selectors more readable. * * .match(selector, [ values ], object) * * Parses and "compiles" the selector, then matches it against the object * argument. Matches are returned in an array. Throws an error when * there's a problem parsing the selector. * * .forEach(selector, [ values ], object, callback) * * Like match, but rather than returning an array, invokes the provided * callback once per match as the matches are discovered. * * .compile(selector, [ values ]) * * Parses the selector and compiles it to an internal form, and returns * an object which contains the compiled selector and has two properties: * `match` and `forEach`. These two functions work identically to the * above, except they do not take a selector as an argument and instead * use the compiled selector. * * For cases where a complex selector is repeatedly used, this method * should be faster as it will avoid recompiling the selector each time. */ (function(exports) { var // localize references toString = Object.prototype.toString; function jsonParse(str) { try { if(JSON && JSON.parse){ return JSON.parse(str); } return (new Function("return " + str))(); } catch(e) { te("ijs", e.message); } } // emitted error codes. var errorCodes = { "bop": "binary operator expected", "ee": "expression expected", "epex": "closing paren expected ')'", "ijs": "invalid json string", "mcp": "missing closing paren", "mepf": "malformed expression in pseudo-function", "mexp": "multiple expressions not allowed", "mpc": "multiple pseudo classes (:xxx) not allowed", "nmi": "multiple ids not allowed", "pex": "opening paren expected '('", "se": "selector expected", "sex": "string expected", "sra": "string required after '.'", "uc": "unrecognized char", "ucp": "unexpected closing paren", "ujs": "unclosed json string", "upc": "unrecognized pseudo class" }; // throw an error message function te(ec, context) { throw new Error(errorCodes[ec] + ( context && " in '" + context + "'")); } // THE LEXER var toks = { psc: 1, // pseudo class psf: 2, // pseudo class function typ: 3, // type str: 4, // string ide: 5 // identifiers (or "classes", stuff after a dot) }; // The primary lexing regular expression in jsonselect var pat = new RegExp( "^(?:" + // (1) whitespace "([\\r\\n\\t\\ ]+)|" + // (2) one-char ops "([~*,>\\)\\(])|" + // (3) types names "(string|boolean|null|array|object|number)|" + // (4) pseudo classes "(:(?:root|first-child|last-child|only-child))|" + // (5) pseudo functions "(:(?:nth-child|nth-last-child|has|expr|val|contains))|" + // (6) bogusly named pseudo something or others "(:\\w+)|" + // (7 & 8) identifiers and JSON strings "(?:(\\.)?(\\\"(?:[^\\\\\\\"]|\\\\[^\\\"])*\\\"))|" + // (8) bogus JSON strings missing a trailing quote "(\\\")|" + // (9) identifiers (unquoted) "\\.((?:[_a-zA-Z]|[^\\0-\\0177]|\\\\[^\\r\\n\\f0-9a-fA-F])(?:[_a-zA-Z0-9\\-]|[^\\u0000-\\u0177]|(?:\\\\[^\\r\\n\\f0-9a-fA-F]))*)" + ")" ); // A regular expression for matching "nth expressions" (see grammar, what :nth-child() eats) var nthPat = /^\s*\(\s*(?:([+\-]?)([0-9]*)n\s*(?:([+\-])\s*([0-9]))?|(odd|even)|([+\-]?[0-9]+))\s*\)/; function lex(str, off) { if (!off) off = 0; var m = pat.exec(str.substr(off)); if (!m) return undefined; off+=m[0].length; var a; if (m[1]) a = [off, " "]; else if (m[2]) a = [off, m[0]]; else if (m[3]) a = [off, toks.typ, m[0]]; else if (m[4]) a = [off, toks.psc, m[0]]; else if (m[5]) a = [off, toks.psf, m[0]]; else if (m[6]) te("upc", str); else if (m[8]) a = [off, m[7] ? toks.ide : toks.str, jsonParse(m[8])]; else if (m[9]) te("ujs", str); else if (m[10]) a = [off, toks.ide, m[10].replace(/\\([^\r\n\f0-9a-fA-F])/g,"$1")]; return a; } // THE EXPRESSION SUBSYSTEM var exprPat = new RegExp( // skip and don't capture leading whitespace "^\\s*(?:" + // (1) simple vals "(true|false|null)|" + // (2) numbers "(-?\\d+(?:\\.\\d*)?(?:[eE][+\\-]?\\d+)?)|" + // (3) strings "(\"(?:[^\\]|\\[^\"])*\")|" + // (4) the 'x' value placeholder "(x)|" + // (5) binops "(&&|\\|\\||[\\$\\^<>!\\*]=|[=+\\-*/%<>])|" + // (6) parens "([\\(\\)])" + ")" ); function is(o, t) { return typeof o === t; } var operators = { '*': [ 9, function(lhs, rhs) { return lhs * rhs; } ], '/': [ 9, function(lhs, rhs) { return lhs / rhs; } ], '%': [ 9, function(lhs, rhs) { return lhs % rhs; } ], '+': [ 7, function(lhs, rhs) { return lhs + rhs; } ], '-': [ 7, function(lhs, rhs) { return lhs - rhs; } ], '<=': [ 5, function(lhs, rhs) { return is(lhs, 'number') && is(rhs, 'number') && lhs <= rhs; } ], '>=': [ 5, function(lhs, rhs) { return is(lhs, 'number') && is(rhs, 'number') && lhs >= rhs; } ], '$=': [ 5, function(lhs, rhs) { return is(lhs, 'string') && is(rhs, 'string') && lhs.lastIndexOf(rhs) === lhs.length - rhs.length; } ], '^=': [ 5, function(lhs, rhs) { return is(lhs, 'string') && is(rhs, 'string') && lhs.indexOf(rhs) === 0; } ], '*=': [ 5, function(lhs, rhs) { return is(lhs, 'string') && is(rhs, 'string') && lhs.indexOf(rhs) !== -1; } ], '>': [ 5, function(lhs, rhs) { return is(lhs, 'number') && is(rhs, 'number') && lhs > rhs; } ], '<': [ 5, function(lhs, rhs) { return is(lhs, 'number') && is(rhs, 'number') && lhs < rhs; } ], '=': [ 3, function(lhs, rhs) { return lhs === rhs; } ], '!=': [ 3, function(lhs, rhs) { return lhs !== rhs; } ], '&&': [ 2, function(lhs, rhs) { return lhs && rhs; } ], '||': [ 1, function(lhs, rhs) { return lhs || rhs; } ] }; function exprLex(str, off) { var v, m = exprPat.exec(str.substr(off)); if (m) { off += m[0].length; v = m[1] || m[2] || m[3] || m[5] || m[6]; if (m[1] || m[2] || m[3]) return [off, 0, jsonParse(v)]; else if (m[4]) return [off, 0, undefined]; return [off, v]; } } function exprParse2(str, off) { if (!off) off = 0; // first we expect a value or a '(' var l = exprLex(str, off), lhs; if (l && l[1] === '(') { lhs = exprParse2(str, l[0]); var p = exprLex(str, lhs[0]); if (!p || p[1] !== ')') te('epex', str); off = p[0]; lhs = [ '(', lhs[1] ]; } else if (!l || (l[1] && l[1] != 'x')) { te("ee", str + " - " + ( l[1] && l[1] )); } else { lhs = ((l[1] === 'x') ? undefined : l[2]); off = l[0]; } // now we expect a binary operator or a ')' var op = exprLex(str, off); if (!op || op[1] == ')') return [off, lhs]; else if (op[1] == 'x' || !op[1]) { te('bop', str + " - " + ( op[1] && op[1] )); } // tail recursion to fetch the rhs expression var rhs = exprParse2(str, op[0]); off = rhs[0]; rhs = rhs[1]; // and now precedence! how shall we put everything together? var v; if (typeof rhs !== 'object' || rhs[0] === '(' || operators[op[1]][0] < operators[rhs[1]][0] ) { v = [lhs, op[1], rhs]; } else { v = rhs; while (typeof rhs[0] === 'object' && rhs[0][0] != '(' && operators[op[1]][0] >= operators[rhs[0][1]][0]) { rhs = rhs[0]; } rhs[0] = [lhs, op[1], rhs[0]]; } return [off, v]; } function exprParse(str, off) { function deparen(v) { if (typeof v !== 'object' || v === null) return v; else if (v[0] === '(') return deparen(v[1]); else return [deparen(v[0]), v[1], deparen(v[2])]; } var e = exprParse2(str, off ? off : 0); return [e[0], deparen(e[1])]; } function exprEval(expr, x) { if (expr === undefined) return x; else if (expr === null || typeof expr !== 'object') { return expr; } var lhs = exprEval(expr[0], x), rhs = exprEval(expr[2], x); return operators[expr[1]][1](lhs, rhs); } // THE PARSER function parse(str, off, nested, hints) { if (!nested) hints = {}; var a = [], am, readParen; if (!off) off = 0; while (true) { var s = parse_selector(str, off, hints); a.push(s[1]); s = lex(str, off = s[0]); if (s && s[1] === " ") s = lex(str, off = s[0]); if (!s) break; // now we've parsed a selector, and have something else... if (s[1] === ">" || s[1] === "~") { if (s[1] === "~") hints.usesSiblingOp = true; a.push(s[1]); off = s[0]; } else if (s[1] === ",") { if (am === undefined) am = [ ",", a ]; else am.push(a); a = []; off = s[0]; } else if (s[1] === ")") { if (!nested) te("ucp", s[1]); readParen = 1; off = s[0]; break; } } if (nested && !readParen) te("mcp", str); if (am) am.push(a); var rv; if (!nested && hints.usesSiblingOp) { rv = normalize(am ? am : a); } else { rv = am ? am : a; } return [off, rv]; } function normalizeOne(sel) { var sels = [], s; for (var i = 0; i < sel.length; i++) { if (sel[i] === '~') { // `A ~ B` maps to `:has(:root > A) > B` // `Z A ~ B` maps to `Z :has(:root > A) > B, Z:has(:root > A) > B` // This first clause, takes care of the first case, and the first half of the latter case. if (i < 2 || sel[i-2] != '>') { s = sel.slice(0,i-1); s = s.concat([{has:[[{pc: ":root"}, ">", sel[i-1]]]}, ">"]); s = s.concat(sel.slice(i+1)); sels.push(s); } // here we take care of the second half of above: // (`Z A ~ B` maps to `Z :has(:root > A) > B, Z :has(:root > A) > B`) // and a new case: // Z > A ~ B maps to Z:has(:root > A) > B if (i > 1) { var at = sel[i-2] === '>' ? i-3 : i-2; s = sel.slice(0,at); var z = {}; for (var k in sel[at]) if (sel[at].hasOwnProperty(k)) z[k] = sel[at][k]; if (!z.has) z.has = []; z.has.push([{pc: ":root"}, ">", sel[i-1]]); s = s.concat(z, '>', sel.slice(i+1)); sels.push(s); } break; } } if (i == sel.length) return sel; return sels.length > 1 ? [','].concat(sels) : sels[0]; } function normalize(sels) { if (sels[0] === ',') { var r = [","]; for (var i = i; i < sels.length; i++) { var s = normalizeOne(s[i]); r = r.concat(s[0] === "," ? s.slice(1) : s); } return r; } else { return normalizeOne(sels); } } function parse_selector(str, off, hints) { var soff = off; var s = { }; var l = lex(str, off); // skip space if (l && l[1] === " ") { soff = off = l[0]; l = lex(str, off); } if (l && l[1] === toks.typ) { s.type = l[2]; l = lex(str, (off = l[0])); } else if (l && l[1] === "*") { // don't bother representing the universal sel, '*' in the // parse tree, cause it's the default l = lex(str, (off = l[0])); } // now support either an id or a pc while (true) { if (l === undefined) { break; } else if (l[1] === toks.ide) { if (s.id) te("nmi", l[1]); s.id = l[2]; } else if (l[1] === toks.psc) { if (s.pc || s.pf) te("mpc", l[1]); // collapse first-child and last-child into nth-child expressions if (l[2] === ":first-child") { s.pf = ":nth-child"; s.a = 0; s.b = 1; } else if (l[2] === ":last-child") { s.pf = ":nth-last-child"; s.a = 0; s.b = 1; } else { s.pc = l[2]; } } else if (l[1] === toks.psf) { if (l[2] === ":val" || l[2] === ":contains") { s.expr = [ undefined, l[2] === ":val" ? "=" : "*=", undefined]; // any amount of whitespace, followed by paren, string, paren l = lex(str, (off = l[0])); if (l && l[1] === " ") l = lex(str, off = l[0]); if (!l || l[1] !== "(") te("pex", str); l = lex(str, (off = l[0])); if (l && l[1] === " ") l = lex(str, off = l[0]); if (!l || l[1] !== toks.str) te("sex", str); s.expr[2] = l[2]; l = lex(str, (off = l[0])); if (l && l[1] === " ") l = lex(str, off = l[0]); if (!l || l[1] !== ")") te("epex", str); } else if (l[2] === ":has") { // any amount of whitespace, followed by paren l = lex(str, (off = l[0])); if (l && l[1] === " ") l = lex(str, off = l[0]); if (!l || l[1] !== "(") te("pex", str); var h = parse(str, l[0], true); l[0] = h[0]; if (!s.has) s.has = []; s.has.push(h[1]); } else if (l[2] === ":expr") { if (s.expr) te("mexp", str); var e = exprParse(str, l[0]); l[0] = e[0]; s.expr = e[1]; } else { if (s.pc || s.pf ) te("mpc", str); s.pf = l[2]; var m = nthPat.exec(str.substr(l[0])); if (!m) te("mepf", str); if (m[5]) { s.a = 2; s.b = (m[5] === "odd") ? 1 : 0; } else if (m[6]) { s.a = 0; s.b = parseInt(m[6], 10); } else { s.a = parseInt((m[1] ? m[1] : "+") + (m[2] ? m[2] : "1"),10); s.b = m[3] ? parseInt(m[3] + m[4],10) : 0; } l[0] += m[0].length; } } else { break; } l = lex(str, (off = l[0])); } // now if we didn't actually parse anything it's an error if (soff === off) te("se", str); return [off, s]; } // THE EVALUATOR function isArray(o) { return Array.isArray ? Array.isArray(o) : toString.call(o) === "[object Array]"; } function mytypeof(o) { if (o === null) return "null"; var to = typeof o; if (to === "object" && isArray(o)) to = "array"; return to; } function mn(node, sel, id, num, tot) { var sels = []; var cs = (sel[0] === ">") ? sel[1] : sel[0]; var m = true, mod; if (cs.type) m = m && (cs.type === mytypeof(node)); if (cs.id) m = m && (cs.id === id); if (m && cs.pf) { if (cs.pf === ":nth-last-child") num = tot - num; else num++; if (cs.a === 0) { m = cs.b === num; } else { mod = ((num - cs.b) % cs.a); m = (!mod && ((num*cs.a + cs.b) >= 0)); } } if (m && cs.has) { // perhaps we should augment forEach to handle a return value // that indicates "client cancels traversal"? var bail = function() { throw 42; }; for (var i = 0; i < cs.has.length; i++) { try { forEach(cs.has[i], node, bail); } catch (e) { if (e === 42) continue; } m = false; break; } } if (m && cs.expr) { m = exprEval(cs.expr, node); } // should we repeat this selector for descendants? if (sel[0] !== ">" && sel[0].pc !== ":root") sels.push(sel); if (m) { // is there a fragment that we should pass down? if (sel[0] === ">") { if (sel.length > 2) { m = false; sels.push(sel.slice(2)); } } else if (sel.length > 1) { m = false; sels.push(sel.slice(1)); } } return [m, sels]; } function forEach(sel, obj, fun, id, num, tot) { var a = (sel[0] === ",") ? sel.slice(1) : [sel], a0 = [], call = false, i = 0, j = 0, k, x; for (i = 0; i < a.length; i++) { x = mn(obj, a[i], id, num, tot); if (x[0]) { call = true; } for (j = 0; j < x[1].length; j++) { a0.push(x[1][j]); } } if (a0.length && typeof obj === "object") { if (a0.length >= 1) { a0.unshift(","); } if (isArray(obj)) { for (i = 0; i < obj.length; i++) { forEach(a0, obj[i], fun, undefined, i, obj.length); } } else { for (k in obj) { if (obj.hasOwnProperty(k)) { forEach(a0, obj[k], fun, k); } } } } if (call && fun) { fun(obj); } } function match(sel, obj) { var a = []; forEach(sel, obj, function(x) { a.push(x); }); return a; } function format(sel, arr) { sel = sel.replace(/\?/g, function() { if (arr.length === 0) throw "too few parameters given"; var p = arr.shift(); return ((typeof p === 'string') ? JSON.stringify(p) : p); }); if (arr.length) throw "too many parameters supplied"; return sel; } function compile(sel, arr) { if (arr) sel = format(sel, arr); return { sel: parse(sel)[1], match: function(obj){ return match(this.sel, obj); }, forEach: function(obj, fun) { return forEach(this.sel, obj, fun); } }; } exports._lex = lex; exports._parse = parse; exports.match = function (sel, arr, obj) { if (!obj) { obj = arr; arr = undefined; } return compile(sel, arr).match(obj); }; exports.forEach = function(sel, arr, obj, fun) { if (!fun) { fun = obj; obj = arr; arr = undefined } return compile(sel, arr).forEach(obj, fun); }; exports.compile = compile; })(typeof exports === "undefined" ? (window.JSONSelect = {}) : exports);