machineuser
Sync widgets demo
32d7cd6
/**
 * Every kind of token the lexer can hand to the parser.
 *
 * Each entry maps a symbolic name to the string stored in `Token.type`. All names map to
 * themselves except `Not`, which is an alias for `UnaryOperator`.
 */
export const TOKEN_TYPES = Object.freeze({
  // Raw template content
  Text: "Text", // The text between Jinja statements or expressions

  // Literals and names
  NumericLiteral: "NumericLiteral", // e.g., 123
  BooleanLiteral: "BooleanLiteral", // true or false
  StringLiteral: "StringLiteral", // 'string'
  Identifier: "Identifier", // Variables, functions, etc.

  // Punctuation and delimiters
  Equals: "Equals", // =
  OpenParen: "OpenParen", // (
  CloseParen: "CloseParen", // )
  OpenStatement: "OpenStatement", // {%
  CloseStatement: "CloseStatement", // %}
  OpenExpression: "OpenExpression", // {{
  CloseExpression: "CloseExpression", // }}
  OpenSquareBracket: "OpenSquareBracket", // [
  CloseSquareBracket: "CloseSquareBracket", // ]
  OpenCurlyBracket: "OpenCurlyBracket", // {
  CloseCurlyBracket: "CloseCurlyBracket", // }
  Comma: "Comma", // ,
  Dot: "Dot", // .
  Colon: "Colon", // :
  Pipe: "Pipe", // |

  // Operators
  CallOperator: "CallOperator", // ()
  AdditiveBinaryOperator: "AdditiveBinaryOperator", // + -
  MultiplicativeBinaryOperator: "MultiplicativeBinaryOperator", // * / %
  ComparisonBinaryOperator: "ComparisonBinaryOperator", // < > <= >= == !=
  UnaryOperator: "UnaryOperator", // - + (and the `not` keyword, see `Not` below)

  // Keywords
  Set: "Set",
  If: "If",
  For: "For",
  In: "In",
  Is: "Is",
  NotIn: "NotIn",
  Else: "Else",
  EndIf: "EndIf",
  ElseIf: "ElseIf",
  EndFor: "EndFor",
  And: "And",
  Or: "Or",
  Not: "UnaryOperator", // `not` shares its token type with the unary `-` / `+`
});

/** The symbolic names of `TOKEN_TYPES`; this is the type of `Token.type`. */
export type TokenType = keyof typeof TOKEN_TYPES;
/**
 * Reserved words and the token type each one produces.
 *
 * The tokenizer looks up every word it reads here; a word that is not listed becomes an
 * `Identifier`.
 */
const KEYWORDS = Object.freeze({
  // Statements
  set: TOKEN_TYPES.Set,
  for: TOKEN_TYPES.For,
  in: TOKEN_TYPES.In,
  is: TOKEN_TYPES.Is,
  if: TOKEN_TYPES.If,
  else: TOKEN_TYPES.Else,
  endif: TOKEN_TYPES.EndIf,
  elif: TOKEN_TYPES.ElseIf,
  endfor: TOKEN_TYPES.EndFor,

  // Logical operators
  and: TOKEN_TYPES.And,
  or: TOKEN_TYPES.Or,
  not: TOKEN_TYPES.Not,
  // The tokenizer produces this one by merging a `not` token with a following `in`
  "not in": TOKEN_TYPES.NotIn,

  // Literals
  true: TOKEN_TYPES.BooleanLiteral,
  false: TOKEN_TYPES.BooleanLiteral,
});
/**
 * A single lexical unit of a template: its raw text plus the kind of token it is.
 */
export class Token {
  /** The raw value as seen inside the source code. */
  public value: string;

  /** The type of token. */
  public type: TokenType;

  /**
   * Creates a token.
   * @param value The raw value as seen inside the source code.
   * @param type The type of token.
   */
  constructor(value: string, type: TokenType) {
    this.value = value;
    this.type = type;
  }
}
/**
 * Reports whether `char` contains a word character (a letter, a digit or an underscore).
 */
function isWord(char: string): boolean {
  const wordPattern = /\w/;
  return wordPattern.test(char);
}
/**
 * Reports whether `char` contains a decimal digit (0-9).
 */
function isInteger(char: string): boolean {
  const digitPattern = /[0-9]/;
  return digitPattern.test(char);
}
/**
 * Fixed character sequences and the token type each one produces.
 *
 * The tokenizer walks this list from top to bottom and takes the first entry that matches
 * at the cursor, so a sequence must be listed before any shorter sequence it starts with
 * (`{%` before `{`, `<=` before `<`, `==` before `=`).
 */
const ORDERED_MAPPING_TABLE: [string, TokenType][] = [
  // Statement and expression delimiters
  ["{%", TOKEN_TYPES.OpenStatement],
  ["%}", TOKEN_TYPES.CloseStatement],
  ["{{", TOKEN_TYPES.OpenExpression],
  ["}}", TOKEN_TYPES.CloseExpression],

  // Brackets and punctuation
  ["(", TOKEN_TYPES.OpenParen],
  [")", TOKEN_TYPES.CloseParen],
  ["{", TOKEN_TYPES.OpenCurlyBracket],
  ["}", TOKEN_TYPES.CloseCurlyBracket],
  ["[", TOKEN_TYPES.OpenSquareBracket],
  ["]", TOKEN_TYPES.CloseSquareBracket],
  [",", TOKEN_TYPES.Comma],
  [".", TOKEN_TYPES.Dot],
  [":", TOKEN_TYPES.Colon],
  ["|", TOKEN_TYPES.Pipe],

  // Comparisons (two-character forms first)
  ["<=", TOKEN_TYPES.ComparisonBinaryOperator],
  [">=", TOKEN_TYPES.ComparisonBinaryOperator],
  ["==", TOKEN_TYPES.ComparisonBinaryOperator],
  ["!=", TOKEN_TYPES.ComparisonBinaryOperator],
  ["<", TOKEN_TYPES.ComparisonBinaryOperator],
  [">", TOKEN_TYPES.ComparisonBinaryOperator],

  // Arithmetic
  ["+", TOKEN_TYPES.AdditiveBinaryOperator],
  ["-", TOKEN_TYPES.AdditiveBinaryOperator],
  ["*", TOKEN_TYPES.MultiplicativeBinaryOperator],
  ["/", TOKEN_TYPES.MultiplicativeBinaryOperator],
  ["%", TOKEN_TYPES.MultiplicativeBinaryOperator],

  // Assignment (after `==` so that equality wins)
  ["=", TOKEN_TYPES.Equals],
];
/**
 * Maps the character that follows a backslash to the character it stands for.
 *
 * The tokenizer rejects any escaped character that is not a key of this map.
 */
const ESCAPE_CHARACTERS = new Map<string, string>(
  Object.entries({
    n: "\n", // New line
    t: "\t", // Horizontal tab
    r: "\r", // Carriage return
    b: "\b", // Backspace
    f: "\f", // Form feed
    v: "\v", // Vertical tab
    "'": "'", // Single quote
    '"': '"', // Double quote
    "\\": "\\", // Backslash
  })
);
/**
 * Whitespace-handling switches applied to a template before it is tokenized.
 */
export interface PreprocessOptions {
  /** When true, the first newline after a `%}` or `#}` tag is removed. */
  trim_blocks?: boolean;
  /** When true, spaces and tabs between the start of a line and a `{%` or `{#` tag are removed. */
  lstrip_blocks?: boolean;
}

/**
 * Normalizes a raw template so the tokenizer does not have to deal with comments or
 * whitespace control.
 *
 * In order: drops a single trailing newline, swaps every `{# … #}` comment for a `{##}`
 * placeholder, applies `lstrip_blocks` / `trim_blocks` if requested, deletes the
 * placeholders, and finally resolves the `-` whitespace-control markers on
 * `{%- … -%}` and `{{- … -}}` by removing the adjacent whitespace together with the `-`.
 *
 * @param template The raw template text.
 * @param options Optional whitespace-handling switches.
 * @returns The normalized template text.
 */
function preprocess(template: string, options: PreprocessOptions = {}): string {
  // Default Jinja behavior (https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control):
  // exactly one trailing newline is dropped, all other whitespace is kept.
  let text = template.endsWith("\n") ? template.slice(0, -1) : template;

  // Collapse comments to a placeholder first so their contents cannot affect the
  // block-trimming rules below, while their closing `#}` still can.
  text = text.replace(/{#.*?#}/gs, "{##}");

  // lstrip_blocks: strip tabs and spaces from the beginning of a line up to a block start.
  // Nothing is stripped when other characters precede the block on that line.
  text = options.lstrip_blocks ? text.replace(/^[ \t]*({[#%])/gm, "$1") : text;

  // trim_blocks: remove the first newline after a template tag.
  text = options.trim_blocks ? text.replace(/([#%]})\n/g, "$1") : text;

  // Remaining clean-up rules, applied in this exact order.
  const cleanupRules: [RegExp, string][] = [
    [/{##}/g, ""], // Remove comments
    [/-%}\s*/g, "%}"],
    [/\s*{%-/g, "{%"],
    [/-}}\s*/g, "}}"],
    [/\s*{{-/g, "{{"],
  ];
  return cleanupRules.reduce((current, [pattern, replacement]) => current.replace(pattern, replacement), text);
}
/**
 * Generate a list of tokens from a source string.
 *
 * The source is first normalized by `preprocess`. Everything outside `{% … %}` and
 * `{{ … }}` is emitted as a single `Text` token; inside them whitespace is skipped and the
 * content is split into operators, punctuation, string and numeric literals, keywords and
 * identifiers.
 *
 * @param source The raw template text.
 * @param options Whitespace-handling switches forwarded to `preprocess`.
 * @returns The tokens in source order.
 * @throws {SyntaxError} If the input ends while a whitespace run, word, number or string is
 * still being read, if a backslash is followed by an unsupported character, or if a
 * character cannot start any token.
 */
export function tokenize(source: string, options: PreprocessOptions = {}): Token[] {
  const tokens: Token[] = [];
  const src: string = preprocess(source, options);

  let cursorPosition = 0;

  // Number of `{` currently open inside the statement/expression being read. While it is
  // positive, `}}` closes two nested object literals rather than the expression.
  let curlyBracketDepth = 0;

  /**
   * Consumes characters for as long as `predicate` accepts the one at the cursor and returns
   * them, decoding backslash escapes on the way. Reaching the end of the input while still
   * consuming is an error, because every run read this way must be followed by more input
   * (at least the closing delimiter).
   */
  const consumeWhile = (predicate: (char: string) => boolean): string => {
    let str = "";
    while (predicate(src[cursorPosition])) {
      // Check for escaped characters
      if (src[cursorPosition] === "\\") {
        // Consume the backslash
        ++cursorPosition;
        // Check for end of input
        if (cursorPosition >= src.length) throw new SyntaxError("Unexpected end of input");

        // Add the escaped character
        const escaped = src[cursorPosition++];
        const unescaped = ESCAPE_CHARACTERS.get(escaped);
        if (unescaped === undefined) {
          throw new SyntaxError(`Unexpected escaped character: ${escaped}`);
        }
        str += unescaped;
        continue;
      }

      str += src[cursorPosition++];
      if (cursorPosition >= src.length) throw new SyntaxError("Unexpected end of input");
    }
    return str;
  };

  // Build each token until end of input
  main: while (cursorPosition < src.length) {
    // First, consume all text that is outside of a Jinja statement or expression.
    // We are outside one at the very start and right after a closing delimiter.
    const lastTokenType = tokens.at(-1)?.type;
    if (
      lastTokenType === undefined ||
      lastTokenType === TOKEN_TYPES.CloseStatement ||
      lastTokenType === TOKEN_TYPES.CloseExpression
    ) {
      let text = "";
      while (
        cursorPosition < src.length &&
        // Keep going until we hit the next Jinja statement or expression
        !(src[cursorPosition] === "{" && (src[cursorPosition + 1] === "%" || src[cursorPosition + 1] === "{"))
      ) {
        text += src[cursorPosition++];
      }

      // There is some text to add
      if (text.length > 0) {
        tokens.push(new Token(text, TOKEN_TYPES.Text));
        continue;
      }
    }

    // Consume (and ignore) all whitespace inside Jinja statements or expressions
    consumeWhile((char) => /\s/.test(char));

    const char = src[cursorPosition];

    // Decide whether a leading `-` / `+` is a sign (unary) or a binary operator.
    // No token has been pushed since `lastTokenType` was read, so it is still current.
    if (char === "-" || char === "+") {
      if (lastTokenType === TOKEN_TYPES.Text || lastTokenType === undefined) {
        throw new SyntaxError(`Unexpected character: ${char}`);
      }
      switch (lastTokenType) {
        case TOKEN_TYPES.Identifier:
        case TOKEN_TYPES.NumericLiteral:
        case TOKEN_TYPES.BooleanLiteral:
        case TOKEN_TYPES.StringLiteral:
        case TOKEN_TYPES.CloseParen:
        case TOKEN_TYPES.CloseSquareBracket:
          // Follows an operand, so it is a binary operator:
          // a - 1, 1 - 1, true - 1, "apple" - 1, (1) - 1, a[1] - 1
          // Fall out of the switch and let the mapping table handle it.
          break;

        default: {
          // Does not follow an operand, so it is a unary operator:
          // (-1), [-1], (1 + -1), not -1, -apple
          ++cursorPosition; // consume the unary operator

          // A sign directly followed by digits is folded into a signed numeric literal
          const num = consumeWhile(isInteger);
          tokens.push(
            new Token(`${char}${num}`, num.length > 0 ? TOKEN_TYPES.NumericLiteral : TOKEN_TYPES.UnaryOperator)
          );
          continue;
        }
      }
    }

    // Try to match one of the fixed sequences in the mapping table (first match wins)
    for (const [seq, type] of ORDERED_MAPPING_TABLE) {
      // Inside an object literal, "}}" is two closing brackets, not the end of the expression
      if (seq === "}}" && curlyBracketDepth > 0) {
        continue;
      }
      if (!src.startsWith(seq, cursorPosition)) {
        continue;
      }

      tokens.push(new Token(seq, type));

      // Keep the curly-bracket depth in sync with what was just emitted
      if (type === TOKEN_TYPES.OpenStatement || type === TOKEN_TYPES.OpenExpression) {
        // A new statement/expression always starts with no open brackets
        curlyBracketDepth = 0;
      } else if (type === TOKEN_TYPES.OpenCurlyBracket) {
        ++curlyBracketDepth;
      } else if (type === TOKEN_TYPES.CloseCurlyBracket && curlyBracketDepth > 0) {
        --curlyBracketDepth;
      }

      cursorPosition += seq.length;
      continue main;
    }

    // String literal, delimited by matching single or double quotes
    if (char === "'" || char === '"') {
      ++cursorPosition; // Skip the opening quote
      const str = consumeWhile((c) => c !== char);
      tokens.push(new Token(str, TOKEN_TYPES.StringLiteral));
      ++cursorPosition; // Skip the closing quote
      continue;
    }

    // Numeric literal (a run of digits)
    if (isInteger(char)) {
      const num = consumeWhile(isInteger);
      tokens.push(new Token(num, TOKEN_TYPES.NumericLiteral));
      continue;
    }

    // Keyword or identifier
    if (isWord(char)) {
      const word = consumeWhile(isWord);

      // Check for special/reserved keywords
      // NOTE: We use Object.hasOwn() to avoid matching `.toString()` and other Object methods
      const type = Object.hasOwn(KEYWORDS, word) ? KEYWORDS[word as keyof typeof KEYWORDS] : TOKEN_TYPES.Identifier;

      // Special case of `not in`: a `not` keyword directly followed by `in` is merged into
      // a single token. `TOKEN_TYPES.Not` has the same value as `TOKEN_TYPES.UnaryOperator`,
      // so the value must be checked as well; otherwise a unary `-` or `+` before `in`
      // would be rewritten to `not in`.
      const previous = tokens.at(-1);
      if (type === TOKEN_TYPES.In && previous?.type === TOKEN_TYPES.Not && previous.value === "not") {
        tokens.pop();
        tokens.push(new Token("not in", TOKEN_TYPES.NotIn));
      } else {
        tokens.push(new Token(word, type));
      }
      continue;
    }

    throw new SyntaxError(`Unexpected character: ${char}`);
  }

  return tokens;
}