| import { startsWith, endsWith } from 'lodash' | |
| import type { Token } from './types' | |
| import { childlessTags } from './tags' | |
/** Mutable lexing cursor that every routine in this module reads and updates. */
interface State {
  /** The complete source text being tokenized. */
  str: string
  /** Index into `str` of the next character to be examined. */
  position: number
  /** Tokens emitted so far, in the order they were pushed. */
  tokens: Token[]
}
/**
 * Moves the cursor of `state` to the absolute index `end`.
 *
 * The jump is expressed as an offset from the current position and handed
 * to `movePositopn`, so every cursor update goes through the same helper.
 */
const jumpPosition = (state: State, end: number) => {
  movePositopn(state, end - state.position)
}
/**
 * Shifts the cursor of `state` by `len` characters relative to its current
 * position. The spelling of the name is kept as-is because the other
 * functions in this file call it under this name.
 */
const movePositopn = (state: State, len: number) => {
  state.position += len
}
/**
 * Finds where a run of plain text ends, i.e. the index of the next `<`
 * that is treated as the start of markup.
 *
 * A `<` counts as markup only when the character immediately after it is
 * `/`, `!`, or an ASCII letter or digit. Any other `<` (for example the one
 * in `a < b`, or a `<` that is the last character of the input) is regarded
 * as ordinary text and skipped.
 *
 * @param str - Text to scan.
 * @param index - Position at which scanning starts.
 * @returns The index of the `<` that starts markup, or `-1` when there is
 * none at or after `index`.
 */
const findTextEnd = (str: string, index: number) => {
  let searchFrom = index
  // Every path inside the loop either returns or advances `searchFrom`,
  // so no separate loop flag or trailing return is needed.
  for (;;) {
    const candidate = str.indexOf('<', searchFrom)
    if (candidate === -1) return -1

    const following = str.charAt(candidate + 1)
    if (following === '/' || following === '!' || /[A-Za-z0-9]/.test(following)) {
      return candidate
    }

    // A lone '<' inside text: resume the search just past it.
    searchFrom = candidate + 1
  }
}
/**
 * Emits one `text` token covering everything from the cursor up to the next
 * markup start reported by `findTextEnd`, and moves the cursor there.
 *
 * Does nothing when markup begins exactly at the cursor. When no markup
 * remains, the text runs to the end of the input.
 */
const lexText = (state: State) => {
  const source = state.str
  const begin = state.position
  const markupAt = findTextEnd(source, begin)

  // Markup starts right here, so there is no text to emit.
  if (markupAt === begin) return

  // -1 means no further markup: the rest of the input is text.
  const stop = markupAt === -1 ? source.length : markupAt
  const content = source.slice(begin, stop)
  jumpPosition(state, stop)
  state.tokens.push({ type: 'text', content })
}
/**
 * Emits one `comment` token for the comment that starts at the cursor.
 *
 * The first four characters are skipped unconditionally (the caller has
 * already matched `<!--`). The comment body runs up to the next `-->`; if
 * there is no terminator, the body extends to the end of the input. The
 * cursor ends up just past the terminator, or at the end of the input.
 */
const lexComment = (state: State) => {
  const source = state.str

  // Step over the four-character opener.
  movePositopn(state, 4)
  const bodyStart = state.position

  const terminator = source.indexOf('-->', bodyStart)
  const unterminated = terminator === -1
  const bodyEnd = unterminated ? source.length : terminator
  // '-->' is three characters wide.
  const resumeAt = unterminated ? source.length : terminator + 3

  const content = source.slice(bodyStart, bodyEnd)
  jumpPosition(state, resumeAt)
  state.tokens.push({ type: 'comment', content })
}
/**
 * Reads a tag name starting at the cursor, emits it as a `tag` token and
 * leaves the cursor on the first character after the name.
 *
 * Leading whitespace, `/` and `>` characters are skipped before the name
 * begins; the name then extends until the next such character or the end
 * of the input.
 *
 * @returns The tag name exactly as written in the source (case preserved).
 */
const lexTagName = (state: State) => {
  const source = state.str
  const limit = source.length

  // A name character is anything except whitespace, '/' and '>'.
  const isNameChar = (ch: string) => !(/\s/.test(ch) || ch === '/' || ch === '>')

  // Skip whatever precedes the name.
  let nameStart = state.position
  while (nameStart < limit && !isNameChar(source.charAt(nameStart))) {
    nameStart++
  }

  // The character at nameStart is taken as part of the name without a
  // further check, so scanning for the end begins one past it.
  let nameEnd = nameStart + 1
  while (nameEnd < limit && isNameChar(source.charAt(nameEnd))) {
    nameEnd++
  }

  jumpPosition(state, nameEnd)
  const tagName = source.slice(nameStart, nameEnd)
  state.tokens.push({
    type: 'tag',
    content: tagName
  })
  return tagName
}
/**
 * Reads the attribute section of a tag and emits one `attribute` token per
 * attribute, leaving the cursor on the `/` or `>` that ended the scan (or
 * at the end of the input if neither was found).
 *
 * Works in two passes:
 *  1. Split the text into whitespace-separated words, treating quoted
 *     stretches as opaque so spaces, `/` and `>` inside quotes do not split.
 *  2. Re-join words that belong to one attribute but were separated by
 *     whitespace around `=` (`a = b`, `a =b`, `a= b`).
 */
const lexTagAttributes = (state: State) => {
  const { str, tokens } = state
  const len = str.length
  const words: string[] = []

  // ---- Pass 1: split into words -------------------------------------
  let cursor = state.position
  let wordBegin = cursor
  let quote: string | null = null

  for (; cursor < len; cursor++) {
    const ch = str.charAt(cursor)

    if (quote) {
      // Inside a quoted value only the matching quote is significant.
      if (ch === quote) quote = null
      continue
    }

    if (ch === '/' || ch === '>') {
      // End of the tag: flush the word in progress, keep the cursor here.
      if (cursor !== wordBegin) words.push(str.slice(wordBegin, cursor))
      break
    }

    if (/\s/.test(ch)) {
      if (cursor !== wordBegin) words.push(str.slice(wordBegin, cursor))
      wordBegin = cursor + 1
      continue
    }

    if (ch === '\'' || ch === '"') {
      quote = ch
    }
  }
  // Note: when the input ends before '/' or '>', the word in progress is
  // not flushed.
  jumpPosition(state, cursor)

  // ---- Pass 2: stitch words into attributes -------------------------
  const type = 'attribute'
  let i = 0
  while (i < words.length) {
    const word = words[i]
    const next = words[i + 1]

    if (word.indexOf('=') === -1) {
      if (next && startsWith(next, '=')) {
        if (next.length > 1) {
          // `name` followed by `=value`
          tokens.push({ type, content: word + next })
          i += 2
          continue
        }
        const value = words[i + 2]
        if (value) {
          // `name`, a lone `=`, then `value`
          tokens.push({ type, content: word + '=' + value })
          i += 3
          continue
        }
        // `name` and a lone `=` with nothing after it: emit the bare name
        // and consume the `=` along with it.
        tokens.push({ type, content: word })
        i += 2
        continue
      }
      tokens.push({ type, content: word })
      i += 1
      continue
    }

    if (endsWith(word, '=')) {
      if (next && next.indexOf('=') === -1) {
        // `name=` followed by `value`
        tokens.push({ type, content: word + next })
        i += 2
        continue
      }
      // Dangling `=`: strip it and emit what is left.
      tokens.push({ type, content: word.slice(0, -1) })
      i += 1
      continue
    }

    tokens.push({ type, content: word })
    i += 1
  }
}
/**
 * Consumes the body of an element whose content must not be tokenized as
 * markup: everything up to the closing tag whose name matches `tagName`
 * (compared case-insensitively) is emitted as a single `text` token,
 * followed by the tokens of that closing tag.
 *
 * Each `</` found is lexed on a scratch state first; if its name does not
 * match, the search continues after it and nothing is committed. If no
 * `</` remains at all, the rest is handed to `lexText`.
 *
 * @param tagName - Name of the tag that was just opened.
 * @param state - Lexer state positioned right after that tag.
 */
const lexSkipTag = (tagName: string, state: State) => {
  const { str, tokens } = state
  const wanted = tagName.toLowerCase()
  let searchFrom = state.position

  while (searchFrom < str.length) {
    const closeAt = str.indexOf('</', searchFrom)
    if (closeAt === -1) {
      lexText(state)
      return
    }

    // Lex the candidate closing tag on a throwaway state so nothing is
    // committed to the real token list unless the name matches.
    const probe: State = { str, position: closeAt, tokens: [] }
    const closingName = lexTag(probe)

    if (closingName.toLowerCase() !== wanted) {
      searchFrom = probe.position
      continue
    }

    if (closeAt !== state.position) {
      const rawStart = state.position
      jumpPosition(state, closeAt)
      tokens.push({
        type: 'text',
        content: str.slice(rawStart, closeAt),
      })
    }
    tokens.push(...probe.tokens)
    jumpPosition(state, probe.position)
    return
  }
}
/**
 * Lexes one tag starting at the cursor and emits, in order: a `tag-start`
 * token, the `tag` name token, any `attribute` tokens and a `tag-end` token.
 *
 * `close` on `tag-start` is true when the tag opens with `</`; `close` on
 * `tag-end` is true when the character the attribute scan stopped on is `/`.
 *
 * @returns The tag name as reported by `lexTagName`.
 */
const lexTag = (state: State) => {
  const source = state.str

  // The opener is two characters wide when a '/' follows the first
  // character, otherwise one.
  const opensWithSlash = source.charAt(state.position + 1) === '/'
  const openerWidth = opensWithSlash ? 2 : 1
  movePositopn(state, openerWidth)
  state.tokens.push({
    type: 'tag-start',
    close: opensWithSlash,
  })

  const tagName = lexTagName(state)
  lexTagAttributes(state)

  // Likewise the terminator is two characters wide when it starts with '/'.
  const endsWithSlash = source.charAt(state.position) === '/'
  const terminatorWidth = endsWithSlash ? 2 : 1
  movePositopn(state, terminatorWidth)
  state.tokens.push({
    type: 'tag-end',
    close: endsWithSlash,
  })

  return tagName
}
/**
 * Main loop: repeatedly consumes text, comments and tags until the cursor
 * reaches the end of the input, appending tokens to `state.tokens`.
 *
 * After a tag whose lower-cased name appears in `childlessTags`, the
 * following content is consumed by `lexSkipTag` instead of being lexed as
 * markup (presumably raw-text elements; confirm against ./tags).
 */
const lex = (state: State) => {
  const { str } = state

  while (state.position < str.length) {
    const before = state.position
    lexText(state)

    // The cursor moved, so text was consumed; start the next round.
    if (state.position !== before) continue

    // Markup begins at the cursor: either a comment or a tag.
    if (startsWith(str, '!--', before + 1)) {
      lexComment(state)
      continue
    }

    const tagName = lexTag(state)
    if (childlessTags.includes(tagName.toLowerCase())) {
      lexSkipTag(tagName, state)
    }
  }
}
/**
 * Tokenizes a source string.
 *
 * @param str - The text to lex.
 * @returns The tokens produced by `lex`, in the order they were emitted.
 */
export const lexer = (str: string): Token[] => {
  // Start from a fresh state: cursor at the beginning, no tokens yet.
  const state: State = { str, position: 0, tokens: [] }
  lex(state)
  return state.tokens
}