// Source listing metadata: TypeScript, 54 lines, 1.2 KiB
type Token<T extends string> = {
|
||
|
type: T,
|
||
|
string: string,
|
||
|
}
|
||
|
|
||
|
/**
 * Configuration accepted by `lexer`.
 *
 * @typeParam T - Union of the token type names; inferred from the keys of `rules`.
 */
type LexerProps<T extends string> = {
  /**
   * Maps each token type name to the pattern that recognises it.
   * The lexer tries the rules in property order and treats a rule named
   * `_skip` as "consume but do not emit".
   */
  rules: Record<T, RegExp>;
  // TODO: the optional per-token hook below was left unfinished (no return type).
  // postprocess?: (token: Token<T>) =>
};
|
||
|
|
||
|
/**
 * Typed wrapper around `Object.entries` that keeps the key type.
 *
 * `Object.entries` reports keys as plain `string`; this helper asserts them
 * back to `K`. The assertion is only sound when `obj` has no own enumerable
 * properties beyond the keys in `K`.
 *
 * @param obj - Object whose own enumerable string-keyed properties are listed.
 * @returns The `[key, value]` pairs of `obj`, in `Object.entries` order.
 */
const entries = <K extends string, V>(obj: Record<K, V>): Array<[K, V]> => {
  return Object.entries(obj) as Array<[K, V]>;
};
|
||
|
|
||
|
/**
 * Builds a tokenizer from a table of named regular-expression rules.
 *
 * At each step the rules are tried in the property order of `props.rules`, and
 * the first rule that matches a non-empty piece of text at the very start of
 * the remaining input wins (first match, not longest match). The matched text
 * becomes a token whose `type` is the rule's key. Text matched by a rule named
 * `_skip` is consumed without producing a token.
 *
 * @param props - Lexer configuration; `props.rules` maps token type to pattern.
 * @returns A function that splits its argument into an array of tokens.
 * @throws Error - thrown by the returned function when no rule matches a
 *   non-empty prefix of the remaining input.
 */
const lexer = <T extends string>(props: LexerProps<T>) => {
  const {rules} = props;

  // Compile every rule once into a sticky, non-global copy:
  //  - sticky (`y`) makes `exec` test only at `lastIndex` instead of scanning
  //    the whole remaining input for a match that would be rejected anyway;
  //  - dropping `g` and working on a private copy keeps `lastIndex` state of
  //    the caller's RegExp objects from leaking between calls.
  const compiledRules = entries(rules).map(([type, matcher]) => ({
    type,
    pattern: new RegExp(matcher.source, matcher.flags.replace(/[gy]/g, '') + 'y'),
  }));

  return (string: string): Array<Token<T>> => {
    const tokens: Array<Token<T>> = [];
    let rest = string;

    while (rest.length > 0) {
      let consumed = 0;

      for (const {type, pattern} of compiledRules) {
        // Always match against the start of the remaining input, so `^` in a
        // rule keeps meaning "start of what is left".
        pattern.lastIndex = 0;
        const match = pattern.exec(rest);

        // An empty match consumes nothing; accepting it would loop forever,
        // so it is treated as "this rule did not match".
        if (match === null || match[0].length === 0) {
          continue;
        }

        if (type !== '_skip') {
          tokens.push({type, string: match[0]});
        }
        consumed = match[0].length;
        break;
      }

      if (consumed === 0) {
        const offset = string.length - rest.length;
        throw new Error(
          `Lexer error: no rule matches at offset ${offset} near ${JSON.stringify(rest.slice(0, 20))}`,
        );
      }

      rest = rest.slice(consumed);
    }

    return tokens;
  };
};
|
||
|
|
||
|
// Example lexer for a tiny language of words, punctuation and string literals.
// Rule order matters: the lexer takes the first rule that matches.
const my_lexer = lexer({
  rules: {
    // One or more word characters (letters, digits, underscore).
    symbol: /\w+/,
    // Exactly one of: . , : ; ( ) [ ] { }
    punctuation: /[\.,:;()[\]{}]/,
    // Double-quoted text in which a backslash escapes the next character.
    string_literal: /"([^"\\]|\\.)*"/,
    // Whitespace; the lexer consumes `_skip` matches without emitting a token.
    _skip: /\s+/,
  },
});
|
||
|
|
||
|
// Demo: tokenize a sample line and print the resulting token array.
// The doubled backslashes in the template literal put a single backslash
// before each inner quote, exercising the escape handling of `string_literal`.
console.log(my_lexer(`abc def { ghi } "string literal!" "with \\"escaped\\" quote marks"`));
|