54 lines
1.2 KiB
TypeScript
Raw Normal View History

2023-04-28 17:57:35 -07:00
/**
 * One lexical token.
 *
 * `T` is the union of token-type names the producing lexer can emit.
 */
type Token<T extends string> = {
  /** Name of the rule that produced this token. */
  type: T;
  /** The exact slice of input text the rule matched. */
  string: string;
};
/**
 * Configuration accepted by `lexer`.
 *
 * `T` is the union of rule names; every name maps to the regular expression
 * that recognises that kind of token.
 */
type LexerProps<T extends string> = {
  /** Rule table: one regular expression per token-type name. */
  rules: Record<T, RegExp>;
  // TODO: optional `postprocess` hook receiving a Token<T>; its signature was
  // left unfinished, so it is not part of the type yet.
};
/**
 * `Object.entries` with the key type preserved.
 *
 * The built-in signature widens keys to `string`; this wrapper asserts them
 * back to `K`. The assertion is unchecked, so it is only sound when `obj`
 * carries no keys beyond those in `K`.
 *
 * @param obj Object whose keys are all of type `K`.
 * @returns The `[key, value]` pairs of `obj`, in `Object.entries` order.
 */
const entries = <K extends string, V>(obj: {[key in K]: V}) =>
  Object.entries(obj) as Array<[K, V]>;
/**
 * Builds a tokenizer from a table of named regular expressions.
 *
 * The returned function repeatedly tries the rules, in the table's property
 * order, against the start of the remaining input. The first rule that
 * matches a non-empty prefix wins: its text is emitted as a token (unless the
 * rule is named `_skip`, in which case the text is discarded) and scanning
 * continues after it.
 *
 * Rules are compiled once, when `lexer` is called, into private sticky
 * copies. The caller's `RegExp` objects are never mutated, and `g` / `y`
 * flags on them do not affect matching.
 *
 * @param props Lexer configuration; `props.rules` maps token-type names to
 *   the pattern recognising that token.
 * @returns A function that turns a string into an array of tokens.
 * @throws {Error} From the returned function, when no rule matches a
 *   non-empty prefix of the remaining input. The message contains the
 *   offset and a short preview of the unmatched text.
 */
const lexer = <T extends string>(props: LexerProps<T>) => {
  // Sticky ("y") copies only attempt a match at `lastIndex`, so a rule that
  // cannot match at the current position fails immediately instead of
  // scanning the rest of the input. Dropping "g" keeps `exec` semantics simple.
  const compiled = (Object.entries(props.rules) as Array<[T, RegExp]>).map(
    ([type, matcher]) => ({
      type,
      matcher: new RegExp(matcher.source, matcher.flags.replace(/[gy]/g, "") + "y"),
    }),
  );

  return (string: string): Array<Token<T>> => {
    const tokens: Array<Token<T>> = [];
    // The remainder is sliced (rather than tracked by index) so that rules
    // anchored with `^` keep matching at the current position.
    let rest = string;

    while (rest.length > 0) {
      let consumed = 0;

      for (const {type, matcher} of compiled) {
        matcher.lastIndex = 0;
        const match = matcher.exec(rest);
        // An empty match would consume nothing and loop forever; treat it as
        // "this rule does not apply here" and try the next rule.
        if (match === null || match[0].length === 0) {
          continue;
        }
        if (type !== "_skip") {
          tokens.push({type, string: match[0]});
        }
        consumed = match[0].length;
        break;
      }

      if (consumed === 0) {
        const offset = string.length - rest.length;
        const preview = JSON.stringify(rest.slice(0, 20));
        throw new Error(`Lexer error: no rule matches input at offset ${offset}: ${preview}`);
      }
      rest = rest.slice(consumed);
    }

    return tokens;
  };
};
// Example lexer. Rules are tried in the order listed:
//   symbol         - a run of word characters (letters, digits, underscore)
//   punctuation    - a single character from . , : ; ( ) [ ] { }
//   string_literal - a double-quoted string; a backslash escapes the next character
//   _skip          - whitespace, which `lexer` matches but does not emit as a token
const my_lexer = lexer({
  rules: {
    symbol: /\w+/,
    punctuation: /[\.,:;()[\]{}]/,
    string_literal: /"([^"\\]|\\.)*"/,
    _skip: /\s+/,
  },
});

// Demo run: tokenize a sample containing symbols, braces, and string literals
// (one with escaped quotes), then print the resulting token array.
const demoInput = `abc def { ghi } "string literal!" "with \\"escaped\\" quote marks"`;
console.log(my_lexer(demoInput));