Foundation for upcoming editor features (scope-aware autocomplete, goto-def / find-usages, source formatter). Hand-rolled recursive descent in TypeScript with Pratt precedence climbing for expressions, error-tolerant so partial trees stay usable while the user is typing. Symbol table walks the AST to produce per-scope declarations, usage sites, and object-literal field maps. Vitest added as a dev-only runner; no editor wiring in this commit. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
74 lines
2.8 KiB
TypeScript
74 lines
2.8 KiB
TypeScript
import { describe, it, expect } from 'vitest';
|
|
import { tokenize } from './lexer';
|
|
|
|
/**
 * Runs `tokenize` on `src` and collects the `kind` of each resulting token,
 * in order, leaving out any token whose kind is 'EOF'.
 */
function kinds(src: string): string[] {
  const { tokens } = tokenize(src);
  const collected: string[] = [];
  for (const token of tokens) {
    if (token.kind === 'EOF') {
      continue; // the end-of-input marker is not interesting to callers
    }
    collected.push(token.kind);
  }
  return collected;
}
|
|
|
|
/**
 * Runs `tokenize` on `src` and collects the `text` of each resulting token,
 * in order, leaving out any token whose kind is 'EOF'.
 */
function texts(src: string): string[] {
  const { tokens } = tokenize(src);
  const collected: string[] = [];
  for (const token of tokens) {
    if (token.kind === 'EOF') {
      continue; // the end-of-input marker is not interesting to callers
    }
    collected.push(token.text);
  }
  return collected;
}
|
|
|
|
// Test suite for `tokenize`: each case feeds a small source string to the
// lexer and checks the resulting tokens and/or comments.
describe('lexer', () => {
  it('emits an EOF for empty input', () => {
    const result = tokenize('');
    expect(result.tokens).toHaveLength(1);
    expect(result.tokens[0].kind).toBe('EOF');
  });

  it('distinguishes keywords from identifiers', () => {
    // Expected shape of the first five tokens, in order.
    const expectedShapes = [
      { kind: 'Keyword', text: 'let' },
      { kind: 'Ident', text: 'foo' },
      { kind: 'Operator', text: '=' },
      { kind: 'Ident', text: 'bar' },
      { kind: 'Punct', text: ';' },
    ];
    const { tokens } = tokenize('let foo = bar;');
    expectedShapes.forEach((shape, index) => {
      expect(tokens[index]).toMatchObject(shape);
    });
  });

  it('lexes integer, float, hex, and binary numbers', () => {
    const literalTexts = texts('1 1.5 0xff 0b1010 1e10 1_000');
    expect(literalTexts).toEqual(['1', '1.5', '0xff', '0b1010', '1e10', '1_000']);

    const literalKinds = kinds('1 1.5 0xff');
    expect(literalKinds).toEqual(['Number', 'Number', 'Number']);
  });

  it('lexes double-quote and backtick strings', () => {
    const [doubleQuoted, backticked] = tokenize('"hi" `world`').tokens;
    expect(doubleQuoted).toMatchObject({ kind: 'String', text: '"hi"' });
    expect(backticked).toMatchObject({ kind: 'String', text: '`world`' });
  });

  it('preserves backslash escapes inside double-quoted strings', () => {
    // The first token's text must equal the entire input, escape included.
    const source = '"a\\"b"';
    const firstToken = tokenize(source).tokens[0];
    expect(firstToken.text).toBe(source);
  });

  it('captures line and block comments as comments, not tokens', () => {
    const lexed = tokenize('let x = 1; // tail\n/* block */ y');
    const commentKinds = lexed.comments.map((c) => c.kind);
    expect(commentKinds).toEqual(['LineComment', 'BlockComment']);

    // No token may carry the text of a comment opener.
    const commentOpeners = ['//', '/*'];
    const strayOpener = lexed.tokens.find((t) => commentOpeners.includes(t.text));
    expect(strayOpener).toBeUndefined();
  });

  it('handles nested block comments', () => {
    // The whole input is expected to come back as one comment.
    const source = '/* outer /* inner */ still outer */';
    const { comments } = tokenize(source);
    expect(comments).toHaveLength(1);
    expect(comments[0].text).toBe(source);
  });

  it('lexes multi-character operators greedily', () => {
    const cases: Array<[string, string[]]> = [
      ['a == b && c != d', ['a', '==', 'b', '&&', 'c', '!=', 'd']],
      ['a ?? b ??= c', ['a', '??', 'b', '??=', 'c']],
      ['1..=10', ['1', '..=', '10']],
    ];
    for (const [input, expected] of cases) {
      expect(texts(input)).toEqual(expected);
    }
  });

  it('recognizes #{ as separate punctuation tokens', () => {
    const leadingThree = tokenize('#{}').tokens.slice(0, 3);
    const leadingTexts = leadingThree.map((t) => t.text);
    expect(leadingTexts).toEqual(['#', '{', '}']);
  });

  it('records accurate byte ranges', () => {
    const src = 'let abc = 42;';
    const identifier = tokenize(src).tokens.find((t) => t.text === 'abc')!;
    // Slicing the source by the token's start/end must yield the token text.
    const sliced = src.slice(identifier.start, identifier.end);
    expect(sliced).toBe('abc');
  });
});
|