import { describe, it, expect } from 'vitest';
import { tokenize } from './lexer';

/** Tokenizes `src` and returns its tokens with every `EOF`-kind token removed. */
const withoutEof = (src: string) =>
  tokenize(src).tokens.filter((token) => token.kind !== 'EOF');

/** Returns the `kind` of each non-EOF token produced for `src`, in order. */
const kinds = (src: string): string[] => withoutEof(src).map((token) => token.kind);

/** Returns the `text` of each non-EOF token produced for `src`, in order. */
const texts = (src: string): string[] => withoutEof(src).map((token) => token.text);

describe('lexer', () => {
  it('emits an EOF for empty input', () => {
    const result = tokenize('');

    expect(result.tokens).toHaveLength(1);
    expect(result.tokens[0].kind).toBe('EOF');
  });

  it('distinguishes keywords from identifiers', () => {
    const { tokens } = tokenize('let foo = bar;');

    // Expected shape of the first five tokens, checked position by position.
    const expectedShapes = [
      { kind: 'Keyword', text: 'let' },
      { kind: 'Ident', text: 'foo' },
      { kind: 'Operator', text: '=' },
      { kind: 'Ident', text: 'bar' },
      { kind: 'Punct', text: ';' },
    ];

    expectedShapes.forEach((shape, index) => {
      expect(tokens[index]).toMatchObject(shape);
    });
  });

  it('lexes integer, float, hex, and binary numbers', () => {
    const literalTexts = texts('1 1.5 0xff 0b1010 1e10 1_000');
    expect(literalTexts).toEqual(['1', '1.5', '0xff', '0b1010', '1e10', '1_000']);

    const literalKinds = kinds('1 1.5 0xff');
    expect(literalKinds).toEqual(['Number', 'Number', 'Number']);
  });

  it('lexes double-quote and backtick strings', () => {
    const [doubleQuoted, backticked] = tokenize('"hi" `world`').tokens;

    expect(doubleQuoted).toMatchObject({ kind: 'String', text: '"hi"' });
    expect(backticked).toMatchObject({ kind: 'String', text: '`world`' });
  });

  it('preserves backslash escapes inside double-quoted strings', () => {
    // The lexer input is the six characters: " a \ " b "
    const source = '"a\\"b"';
    const [first] = tokenize(source).tokens;

    expect(first.text).toBe('"a\\"b"');
  });

  it('captures line and block comments as comments, not tokens', () => {
    const lexed = tokenize('let x = 1; // tail\n/* block */ y');

    const commentKinds = lexed.comments.map((comment) => comment.kind);
    expect(commentKinds).toEqual(['LineComment', 'BlockComment']);

    // No comment opener may appear in the regular token stream.
    const strayOpener = lexed.tokens.find(
      (token) => token.text === '//' || token.text === '/*',
    );
    expect(strayOpener).toBeUndefined();
  });

  it('handles nested block comments', () => {
    const source = '/* outer /* inner */ still outer */';
    const { comments } = tokenize(source);

    expect(comments).toHaveLength(1);
    expect(comments[0].text).toBe('/* outer /* inner */ still outer */');
  });

  it('lexes multi-character operators greedily', () => {
    // Each case pairs an input with the token texts it must split into.
    const cases: Array<[string, string[]]> = [
      ['a == b && c != d', ['a', '==', 'b', '&&', 'c', '!=', 'd']],
      ['a ?? b ??= c', ['a', '??', 'b', '??=', 'c']],
      ['1..=10', ['1', '..=', '10']],
    ];

    for (const [input, expected] of cases) {
      expect(texts(input)).toEqual(expected);
    }
  });

  it('recognizes #{ as separate punctuation tokens', () => {
    const leading = tokenize('#{}').tokens.slice(0, 3);
    const leadingTexts = leading.map((token) => token.text);

    expect(leadingTexts).toEqual(['#', '{', '}']);
  });

  it('records accurate byte ranges', () => {
    const src = 'let abc = 42;';
    const { start, end } = tokenize(src).tokens.find((token) => token.text === 'abc')!;

    // NOTE(review): `String.prototype.slice` indexes UTF-16 code units; for this
    // ASCII-only input those offsets coincide with byte offsets.
    expect(src.slice(start, end)).toBe('abc');
  });
});