feat(dashboard): hand-rolled Rhai parser + symbol table + Vitest

Foundation for upcoming editor features (scope-aware autocomplete, goto-def / find-usages, source formatter). Hand-rolled recursive descent in TypeScript with Pratt precedence climbing for expressions, error-tolerant so partial trees stay usable while the user is typing. Symbol table walks the AST to produce per-scope declarations, usage sites, and object-literal field maps. Vitest added as a dev-only runner; no editor wiring in this commit.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 23:38:15 +02:00
parent a80e6d1ca4
commit bc8b512b56
11 changed files with 2361 additions and 3 deletions
--- a/dashboard/src/lib/rhai/lexer.ts
+++ b/dashboard/src/lib/rhai/lexer.ts
@@ -0,0 +1,248 @@
+// Tokenizer for the dashboard's Rhai parser.
+//
+// Produces a flat array of tokens (eager — Rhai scripts in the dashboard
+// are small, 20–200 lines typical) plus a separate list of comments. The
+// parser only sees tokens; comments are handed to the formatter so it
+// can re-emit them at the right positions.
+//
+// Keyword and operator lists trace back to the upstream TextMate grammar
+// (rhaiscript/vscode-rhai). We don't copy any grammar bytes.
+
+import type { Comment, Range } from './ast';
+
+/**
+ * Coarse lexical category assigned to every token produced by `tokenize`.
+ * `EOF` is the zero-width sentinel appended after the last real token.
+ */
+export type TokenKind =
+	| 'Ident'
+	| 'Keyword'
+	| 'Number'
+	| 'String'
+	| 'Punct'
+	| 'Operator'
+	| 'EOF';
+
+/**
+ * A single lexical token. Position comes from `Range`; `tokenize` fills in
+ * `start` and an exclusive `end` as offsets into the source string.
+ */
+export interface Token extends Range {
+	kind: TokenKind;
+	// For Ident/Keyword/Punct/Operator: the literal source text. For
+	// Number/String: the full literal including quotes. Empty for EOF.
+	text: string;
+}
+
+/**
+ * Words that `tokenize` reports as `Keyword` instead of `Ident`. Matching is
+ * exact and case-sensitive.
+ */
+export const KEYWORDS = new Set([
+	// declarations
+	'let', 'const', 'fn',
+	// branching and loops
+	'if', 'else', 'while', 'loop', 'do', 'for', 'in',
+	// jumps
+	'return', 'break', 'continue',
+	// switch
+	'switch', 'case', 'default',
+	// literals
+	'true', 'false', 'null',
+	// error handling
+	'try', 'catch', 'throw',
+	// miscellaneous
+	'as', 'is', 'private'
+]);
+
+// Operators longer than one character. Order matters: `tokenize` takes the
+// first entry that matches at the cursor, so an operator must appear before
+// any shorter operator that is a prefix of it (`??=` before `??`, `..=`
+// before `..`).
+// NOTE(review): Rhai also has `**`, `**=`, `<<=`, `>>=`, `&=`, `|=`, `^=` and
+// `?.`; they currently lex as several tokens. Confirm the parser copes before
+// adding them here.
+const MULTI_CHAR_OPS = [
+	// three characters
+	'??=', '..=',
+	// two characters
+	'??', '..', '::',
+	'==', '!=', '<=', '>=',
+	'&&', '||', '<<', '>>',
+	'+=', '-=', '*=', '/=', '%=',
+	'=>', '->'
+];
+
+// One-character operators, tried only after every multi-character operator
+// has failed to match. Each character of the string becomes one Set entry.
+const SINGLE_CHAR_OPS = new Set('+-*/%<>!&|^~=?');
+
+// Structural punctuation, one Set entry per character of the string. `#` is
+// listed so the opener of a `#{` object-map literal is not dropped: the lexer
+// emits `#` as its own `Punct` and the parser pairs it with the `{` that
+// follows.
+const PUNCTS = new Set('(){}[];,.:#');
+
+/** Output of `tokenize`: the token stream plus the comments kept out of it. */
+export interface LexResult {
+	// Tokens in source order; the last entry is always the EOF token.
+	tokens: Token[];
+	// Line and block comments in source order; never present in `tokens`.
+	comments: Comment[];
+}
+
+/**
+ * Splits Rhai source text into tokens and comments in a single eager pass.
+ *
+ * The scanner never throws: unterminated strings and block comments run to
+ * the end of the input, and characters that match no rule are skipped, so the
+ * result always ends in exactly one `EOF` token.
+ *
+ * @param source - Rhai source text to scan.
+ * @returns Tokens in source order, terminated by a zero-width `EOF` token at
+ *   `source.length`, plus the comments that were found. Every `start`/`end`
+ *   pair is a half-open offset range into `source`, and `end` never exceeds
+ *   `source.length`.
+ */
+export function tokenize(source: string): LexResult {
+	const tokens: Token[] = [];
+	const comments: Comment[] = [];
+	let i = 0;
+	const n = source.length;
+
+	while (i < n) {
+		const ch = source[i];
+
+		// Whitespace
+		if (ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r') {
+			i++;
+			continue;
+		}
+
+		// Line comment: runs up to, but not including, the newline.
+		if (ch === '/' && source[i + 1] === '/') {
+			const start = i;
+			while (i < n && source[i] !== '\n') i++;
+			comments.push({ kind: 'LineComment', start, end: i, text: source.slice(start, i) });
+			continue;
+		}
+
+		// Block comment (supports nesting per the Rhai book). An unterminated
+		// comment simply extends to the end of the input.
+		if (ch === '/' && source[i + 1] === '*') {
+			const start = i;
+			i += 2;
+			let depth = 1;
+			while (i < n && depth > 0) {
+				if (source[i] === '/' && source[i + 1] === '*') {
+					depth++;
+					i += 2;
+				} else if (source[i] === '*' && source[i + 1] === '/') {
+					depth--;
+					i += 2;
+				} else {
+					i++;
+				}
+			}
+			comments.push({ kind: 'BlockComment', start, end: i, text: source.slice(start, i) });
+			continue;
+		}
+
+		// Strings: " ... " (escape-aware, single-line by convention) and
+		// ` ... ` (raw, multi-line). We tokenize the entire literal including
+		// quotes; the parser only cares about its position and text.
+		if (ch === '"' || ch === '`') {
+			const quote = ch;
+			const start = i;
+			i++;
+			while (i < n) {
+				const c = source[i];
+				if (c === '\\' && quote === '"') {
+					// Skip the escaped character as well, but never step past
+					// the end of the input when the source stops right after
+					// a backslash; otherwise `end` would exceed the EOF offset.
+					i = Math.min(i + 2, n);
+					continue;
+				}
+				if (c === quote) {
+					i++;
+					break;
+				}
+				i++;
+			}
+			tokens.push({ kind: 'String', start, end: i, text: source.slice(start, i) });
+			continue;
+		}
+
+		// Character literals: 'x' or a single escape such as '\n', '\x41',
+		// '\u2764'. They are emitted as `String` tokens (quotes included) so
+		// that their content, e.g. the `"` in '"', is never re-scanned as
+		// code. The match is deliberately strict (one code point or one
+		// escape, then a closing quote); a stray `'` is skipped like any
+		// other unrecognized character.
+		if (ch === "'") {
+			let j = i + 1;
+			if (source[j] === '\\') {
+				j += 2; // backslash plus the escape selector
+				while (j < n && isHexDigit(source[j])) j++; // \x.., \u...., \U........ payload
+			} else if (j < n && source[j] !== "'" && source[j] !== '\n') {
+				const codePoint = source.codePointAt(j);
+				// Astral characters occupy two UTF-16 code units.
+				j += codePoint !== undefined && codePoint > 0xffff ? 2 : 1;
+			}
+			if (j > i + 1 && source[j] === "'") {
+				j++;
+				tokens.push({ kind: 'String', start: i, end: j, text: source.slice(i, j) });
+				i = j;
+			} else {
+				i++;
+			}
+			continue;
+		}
+
+		// Numbers: hex, binary, decimal, optional `.frac`, optional exponent.
+		// Underscores are allowed as digit separators per Rhai.
+		if (isDigit(ch)) {
+			const start = i;
+			if (ch === '0' && (source[i + 1] === 'x' || source[i + 1] === 'X')) {
+				i += 2;
+				while (i < n && (isHexDigit(source[i]) || source[i] === '_')) i++;
+			} else if (ch === '0' && (source[i + 1] === 'b' || source[i + 1] === 'B')) {
+				i += 2;
+				while (i < n && (source[i] === '0' || source[i] === '1' || source[i] === '_')) i++;
+			} else {
+				while (i < n && (isDigit(source[i]) || source[i] === '_')) i++;
+				// Require a digit after the dot so `0..10` and `1.foo()` keep
+				// the dot as a separate token.
+				if (source[i] === '.' && isDigit(source[i + 1])) {
+					i++;
+					while (i < n && (isDigit(source[i]) || source[i] === '_')) i++;
+				}
+				if (source[i] === 'e' || source[i] === 'E') {
+					// Only commit to an exponent when at least one digit
+					// follows the optional sign; otherwise the `e` belongs to
+					// whatever comes next (e.g. a half-typed `1e`).
+					let j = i + 1;
+					if (source[j] === '+' || source[j] === '-') j++;
+					if (j < n && isDigit(source[j])) {
+						i = j;
+						while (i < n && isDigit(source[i])) i++;
+					}
+				}
+			}
+			tokens.push({ kind: 'Number', start, end: i, text: source.slice(start, i) });
+			continue;
+		}
+
+		// Identifier or keyword
+		if (isIdentStart(ch)) {
+			const start = i;
+			i++;
+			while (i < n && isIdentCont(source[i])) i++;
+			const text = source.slice(start, i);
+			tokens.push({
+				kind: KEYWORDS.has(text) ? 'Keyword' : 'Ident',
+				start,
+				end: i,
+				text
+			});
+			continue;
+		}
+
+		// Multi-char operators: first match wins, so the table's ordering
+		// decides between operators that share a prefix.
+		let matched = false;
+		for (const op of MULTI_CHAR_OPS) {
+			if (source.startsWith(op, i)) {
+				tokens.push({ kind: 'Operator', start: i, end: i + op.length, text: op });
+				i += op.length;
+				matched = true;
+				break;
+			}
+		}
+		if (matched) continue;
+
+		// Single-char operator
+		if (SINGLE_CHAR_OPS.has(ch)) {
+			tokens.push({ kind: 'Operator', start: i, end: i + 1, text: ch });
+			i++;
+			continue;
+		}
+
+		// Punctuation
+		if (PUNCTS.has(ch)) {
+			tokens.push({ kind: 'Punct', start: i, end: i + 1, text: ch });
+			i++;
+			continue;
+		}
+
+		// Unrecognized: skip and let the parser report the gap if needed.
+		i++;
+	}
+
+	tokens.push({ kind: 'EOF', start: n, end: n, text: '' });
+	return { tokens, comments };
+}
+
+/** True when `c` is one of the ASCII decimal digits `0` through `9`. */
+function isDigit(c: string): boolean {
+	return '0' <= c && c <= '9';
+}
+
+/** True when `c` is a hexadecimal digit: `0`-`9`, `a`-`f` or `A`-`F`. */
+function isHexDigit(c: string): boolean {
+	if (isDigit(c)) return true;
+	const lowerHex = 'a' <= c && c <= 'f';
+	const upperHex = 'A' <= c && c <= 'F';
+	return lowerHex || upperHex;
+}
+
+/** True when `c` may begin an identifier: an ASCII letter or `_`. */
+function isIdentStart(c: string): boolean {
+	if (c === '_') return true;
+	const lowerLetter = 'a' <= c && c <= 'z';
+	const upperLetter = 'A' <= c && c <= 'Z';
+	return lowerLetter || upperLetter;
+}
+
+/** True when `c` may continue an identifier: a letter, `_`, or a digit. */
+function isIdentCont(c: string): boolean {
+	if (isDigit(c)) return true;
+	return isIdentStart(c);
+}