From c1919bb87982a1b69459877190f04122b878e301 Mon Sep 17 00:00:00 2001 From: Affaan Mustafa Date: Fri, 13 Feb 2026 01:52:30 -0800 Subject: [PATCH] fix: greedy regex in validate-commands captures all refs per line, add 18 tests The command cross-reference regex /^.*`\/(...)`.*$/gm only captured the LAST command ref per line due to greedy .* consuming earlier refs. Replaced with line-by-line processing using non-anchored regex to capture ALL command references. New tests: - 4 validate-commands multi-ref-per-line tests (regression) - 8 evaluate-session threshold boundary tests (new file) - 6 session-aliases edge case tests (cleanup, rename, path matching) --- scripts/ci/validate-commands.js | 17 ++- tests/ci/validators.test.js | 69 ++++++++++ tests/hooks/evaluate-session.test.js | 183 +++++++++++++++++++++++++++ tests/lib/session-aliases.test.js | 76 +++++++++++ tests/run-all.js | 1 + 5 files changed, 339 insertions(+), 7 deletions(-) create mode 100644 tests/hooks/evaluate-session.test.js diff --git a/scripts/ci/validate-commands.js b/scripts/ci/validate-commands.js index 049ce6c..c569543 100644 --- a/scripts/ci/validate-commands.js +++ b/scripts/ci/validate-commands.js @@ -74,14 +74,17 @@ function validateCommands() { // Check cross-references to other commands (e.g., `/build-fix`) // Skip lines that describe hypothetical output (e.g., "→ Creates: `/new-table`") - const cmdRefs = contentNoCodeBlocks.matchAll(/^.*`\/([a-z][-a-z0-9]*)`.*$/gm); - for (const match of cmdRefs) { - const line = match[0]; + // Process line-by-line so ALL command refs per line are captured + // (previous anchored regex /^.*`\/...`.*$/gm only matched the last ref per line) + for (const line of contentNoCodeBlocks.split('\n')) { if (/creates:|would create:/i.test(line)) continue; - const refName = match[1]; - if (!validCommands.has(refName)) { - console.error(`ERROR: ${file} - references non-existent command /${refName}`); - hasErrors = true; + const lineRefs = line.matchAll(/`\/([a-z][-a-z0-9]*)`/g); + for (const match of lineRefs) { + const refName = match[1]; + if (!validCommands.has(refName)) { + console.error(`ERROR: ${file} - references non-existent command /${refName}`); + hasErrors = true; + } } } diff --git a/tests/ci/validators.test.js b/tests/ci/validators.test.js index c45e655..d21a073 100644 --- a/tests/ci/validators.test.js +++ b/tests/ci/validators.test.js @@ -634,6 +634,75 @@ function runTests() { cleanupTestDir(testDir); cleanupTestDir(agentsDir); cleanupTestDir(skillsDir); })) passed++; else failed++; + if (test('captures ALL command references on a single line (multi-ref)', () => { + const testDir = createTestDir(); + const agentsDir = createTestDir(); + const skillsDir = createTestDir(); + // Line with two command references — both should be detected + fs.writeFileSync(path.join(testDir, 'multi.md'), + '# Multi\nUse `/ghost-a` and `/ghost-b` together.'); + + const result = runValidatorWithDirs('validate-commands', { + COMMANDS_DIR: testDir, AGENTS_DIR: agentsDir, SKILLS_DIR: skillsDir + }); + assert.strictEqual(result.code, 1, 'Should fail on broken refs'); + // BOTH ghost-a AND ghost-b must be reported (this was the greedy regex bug) + assert.ok(result.stderr.includes('ghost-a'), 'Should report first ref /ghost-a'); + assert.ok(result.stderr.includes('ghost-b'), 'Should report second ref /ghost-b'); + cleanupTestDir(testDir); cleanupTestDir(agentsDir); cleanupTestDir(skillsDir); + })) passed++; else failed++; + + if (test('captures three command refs on one line', () => { + const testDir = createTestDir(); + const agentsDir = createTestDir(); + const skillsDir = createTestDir(); + fs.writeFileSync(path.join(testDir, 'triple.md'), + '# Triple\nChain `/alpha`, `/beta`, and `/gamma` in order.'); + + const result = runValidatorWithDirs('validate-commands', { + COMMANDS_DIR: testDir, AGENTS_DIR: agentsDir, SKILLS_DIR: skillsDir + }); + assert.strictEqual(result.code, 1, 'Should fail on all three broken refs'); + assert.ok(result.stderr.includes('alpha'), 'Should report /alpha'); + assert.ok(result.stderr.includes('beta'), 'Should report /beta'); + assert.ok(result.stderr.includes('gamma'), 'Should report /gamma'); + cleanupTestDir(testDir); cleanupTestDir(agentsDir); cleanupTestDir(skillsDir); + })) passed++; else failed++; + + if (test('multi-ref line with one valid and one invalid ref', () => { + const testDir = createTestDir(); + const agentsDir = createTestDir(); + const skillsDir = createTestDir(); + // "real-cmd" exists, "fake-cmd" does not + fs.writeFileSync(path.join(testDir, 'real-cmd.md'), '# Real\nA real command.'); + fs.writeFileSync(path.join(testDir, 'mixed.md'), + '# Mixed\nRun `/real-cmd` then `/fake-cmd`.'); + + const result = runValidatorWithDirs('validate-commands', { + COMMANDS_DIR: testDir, AGENTS_DIR: agentsDir, SKILLS_DIR: skillsDir + }); + assert.strictEqual(result.code, 1, 'Should fail for the fake ref'); + assert.ok(result.stderr.includes('fake-cmd'), 'Should report /fake-cmd'); + // real-cmd should NOT appear in errors + assert.ok(!result.stderr.includes('real-cmd'), 'Should not report valid /real-cmd'); + cleanupTestDir(testDir); cleanupTestDir(agentsDir); cleanupTestDir(skillsDir); + })) passed++; else failed++; + + if (test('creates: line with multiple refs skips entire line', () => { + const testDir = createTestDir(); + const agentsDir = createTestDir(); + const skillsDir = createTestDir(); + // Both refs on a "Creates:" line should be skipped entirely + fs.writeFileSync(path.join(testDir, 'gen.md'), + '# Generator\nCreates: `/new-a` and `/new-b`'); + + const result = runValidatorWithDirs('validate-commands', { + COMMANDS_DIR: testDir, AGENTS_DIR: agentsDir, SKILLS_DIR: skillsDir + }); + assert.strictEqual(result.code, 0, 'Should skip all refs on creates: line'); + cleanupTestDir(testDir); cleanupTestDir(agentsDir); cleanupTestDir(skillsDir); + })) passed++; else failed++; + if (test('validates valid workflow diagram with known agents', () => { const testDir = createTestDir(); const agentsDir = createTestDir(); diff --git a/tests/hooks/evaluate-session.test.js b/tests/hooks/evaluate-session.test.js new file mode 100644 index 0000000..2302bbf --- /dev/null +++ b/tests/hooks/evaluate-session.test.js @@ -0,0 +1,183 @@ +/** + * Tests for scripts/hooks/evaluate-session.js + * + * Tests the session evaluation threshold logic, config loading, + * and stdin parsing. Uses temporary JSONL transcript files. + * + * Run with: node tests/hooks/evaluate-session.test.js + */ + +const assert = require('assert'); +const path = require('path'); +const fs = require('fs'); +const os = require('os'); +const { spawnSync, execFileSync } = require('child_process'); + +const evaluateScript = path.join(__dirname, '..', '..', 'scripts', 'hooks', 'evaluate-session.js'); + +// Test helpers +function test(name, fn) { + try { + fn(); + console.log(` \u2713 ${name}`); + return true; + } catch (err) { + console.log(` \u2717 ${name}`); + console.log(` Error: ${err.message}`); + return false; + } +} + +function createTestDir() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'eval-session-test-')); +} + +function cleanupTestDir(testDir) { + fs.rmSync(testDir, { recursive: true, force: true }); +} + +/** + * Create a JSONL transcript file with N user messages. + * Each line is a JSON object with `"type":"user"`. + */ +function createTranscript(dir, messageCount) { + const filePath = path.join(dir, 'transcript.jsonl'); + const lines = []; + for (let i = 0; i < messageCount; i++) { + lines.push(JSON.stringify({ type: 'user', content: `Message ${i + 1}` })); + // Intersperse assistant messages to be realistic + lines.push(JSON.stringify({ type: 'assistant', content: `Response ${i + 1}` })); + } + fs.writeFileSync(filePath, lines.join('\n') + '\n'); + return filePath; +} + +/** + * Run evaluate-session.js with stdin providing the transcript_path. + * Uses spawnSync to capture both stdout and stderr regardless of exit code. + * Returns { code, stdout, stderr }. + */ +function runEvaluate(stdinJson) { + const result = spawnSync('node', [evaluateScript], { + encoding: 'utf8', + input: JSON.stringify(stdinJson), + timeout: 10000, + }); + return { + code: result.status || 0, + stdout: result.stdout || '', + stderr: result.stderr || '', + }; +} + +function runTests() { + console.log('\n=== Testing evaluate-session.js ===\n'); + + let passed = 0; + let failed = 0; + + // Threshold boundary tests (default minSessionLength = 10) + console.log('Threshold boundary (default min=10):'); + + if (test('skips session with 9 user messages (below threshold)', () => { + const testDir = createTestDir(); + const transcript = createTranscript(testDir, 9); + const result = runEvaluate({ transcript_path: transcript }); + assert.strictEqual(result.code, 0, 'Should exit 0'); + // "too short" message should appear in stderr (log goes to stderr) + assert.ok( + result.stderr.includes('too short') || result.stderr.includes('9 messages'), + 'Should indicate session too short' + ); + cleanupTestDir(testDir); + })) passed++; else failed++; + + if (test('evaluates session with exactly 10 user messages (at threshold)', () => { + const testDir = createTestDir(); + const transcript = createTranscript(testDir, 10); + const result = runEvaluate({ transcript_path: transcript }); + assert.strictEqual(result.code, 0, 'Should exit 0'); + // Should NOT say "too short" — should say "evaluate for extractable patterns" + assert.ok(!result.stderr.includes('too short'), 'Should NOT say too short at threshold'); + assert.ok( + result.stderr.includes('10 messages') || result.stderr.includes('evaluate'), + 'Should indicate evaluation' + ); + cleanupTestDir(testDir); + })) passed++; else failed++; + + if (test('evaluates session with 11 user messages (above threshold)', () => { + const testDir = createTestDir(); + const transcript = createTranscript(testDir, 11); + const result = runEvaluate({ transcript_path: transcript }); + assert.strictEqual(result.code, 0); + assert.ok(!result.stderr.includes('too short'), 'Should NOT say too short'); + assert.ok(result.stderr.includes('evaluate'), 'Should trigger evaluation'); + cleanupTestDir(testDir); + })) passed++; else failed++; + + // Edge cases + console.log('\nEdge cases:'); + + if (test('exits 0 with missing transcript_path', () => { + const result = runEvaluate({}); + assert.strictEqual(result.code, 0, 'Should exit 0 gracefully'); + })) passed++; else failed++; + + if (test('exits 0 with non-existent transcript file', () => { + const result = runEvaluate({ transcript_path: '/nonexistent/path/transcript.jsonl' }); + assert.strictEqual(result.code, 0, 'Should exit 0 gracefully'); + })) passed++; else failed++; + + if (test('exits 0 with invalid stdin JSON', () => { + // Pass raw string instead of JSON + const result = spawnSync('node', [evaluateScript], { + encoding: 'utf8', + input: 'not valid json at all', + timeout: 10000, + }); + assert.strictEqual(result.status, 0, 'Should exit 0 even on bad stdin'); + })) passed++; else failed++; + + if (test('skips empty transcript file (0 user messages)', () => { + const testDir = createTestDir(); + const filePath = path.join(testDir, 'empty.jsonl'); + fs.writeFileSync(filePath, ''); + const result = runEvaluate({ transcript_path: filePath }); + assert.strictEqual(result.code, 0); + // 0 < 10, so should be "too short" + assert.ok( + result.stderr.includes('too short') || result.stderr.includes('0 messages'), + 'Empty transcript should be too short' + ); + cleanupTestDir(testDir); + })) passed++; else failed++; + + if (test('counts only user messages (ignores assistant messages)', () => { + const testDir = createTestDir(); + const filePath = path.join(testDir, 'mixed.jsonl'); + // 5 user messages + 50 assistant messages — should still be "too short" + const lines = []; + for (let i = 0; i < 5; i++) { + lines.push(JSON.stringify({ type: 'user', content: `msg ${i}` })); + } + for (let i = 0; i < 50; i++) { + lines.push(JSON.stringify({ type: 'assistant', content: `resp ${i}` })); + } + fs.writeFileSync(filePath, lines.join('\n') + '\n'); + + const result = runEvaluate({ transcript_path: filePath }); + assert.strictEqual(result.code, 0); + assert.ok( + result.stderr.includes('too short') || result.stderr.includes('5 messages'), + 'Should count only user messages' + ); + cleanupTestDir(testDir); + })) passed++; else failed++; + + // Summary + console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`); + process.exit(failed > 0 ? 1 : 0); +} + +runTests(); diff --git a/tests/lib/session-aliases.test.js b/tests/lib/session-aliases.test.js index b8dc2cd..95c0c3b 100644 --- a/tests/lib/session-aliases.test.js +++ b/tests/lib/session-aliases.test.js @@ -543,6 +543,82 @@ function runTests() { assert.ok(data.metadata.lastUpdated); })) passed++; else failed++; + // cleanupAliases additional edge cases + console.log('\ncleanupAliases (edge cases):'); + + if (test('returns correct totalChecked when all removed', () => { + resetAliases(); + aliases.setAlias('dead-1', '/dead/1'); + aliases.setAlias('dead-2', '/dead/2'); + aliases.setAlias('dead-3', '/dead/3'); + + const result = aliases.cleanupAliases(() => false); // none exist + assert.strictEqual(result.removed, 3); + assert.strictEqual(result.totalChecked, 3); // 0 remaining + 3 removed + assert.strictEqual(result.removedAliases.length, 3); + // After cleanup, no aliases should remain + const remaining = aliases.listAliases(); + assert.strictEqual(remaining.length, 0); + })) passed++; else failed++; + + if (test('cleanupAliases with empty aliases file does nothing', () => { + resetAliases(); + const result = aliases.cleanupAliases(() => true); + assert.strictEqual(result.removed, 0); + assert.strictEqual(result.totalChecked, 0); + assert.strictEqual(result.removedAliases.length, 0); + })) passed++; else failed++; + + if (test('cleanupAliases preserves aliases where sessionExists returns true', () => { + resetAliases(); + aliases.setAlias('keep-me', '/sessions/real'); + aliases.setAlias('remove-me', '/sessions/gone'); + + const result = aliases.cleanupAliases((p) => p === '/sessions/real'); + assert.strictEqual(result.removed, 1); + assert.strictEqual(result.removedAliases[0].name, 'remove-me'); + // keep-me should survive + const kept = aliases.resolveAlias('keep-me'); + assert.ok(kept, 'keep-me should still exist'); + assert.strictEqual(kept.sessionPath, '/sessions/real'); + })) passed++; else failed++; + + // renameAlias edge cases + console.log('\nrenameAlias (edge cases):'); + + if (test('rename preserves session path and title', () => { + resetAliases(); + aliases.setAlias('src', '/my/session', 'My Feature'); + const result = aliases.renameAlias('src', 'dst'); + assert.strictEqual(result.success, true); + const resolved = aliases.resolveAlias('dst'); + assert.ok(resolved); + assert.strictEqual(resolved.sessionPath, '/my/session'); + assert.strictEqual(resolved.title, 'My Feature'); + })) passed++; else failed++; + + if (test('rename preserves original createdAt timestamp', () => { + resetAliases(); + aliases.setAlias('orig', '/path', 'T'); + const before = aliases.loadAliases().aliases['orig'].createdAt; + aliases.renameAlias('orig', 'renamed'); + const after = aliases.loadAliases().aliases['renamed'].createdAt; + assert.strictEqual(after, before, 'createdAt should be preserved across rename'); + })) passed++; else failed++; + + // getAliasesForSession edge cases + console.log('\ngetAliasesForSession (edge cases):'); + + if (test('does not match partial session paths', () => { + resetAliases(); + aliases.setAlias('full', '/sessions/abc123'); + aliases.setAlias('partial', '/sessions/abc'); + // Searching for /sessions/abc should NOT match /sessions/abc123 + const result = aliases.getAliasesForSession('/sessions/abc'); + assert.strictEqual(result.length, 1); + assert.strictEqual(result[0].name, 'partial'); + })) passed++; else failed++; + // Cleanup — restore both HOME and USERPROFILE (Windows) process.env.HOME = origHome; if (origUserProfile !== undefined) { diff --git a/tests/run-all.js b/tests/run-all.js index e4a52a3..7cf851b 100644 --- a/tests/run-all.js +++ b/tests/run-all.js @@ -16,6 +16,7 @@ const testFiles = [ 'lib/session-manager.test.js', 'lib/session-aliases.test.js', 'hooks/hooks.test.js', + 'hooks/evaluate-session.test.js', 'integration/hooks.test.js', 'ci/validators.test.js', 'scripts/setup-package-manager.test.js',