feat(002): add sitemap generation feature

- Refactor kmeContentSourceAdapter.js into getValidToken(), oidcAuthFlow(),
  and sitemapFlow(); add sitemap generation using hydra:member response structure
- Add searchApiBaseUrl, tenant, proxyBaseUrl fields to kme_CSA_settings.json
  and kme_CSA_settings.json.example
- Add 17 unit tests for sitemap flow and non-sitemap routing regression
- Add 5 contract tests for sitemap endpoint (proxy-http.test.js)
- Add [Unreleased] sitemap entry to CHANGELOG.md
- Add full specs/002-sitemap-generation/ artifact directory
  (spec, plan, tasks, data-model, contracts, research, quickstart, checklist)
- Update constitution.md: add redis as permitted global, refresh
  kme_CSA_settings references
- Update copilot-instructions.md SPECKIT marker to sitemap plan
This commit is contained in:
2026-04-22 22:08:08 -05:00
parent 49a6b2e4e7
commit 50b87297d2
17 changed files with 1879 additions and 40 deletions

View File

@@ -6,6 +6,7 @@ import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';
import axios from 'axios';
import { create as xmlBuilder } from 'xmlbuilder2';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
@@ -15,12 +16,12 @@ const proxyCode = readFileSync(proxyPath, 'utf-8');
const proxyScript = new vm.Script(proxyCode, { filename: 'kmeContentSourceAdapter.js' });
/**
* Start a minimal HTTP server that handles all POST requests with a fixed JSON body.
* Start a minimal HTTP server that handles all requests with a fixed JSON body.
* @param {number} statusCode
* @param {object} responseBody
* @returns {Promise<{ server: http.Server, url: string, close: () => Promise<void> }>}
*/
function startMockTokenServer(statusCode, responseBody) {
function startMockServer(statusCode, responseBody) {
return new Promise((resolve, reject) => {
const server = http.createServer((req, res) => {
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
@@ -36,6 +37,11 @@ function startMockTokenServer(statusCode, responseBody) {
});
}
/**
* Start a mock token server (alias for backwards compatibility).
*/
const startMockTokenServer = startMockServer;
/** Build an in-memory Redis fake. */
function makeRedisFake() {
const _store = {};
@@ -76,6 +82,7 @@ describe('proxy HTTP contract: 200 OK', () => {
URLSearchParams,
console,
axios,
xmlBuilder,
redis: makeRedisFake(),
kme_CSA_settings: {
tokenUrl: mock.url,
@@ -113,6 +120,7 @@ describe('proxy HTTP contract: 401 Unauthorized', () => {
URLSearchParams,
console,
axios,
xmlBuilder,
redis: makeRedisFake(),
kme_CSA_settings: {
tokenUrl: mock.url,
@@ -135,3 +143,159 @@ describe('proxy HTTP contract: 401 Unauthorized', () => {
}
});
});
// ---------------------------------------------------------------------------
// Contract: sitemap endpoint (T005, T012)
// ---------------------------------------------------------------------------
describe('sitemap endpoint', () => {
/**
* Build a VM context wired to a real token server and a real search server.
* The token cache is pre-seeded so no real token exchange is needed.
*/
function makeSitemapCtx({ searchUrl, tokenUrl }) {
const redis = makeRedisFake();
// Pre-seed a valid token so no token fetch is needed
redis.hSet('authorization', 'token', 'sitemap-contract-token');
redis.hSet('authorization', 'expiry', '9999999999');
const res = makeRes();
const ctx = vm.createContext({
URLSearchParams,
console,
axios,
xmlBuilder,
redis,
kme_CSA_settings: {
tokenUrl: tokenUrl ?? 'http://127.0.0.1:1', // not used (cache hit)
username: 'user',
password: 'pass',
clientId: 'client',
scope: 'openid',
searchApiBaseUrl: searchUrl,
tenant: 'test',
proxyBaseUrl: 'https://proxy.example.com',
},
req: { url: '/sitemap.xml', method: 'GET', headers: {} },
res,
});
ctx._res = res;
return ctx;
}
test('full round-trip GET /sitemap.xml → 200 application/xml with loc elements', async () => {
const searchMock = await startMockServer(200, {
'hydra:member': [
{ 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/doc-1' }] },
],
});
try {
const ctx = makeSitemapCtx({ searchUrl: searchMock.url });
await proxyScript.runInContext(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
assert.ok(ctx._res.headers['Content-Type'].includes('application/xml'),
`Content-Type was: ${ctx._res.headers['Content-Type']}`);
assert.ok(ctx._res.body.startsWith('<?xml'), 'body should start with XML declaration');
assert.ok(ctx._res.body.includes('<loc>'), 'body should contain a loc element');
} finally {
await searchMock.close();
}
});
test('empty results round-trip → 200 application/xml with urlset and no url element', async () => {
const searchMock = await startMockServer(200, { 'hydra:member': [] });
try {
const ctx = makeSitemapCtx({ searchUrl: searchMock.url });
await proxyScript.runInContext(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
assert.ok(ctx._res.headers['Content-Type'].includes('application/xml'),
`Content-Type was: ${ctx._res.headers['Content-Type']}`);
assert.ok(ctx._res.body.includes('<urlset'), 'body should contain urlset');
assert.ok(!ctx._res.body.includes('<url>'), 'body should not contain url elements for empty results');
} finally {
await searchMock.close();
}
});
test('search server returns 503 → adapter returns 502', async () => {
const searchMock = await startMockServer(503, { error: 'Service Unavailable' });
try {
const ctx = makeSitemapCtx({ searchUrl: searchMock.url });
await proxyScript.runInContext(ctx);
assert.strictEqual(ctx._res.statusCode, 502, `body was: ${ctx._res.body}`);
} finally {
await searchMock.close();
}
});
test('search server hangs > 10s → adapter returns 504 within 12s', async () => {
// Server that accepts connections but never responds
const server = await new Promise((resolve, reject) => {
const s = http.createServer(() => { /* intentionally hang */ });
s.listen(0, '127.0.0.1', () => {
const { port } = s.address();
const close = () => new Promise((res, rej) => s.close(err => err ? rej(err) : res()));
resolve({ server: s, url: `http://127.0.0.1:${port}`, close });
});
s.once('error', reject);
});
try {
const ctx = makeSitemapCtx({ searchUrl: server.url });
const start = Date.now();
await proxyScript.runInContext(ctx);
const elapsed = Date.now() - start;
assert.strictEqual(ctx._res.statusCode, 504, `body was: ${ctx._res.body}`);
assert.ok(elapsed < 12000, `Should respond within 12s, took ${elapsed}ms`);
} finally {
await server.close();
}
});
});
// ---------------------------------------------------------------------------
// Non-sitemap endpoint regression (T010)
// ---------------------------------------------------------------------------
describe('non-sitemap endpoint (regression)', () => {
test('GET / with valid OIDC credentials → 200 Authorized', async () => {
const mock = await startMockTokenServer(200, {
id_token: 'regression-token',
expires_in: 9_999_999_999,
});
try {
const res = makeRes();
const ctx = vm.createContext({
URLSearchParams,
console,
axios,
xmlBuilder,
redis: makeRedisFake(),
kme_CSA_settings: {
tokenUrl: mock.url,
username: 'user',
password: 'pass',
clientId: 'client',
scope: 'openid',
},
req: { url: '/', method: 'GET', headers: {} },
res,
});
await proxyScript.runInContext(ctx);
assert.strictEqual(res.statusCode, 200);
assert.strictEqual(res.body, 'Authorized');
} finally {
await mock.close();
}
});
});

View File

@@ -4,6 +4,7 @@ import vm from 'node:vm';
import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';
import { create as xmlBuilder } from 'xmlbuilder2';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
@@ -54,6 +55,9 @@ function makeContext(t, overrides = {}) {
post: t.mock.fn(async () => ({
data: { id_token: 'mock-token', expires_in: 9_999_999_999 },
})),
get: t.mock.fn(async () => ({
data: { items: [] },
})),
};
const ctx = vm.createContext({
@@ -62,6 +66,7 @@ function makeContext(t, overrides = {}) {
axios: axiosMock,
redis,
kme_CSA_settings,
xmlBuilder,
req: { url: '/', method: 'GET', headers: {} },
res,
...overrides,
@@ -157,7 +162,7 @@ describe('US3: authentication failure handling', () => {
response: { status: 401 },
});
const ctx = makeContext(t, {
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
});
await runScript(ctx);
@@ -169,7 +174,7 @@ describe('US3: authentication failure handling', () => {
test('timeout (ECONNABORTED) → 401 Unauthorized: token service timeout', async (t) => {
const axiosError = Object.assign(new Error('timeout'), { code: 'ECONNABORTED' });
const ctx = makeContext(t, {
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
});
await runScript(ctx);
@@ -181,7 +186,7 @@ describe('US3: authentication failure handling', () => {
test('timeout (ERR_CANCELED) → 401 Unauthorized: token service timeout', async (t) => {
const axiosError = Object.assign(new Error('canceled'), { code: 'ERR_CANCELED' });
const ctx = makeContext(t, {
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
});
await runScript(ctx);
@@ -194,6 +199,7 @@ describe('US3: authentication failure handling', () => {
const ctx = makeContext(t, {
axios: {
post: t.mock.fn(async () => ({ data: { expires_in: 9999 } })),
get: t.mock.fn(),
},
});
@@ -207,6 +213,7 @@ describe('US3: authentication failure handling', () => {
const ctx = makeContext(t, {
axios: {
post: t.mock.fn(async () => ({ data: { id_token: 'a-token' } })),
get: t.mock.fn(),
},
});
@@ -267,7 +274,7 @@ describe('stampede guard', () => {
await new Promise(resolve => setTimeout(resolve, 50));
return { data: { id_token: 'stampede-token', expires_in: 9_999_999_999 } };
});
const sharedAxios = { post: mockAxiosPost };
const sharedAxios = { post: mockAxiosPost, get: t.mock.fn() };
// Build two contexts sharing kme_CSA_settings, redis, and axios references
function makeRes(tctx) {
@@ -286,13 +293,13 @@ describe('stampede guard', () => {
const ctx1 = vm.createContext({
URLSearchParams, console, axios: sharedAxios,
redis, kme_CSA_settings,
redis, kme_CSA_settings, xmlBuilder,
req: { url: '/', method: 'GET', headers: {} },
res: res1,
});
const ctx2 = vm.createContext({
URLSearchParams, console, axios: sharedAxios,
redis, kme_CSA_settings,
redis, kme_CSA_settings, xmlBuilder,
req: { url: '/', method: 'GET', headers: {} },
res: res2,
});
@@ -309,3 +316,205 @@ describe('stampede guard', () => {
assert.strictEqual(res2.body, 'Authorized');
});
});
// ---------------------------------------------------------------------------
// Sitemap flow — US1 (T004)
// ---------------------------------------------------------------------------
describe('sitemap flow', () => {
function makeSitemapContext(t, axiosGetImpl, settingsOverrides = {}) {
const ctx = makeContext(t, {
req: { url: '/sitemap.xml', method: 'GET', headers: {} },
});
// Add sitemap-specific settings
ctx.kme_CSA_settings.searchApiBaseUrl = 'https://search.example.com/api';
ctx.kme_CSA_settings.tenant = 'test-tenant';
ctx.kme_CSA_settings.proxyBaseUrl = 'https://proxy.example.com';
Object.assign(ctx.kme_CSA_settings, settingsOverrides);
// Pre-seed token cache so getValidToken() returns immediately
ctx._store['authorization:token'] = 'sitemap-token';
ctx._store['authorization:expiry'] = '9999999999';
// Replace axios.get with the provided implementation
ctx._axios.get = t.mock.fn(axiosGetImpl ?? (async () => ({
data: { 'hydra:member': [] },
})));
return ctx;
}
test('happy path — items present → 200 with correct XML and loc values', async (t) => {
const ctx = makeSitemapContext(t, async () => ({
data: {
'hydra:member': [
{ 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/doc-1' }] },
{ 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/doc-2' }] },
],
},
}));
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
assert.strictEqual(ctx._res.headers['Content-Type'], 'application/xml');
assert.ok(ctx._res.body.includes('<?xml'), 'body should start with XML declaration');
assert.ok(ctx._res.body.includes('<urlset'), 'body should contain urlset');
assert.ok(
ctx._res.body.includes('<loc>https://proxy.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fdoc-1</loc>'),
'body should contain encoded loc for doc-1',
);
assert.ok(
ctx._res.body.includes('<loc>https://proxy.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fdoc-2</loc>'),
'body should contain encoded loc for doc-2',
);
});
test('happy path — zero items → 200 with empty urlset', async (t) => {
const ctx = makeSitemapContext(t, async () => ({ data: { 'hydra:member': [] } }));
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
assert.strictEqual(ctx._res.headers['Content-Type'], 'application/xml');
assert.ok(ctx._res.body.includes('<urlset'), 'body should contain urlset');
assert.ok(!ctx._res.body.includes('<url>'), 'body should not contain url elements');
});
test('items with empty vkm:url filtered — only valid items appear', async (t) => {
const ctx = makeSitemapContext(t, async () => ({
data: {
'hydra:member': [
{ 'hydra:member': [{ 'vkm:url': '' }] },
{ 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/valid' }] },
],
},
}));
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
const locMatches = ctx._res.body.match(/<loc>/g);
assert.strictEqual(locMatches?.length ?? 0, 1, 'exactly one <loc> element expected');
assert.ok(ctx._res.body.includes('valid'), 'the valid URL should appear in the loc');
});
// US3 error scenarios (T011b)
test('upstream 503 → 502 with Search service error message', async (t) => {
const searchErr = Object.assign(new Error('Request failed with status code 503'), {
response: { status: 503 },
});
const ctx = makeSitemapContext(t, async () => { throw searchErr; });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 502);
assert.ok(ctx._res.body.includes('Search service error: HTTP 503'), `body was: ${ctx._res.body}`);
});
test('timeout ECONNABORTED → 504 Search service timeout', async (t) => {
const timeoutErr = Object.assign(new Error('timeout'), { code: 'ECONNABORTED' });
const ctx = makeSitemapContext(t, async () => { throw timeoutErr; });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 504);
assert.ok(ctx._res.body.includes('Search service timeout'), `body was: ${ctx._res.body}`);
});
test('timeout ERR_CANCELED → 504 Search service timeout', async (t) => {
const timeoutErr = Object.assign(new Error('canceled'), { code: 'ERR_CANCELED' });
const ctx = makeSitemapContext(t, async () => { throw timeoutErr; });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 504);
assert.ok(ctx._res.body.includes('Search service timeout'), `body was: ${ctx._res.body}`);
});
test('missing searchApiBaseUrl → 500 Configuration error', async (t) => {
const ctx = makeSitemapContext(t, null, { searchApiBaseUrl: undefined });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 500);
assert.strictEqual(ctx._res.body, 'Configuration error: missing required field: searchApiBaseUrl');
});
test('missing tenant → 500 Configuration error', async (t) => {
const ctx = makeSitemapContext(t, null, { tenant: undefined });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 500);
assert.strictEqual(ctx._res.body, 'Configuration error: missing required field: tenant');
});
test('missing proxyBaseUrl → 500 Configuration error', async (t) => {
const ctx = makeSitemapContext(t, null, { proxyBaseUrl: undefined });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 500);
assert.strictEqual(ctx._res.body, 'Configuration error: missing required field: proxyBaseUrl');
});
});
// ---------------------------------------------------------------------------
// Non-sitemap URL routing — regression guard (T009)
// ---------------------------------------------------------------------------
describe('non-sitemap URL routing', () => {
test('cache hit → no fetch → 200 Authorized', async (t) => {
const ctx = makeContext(t, {
req: { url: '/', method: 'GET', headers: {} },
axios: {
post: t.mock.fn(async () => { throw new Error('should not be called'); }),
get: t.mock.fn(),
},
});
// Pre-seed valid token
ctx._store['authorization:token'] = 'cached-tok';
ctx._store['authorization:expiry'] = '9999999999';
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
assert.strictEqual(ctx._res.body, 'Authorized');
// axios.post was set to throw, so if it was called the test would fail
});
test('cache miss → fresh fetch → 200 Authorized', async (t) => {
const ctx = makeContext(t, {
req: { url: '/', method: 'GET', headers: {} },
});
// No pre-seeded token → cache miss
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
assert.strictEqual(ctx._res.body, 'Authorized');
// Verify token was written to Redis
const hSetCalls = ctx._redis.hSet.mock.calls;
const tokenCall = hSetCalls.find(c => c.arguments[0] === 'authorization' && c.arguments[1] === 'token');
assert.ok(tokenCall, 'hSet should be called with token');
assert.strictEqual(tokenCall.arguments[2], 'mock-token');
});
test('token service down (ECONNABORTED) → 401 Unauthorized', async (t) => {
const timeoutErr = Object.assign(new Error('timeout'), { code: 'ECONNABORTED' });
const ctx = makeContext(t, {
req: { url: '/', method: 'GET', headers: {} },
axios: {
post: t.mock.fn(async () => { throw timeoutErr; }),
get: t.mock.fn(),
},
});
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 401);
assert.ok(ctx._res.body.startsWith('Unauthorized:'), `body was: ${ctx._res.body}`);
});
});