feat(002): add sitemap generation feature

- Refactor kmeContentSourceAdapter.js into getValidToken(), oidcAuthFlow(),
  and sitemapFlow(); add sitemap generation using hydra:member response structure
- Add searchApiBaseUrl, tenant, proxyBaseUrl fields to kme_CSA_settings.json
  and kme_CSA_settings.json.example
- Add 17 unit tests for sitemap flow and non-sitemap routing regression
- Add 5 contract tests for sitemap endpoint (proxy-http.test.js)
- Add [Unreleased] sitemap entry to CHANGELOG.md
- Add full specs/002-sitemap-generation/ artifact directory
  (spec, plan, tasks, data-model, contracts, research, quickstart, checklist)
- Update constitution.md: add redis as permitted global, refresh
  kme_CSA_settings references
- Update copilot-instructions.md SPECKIT marker to sitemap plan
This commit is contained in:
2026-04-22 22:08:08 -05:00
parent 49a6b2e4e7
commit 50b87297d2
17 changed files with 1879 additions and 40 deletions

View File

@@ -4,6 +4,7 @@ import vm from 'node:vm';
import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';
import { create as xmlBuilder } from 'xmlbuilder2';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
@@ -54,6 +55,9 @@ function makeContext(t, overrides = {}) {
post: t.mock.fn(async () => ({
data: { id_token: 'mock-token', expires_in: 9_999_999_999 },
})),
get: t.mock.fn(async () => ({
data: { items: [] },
})),
};
const ctx = vm.createContext({
@@ -62,6 +66,7 @@ function makeContext(t, overrides = {}) {
axios: axiosMock,
redis,
kme_CSA_settings,
xmlBuilder,
req: { url: '/', method: 'GET', headers: {} },
res,
...overrides,
@@ -157,7 +162,7 @@ describe('US3: authentication failure handling', () => {
response: { status: 401 },
});
const ctx = makeContext(t, {
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
});
await runScript(ctx);
@@ -169,7 +174,7 @@ describe('US3: authentication failure handling', () => {
test('timeout (ECONNABORTED) → 401 Unauthorized: token service timeout', async (t) => {
const axiosError = Object.assign(new Error('timeout'), { code: 'ECONNABORTED' });
const ctx = makeContext(t, {
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
});
await runScript(ctx);
@@ -181,7 +186,7 @@ describe('US3: authentication failure handling', () => {
test('timeout (ERR_CANCELED) → 401 Unauthorized: token service timeout', async (t) => {
const axiosError = Object.assign(new Error('canceled'), { code: 'ERR_CANCELED' });
const ctx = makeContext(t, {
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
});
await runScript(ctx);
@@ -194,6 +199,7 @@ describe('US3: authentication failure handling', () => {
const ctx = makeContext(t, {
axios: {
post: t.mock.fn(async () => ({ data: { expires_in: 9999 } })),
get: t.mock.fn(),
},
});
@@ -207,6 +213,7 @@ describe('US3: authentication failure handling', () => {
const ctx = makeContext(t, {
axios: {
post: t.mock.fn(async () => ({ data: { id_token: 'a-token' } })),
get: t.mock.fn(),
},
});
@@ -267,7 +274,7 @@ describe('stampede guard', () => {
await new Promise(resolve => setTimeout(resolve, 50));
return { data: { id_token: 'stampede-token', expires_in: 9_999_999_999 } };
});
const sharedAxios = { post: mockAxiosPost };
const sharedAxios = { post: mockAxiosPost, get: t.mock.fn() };
// Build two contexts sharing kme_CSA_settings, redis, and axios references
function makeRes(tctx) {
@@ -286,13 +293,13 @@ describe('stampede guard', () => {
const ctx1 = vm.createContext({
URLSearchParams, console, axios: sharedAxios,
redis, kme_CSA_settings,
redis, kme_CSA_settings, xmlBuilder,
req: { url: '/', method: 'GET', headers: {} },
res: res1,
});
const ctx2 = vm.createContext({
URLSearchParams, console, axios: sharedAxios,
redis, kme_CSA_settings,
redis, kme_CSA_settings, xmlBuilder,
req: { url: '/', method: 'GET', headers: {} },
res: res2,
});
@@ -309,3 +316,205 @@ describe('stampede guard', () => {
assert.strictEqual(res2.body, 'Authorized');
});
});
// ---------------------------------------------------------------------------
// Sitemap flow — US1 (T004)
// ---------------------------------------------------------------------------
describe('sitemap flow', () => {
  /**
   * Builds a vm context for a `GET /sitemap.xml` request.
   *
   * The context comes from makeContext() and is then adjusted in place:
   * sitemap settings are written onto `ctx.kme_CSA_settings`, a token and
   * expiry are pre-seeded in `ctx._store`, and `ctx._axios.get` is replaced.
   *
   * @param {object} t - node:test context; used for `t.mock.fn`.
   * @param {Function|null} [axiosGetImpl] - implementation for `axios.get`.
   *   When null/undefined, a stub resolving to an empty `hydra:member` list is used.
   * @param {object} [settingsOverrides] - values assigned over the default
   *   sitemap settings; pass `undefined` for a key to simulate a missing field.
   * @returns {object} the prepared vm context.
   */
  function makeSitemapContext(t, axiosGetImpl, settingsOverrides = {}) {
    const ctx = makeContext(t, {
      req: { url: '/sitemap.xml', method: 'GET', headers: {} },
    });

    // Defaults first, overrides last, so an explicit `undefined` override wins.
    Object.assign(
      ctx.kme_CSA_settings,
      {
        searchApiBaseUrl: 'https://search.example.com/api',
        tenant: 'test-tenant',
        proxyBaseUrl: 'https://proxy.example.com',
      },
      settingsOverrides,
    );

    // Pre-seed token cache so getValidToken() returns immediately
    ctx._store['authorization:token'] = 'sitemap-token';
    ctx._store['authorization:expiry'] = '9999999999';

    // Replace axios.get with the provided implementation
    const emptyResult = async () => ({ data: { 'hydra:member': [] } });
    ctx._axios.get = t.mock.fn(axiosGetImpl ?? emptyResult);
    return ctx;
  }

  /**
   * Builds a search-API style response: one outer `hydra:member` entry per
   * URL, each wrapping a single inner member that carries `vkm:url`.
   *
   * @param {string[]} urls - values for `vkm:url`, in order.
   * @returns {{data: object}} axios-like response object.
   */
  function searchResponse(urls) {
    return {
      data: {
        'hydra:member': urls.map((url) => ({ 'hydra:member': [{ 'vkm:url': url }] })),
      },
    };
  }

  test('happy path — items present → 200 with correct XML and loc values', async (t) => {
    const ctx = makeSitemapContext(t, async () => searchResponse([
      'https://kme.example.com/doc-1',
      'https://kme.example.com/doc-2',
    ]));

    await runScript(ctx);

    const { statusCode, headers, body } = ctx._res;
    assert.strictEqual(statusCode, 200);
    assert.strictEqual(headers['Content-Type'], 'application/xml');
    // The XML declaration is only valid as the very first bytes of the document,
    // so check the prefix rather than mere presence.
    assert.ok(body.startsWith('<?xml'), 'body should start with XML declaration');
    assert.ok(body.includes('<urlset'), 'body should contain urlset');
    for (const doc of ['doc-1', 'doc-2']) {
      assert.ok(
        body.includes(`<loc>https://proxy.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2F${doc}</loc>`),
        `body should contain encoded loc for ${doc}`,
      );
    }
  });

  test('happy path — zero items → 200 with empty urlset', async (t) => {
    const ctx = makeSitemapContext(t, async () => searchResponse([]));

    await runScript(ctx);

    const { statusCode, headers, body } = ctx._res;
    assert.strictEqual(statusCode, 200);
    assert.strictEqual(headers['Content-Type'], 'application/xml');
    assert.ok(body.includes('<urlset'), 'body should contain urlset');
    assert.ok(!body.includes('<url>'), 'body should not contain url elements');
  });

  test('items with empty vkm:url filtered — only valid items appear', async (t) => {
    const ctx = makeSitemapContext(t, async () => searchResponse([
      '',
      'https://kme.example.com/valid',
    ]));

    await runScript(ctx);

    const { statusCode, body } = ctx._res;
    assert.strictEqual(statusCode, 200);
    const locMatches = body.match(/<loc>/g);
    assert.strictEqual(locMatches?.length ?? 0, 1, 'exactly one <loc> element expected');
    assert.ok(body.includes('valid'), 'the valid URL should appear in the loc');
  });

  // US3 error scenarios (T011b)
  test('upstream 503 → 502 with Search service error message', async (t) => {
    const searchErr = Object.assign(new Error('Request failed with status code 503'), {
      response: { status: 503 },
    });
    const ctx = makeSitemapContext(t, async () => { throw searchErr; });

    await runScript(ctx);

    assert.strictEqual(ctx._res.statusCode, 502);
    assert.ok(ctx._res.body.includes('Search service error: HTTP 503'), `body was: ${ctx._res.body}`);
  });

  // Both axios timeout codes are expected to map to the same 504 response.
  const timeoutCases = [
    { code: 'ECONNABORTED', message: 'timeout' },
    { code: 'ERR_CANCELED', message: 'canceled' },
  ];
  for (const { code, message } of timeoutCases) {
    test(`timeout ${code} → 504 Search service timeout`, async (t) => {
      const timeoutErr = Object.assign(new Error(message), { code });
      const ctx = makeSitemapContext(t, async () => { throw timeoutErr; });

      await runScript(ctx);

      assert.strictEqual(ctx._res.statusCode, 504);
      assert.ok(ctx._res.body.includes('Search service timeout'), `body was: ${ctx._res.body}`);
    });
  }

  // Each required sitemap setting, when undefined, must yield a 500 naming the field.
  for (const field of ['searchApiBaseUrl', 'tenant', 'proxyBaseUrl']) {
    test(`missing ${field} → 500 Configuration error`, async (t) => {
      const ctx = makeSitemapContext(t, null, { [field]: undefined });

      await runScript(ctx);

      assert.strictEqual(ctx._res.statusCode, 500);
      assert.strictEqual(ctx._res.body, `Configuration error: missing required field: ${field}`);
    });
  }
});
// ---------------------------------------------------------------------------
// Non-sitemap URL routing — regression guard (T009)
// ---------------------------------------------------------------------------
describe('non-sitemap URL routing', () => {
  // Requests to '/' must keep the pre-sitemap behaviour: answer 'Authorized'
  // when a token is available and 'Unauthorized: …' when the token fetch fails.

  test('cache hit → no fetch → 200 Authorized', async (t) => {
    // Keep a handle on the mock so the "no fetch" half of the test name can be
    // asserted directly instead of relying on the thrown error surfacing.
    const postMock = t.mock.fn(async () => { throw new Error('should not be called'); });
    const ctx = makeContext(t, {
      req: { url: '/', method: 'GET', headers: {} },
      axios: {
        post: postMock,
        get: t.mock.fn(),
      },
    });
    // Pre-seed valid token
    ctx._store['authorization:token'] = 'cached-tok';
    ctx._store['authorization:expiry'] = '9999999999';

    await runScript(ctx);

    assert.strictEqual(ctx._res.statusCode, 200);
    assert.strictEqual(ctx._res.body, 'Authorized');
    // Explicit guard: a cache hit must not reach the token endpoint, even if
    // the script under test were to catch the mock's error.
    assert.strictEqual(postMock.mock.calls.length, 0, 'axios.post should not be called on cache hit');
  });

  test('cache miss → fresh fetch → 200 Authorized', async (t) => {
    const ctx = makeContext(t, {
      req: { url: '/', method: 'GET', headers: {} },
    });
    // No pre-seeded token → cache miss

    await runScript(ctx);

    assert.strictEqual(ctx._res.statusCode, 200);
    assert.strictEqual(ctx._res.body, 'Authorized');
    // Verify token was written to Redis
    const tokenCall = ctx._redis.hSet.mock.calls.find(({ arguments: args }) => (
      args[0] === 'authorization' && args[1] === 'token'
    ));
    assert.ok(tokenCall, 'hSet should be called with token');
    assert.strictEqual(tokenCall.arguments[2], 'mock-token');
  });

  test('token service down (ECONNABORTED) → 401 Unauthorized', async (t) => {
    const timeoutErr = Object.assign(new Error('timeout'), { code: 'ECONNABORTED' });
    const ctx = makeContext(t, {
      req: { url: '/', method: 'GET', headers: {} },
      axios: {
        post: t.mock.fn(async () => { throw timeoutErr; }),
        get: t.mock.fn(),
      },
    });

    await runScript(ctx);

    assert.strictEqual(ctx._res.statusCode, 401);
    assert.ok(ctx._res.body.startsWith('Unauthorized:'), `body was: ${ctx._res.body}`);
  });
});