feat(002): add sitemap generation feature

- Refactor kmeContentSourceAdapter.js into getValidToken(), oidcAuthFlow(),
  and sitemapFlow(); add sitemap generation using hydra:member response structure
- Add searchApiBaseUrl, tenant, proxyBaseUrl fields to kme_CSA_settings.json
  and kme_CSA_settings.json.example
- Add 17 unit tests for sitemap flow and non-sitemap routing regression
- Add 5 contract tests for sitemap endpoint (proxy-http.test.js)
- Add [Unreleased] sitemap entry to CHANGELOG.md
- Add full specs/002-sitemap-generation/ artifact directory
  (spec, plan, tasks, data-model, contracts, research, quickstart, checklist)
- Update constitution.md: add redis as permitted global, refresh
  kme_CSA_settings references
- Update copilot-instructions.md SPECKIT marker to sitemap plan
This commit is contained in:
2026-04-22 22:08:08 -05:00
parent 49a6b2e4e7
commit 50b87297d2
17 changed files with 1879 additions and 40 deletions

View File

@@ -6,6 +6,7 @@ import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';
import axios from 'axios';
import { create as xmlBuilder } from 'xmlbuilder2';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
@@ -15,12 +16,12 @@ const proxyCode = readFileSync(proxyPath, 'utf-8');
const proxyScript = new vm.Script(proxyCode, { filename: 'kmeContentSourceAdapter.js' });
/**
* Start a minimal HTTP server that handles all POST requests with a fixed JSON body.
* Start a minimal HTTP server that handles all requests with a fixed JSON body.
* @param {number} statusCode
* @param {object} responseBody
* @returns {Promise<{ server: http.Server, url: string, close: () => Promise<void> }>}
*/
function startMockTokenServer(statusCode, responseBody) {
function startMockServer(statusCode, responseBody) {
return new Promise((resolve, reject) => {
const server = http.createServer((req, res) => {
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
@@ -36,6 +37,11 @@ function startMockTokenServer(statusCode, responseBody) {
});
}
/**
* Backwards-compatible alias for startMockServer.
* Tests that still call startMockTokenServer get the very same function
* under its previous name; no separate behaviour is defined here.
*/
const startMockTokenServer = startMockServer;
/** Build an in-memory Redis fake. */
function makeRedisFake() {
const _store = {};
@@ -76,6 +82,7 @@ describe('proxy HTTP contract: 200 OK', () => {
URLSearchParams,
console,
axios,
xmlBuilder,
redis: makeRedisFake(),
kme_CSA_settings: {
tokenUrl: mock.url,
@@ -113,6 +120,7 @@ describe('proxy HTTP contract: 401 Unauthorized', () => {
URLSearchParams,
console,
axios,
xmlBuilder,
redis: makeRedisFake(),
kme_CSA_settings: {
tokenUrl: mock.url,
@@ -135,3 +143,159 @@ describe('proxy HTTP contract: 401 Unauthorized', () => {
}
});
});
// ---------------------------------------------------------------------------
// Contract: sitemap endpoint (T005, T012)
// ---------------------------------------------------------------------------
describe('sitemap endpoint', () => {
/**
 * Assemble the VM sandbox used by the sitemap contract tests.
 *
 * The Redis fake is seeded with a token and a far-future expiry before the
 * context is created, so the adapter is expected to take the cached-token
 * path and never contact the token endpoint.
 *
 * @param {{ searchUrl: string, tokenUrl?: string }} options
 *   searchUrl becomes `searchApiBaseUrl`; tokenUrl is optional.
 * @returns {object} the contextified sandbox, with the response fake also
 *   reachable as `_res` for assertions.
 */
function makeSitemapCtx({ searchUrl, tokenUrl }) {
  const tokenCache = makeRedisFake();
  // Pre-seed a valid token so no token fetch is needed
  tokenCache.hSet('authorization', 'token', 'sitemap-contract-token');
  tokenCache.hSet('authorization', 'expiry', '9999999999');

  const response = makeRes();

  const settings = {
    tokenUrl: tokenUrl ?? 'http://127.0.0.1:1', // not used (cache hit)
    username: 'user',
    password: 'pass',
    clientId: 'client',
    scope: 'openid',
    searchApiBaseUrl: searchUrl,
    tenant: 'test',
    proxyBaseUrl: 'https://proxy.example.com',
  };
  const request = { url: '/sitemap.xml', method: 'GET', headers: {} };

  const sandbox = vm.createContext({
    URLSearchParams,
    console,
    axios,
    xmlBuilder,
    redis: tokenCache,
    kme_CSA_settings: settings,
    req: request,
    res: response,
  });
  // Expose the response fake so callers can inspect what the adapter wrote.
  sandbox._res = response;
  return sandbox;
}
// Happy path: the search mock answers with one nested hydra:member entry
// carrying a vkm:url, and the adapter must answer with an XML sitemap.
test('full round-trip GET /sitemap.xml → 200 application/xml with loc elements', async () => {
  const searchPayload = {
    'hydra:member': [
      { 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/doc-1' }] },
    ],
  };
  const searchMock = await startMockServer(200, searchPayload);
  try {
    const ctx = makeSitemapCtx({ searchUrl: searchMock.url });
    await proxyScript.runInContext(ctx);

    const reply = ctx._res;
    assert.strictEqual(reply.statusCode, 200);

    const contentType = reply.headers['Content-Type'];
    assert.ok(contentType.includes('application/xml'), `Content-Type was: ${contentType}`);

    const xml = reply.body;
    assert.ok(xml.startsWith('<?xml'), 'body should start with XML declaration');
    assert.ok(xml.includes('<loc>'), 'body should contain a loc element');
  } finally {
    // Always release the mock's port, even when an assertion fails.
    await searchMock.close();
  }
});
// Empty search result: the adapter must still emit a valid <urlset> document,
// just without any <url> children.
test('empty results round-trip → 200 application/xml with urlset and no url element', async () => {
  const emptyPayload = { 'hydra:member': [] };
  const searchMock = await startMockServer(200, emptyPayload);
  try {
    const ctx = makeSitemapCtx({ searchUrl: searchMock.url });
    await proxyScript.runInContext(ctx);

    const reply = ctx._res;
    assert.strictEqual(reply.statusCode, 200);

    const contentType = reply.headers['Content-Type'];
    assert.ok(contentType.includes('application/xml'), `Content-Type was: ${contentType}`);

    const xml = reply.body;
    assert.ok(xml.includes('<urlset'), 'body should contain urlset');
    const hasUrlElement = xml.includes('<url>');
    assert.ok(!hasUrlElement, 'body should not contain url elements for empty results');
  } finally {
    // Always release the mock's port, even when an assertion fails.
    await searchMock.close();
  }
});
// Upstream failure: a 503 from the search API must surface as 502 from the adapter.
test('search server returns 503 → adapter returns 502', async () => {
  const failingSearch = await startMockServer(503, { error: 'Service Unavailable' });
  try {
    const ctx = makeSitemapCtx({ searchUrl: failingSearch.url });
    await proxyScript.runInContext(ctx);

    const reply = ctx._res;
    // The body is included in the failure message to make a wrong status easy to diagnose.
    assert.strictEqual(reply.statusCode, 502, `body was: ${reply.body}`);
  } finally {
    await failingSearch.close();
  }
});
// Timeout contract: when the search API accepts a connection but never
// answers, the adapter must give up and reply 504 before the 12s deadline.
//
// Two safeguards keep this test from wedging the whole suite:
//  - a watchdog races the adapter call, so an adapter that never times out
//    produces a failure instead of an endless await;
//  - sockets still parked on the hanging handler are dropped before close(),
//    because http.Server#close() waits for open connections to end.
test('search server hangs > 10s → adapter returns 504 within 12s', async () => {
  const DEADLINE_MS = 12000;

  // Server that accepts connections but never responds
  const hangingServer = http.createServer(() => { /* intentionally hang */ });
  await new Promise((resolve, reject) => {
    hangingServer.once('error', reject);
    hangingServer.listen(0, '127.0.0.1', resolve);
  });
  const { port } = hangingServer.address();
  const searchUrl = `http://127.0.0.1:${port}`;

  let watchdog;
  try {
    const ctx = makeSitemapCtx({ searchUrl });
    const start = Date.now();
    await Promise.race([
      proxyScript.runInContext(ctx),
      new Promise((_, reject) => {
        watchdog = setTimeout(
          () => reject(new Error(`adapter did not respond within ${DEADLINE_MS}ms`)),
          DEADLINE_MS,
        );
      }),
    ]);
    const elapsed = Date.now() - start;
    assert.strictEqual(ctx._res.statusCode, 504, `body was: ${ctx._res.body}`);
    assert.ok(elapsed < DEADLINE_MS, `Should respond within 12s, took ${elapsed}ms`);
  } finally {
    clearTimeout(watchdog);
    // Optional call: closeAllConnections only exists on newer Node releases.
    hangingServer.closeAllConnections?.();
    await new Promise((resolve, reject) => {
      hangingServer.close((err) => (err ? reject(err) : resolve()));
    });
  }
});
});
// ---------------------------------------------------------------------------
// Non-sitemap endpoint regression (T010)
// ---------------------------------------------------------------------------
describe('non-sitemap endpoint (regression)', () => {
// Regression guard: a plain GET / must still go through the OIDC flow and
// answer "Authorized" now that sitemap routing exists.
test('GET / with valid OIDC credentials → 200 Authorized', async () => {
  const tokenPayload = {
    id_token: 'regression-token',
    expires_in: 9_999_999_999,
  };
  const mock = await startMockTokenServer(200, tokenPayload);
  try {
    const res = makeRes();
    const settings = {
      tokenUrl: mock.url,
      username: 'user',
      password: 'pass',
      clientId: 'client',
      scope: 'openid',
    };
    const sandbox = vm.createContext({
      URLSearchParams,
      console,
      axios,
      xmlBuilder,
      redis: makeRedisFake(), // empty cache, so the token has to come from the mock
      kme_CSA_settings: settings,
      req: { url: '/', method: 'GET', headers: {} },
      res,
    });
    await proxyScript.runInContext(sandbox);

    const { statusCode, body } = res;
    assert.strictEqual(statusCode, 200);
    assert.strictEqual(body, 'Authorized');
  } finally {
    await mock.close();
  }
});
});