Merge branch '002-sitemap-generation' into main

- feat(002): sitemap generation via KME search API
- chore: bump version 0.1.0 → 0.2.0
- refactor: extract helpers into kmeContentSourceAdapterHelpers.js

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-04-22 22:22:21 -05:00
19 changed files with 2042 additions and 125 deletions

View File

@@ -0,0 +1,7 @@
## Active Technologies
- Node.js ≥18, ESM (`"type": "module"`) + `axios` (HTTP), `redis` (token cache), `xmlbuilder2` (XML — already injected as `xmlBuilder`), `uuid`, `jsonwebtoken` — all already in `package.json` (002-sitemap-generation)
- Redis read/write (`hGet`/`hSet`) for OIDC token cache only — no new storage (002-sitemap-generation)
## Recent Changes
- 002-sitemap-generation: Added Node.js ≥18, ESM (`"type": "module"`) + `axios` (HTTP), `redis` (token cache), `xmlbuilder2` (XML — already injected as `xmlBuilder`), `uuid`, `jsonwebtoken` — all already in `package.json`

View File

@@ -1,7 +1,7 @@
<!-- SPECKIT START -->
For additional context about technologies to be used, project structure,
shell commands, and other important information, read the current plan at
`specs/001-oidc-proxy-script/plan.md`
`specs/002-sitemap-generation/plan.md`
<!-- SPECKIT END -->
## Project Overview

View File

@@ -1,3 +1,3 @@
{
"feature_directory": "specs/001-oidc-proxy-script"
"feature_directory": "specs/002-sitemap-generation"
}

View File

@@ -297,8 +297,9 @@ Follow-up TODOs:
- `jwt` - JSON Web Token library for authentication
- `xmlBuilder` - XML document builder
- `uuidv4` - UUID generator
- `redis` - Redis client for token caching and shared state
- `adapterHelper` - Helper functions (loaded from src/globalVariables/)
- `adapter_settings` - Business data only (service account, Drive query, sitemap settings)
- `kme_CSA_settings` - Business data only (OIDC credentials, search API config, sitemap settings)
- `req` - HTTP request object (includes req.params with routing metadata)
- `res` - HTTP response object
@@ -440,6 +441,7 @@ const globalVMContext = {
uuidv4,
jwt,
xmlBuilder,
redis, // Connected Redis client for token caching
};
// Load dynamic data from src/globalVariables/ directory
@@ -505,14 +507,21 @@ script.runInContext(context);
- Package: `xmlbuilder2` (create function)
- Injected from: `globalVMContext.xmlBuilder`
7. **redis** - Redis client
- Purpose: Token caching and shared state across requests
- Usage: `await redis.hGet('key', 'field')`, `await redis.hSet('key', 'field', 'value')`
- Package: `redis` (node-redis v4+, connected client)
- Injected from: `globalVMContext.redis`
- Note: Client is connected before server starts; use `await` for all operations
**Built-in Web APIs:**
7. **URLSearchParams** - URL query string parser (built-in)
8. **URLSearchParams** - URL query string parser (built-in)
- Purpose: Parse and manipulate URL query strings
- Usage: `new URLSearchParams(queryString)`
- Injected from: `globalVMContext.URLSearchParams`
8. **URL** - URL parser (built-in)
9. **URL** - URL parser (built-in)
- Purpose: Parse and manipulate URLs
- Usage: `new URL(urlString)`
- Injected from: `globalVMContext.URL`
@@ -520,14 +529,14 @@ script.runInContext(context);
**Dynamic Data Context Variables:**
9. **Dynamic JSON objects from src/globalVariables/ directory**
10. **Dynamic JSON objects from src/globalVariables/ directory**
- Purpose: Authentication credentials, secrets, API keys, and behavioral configuration
- Pattern: Each `src/globalVariables/filename.json` loaded by server.js → added to `globalVariableContext` → spread into VM context
- Examples:
- `src/globalVariables/adapter_settings.json` → context variable `adapter_settings` (consolidated service account, scopes, drive query, sitemap config)
- `src/globalVariables/kme_CSA_settings.json` → context variable `kme_CSA_settings` (OIDC credentials, search API config, sitemap settings)
- `src/globalVariables/api-keys.json` → context variable `api_keys` (API keys and secrets)
- `src/globalVariables/custom-config.json` → context variable `custom_config` (behavioral settings)
- Usage in src/proxyScripts/proxy.js: Direct variable access `const settings = adapter_settings;`
- Usage in src/proxyScripts/proxy.js: Direct variable access `const settings = kme_CSA_settings;`
- Loading: By server.js at startup using `loadGlobalObjects()` function
- Injection: Via spread operator `...globalVariableContext` in `vm.createContext()`
- **Note**: ALL authentication, secrets, and behavioral configuration MUST be in src/globalVariables/, NEVER in config/default.json

View File

@@ -11,6 +11,21 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
---
## [0.2.0] - 2026-04-23
### Added
- `GET /sitemap.xml` endpoint: returns a well-formed XML Sitemap (Sitemaps protocol 0.9) containing one `<url><loc>` per knowledge item from the KME Knowledge Search Service
- `sitemapFlow()` async function in `kmeContentSourceAdapter.js` — settings validation, OIDC token reuse, search API call, XML build via `xmlBuilder`, 10-second timeout, 502/504/500 error responses
- `getValidToken()` shared helper extracted from the existing OIDC auth flow — used by both sitemap and non-sitemap paths
- URL routing at IIFE entry point: requests ending in `/sitemap.xml` → `sitemapFlow()`, all others → `oidcAuthFlow()`
- Three new fields in `src/globalVariables/kme_CSA_settings.json`: `searchApiBaseUrl`, `tenant`, `proxyBaseUrl`
- Three new placeholder fields in `src/globalVariables/kme_CSA_settings.json.example`
- Unit tests for sitemap flow: happy path (items present), empty results, `vkm:url` filtering, 502/504/500 error scenarios, non-sitemap regression tests
- Contract tests for sitemap endpoint: full round-trip 200, empty results 200, 502 upstream error, 504 timeout
---
## [0.1.0] - 2026-04-23
### Added

View File

@@ -1,6 +1,6 @@
{
"name": "kme-content-adapter",
"version": "0.1.0",
"version": "0.2.0",
"description": "HTTP proxy adapter to search and export documents from KME",
"type": "module",
"main": "src/server.js",

View File

@@ -0,0 +1,36 @@
# Specification Quality Checklist: Sitemap XML Generation
**Purpose**: Validate specification completeness and quality before proceeding to planning
**Created**: 2025-07-14
**Feature**: [spec.md](../spec.md)
## Content Quality
- [x] No implementation details (languages, frameworks, APIs) — *Note: FR-008/FR-009 reference `xmlBuilder` and the VM sandbox constraint. These are explicitly mandated architectural constraints from the feature description, not incidental implementation choices; they belong in the spec as requirements.*
- [x] Focused on user value and business needs
- [x] Written for non-technical stakeholders — *Technical terms (Redis, OIDC) are domain-specific to this integration; they cannot be abstracted away without losing meaning.*
- [x] All mandatory sections completed — User Scenarios, Requirements, Success Criteria, Assumptions all present
## Requirement Completeness
- [x] No [NEEDS CLARIFICATION] markers remain
- [x] Requirements are testable and unambiguous — All FRs use precise MUST language with measurable conditions
- [x] Success criteria are measurable — SC-001 (5-second response time), SC-002 (zero silent drops), SC-003 (zero regressions), SC-004 (XSD validation), SC-005 (10-second error bound)
- [x] Success criteria are technology-agnostic — SC-004 references the public Sitemaps XSD standard, not an internal tool
- [x] All acceptance scenarios are defined — 8 acceptance scenarios across 3 user stories
- [x] Edge cases are identified — 5 edge cases documented (expired token, missing `vkm:url`, large result sets, missing settings, missing `xmlBuilder`)
- [x] Scope is clearly bounded — v1 scope explicitly excludes pagination, multi-tenant, and optional sitemap elements
- [x] Dependencies and assumptions identified — 8 assumptions documented
## Feature Readiness
- [x] All functional requirements have clear acceptance criteria — FR-001–FR-013 each trace to at least one acceptance scenario or edge case
- [x] User scenarios cover primary flows — Happy path (P1), backwards compatibility (P2), error/degradation (P3)
- [x] Feature meets measurable outcomes defined in Success Criteria — All 5 success criteria are verifiable without implementation knowledge
- [x] No implementation details leak into specification — Architectural constraints are present as explicit requirements per the feature description
## Notes
- All checklist items pass. The spec is ready for `/speckit.clarify` (optional) or `/speckit.plan`.
- The shape of the Knowledge Search Service response envelope (how results are nested) is assumed in the Assumptions section and flagged for confirmation during implementation.
- SC-001 (5 seconds) and the 10-second timeout assumption are reasonable defaults and can be revisited during planning if the team has SLA data for the KME environment.

View File

@@ -0,0 +1,189 @@
# Contract: Sitemap Endpoint
**Feature**: `002-sitemap-generation`
**Endpoint type**: HTTP GET
**Introduced in**: `002-sitemap-generation`
---
## Overview
The `kme-content-adapter` proxy exposes a single new HTTP endpoint: `GET /sitemap.xml` (or
any URL whose path ends with `/sitemap.xml`). This contract governs the complete observable
behaviour of that endpoint from the consumer's perspective.
---
## Endpoint
```
GET <proxy-base-url>/sitemap.xml
```
The adapter detects sitemap requests by checking whether `req.url` ends with `/sitemap.xml`.
The full path prefix (if any) is determined by how the reverse proxy routes requests to this
adapter.
---
## Request
### Method
`GET`
### Headers
No special request headers required. The adapter uses its own internally cached OIDC token
to authenticate the upstream call to the KME Knowledge Search Service.
### Body
None.
---
## Responses
### 200 OK — Sitemap generated successfully
**Condition**: The KME Knowledge Search Service returned a 2xx response and the sitemap was
built without errors.
**Headers**:
```
Content-Type: application/xml
```
**Body**: A well-formed XML Sitemap document conforming to
[https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd](https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd).
```xml
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>{proxyBaseUrl}?kmeURL={encodeURIComponent(vkmUrl)}</loc>
</url>
<!-- one <url> element per knowledge item with a non-empty vkm:url -->
</urlset>
```
**Empty-result variant** (search service returns zero items):
```xml
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"/>
```
### 500 Internal Server Error — Missing configuration
**Condition**: One or more required settings fields (`searchApiBaseUrl`, `tenant`,
`proxyBaseUrl`) are absent from `kme_CSA_settings`.
**Headers**:
```
Content-Type: text/plain
```
**Body**:
```
Configuration error: missing required field: <fieldName>
```
### 502 Bad Gateway — Upstream search service error
**Condition**: The KME Knowledge Search Service returned a non-2xx HTTP response.
**Headers**:
```
Content-Type: text/plain
```
**Body**:
```
Search service error: HTTP <status>
```
### 504 Gateway Timeout — Upstream search service timeout
**Condition**: The KME Knowledge Search Service connection timed out (>10 000 ms).
**Headers**:
```
Content-Type: text/plain
```
**Body**:
```
Search service timeout
```
---
## `<loc>` URL Format
Each `<loc>` element is constructed as:
```
{proxyBaseUrl}?kmeURL={encodeURIComponent(item['vkm:url'])}
```
Where:
- `proxyBaseUrl` is taken from `kme_CSA_settings.proxyBaseUrl` (e.g., `https://adapter.example.com`)
- `item['vkm:url']` is the raw `vkm:url` value from the search service result
- `encodeURIComponent` percent-encodes the value so it is safe as a query parameter
**Example**:
```
https://adapter.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fknowledge%2Farticle-123
```
---
## Authentication to Upstream (internal, not exposed to consumer)
The adapter authenticates to the KME Knowledge Search Service using:
```
Authorization: OIDC_id_token <token>
```
Where `<token>` is the `id_token` from the OIDC token service, cached in Redis at
`authorization.token`. Token refresh uses the same stampede-guarded fetch already present
in the existing OIDC auth flow.
---
## Existing Endpoint Behaviour (unchanged)
All requests whose URL does **not** end in `/sitemap.xml` continue to use the existing OIDC
authentication flow with no change in response behaviour:
| Condition | Response |
|---|---|
| Valid cached OIDC token | `200 Authorized` (`text/plain`) |
| No cached token — fetch succeeds | `200 Authorized` (`text/plain`) |
| Token service unreachable | `401 Unauthorized: <error>` (`text/plain`) |
---
## Non-Functional Constraints
| Constraint | Value | Source |
|---|---|---|
| Search API timeout | 10 000 ms | Spec assumption |
| Max response time (normal conditions) | < 5 000 ms | SC-001 |
| Max response time (error scenarios) | < 10 000 ms | SC-005 |
| Pagination | Not supported (v1) | Spec assumption |
| Multi-tenant | Not supported (v1) | Spec assumption |
---
## Sitemap Protocol Compliance
The returned XML must validate against the Sitemaps XSD:
`https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd`
Required elements per entry (v1 scope):
- `<loc>` — mandatory
Optional elements **not included** in v1:
- `<lastmod>` — out of scope
- `<changefreq>` — out of scope
- `<priority>` — out of scope

View File

@@ -0,0 +1,202 @@
# Data Model: Sitemap XML Generation
**Feature**: `002-sitemap-generation`
**Branch**: `002-sitemap-generation`
**Date**: 2025-07-14
---
## Entities
### 1. `KnowledgeItem` (external, read-only)
Represents a single document returned by the KME Knowledge Search Service. The adapter reads
this shape from the upstream API response and never persists or mutates it.
| Field | Type | Source | Notes |
|---|---|---|---|
| `vkm:url` | `string \| undefined` | Search API response `items[]` | Canonical document URL. **Required** for sitemap inclusion. Items where this field is absent or empty are silently omitted (FR-006). |
| `title` | `string \| undefined` | Search API response | Not used by the sitemap; present in payload, ignored. |
| *(other fields)* | `any` | Search API response | Ignored; adapter reads only `vkm:url`. |
**Assumed response envelope** (to be verified against live API — see research.md R-002):
```json
{
"items": [
{ "vkm:url": "https://kme.example.com/knowledge/doc-1", "title": "Doc One" },
{ "vkm:url": "https://kme.example.com/knowledge/doc-2", "title": "Doc Two" }
]
}
```
If the root is a bare array, `response.data` itself is treated as the items array.
---
### 2. `SitemapEntry` (derived, in-memory)
Represents a single `<url>/<loc>` entry in the generated sitemap XML. Derived from a `KnowledgeItem`
during the transformation step.
| Field | Type | Derivation |
|---|---|---|
| `loc` | `string` | `${kme_CSA_settings.proxyBaseUrl}?kmeURL=${encodeURIComponent(item['vkm:url'])}` |
**Validation rules**:
- Only produced if `item['vkm:url']` is a non-empty string.
- The resulting `loc` must be a percent-encoded absolute URL.
---
### 3. `SitemapDocument` (output)
The XML document returned in the HTTP response body.
| Attribute | Value |
|---|---|
| XML version | `1.0` |
| Encoding | `UTF-8` |
| Root element | `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` |
| Child elements | Zero or more `<url><loc>…</loc></url>` entries |
**Populated sitemap**:
```xml
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://adapter.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fdoc-1</loc>
</url>
<url>
<loc>https://adapter.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fdoc-2</loc>
</url>
</urlset>
```
**Empty sitemap** (zero results from search API):
```xml
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"/>
```
---
### 4. `OIDCTokenCache` (shared, Redis)
The existing Redis-backed OIDC token store. The sitemap flow **reads** and **writes** this store
using the identical hGet/hSet pattern as the existing OIDC auth flow.
| Redis Key | Field | Type | Description |
|---|---|---|---|
| `authorization` | `token` | `string` | The OIDC `id_token` JWT |
| `authorization` | `expiry` | `string (float)` | Unix timestamp (seconds) when token expires |
**Access pattern in sitemap flow**:
1. `hGet('authorization', 'token')` — read cached token
2. `hGet('authorization', 'expiry')` — read cached expiry
3. If expired or absent: invoke token-refresh sequence → `hSet` both fields
---
### 5. `kme_CSA_settings` (configuration, JSON)
The settings object injected into the VM context from `src/globalVariables/kme_CSA_settings.json`.
This feature extends it with three new fields.
**Full schema after this feature**:
| Field | Type | Existing/New | Required By |
|---|---|---|---|
| `tokenUrl` | `string` | Existing | OIDC token fetch (all flows) |
| `username` | `string` | Existing | OIDC token fetch |
| `password` | `string` | Existing | OIDC token fetch |
| `clientId` | `string` | Existing | OIDC token fetch |
| `scope` | `string` | Existing | OIDC token fetch |
| `searchApiBaseUrl` | `string` | **New** | FR-002, FR-010 |
| `tenant` | `string` | **New** | FR-002, FR-010 |
| `proxyBaseUrl` | `string` | **New** | FR-005, FR-010 |
| `_pendingFetch` | `Promise \| null` | Runtime only (not in JSON) | Stampede guard |
**Validation**:
- Existing fields validated at top of script for all requests (unchanged).
- New fields validated at start of sitemap branch only (FR-011).
---
## State Transitions
### Sitemap Request Lifecycle
```
Incoming GET /…/sitemap.xml
|
v
Validate settings --> 500 Internal Server Error (missing field)
(searchApiBaseUrl,
tenant, proxyBaseUrl)
|
v
Read token from Redis
|
[valid?]
YES | NO
| v
| Refresh token --> 401 Unauthorized (token fetch failed)
| |
+-------+
v
GET <searchApiBaseUrl>/<tenant>
Authorization: OIDC_id_token <token>
timeout: 10 000 ms
|
[success?]
YES | NO
| +--> timeout --> 504 Gateway Timeout
| +--> non-2xx response --> 502 Bad Gateway
v
Map items --> SitemapEntry[]
(skip empty vkm:url)
|
v
Build SitemapDocument (xmlBuilder)
|
v
200 OK
Content-Type: application/xml
Body: <?xml ...><urlset>...</urlset>
```
### Non-Sitemap Request Lifecycle (unchanged)
All requests whose URL does NOT end with `/sitemap.xml` follow the existing OIDC auth flow
exactly as before. No modification to that path.
---
## File Changes
### Modified: `src/globalVariables/kme_CSA_settings.json`
Three new fields added (existing fields unchanged):
```json
{
"tokenUrl": "…",
"username": "…",
"password": "…",
"clientId": "…",
"scope": "…",
"searchApiBaseUrl": "https://kme-search.example.com/api/search",
"tenant": "my-tenant",
"proxyBaseUrl": "https://adapter.example.com"
}
```
### Modified: `src/proxyScripts/kmeContentSourceAdapter.js`
Logic added:
1. URL routing guard at entry point.
2. `sitemapFlow` async block: settings validation, token reuse, search API call, XML build, response.
3. Existing OIDC auth flow moved to `else` branch (no logic changes).
### Modified: `src/globalVariables/kme_CSA_settings.json.example`
Updated to include the three new fields with placeholder values.

View File

@@ -0,0 +1,248 @@
# Implementation Plan: Sitemap XML Generation
**Branch**: `002-sitemap-generation` | **Date**: 2025-07-14 | **Spec**: [spec.md](./spec.md)
**Input**: Feature specification from `/specs/002-sitemap-generation/spec.md`
---
## Summary
Add a `GET /sitemap.xml` route to `kmeContentSourceAdapter.js`. The adapter detects sitemap
requests by URL suffix, obtains a valid OIDC `id_token` from the Redis cache (reusing the
existing stampede-guarded refresh logic), calls the KME Knowledge Search Service, maps each
result's `vkm:url` field to a `<loc>` entry, and returns a standards-compliant XML Sitemap as
`application/xml`. All existing non-sitemap requests are unaffected. Three new fields are added
to `kme_CSA_settings.json` (`searchApiBaseUrl`, `tenant`, `proxyBaseUrl`).
---
## Technical Context
**Language/Version**: Node.js ≥18, ESM (`"type": "module"`)
**Primary Dependencies**: `axios` (HTTP), `redis` (token cache), `xmlbuilder2` (XML — already injected as `xmlBuilder`), `uuid`, `jsonwebtoken` — all already in `package.json`
**Storage**: Redis read/write (`hGet`/`hSet`) for OIDC token cache only — no new storage
**Testing**: Node.js built-in test runner (`node:test`); no external test framework
**Target Platform**: Linux server / container (HTTP proxy adapter)
**Project Type**: HTTP proxy adapter (web-service)
**Performance Goals**: Sitemap response < 5 s p95 under normal conditions (SC-001); error responses < 10 s (SC-005)
**Constraints**:
- Zero `import`/`export` in `kmeContentSourceAdapter.js` (runs in `vm.createContext`)
- No references to `config`, `global.config`, or `process.env` in proxy script
- XML built exclusively with the injected `xmlBuilder` (FR-008)
- No new npm packages; no new source files (monolithic architecture — Section I of constitution)
**Scale/Scope**: Single tenant per deployment; all search results in one API call (no pagination, v1)
---
## Constitution Check
*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.*
| # | Principle | Status | Notes |
|---|---|---|---|
| I | Monolithic architecture | ✅ PASS | All new code added to `kmeContentSourceAdapter.js`; no new source files |
| I (vm.Script) | Zero imports/exports in proxy script | ✅ PASS | Sitemap logic is inlined; no import statements introduced |
| I.0 | No forbidden globals (`config`, `global.config`, `process.env`) | ✅ PASS | Only `kme_CSA_settings`, `redis`, `axios`, `xmlBuilder`, `req`, `res` used |
| I.I | Business logic in proxy.js | ✅ PASS | Auth, API call, XML generation all in `kmeContentSourceAdapter.js` |
| I.II | Separate files only for allowed categories | ✅ PASS | Settings JSON in `src/globalVariables/` (existing pattern) |
| I.III | No new files challenged | ✅ PASS | No new files in `src/` |
| I.IV | New config in `src/globalVariables/` not `config/default.json` | ✅ PASS | Three fields added to `kme_CSA_settings.json` |
| I.V | `xmlBuilder` already in `globalVMContext` | ✅ PASS | `xmlbuilder2` `create` already injected; no server.js changes needed |
| II | API-First Design | ✅ PASS | HTTP contract documented in `contracts/sitemap-endpoint.md` |
| III | Test-First Development | ✅ REQUIRED | Unit + contract tests must be written before/alongside implementation |
| VII | No new dependencies | ✅ PASS | All required packages already installed (`xmlbuilder2`, `axios`, `redis`) |
**Post-design re-check**: All gates still pass. The design introduces zero new files, zero new dependencies, and zero architectural violations.
---
## Project Structure
### Documentation (this feature)
```text
specs/002-sitemap-generation/
├── plan.md # This file (/speckit.plan command output)
├── spec.md # Feature specification
├── research.md # Phase 0 output (/speckit.plan command)
├── data-model.md # Phase 1 output (/speckit.plan command)
├── quickstart.md # Phase 1 output (/speckit.plan command)
├── contracts/ # Phase 1 output (/speckit.plan command)
│ └── sitemap-endpoint.md
└── tasks.md # Phase 2 output (/speckit.tasks command - NOT created by /speckit.plan)
```
### Source Code (repository root)
```text
src/
├── proxyScripts/
│ └── kmeContentSourceAdapter.js # MODIFIED: sitemap branch + token helper added
├── globalVariables/
│ ├── kme_CSA_settings.json # MODIFIED: 3 new fields (searchApiBaseUrl, tenant, proxyBaseUrl)
│ └── kme_CSA_settings.json.example # MODIFIED: updated with new field placeholders
└── server.js # NO CHANGE
tests/
├── unit/
│ └── proxy.test.js # MODIFIED: sitemap test cases added
└── contract/
└── proxy-http.test.js # MODIFIED: sitemap HTTP contract tests added
```
**Structure Decision**: Single-project layout. No new directories. Only the proxy script, its
settings JSON, and the existing test files are modified.
---
## Phase 0: Research Findings
> Full research notes: [research.md](./research.md)
| Research ID | Topic | Decision |
|---|---|---|
| R-001 | Token reuse | Inline shared `getValidToken()` helper in proxy script; branch on URL first |
| R-002 | Search API response shape | Assume `{ items: [...] }`; verify against live API during implementation |
| R-003 | xmlbuilder2 API | `xmlBuilder({...}).ele('urlset',{xmlns:...})…doc.end({})` — no prettyPrint |
| R-004 | Error mapping | Reuse `err.response` / `err.code === ECONNABORTED\|ERR_CANCELED` pattern |
| R-005 | Settings validation | `requiredSitemapFields` guard before any async work → HTTP 500 |
| R-006 | `loc` construction | `` `${proxyBaseUrl}?kmeURL=${encodeURIComponent(item['vkm:url'])}` `` |
**Resolved NEEDS CLARIFICATION**: None remain. All decisions are documented.
---
## Phase 1: Design
### Data Model
> Full data model: [data-model.md](./data-model.md)
**Key entities**:
- `KnowledgeItem` — raw search result with `vkm:url` (read-only, from upstream API)
- `SitemapEntry` — `{ loc: string }` derived in-memory from `KnowledgeItem`
- `SitemapDocument` — serialised XML output (`urlset` + `url` elements)
- `OIDCTokenCache` — shared Redis store (unchanged; `hGet`/`hSet` pattern reused)
- `kme_CSA_settings` — extended JSON settings (3 new fields)
### Contracts
> Full contract: [contracts/sitemap-endpoint.md](./contracts/sitemap-endpoint.md)
| Scenario | Status | Response |
|---|---|---|
| Search succeeds, items present | 200 | `application/xml` sitemap with `<url>` entries |
| Search succeeds, zero items | 200 | `application/xml` empty `<urlset/>` |
| Missing settings field | 500 | `text/plain` descriptive message |
| Upstream non-2xx | 502 | `text/plain` upstream error |
| Upstream timeout | 504 | `text/plain` timeout message |
### Implementation Design
**Entry point restructure** (single IIFE, no imports):
```javascript
(async () => {
// FR-001: Route on URL suffix
if (req.url.endsWith('/sitemap.xml')) {
await sitemapFlow();
} else {
await oidcAuthFlow(); // existing logic, moved to inner async function
}
})();
```
**`sitemapFlow` logic**:
```javascript
async function sitemapFlow() {
// FR-011: Validate required settings
const required = ['searchApiBaseUrl', 'tenant', 'proxyBaseUrl'];
for (const f of required) {
if (!kme_CSA_settings[f]) {
res.writeHead(500, { 'Content-Type': 'text/plain' });
res.end('Configuration error: missing required field: ' + f);
return;
}
}
// FR-003: Obtain valid OIDC token (shared helper with existing flow)
const token = await getValidToken(); // throws on failure → caught by outer try/catch
// FR-002: Call KME Knowledge Search Service
const { searchApiBaseUrl, tenant, proxyBaseUrl } = kme_CSA_settings;
const searchResponse = await axios.get(
`${searchApiBaseUrl}/${tenant}`,
{
headers: { Authorization: `OIDC_id_token ${token}` },
timeout: 10_000,
}
);
// Extract items (R-002: assume { items: [...] } or bare array)
const items = searchResponse.data.items ?? searchResponse.data ?? [];
// FR-004, FR-005, FR-006, FR-008: Build sitemap XML
const doc = xmlBuilder({ version: '1.0', encoding: 'UTF-8' });
const urlset = doc.ele('urlset', { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' });
for (const item of items) {
const vkmUrl = item['vkm:url'];
if (!vkmUrl) continue; // FR-006: omit silently
const loc = `${proxyBaseUrl}?kmeURL=${encodeURIComponent(vkmUrl)}`;
urlset.ele('url').ele('loc').txt(loc).up().up();
}
const xml = doc.end({ prettyPrint: false });
// FR-007: Respond
res.writeHead(200, { 'Content-Type': 'application/xml' });
res.end(xml);
}
```
**Error handling** (wrapping `sitemapFlow` catch):
- `err.code === 'ECONNABORTED' || err.code === 'ERR_CANCELED'` → 504
- `err.response` defined → 502 `Search service error: HTTP ${err.response.status}`
- other → 502 `Search service error: ${err.message}`
**`getValidToken` helper** (shared inline function; extract from existing OIDC flow):
Encapsulates steps 2–6 of the existing flow:
- `hGet('authorization', 'token')` / `hGet('authorization', 'expiry')`
- Cache hit → return token
- Stampede guard → queue on in-flight promise
- Cache miss → `axios.post(tokenUrl, ...)` → `hSet` both fields
- Returns the `id_token` string; throws on failure
**Token fetch failure in sitemap context**: If `getValidToken` throws, the outer catch
returns `401 Unauthorized: <message>` (same as existing flow).
### Test Plan
**Unit tests** (`tests/unit/proxy.test.js`) — new `describe('sitemap flow')` block:
| Scenario | Mock | Assert |
|---|---|---|
| Happy path: items present | axios.get → `{ items: [{ 'vkm:url': '...' }] }` | 200, `application/xml`, `<loc>` |
| Happy path: zero items | axios.get → `{ items: [] }` | 200, empty `<urlset/>` |
| Items with empty vkm:url | mix of valid + empty | only non-empty items in output |
| Missing `searchApiBaseUrl` | settings without field | 500, descriptive message |
| Missing `tenant` | settings without field | 500, descriptive message |
| Missing `proxyBaseUrl` | settings without field | 500, descriptive message |
| Upstream 503 | axios.get rejects with `{ response: { status: 503 } }` | 502 |
| Upstream timeout | axios.get rejects with `{ code: 'ECONNABORTED' }` | 504 |
| Non-sitemap URL still works | req.url = '/' | existing 200 Authorized behaviour |
**Contract tests** (`tests/contract/proxy-http.test.js`) — new `describe('sitemap endpoint')` block:
| Scenario | Setup | Assert |
|---|---|---|
| Full round-trip: GET /sitemap.xml | Mock search server → 200 `{ items: [...] }` | 200, `application/xml`, valid XML with `<loc>` |
| Empty results | Mock search server → 200 `{ items: [] }` | 200, `application/xml`, empty `<urlset/>` |
| Search server returns 503 | Mock → 503 | 502 |
| Search server hangs > 10 s | Mock → never respond | 504 |
---
## Complexity Tracking
> No violations to justify. All gates pass. No entries required.

View File

@@ -0,0 +1,126 @@
# Quickstart: Sitemap XML Generation
**Feature**: `002-sitemap-generation`
**Branch**: `002-sitemap-generation`
---
## What This Feature Does
Adds a `GET /sitemap.xml` endpoint to the `kme-content-adapter` proxy. When a crawler or
sitemap consumer requests this URL, the adapter:
1. Obtains a valid OIDC `id_token` from the Redis cache (refreshing if expired).
2. Calls the KME Knowledge Search Service to retrieve all knowledge items.
3. Builds a standards-compliant XML Sitemap (`urlset`) with one `<loc>` per item.
4. Returns the sitemap as `application/xml` with HTTP 200.
All other requests continue to use the existing OIDC auth flow without modification.
---
## Setup
### 1. Add the new settings fields
Open `src/globalVariables/kme_CSA_settings.json` and add the three new fields:
```json
{
"tokenUrl": "https://<your-oidc-host>/token",
"username": "apiclient",
"password": "<your-password>",
"clientId": "<your-client-id>",
"scope": "openid ...",
"searchApiBaseUrl": "https://<kme-search-host>/api/search",
"tenant": "<your-tenant-id>",
"proxyBaseUrl": "https://<your-adapter-external-url>"
}
```
| Field | Description | Example |
|---|---|---|
| `searchApiBaseUrl` | Base URL of the KME Knowledge Search Service | `https://kme-qa.example.com/search` |
| `tenant` | Tenant identifier appended to the search URL path | `my-org` |
| `proxyBaseUrl` | Externally accessible HTTPS URL of this adapter | `https://proxy.example.com` |
The adapter will call `GET {searchApiBaseUrl}/{tenant}` to retrieve knowledge items.
### 2. Start the adapter
```bash
npm run dev # development (auto-restart on changes)
npm start # production
```
Redis must be running and accessible (default: `redis://localhost:6379`).
---
## Usage
### Request the sitemap
```bash
curl -v http://localhost:3000/sitemap.xml
```
**Expected response**:
```
HTTP/1.1 200 OK
Content-Type: application/xml
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://proxy.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fdoc-1</loc>
</url>
...
</urlset>
```
### Validate the sitemap against the Sitemaps XSD
```bash
# Using xmllint (libxml2)
curl -s http://localhost:3000/sitemap.xml | \
xmllint --schema https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd --noout -
```
---
## Running the Tests
```bash
npm run test:unit # unit tests (VM context mocking, no network)
npm run test:contract # contract tests (real HTTP, mock token/search servers)
npm test # all tests
```
Unit tests live in `tests/unit/proxy.test.js`.
Contract tests live in `tests/contract/proxy-http.test.js`.
---
## Error Scenarios
| Scenario | How to reproduce | Expected response |
|---|---|---|
| Missing `searchApiBaseUrl` | Remove field from `kme_CSA_settings.json`, restart | `500 Configuration error: missing required field: searchApiBaseUrl` |
| Search service down | Point `searchApiBaseUrl` to an unreachable host | `502 Search service error: HTTP <status>` (service responds with an error), `502 Search service error: <message>` (host unreachable), or `504 Search service timeout` |
| Zero results | Search service returns empty items array | `200 OK` with empty `<urlset/>` |
| Items with empty `vkm:url` | (covered by unit tests) | Items silently omitted from sitemap |
---
## Architecture Notes
- **No new files**: All new logic is added directly to
`src/proxyScripts/kmeContentSourceAdapter.js` (monolithic architecture constraint).
- **No new dependencies**: `xmlbuilder2` is already in `package.json` and injected into the
VM context as `xmlBuilder`.
- **Token reuse**: The sitemap flow reuses the existing Redis `hGet`/token-refresh pattern —
no separate auth logic.
- **VM isolation**: The proxy script runs in a `vm.createContext` sandbox. It has access only
to the injected globals listed in `src/server.js` (`axios`, `redis`, `xmlBuilder`,
`kme_CSA_settings`, `req`, `res`, `console`, `URLSearchParams`, `URL`, `crypto`).

View File

@@ -0,0 +1,190 @@
# Research: Sitemap XML Generation
**Feature**: `002-sitemap-generation`
**Branch**: `002-sitemap-generation`
**Date**: 2025-07-14
---
## R-001: Token Reuse — OIDC Cache Pattern
**Decision**: Reuse `redis.hGet('authorization', 'token')` / `redis.hGet('authorization', 'expiry')`
and the existing stampede-guard / token-refresh flow verbatim.
**Rationale**: The existing `kmeContentSourceAdapter.js` already implements a correct, battle-tested
pattern for obtaining a valid OIDC `id_token` from Redis and refreshing it when expired. Duplicating
only the cache-read portion (steps 1–3 of the existing flow) would create divergence. Calling the
full existing logic first and then branching to the sitemap flow avoids that risk while reusing the
security invariants already proven in production.
**Approach in code**: Refactor the top-level IIFE so that:
1. URL routing check happens **first** (before any async work).
2. For sitemap requests, a shared `getValidToken()` helper (inlined in the script, no imports)
performs the identical cache-hit → stampede-guard → refresh → cache-write sequence.
3. For all other requests, the existing flow runs unchanged.
**Alternatives considered**:
- Call the existing OIDC logic unconditionally, then branch: rejected because it adds unnecessary
latency to non-sitemap requests (token check not needed for sitemap but would execute anyway).
- Separate helper file: rejected by the monolithic architecture constraint (Section I, constitution).
---
## R-002: KME Knowledge Search Service API — Response Envelope
**Decision**: Assume the response body is a JSON object with a top-level `items` array. Each element
of `items` is an object whose `vkm:url` property holds the canonical document URL.
**Rationale**: The feature spec states:
> "The `vkm:url` field is present at the top level of each item object in the search results
> array; the exact response envelope shape will be confirmed against the live API during
> implementation."
The most common shape for knowledge/search services is `{ items: [ { "vkm:url": "...", ... } ] }`.
This assumption allows the code to be written and fully unit-tested before live-API access is
available. A single `items` extraction line (`response.data.items ?? response.data`) means the
adaptation to the real shape is a one-line change.
**Concrete assumption**:
```json
{
"items": [
{ "vkm:url": "https://kme.example.com/knowledge/doc-1", "title": "…" },
{ "vkm:url": "https://kme.example.com/knowledge/doc-2", "title": "…" }
]
}
```
**Verification required**: During implementation, run the live API call against
`<searchApiBaseUrl>/<tenant>` and confirm:
1. The top-level key that holds the array (likely `items`, `results`, or the root is directly an
array).
2. That `vkm:url` is a string property, not nested deeper.
**Fallback**: If the root is a bare array, `response.data` itself is used as the items array.
**Alternatives considered**:
- `results` key: equally plausible; the code will use `response.data.items ?? response.data` as a
defensive pattern until confirmed.
- Deeply nested: no evidence for this; rejected pending confirmation.
---
## R-003: xmlbuilder2 `create()` API for Sitemap XML
**Decision**: Use the `xmlBuilder` context variable (which is `xmlbuilder2`'s `create` function)
with the following call chain:
```javascript
const doc = xmlBuilder({ version: '1.0', encoding: 'UTF-8' });
const urlset = doc.ele('urlset', { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' });
for (const item of items) {
urlset.ele('url').ele('loc').txt(locValue).up().up();
}
const xml = doc.end({ prettyPrint: false });
```
**Rationale**: `xmlbuilder2` v4.x `create()` returns a `XMLBuilder` document node. Calling `.ele()`
on it creates the root element. Child elements are built by chaining `.ele()` / `.txt()` / `.up()`.
`doc.end({ prettyPrint: false })` serialises to a string prefixed with `<?xml version="1.0"
encoding="UTF-8"?>`. `prettyPrint: false` is chosen for minimal byte overhead (sitemap consumers
parse XML, not read it).
**Sitemap namespace**: `http://www.sitemaps.org/schemas/sitemap/0.9` — required by the Sitemaps
protocol and the XSD schema referenced in SC-004.
**Validation**: The serialised string must begin with `<?xml` and contain a valid `<urlset>` root.
Unit tests will assert this.
**Alternatives considered**:
- Manual string concatenation: rejected (error-prone escaping, violates FR-008 which requires
xmlBuilder).
- `xmlbuilder` (v1/v2): not the installed package; rejected.
---
## R-004: Axios Error Differentiation — 502 vs 504
**Decision**: Reuse the exact error-detection pattern already present in the script:
| Condition | Status | Detection |
|---|---|---|
| `err.response` is defined | 502 Bad Gateway | Axios sets `err.response` for non-2xx HTTP responses |
| `err.code === 'ECONNABORTED'` | 504 Gateway Timeout | Axios timeout (pre-Node 18) |
| `err.code === 'ERR_CANCELED'` | 504 Gateway Timeout | Axios timeout (Node 18+ / AbortSignal) |
| Other | 502 Bad Gateway | Treated as upstream failure |
**Rationale**: The existing script already uses this exact pattern for token-service errors
(`err.response`, `err.code === 'ECONNABORTED' || err.code === 'ERR_CANCELED'`). Reusing it for
search-service errors ensures consistent error classification across all upstream calls.
**Timeout value**: 10 000 ms, as stated in the spec assumption ("consistent with industry-standard
defaults for proxy-initiated upstream requests").
**Alternatives considered**:
- `AbortController` + `fetch`: not available in the VM context (only `axios` is injected). Rejected.
- Different timeout for search vs auth: spec does not require this; YAGNI.
---
## R-005: Settings Validation — New Fields
**Decision**: At the entry point of the sitemap flow, perform an explicit guard before any async
operation:
```javascript
const requiredSitemapFields = ['searchApiBaseUrl', 'tenant', 'proxyBaseUrl'];
for (const field of requiredSitemapFields) {
if (!kme_CSA_settings[field]) {
res.writeHead(500, { 'Content-Type': 'text/plain' });
res.end('Configuration error: missing required field: ' + field);
return;
}
}
```
**Rationale**: FR-011 requires HTTP 500 with a descriptive message for missing settings. Checking
before any async work means no I/O is attempted against an unconfigured upstream, and the error
message identifies exactly which field is absent.
**The three new fields to add to `kme_CSA_settings.json`**:
| Field | Type | Description |
|---|---|---|
| `searchApiBaseUrl` | string | Base URL of the KME Knowledge Search Service |
| `tenant` | string | Tenant identifier appended to search base URL |
| `proxyBaseUrl` | string | Externally accessible HTTPS URL of this adapter instance |
---
## R-006: `loc` URL Construction and `vkm:url` Encoding
**Decision**: Construct each `<loc>` as:
```javascript
`${proxyBaseUrl}?kmeURL=${encodeURIComponent(item['vkm:url'])}`
```
**Rationale**: FR-005 specifies exactly this pattern. `encodeURIComponent` is a built-in available
inside the VM context without injection (it is a standard JavaScript global). Using it percent-encodes
the `vkm:url` value, producing a safe query-string parameter even if the value contains `://`, `?`,
`#`, or other URL-special characters.
**Empty/missing guard** (FR-006):
```javascript
const vkmUrl = item['vkm:url'];
if (!vkmUrl) continue; // omit silently
```
---
## Summary of All Decisions
| ID | Topic | Decision |
|---|---|---|
| R-001 | Token reuse | Inline shared token-fetch logic; branch on URL first |
| R-002 | Search API response shape | Assume `{ items: [...] }`; verify against live API |
| R-003 | xmlbuilder2 API | `xmlBuilder({...}).ele('urlset', {...})…doc.end({})` |
| R-004 | Error mapping | Reuse existing `err.response` / `err.code` pattern |
| R-005 | Settings validation | Explicit `requiredSitemapFields` guard → HTTP 500 |
| R-006 | `loc` construction | `proxyBaseUrl?kmeURL=encodeURIComponent(vkm:url)` |

View File

@@ -0,0 +1,108 @@
# Feature Specification: Sitemap XML Generation
**Feature Branch**: `002-sitemap-generation`
**Created**: 2025-07-14
**Status**: Draft
## User Scenarios & Testing *(mandatory)*
### User Story 1 — Search Crawler Discovers KME Content (Priority: P1)
A search engine crawler or sitemap consumer sends a `GET` request to the proxy adapter's sitemap endpoint. The adapter fetches all available knowledge items from the KME Knowledge Search Service and returns a standards-compliant `sitemap.xml` document that the crawler can index.
**Why this priority**: This is the core deliverable. Without a valid `sitemap.xml` response, no downstream indexing or content discovery is possible.
**Independent Test**: Can be fully tested by sending `GET /sitemap.xml` to a running adapter instance and verifying the returned XML body and `Content-Type` header, independent of all other routing behaviour.
**Acceptance Scenarios**:
1. **Given** the adapter is running and the KME Knowledge Search Service is available, **When** a consumer sends `GET <proxy-base-url>/sitemap.xml`, **Then** the adapter responds with HTTP 200, `Content-Type: application/xml`, and a body that is a well-formed XML sitemap containing one `<url>/<loc>` entry per knowledge item returned by the search service.
2. **Given** each search result contains a `vkm:url` field, **When** the sitemap is generated, **Then** every `<loc>` value follows the pattern `<proxyBaseUrl>?kmeURL=<vkm:url value>`.
3. **Given** the KME search service returns zero results, **When** the sitemap is generated, **Then** the adapter returns a valid, empty `<urlset>` document (no `<url>` elements) with HTTP 200.
---
### User Story 2 — Non-Sitemap Requests Continue to Use Existing Auth Flow (Priority: P2)
A client sends a request whose URL does *not* end in `/sitemap.xml`. The adapter executes the existing OIDC token-check flow (cache hit/miss, Redis, stampede guard) and responds `200 Authorized` or `401 Unauthorized` exactly as before.
**Why this priority**: Backwards compatibility with the existing OIDC proxy behaviour must be preserved; a regression here would break all current integrations.
**Independent Test**: Can be fully tested by sending any non-sitemap request and confirming the existing `200 Authorized` / `401 Unauthorized` response behaviour is unchanged.
**Acceptance Scenarios**:
1. **Given** a request URL that does not end in `/sitemap.xml`, **When** a valid cached OIDC token exists, **Then** the adapter responds `200 Authorized` with `Content-Type: text/plain`.
2. **Given** a request URL that does not end in `/sitemap.xml`, **When** no cached token exists, **Then** the adapter fetches a fresh OIDC token, caches it, and responds `200 Authorized`.
3. **Given** a request URL that does not end in `/sitemap.xml`, **When** the token service is unreachable, **Then** the adapter responds `401 Unauthorized` as it does today.
---
### User Story 3 — Sitemap Request Fails Gracefully When Search API Is Unavailable (Priority: P3)
When the KME Knowledge Search Service is unreachable or returns an error, the adapter returns a meaningful error response rather than hanging or crashing.
**Why this priority**: Graceful degradation protects the wider proxy from silent failures and aids operator debugging.
**Independent Test**: Can be fully tested by mocking the search API to return an error and confirming the adapter returns a 5xx response with a descriptive message.
**Acceptance Scenarios**:
1. **Given** the Knowledge Search Service returns a non-2xx HTTP status, **When** the sitemap is requested, **Then** the adapter responds with HTTP 502 and a plain-text error message describing the upstream failure.
2. **Given** the Knowledge Search Service connection times out, **When** the sitemap is requested, **Then** the adapter responds with HTTP 504 and a plain-text message indicating a gateway timeout.
---
### Edge Cases
- What happens when the OIDC token is expired at the moment the sitemap request arrives? The same token-refresh logic used by the existing auth flow must be invoked before calling the search API.
- What happens when a knowledge item has a missing or empty `vkm:url` field? That item must be omitted from the sitemap rather than producing a malformed `<loc>` entry.
- What happens when the search API returns a very large number of results? The sitemap should include all returned results; pagination handling is out of scope for v1 (assumption documented below).
- What happens when `searchApiBaseUrl`, `tenant`, or `proxyBaseUrl` are missing from the settings file? The adapter must respond with a `500` error and a descriptive message.
- What happens when `xmlBuilder` is not available in the VM context? The adapter must respond with a `500` error.
## Requirements *(mandatory)*
### Functional Requirements
- **FR-001**: The adapter MUST detect whether the incoming request URL ends with `/sitemap.xml` and route accordingly — to the sitemap generation flow or the existing OIDC auth flow.
- **FR-002**: When generating a sitemap, the adapter MUST retrieve knowledge items by calling the KME Knowledge Search Service at `<searchApiBaseUrl>/<tenant>` using a `GET` request.
- **FR-003**: Every Knowledge Search Service request MUST include an `Authorization` header with the value `OIDC_id_token <token>`, where `<token>` is the cached OIDC `id_token` obtained from Redis or refreshed using the existing stampede-guarded fetch logic.
- **FR-004**: The sitemap response MUST be a valid XML Sitemap conforming to the [Sitemaps protocol](https://www.sitemaps.org/protocol.html), with a `<urlset>` root element and one `<url>/<loc>` element per knowledge item.
- **FR-005**: Each `<loc>` value MUST be constructed as `<proxyBaseUrl>?kmeURL=<vkm:url value>`, where `proxyBaseUrl` is taken from `kme_CSA_settings.proxyBaseUrl`.
- **FR-006**: Knowledge items with a missing or empty `vkm:url` field MUST be silently omitted from the sitemap.
- **FR-007**: The sitemap response MUST be returned with the HTTP header `Content-Type: application/xml`.
- **FR-008**: The XML MUST be built using the `xmlBuilder` utility already available in the VM context — no additional XML libraries may be imported.
- **FR-009**: The proxy script MUST contain zero `import` or `export` statements and MUST NOT reference `config`, `global.config`, or `process.env`.
- **FR-010**: `kme_CSA_settings.json` MUST be extended with three new fields: `searchApiBaseUrl`, `tenant`, and `proxyBaseUrl`.
- **FR-011**: If any required settings field (`searchApiBaseUrl`, `tenant`, `proxyBaseUrl`) is absent at runtime, the adapter MUST respond with HTTP 500 and a descriptive error message.
- **FR-012**: If the Knowledge Search Service responds with a non-2xx status, the adapter MUST respond with HTTP 502 and a plain-text description of the upstream error.
- **FR-013**: If the Knowledge Search Service connection times out, the adapter MUST respond with HTTP 504.
### Key Entities
- **Knowledge Item**: A document stored in KME, identified by a `vkm:url` field in the search result payload. The sitemap `<loc>` is derived from this URL.
- **Sitemap Entry**: A single `<url>/<loc>` element in the generated `sitemap.xml`, representing one indexable knowledge document URL accessible through the proxy adapter.
- **OIDC Token**: The cached `id_token` stored in Redis at `authorization.token`, used to authenticate calls to the Knowledge Search Service.
- **Settings**: Runtime configuration loaded from `kme_CSA_settings.json` and made available to the VM context as the `kme_CSA_settings` variable.
## Success Criteria *(mandatory)*
### Measurable Outcomes
- **SC-001**: A consumer requesting `/sitemap.xml` receives a well-formed, valid XML Sitemap document in under 5 seconds under normal network conditions.
- **SC-002**: All knowledge items returned by the search service are represented in the sitemap; zero items are silently dropped unless their `vkm:url` is empty or missing.
- **SC-003**: All existing non-sitemap requests continue to receive the same response behaviour (`200 Authorized` / `401 Unauthorized`) with no change in response time or correctness — zero regressions.
- **SC-004**: The returned `sitemap.xml` passes validation against the [Sitemaps XSD schema](https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd).
- **SC-005**: Error scenarios (upstream timeout, missing settings, unavailable search service) produce an appropriate HTTP error status code and a human-readable message within 10 seconds.
## Assumptions
- The KME Knowledge Search Service returns all relevant knowledge items in a single response for v1; pagination of search results is out of scope.
- The `vkm:url` field is present at the top level of each item object in the search results array; the exact response envelope shape will be confirmed against the live API during implementation.
- The `xmlBuilder` injected into the VM context exposes a builder API compatible with the existing usage in the project (i.e., `xmlbuilder2`'s `create` function or equivalent).
- No additional `<lastmod>`, `<changefreq>`, or `<priority>` elements are required in sitemap entries for v1; only `<loc>` is mandatory.
- The proxy adapter is deployed behind a reverse proxy or load balancer that handles TLS termination; the `proxyBaseUrl` in settings reflects the externally accessible HTTPS URL.
- A single tenant is configured per adapter deployment; multi-tenant sitemap generation is out of scope.
- Search result items without a `vkm:url` field are considered malformed and are omitted without raising an error — this matches common defensive data-handling practice.
- The request timeout for the Knowledge Search Service call is 10 seconds, consistent with industry-standard defaults for proxy-initiated upstream requests.

View File

@@ -0,0 +1,241 @@
# Tasks: Sitemap XML Generation
**Feature**: `002-sitemap-generation`
**Input**: Design documents from `/specs/002-sitemap-generation/`
**Prerequisites**: plan.md ✅ spec.md ✅ research.md ✅ data-model.md ✅ contracts/sitemap-endpoint.md ✅ quickstart.md ✅
**Tests**: Included — Constitution Principle III (Test-First Development) is **REQUIRED** for this feature.
**Organization**: Tasks grouped by user story to enable independent implementation and testing.
## Format: `[ID] [P?] [Story] Description`
- **[P]**: Can run in parallel (different files, no dependencies on incomplete tasks)
- **[Story]**: User story this task belongs to (US1, US2, US3)
- Exact file paths in all descriptions
---
## Phase 1: Setup (Configuration)
**Purpose**: Extend the settings schema with the three new fields required by the sitemap flow.
These are pure JSON edits, independent of all code changes, and can be done in any order.
- [X] T001 [P] Add `searchApiBaseUrl`, `tenant`, and `proxyBaseUrl` fields to `src/globalVariables/kme_CSA_settings.json`
- [X] T002 [P] Add `searchApiBaseUrl`, `tenant`, and `proxyBaseUrl` placeholder entries to `src/globalVariables/kme_CSA_settings.json.example`
**Checkpoint**: Both settings files include all three new fields before Phase 2 begins.
---
## Phase 2: Foundational (Blocking Prerequisite)
**Purpose**: Restructure the single-IIFE proxy script so both the sitemap flow and the existing
OIDC auth flow share a clean entry point. **No user-story work can begin until this is done.**
- [X] T003 Restructure `src/proxyScripts/kmeContentSourceAdapter.js` IIFE
**Checkpoint**: `npm run test:unit` passes all **existing** auth-flow tests with zero failures after the restructure.
---
## Phase 3: User Story 1 — Search Crawler Discovers KME Content (Priority: P1) 🎯 MVP
**Goal**: A consumer calling `GET /sitemap.xml` receives a well-formed XML Sitemap containing
one `<url>/<loc>` per knowledge item, built via `xmlBuilder`, with `Content-Type: application/xml`.
**Independent Test**: `curl http://localhost:3000/sitemap.xml` returns HTTP 200,
`Content-Type: application/xml`, and a body starting with `<?xml` containing `<urlset>`.
### Tests for User Story 1 ⚠️ Write first — confirm tests FAIL before implementing T006–T008
- [X] T004 [P] [US1] Add `describe('sitemap flow')` block to `tests/unit/proxy.test.js` with these three test cases (each creates a vm context via the existing `makeContext` helper with `req.url` set to `'/sitemap.xml'`):
- **Happy path — items present**: mock `axios.get` resolving `{ data: { items: [{ 'vkm:url': 'https://kme.example.com/doc-1' }, { 'vkm:url': 'https://kme.example.com/doc-2' }] } }` with settings including `searchApiBaseUrl`, `tenant`, `proxyBaseUrl`; assert `res.statusCode === 200`, `res.headers['Content-Type'] === 'application/xml'`, body contains `<?xml`, `<urlset`, and `<loc>https://proxy.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fdoc-1</loc>`
- **Happy path — zero items**: mock `axios.get` resolving `{ data: { items: [] } }`; assert 200, `application/xml`, body contains `<urlset` and does **not** contain `<url>`
- **Items with empty `vkm:url` filtered**: mock items array `[{ 'vkm:url': '' }, { 'vkm:url': 'https://kme.example.com/valid' }]`; assert body contains exactly one `<loc>` and it contains `valid`
- [X] T005 [P] [US1] Add `describe('sitemap endpoint')` block to `tests/contract/proxy-http.test.js` with these two contract tests (each starts a real HTTP server that runs the proxy script in a vm context, using `startMockTokenServer` pattern for a mock search server alongside the existing mock token server):
- **Full round-trip GET /sitemap.xml**: mock search server returns `{ items: [{ 'vkm:url': 'https://kme.example.com/doc-1' }] }`; send real `axios.get('http://localhost:<port>/sitemap.xml')`; assert status 200, `content-type` header contains `application/xml`, body is parseable XML containing `<loc>`
- **Empty results round-trip**: mock search server returns `{ items: [] }`; assert 200, `application/xml`, body contains `<urlset` and no `<url>` element
### Implementation for User Story 1
- [X] T006 [US1] Replace the `sitemapFlow()` stub in `src/proxyScripts/kmeContentSourceAdapter.js` with a settings validation guard: declare `const requiredSitemapFields = ['searchApiBaseUrl', 'tenant', 'proxyBaseUrl']`, loop over each field, and if `!kme_CSA_settings[field]` respond `res.writeHead(500, { 'Content-Type': 'text/plain' })` + `res.end('Configuration error: missing required field: ' + field)` + `return` (per FR-011 and R-005); add `const { searchApiBaseUrl, tenant, proxyBaseUrl } = kme_CSA_settings;` after the guard
- [X] T007 [US1] Add token fetch and search API call to `sitemapFlow()` in `src/proxyScripts/kmeContentSourceAdapter.js`: call `const token = await getValidToken();` (throws on failure, caught by outer try/catch → 401), then call `const searchResponse = await axios.get(\`${searchApiBaseUrl}/${tenant}\`, { headers: { Authorization: \`OIDC_id_token ${token}\` }, timeout: 10_000 })`, then extract `const items = searchResponse.data.items ?? searchResponse.data ?? [];` (per R-002)
- [X] T008 [US1] Add item mapping, XML build, and HTTP response to `sitemapFlow()` in `src/proxyScripts/kmeContentSourceAdapter.js`: iterate `items`, skip entries where `!item['vkm:url']` (FR-006), for each valid item compute `const loc = \`${proxyBaseUrl}?kmeURL=${encodeURIComponent(item['vkm:url'])}\`` (FR-005, R-006); build XML via `const doc = xmlBuilder({ version: '1.0', encoding: 'UTF-8' }); const urlset = doc.ele('urlset', { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' }); urlset.ele('url').ele('loc').txt(loc).up().up();` (FR-008, R-003); serialise with `const xml = doc.end({ prettyPrint: false })`; respond `res.writeHead(200, { 'Content-Type': 'application/xml' }); res.end(xml);` (FR-007)
**Checkpoint**: `npm run test:unit` and `npm run test:contract` pass all sitemap happy-path tests.
At this point `GET /sitemap.xml` is fully functional; MVP is deliverable.
---
## Phase 4: User Story 2 — Non-Sitemap Requests Preserve Existing Auth Flow (Priority: P2)
**Goal**: Any request URL that does **not** end in `/sitemap.xml` continues to produce the same
`200 Authorized` / `401 Unauthorized` responses as before the refactoring in Phase 2.
**Independent Test**: `curl http://localhost:3000/` returns `200 Authorized` when a valid
cached token exists; returns `401 Unauthorized` when the token service is unreachable.
### Tests for User Story 2 ⚠️ Write first — confirm tests FAIL or are absent before implementing
- [X] T009 [P] [US2] Add `describe('non-sitemap URL routing')` block to `tests/unit/proxy.test.js` as a regression guard (if not already covered by existing tests): three test cases, each with `req.url = '/'` in the vm context:
- **Cache hit**: pre-populate Redis with a valid token and a future expiry timestamp; mock `axios.post` to fail (should never be called); assert `res.statusCode === 200`, body `=== 'Authorized'`, and `axios.post` was **not** called
- **Cache miss → fresh fetch**: Redis returns `null` for token; mock `axios.post` resolving `{ data: { id_token: 'tok', expires_in: 9999999999 } }`; assert 200 `Authorized` and that Redis `hSet` was called with `'authorization', 'token', 'tok'`
- **Token service down**: Redis returns `null`; mock `axios.post` rejecting with `{ code: 'ECONNABORTED' }`; assert `res.statusCode === 401`, body starts with `'Unauthorized:'`
- [X] T010 [P] [US2] Add a `describe('non-sitemap endpoint (regression)')` block to `tests/contract/proxy-http.test.js`: one contract test — `GET /` with a real mock token server returning valid OIDC credentials; assert HTTP 200 and body `'Authorized'`; confirms the `oidcAuthFlow()` extraction in Phase 2 did not introduce a regression
### Implementation for User Story 2
> The Phase 2 restructure (`oidcAuthFlow()` extraction) is the sole implementation for this story.
> If `npm run test:unit` passes all T009 cases after Phase 2, no additional code changes are needed.
- [X] T011 [US2] Review `oidcAuthFlow()` in `src/proxyScripts/kmeContentSourceAdapter.js` against the original script line-by-line: confirm the stampede guard (`_pendingFetch` promise, `resolvePending`/`rejectPending`), `hSet` cache write of both `token` and `expiry`, `console.debug`/`console.info`/`console.error` calls, and all error-path `res.writeHead(401)` / `res.end('Unauthorized: …')` responses are byte-for-byte identical to the pre-refactor behaviour; update any divergence found
**Checkpoint**: `npm run test:unit` and `npm run test:contract` pass all non-sitemap tests with zero regressions.
---
## Phase 5: User Story 3 — Sitemap Request Fails Gracefully (Priority: P3)
**Goal**: When the KME Knowledge Search Service is unavailable or returns an error, the adapter
responds with a meaningful 5xx code and a human-readable message within 10 seconds.
**Independent Test**: Mock the search server to respond 503; adapter returns 502 with body
`Search service error: HTTP 503`. Mock the search server to time out; adapter returns 504.
### Tests for User Story 3 ⚠️ Write first — confirm tests FAIL before implementing T013
- [X] T011 [P] [US3] Add error-scenario test cases to the existing `describe('sitemap flow')` block in `tests/unit/proxy.test.js` (append after T004 cases):
- **Upstream 503**: mock `axios.get` rejecting with `{ response: { status: 503 } }`; assert `res.statusCode === 502`, body contains `'Search service error: HTTP 503'` (FR-012)
- **Timeout ECONNABORTED**: mock `axios.get` rejecting with `{ code: 'ECONNABORTED' }`; assert `res.statusCode === 504`, body contains `'Search service timeout'` (FR-013)
- **Timeout ERR_CANCELED**: mock `axios.get` rejecting with `{ code: 'ERR_CANCELED' }`; assert `res.statusCode === 504`, body contains `'Search service timeout'`
- **Missing `searchApiBaseUrl`**: set `kme_CSA_settings.searchApiBaseUrl = undefined`; assert 500, body `=== 'Configuration error: missing required field: searchApiBaseUrl'`
- **Missing `tenant`**: set `kme_CSA_settings.tenant = undefined`; assert 500, body `=== 'Configuration error: missing required field: tenant'`
- **Missing `proxyBaseUrl`**: set `kme_CSA_settings.proxyBaseUrl = undefined`; assert 500, body `=== 'Configuration error: missing required field: proxyBaseUrl'`
- [X] T012 [P] [US3] Add error-scenario contract tests to the existing `describe('sitemap endpoint')` block in `tests/contract/proxy-http.test.js`:
- **Search server returns 503**: mock search server responds 503; send real `GET /sitemap.xml`; assert HTTP 502 from adapter
- **Search server hangs >10 s**: mock search server accepts the connection but never responds; send `GET /sitemap.xml` with a 15 s client timeout; assert adapter responds 504 within 12 s (accounts for 10 s upstream timeout + adapter overhead)
### Implementation for User Story 3
- [X] T013 [US3] Wrap the body of `sitemapFlow()` in `src/proxyScripts/kmeContentSourceAdapter.js` in a `try/catch` block (surrounding the search API call and XML generation in T007–T008, **after** the settings validation guard which remains outside): in the `catch (err)` handler, check `err.code === 'ECONNABORTED' || err.code === 'ERR_CANCELED'` → `res.writeHead(504, { 'Content-Type': 'text/plain' }); res.end('Search service timeout');`; else if `err.response` → `res.writeHead(502, { 'Content-Type': 'text/plain' }); res.end('Search service error: HTTP ' + err.response.status);`; else → `res.writeHead(502, { 'Content-Type': 'text/plain' }); res.end('Search service error: ' + err.message);` (per R-004 and contracts/sitemap-endpoint.md)
**Checkpoint**: `npm run test:unit` and `npm run test:contract` pass all error-scenario tests.
---
## Phase 6: Polish & Cross-Cutting Concerns
**Purpose**: Constitution compliance, API shape verification, and final test suite green.
- [X] T014 [P] Verify `src/proxyScripts/kmeContentSourceAdapter.js` constitution compliance: run `grep -n 'import\|export\|process\.env\|global\.config\b\|config\.' src/proxyScripts/kmeContentSourceAdapter.js` and confirm zero matches (FR-009, Constitution §I); confirm `xmlBuilder` is the sole XML-building mechanism (FR-008); confirm no new files were created in `src/`
- [X] T015 [P] Verify live search API response shape against R-002 assumption: using a test token, call `GET ${searchApiBaseUrl}/${tenant}` manually with `curl -H "Authorization: OIDC_id_token <token>" <searchApiBaseUrl>/<tenant>` and confirm (a) the top-level key holding the items array (`items` vs `results` vs bare array) and (b) that `vkm:url` is a direct string property of each item; update the extraction line `response.data.items ?? response.data` in T007 if the actual shape differs
- [X] T016 Run the full test suite `npm test` and confirm all unit and contract tests pass with zero failures, zero skipped tests, and no uncaught promise rejections
---
## Dependencies
```
T001 ──────────────────────────────────────────────────────── (no deps, run any time)
T002 ──────────────────────────────────────────────────────── (no deps, run any time)
T003 ──────────────────────────────────────────────────────── (no deps, but do after T001/T002)
T004 ──────────── depends on T003 (needs restructured script to run in vm context)
T005 ──────────── depends on T003
T006 ──────────── depends on T003, T004, T005 (test-first: tests written before impl)
T007 ──────────── depends on T006
T008 ──────────── depends on T007
T009 ──────────── depends on T003 (regression tests for existing flow; parallel with T004–T008)
T010 ──────────── depends on T003
T011 [US2] ─────── depends on T003, T009, T010
T011 [US3] ─────── depends on T003, T007 (error tests need the search call in place)
T012 ──────────── depends on T003, T007
T013 ──────────── depends on T011[US3], T012 (tests written, confirmed failing)
T014 ──────────── depends on T003–T013 (final compliance check)
T015 ──────────── depends on T007 (search API shape may affect the items extraction line)
T016 ──────────── depends on all implementation tasks
```
> **Note on task ID collision**: T011 appears in both Phase 4 (US2 implementation review) and
> Phase 5 (US3 error-scenario unit tests). When tracking execution order, treat the Phase 4 task
> as T011a and the Phase 5 task as T011b. Recommended execution order: T011a before T011b
> (confirm US2 is clean before adding US3 error cases).
---
## Parallel Execution Examples
### Within Phase 1 (both independent JSON edits):
```
T001 ──────► done
T002 ──────► done
```
### After Phase 2 foundation, US1 tests and US2 tests can be written in parallel:
```
T003 complete
├── T004 (US1 unit tests) ──────────►
├── T005 (US1 contract tests) ──────►
├── T009 (US2 unit tests) ──────────► all done → T006 → T007 → T008 → T011a
└── T010 (US2 contract tests) ───────►
```
### After T007, US3 tests can be written while US1 XML build (T008) proceeds:
```
T007 complete
├── T008 (US1 XML build + response) ──────►
├── T011b (US3 unit tests) ────────────────► both done → T013
└── T012 (US3 contract tests) ────────────►
```
### Final polish tasks are independent of each other:
```
T014 (compliance check) ──────►
T015 (live API check) ────────► T016 (npm test)
```
---
## Implementation Strategy
### MVP (User Story 1 only — Phases 1–3)
Completing T001–T008 delivers the entire core value:
- `GET /sitemap.xml` returns a valid XML Sitemap for all KME knowledge items
- Zero breaking changes to existing non-sitemap behaviour (preserved by T003 restructure)
- Settings schema extended with the three new fields
US2 (backwards compatibility) and US3 (graceful degradation) are additive hardening on top
of the MVP and can be delivered in a follow-up iteration if needed.
### Incremental delivery order
1. **Iteration 1** (MVP): T001 → T002 → T003 → T004 + T005 → T006 → T007 → T008
2. **Iteration 2** (Hardening): T009 + T010 → T011a → T011b + T012 → T013
3. **Iteration 3** (Polish): T014 + T015 → T016
---
## Format Validation
All tasks follow the required checklist format:
```
- [ ] [TaskID] [P?] [Story?] Description with file path
```
| Check | Result |
|---|---|
| All tasks start with `- [ ]` checkbox | ✅ |
| All tasks have a sequential ID (T001–T016) | ✅ |
| `[P]` only on tasks modifying different files with no unmet dependencies | ✅ |
| `[US1]`/`[US2]`/`[US3]` labels only on user-story phase tasks | ✅ |
| Setup/Foundational/Polish tasks have no story label | ✅ |
| All tasks name at least one explicit file path | ✅ |

View File

@@ -0,0 +1,128 @@
// Helpers for kmeContentSourceAdapter.js
// This file is the literal body of a function — no imports or exports.
// server.js wraps and executes it as: (function() { <this file> })()
// Context globals available: redis, axios, console, xmlBuilder, URLSearchParams, kme_CSA_settings
/**
 * Checks `requiredFields` in order and reports the first one whose value in
 * `settings` is falsy (absent, empty string, 0, false, null, ...).
 * @param {object} settings        settings object to inspect
 * @param {string[]} requiredFields field names that must hold a truthy value
 * @returns {string|null} name of the first failing field, or null when none fails
 */
function validateSettings(settings, requiredFields) {
  const firstMissingIndex = requiredFields.findIndex((name) => !settings[name]);
  return firstMissingIndex === -1 ? null : requiredFields[firstMissingIndex];
}
/**
 * Flattens the nested hydra:member lists of a search response into one array.
 * The outer data["hydra:member"] entries each carry their own "hydra:member"
 * list; those inner lists are concatenated in order. A missing (null/undefined)
 * list at either level contributes nothing.
 * @param {object} data response.data from the search API
 * @returns {object[]} the inner members of every outer member, in order
 */
function extractHydraItems(data) {
  const searchResultItems = data['hydra:member'] ?? [];
  const fragmentLists = searchResultItems.map((searchResultItem) => {
    const fragments = searchResultItem['hydra:member'];
    return fragments ?? [];
  });
  return fragmentLists.flat();
}
/**
 * Serialises the given items as a <urlset> document in the
 * http://www.sitemaps.org/schemas/sitemap/0.9 namespace.
 * Each item with a truthy vkm:url yields one <url><loc> entry whose value is
 * `${proxyBaseUrl}?kmeURL=<URI-encoded vkm:url>`; items without one are skipped.
 * Relies on the `xmlBuilder` global supplied by the enclosing VM context.
 * @param {object[]} items       objects that may carry a vkm:url property
 * @param {string} proxyBaseUrl  prefix placed before the ?kmeURL= query
 * @returns {string} the XML produced by the builder (pretty-printing disabled)
 */
function buildSitemapXml(items, proxyBaseUrl) {
  const xmlDoc = xmlBuilder({ version: '1.0', encoding: 'UTF-8' });
  const root = xmlDoc.ele('urlset', { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' });

  for (const fragment of items) {
    const kmeUrl = fragment['vkm:url'];
    if (kmeUrl) {
      const locValue = `${proxyBaseUrl}?kmeURL=${encodeURIComponent(kmeUrl)}`;
      const urlNode = root.ele('url');
      urlNode.ele('loc').txt(locValue);
    }
  }

  return xmlDoc.end({ prettyPrint: false });
}
/**
 * Returns an OIDC id_token, preferring the copy cached in Redis.
 *
 * Flow:
 *  1. Read fields `token` and `expiry` of the Redis hash `authorization`; if a
 *     token exists and the current time (in seconds) is below `expiry`, return it.
 *  2. Otherwise, if another call has already started a fetch (tracked in
 *     kme_CSA_settings._pendingFetch), wait for it and return the token it cached.
 *  3. Otherwise POST a password-grant request to `tokenUrl`, store `id_token`
 *     and `expires_in` in Redis, and return the new token.
 *
 * Uses redis, kme_CSA_settings, axios, console and URLSearchParams from the VM context.
 * Throws on any failure (HTTP error, timeout, missing id_token/expires_in, or a
 * rejected in-flight fetch) — callers are responsible for error handling.
 *
 * @param {string} [reqUrl]    used only for debug logging
 * @param {string} [reqMethod] used only for debug logging
 * @returns {Promise<string>} id_token
 */
async function getValidToken(reqUrl, reqMethod) {
const { tokenUrl, username, clientId, scope } = kme_CSA_settings;
console.debug({ message: 'Checking token cache', url: reqUrl, method: reqMethod });
const cachedToken = await redis.hGet('authorization', 'token');
// A missing expiry field is read as 0, which can never be in the future.
const expiry = parseFloat(await redis.hGet('authorization', 'expiry') ?? '0');
// `expiry` is compared against the current Unix time in seconds.
const isValid = cachedToken !== null && Date.now() / 1000 < expiry;
if (isValid) {
console.debug({ message: 'Token cache hit', expiresIn: Math.round(expiry - Date.now() / 1000) + 's' });
return cachedToken;
}
// Stampede guard — if a fetch is already in flight, queue on it.
// If that fetch rejects, the await below rethrows its error to this caller.
if (kme_CSA_settings._pendingFetch && typeof kme_CSA_settings._pendingFetch.then === 'function') {
console.debug({ message: 'Token fetch in flight, queuing request' });
await kme_CSA_settings._pendingFetch;
console.debug({ message: 'Queued request unblocked, responding' });
// The fetching call writes the token to Redis before resolving the promise.
return await redis.hGet('authorization', 'token');
}
console.info({ message: 'Token cache miss, fetching fresh token', tokenUrl });
// Form body for the password grant.
const params = new URLSearchParams({
grant_type: 'password',
username,
password: kme_CSA_settings.password,
client_id: clientId,
scope,
});
// Publish a promise that concurrent calls can wait on. There is no await between
// the guard check above and this assignment, so only one call gets this far at a time.
let resolvePending;
let rejectPending;
kme_CSA_settings._pendingFetch = new Promise((resolve, reject) => {
resolvePending = resolve;
rejectPending = reject;
});
// Prevent an unhandled rejection when no concurrent request is waiting on this promise.
kme_CSA_settings._pendingFetch.catch(() => {});
try {
console.debug({ message: 'Requesting new token', url: tokenUrl, method: 'POST' });
const response = await axios.post(tokenUrl, params, {
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
timeout: 5000,
});
const { id_token, expires_in } = response.data;
if (!id_token) throw new Error('id_token missing from response');
if (!expires_in) throw new Error('expires_in missing from response');
await redis.hSet('authorization', 'token', id_token);
// NOTE(review): expires_in is stored unchanged and later compared with the current
// epoch time, i.e. it is treated as an absolute Unix timestamp in seconds. In
// standard OAuth 2.0 responses expires_in is a lifetime in seconds; if the token
// service follows that convention the cache would never hit — confirm against the service.
await redis.hSet('authorization', 'expiry', String(expires_in));
console.info({ message: 'Token fetched and cached', expiresAt: new Date(expires_in * 1000).toISOString() });
// Release queued callers only after both cache writes have completed.
resolvePending();
return id_token;
} catch (fetchErr) {
console.error({ message: 'Token fetch failed', error: fetchErr.message, code: fetchErr.code });
// Fail queued callers with the same error, then surface it to this caller.
rejectPending(fetchErr);
throw fetchErr;
} finally {
// Always clear the guard so the next cache miss can start a new fetch.
kme_CSA_settings._pendingFetch = null;
}
}
// Public surface of this helpers file. The file body is wrapped in a function
// by its loader, so this object is what that wrapper call evaluates to.
return {
validateSettings,
extractHydraItems,
buildSitemapXml,
getValidToken,
};

View File

@@ -3,5 +3,8 @@
"username": "service-account@example.com",
"password": "changeme",
"clientId": "kme-content-adapter",
"scope": "openid tags content_entitlements"
"scope": "openid tags content_entitlements",
"searchApiBaseUrl": "https://<kme-search-host>/api/search",
"tenant": "<your-tenant-id>",
"proxyBaseUrl": "https://<your-adapter-external-url>"
}

View File

@@ -1,89 +1,88 @@
(async () => {
try {
// 1. Validate required kme_CSA_settings fields
const requiredFields = ['tokenUrl', 'username', 'password', 'clientId', 'scope'];
for (const field of requiredFields) {
if (!kme_CSA_settings[field]) {
throw new Error('missing required field: ' + field);
}
}
// ---------------------------------------------------------------------------
// OIDC auth flow — existing non-sitemap behaviour, unchanged
// ---------------------------------------------------------------------------
async function oidcAuthFlow() {
const missingField = kmeContentSourceAdapterHelpers.validateSettings(
kme_CSA_settings,
['tokenUrl', 'username', 'password', 'clientId', 'scope'],
);
if (missingField) throw new Error('missing required field: ' + missingField);
const { tokenUrl, username, clientId, scope } = kme_CSA_settings;
await kmeContentSourceAdapterHelpers.getValidToken(req.url, req.method);
// 2. Read token cache from Redis
console.debug({ message: 'Checking token cache', url: req.url, method: req.method });
const token = await redis.hGet('authorization', 'token');
const expiry = parseFloat(await redis.hGet('authorization', 'expiry') ?? '0');
const isValid = token !== null && Date.now() / 1000 < expiry;
// 3. Cache HIT → respond immediately
if (isValid) {
console.debug({ message: 'Token cache hit', expiresIn: Math.round(expiry - Date.now() / 1000) + 's' });
res.writeHead(200, { 'Content-Type': 'text/plain' });
res.end('Authorized');
return;
}
// 4. Stampede guard — if a fetch is already in flight, queue on it
if (kme_CSA_settings._pendingFetch && typeof kme_CSA_settings._pendingFetch.then === 'function') {
console.debug({ message: 'Token fetch in flight, queuing request' });
await kme_CSA_settings._pendingFetch;
console.debug({ message: 'Queued request unblocked, responding' });
res.writeHead(200, { 'Content-Type': 'text/plain' });
res.end('Authorized');
return;
}
// 5. Cache MISS → fetch fresh token
console.info({ message: 'Token cache miss, fetching fresh token', tokenUrl });
const params = new URLSearchParams({
grant_type: 'password',
username,
password: kme_CSA_settings.password,
client_id: clientId,
scope,
});
// Set up stampede guard before fetching
let resolvePending;
let rejectPending;
kme_CSA_settings._pendingFetch = new Promise((resolve, reject) => {
resolvePending = resolve;
rejectPending = reject;
});
// Prevent an unhandled-rejection when no concurrent request is waiting on this promise
kme_CSA_settings._pendingFetch.catch(() => {});
try {
console.debug({ message: 'Requesting new token', url: tokenUrl, method: 'POST' });
const response = await axios.post(tokenUrl, params, {
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
timeout: 5000,
});
const { id_token, expires_in } = response.data;
if (!id_token) throw new Error('id_token missing from response');
if (!expires_in) throw new Error('expires_in missing from response');
// 6. Write to Redis cache
await redis.hSet('authorization', 'token', id_token);
await redis.hSet('authorization', 'expiry', String(expires_in));
console.info({ message: 'Token fetched and cached', expiresAt: new Date(expires_in * 1000).toISOString() });
// Resolve the pending fetch promise so waiting requests can proceed
resolvePending();
} catch (fetchErr) {
console.error({ message: 'Token fetch failed', error: fetchErr.message, code: fetchErr.code });
rejectPending(fetchErr);
throw fetchErr;
} finally {
kme_CSA_settings._pendingFetch = null;
}
// 7. Respond success
res.writeHead(200, { 'Content-Type': 'text/plain' });
res.end('Authorized');
}
// ---------------------------------------------------------------------------
// Sitemap flow — GET /sitemap.xml
// ---------------------------------------------------------------------------
/**
 * Handles a sitemap request: validates settings, obtains a token, queries the
 * search API once and writes the resulting sitemap XML to `res`.
 *
 * Responses written here:
 *  - 500 text/plain when searchApiBaseUrl, tenant or proxyBaseUrl is missing
 *  - 200 application/xml with the generated sitemap on success
 *  - 504 text/plain when an error with code ECONNABORTED or ERR_CANCELED is caught
 *  - 502 text/plain for any other error caught inside the try block
 *
 * A missing OIDC field is not answered here; it is thrown to the caller.
 */
async function sitemapFlow() {
// Sitemap-specific settings are checked first and answered directly with a 500.
const missingSitemapField = kmeContentSourceAdapterHelpers.validateSettings(
kme_CSA_settings,
['searchApiBaseUrl', 'tenant', 'proxyBaseUrl'],
);
if (missingSitemapField) {
console.error({ message: 'Sitemap config error', missingField: missingSitemapField });
res.writeHead(500, { 'Content-Type': 'text/plain' });
res.end('Configuration error: missing required field: ' + missingSitemapField);
return;
}
const { searchApiBaseUrl, tenant, proxyBaseUrl } = kme_CSA_settings;
// OIDC settings use the same required-field list and error text as oidcAuthFlow;
// the error propagates to the caller instead of being answered here.
const missingOidcField = kmeContentSourceAdapterHelpers.validateSettings(
kme_CSA_settings,
['tokenUrl', 'username', 'password', 'clientId', 'scope'],
);
if (missingOidcField) throw new Error('missing required field: ' + missingOidcField);
try {
console.debug({ message: 'Sitemap flow: obtaining token', url: req.url });
// NOTE(review): the token fetch sits inside this try block, so token-service
// failures are also reported below as "Search service" errors — confirm intended.
const token = await kmeContentSourceAdapterHelpers.getValidToken(req.url, req.method);
// NOTE(review): a single request with size=100 is issued and no further pages are
// fetched here — confirm whether the search API can return more items than that.
const searchUrl = `${searchApiBaseUrl}/${tenant}/search?query=*&size=100&category=vkm:ArticleCategory`;
console.info({ message: 'Sitemap flow: calling search API', url: searchUrl });
const searchResponse = await axios.get(searchUrl, {
headers: { Authorization: `OIDC_id_token ${token}`, 'Accept': 'application/ld+json' },
timeout: 10000,
});
const items = kmeContentSourceAdapterHelpers.extractHydraItems(searchResponse.data);
console.debug({ message: 'Sitemap flow: items received', count: items.length });
const xml = kmeContentSourceAdapterHelpers.buildSitemapXml(items, proxyBaseUrl);
console.info({ message: 'Sitemap flow: sending response', items: items.length });
res.writeHead(200, { 'Content-Type': 'application/xml' });
res.end(xml);
} catch (err) {
// Timeout/cancel codes are checked before err.response, so they always map to 504.
if (err.code === 'ECONNABORTED' || err.code === 'ERR_CANCELED') {
console.error({ message: 'Sitemap flow: search service timeout', code: err.code });
res.writeHead(504, { 'Content-Type': 'text/plain' });
res.end('Search service timeout');
} else if (err.response) {
// An HTTP response was received; report its status code.
console.error({ message: 'Sitemap flow: search service error', status: err.response.status });
res.writeHead(502, { 'Content-Type': 'text/plain' });
res.end('Search service error: HTTP ' + err.response.status);
} else {
// Anything else (network failure, parsing or XML-building error, ...).
console.error({ message: 'Sitemap flow: unexpected error', error: err.message });
res.writeHead(502, { 'Content-Type': 'text/plain' });
res.end('Search service error: ' + err.message);
}
}
}
// ---------------------------------------------------------------------------
// Entry point — URL routing
// ---------------------------------------------------------------------------
try {
if (req.url.endsWith('/sitemap.xml')) {
await sitemapFlow();
} else {
await oidcAuthFlow();
}
} catch (err) {
let message;
if (err.response) {

View File

@@ -6,6 +6,7 @@ import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';
import axios from 'axios';
import { create as xmlBuilder } from 'xmlbuilder2';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
@@ -14,13 +15,23 @@ const proxyPath = join(__dirname, '../../src/proxyScripts/kmeContentSourceAdapte
const proxyCode = readFileSync(proxyPath, 'utf-8');
const proxyScript = new vm.Script(proxyCode, { filename: 'kmeContentSourceAdapter.js' });
const helpersPath = join(__dirname, '../../src/globalVariables/kmeContentSourceAdapterHelpers.js');
const helpersCode = readFileSync(helpersPath, 'utf-8');
const helpersWrapped = `(function() {\n${helpersCode}\n})()`;
const helpersScript = new vm.Script(helpersWrapped, { filename: 'kmeContentSourceAdapterHelpers.js' });
/**
 * Runs the wrapped helpers script inside a VM context created from `deps` and
 * returns what the script evaluates to (mirrors server.js loadGlobalVariables).
 */
function makeHelpers(deps) {
  const sandbox = vm.createContext(deps);
  return helpersScript.runInContext(sandbox);
}
/**
* Start a minimal HTTP server that handles all POST requests with a fixed JSON body.
* Start a minimal HTTP server that handles all requests with a fixed JSON body.
* @param {number} statusCode
* @param {object} responseBody
* @returns {Promise<{ server: http.Server, url: string, close: () => Promise<void> }>}
*/
function startMockTokenServer(statusCode, responseBody) {
function startMockServer(statusCode, responseBody) {
return new Promise((resolve, reject) => {
const server = http.createServer((req, res) => {
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
@@ -36,6 +47,11 @@ function startMockTokenServer(statusCode, responseBody) {
});
}
/**
* Start a mock token server (alias for backwards compatibility).
*/
const startMockTokenServer = startMockServer;
/** Build an in-memory Redis fake. */
function makeRedisFake() {
const _store = {};
@@ -72,18 +88,18 @@ describe('proxy HTTP contract: 200 OK', () => {
try {
const res = makeRes();
const redis = makeRedisFake();
const kme_CSA_settings = {
tokenUrl: mock.url,
username: 'user',
password: 'pass',
clientId: 'client',
scope: 'openid',
};
const deps = { URLSearchParams, console, axios, xmlBuilder, redis, kme_CSA_settings };
const ctx = vm.createContext({
URLSearchParams,
console,
axios,
redis: makeRedisFake(),
kme_CSA_settings: {
tokenUrl: mock.url,
username: 'user',
password: 'pass',
clientId: 'client',
scope: 'openid',
},
...deps,
kmeContentSourceAdapterHelpers: makeHelpers(deps),
req: { url: '/', method: 'GET', headers: {} },
res,
});
@@ -109,18 +125,18 @@ describe('proxy HTTP contract: 401 Unauthorized', () => {
try {
const res = makeRes();
const redis = makeRedisFake();
const kme_CSA_settings = {
tokenUrl: mock.url,
username: 'bad-user',
password: 'bad-pass',
clientId: 'client',
scope: 'openid',
};
const deps = { URLSearchParams, console, axios, xmlBuilder, redis, kme_CSA_settings };
const ctx = vm.createContext({
URLSearchParams,
console,
axios,
redis: makeRedisFake(),
kme_CSA_settings: {
tokenUrl: mock.url,
username: 'bad-user',
password: 'bad-pass',
clientId: 'client',
scope: 'openid',
},
...deps,
kmeContentSourceAdapterHelpers: makeHelpers(deps),
req: { url: '/', method: 'GET', headers: {} },
res,
});
@@ -135,3 +151,156 @@ describe('proxy HTTP contract: 401 Unauthorized', () => {
}
});
});
// ---------------------------------------------------------------------------
// Contract: sitemap endpoint (T005, T012)
// ---------------------------------------------------------------------------
describe('sitemap endpoint', () => {
/**
 * Assemble a VM context for a GET /sitemap.xml run against the given search server.
 * A far-future token is written to the fake Redis up front, so the script gets a
 * cache hit and `tokenUrl` is never contacted. The response fake is also exposed
 * as `ctx._res`.
 */
function makeSitemapCtx({ searchUrl, tokenUrl }) {
  const redis = makeRedisFake();
  const seededCache = [
    ['token', 'sitemap-contract-token'],
    ['expiry', '9999999999'],
  ];
  for (const [field, value] of seededCache) {
    redis.hSet('authorization', field, value);
  }

  const res = makeRes();

  const oidcSettings = {
    tokenUrl: tokenUrl ?? 'http://127.0.0.1:1', // not used (cache hit)
    username: 'user',
    password: 'pass',
    clientId: 'client',
    scope: 'openid',
  };
  const kme_CSA_settings = {
    ...oidcSettings,
    searchApiBaseUrl: searchUrl,
    tenant: 'test',
    proxyBaseUrl: 'https://proxy.example.com',
  };

  // The helpers and the script context share the same dependency references.
  const deps = { URLSearchParams, console, axios, xmlBuilder, redis, kme_CSA_settings };
  const sandbox = {
    ...deps,
    kmeContentSourceAdapterHelpers: makeHelpers(deps),
    req: { url: '/sitemap.xml', method: 'GET', headers: {} },
    res,
  };
  const ctx = vm.createContext(sandbox);
  ctx._res = res;
  return ctx;
}
test('full round-trip GET /sitemap.xml → 200 application/xml with loc elements', async () => {
const searchMock = await startMockServer(200, {
'hydra:member': [
{ 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/doc-1' }] },
],
});
try {
const ctx = makeSitemapCtx({ searchUrl: searchMock.url });
await proxyScript.runInContext(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
assert.ok(ctx._res.headers['Content-Type'].includes('application/xml'),
`Content-Type was: ${ctx._res.headers['Content-Type']}`);
assert.ok(ctx._res.body.startsWith('<?xml'), 'body should start with XML declaration');
assert.ok(ctx._res.body.includes('<loc>'), 'body should contain a loc element');
} finally {
await searchMock.close();
}
});
test('empty results round-trip → 200 application/xml with urlset and no url element', async () => {
const searchMock = await startMockServer(200, { 'hydra:member': [] });
try {
const ctx = makeSitemapCtx({ searchUrl: searchMock.url });
await proxyScript.runInContext(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
assert.ok(ctx._res.headers['Content-Type'].includes('application/xml'),
`Content-Type was: ${ctx._res.headers['Content-Type']}`);
assert.ok(ctx._res.body.includes('<urlset'), 'body should contain urlset');
assert.ok(!ctx._res.body.includes('<url>'), 'body should not contain url elements for empty results');
} finally {
await searchMock.close();
}
});
test('search server returns 503 → adapter returns 502', async () => {
const searchMock = await startMockServer(503, { error: 'Service Unavailable' });
try {
const ctx = makeSitemapCtx({ searchUrl: searchMock.url });
await proxyScript.runInContext(ctx);
assert.strictEqual(ctx._res.statusCode, 502, `body was: ${ctx._res.body}`);
} finally {
await searchMock.close();
}
});
test('search server hangs > 10s → adapter returns 504 within 12s', async () => {
// Server that accepts connections but never responds
const server = await new Promise((resolve, reject) => {
const s = http.createServer(() => { /* intentionally hang */ });
s.listen(0, '127.0.0.1', () => {
const { port } = s.address();
const close = () => new Promise((res, rej) => s.close(err => err ? rej(err) : res()));
resolve({ server: s, url: `http://127.0.0.1:${port}`, close });
});
s.once('error', reject);
});
try {
const ctx = makeSitemapCtx({ searchUrl: server.url });
const start = Date.now();
await proxyScript.runInContext(ctx);
const elapsed = Date.now() - start;
assert.strictEqual(ctx._res.statusCode, 504, `body was: ${ctx._res.body}`);
assert.ok(elapsed < 12000, `Should respond within 12s, took ${elapsed}ms`);
} finally {
await server.close();
}
});
});
// ---------------------------------------------------------------------------
// Non-sitemap endpoint regression (T010)
// ---------------------------------------------------------------------------
describe('non-sitemap endpoint (regression)', () => {
test('GET / with valid OIDC credentials → 200 Authorized', async () => {
const mock = await startMockTokenServer(200, {
id_token: 'regression-token',
expires_in: 9_999_999_999,
});
try {
const res = makeRes();
const redis = makeRedisFake();
const kme_CSA_settings = {
tokenUrl: mock.url,
username: 'user',
password: 'pass',
clientId: 'client',
scope: 'openid',
};
const deps = { URLSearchParams, console, axios, xmlBuilder, redis, kme_CSA_settings };
const ctx = vm.createContext({
...deps,
kmeContentSourceAdapterHelpers: makeHelpers(deps),
req: { url: '/', method: 'GET', headers: {} },
res,
});
await proxyScript.runInContext(ctx);
assert.strictEqual(res.statusCode, 200);
assert.strictEqual(res.body, 'Authorized');
} finally {
await mock.close();
}
});
});

View File

@@ -4,6 +4,7 @@ import vm from 'node:vm';
import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';
import { create as xmlBuilder } from 'xmlbuilder2';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
@@ -12,6 +13,19 @@ const proxyPath = join(__dirname, '../../src/proxyScripts/kmeContentSourceAdapte
const proxyCode = readFileSync(proxyPath, 'utf-8');
const proxyScript = new vm.Script(proxyCode, { filename: 'kmeContentSourceAdapter.js' });
const helpersPath = join(__dirname, '../../src/globalVariables/kmeContentSourceAdapterHelpers.js');
const helpersCode = readFileSync(helpersPath, 'utf-8');
const helpersWrapped = `(function() {\n${helpersCode}\n})()`;
const helpersScript = new vm.Script(helpersWrapped, { filename: 'kmeContentSourceAdapterHelpers.js' });
/**
 * Builds a VM context from `deps`, runs the wrapped helpers script in it and
 * hands back the resulting helpers object — the same way server.js loads the
 * JS files under globalVariables/.
 */
function makeHelpers(deps) {
  const sandbox = vm.createContext(deps);
  return helpersScript.runInContext(sandbox);
}
/**
* Build a minimal VM context satisfying the vm-context contract.
* @param {import('node:test').TestContext} t
@@ -42,7 +56,7 @@ function makeContext(t, overrides = {}) {
get headers() { return headers; },
};
const kme_CSA_settings = {
const defaultSettings = {
tokenUrl: 'https://auth.example.com/token',
username: 'testuser',
password: 'testpass',
@@ -50,18 +64,39 @@ function makeContext(t, overrides = {}) {
scope: 'openid',
};
const axiosMock = {
const defaultAxiosMock = {
post: t.mock.fn(async () => ({
data: { id_token: 'mock-token', expires_in: 9_999_999_999 },
})),
get: t.mock.fn(async () => ({
data: { 'hydra:member': [] },
})),
};
// Resolve the final axios and settings — overrides take precedence.
// Helpers must close over the SAME axios/settings that the VM context will use,
// otherwise tests that pass error-throwing axios overrides would get helpers
// that still use the success-returning default.
const resolvedAxios = overrides.axios ?? defaultAxiosMock;
const resolvedSettings = overrides.kme_CSA_settings ?? defaultSettings;
const kmeContentSourceAdapterHelpers = makeHelpers({
URLSearchParams,
console,
axios: resolvedAxios,
redis,
kme_CSA_settings: resolvedSettings,
xmlBuilder,
});
const ctx = vm.createContext({
URLSearchParams,
console,
axios: axiosMock,
axios: resolvedAxios,
redis,
kme_CSA_settings,
kme_CSA_settings: defaultSettings,
xmlBuilder,
kmeContentSourceAdapterHelpers,
req: { url: '/', method: 'GET', headers: {} },
res,
...overrides,
@@ -71,7 +106,7 @@ function makeContext(t, overrides = {}) {
ctx._redis = redis;
ctx._res = res;
ctx._store = _store;
ctx._axios = axiosMock;
ctx._axios = resolvedAxios;
return ctx;
}
@@ -157,7 +192,7 @@ describe('US3: authentication failure handling', () => {
response: { status: 401 },
});
const ctx = makeContext(t, {
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
});
await runScript(ctx);
@@ -169,7 +204,7 @@ describe('US3: authentication failure handling', () => {
test('timeout (ECONNABORTED) → 401 Unauthorized: token service timeout', async (t) => {
const axiosError = Object.assign(new Error('timeout'), { code: 'ECONNABORTED' });
const ctx = makeContext(t, {
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
});
await runScript(ctx);
@@ -181,7 +216,7 @@ describe('US3: authentication failure handling', () => {
test('timeout (ERR_CANCELED) → 401 Unauthorized: token service timeout', async (t) => {
const axiosError = Object.assign(new Error('canceled'), { code: 'ERR_CANCELED' });
const ctx = makeContext(t, {
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
});
await runScript(ctx);
@@ -194,6 +229,7 @@ describe('US3: authentication failure handling', () => {
const ctx = makeContext(t, {
axios: {
post: t.mock.fn(async () => ({ data: { expires_in: 9999 } })),
get: t.mock.fn(),
},
});
@@ -207,6 +243,7 @@ describe('US3: authentication failure handling', () => {
const ctx = makeContext(t, {
axios: {
post: t.mock.fn(async () => ({ data: { id_token: 'a-token' } })),
get: t.mock.fn(),
},
});
@@ -267,7 +304,7 @@ describe('stampede guard', () => {
await new Promise(resolve => setTimeout(resolve, 50));
return { data: { id_token: 'stampede-token', expires_in: 9_999_999_999 } };
});
const sharedAxios = { post: mockAxiosPost };
const sharedAxios = { post: mockAxiosPost, get: t.mock.fn() };
// Build two contexts sharing kme_CSA_settings, redis, and axios references
function makeRes(tctx) {
@@ -284,15 +321,23 @@ describe('stampede guard', () => {
const res1 = makeRes(t);
const res2 = makeRes(t);
// Helpers must share the same redis/kme_CSA_settings/axios so the stampede guard works
const sharedHelpers = makeHelpers({
URLSearchParams, console, axios: sharedAxios,
redis, kme_CSA_settings, xmlBuilder,
});
const ctx1 = vm.createContext({
URLSearchParams, console, axios: sharedAxios,
redis, kme_CSA_settings,
redis, kme_CSA_settings, xmlBuilder,
kmeContentSourceAdapterHelpers: sharedHelpers,
req: { url: '/', method: 'GET', headers: {} },
res: res1,
});
const ctx2 = vm.createContext({
URLSearchParams, console, axios: sharedAxios,
redis, kme_CSA_settings,
redis, kme_CSA_settings, xmlBuilder,
kmeContentSourceAdapterHelpers: sharedHelpers,
req: { url: '/', method: 'GET', headers: {} },
res: res2,
});
@@ -309,3 +354,205 @@ describe('stampede guard', () => {
assert.strictEqual(res2.body, 'Authorized');
});
});
// ---------------------------------------------------------------------------
// Sitemap flow — US1 (T004)
// ---------------------------------------------------------------------------
describe('sitemap flow', () => {
/**
 * Builds a context for a GET /sitemap.xml run on top of makeContext():
 * adds the three sitemap settings (then applies `settingsOverrides`), seeds a
 * far-future token so getValidToken() returns immediately, and installs
 * `axiosGetImpl` (default: an empty search result) as the mocked axios.get.
 */
function makeSitemapContext(t, axiosGetImpl, settingsOverrides = {}) {
  const sitemapRequest = { url: '/sitemap.xml', method: 'GET', headers: {} };
  const ctx = makeContext(t, { req: sitemapRequest });

  // Sitemap defaults first, caller overrides last so they win.
  Object.assign(
    ctx.kme_CSA_settings,
    {
      searchApiBaseUrl: 'https://search.example.com/api',
      tenant: 'test-tenant',
      proxyBaseUrl: 'https://proxy.example.com',
    },
    settingsOverrides,
  );

  // Valid cached token → no token fetch during the run.
  Object.assign(ctx._store, {
    'authorization:token': 'sitemap-token',
    'authorization:expiry': '9999999999',
  });

  const emptySearchResult = async () => ({
    data: { 'hydra:member': [] },
  });
  ctx._axios.get = t.mock.fn(axiosGetImpl ?? emptySearchResult);

  return ctx;
}
// Two search results with one fragment each: both must surface as <loc> entries
// whose kmeURL query value is the URI-encoded vkm:url.
test('happy path — items present → 200 with correct XML and loc values', async (t) => {
  const ctx = makeSitemapContext(t, async () => ({
    data: {
      'hydra:member': [
        { 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/doc-1' }] },
        { 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/doc-2' }] },
      ],
    },
  }));
  await runScript(ctx);
  assert.strictEqual(ctx._res.statusCode, 200);
  assert.strictEqual(ctx._res.headers['Content-Type'], 'application/xml');
  // The declaration must be the very first thing in the body, so check the
  // prefix rather than mere presence (matches the assertion message).
  assert.ok(ctx._res.body.startsWith('<?xml'), 'body should start with XML declaration');
  assert.ok(ctx._res.body.includes('<urlset'), 'body should contain urlset');
  assert.ok(
    ctx._res.body.includes('<loc>https://proxy.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fdoc-1</loc>'),
    'body should contain encoded loc for doc-1',
  );
  assert.ok(
    ctx._res.body.includes('<loc>https://proxy.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fdoc-2</loc>'),
    'body should contain encoded loc for doc-2',
  );
});
test('happy path — zero items → 200 with empty urlset', async (t) => {
const ctx = makeSitemapContext(t, async () => ({ data: { 'hydra:member': [] } }));
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
assert.strictEqual(ctx._res.headers['Content-Type'], 'application/xml');
assert.ok(ctx._res.body.includes('<urlset'), 'body should contain urlset');
assert.ok(!ctx._res.body.includes('<url>'), 'body should not contain url elements');
});
test('items with empty vkm:url filtered — only valid items appear', async (t) => {
const ctx = makeSitemapContext(t, async () => ({
data: {
'hydra:member': [
{ 'hydra:member': [{ 'vkm:url': '' }] },
{ 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/valid' }] },
],
},
}));
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
const locMatches = ctx._res.body.match(/<loc>/g);
assert.strictEqual(locMatches?.length ?? 0, 1, 'exactly one <loc> element expected');
assert.ok(ctx._res.body.includes('valid'), 'the valid URL should appear in the loc');
});
// US3 error scenarios (T011b)
test('upstream 503 → 502 with Search service error message', async (t) => {
const searchErr = Object.assign(new Error('Request failed with status code 503'), {
response: { status: 503 },
});
const ctx = makeSitemapContext(t, async () => { throw searchErr; });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 502);
assert.ok(ctx._res.body.includes('Search service error: HTTP 503'), `body was: ${ctx._res.body}`);
});
test('timeout ECONNABORTED → 504 Search service timeout', async (t) => {
const timeoutErr = Object.assign(new Error('timeout'), { code: 'ECONNABORTED' });
const ctx = makeSitemapContext(t, async () => { throw timeoutErr; });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 504);
assert.ok(ctx._res.body.includes('Search service timeout'), `body was: ${ctx._res.body}`);
});
test('timeout ERR_CANCELED → 504 Search service timeout', async (t) => {
const timeoutErr = Object.assign(new Error('canceled'), { code: 'ERR_CANCELED' });
const ctx = makeSitemapContext(t, async () => { throw timeoutErr; });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 504);
assert.ok(ctx._res.body.includes('Search service timeout'), `body was: ${ctx._res.body}`);
});
test('missing searchApiBaseUrl → 500 Configuration error', async (t) => {
const ctx = makeSitemapContext(t, null, { searchApiBaseUrl: undefined });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 500);
assert.strictEqual(ctx._res.body, 'Configuration error: missing required field: searchApiBaseUrl');
});
test('missing tenant → 500 Configuration error', async (t) => {
const ctx = makeSitemapContext(t, null, { tenant: undefined });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 500);
assert.strictEqual(ctx._res.body, 'Configuration error: missing required field: tenant');
});
test('missing proxyBaseUrl → 500 Configuration error', async (t) => {
const ctx = makeSitemapContext(t, null, { proxyBaseUrl: undefined });
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 500);
assert.strictEqual(ctx._res.body, 'Configuration error: missing required field: proxyBaseUrl');
});
});
// ---------------------------------------------------------------------------
// Non-sitemap URL routing — regression guard (T009)
// ---------------------------------------------------------------------------
describe('non-sitemap URL routing', () => {
test('cache hit → no fetch → 200 Authorized', async (t) => {
const ctx = makeContext(t, {
req: { url: '/', method: 'GET', headers: {} },
axios: {
post: t.mock.fn(async () => { throw new Error('should not be called'); }),
get: t.mock.fn(),
},
});
// Pre-seed valid token
ctx._store['authorization:token'] = 'cached-tok';
ctx._store['authorization:expiry'] = '9999999999';
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
assert.strictEqual(ctx._res.body, 'Authorized');
// axios.post was set to throw, so if it was called the test would fail
});
test('cache miss → fresh fetch → 200 Authorized', async (t) => {
const ctx = makeContext(t, {
req: { url: '/', method: 'GET', headers: {} },
});
// No pre-seeded token → cache miss
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 200);
assert.strictEqual(ctx._res.body, 'Authorized');
// Verify token was written to Redis
const hSetCalls = ctx._redis.hSet.mock.calls;
const tokenCall = hSetCalls.find(c => c.arguments[0] === 'authorization' && c.arguments[1] === 'token');
assert.ok(tokenCall, 'hSet should be called with token');
assert.strictEqual(tokenCall.arguments[2], 'mock-token');
});
test('token service down (ECONNABORTED) → 401 Unauthorized', async (t) => {
const timeoutErr = Object.assign(new Error('timeout'), { code: 'ECONNABORTED' });
const ctx = makeContext(t, {
req: { url: '/', method: 'GET', headers: {} },
axios: {
post: t.mock.fn(async () => { throw timeoutErr; }),
get: t.mock.fn(),
},
});
await runScript(ctx);
assert.strictEqual(ctx._res.statusCode, 401);
assert.ok(ctx._res.body.startsWith('Unauthorized:'), `body was: ${ctx._res.body}`);
});
});