Merge branch '002-sitemap-generation' into main
- feat(002): sitemap generation via KME search API - chore: bump version 0.1.0 → 0.2.0 - refactor: extract helpers into kmeContentSourceAdapterHelpers.js Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
7
.github/agents/copilot-instructions.md
vendored
7
.github/agents/copilot-instructions.md
vendored
@@ -0,0 +1,7 @@
|
|||||||
|
|
||||||
|
## Active Technologies
|
||||||
|
- Node.js ≥18, ESM (`"type": "module"`) + `axios` (HTTP), `redis` (token cache), `xmlbuilder2` (XML — already injected as `xmlBuilder`), `uuid`, `jsonwebtoken` — all already in `package.json` (002-sitemap-generation)
|
||||||
|
- Redis read/write (`hGet`/`hSet`) for OIDC token cache only — no new storage (002-sitemap-generation)
|
||||||
|
|
||||||
|
## Recent Changes
|
||||||
|
- 002-sitemap-generation: Added Node.js ≥18, ESM (`"type": "module"`) + `axios` (HTTP), `redis` (token cache), `xmlbuilder2` (XML — already injected as `xmlBuilder`), `uuid`, `jsonwebtoken` — all already in `package.json`
|
||||||
|
|||||||
2
.github/copilot-instructions.md
vendored
2
.github/copilot-instructions.md
vendored
@@ -1,7 +1,7 @@
|
|||||||
<!-- SPECKIT START -->
|
<!-- SPECKIT START -->
|
||||||
For additional context about technologies to be used, project structure,
|
For additional context about technologies to be used, project structure,
|
||||||
shell commands, and other important information, read the current plan at
|
shell commands, and other important information, read the current plan at
|
||||||
`specs/001-oidc-proxy-script/plan.md`
|
`specs/002-sitemap-generation/plan.md`
|
||||||
<!-- SPECKIT END -->
|
<!-- SPECKIT END -->
|
||||||
|
|
||||||
## Project Overview
|
## Project Overview
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"feature_directory": "specs/001-oidc-proxy-script"
|
"feature_directory": "specs/002-sitemap-generation"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -297,8 +297,9 @@ Follow-up TODOs:
|
|||||||
- ✅ `jwt` - JSON Web Token library for authentication
|
- ✅ `jwt` - JSON Web Token library for authentication
|
||||||
- ✅ `xmlBuilder` - XML document builder
|
- ✅ `xmlBuilder` - XML document builder
|
||||||
- ✅ `uuidv4` - UUID generator
|
- ✅ `uuidv4` - UUID generator
|
||||||
|
- ✅ `redis` - Redis client for token caching and shared state
|
||||||
- ✅ `adapterHelper` - Helper functions (loaded from src/globalVariables/)
|
- ✅ `adapterHelper` - Helper functions (loaded from src/globalVariables/)
|
||||||
- ✅ `adapter_settings` - Business data only (service account, Drive query, sitemap settings)
|
- ✅ `kme_CSA_settings` - Business data only (OIDC credentials, search API config, sitemap settings)
|
||||||
- ✅ `req` - HTTP request object (includes req.params with routing metadata)
|
- ✅ `req` - HTTP request object (includes req.params with routing metadata)
|
||||||
- ✅ `res` - HTTP response object
|
- ✅ `res` - HTTP response object
|
||||||
|
|
||||||
@@ -440,6 +441,7 @@ const globalVMContext = {
|
|||||||
uuidv4,
|
uuidv4,
|
||||||
jwt,
|
jwt,
|
||||||
xmlBuilder,
|
xmlBuilder,
|
||||||
|
redis, // Connected Redis client for token caching
|
||||||
};
|
};
|
||||||
|
|
||||||
// Load dynamic data from src/globalVariables/ directory
|
// Load dynamic data from src/globalVariables/ directory
|
||||||
@@ -505,14 +507,21 @@ script.runInContext(context);
|
|||||||
- Package: `xmlbuilder2` (create function)
|
- Package: `xmlbuilder2` (create function)
|
||||||
- Injected from: `globalVMContext.xmlBuilder`
|
- Injected from: `globalVMContext.xmlBuilder`
|
||||||
|
|
||||||
|
7. **redis** - Redis client
|
||||||
|
- Purpose: Token caching and shared state across requests
|
||||||
|
- Usage: `await redis.hGet('key', 'field')`, `await redis.hSet('key', 'field', 'value')`
|
||||||
|
- Package: `redis` (node-redis v4+, connected client)
|
||||||
|
- Injected from: `globalVMContext.redis`
|
||||||
|
- Note: Client is connected before server starts; use `await` for all operations
|
||||||
|
|
||||||
**Built-in Web APIs:**
|
**Built-in Web APIs:**
|
||||||
|
|
||||||
7. **URLSearchParams** - URL query string parser (built-in)
|
8. **URLSearchParams** - URL query string parser (built-in)
|
||||||
- Purpose: Parse and manipulate URL query strings
|
- Purpose: Parse and manipulate URL query strings
|
||||||
- Usage: `new URLSearchParams(queryString)`
|
- Usage: `new URLSearchParams(queryString)`
|
||||||
- Injected from: `globalVMContext.URLSearchParams`
|
- Injected from: `globalVMContext.URLSearchParams`
|
||||||
|
|
||||||
8. **URL** - URL parser (built-in)
|
9. **URL** - URL parser (built-in)
|
||||||
- Purpose: Parse and manipulate URLs
|
- Purpose: Parse and manipulate URLs
|
||||||
- Usage: `new URL(urlString)`
|
- Usage: `new URL(urlString)`
|
||||||
- Injected from: `globalVMContext.URL`
|
- Injected from: `globalVMContext.URL`
|
||||||
@@ -520,14 +529,14 @@ script.runInContext(context);
|
|||||||
|
|
||||||
**Dynamic Data Context Variables:**
|
**Dynamic Data Context Variables:**
|
||||||
|
|
||||||
9. **Dynamic JSON objects from src/globalVariables/ directory**
|
10. **Dynamic JSON objects from src/globalVariables/ directory**
|
||||||
- Purpose: Authentication credentials, secrets, API keys, and behavioral configuration
|
- Purpose: Authentication credentials, secrets, API keys, and behavioral configuration
|
||||||
- Pattern: Each `src/globalVariables/filename.json` loaded by server.js → added to `globalVariableContext` → spread into VM context
|
- Pattern: Each `src/globalVariables/filename.json` loaded by server.js → added to `globalVariableContext` → spread into VM context
|
||||||
- Examples:
|
- Examples:
|
||||||
- `src/globalVariables/adapter_settings.json` → context variable `adapter_settings` (consolidated service account, scopes, drive query, sitemap config)
|
- `src/globalVariables/kme_CSA_settings.json` → context variable `kme_CSA_settings` (OIDC credentials, search API config, sitemap settings)
|
||||||
- `src/globalVariables/api-keys.json` → context variable `api_keys` (API keys and secrets)
|
- `src/globalVariables/api-keys.json` → context variable `api_keys` (API keys and secrets)
|
||||||
- `src/globalVariables/custom-config.json` → context variable `custom_config` (behavioral settings)
|
- `src/globalVariables/custom-config.json` → context variable `custom_config` (behavioral settings)
|
||||||
- Usage in src/proxyScripts/proxy.js: Direct variable access `const settings = adapter_settings;`
|
- Usage in src/proxyScripts/proxy.js: Direct variable access `const settings = kme_CSA_settings;`
|
||||||
- Loading: By server.js at startup using `loadGlobalObjects()` function
|
- Loading: By server.js at startup using `loadGlobalObjects()` function
|
||||||
- Injection: Via spread operator `...globalVariableContext` in `vm.createContext()`
|
- Injection: Via spread operator `...globalVariableContext` in `vm.createContext()`
|
||||||
- **Note**: ALL authentication, secrets, and behavioral configuration MUST be in src/globalVariables/, NEVER in config/default.json
|
- **Note**: ALL authentication, secrets, and behavioral configuration MUST be in src/globalVariables/, NEVER in config/default.json
|
||||||
|
|||||||
15
CHANGELOG.md
15
CHANGELOG.md
@@ -11,6 +11,21 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## [0.2.0] - 2026-04-23
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- `GET /sitemap.xml` endpoint: returns a well-formed XML Sitemap (Sitemaps protocol 0.9) containing one `<url><loc>` per knowledge item from the KME Knowledge Search Service
|
||||||
|
- `sitemapFlow()` async function in `kmeContentSourceAdapter.js` — settings validation, OIDC token reuse, search API call, XML build via `xmlBuilder`, 10-second timeout, 502/504/500 error responses
|
||||||
|
- `getValidToken()` shared helper extracted from the existing OIDC auth flow — used by both sitemap and non-sitemap paths
|
||||||
|
- URL routing at IIFE entry point: requests ending in `/sitemap.xml` → `sitemapFlow()`, all others → `oidcAuthFlow()`
|
||||||
|
- Three new fields in `src/globalVariables/kme_CSA_settings.json`: `searchApiBaseUrl`, `tenant`, `proxyBaseUrl`
|
||||||
|
- Three new placeholder fields in `src/globalVariables/kme_CSA_settings.json.example`
|
||||||
|
- Unit tests for sitemap flow: happy path (items present), empty results, `vkm:url` filtering, 502/504/500 error scenarios, non-sitemap regression tests
|
||||||
|
- Contract tests for sitemap endpoint: full round-trip 200, empty results 200, 502 upstream error, 504 timeout
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## [0.1.0] - 2026-04-23
|
## [0.1.0] - 2026-04-23
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "kme-content-adapter",
|
"name": "kme-content-adapter",
|
||||||
"version": "0.1.0",
|
"version": "0.2.0",
|
||||||
"description": "HTTP proxy adapter to search and export documents from KME",
|
"description": "HTTP proxy adapter to search and export documents from KME",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"main": "src/server.js",
|
"main": "src/server.js",
|
||||||
|
|||||||
36
specs/002-sitemap-generation/checklists/requirements.md
Normal file
36
specs/002-sitemap-generation/checklists/requirements.md
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# Specification Quality Checklist: Sitemap XML Generation
|
||||||
|
|
||||||
|
**Purpose**: Validate specification completeness and quality before proceeding to planning
|
||||||
|
**Created**: 2025-07-14
|
||||||
|
**Feature**: [spec.md](../spec.md)
|
||||||
|
|
||||||
|
## Content Quality
|
||||||
|
|
||||||
|
- [x] No implementation details (languages, frameworks, APIs) — *Note: FR-008/FR-009 reference `xmlBuilder` and the VM sandbox constraint. These are explicitly mandated architectural constraints from the feature description, not incidental implementation choices; they belong in the spec as requirements.*
|
||||||
|
- [x] Focused on user value and business needs
|
||||||
|
- [x] Written for non-technical stakeholders — *Technical terms (Redis, OIDC) are domain-specific to this integration; they cannot be abstracted away without losing meaning.*
|
||||||
|
- [x] All mandatory sections completed — User Scenarios, Requirements, Success Criteria, Assumptions all present
|
||||||
|
|
||||||
|
## Requirement Completeness
|
||||||
|
|
||||||
|
- [x] No [NEEDS CLARIFICATION] markers remain
|
||||||
|
- [x] Requirements are testable and unambiguous — All FRs use precise MUST language with measurable conditions
|
||||||
|
- [x] Success criteria are measurable — SC-001 (5-second response time), SC-002 (zero silent drops), SC-003 (zero regressions), SC-004 (XSD validation), SC-005 (10-second error bound)
|
||||||
|
- [x] Success criteria are technology-agnostic — SC-004 references the public Sitemaps XSD standard, not an internal tool
|
||||||
|
- [x] All acceptance scenarios are defined — 8 acceptance scenarios across 3 user stories
|
||||||
|
- [x] Edge cases are identified — 5 edge cases documented (expired token, missing `vkm:url`, large result sets, missing settings, missing `xmlBuilder`)
|
||||||
|
- [x] Scope is clearly bounded — v1 scope explicitly excludes pagination, multi-tenant, and optional sitemap elements
|
||||||
|
- [x] Dependencies and assumptions identified — 8 assumptions documented
|
||||||
|
|
||||||
|
## Feature Readiness
|
||||||
|
|
||||||
|
- [x] All functional requirements have clear acceptance criteria — FR-001–FR-013 each trace to at least one acceptance scenario or edge case
|
||||||
|
- [x] User scenarios cover primary flows — Happy path (P1), backwards compatibility (P2), error/degradation (P3)
|
||||||
|
- [x] Feature meets measurable outcomes defined in Success Criteria — All 5 success criteria are verifiable without implementation knowledge
|
||||||
|
- [x] No implementation details leak into specification — Architectural constraints are present as explicit requirements per the feature description
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- All checklist items pass. The spec is ready for `/speckit.clarify` (optional) or `/speckit.plan`.
|
||||||
|
- The shape of the Knowledge Search Service response envelope (how results are nested) is assumed in the Assumptions section and flagged for confirmation during implementation.
|
||||||
|
- SC-001 (5 seconds) and the 10-second timeout assumption are reasonable defaults and can be revisited during planning if the team has SLA data for the KME environment.
|
||||||
189
specs/002-sitemap-generation/contracts/sitemap-endpoint.md
Normal file
189
specs/002-sitemap-generation/contracts/sitemap-endpoint.md
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
# Contract: Sitemap Endpoint
|
||||||
|
|
||||||
|
**Feature**: `002-sitemap-generation`
|
||||||
|
**Endpoint type**: HTTP GET
|
||||||
|
**Introduced in**: `002-sitemap-generation`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The `kme-content-adapter` proxy exposes a single new HTTP endpoint: `GET /sitemap.xml` (or
|
||||||
|
any URL whose path ends with `/sitemap.xml`). This contract governs the complete observable
|
||||||
|
behaviour of that endpoint from the consumer's perspective.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Endpoint
|
||||||
|
|
||||||
|
```
|
||||||
|
GET <proxy-base-url>/sitemap.xml
|
||||||
|
```
|
||||||
|
|
||||||
|
The adapter detects sitemap requests by checking whether `req.url` ends with `/sitemap.xml`.
|
||||||
|
The full path prefix (if any) is determined by how the reverse proxy routes requests to this
|
||||||
|
adapter.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Request
|
||||||
|
|
||||||
|
### Method
|
||||||
|
`GET`
|
||||||
|
|
||||||
|
### Headers
|
||||||
|
No special request headers required. The adapter uses its own internally cached OIDC token
|
||||||
|
to authenticate the upstream call to the KME Knowledge Search Service.
|
||||||
|
|
||||||
|
### Body
|
||||||
|
None.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Responses
|
||||||
|
|
||||||
|
### 200 OK — Sitemap generated successfully
|
||||||
|
|
||||||
|
**Condition**: The KME Knowledge Search Service returned a 2xx response and the sitemap was
|
||||||
|
built without errors.
|
||||||
|
|
||||||
|
**Headers**:
|
||||||
|
```
|
||||||
|
Content-Type: application/xml
|
||||||
|
```
|
||||||
|
|
||||||
|
**Body**: A well-formed XML Sitemap document conforming to
|
||||||
|
[https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd](https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd).
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||||
|
<url>
|
||||||
|
<loc>{proxyBaseUrl}?kmeURL={encodeURIComponent(vkmUrl)}</loc>
|
||||||
|
</url>
|
||||||
|
<!-- one <url> element per knowledge item with a non-empty vkm:url -->
|
||||||
|
</urlset>
|
||||||
|
```
|
||||||
|
|
||||||
|
**Empty-result variant** (search service returns zero items):
|
||||||
|
```xml
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"/>
|
||||||
|
```
|
||||||
|
|
||||||
|
### 500 Internal Server Error — Missing configuration
|
||||||
|
|
||||||
|
**Condition**: One or more required settings fields (`searchApiBaseUrl`, `tenant`,
|
||||||
|
`proxyBaseUrl`) are absent from `kme_CSA_settings`.
|
||||||
|
|
||||||
|
**Headers**:
|
||||||
|
```
|
||||||
|
Content-Type: text/plain
|
||||||
|
```
|
||||||
|
|
||||||
|
**Body**:
|
||||||
|
```
|
||||||
|
Configuration error: missing required field: <fieldName>
|
||||||
|
```
|
||||||
|
|
||||||
|
### 502 Bad Gateway — Upstream search service error
|
||||||
|
|
||||||
|
**Condition**: The KME Knowledge Search Service returned a non-2xx HTTP response.
|
||||||
|
|
||||||
|
**Headers**:
|
||||||
|
```
|
||||||
|
Content-Type: text/plain
|
||||||
|
```
|
||||||
|
|
||||||
|
**Body**:
|
||||||
|
```
|
||||||
|
Search service error: HTTP <status>
|
||||||
|
```
|
||||||
|
|
||||||
|
### 504 Gateway Timeout — Upstream search service timeout
|
||||||
|
|
||||||
|
**Condition**: The request to the KME Knowledge Search Service timed out (no response within 10 000 ms).
|
||||||
|
|
||||||
|
**Headers**:
|
||||||
|
```
|
||||||
|
Content-Type: text/plain
|
||||||
|
```
|
||||||
|
|
||||||
|
**Body**:
|
||||||
|
```
|
||||||
|
Search service timeout
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## `<loc>` URL Format
|
||||||
|
|
||||||
|
Each `<loc>` element is constructed as:
|
||||||
|
|
||||||
|
```
|
||||||
|
{proxyBaseUrl}?kmeURL={encodeURIComponent(item['vkm:url'])}
|
||||||
|
```
|
||||||
|
|
||||||
|
Where:
|
||||||
|
- `proxyBaseUrl` is taken from `kme_CSA_settings.proxyBaseUrl` (e.g., `https://adapter.example.com`)
|
||||||
|
- `item['vkm:url']` is the raw `vkm:url` value from the search service result
|
||||||
|
- `encodeURIComponent` percent-encodes the value so it is safe as a query parameter
|
||||||
|
|
||||||
|
**Example**:
|
||||||
|
```
|
||||||
|
https://adapter.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fknowledge%2Farticle-123
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Authentication to Upstream (internal, not exposed to consumer)
|
||||||
|
|
||||||
|
The adapter authenticates to the KME Knowledge Search Service using:
|
||||||
|
|
||||||
|
```
|
||||||
|
Authorization: OIDC_id_token <token>
|
||||||
|
```
|
||||||
|
|
||||||
|
Where `<token>` is the `id_token` from the OIDC token service, cached in Redis at
|
||||||
|
hash `authorization`, field `token`. Token refresh uses the same stampede-guarded fetch already present
|
||||||
|
in the existing OIDC auth flow.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Existing Endpoint Behaviour (unchanged)
|
||||||
|
|
||||||
|
All requests whose URL does **not** end in `/sitemap.xml` continue to use the existing OIDC
|
||||||
|
authentication flow with no change in response behaviour:
|
||||||
|
|
||||||
|
| Condition | Response |
|
||||||
|
|---|---|
|
||||||
|
| Valid cached OIDC token | `200 Authorized` (`text/plain`) |
|
||||||
|
| No cached token — fetch succeeds | `200 Authorized` (`text/plain`) |
|
||||||
|
| Token service unreachable | `401 Unauthorized: <error>` (`text/plain`) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Non-Functional Constraints
|
||||||
|
|
||||||
|
| Constraint | Value | Source |
|
||||||
|
|---|---|---|
|
||||||
|
| Search API timeout | 10 000 ms | Spec assumption |
|
||||||
|
| Max response time (normal conditions) | < 5 000 ms | SC-001 |
|
||||||
|
| Max response time (error scenarios) | < 10 000 ms | SC-005 |
|
||||||
|
| Pagination | Not supported (v1) | Spec assumption |
|
||||||
|
| Multi-tenant | Not supported (v1) | Spec assumption |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Sitemap Protocol Compliance
|
||||||
|
|
||||||
|
The returned XML must validate against the Sitemaps XSD:
|
||||||
|
`https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd`
|
||||||
|
|
||||||
|
Required elements per entry (v1 scope):
|
||||||
|
- `<loc>` — mandatory
|
||||||
|
|
||||||
|
Optional elements **not included** in v1:
|
||||||
|
- `<lastmod>` — out of scope
|
||||||
|
- `<changefreq>` — out of scope
|
||||||
|
- `<priority>` — out of scope
|
||||||
202
specs/002-sitemap-generation/data-model.md
Normal file
202
specs/002-sitemap-generation/data-model.md
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
# Data Model: Sitemap XML Generation
|
||||||
|
|
||||||
|
**Feature**: `002-sitemap-generation`
|
||||||
|
**Branch**: `002-sitemap-generation`
|
||||||
|
**Date**: 2025-07-14
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Entities
|
||||||
|
|
||||||
|
### 1. `KnowledgeItem` (external, read-only)
|
||||||
|
|
||||||
|
Represents a single document returned by the KME Knowledge Search Service. The adapter reads
|
||||||
|
this shape from the upstream API response and never persists or mutates it.
|
||||||
|
|
||||||
|
| Field | Type | Source | Notes |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `vkm:url` | `string \| undefined` | Search API response `items[]` | Canonical document URL. **Required** for sitemap inclusion. Items where this field is absent or empty are silently omitted (FR-006). |
|
||||||
|
| `title` | `string \| undefined` | Search API response | Not used by the sitemap; present in payload, ignored. |
|
||||||
|
| *(other fields)* | `any` | Search API response | Ignored; adapter reads only `vkm:url`. |
|
||||||
|
|
||||||
|
**Assumed response envelope** (to be verified against live API — see research.md R-002):
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"items": [
|
||||||
|
{ "vkm:url": "https://kme.example.com/knowledge/doc-1", "title": "Doc One" },
|
||||||
|
{ "vkm:url": "https://kme.example.com/knowledge/doc-2", "title": "Doc Two" }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
If the root is a bare array, `response.data` itself is treated as the items array.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2. `SitemapEntry` (derived, in-memory)
|
||||||
|
|
||||||
|
Represents a single `<url>/<loc>` entry in the generated sitemap XML. Derived from a `KnowledgeItem`
|
||||||
|
during the transformation step.
|
||||||
|
|
||||||
|
| Field | Type | Derivation |
|
||||||
|
|---|---|---|
|
||||||
|
| `loc` | `string` | `${kme_CSA_settings.proxyBaseUrl}?kmeURL=${encodeURIComponent(item['vkm:url'])}` |
|
||||||
|
|
||||||
|
**Validation rules**:
|
||||||
|
- Only produced if `item['vkm:url']` is a non-empty string.
|
||||||
|
- The resulting `loc` must be a percent-encoded absolute URL.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3. `SitemapDocument` (output)
|
||||||
|
|
||||||
|
The XML document returned in the HTTP response body.
|
||||||
|
|
||||||
|
| Attribute | Value |
|
||||||
|
|---|---|
|
||||||
|
| XML version | `1.0` |
|
||||||
|
| Encoding | `UTF-8` |
|
||||||
|
| Root element | `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` |
|
||||||
|
| Child elements | Zero or more `<url><loc>…</loc></url>` entries |
|
||||||
|
|
||||||
|
**Populated sitemap**:
|
||||||
|
```xml
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||||
|
<url>
|
||||||
|
<loc>https://adapter.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fknowledge%2Fdoc-1</loc>
|
||||||
|
</url>
|
||||||
|
<url>
|
||||||
|
<loc>https://adapter.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fknowledge%2Fdoc-2</loc>
|
||||||
|
</url>
|
||||||
|
</urlset>
|
||||||
|
```
|
||||||
|
|
||||||
|
**Empty sitemap** (zero results from search API):
|
||||||
|
```xml
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"/>
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 4. `OIDCTokenCache` (shared, Redis)
|
||||||
|
|
||||||
|
The existing Redis-backed OIDC token store. The sitemap flow **reads** and **writes** this store
|
||||||
|
using the identical hGet/hSet pattern as the existing OIDC auth flow.
|
||||||
|
|
||||||
|
| Redis Key | Field | Type | Description |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `authorization` | `token` | `string` | The OIDC `id_token` JWT |
|
||||||
|
| `authorization` | `expiry` | `string (float)` | Unix timestamp (seconds) when token expires |
|
||||||
|
|
||||||
|
**Access pattern in sitemap flow**:
|
||||||
|
1. `hGet('authorization', 'token')` — read cached token
|
||||||
|
2. `hGet('authorization', 'expiry')` — read cached expiry
|
||||||
|
3. If expired or absent: invoke token-refresh sequence → `hSet` both fields
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 5. `kme_CSA_settings` (configuration, JSON)
|
||||||
|
|
||||||
|
The settings object injected into the VM context from `src/globalVariables/kme_CSA_settings.json`.
|
||||||
|
This feature extends it with three new fields.
|
||||||
|
|
||||||
|
**Full schema after this feature**:
|
||||||
|
|
||||||
|
| Field | Type | Existing/New | Required By |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `tokenUrl` | `string` | Existing | OIDC token fetch (all flows) |
|
||||||
|
| `username` | `string` | Existing | OIDC token fetch |
|
||||||
|
| `password` | `string` | Existing | OIDC token fetch |
|
||||||
|
| `clientId` | `string` | Existing | OIDC token fetch |
|
||||||
|
| `scope` | `string` | Existing | OIDC token fetch |
|
||||||
|
| `searchApiBaseUrl` | `string` | **New** | FR-002, FR-010 |
|
||||||
|
| `tenant` | `string` | **New** | FR-002, FR-010 |
|
||||||
|
| `proxyBaseUrl` | `string` | **New** | FR-005, FR-010 |
|
||||||
|
| `_pendingFetch` | `Promise \| null` | Runtime only (not in JSON) | Stampede guard |
|
||||||
|
|
||||||
|
**Validation**:
|
||||||
|
- Existing fields validated at top of script for all requests (unchanged).
|
||||||
|
- New fields validated at start of sitemap branch only (FR-011).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## State Transitions
|
||||||
|
|
||||||
|
### Sitemap Request Lifecycle
|
||||||
|
|
||||||
|
```
|
||||||
|
Incoming GET /…/sitemap.xml
|
||||||
|
|
|
||||||
|
v
|
||||||
|
Validate settings --> 500 Internal Server Error (missing field)
|
||||||
|
(searchApiBaseUrl,
|
||||||
|
tenant, proxyBaseUrl)
|
||||||
|
|
|
||||||
|
v
|
||||||
|
Read token from Redis
|
||||||
|
|
|
||||||
|
[valid?]
|
||||||
|
YES | NO
|
||||||
|
| v
|
||||||
|
| Refresh token --> 401 Unauthorized (token fetch failed)
|
||||||
|
| |
|
||||||
|
+-------+
|
||||||
|
v
|
||||||
|
GET <searchApiBaseUrl>/<tenant>
|
||||||
|
Authorization: OIDC_id_token <token>
|
||||||
|
timeout: 10 000 ms
|
||||||
|
|
|
||||||
|
[success?]
|
||||||
|
YES | NO
|
||||||
|
| +--> timeout --> 504 Gateway Timeout
|
||||||
|
| +--> non-2xx response --> 502 Bad Gateway
|
||||||
|
v
|
||||||
|
Map items --> SitemapEntry[]
|
||||||
|
(skip empty vkm:url)
|
||||||
|
|
|
||||||
|
v
|
||||||
|
Build SitemapDocument (xmlBuilder)
|
||||||
|
|
|
||||||
|
v
|
||||||
|
200 OK
|
||||||
|
Content-Type: application/xml
|
||||||
|
Body: <?xml ...><urlset>...</urlset>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Non-Sitemap Request Lifecycle (unchanged)
|
||||||
|
|
||||||
|
All requests whose URL does NOT end with `/sitemap.xml` follow the existing OIDC auth flow
|
||||||
|
exactly as before. No modification to that path.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Changes
|
||||||
|
|
||||||
|
### Modified: `src/globalVariables/kme_CSA_settings.json`
|
||||||
|
|
||||||
|
Three new fields added (existing fields unchanged):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"tokenUrl": "…",
|
||||||
|
"username": "…",
|
||||||
|
"password": "…",
|
||||||
|
"clientId": "…",
|
||||||
|
"scope": "…",
|
||||||
|
"searchApiBaseUrl": "https://kme-search.example.com/api/search",
|
||||||
|
"tenant": "my-tenant",
|
||||||
|
"proxyBaseUrl": "https://adapter.example.com"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Modified: `src/proxyScripts/kmeContentSourceAdapter.js`
|
||||||
|
|
||||||
|
Logic added:
|
||||||
|
1. URL routing guard at entry point.
|
||||||
|
2. `sitemapFlow` async block: settings validation, token reuse, search API call, XML build, response.
|
||||||
|
3. Existing OIDC auth flow moved to `else` branch (no logic changes).
|
||||||
|
|
||||||
|
### Modified: `src/globalVariables/kme_CSA_settings.json.example`
|
||||||
|
|
||||||
|
Updated to include the three new fields with placeholder values.
|
||||||
248
specs/002-sitemap-generation/plan.md
Normal file
248
specs/002-sitemap-generation/plan.md
Normal file
@@ -0,0 +1,248 @@
|
|||||||
|
# Implementation Plan: Sitemap XML Generation
|
||||||
|
|
||||||
|
**Branch**: `002-sitemap-generation` | **Date**: 2025-07-14 | **Spec**: [spec.md](./spec.md)
|
||||||
|
**Input**: Feature specification from `/specs/002-sitemap-generation/spec.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
Add a `GET /sitemap.xml` route to `kmeContentSourceAdapter.js`. The adapter detects sitemap
|
||||||
|
requests by URL suffix, obtains a valid OIDC `id_token` from the Redis cache (reusing the
|
||||||
|
existing stampede-guarded refresh logic), calls the KME Knowledge Search Service, maps each
|
||||||
|
result's `vkm:url` field to a `<loc>` entry, and returns a standards-compliant XML Sitemap as
|
||||||
|
`application/xml`. All existing non-sitemap requests are unaffected. Three new fields are added
|
||||||
|
to `kme_CSA_settings.json` (`searchApiBaseUrl`, `tenant`, `proxyBaseUrl`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Technical Context
|
||||||
|
|
||||||
|
**Language/Version**: Node.js ≥18, ESM (`"type": "module"`)
|
||||||
|
**Primary Dependencies**: `axios` (HTTP), `redis` (token cache), `xmlbuilder2` (XML — already injected as `xmlBuilder`), `uuid`, `jsonwebtoken` — all already in `package.json`
|
||||||
|
**Storage**: Redis read/write (`hGet`/`hSet`) for OIDC token cache only — no new storage
|
||||||
|
**Testing**: Node.js built-in test runner (`node:test`); no external test framework
|
||||||
|
**Target Platform**: Linux server / container (HTTP proxy adapter)
|
||||||
|
**Project Type**: HTTP proxy adapter (web-service)
|
||||||
|
**Performance Goals**: Sitemap response < 5 s p95 under normal conditions (SC-001); error responses < 10 s (SC-005)
|
||||||
|
**Constraints**:
|
||||||
|
- Zero `import`/`export` in `kmeContentSourceAdapter.js` (runs in `vm.createContext`)
|
||||||
|
- No references to `config`, `global.config`, or `process.env` in proxy script
|
||||||
|
- XML built exclusively with the injected `xmlBuilder` (FR-008)
|
||||||
|
- No new npm packages; no new source files (monolithic architecture — Section I of constitution)
|
||||||
|
**Scale/Scope**: Single tenant per deployment; all search results in one API call (no pagination, v1)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Constitution Check
|
||||||
|
|
||||||
|
*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.*
|
||||||
|
|
||||||
|
| # | Principle | Status | Notes |
|
||||||
|
|---|---|---|---|
|
||||||
|
| I | Monolithic architecture | ✅ PASS | All new code added to `kmeContentSourceAdapter.js`; no new source files |
|
||||||
|
| I (vm.Script) | Zero imports/exports in proxy script | ✅ PASS | Sitemap logic is inlined; no import statements introduced |
|
||||||
|
| I.0 | No forbidden globals (`config`, `global.config`, `process.env`) | ✅ PASS | Only `kme_CSA_settings`, `redis`, `axios`, `xmlBuilder`, `req`, `res` used |
|
||||||
|
| I.I | Business logic in proxy.js | ✅ PASS | Auth, API call, XML generation all in `kmeContentSourceAdapter.js` |
|
||||||
|
| I.II | Separate files only for allowed categories | ✅ PASS | Settings JSON in `src/globalVariables/` (existing pattern) |
|
||||||
|
| I.III | No new files challenged | ✅ PASS | No new files in `src/` |
|
||||||
|
| I.IV | New config in `src/globalVariables/` not `config/default.json` | ✅ PASS | Three fields added to `kme_CSA_settings.json` |
|
||||||
|
| I.V | `xmlBuilder` already in `globalVMContext` | ✅ PASS | `xmlbuilder2` `create` already injected; no server.js changes needed |
|
||||||
|
| II | API-First Design | ✅ PASS | HTTP contract documented in `contracts/sitemap-endpoint.md` |
|
||||||
|
| III | Test-First Development | ✅ REQUIRED | Unit + contract tests must be written before/alongside implementation |
|
||||||
|
| VII | No new dependencies | ✅ PASS | All required packages already installed (`xmlbuilder2`, `axios`, `redis`) |
|
||||||
|
|
||||||
|
**Post-design re-check**: All gates still pass. The design introduces zero new files, zero new dependencies, and zero architectural violations.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
### Documentation (this feature)
|
||||||
|
|
||||||
|
```text
|
||||||
|
specs/002-sitemap-generation/
|
||||||
|
├── plan.md # This file (/speckit.plan command output)
|
||||||
|
├── spec.md # Feature specification
|
||||||
|
├── research.md # Phase 0 output (/speckit.plan command)
|
||||||
|
├── data-model.md # Phase 1 output (/speckit.plan command)
|
||||||
|
├── quickstart.md # Phase 1 output (/speckit.plan command)
|
||||||
|
├── contracts/ # Phase 1 output (/speckit.plan command)
|
||||||
|
│ └── sitemap-endpoint.md
|
||||||
|
└── tasks.md # Phase 2 output (/speckit.tasks command - NOT created by /speckit.plan)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Source Code (repository root)
|
||||||
|
|
||||||
|
```text
|
||||||
|
src/
|
||||||
|
├── proxyScripts/
|
||||||
|
│ └── kmeContentSourceAdapter.js # MODIFIED: sitemap branch + token helper added
|
||||||
|
├── globalVariables/
|
||||||
|
│ ├── kme_CSA_settings.json # MODIFIED: 3 new fields (searchApiBaseUrl, tenant, proxyBaseUrl)
|
||||||
|
│ └── kme_CSA_settings.json.example # MODIFIED: updated with new field placeholders
|
||||||
|
└── server.js # NO CHANGE
|
||||||
|
|
||||||
|
tests/
|
||||||
|
├── unit/
|
||||||
|
│ └── proxy.test.js # MODIFIED: sitemap test cases added
|
||||||
|
└── contract/
|
||||||
|
└── proxy-http.test.js # MODIFIED: sitemap HTTP contract tests added
|
||||||
|
```
|
||||||
|
|
||||||
|
**Structure Decision**: Single-project layout. No new directories. Only the proxy script, its
|
||||||
|
settings JSON, and the existing test files are modified.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 0: Research Findings
|
||||||
|
|
||||||
|
> Full research notes: [research.md](./research.md)
|
||||||
|
|
||||||
|
| Research ID | Topic | Decision |
|
||||||
|
|---|---|---|
|
||||||
|
| R-001 | Token reuse | Inline shared `getValidToken()` helper in proxy script; branch on URL first |
|
||||||
|
| R-002 | Search API response shape | Assume `{ items: [...] }`; verify against live API during implementation |
|
||||||
|
| R-003 | xmlbuilder2 API | `xmlBuilder({...}).ele('urlset',{xmlns:...})…doc.end({})` — no prettyPrint |
|
||||||
|
| R-004 | Error mapping | Reuse `err.response` / `err.code === ECONNABORTED\|ERR_CANCELED` pattern |
|
||||||
|
| R-005 | Settings validation | `requiredSitemapFields` guard before any async work → HTTP 500 |
|
||||||
|
| R-006 | `loc` construction | `` `${proxyBaseUrl}?kmeURL=${encodeURIComponent(item['vkm:url'])}` `` |
|
||||||
|
|
||||||
|
**Resolved NEEDS CLARIFICATION**: None remain. All decisions are documented.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 1: Design
|
||||||
|
|
||||||
|
### Data Model
|
||||||
|
|
||||||
|
> Full data model: [data-model.md](./data-model.md)
|
||||||
|
|
||||||
|
**Key entities**:
|
||||||
|
- `KnowledgeItem` — raw search result with `vkm:url` (read-only, from upstream API)
|
||||||
|
- `SitemapEntry` — `{ loc: string }` derived in-memory from `KnowledgeItem`
|
||||||
|
- `SitemapDocument` — serialised XML output (`urlset` + `url` elements)
|
||||||
|
- `OIDCTokenCache` — shared Redis store (unchanged; `hGet`/`hSet` pattern reused)
|
||||||
|
- `kme_CSA_settings` — extended JSON settings (3 new fields)
|
||||||
|
|
||||||
|
### Contracts
|
||||||
|
|
||||||
|
> Full contract: [contracts/sitemap-endpoint.md](./contracts/sitemap-endpoint.md)
|
||||||
|
|
||||||
|
| Scenario | Status | Response |
|
||||||
|
|---|---|---|
|
||||||
|
| Search succeeds, items present | 200 | `application/xml` sitemap with `<url>` entries |
|
||||||
|
| Search succeeds, zero items | 200 | `application/xml` empty `<urlset/>` |
|
||||||
|
| Missing settings field | 500 | `text/plain` descriptive message |
|
||||||
|
| Upstream non-2xx | 502 | `text/plain` upstream error |
|
||||||
|
| Upstream timeout | 504 | `text/plain` timeout message |
|
||||||
|
|
||||||
|
### Implementation Design
|
||||||
|
|
||||||
|
**Entry point restructure** (single IIFE, no imports):
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
(async () => {
|
||||||
|
// FR-001: Route on URL suffix
|
||||||
|
if (req.url.endsWith('/sitemap.xml')) {
|
||||||
|
await sitemapFlow();
|
||||||
|
} else {
|
||||||
|
await oidcAuthFlow(); // existing logic, moved to inner async function
|
||||||
|
}
|
||||||
|
})();
|
||||||
|
```
|
||||||
|
|
||||||
|
**`sitemapFlow` logic**:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
async function sitemapFlow() {
|
||||||
|
// FR-011: Validate required settings
|
||||||
|
const required = ['searchApiBaseUrl', 'tenant', 'proxyBaseUrl'];
|
||||||
|
for (const f of required) {
|
||||||
|
if (!kme_CSA_settings[f]) {
|
||||||
|
res.writeHead(500, { 'Content-Type': 'text/plain' });
|
||||||
|
res.end('Configuration error: missing required field: ' + f);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FR-003: Obtain valid OIDC token (shared helper with existing flow)
|
||||||
|
const token = await getValidToken(); // throws on failure → caught by outer try/catch
|
||||||
|
|
||||||
|
// FR-002: Call KME Knowledge Search Service
|
||||||
|
const { searchApiBaseUrl, tenant, proxyBaseUrl } = kme_CSA_settings;
|
||||||
|
const searchResponse = await axios.get(
|
||||||
|
`${searchApiBaseUrl}/${tenant}`,
|
||||||
|
{
|
||||||
|
headers: { Authorization: `OIDC_id_token ${token}` },
|
||||||
|
timeout: 10_000,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
// Extract items (R-002: assume { items: [...] } or bare array)
|
||||||
|
const items = searchResponse.data.items ?? searchResponse.data ?? [];
|
||||||
|
|
||||||
|
// FR-004, FR-005, FR-006, FR-008: Build sitemap XML
|
||||||
|
const doc = xmlBuilder({ version: '1.0', encoding: 'UTF-8' });
|
||||||
|
const urlset = doc.ele('urlset', { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' });
|
||||||
|
for (const item of items) {
|
||||||
|
const vkmUrl = item['vkm:url'];
|
||||||
|
if (!vkmUrl) continue; // FR-006: omit silently
|
||||||
|
const loc = `${proxyBaseUrl}?kmeURL=${encodeURIComponent(vkmUrl)}`;
|
||||||
|
urlset.ele('url').ele('loc').txt(loc).up().up();
|
||||||
|
}
|
||||||
|
const xml = doc.end({ prettyPrint: false });
|
||||||
|
|
||||||
|
// FR-007: Respond
|
||||||
|
res.writeHead(200, { 'Content-Type': 'application/xml' });
|
||||||
|
res.end(xml);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Error handling** (wrapping `sitemapFlow` catch):
|
||||||
|
- `err.code === 'ECONNABORTED' || err.code === 'ERR_CANCELED'` → 504
|
||||||
|
- `err.response` defined → 502 `Search service error: HTTP ${err.response.status}`
|
||||||
|
- other → 502 `Search service error: ${err.message}`
|
||||||
|
|
||||||
|
**`getValidToken` helper** (shared inline function; extract from existing OIDC flow):
|
||||||
|
|
||||||
|
Encapsulates steps 2–6 of the existing flow:
|
||||||
|
- `hGet('authorization', 'token')` / `hGet('authorization', 'expiry')`
|
||||||
|
- Cache hit → return token
|
||||||
|
- Stampede guard → queue on in-flight promise
|
||||||
|
- Cache miss → `axios.post(tokenUrl, ...)` → `hSet` both fields
|
||||||
|
- Returns the `id_token` string; throws on failure
|
||||||
|
|
||||||
|
**Token fetch failure in sitemap context**: If `getValidToken` throws, the outer catch
|
||||||
|
returns `401 Unauthorized: <message>` (same as existing flow).
|
||||||
|
|
||||||
|
### Test Plan
|
||||||
|
|
||||||
|
**Unit tests** (`tests/unit/proxy.test.js`) — new `describe('sitemap flow')` block:
|
||||||
|
|
||||||
|
| Scenario | Mock | Assert |
|
||||||
|
|---|---|---|
|
||||||
|
| Happy path: items present | axios.get → `{ items: [{ 'vkm:url': '...' }] }` | 200, `application/xml`, `<loc>` |
|
||||||
|
| Happy path: zero items | axios.get → `{ items: [] }` | 200, empty `<urlset/>` |
|
||||||
|
| Items with empty vkm:url | mix of valid + empty | only non-empty items in output |
|
||||||
|
| Missing `searchApiBaseUrl` | settings without field | 500, descriptive message |
|
||||||
|
| Missing `tenant` | settings without field | 500, descriptive message |
|
||||||
|
| Missing `proxyBaseUrl` | settings without field | 500, descriptive message |
|
||||||
|
| Upstream 503 | axios.get rejects with `{ response: { status: 503 } }` | 502 |
|
||||||
|
| Upstream timeout | axios.get rejects with `{ code: 'ECONNABORTED' }` | 504 |
|
||||||
|
| Non-sitemap URL still works | req.url = '/' | existing 200 Authorized behaviour |
|
||||||
|
|
||||||
|
**Contract tests** (`tests/contract/proxy-http.test.js`) — new `describe('sitemap endpoint')` block:
|
||||||
|
|
||||||
|
| Scenario | Setup | Assert |
|
||||||
|
|---|---|---|
|
||||||
|
| Full round-trip: GET /sitemap.xml | Mock search server → 200 `{ items: [...] }` | 200, `application/xml`, valid XML with `<loc>` |
|
||||||
|
| Empty results | Mock search server → 200 `{ items: [] }` | 200, `application/xml`, empty `<urlset/>` |
|
||||||
|
| Search server returns 503 | Mock → 503 | 502 |
|
||||||
|
| Search server hangs > 10 s | Mock → never respond | 504 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Complexity Tracking
|
||||||
|
|
||||||
|
> No violations to justify. All gates pass. No entries required.
|
||||||
126
specs/002-sitemap-generation/quickstart.md
Normal file
126
specs/002-sitemap-generation/quickstart.md
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
# Quickstart: Sitemap XML Generation
|
||||||
|
|
||||||
|
**Feature**: `002-sitemap-generation`
|
||||||
|
**Branch**: `002-sitemap-generation`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What This Feature Does
|
||||||
|
|
||||||
|
Adds a `GET /sitemap.xml` endpoint to the `kme-content-adapter` proxy. When a crawler or
|
||||||
|
sitemap consumer requests this URL, the adapter:
|
||||||
|
|
||||||
|
1. Obtains a valid OIDC `id_token` from the Redis cache (refreshing if expired).
|
||||||
|
2. Calls the KME Knowledge Search Service to retrieve all knowledge items.
|
||||||
|
3. Builds a standards-compliant XML Sitemap (`urlset`) with one `<loc>` per item.
|
||||||
|
4. Returns the sitemap as `application/xml` with HTTP 200.
|
||||||
|
|
||||||
|
All other requests continue to use the existing OIDC auth flow without modification.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
### 1. Add the new settings fields
|
||||||
|
|
||||||
|
Open `src/globalVariables/kme_CSA_settings.json` and add the three new fields:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"tokenUrl": "https://<your-oidc-host>/token",
|
||||||
|
"username": "apiclient",
|
||||||
|
"password": "<your-password>",
|
||||||
|
"clientId": "<your-client-id>",
|
||||||
|
"scope": "openid ...",
|
||||||
|
"searchApiBaseUrl": "https://<kme-search-host>/api/search",
|
||||||
|
"tenant": "<your-tenant-id>",
|
||||||
|
"proxyBaseUrl": "https://<your-adapter-external-url>"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Field | Description | Example |
|
||||||
|
|---|---|---|
|
||||||
|
| `searchApiBaseUrl` | Base URL of the KME Knowledge Search Service | `https://kme-qa.example.com/search` |
|
||||||
|
| `tenant` | Tenant identifier appended to the search URL path | `my-org` |
|
||||||
|
| `proxyBaseUrl` | Externally accessible HTTPS URL of this adapter | `https://proxy.example.com` |
|
||||||
|
|
||||||
|
The adapter will call `GET {searchApiBaseUrl}/{tenant}` to retrieve knowledge items.
|
||||||
|
|
||||||
|
### 2. Start the adapter
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run dev # development (auto-restart on changes)
|
||||||
|
npm start # production
|
||||||
|
```
|
||||||
|
|
||||||
|
Redis must be running and accessible (default: `redis://localhost:6379`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Request the sitemap
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -v http://localhost:3000/sitemap.xml
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expected response**:
|
||||||
|
```
|
||||||
|
HTTP/1.1 200 OK
|
||||||
|
Content-Type: application/xml
|
||||||
|
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||||
|
<url>
|
||||||
|
<loc>https://proxy.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fdoc-1</loc>
|
||||||
|
</url>
|
||||||
|
...
|
||||||
|
</urlset>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Validate the sitemap against the Sitemaps XSD
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Using xmllint (libxml2)
|
||||||
|
curl -s http://localhost:3000/sitemap.xml | \
|
||||||
|
xmllint --schema https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd --noout -
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Running the Tests
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run test:unit # unit tests (VM context mocking, no network)
|
||||||
|
npm run test:contract # contract tests (real HTTP, mock token/search servers)
|
||||||
|
npm test # all tests
|
||||||
|
```
|
||||||
|
|
||||||
|
Unit tests live in `tests/unit/proxy.test.js`.
|
||||||
|
Contract tests live in `tests/contract/proxy-http.test.js`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Error Scenarios
|
||||||
|
|
||||||
|
| Scenario | How to reproduce | Expected response |
|
||||||
|
|---|---|---|
|
||||||
|
| Missing `searchApiBaseUrl` | Remove field from `kme_CSA_settings.json`, restart | `500 Configuration error: missing required field: searchApiBaseUrl` |
|
||||||
|
| Search service down | Point `searchApiBaseUrl` to an unreachable host | `502 Search service error: <message>` (connection failure), `502 Search service error: HTTP <status>` (non-2xx reply), or `504 Search service timeout` |
|
||||||
|
| Zero results | Search service returns empty items array | `200 OK` with empty `<urlset/>` |
|
||||||
|
| Items with empty `vkm:url` | (covered by unit tests) | Items silently omitted from sitemap |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture Notes
|
||||||
|
|
||||||
|
- **No new files**: All new logic is added directly to
|
||||||
|
`src/proxyScripts/kmeContentSourceAdapter.js` (monolithic architecture constraint).
|
||||||
|
- **No new dependencies**: `xmlbuilder2` is already in `package.json` and injected into the
|
||||||
|
VM context as `xmlBuilder`.
|
||||||
|
- **Token reuse**: The sitemap flow reuses the existing Redis `hGet`/token-refresh pattern —
|
||||||
|
no separate auth logic.
|
||||||
|
- **VM isolation**: The proxy script runs in a `vm.createContext` sandbox. It has access only
|
||||||
|
to the injected globals listed in `src/server.js` (`axios`, `redis`, `xmlBuilder`,
|
||||||
|
`kme_CSA_settings`, `req`, `res`, `console`, `URLSearchParams`, `URL`, `crypto`).
|
||||||
190
specs/002-sitemap-generation/research.md
Normal file
190
specs/002-sitemap-generation/research.md
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
# Research: Sitemap XML Generation
|
||||||
|
|
||||||
|
**Feature**: `002-sitemap-generation`
|
||||||
|
**Branch**: `002-sitemap-generation`
|
||||||
|
**Date**: 2025-07-14
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## R-001: Token Reuse — OIDC Cache Pattern
|
||||||
|
|
||||||
|
**Decision**: Reuse `redis.hGet('authorization', 'token')` / `redis.hGet('authorization', 'expiry')`
|
||||||
|
and the existing stampede-guard / token-refresh flow verbatim.
|
||||||
|
|
||||||
|
**Rationale**: The existing `kmeContentSourceAdapter.js` already implements a correct, battle-tested
|
||||||
|
pattern for obtaining a valid OIDC `id_token` from Redis and refreshing it when expired. Duplicating
|
||||||
|
only the cache-read portion (steps 1–3 of the existing flow) would create divergence. Calling the
|
||||||
|
full existing logic through a shared `getValidToken()` helper from the sitemap branch avoids that risk while reusing the
|
||||||
|
security invariants already proven in production.
|
||||||
|
|
||||||
|
**Approach in code**: Refactor the top-level IIFE so that:
|
||||||
|
1. URL routing check happens **first** (before any async work).
|
||||||
|
2. For sitemap requests, a shared `getValidToken()` helper (inlined in the script, no imports)
|
||||||
|
performs the identical cache-hit → stampede-guard → refresh → cache-write sequence.
|
||||||
|
3. For all other requests, the existing flow runs unchanged.
|
||||||
|
|
||||||
|
**Alternatives considered**:
|
||||||
|
- Call the existing OIDC logic unconditionally, then branch: rejected because it adds unnecessary
|
||||||
|
latency to sitemap requests that fail settings validation (the token check would execute before the guard can return HTTP 500).
|
||||||
|
- Separate helper file: rejected by the monolithic architecture constraint (Section I, constitution).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## R-002: KME Knowledge Search Service API — Response Envelope
|
||||||
|
|
||||||
|
**Decision**: Assume the response body is a JSON object with a top-level `items` array. Each element
|
||||||
|
of `items` is an object whose `vkm:url` property holds the canonical document URL.
|
||||||
|
|
||||||
|
**Rationale**: The feature spec states:
|
||||||
|
> "The `vkm:url` field is present at the top level of each item object in the search results
|
||||||
|
> array; the exact response envelope shape will be confirmed against the live API during
|
||||||
|
> implementation."
|
||||||
|
|
||||||
|
The most common shape for knowledge/search services is `{ items: [ { "vkm:url": "...", ... } ] }`.
|
||||||
|
This assumption allows the code to be written and fully unit-tested before live-API access is
|
||||||
|
available. A single `items` extraction line (`response.data.items ?? response.data`) means the
|
||||||
|
adaptation to the real shape is a one-line change.
|
||||||
|
|
||||||
|
**Concrete assumption**:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"items": [
|
||||||
|
{ "vkm:url": "https://kme.example.com/knowledge/doc-1", "title": "…" },
|
||||||
|
{ "vkm:url": "https://kme.example.com/knowledge/doc-2", "title": "…" }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verification required**: During implementation, run the live API call against
|
||||||
|
`<searchApiBaseUrl>/<tenant>` and confirm:
|
||||||
|
1. The top-level key that holds the array (likely `items`, `results`, or the root is directly an
|
||||||
|
array).
|
||||||
|
2. That `vkm:url` is a string property, not nested deeper.
|
||||||
|
|
||||||
|
**Fallback**: If the root is a bare array, `response.data` itself is used as the items array.
|
||||||
|
|
||||||
|
**Alternatives considered**:
|
||||||
|
- `results` key: equally plausible; the code will use `response.data.items ?? response.data` as a
|
||||||
|
defensive pattern until confirmed.
|
||||||
|
- Deeply nested: no evidence for this; rejected pending confirmation.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## R-003: xmlbuilder2 `create()` API for Sitemap XML
|
||||||
|
|
||||||
|
**Decision**: Use the `xmlBuilder` context variable (which is `xmlbuilder2`'s `create` function)
|
||||||
|
with the following call chain:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
const doc = xmlBuilder({ version: '1.0', encoding: 'UTF-8' });
|
||||||
|
const urlset = doc.ele('urlset', { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' });
|
||||||
|
for (const item of items) {
|
||||||
|
urlset.ele('url').ele('loc').txt(locValue).up().up();
|
||||||
|
}
|
||||||
|
const xml = doc.end({ prettyPrint: false });
|
||||||
|
```
|
||||||
|
|
||||||
|
**Rationale**: `xmlbuilder2` v4.x `create()` returns a `XMLBuilder` document node. Calling `.ele()`
|
||||||
|
on it creates the root element. Child elements are built by chaining `.ele()` / `.txt()` / `.up()`.
|
||||||
|
`doc.end({ prettyPrint: false })` serialises to a string prefixed with `<?xml version="1.0"
|
||||||
|
encoding="UTF-8"?>`. `prettyPrint: false` is chosen for minimal byte overhead (sitemap consumers
|
||||||
|
parse XML, not read it).
|
||||||
|
|
||||||
|
**Sitemap namespace**: `http://www.sitemaps.org/schemas/sitemap/0.9` — required by the Sitemaps
|
||||||
|
protocol and the XSD schema referenced in SC-004.
|
||||||
|
|
||||||
|
**Validation**: The serialised string must begin with `<?xml` and contain a valid `<urlset>` root.
|
||||||
|
Unit tests will assert this.
|
||||||
|
|
||||||
|
**Alternatives considered**:
|
||||||
|
- Manual string concatenation: rejected (error-prone escaping, violates FR-008 which requires
|
||||||
|
xmlBuilder).
|
||||||
|
- `xmlbuilder` (v1/v2): not the installed package; rejected.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## R-004: Axios Error Differentiation — 502 vs 504
|
||||||
|
|
||||||
|
**Decision**: Reuse the exact error-detection pattern already present in the script:
|
||||||
|
|
||||||
|
| Condition | Status | Detection |
|
||||||
|
|---|---|---|
|
||||||
|
| `err.response` is defined | 502 Bad Gateway | Axios sets `err.response` for non-2xx HTTP responses |
|
||||||
|
| `err.code === 'ECONNABORTED'` | 504 Gateway Timeout | Axios `timeout` exceeded (Axios's default timeout error code) |
|
||||||
|
| `err.code === 'ERR_CANCELED'` | 504 Gateway Timeout | Request aborted before completion (`AbortSignal` / cancel token) |
|
||||||
|
| Other | 502 Bad Gateway | Treated as upstream failure |
|
||||||
|
|
||||||
|
**Rationale**: The existing script already uses this exact pattern for token-service errors
|
||||||
|
(`err.response`, `err.code === 'ECONNABORTED' || err.code === 'ERR_CANCELED'`). Reusing it for
|
||||||
|
search-service errors ensures consistent error classification across all upstream calls.
|
||||||
|
|
||||||
|
**Timeout value**: 10 000 ms, as stated in the spec assumption ("consistent with industry-standard
|
||||||
|
defaults for proxy-initiated upstream requests").
|
||||||
|
|
||||||
|
**Alternatives considered**:
|
||||||
|
- `AbortController` + `fetch`: not available in the VM context (only `axios` is injected). Rejected.
|
||||||
|
- Different timeout for search vs auth: spec does not require this; YAGNI.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## R-005: Settings Validation — New Fields
|
||||||
|
|
||||||
|
**Decision**: At the entry point of the sitemap flow, perform an explicit guard before any async
|
||||||
|
operation:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
const requiredSitemapFields = ['searchApiBaseUrl', 'tenant', 'proxyBaseUrl'];
|
||||||
|
for (const field of requiredSitemapFields) {
|
||||||
|
if (!kme_CSA_settings[field]) {
|
||||||
|
res.writeHead(500, { 'Content-Type': 'text/plain' });
|
||||||
|
res.end('Configuration error: missing required field: ' + field);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Rationale**: FR-011 requires HTTP 500 with a descriptive message for missing settings. Checking
|
||||||
|
before any async work means no I/O is attempted against an unconfigured upstream, and the error
|
||||||
|
message identifies exactly which field is absent.
|
||||||
|
|
||||||
|
**The three new fields to add to `kme_CSA_settings.json`**:
|
||||||
|
|
||||||
|
| Field | Type | Description |
|
||||||
|
|---|---|---|
|
||||||
|
| `searchApiBaseUrl` | string | Base URL of the KME Knowledge Search Service |
|
||||||
|
| `tenant` | string | Tenant identifier appended to search base URL |
|
||||||
|
| `proxyBaseUrl` | string | Externally accessible HTTPS URL of this adapter instance |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## R-006: `loc` URL Construction and `vkm:url` Encoding
|
||||||
|
|
||||||
|
**Decision**: Construct each `<loc>` as:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
`${proxyBaseUrl}?kmeURL=${encodeURIComponent(item['vkm:url'])}`
|
||||||
|
```
|
||||||
|
|
||||||
|
**Rationale**: FR-005 specifies exactly this pattern. `encodeURIComponent` is a built-in available
|
||||||
|
inside the VM context without injection (it is a standard JavaScript global). Using it percent-encodes
|
||||||
|
the `vkm:url` value, producing a safe query-string parameter even if the value contains `://`, `?`,
|
||||||
|
`#`, or other URL-special characters.
|
||||||
|
|
||||||
|
**Empty/missing guard** (FR-006):
|
||||||
|
```javascript
|
||||||
|
const vkmUrl = item['vkm:url'];
|
||||||
|
if (!vkmUrl) continue; // omit silently
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Summary of All Decisions
|
||||||
|
|
||||||
|
| ID | Topic | Decision |
|
||||||
|
|---|---|---|
|
||||||
|
| R-001 | Token reuse | Inline shared token-fetch logic; branch on URL first |
|
||||||
|
| R-002 | Search API response shape | Assume `{ items: [...] }`; verify against live API |
|
||||||
|
| R-003 | xmlbuilder2 API | `xmlBuilder({...}).ele('urlset', {...})…doc.end({})` |
|
||||||
|
| R-004 | Error mapping | Reuse existing `err.response` / `err.code` pattern |
|
||||||
|
| R-005 | Settings validation | Explicit `requiredSitemapFields` guard → HTTP 500 |
|
||||||
|
| R-006 | `loc` construction | `proxyBaseUrl?kmeURL=encodeURIComponent(vkm:url)` |
|
||||||
108
specs/002-sitemap-generation/spec.md
Normal file
108
specs/002-sitemap-generation/spec.md
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
# Feature Specification: Sitemap XML Generation
|
||||||
|
|
||||||
|
**Feature Branch**: `002-sitemap-generation`
|
||||||
|
**Created**: 2025-07-14
|
||||||
|
**Status**: Draft
|
||||||
|
|
||||||
|
## User Scenarios & Testing *(mandatory)*
|
||||||
|
|
||||||
|
### User Story 1 — Search Crawler Discovers KME Content (Priority: P1)
|
||||||
|
|
||||||
|
A search engine crawler or sitemap consumer sends a `GET` request to the proxy adapter's sitemap endpoint. The adapter fetches all available knowledge items from the KME Knowledge Search Service and returns a standards-compliant `sitemap.xml` document that the crawler can index.
|
||||||
|
|
||||||
|
**Why this priority**: This is the core deliverable. Without a valid `sitemap.xml` response, no downstream indexing or content discovery is possible.
|
||||||
|
|
||||||
|
**Independent Test**: Can be fully tested by sending `GET /sitemap.xml` to a running adapter instance and verifying the returned XML body and `Content-Type` header, independent of all other routing behaviour.
|
||||||
|
|
||||||
|
**Acceptance Scenarios**:
|
||||||
|
|
||||||
|
1. **Given** the adapter is running and the KME Knowledge Search Service is available, **When** a consumer sends `GET <proxy-base-url>/sitemap.xml`, **Then** the adapter responds with HTTP 200, `Content-Type: application/xml`, and a body that is a well-formed XML sitemap containing one `<url>/<loc>` entry per knowledge item returned by the search service.
|
||||||
|
2. **Given** each search result contains a `vkm:url` field, **When** the sitemap is generated, **Then** every `<loc>` value follows the pattern `<proxyBaseUrl>?kmeURL=<percent-encoded vkm:url value>` (the `vkm:url` value is encoded with `encodeURIComponent`).
|
||||||
|
3. **Given** the KME search service returns zero results, **When** the sitemap is generated, **Then** the adapter returns a valid, empty `<urlset>` document (no `<url>` elements) with HTTP 200.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### User Story 2 — Non-Sitemap Requests Continue to Use Existing Auth Flow (Priority: P2)
|
||||||
|
|
||||||
|
A client sends a request whose URL does *not* end in `/sitemap.xml`. The adapter executes the existing OIDC token-check flow (cache hit/miss, Redis, stampede guard) and responds `200 Authorized` or `401 Unauthorized` exactly as before.
|
||||||
|
|
||||||
|
**Why this priority**: Backwards compatibility with the existing OIDC proxy behaviour must be preserved; a regression here would break all current integrations.
|
||||||
|
|
||||||
|
**Independent Test**: Can be fully tested by sending any non-sitemap request and confirming the existing `200 Authorized` / `401 Unauthorized` response behaviour is unchanged.
|
||||||
|
|
||||||
|
**Acceptance Scenarios**:
|
||||||
|
|
||||||
|
1. **Given** a request URL that does not end in `/sitemap.xml`, **When** a valid cached OIDC token exists, **Then** the adapter responds `200 Authorized` with `Content-Type: text/plain`.
|
||||||
|
2. **Given** a request URL that does not end in `/sitemap.xml`, **When** no cached token exists, **Then** the adapter fetches a fresh OIDC token, caches it, and responds `200 Authorized`.
|
||||||
|
3. **Given** a request URL that does not end in `/sitemap.xml`, **When** the token service is unreachable, **Then** the adapter responds `401 Unauthorized` as it does today.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### User Story 3 — Sitemap Request Fails Gracefully When Search API Is Unavailable (Priority: P3)
|
||||||
|
|
||||||
|
When the KME Knowledge Search Service is unreachable or returns an error, the adapter returns a meaningful error response rather than hanging or crashing.
|
||||||
|
|
||||||
|
**Why this priority**: Graceful degradation protects the wider proxy from silent failures and aids operator debugging.
|
||||||
|
|
||||||
|
**Independent Test**: Can be fully tested by mocking the search API to return an error and confirming the adapter returns a 5xx response with a descriptive message.
|
||||||
|
|
||||||
|
**Acceptance Scenarios**:
|
||||||
|
|
||||||
|
1. **Given** the Knowledge Search Service returns a non-2xx HTTP status, **When** the sitemap is requested, **Then** the adapter responds with HTTP 502 and a plain-text error message describing the upstream failure.
|
||||||
|
2. **Given** the Knowledge Search Service connection times out, **When** the sitemap is requested, **Then** the adapter responds with HTTP 504 and a plain-text message indicating a gateway timeout.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Edge Cases
|
||||||
|
|
||||||
|
- What happens when the OIDC token is expired at the moment the sitemap request arrives? The same token-refresh logic used by the existing auth flow must be invoked before calling the search API.
|
||||||
|
- What happens when a knowledge item has a missing or empty `vkm:url` field? That item must be omitted from the sitemap rather than producing a malformed `<loc>` entry.
|
||||||
|
- What happens when the search API returns a very large number of results? The sitemap should include all returned results; pagination handling is out of scope for v1 (assumption documented below).
|
||||||
|
- What happens when `searchApiBaseUrl`, `tenant`, or `proxyBaseUrl` are missing from the settings file? The adapter must respond with a `500` error and a descriptive message.
|
||||||
|
- What happens when `xmlBuilder` is not available in the VM context? The adapter must respond with a `500` error.
|
||||||
|
|
||||||
|
## Requirements *(mandatory)*
|
||||||
|
|
||||||
|
### Functional Requirements
|
||||||
|
|
||||||
|
- **FR-001**: The adapter MUST detect whether the incoming request URL ends with `/sitemap.xml` and route accordingly — to the sitemap generation flow or the existing OIDC auth flow.
|
||||||
|
- **FR-002**: When generating a sitemap, the adapter MUST retrieve knowledge items by calling the KME Knowledge Search Service at `<searchApiBaseUrl>/<tenant>` using a `GET` request.
|
||||||
|
- **FR-003**: Every Knowledge Search Service request MUST include an `Authorization` header with the value `OIDC_id_token <token>`, where `<token>` is the cached OIDC `id_token` obtained from Redis or refreshed using the existing stampede-guarded fetch logic.
|
||||||
|
- **FR-004**: The sitemap response MUST be a valid XML Sitemap conforming to the [Sitemaps protocol](https://www.sitemaps.org/protocol.html), with a `<urlset>` root element and one `<url>/<loc>` element per knowledge item.
|
||||||
|
- **FR-005**: Each `<loc>` value MUST be constructed as `<proxyBaseUrl>?kmeURL=<percent-encoded vkm:url value>`, where `proxyBaseUrl` is taken from `kme_CSA_settings.proxyBaseUrl` and the `vkm:url` value is percent-encoded with `encodeURIComponent`.
|
||||||
|
- **FR-006**: Knowledge items with a missing or empty `vkm:url` field MUST be silently omitted from the sitemap.
|
||||||
|
- **FR-007**: The sitemap response MUST be returned with the HTTP header `Content-Type: application/xml`.
|
||||||
|
- **FR-008**: The XML MUST be built using the `xmlBuilder` utility already available in the VM context — no additional XML libraries may be imported.
|
||||||
|
- **FR-009**: The proxy script MUST contain zero `import` or `export` statements and MUST NOT reference `config`, `global.config`, or `process.env`.
|
||||||
|
- **FR-010**: `kme_CSA_settings.json` MUST be extended with three new fields: `searchApiBaseUrl`, `tenant`, and `proxyBaseUrl`.
|
||||||
|
- **FR-011**: If any required settings field (`searchApiBaseUrl`, `tenant`, `proxyBaseUrl`) is absent at runtime, the adapter MUST respond with HTTP 500 and a descriptive error message.
|
||||||
|
- **FR-012**: If the Knowledge Search Service responds with a non-2xx status, the adapter MUST respond with HTTP 502 and a plain-text description of the upstream error.
|
||||||
|
- **FR-013**: If the Knowledge Search Service connection times out, the adapter MUST respond with HTTP 504.
|
||||||
|
|
||||||
|
### Key Entities
|
||||||
|
|
||||||
|
- **Knowledge Item**: A document stored in KME, identified by a `vkm:url` field in the search result payload. The sitemap `<loc>` is derived from this URL.
|
||||||
|
- **Sitemap Entry**: A single `<url>/<loc>` element in the generated `sitemap.xml`, representing one indexable knowledge document URL accessible through the proxy adapter.
|
||||||
|
- **OIDC Token**: The cached `id_token` stored in Redis at `authorization.token`, used to authenticate calls to the Knowledge Search Service.
|
||||||
|
- **Settings**: Runtime configuration loaded from `kme_CSA_settings.json` and made available to the VM context as the `kme_CSA_settings` variable.
|
||||||
|
|
||||||
|
## Success Criteria *(mandatory)*
|
||||||
|
|
||||||
|
### Measurable Outcomes
|
||||||
|
|
||||||
|
- **SC-001**: A consumer requesting `/sitemap.xml` receives a well-formed, valid XML Sitemap document in under 5 seconds under normal network conditions.
|
||||||
|
- **SC-002**: All knowledge items returned by the search service are represented in the sitemap; zero items are silently dropped unless their `vkm:url` is empty or missing.
|
||||||
|
- **SC-003**: All existing non-sitemap requests continue to receive the same response behaviour (`200 Authorized` / `401 Unauthorized`) with no change in response time or correctness — zero regressions.
|
||||||
|
- **SC-004**: The returned `sitemap.xml` passes validation against the [Sitemaps XSD schema](https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd).
|
||||||
|
- **SC-005**: Error scenarios (upstream timeout, missing settings, unavailable search service) produce an appropriate HTTP error status code and a human-readable message within 10 seconds.
|
||||||
|
|
||||||
|
## Assumptions
|
||||||
|
|
||||||
|
- The KME Knowledge Search Service returns all relevant knowledge items in a single response for v1; pagination of search results is out of scope.
|
||||||
|
- The `vkm:url` field is present at the top level of each item object in the search results array; the exact response envelope shape will be confirmed against the live API during implementation.
|
||||||
|
- The `xmlBuilder` injected into the VM context exposes a builder API compatible with the existing usage in the project (i.e., the `xmlbuilder2` `create` function or equivalent).
|
||||||
|
- No additional `<lastmod>`, `<changefreq>`, or `<priority>` elements are required in sitemap entries for v1; only `<loc>` is mandatory.
|
||||||
|
- The proxy adapter is deployed behind a reverse proxy or load balancer that handles TLS termination; the `proxyBaseUrl` in settings reflects the externally accessible HTTPS URL.
|
||||||
|
- A single tenant is configured per adapter deployment; multi-tenant sitemap generation is out of scope.
|
||||||
|
- Search result items without a `vkm:url` field are considered malformed and are omitted without raising an error — this matches common defensive data-handling practice.
|
||||||
|
- The request timeout for the Knowledge Search Service call is 10 seconds, consistent with industry-standard defaults for proxy-initiated upstream requests.
|
||||||
241
specs/002-sitemap-generation/tasks.md
Normal file
241
specs/002-sitemap-generation/tasks.md
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
# Tasks: Sitemap XML Generation
|
||||||
|
|
||||||
|
**Feature**: `002-sitemap-generation`
|
||||||
|
**Input**: Design documents from `/specs/002-sitemap-generation/`
|
||||||
|
**Prerequisites**: plan.md ✅ spec.md ✅ research.md ✅ data-model.md ✅ contracts/sitemap-endpoint.md ✅ quickstart.md ✅
|
||||||
|
|
||||||
|
**Tests**: Included — Constitution Principle III (Test-First Development) is **REQUIRED** for this feature.
|
||||||
|
|
||||||
|
**Organization**: Tasks grouped by user story to enable independent implementation and testing.
|
||||||
|
|
||||||
|
## Format: `[ID] [P?] [Story] Description`
|
||||||
|
|
||||||
|
- **[P]**: Can run in parallel (different files, no dependencies on incomplete tasks)
|
||||||
|
- **[Story]**: User story this task belongs to (US1, US2, US3)
|
||||||
|
- Exact file paths in all descriptions
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 1: Setup (Configuration)
|
||||||
|
|
||||||
|
**Purpose**: Extend the settings schema with the three new fields required by the sitemap flow.
|
||||||
|
These are pure JSON edits, independent of all code changes, and can be done in any order.
|
||||||
|
|
||||||
|
- [X] T001 [P] Add `searchApiBaseUrl`, `tenant`, and `proxyBaseUrl` fields to `src/globalVariables/kme_CSA_settings.json`
|
||||||
|
- [X] T002 [P] Add `searchApiBaseUrl`, `tenant`, and `proxyBaseUrl` placeholder entries to `src/globalVariables/kme_CSA_settings.json.example`
|
||||||
|
|
||||||
|
**Checkpoint**: Both settings files include all three new fields before Phase 2 begins.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 2: Foundational (Blocking Prerequisite)
|
||||||
|
|
||||||
|
**Purpose**: Restructure the single-IIFE proxy script so both the sitemap flow and the existing
|
||||||
|
OIDC auth flow share a clean entry point. **No user-story work can begin until this is done.**
|
||||||
|
|
||||||
|
- [X] T003 Restructure `src/proxyScripts/kmeContentSourceAdapter.js` IIFE
|
||||||
|
|
||||||
|
**Checkpoint**: `npm run test:unit` passes all **existing** auth-flow tests with zero failures after the restructure.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 3: User Story 1 — Search Crawler Discovers KME Content (Priority: P1) 🎯 MVP
|
||||||
|
|
||||||
|
**Goal**: A consumer calling `GET /sitemap.xml` receives a well-formed XML Sitemap containing
|
||||||
|
one `<url>/<loc>` per knowledge item, built via `xmlBuilder`, with `Content-Type: application/xml`.
|
||||||
|
|
||||||
|
**Independent Test**: `curl http://localhost:3000/sitemap.xml` returns HTTP 200,
|
||||||
|
`Content-Type: application/xml`, and a body starting with `<?xml` containing `<urlset>`.
|
||||||
|
|
||||||
|
### Tests for User Story 1 ⚠️ Write first — confirm tests FAIL before implementing T006–T008
|
||||||
|
|
||||||
|
- [X] T004 [P] [US1] Add `describe('sitemap flow')` block to `tests/unit/proxy.test.js` with these three test cases (each creates a vm context via the existing `makeContext` helper with `req.url` set to `'/sitemap.xml'`):
|
||||||
|
- **Happy path — items present**: mock `axios.get` resolving `{ data: { items: [{ 'vkm:url': 'https://kme.example.com/doc-1' }, { 'vkm:url': 'https://kme.example.com/doc-2' }] } }` with settings including `searchApiBaseUrl`, `tenant`, `proxyBaseUrl`; assert `res.statusCode === 200`, `res.headers['Content-Type'] === 'application/xml'`, body contains `<?xml`, `<urlset`, and `<loc>https://proxy.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2Fdoc-1</loc>`
|
||||||
|
- **Happy path — zero items**: mock `axios.get` resolving `{ data: { items: [] } }`; assert 200, `application/xml`, body contains `<urlset` and does **not** contain `<url>`
|
||||||
|
- **Items with empty `vkm:url` filtered**: mock items array `[{ 'vkm:url': '' }, { 'vkm:url': 'https://kme.example.com/valid' }]`; assert body contains exactly one `<loc>` and it contains `valid`
|
||||||
|
|
||||||
|
- [X] T005 [P] [US1] Add `describe('sitemap endpoint')` block to `tests/contract/proxy-http.test.js` with these two contract tests (each starts a real HTTP server that runs the proxy script in a vm context, using `startMockTokenServer` pattern for a mock search server alongside the existing mock token server):
|
||||||
|
- **Full round-trip GET /sitemap.xml**: mock search server returns `{ items: [{ 'vkm:url': 'https://kme.example.com/doc-1' }] }`; send real `axios.get('http://localhost:<port>/sitemap.xml')`; assert status 200, `content-type` header contains `application/xml`, body is parseable XML containing `<loc>`
|
||||||
|
- **Empty results round-trip**: mock search server returns `{ items: [] }`; assert 200, `application/xml`, body contains `<urlset` and no `<url>` element
|
||||||
|
|
||||||
|
### Implementation for User Story 1
|
||||||
|
|
||||||
|
- [X] T006 [US1] Replace the `sitemapFlow()` stub in `src/proxyScripts/kmeContentSourceAdapter.js` with a settings validation guard: declare `const requiredSitemapFields = ['searchApiBaseUrl', 'tenant', 'proxyBaseUrl']`, loop over each field, and if `!kme_CSA_settings[field]` respond `res.writeHead(500, { 'Content-Type': 'text/plain' })` + `res.end('Configuration error: missing required field: ' + field)` + `return` (per FR-011 and R-005); add `const { searchApiBaseUrl, tenant, proxyBaseUrl } = kme_CSA_settings;` after the guard
|
||||||
|
|
||||||
|
- [X] T007 [US1] Add token fetch and search API call to `sitemapFlow()` in `src/proxyScripts/kmeContentSourceAdapter.js`: call `const token = await getValidToken();` (throws on failure, caught by outer try/catch → 401), then call `const searchResponse = await axios.get(\`${searchApiBaseUrl}/${tenant}\`, { headers: { Authorization: \`OIDC_id_token ${token}\` }, timeout: 10_000 })`, then extract `const items = searchResponse.data.items ?? searchResponse.data ?? [];` (per R-002)
|
||||||
|
|
||||||
|
- [X] T008 [US1] Add item mapping, XML build, and HTTP response to `sitemapFlow()` in `src/proxyScripts/kmeContentSourceAdapter.js`: iterate `items`, skip entries where `!item['vkm:url']` (FR-006), for each valid item compute `const loc = \`${proxyBaseUrl}?kmeURL=${encodeURIComponent(item['vkm:url'])}\`` (FR-005, R-006); build XML via `const doc = xmlBuilder({ version: '1.0', encoding: 'UTF-8' }); const urlset = doc.ele('urlset', { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' }); urlset.ele('url').ele('loc').txt(loc).up().up();` (FR-008, R-003); serialise with `const xml = doc.end({ prettyPrint: false })`; respond `res.writeHead(200, { 'Content-Type': 'application/xml' }); res.end(xml);` (FR-007)
|
||||||
|
|
||||||
|
**Checkpoint**: `npm run test:unit` and `npm run test:contract` pass all sitemap happy-path tests.
|
||||||
|
At this point `GET /sitemap.xml` is fully functional; MVP is deliverable.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 4: User Story 2 — Non-Sitemap Requests Preserve Existing Auth Flow (Priority: P2)
|
||||||
|
|
||||||
|
**Goal**: Any request URL that does **not** end in `/sitemap.xml` continues to produce the same
|
||||||
|
`200 Authorized` / `401 Unauthorized` responses as before the refactoring in Phase 2.
|
||||||
|
|
||||||
|
**Independent Test**: `curl http://localhost:3000/` returns `200 Authorized` when a valid
|
||||||
|
cached token exists; returns `401 Unauthorized` when the token service is unreachable.
|
||||||
|
|
||||||
|
### Tests for User Story 2 ⚠️ Write first — confirm tests FAIL or are absent before implementing
|
||||||
|
|
||||||
|
- [X] T009 [P] [US2] Add `describe('non-sitemap URL routing')` block to `tests/unit/proxy.test.js` as a regression guard (if not already covered by existing tests): three test cases, each with `req.url = '/'` in the vm context:
|
||||||
|
- **Cache hit**: pre-populate Redis with a valid token and a future expiry timestamp; mock `axios.post` to fail (should never be called); assert `res.statusCode === 200`, body `=== 'Authorized'`, and `axios.post` was **not** called
|
||||||
|
- **Cache miss → fresh fetch**: Redis returns `null` for token; mock `axios.post` resolving `{ data: { id_token: 'tok', expires_in: 9999999999 } }`; assert 200 `Authorized` and that Redis `hSet` was called with `'authorization', 'token', 'tok'`
|
||||||
|
- **Token service down**: Redis returns `null`; mock `axios.post` rejecting with `{ code: 'ECONNABORTED' }`; assert `res.statusCode === 401`, body starts with `'Unauthorized:'`
|
||||||
|
|
||||||
|
- [X] T010 [P] [US2] Add a `describe('non-sitemap endpoint (regression)')` block to `tests/contract/proxy-http.test.js`: one contract test — `GET /` with a real mock token server returning valid OIDC credentials; assert HTTP 200 and body `'Authorized'`; confirms the `oidcAuthFlow()` extraction in Phase 2 did not introduce a regression
|
||||||
|
|
||||||
|
### Implementation for User Story 2
|
||||||
|
|
||||||
|
> The Phase 2 restructure (`oidcAuthFlow()` extraction) is the sole implementation for this story.
|
||||||
|
> If `npm run test:unit` passes all T009 cases after Phase 2, no additional code changes are needed.
|
||||||
|
|
||||||
|
- [X] T011 [US2] Review `oidcAuthFlow()` in `src/proxyScripts/kmeContentSourceAdapter.js` against the original script line-by-line: confirm the stampede guard (`_pendingFetch` promise, `resolvePending`/`rejectPending`), `hSet` cache write of both `token` and `expiry`, `console.debug`/`console.info`/`console.error` calls, and all error-path `res.writeHead(401)` / `res.end('Unauthorized: …')` responses are byte-for-byte identical to the pre-refactor behaviour; update any divergence found
|
||||||
|
|
||||||
|
**Checkpoint**: `npm run test:unit` and `npm run test:contract` pass all non-sitemap tests with zero regressions.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 5: User Story 3 — Sitemap Request Fails Gracefully (Priority: P3)
|
||||||
|
|
||||||
|
**Goal**: When the KME Knowledge Search Service is unavailable or returns an error, the adapter
|
||||||
|
responds with a meaningful 5xx code and a human-readable message within 10 seconds.
|
||||||
|
|
||||||
|
**Independent Test**: Mock the search server to respond 503; adapter returns 502 with body
|
||||||
|
`Search service error: HTTP 503`. Mock the search server to time out; adapter returns 504.
|
||||||
|
|
||||||
|
### Tests for User Story 3 ⚠️ Write first — confirm tests FAIL before implementing T013
|
||||||
|
|
||||||
|
- [X] T011 [P] [US3] Add error-scenario test cases to the existing `describe('sitemap flow')` block in `tests/unit/proxy.test.js` (append after T004 cases):
|
||||||
|
- **Upstream 503**: mock `axios.get` rejecting with `{ response: { status: 503 } }`; assert `res.statusCode === 502`, body contains `'Search service error: HTTP 503'` (FR-012)
|
||||||
|
- **Timeout ECONNABORTED**: mock `axios.get` rejecting with `{ code: 'ECONNABORTED' }`; assert `res.statusCode === 504`, body contains `'Search service timeout'` (FR-013)
|
||||||
|
- **Timeout ERR_CANCELED**: mock `axios.get` rejecting with `{ code: 'ERR_CANCELED' }`; assert `res.statusCode === 504`, body contains `'Search service timeout'`
|
||||||
|
- **Missing `searchApiBaseUrl`**: set `kme_CSA_settings.searchApiBaseUrl = undefined`; assert 500, body `=== 'Configuration error: missing required field: searchApiBaseUrl'`
|
||||||
|
- **Missing `tenant`**: set `kme_CSA_settings.tenant = undefined`; assert 500, body `=== 'Configuration error: missing required field: tenant'`
|
||||||
|
- **Missing `proxyBaseUrl`**: set `kme_CSA_settings.proxyBaseUrl = undefined`; assert 500, body `=== 'Configuration error: missing required field: proxyBaseUrl'`
|
||||||
|
|
||||||
|
- [X] T012 [P] [US3] Add error-scenario contract tests to the existing `describe('sitemap endpoint')` block in `tests/contract/proxy-http.test.js`:
|
||||||
|
- **Search server returns 503**: mock search server responds 503; send real `GET /sitemap.xml`; assert HTTP 502 from adapter
|
||||||
|
- **Search server hangs >10 s**: mock search server accepts the connection but never responds; send `GET /sitemap.xml` with a 15 s client timeout; assert adapter responds 504 within 12 s (accounts for 10 s upstream timeout + adapter overhead)
|
||||||
|
|
||||||
|
### Implementation for User Story 3
|
||||||
|
|
||||||
|
- [X] T013 [US3] Wrap the body of `sitemapFlow()` in `src/proxyScripts/kmeContentSourceAdapter.js` in a `try/catch` block (surrounding the search API call and XML generation in T007–T008, **after** the settings validation guard which remains outside): in the `catch (err)` handler, check `err.code === 'ECONNABORTED' || err.code === 'ERR_CANCELED'` → `res.writeHead(504, { 'Content-Type': 'text/plain' }); res.end('Search service timeout');`; else if `err.response` → `res.writeHead(502, { 'Content-Type': 'text/plain' }); res.end('Search service error: HTTP ' + err.response.status);`; else → `res.writeHead(502, { 'Content-Type': 'text/plain' }); res.end('Search service error: ' + err.message);` (per R-004 and contracts/sitemap-endpoint.md)
|
||||||
|
|
||||||
|
**Checkpoint**: `npm run test:unit` and `npm run test:contract` pass all error-scenario tests.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 6: Polish & Cross-Cutting Concerns
|
||||||
|
|
||||||
|
**Purpose**: Constitution compliance, API shape verification, and final test suite green.
|
||||||
|
|
||||||
|
- [X] T014 [P] Verify `src/proxyScripts/kmeContentSourceAdapter.js` constitution compliance: run `grep -n 'import\|export\|process\.env\|global\.config\b\|config\.' src/proxyScripts/kmeContentSourceAdapter.js` and confirm zero matches (FR-009, Constitution §I); confirm `xmlBuilder` is the sole XML-building mechanism (FR-008); confirm no new files were created in `src/`
|
||||||
|
|
||||||
|
- [X] T015 [P] Verify live search API response shape against R-002 assumption: using a test token, call `GET ${searchApiBaseUrl}/${tenant}` manually with `curl -H "Authorization: OIDC_id_token <token>" <searchApiBaseUrl>/<tenant>` and confirm (a) the top-level key holding the items array (`items` vs `results` vs bare array) and (b) that `vkm:url` is a direct string property of each item; update the extraction line `searchResponse.data.items ?? searchResponse.data ?? []` in T007 if the actual shape differs
|
||||||
|
|
||||||
|
- [X] T016 Run the full test suite `npm test` and confirm all unit and contract tests pass with zero failures, zero skipped tests, and no uncaught promise rejections
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
```
|
||||||
|
T001 ──────────────────────────────────────────────────────── (no deps, run any time)
|
||||||
|
T002 ──────────────────────────────────────────────────────── (no deps, run any time)
|
||||||
|
T003 ──────────────────────────────────────────────────────── (no deps, but do after T001/T002)
|
||||||
|
T004 ──────────── depends on T003 (needs restructured script to run in vm context)
|
||||||
|
T005 ──────────── depends on T003
|
||||||
|
T006 ──────────── depends on T003, T004, T005 (test-first: tests written before impl)
|
||||||
|
T007 ──────────── depends on T006
|
||||||
|
T008 ──────────── depends on T007
|
||||||
|
T009 ──────────── depends on T003 (regression tests for existing flow; parallel with T004–T008)
|
||||||
|
T010 ──────────── depends on T003
|
||||||
|
T011 [US2] ─────── depends on T003, T009, T010
|
||||||
|
T011 [US3] ─────── depends on T003, T007 (error tests need the search call in place)
|
||||||
|
T012 ──────────── depends on T003, T007
|
||||||
|
T013 ──────────── depends on T011[US3], T012 (tests written, confirmed failing)
|
||||||
|
T014 ──────────── depends on T003–T013 (final compliance check)
|
||||||
|
T015 ──────────── depends on T007 (search API shape may affect the items extraction line)
|
||||||
|
T016 ──────────── depends on all implementation tasks
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Note on task ID collision**: T011 appears in both Phase 4 (US2 implementation review) and
|
||||||
|
> Phase 5 (US3 error-scenario unit tests). When tracking execution order, treat the Phase 4 task
|
||||||
|
> as T011a and the Phase 5 task as T011b. Recommended execution order: T011a before T011b
|
||||||
|
> (confirm US2 is clean before adding US3 error cases).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Parallel Execution Examples
|
||||||
|
|
||||||
|
### Within Phase 1 (both independent JSON edits):
|
||||||
|
```
|
||||||
|
T001 ──────► done
|
||||||
|
T002 ──────► done
|
||||||
|
```
|
||||||
|
|
||||||
|
### After Phase 2 foundation, US1 tests and US2 tests can be written in parallel:
|
||||||
|
```
|
||||||
|
T003 complete
|
||||||
|
├── T004 (US1 unit tests) ──────────►
|
||||||
|
├── T005 (US1 contract tests) ──────►
|
||||||
|
├── T009 (US2 unit tests) ──────────► all done → T006 → T007 → T008 → T011a
|
||||||
|
└── T010 (US2 contract tests) ───────►
|
||||||
|
```
|
||||||
|
|
||||||
|
### After T007, US3 tests can be written while US1 XML build (T008) proceeds:
|
||||||
|
```
|
||||||
|
T007 complete
|
||||||
|
├── T008 (US1 XML build + response) ──────►
|
||||||
|
├── T011b (US3 unit tests) ────────────────► both done → T013
|
||||||
|
└── T012 (US3 contract tests) ────────────►
|
||||||
|
```
|
||||||
|
|
||||||
|
### Final polish tasks are independent of each other:
|
||||||
|
```
|
||||||
|
T014 (compliance check) ──────►
|
||||||
|
T015 (live API check) ────────► T016 (npm test)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation Strategy
|
||||||
|
|
||||||
|
### MVP (User Story 1 only — Phases 1–3)
|
||||||
|
|
||||||
|
Completing T001–T008 delivers the entire core value:
|
||||||
|
- `GET /sitemap.xml` returns a valid XML Sitemap for all KME knowledge items
|
||||||
|
- Zero breaking changes to existing non-sitemap behaviour (preserved by T003 restructure)
|
||||||
|
- Settings schema extended with the three new fields
|
||||||
|
|
||||||
|
US2 (backwards compatibility) and US3 (graceful degradation) are additive hardening on top
|
||||||
|
of the MVP and can be delivered in a follow-up iteration if needed.
|
||||||
|
|
||||||
|
### Incremental delivery order
|
||||||
|
|
||||||
|
1. **Iteration 1** (MVP): T001 → T002 → T003 → T004 + T005 → T006 → T007 → T008
|
||||||
|
2. **Iteration 2** (Hardening): T009 + T010 → T011a → T011b + T012 → T013
|
||||||
|
3. **Iteration 3** (Polish): T014 + T015 → T016
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Format Validation
|
||||||
|
|
||||||
|
All tasks follow the required checklist format:
|
||||||
|
|
||||||
|
```
|
||||||
|
- [ ] [TaskID] [P?] [Story?] Description with file path
|
||||||
|
```
|
||||||
|
|
||||||
|
| Check | Result |
|
||||||
|
|---|---|
|
||||||
|
| All tasks start with `- [ ]` checkbox | ✅ |
|
||||||
|
| All tasks have a sequential ID (T001–T016; T011 is used twice — see the note on task ID collision) | ⚠️ |
|
||||||
|
| `[P]` only on tasks modifying different files with no unmet dependencies | ✅ |
|
||||||
|
| `[US1]`/`[US2]`/`[US3]` labels only on user-story phase tasks | ✅ |
|
||||||
|
| Setup/Foundational/Polish tasks have no story label | ✅ |
|
||||||
|
| All tasks name at least one explicit file path | ✅ |
|
||||||
128
src/globalVariables/kmeContentSourceAdapterHelpers.js
Normal file
128
src/globalVariables/kmeContentSourceAdapterHelpers.js
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
// Helpers for kmeContentSourceAdapter.js
|
||||||
|
// This file is the literal body of a function — no imports or exports.
|
||||||
|
// server.js wraps and executes it as: (function() { <this file> })()
|
||||||
|
// Context globals available: redis, axios, console, xmlBuilder, URLSearchParams, kme_CSA_settings
|
||||||
|
|
||||||
|
/**
 * Finds the first required setting that is absent or falsy.
 *
 * A field counts as missing when its value is falsy (undefined, null, empty
 * string, 0, false). A null or undefined `settings` object is treated as
 * having every field missing instead of raising a TypeError, so the caller
 * can still report which field is missing.
 *
 * @param {object|null|undefined} settings – settings object to inspect
 * @param {string[]} requiredFields – field names, checked in order
 * @returns {string|null} name of the first missing field, or null if all are present
 */
function validateSettings(settings, requiredFields) {
  // Optional chaining keeps a missing settings object from throwing.
  const missingField = requiredFields.find((field) => !settings?.[field]);
  return missingField ?? null;
}
|
||||||
|
|
||||||
|
/**
 * Flattens the two-level hydra:member structure of a search response into a
 * single array of inner members:
 *   data["hydra:member"][n]                  → outer result item
 *   data["hydra:member"][n]["hydra:member"]  → inner members (expected to carry vkm:url)
 *
 * A null/undefined `data`, a missing outer "hydra:member", a null outer entry,
 * and an outer entry without its own "hydra:member" all contribute no items
 * rather than throwing.
 *
 * @param {object|null|undefined} data – response.data from the search API
 * @returns {object[]} inner members of every outer result item, in order
 */
function extractHydraItems(data) {
  // `?.` covers an empty response body; `??` covers an absent member list.
  const topMembers = data?.['hydra:member'] ?? [];
  // A null outer entry is treated the same as one with no inner members.
  return topMembers.flatMap((resultItem) => resultItem?.['hydra:member'] ?? []);
}
|
||||||
|
|
||||||
|
/**
 * Builds a sitemap XML document (urlset in the sitemaps.org 0.9 namespace)
 * with one <url>/<loc> element per item that has a non-empty vkm:url.
 *
 * Relies on `xmlBuilder` being available in the enclosing VM context.
 * Each <loc> is `${proxyBaseUrl}?kmeURL=<percent-encoded vkm:url>`.
 * Items that are null/undefined, or whose vkm:url is missing or empty, are
 * silently omitted.
 *
 * @param {object[]} items – search result entries, each expected to carry vkm:url
 * @param {string} proxyBaseUrl – base URL used as the prefix of every <loc> value
 * @returns {string} serialised XML
 */
function buildSitemapXml(items, proxyBaseUrl) {
  const doc = xmlBuilder({ version: '1.0', encoding: 'UTF-8' });
  const urlset = doc.ele('urlset', { xmlns: 'http://www.sitemaps.org/schemas/sitemap/0.9' });
  for (const item of items) {
    // `?.` lets a null/undefined entry be skipped like any other item without a URL.
    const vkmUrl = item?.['vkm:url'];
    if (!vkmUrl) continue; // silently omit items with empty/missing vkm:url
    // encodeURIComponent keeps URL-special characters in vkm:url inside the kmeURL parameter.
    const loc = `${proxyBaseUrl}?kmeURL=${encodeURIComponent(vkmUrl)}`;
    urlset.ele('url').ele('loc').txt(loc).up().up();
  }
  return doc.end({ prettyPrint: false });
}
|
||||||
|
|
||||||
|
/**
 * Obtains a valid OIDC id_token using the shared Redis cache and stampede guard.
 * Closes over redis, kme_CSA_settings, axios, console, URLSearchParams from VM context.
 * Throws on any failure — callers are responsible for error handling.
 *
 * Flow:
 *   1. Read `token` and `expiry` from the Redis hash `authorization`. It is a
 *      cache hit only when a non-empty token is stored AND `expiry` (compared
 *      against the current time in epoch seconds) is still in the future.
 *   2. If another call is already fetching (`kme_CSA_settings._pendingFetch` is
 *      a thenable), wait for it and return whatever token it cached.
 *   3. Otherwise POST a password-grant request to `tokenUrl`, cache `id_token`
 *      and `expires_in`, and return the new token.
 *
 * @param {string} [reqUrl] – used only for debug logging
 * @param {string} [reqMethod] – used only for debug logging
 * @returns {Promise<string>} id_token
 * @throws {Error} when the token request fails, or the response lacks
 *   `id_token` / `expires_in`; also rethrows the in-flight fetch's error to queued callers
 */
async function getValidToken(reqUrl, reqMethod) {
  const { tokenUrl, username, clientId, scope } = kme_CSA_settings;

  console.debug({ message: 'Checking token cache', url: reqUrl, method: reqMethod });
  const cachedToken = await redis.hGet('authorization', 'token');
  const expiry = parseFloat(await redis.hGet('authorization', 'expiry') ?? '0');
  // A missing hash field may be reported as null or undefined depending on the
  // client; neither (nor an empty string) is a usable token, so a stale `expiry`
  // on its own must never count as a hit.
  const hasToken = cachedToken != null && cachedToken !== '';
  const isValid = hasToken && Date.now() / 1000 < expiry;

  if (isValid) {
    console.debug({ message: 'Token cache hit', expiresIn: Math.round(expiry - Date.now() / 1000) + 's' });
    return cachedToken;
  }

  // Stampede guard — if a fetch is already in flight, queue on it
  if (kme_CSA_settings._pendingFetch && typeof kme_CSA_settings._pendingFetch.then === 'function') {
    console.debug({ message: 'Token fetch in flight, queuing request' });
    await kme_CSA_settings._pendingFetch;
    console.debug({ message: 'Queued request unblocked, responding' });
    return await redis.hGet('authorization', 'token');
  }

  console.info({ message: 'Token cache miss, fetching fresh token', tokenUrl });
  const params = new URLSearchParams({
    grant_type: 'password',
    username,
    password: kme_CSA_settings.password,
    client_id: clientId,
    scope,
  });

  // Publish a deferred promise BEFORE the first await below, so concurrent
  // callers that reach the guard above queue on it instead of fetching again.
  let resolvePending;
  let rejectPending;
  kme_CSA_settings._pendingFetch = new Promise((resolve, reject) => {
    resolvePending = resolve;
    rejectPending = reject;
  });
  // Prevent an unhandled rejection when no concurrent caller is waiting on the promise.
  kme_CSA_settings._pendingFetch.catch(() => {});

  try {
    console.debug({ message: 'Requesting new token', url: tokenUrl, method: 'POST' });
    const response = await axios.post(tokenUrl, params, {
      headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
      timeout: 5000,
    });

    const { id_token, expires_in } = response.data;
    if (!id_token) throw new Error('id_token missing from response');
    if (!expires_in) throw new Error('expires_in missing from response');

    // NOTE(review): `expires_in` is stored and later compared as an absolute
    // epoch-seconds timestamp. RFC 6749 defines `expires_in` as a lifetime in
    // seconds — confirm which one the token service actually returns.
    await redis.hSet('authorization', 'token', id_token);
    await redis.hSet('authorization', 'expiry', String(expires_in));
    console.info({ message: 'Token fetched and cached', expiresAt: new Date(expires_in * 1000).toISOString() });

    // Release queued callers only after the cache has been written.
    resolvePending();
    return id_token;
  } catch (fetchErr) {
    console.error({ message: 'Token fetch failed', error: fetchErr.message, code: fetchErr.code });
    rejectPending(fetchErr);
    throw fetchErr;
  } finally {
    // Clear the guard on both outcomes so the next cache miss can start a new fetch.
    kme_CSA_settings._pendingFetch = null;
  }
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
validateSettings,
|
||||||
|
extractHydraItems,
|
||||||
|
buildSitemapXml,
|
||||||
|
getValidToken,
|
||||||
|
};
|
||||||
@@ -3,5 +3,8 @@
|
|||||||
"username": "service-account@example.com",
|
"username": "service-account@example.com",
|
||||||
"password": "changeme",
|
"password": "changeme",
|
||||||
"clientId": "kme-content-adapter",
|
"clientId": "kme-content-adapter",
|
||||||
"scope": "openid tags content_entitlements"
|
"scope": "openid tags content_entitlements",
|
||||||
|
"searchApiBaseUrl": "https://<kme-search-host>/api/search",
|
||||||
|
"tenant": "<your-tenant-id>",
|
||||||
|
"proxyBaseUrl": "https://<your-adapter-external-url>"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,89 +1,88 @@
|
|||||||
(async () => {
|
(async () => {
|
||||||
try {
|
// ---------------------------------------------------------------------------
|
||||||
// 1. Validate required kme_CSA_settings fields
|
// OIDC auth flow — existing non-sitemap behaviour, unchanged
|
||||||
const requiredFields = ['tokenUrl', 'username', 'password', 'clientId', 'scope'];
|
// ---------------------------------------------------------------------------
|
||||||
for (const field of requiredFields) {
|
async function oidcAuthFlow() {
|
||||||
if (!kme_CSA_settings[field]) {
|
const missingField = kmeContentSourceAdapterHelpers.validateSettings(
|
||||||
throw new Error('missing required field: ' + field);
|
kme_CSA_settings,
|
||||||
}
|
['tokenUrl', 'username', 'password', 'clientId', 'scope'],
|
||||||
}
|
);
|
||||||
|
if (missingField) throw new Error('missing required field: ' + missingField);
|
||||||
|
|
||||||
const { tokenUrl, username, clientId, scope } = kme_CSA_settings;
|
await kmeContentSourceAdapterHelpers.getValidToken(req.url, req.method);
|
||||||
|
|
||||||
// 2. Read token cache from Redis
|
|
||||||
console.debug({ message: 'Checking token cache', url: req.url, method: req.method });
|
|
||||||
const token = await redis.hGet('authorization', 'token');
|
|
||||||
const expiry = parseFloat(await redis.hGet('authorization', 'expiry') ?? '0');
|
|
||||||
const isValid = token !== null && Date.now() / 1000 < expiry;
|
|
||||||
|
|
||||||
// 3. Cache HIT → respond immediately
|
|
||||||
if (isValid) {
|
|
||||||
console.debug({ message: 'Token cache hit', expiresIn: Math.round(expiry - Date.now() / 1000) + 's' });
|
|
||||||
res.writeHead(200, { 'Content-Type': 'text/plain' });
|
|
||||||
res.end('Authorized');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 4. Stampede guard — if a fetch is already in flight, queue on it
|
|
||||||
if (kme_CSA_settings._pendingFetch && typeof kme_CSA_settings._pendingFetch.then === 'function') {
|
|
||||||
console.debug({ message: 'Token fetch in flight, queuing request' });
|
|
||||||
await kme_CSA_settings._pendingFetch;
|
|
||||||
console.debug({ message: 'Queued request unblocked, responding' });
|
|
||||||
res.writeHead(200, { 'Content-Type': 'text/plain' });
|
|
||||||
res.end('Authorized');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 5. Cache MISS → fetch fresh token
|
|
||||||
console.info({ message: 'Token cache miss, fetching fresh token', tokenUrl });
|
|
||||||
const params = new URLSearchParams({
|
|
||||||
grant_type: 'password',
|
|
||||||
username,
|
|
||||||
password: kme_CSA_settings.password,
|
|
||||||
client_id: clientId,
|
|
||||||
scope,
|
|
||||||
});
|
|
||||||
|
|
||||||
// Set up stampede guard before fetching
|
|
||||||
let resolvePending;
|
|
||||||
let rejectPending;
|
|
||||||
kme_CSA_settings._pendingFetch = new Promise((resolve, reject) => {
|
|
||||||
resolvePending = resolve;
|
|
||||||
rejectPending = reject;
|
|
||||||
});
|
|
||||||
// Prevent an unhandled-rejection when no concurrent request is waiting on this promise
|
|
||||||
kme_CSA_settings._pendingFetch.catch(() => {});
|
|
||||||
|
|
||||||
try {
|
|
||||||
console.debug({ message: 'Requesting new token', url: tokenUrl, method: 'POST' });
|
|
||||||
const response = await axios.post(tokenUrl, params, {
|
|
||||||
headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
|
|
||||||
timeout: 5000,
|
|
||||||
});
|
|
||||||
|
|
||||||
const { id_token, expires_in } = response.data;
|
|
||||||
if (!id_token) throw new Error('id_token missing from response');
|
|
||||||
if (!expires_in) throw new Error('expires_in missing from response');
|
|
||||||
|
|
||||||
// 6. Write to Redis cache
|
|
||||||
await redis.hSet('authorization', 'token', id_token);
|
|
||||||
await redis.hSet('authorization', 'expiry', String(expires_in));
|
|
||||||
console.info({ message: 'Token fetched and cached', expiresAt: new Date(expires_in * 1000).toISOString() });
|
|
||||||
|
|
||||||
// Resolve the pending fetch promise so waiting requests can proceed
|
|
||||||
resolvePending();
|
|
||||||
} catch (fetchErr) {
|
|
||||||
console.error({ message: 'Token fetch failed', error: fetchErr.message, code: fetchErr.code });
|
|
||||||
rejectPending(fetchErr);
|
|
||||||
throw fetchErr;
|
|
||||||
} finally {
|
|
||||||
kme_CSA_settings._pendingFetch = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 7. Respond success
|
|
||||||
res.writeHead(200, { 'Content-Type': 'text/plain' });
|
res.writeHead(200, { 'Content-Type': 'text/plain' });
|
||||||
res.end('Authorized');
|
res.end('Authorized');
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Sitemap flow — GET /sitemap.xml
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
/**
 * Serves the sitemap request: validates configuration, obtains an id_token,
 * queries the search API and answers with the generated sitemap XML.
 *
 * Responses written to `res`:
 *   200 application/xml – sitemap built from the search results
 *   500 text/plain      – a sitemap setting (searchApiBaseUrl, tenant, proxyBaseUrl) is missing
 *   504 text/plain      – the failure carries code ECONNABORTED or ERR_CANCELED
 *   502 text/plain      – any other failure raised while obtaining the token,
 *                         calling the search API or building the response
 *
 * @returns {Promise<void>}
 * @throws {Error} when an OIDC setting is missing (not answered here; left to the caller)
 */
async function sitemapFlow() {
  const helpers = kmeContentSourceAdapterHelpers;
  const sendText = (status, body) => {
    res.writeHead(status, { 'Content-Type': 'text/plain' });
    res.end(body);
  };

  const sitemapGap = helpers.validateSettings(
    kme_CSA_settings,
    ['searchApiBaseUrl', 'tenant', 'proxyBaseUrl'],
  );
  if (sitemapGap) {
    console.error({ message: 'Sitemap config error', missingField: sitemapGap });
    sendText(500, `Configuration error: missing required field: ${sitemapGap}`);
    return;
  }

  const { searchApiBaseUrl, tenant, proxyBaseUrl } = kme_CSA_settings;

  const oidcGap = helpers.validateSettings(
    kme_CSA_settings,
    ['tokenUrl', 'username', 'password', 'clientId', 'scope'],
  );
  if (oidcGap) {
    throw new Error(`missing required field: ${oidcGap}`);
  }

  try {
    console.debug({ message: 'Sitemap flow: obtaining token', url: req.url });
    const idToken = await helpers.getValidToken(req.url, req.method);

    const searchUrl = `${searchApiBaseUrl}/${tenant}/search?query=*&size=100&category=vkm:ArticleCategory`;
    console.info({ message: 'Sitemap flow: calling search API', url: searchUrl });
    const { data } = await axios.get(searchUrl, {
      headers: { Authorization: `OIDC_id_token ${idToken}`, 'Accept': 'application/ld+json' },
      timeout: 10000,
    });

    const fragments = helpers.extractHydraItems(data);
    console.debug({ message: 'Sitemap flow: items received', count: fragments.length });

    const sitemap = helpers.buildSitemapXml(fragments, proxyBaseUrl);
    console.info({ message: 'Sitemap flow: sending response', items: fragments.length });
    res.writeHead(200, { 'Content-Type': 'application/xml' });
    res.end(sitemap);
  } catch (err) {
    const timedOut = err.code === 'ECONNABORTED' || err.code === 'ERR_CANCELED';
    if (timedOut) {
      console.error({ message: 'Sitemap flow: search service timeout', code: err.code });
      sendText(504, 'Search service timeout');
      return;
    }
    if (err.response) {
      console.error({ message: 'Sitemap flow: search service error', status: err.response.status });
      sendText(502, `Search service error: HTTP ${err.response.status}`);
      return;
    }
    console.error({ message: 'Sitemap flow: unexpected error', error: err.message });
    sendText(502, `Search service error: ${err.message}`);
  }
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Entry point — URL routing
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
try {
|
||||||
|
if (req.url.endsWith('/sitemap.xml')) {
|
||||||
|
await sitemapFlow();
|
||||||
|
} else {
|
||||||
|
await oidcAuthFlow();
|
||||||
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
let message;
|
let message;
|
||||||
if (err.response) {
|
if (err.response) {
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import { readFileSync } from 'node:fs';
|
|||||||
import { fileURLToPath } from 'node:url';
|
import { fileURLToPath } from 'node:url';
|
||||||
import { dirname, join } from 'node:path';
|
import { dirname, join } from 'node:path';
|
||||||
import axios from 'axios';
|
import axios from 'axios';
|
||||||
|
import { create as xmlBuilder } from 'xmlbuilder2';
|
||||||
|
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
const __dirname = dirname(__filename);
|
const __dirname = dirname(__filename);
|
||||||
@@ -14,13 +15,23 @@ const proxyPath = join(__dirname, '../../src/proxyScripts/kmeContentSourceAdapte
|
|||||||
const proxyCode = readFileSync(proxyPath, 'utf-8');
|
const proxyCode = readFileSync(proxyPath, 'utf-8');
|
||||||
const proxyScript = new vm.Script(proxyCode, { filename: 'kmeContentSourceAdapter.js' });
|
const proxyScript = new vm.Script(proxyCode, { filename: 'kmeContentSourceAdapter.js' });
|
||||||
|
|
||||||
|
const helpersPath = join(__dirname, '../../src/globalVariables/kmeContentSourceAdapterHelpers.js');
|
||||||
|
const helpersCode = readFileSync(helpersPath, 'utf-8');
|
||||||
|
const helpersWrapped = `(function() {\n${helpersCode}\n})()`;
|
||||||
|
const helpersScript = new vm.Script(helpersWrapped, { filename: 'kmeContentSourceAdapterHelpers.js' });
|
||||||
|
|
||||||
|
/** Evaluate the helpers file with the provided deps (mirrors server.js loadGlobalVariables). */
|
||||||
|
function makeHelpers(deps) {
|
||||||
|
return helpersScript.runInContext(vm.createContext(deps));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start a minimal HTTP server that handles all POST requests with a fixed JSON body.
|
* Start a minimal HTTP server that handles all requests with a fixed JSON body.
|
||||||
* @param {number} statusCode
|
* @param {number} statusCode
|
||||||
* @param {object} responseBody
|
* @param {object} responseBody
|
||||||
* @returns {Promise<{ server: http.Server, url: string, close: () => Promise<void> }>}
|
* @returns {Promise<{ server: http.Server, url: string, close: () => Promise<void> }>}
|
||||||
*/
|
*/
|
||||||
function startMockTokenServer(statusCode, responseBody) {
|
function startMockServer(statusCode, responseBody) {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
const server = http.createServer((req, res) => {
|
const server = http.createServer((req, res) => {
|
||||||
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
res.writeHead(statusCode, { 'Content-Type': 'application/json' });
|
||||||
@@ -36,6 +47,11 @@ function startMockTokenServer(statusCode, responseBody) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start a mock token server (alias for backwards compatibility).
|
||||||
|
*/
|
||||||
|
const startMockTokenServer = startMockServer;
|
||||||
|
|
||||||
/** Build an in-memory Redis fake. */
|
/** Build an in-memory Redis fake. */
|
||||||
function makeRedisFake() {
|
function makeRedisFake() {
|
||||||
const _store = {};
|
const _store = {};
|
||||||
@@ -72,18 +88,18 @@ describe('proxy HTTP contract: 200 OK', () => {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const res = makeRes();
|
const res = makeRes();
|
||||||
|
const redis = makeRedisFake();
|
||||||
|
const kme_CSA_settings = {
|
||||||
|
tokenUrl: mock.url,
|
||||||
|
username: 'user',
|
||||||
|
password: 'pass',
|
||||||
|
clientId: 'client',
|
||||||
|
scope: 'openid',
|
||||||
|
};
|
||||||
|
const deps = { URLSearchParams, console, axios, xmlBuilder, redis, kme_CSA_settings };
|
||||||
const ctx = vm.createContext({
|
const ctx = vm.createContext({
|
||||||
URLSearchParams,
|
...deps,
|
||||||
console,
|
kmeContentSourceAdapterHelpers: makeHelpers(deps),
|
||||||
axios,
|
|
||||||
redis: makeRedisFake(),
|
|
||||||
kme_CSA_settings: {
|
|
||||||
tokenUrl: mock.url,
|
|
||||||
username: 'user',
|
|
||||||
password: 'pass',
|
|
||||||
clientId: 'client',
|
|
||||||
scope: 'openid',
|
|
||||||
},
|
|
||||||
req: { url: '/', method: 'GET', headers: {} },
|
req: { url: '/', method: 'GET', headers: {} },
|
||||||
res,
|
res,
|
||||||
});
|
});
|
||||||
@@ -109,18 +125,18 @@ describe('proxy HTTP contract: 401 Unauthorized', () => {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const res = makeRes();
|
const res = makeRes();
|
||||||
|
const redis = makeRedisFake();
|
||||||
|
const kme_CSA_settings = {
|
||||||
|
tokenUrl: mock.url,
|
||||||
|
username: 'bad-user',
|
||||||
|
password: 'bad-pass',
|
||||||
|
clientId: 'client',
|
||||||
|
scope: 'openid',
|
||||||
|
};
|
||||||
|
const deps = { URLSearchParams, console, axios, xmlBuilder, redis, kme_CSA_settings };
|
||||||
const ctx = vm.createContext({
|
const ctx = vm.createContext({
|
||||||
URLSearchParams,
|
...deps,
|
||||||
console,
|
kmeContentSourceAdapterHelpers: makeHelpers(deps),
|
||||||
axios,
|
|
||||||
redis: makeRedisFake(),
|
|
||||||
kme_CSA_settings: {
|
|
||||||
tokenUrl: mock.url,
|
|
||||||
username: 'bad-user',
|
|
||||||
password: 'bad-pass',
|
|
||||||
clientId: 'client',
|
|
||||||
scope: 'openid',
|
|
||||||
},
|
|
||||||
req: { url: '/', method: 'GET', headers: {} },
|
req: { url: '/', method: 'GET', headers: {} },
|
||||||
res,
|
res,
|
||||||
});
|
});
|
||||||
@@ -135,3 +151,156 @@ describe('proxy HTTP contract: 401 Unauthorized', () => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Contract: sitemap endpoint (T005, T012)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('sitemap endpoint', () => {
|
||||||
|
/**
|
||||||
|
* Build a VM context wired to a real token server and a real search server.
|
||||||
|
* The token cache is pre-seeded so no real token exchange is needed.
|
||||||
|
*/
|
||||||
|
function makeSitemapCtx({ searchUrl, tokenUrl }) {
|
||||||
|
const redis = makeRedisFake();
|
||||||
|
// Pre-seed a valid token so no token fetch is needed
|
||||||
|
redis.hSet('authorization', 'token', 'sitemap-contract-token');
|
||||||
|
redis.hSet('authorization', 'expiry', '9999999999');
|
||||||
|
|
||||||
|
const res = makeRes();
|
||||||
|
const kme_CSA_settings = {
|
||||||
|
tokenUrl: tokenUrl ?? 'http://127.0.0.1:1', // not used (cache hit)
|
||||||
|
username: 'user',
|
||||||
|
password: 'pass',
|
||||||
|
clientId: 'client',
|
||||||
|
scope: 'openid',
|
||||||
|
searchApiBaseUrl: searchUrl,
|
||||||
|
tenant: 'test',
|
||||||
|
proxyBaseUrl: 'https://proxy.example.com',
|
||||||
|
};
|
||||||
|
const deps = { URLSearchParams, console, axios, xmlBuilder, redis, kme_CSA_settings };
|
||||||
|
const ctx = vm.createContext({
|
||||||
|
...deps,
|
||||||
|
kmeContentSourceAdapterHelpers: makeHelpers(deps),
|
||||||
|
req: { url: '/sitemap.xml', method: 'GET', headers: {} },
|
||||||
|
res,
|
||||||
|
});
|
||||||
|
ctx._res = res;
|
||||||
|
return ctx;
|
||||||
|
}
|
||||||
|
|
||||||
|
test('full round-trip GET /sitemap.xml → 200 application/xml with loc elements', async () => {
|
||||||
|
const searchMock = await startMockServer(200, {
|
||||||
|
'hydra:member': [
|
||||||
|
{ 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/doc-1' }] },
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const ctx = makeSitemapCtx({ searchUrl: searchMock.url });
|
||||||
|
await proxyScript.runInContext(ctx);
|
||||||
|
|
||||||
|
assert.strictEqual(ctx._res.statusCode, 200);
|
||||||
|
assert.ok(ctx._res.headers['Content-Type'].includes('application/xml'),
|
||||||
|
`Content-Type was: ${ctx._res.headers['Content-Type']}`);
|
||||||
|
assert.ok(ctx._res.body.startsWith('<?xml'), 'body should start with XML declaration');
|
||||||
|
assert.ok(ctx._res.body.includes('<loc>'), 'body should contain a loc element');
|
||||||
|
} finally {
|
||||||
|
await searchMock.close();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
test('empty results round-trip → 200 application/xml with urlset and no url element', async () => {
|
||||||
|
const searchMock = await startMockServer(200, { 'hydra:member': [] });
|
||||||
|
|
||||||
|
try {
|
||||||
|
const ctx = makeSitemapCtx({ searchUrl: searchMock.url });
|
||||||
|
await proxyScript.runInContext(ctx);
|
||||||
|
|
||||||
|
assert.strictEqual(ctx._res.statusCode, 200);
|
||||||
|
assert.ok(ctx._res.headers['Content-Type'].includes('application/xml'),
|
||||||
|
`Content-Type was: ${ctx._res.headers['Content-Type']}`);
|
||||||
|
assert.ok(ctx._res.body.includes('<urlset'), 'body should contain urlset');
|
||||||
|
assert.ok(!ctx._res.body.includes('<url>'), 'body should not contain url elements for empty results');
|
||||||
|
} finally {
|
||||||
|
await searchMock.close();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
test('search server returns 503 → adapter returns 502', async () => {
|
||||||
|
const searchMock = await startMockServer(503, { error: 'Service Unavailable' });
|
||||||
|
|
||||||
|
try {
|
||||||
|
const ctx = makeSitemapCtx({ searchUrl: searchMock.url });
|
||||||
|
await proxyScript.runInContext(ctx);
|
||||||
|
|
||||||
|
assert.strictEqual(ctx._res.statusCode, 502, `body was: ${ctx._res.body}`);
|
||||||
|
} finally {
|
||||||
|
await searchMock.close();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
test('search server hangs > 10s → adapter returns 504 within 12s', async () => {
|
||||||
|
// Server that accepts connections but never responds
|
||||||
|
const server = await new Promise((resolve, reject) => {
|
||||||
|
const s = http.createServer(() => { /* intentionally hang */ });
|
||||||
|
s.listen(0, '127.0.0.1', () => {
|
||||||
|
const { port } = s.address();
|
||||||
|
const close = () => new Promise((res, rej) => s.close(err => err ? rej(err) : res()));
|
||||||
|
resolve({ server: s, url: `http://127.0.0.1:${port}`, close });
|
||||||
|
});
|
||||||
|
s.once('error', reject);
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const ctx = makeSitemapCtx({ searchUrl: server.url });
|
||||||
|
const start = Date.now();
|
||||||
|
await proxyScript.runInContext(ctx);
|
||||||
|
const elapsed = Date.now() - start;
|
||||||
|
|
||||||
|
assert.strictEqual(ctx._res.statusCode, 504, `body was: ${ctx._res.body}`);
|
||||||
|
assert.ok(elapsed < 12000, `Should respond within 12s, took ${elapsed}ms`);
|
||||||
|
} finally {
|
||||||
|
await server.close();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Non-sitemap endpoint regression (T010)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('non-sitemap endpoint (regression)', () => {
|
||||||
|
test('GET / with valid OIDC credentials → 200 Authorized', async () => {
|
||||||
|
const mock = await startMockTokenServer(200, {
|
||||||
|
id_token: 'regression-token',
|
||||||
|
expires_in: 9_999_999_999,
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const res = makeRes();
|
||||||
|
const redis = makeRedisFake();
|
||||||
|
const kme_CSA_settings = {
|
||||||
|
tokenUrl: mock.url,
|
||||||
|
username: 'user',
|
||||||
|
password: 'pass',
|
||||||
|
clientId: 'client',
|
||||||
|
scope: 'openid',
|
||||||
|
};
|
||||||
|
const deps = { URLSearchParams, console, axios, xmlBuilder, redis, kme_CSA_settings };
|
||||||
|
const ctx = vm.createContext({
|
||||||
|
...deps,
|
||||||
|
kmeContentSourceAdapterHelpers: makeHelpers(deps),
|
||||||
|
req: { url: '/', method: 'GET', headers: {} },
|
||||||
|
res,
|
||||||
|
});
|
||||||
|
|
||||||
|
await proxyScript.runInContext(ctx);
|
||||||
|
|
||||||
|
assert.strictEqual(res.statusCode, 200);
|
||||||
|
assert.strictEqual(res.body, 'Authorized');
|
||||||
|
} finally {
|
||||||
|
await mock.close();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import vm from 'node:vm';
|
|||||||
import { readFileSync } from 'node:fs';
|
import { readFileSync } from 'node:fs';
|
||||||
import { fileURLToPath } from 'node:url';
|
import { fileURLToPath } from 'node:url';
|
||||||
import { dirname, join } from 'node:path';
|
import { dirname, join } from 'node:path';
|
||||||
|
import { create as xmlBuilder } from 'xmlbuilder2';
|
||||||
|
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
const __dirname = dirname(__filename);
|
const __dirname = dirname(__filename);
|
||||||
@@ -12,6 +13,19 @@ const proxyPath = join(__dirname, '../../src/proxyScripts/kmeContentSourceAdapte
|
|||||||
const proxyCode = readFileSync(proxyPath, 'utf-8');
|
const proxyCode = readFileSync(proxyPath, 'utf-8');
|
||||||
const proxyScript = new vm.Script(proxyCode, { filename: 'kmeContentSourceAdapter.js' });
|
const proxyScript = new vm.Script(proxyCode, { filename: 'kmeContentSourceAdapter.js' });
|
||||||
|
|
||||||
|
const helpersPath = join(__dirname, '../../src/globalVariables/kmeContentSourceAdapterHelpers.js');
|
||||||
|
const helpersCode = readFileSync(helpersPath, 'utf-8');
|
||||||
|
const helpersWrapped = `(function() {\n${helpersCode}\n})()`;
|
||||||
|
const helpersScript = new vm.Script(helpersWrapped, { filename: 'kmeContentSourceAdapterHelpers.js' });
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Evaluate the helpers file in a context built from the provided deps, returning
|
||||||
|
* the helpers object. Mirrors how server.js loads globalVariables/ JS files.
|
||||||
|
*/
|
||||||
|
function makeHelpers(deps) {
|
||||||
|
return helpersScript.runInContext(vm.createContext(deps));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build a minimal VM context satisfying the vm-context contract.
|
* Build a minimal VM context satisfying the vm-context contract.
|
||||||
* @param {import('node:test').TestContext} t
|
* @param {import('node:test').TestContext} t
|
||||||
@@ -42,7 +56,7 @@ function makeContext(t, overrides = {}) {
|
|||||||
get headers() { return headers; },
|
get headers() { return headers; },
|
||||||
};
|
};
|
||||||
|
|
||||||
const kme_CSA_settings = {
|
const defaultSettings = {
|
||||||
tokenUrl: 'https://auth.example.com/token',
|
tokenUrl: 'https://auth.example.com/token',
|
||||||
username: 'testuser',
|
username: 'testuser',
|
||||||
password: 'testpass',
|
password: 'testpass',
|
||||||
@@ -50,18 +64,39 @@ function makeContext(t, overrides = {}) {
|
|||||||
scope: 'openid',
|
scope: 'openid',
|
||||||
};
|
};
|
||||||
|
|
||||||
const axiosMock = {
|
const defaultAxiosMock = {
|
||||||
post: t.mock.fn(async () => ({
|
post: t.mock.fn(async () => ({
|
||||||
data: { id_token: 'mock-token', expires_in: 9_999_999_999 },
|
data: { id_token: 'mock-token', expires_in: 9_999_999_999 },
|
||||||
})),
|
})),
|
||||||
|
get: t.mock.fn(async () => ({
|
||||||
|
data: { 'hydra:member': [] },
|
||||||
|
})),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Resolve the final axios and settings — overrides take precedence.
|
||||||
|
// Helpers must close over the SAME axios/settings that the VM context will use,
|
||||||
|
// otherwise tests that pass error-throwing axios overrides would get helpers
|
||||||
|
// that still use the success-returning default.
|
||||||
|
const resolvedAxios = overrides.axios ?? defaultAxiosMock;
|
||||||
|
const resolvedSettings = overrides.kme_CSA_settings ?? defaultSettings;
|
||||||
|
|
||||||
|
const kmeContentSourceAdapterHelpers = makeHelpers({
|
||||||
|
URLSearchParams,
|
||||||
|
console,
|
||||||
|
axios: resolvedAxios,
|
||||||
|
redis,
|
||||||
|
kme_CSA_settings: resolvedSettings,
|
||||||
|
xmlBuilder,
|
||||||
|
});
|
||||||
|
|
||||||
const ctx = vm.createContext({
|
const ctx = vm.createContext({
|
||||||
URLSearchParams,
|
URLSearchParams,
|
||||||
console,
|
console,
|
||||||
axios: axiosMock,
|
axios: resolvedAxios,
|
||||||
redis,
|
redis,
|
||||||
kme_CSA_settings,
|
kme_CSA_settings: defaultSettings,
|
||||||
|
xmlBuilder,
|
||||||
|
kmeContentSourceAdapterHelpers,
|
||||||
req: { url: '/', method: 'GET', headers: {} },
|
req: { url: '/', method: 'GET', headers: {} },
|
||||||
res,
|
res,
|
||||||
...overrides,
|
...overrides,
|
||||||
@@ -71,7 +106,7 @@ function makeContext(t, overrides = {}) {
|
|||||||
ctx._redis = redis;
|
ctx._redis = redis;
|
||||||
ctx._res = res;
|
ctx._res = res;
|
||||||
ctx._store = _store;
|
ctx._store = _store;
|
||||||
ctx._axios = axiosMock;
|
ctx._axios = resolvedAxios;
|
||||||
|
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
@@ -157,7 +192,7 @@ describe('US3: authentication failure handling', () => {
|
|||||||
response: { status: 401 },
|
response: { status: 401 },
|
||||||
});
|
});
|
||||||
const ctx = makeContext(t, {
|
const ctx = makeContext(t, {
|
||||||
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
|
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
|
||||||
});
|
});
|
||||||
|
|
||||||
await runScript(ctx);
|
await runScript(ctx);
|
||||||
@@ -169,7 +204,7 @@ describe('US3: authentication failure handling', () => {
|
|||||||
test('timeout (ECONNABORTED) → 401 Unauthorized: token service timeout', async (t) => {
|
test('timeout (ECONNABORTED) → 401 Unauthorized: token service timeout', async (t) => {
|
||||||
const axiosError = Object.assign(new Error('timeout'), { code: 'ECONNABORTED' });
|
const axiosError = Object.assign(new Error('timeout'), { code: 'ECONNABORTED' });
|
||||||
const ctx = makeContext(t, {
|
const ctx = makeContext(t, {
|
||||||
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
|
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
|
||||||
});
|
});
|
||||||
|
|
||||||
await runScript(ctx);
|
await runScript(ctx);
|
||||||
@@ -181,7 +216,7 @@ describe('US3: authentication failure handling', () => {
|
|||||||
test('timeout (ERR_CANCELED) → 401 Unauthorized: token service timeout', async (t) => {
|
test('timeout (ERR_CANCELED) → 401 Unauthorized: token service timeout', async (t) => {
|
||||||
const axiosError = Object.assign(new Error('canceled'), { code: 'ERR_CANCELED' });
|
const axiosError = Object.assign(new Error('canceled'), { code: 'ERR_CANCELED' });
|
||||||
const ctx = makeContext(t, {
|
const ctx = makeContext(t, {
|
||||||
axios: { post: t.mock.fn(async () => { throw axiosError; }) },
|
axios: { post: t.mock.fn(async () => { throw axiosError; }), get: t.mock.fn() },
|
||||||
});
|
});
|
||||||
|
|
||||||
await runScript(ctx);
|
await runScript(ctx);
|
||||||
@@ -194,6 +229,7 @@ describe('US3: authentication failure handling', () => {
|
|||||||
const ctx = makeContext(t, {
|
const ctx = makeContext(t, {
|
||||||
axios: {
|
axios: {
|
||||||
post: t.mock.fn(async () => ({ data: { expires_in: 9999 } })),
|
post: t.mock.fn(async () => ({ data: { expires_in: 9999 } })),
|
||||||
|
get: t.mock.fn(),
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -207,6 +243,7 @@ describe('US3: authentication failure handling', () => {
|
|||||||
const ctx = makeContext(t, {
|
const ctx = makeContext(t, {
|
||||||
axios: {
|
axios: {
|
||||||
post: t.mock.fn(async () => ({ data: { id_token: 'a-token' } })),
|
post: t.mock.fn(async () => ({ data: { id_token: 'a-token' } })),
|
||||||
|
get: t.mock.fn(),
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -267,7 +304,7 @@ describe('stampede guard', () => {
|
|||||||
await new Promise(resolve => setTimeout(resolve, 50));
|
await new Promise(resolve => setTimeout(resolve, 50));
|
||||||
return { data: { id_token: 'stampede-token', expires_in: 9_999_999_999 } };
|
return { data: { id_token: 'stampede-token', expires_in: 9_999_999_999 } };
|
||||||
});
|
});
|
||||||
const sharedAxios = { post: mockAxiosPost };
|
const sharedAxios = { post: mockAxiosPost, get: t.mock.fn() };
|
||||||
|
|
||||||
// Build two contexts sharing kme_CSA_settings, redis, and axios references
|
// Build two contexts sharing kme_CSA_settings, redis, and axios references
|
||||||
function makeRes(tctx) {
|
function makeRes(tctx) {
|
||||||
@@ -284,15 +321,23 @@ describe('stampede guard', () => {
|
|||||||
const res1 = makeRes(t);
|
const res1 = makeRes(t);
|
||||||
const res2 = makeRes(t);
|
const res2 = makeRes(t);
|
||||||
|
|
||||||
|
// Helpers must share the same redis/kme_CSA_settings/axios so the stampede guard works
|
||||||
|
const sharedHelpers = makeHelpers({
|
||||||
|
URLSearchParams, console, axios: sharedAxios,
|
||||||
|
redis, kme_CSA_settings, xmlBuilder,
|
||||||
|
});
|
||||||
|
|
||||||
const ctx1 = vm.createContext({
|
const ctx1 = vm.createContext({
|
||||||
URLSearchParams, console, axios: sharedAxios,
|
URLSearchParams, console, axios: sharedAxios,
|
||||||
redis, kme_CSA_settings,
|
redis, kme_CSA_settings, xmlBuilder,
|
||||||
|
kmeContentSourceAdapterHelpers: sharedHelpers,
|
||||||
req: { url: '/', method: 'GET', headers: {} },
|
req: { url: '/', method: 'GET', headers: {} },
|
||||||
res: res1,
|
res: res1,
|
||||||
});
|
});
|
||||||
const ctx2 = vm.createContext({
|
const ctx2 = vm.createContext({
|
||||||
URLSearchParams, console, axios: sharedAxios,
|
URLSearchParams, console, axios: sharedAxios,
|
||||||
redis, kme_CSA_settings,
|
redis, kme_CSA_settings, xmlBuilder,
|
||||||
|
kmeContentSourceAdapterHelpers: sharedHelpers,
|
||||||
req: { url: '/', method: 'GET', headers: {} },
|
req: { url: '/', method: 'GET', headers: {} },
|
||||||
res: res2,
|
res: res2,
|
||||||
});
|
});
|
||||||
@@ -309,3 +354,205 @@ describe('stampede guard', () => {
|
|||||||
assert.strictEqual(res2.body, 'Authorized');
|
assert.strictEqual(res2.body, 'Authorized');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Sitemap flow — US1 (T004)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Sitemap flow: drives a GET `/sitemap.xml` request through `runScript` and
 * checks the response written to `ctx._res` for the success, upstream-error,
 * timeout, and missing-configuration cases.
 */
describe('sitemap flow', () => {
  /**
   * Builds a context for a GET `/sitemap.xml` request with sitemap settings,
   * a pre-seeded token cache, and a mocked `axios.get`.
   *
   * @param {object} t - node:test context, used to create the `axios.get` mock.
   * @param {Function|null} [axiosGetImpl] - implementation for `axios.get`;
   *   when nullish, a stub resolving to an empty `hydra:member` list is used.
   * @param {object} [settingsOverrides] - merged over the default sitemap
   *   settings; set a key to `undefined` to simulate a missing field.
   * @returns {object} the context produced by `makeContext`.
   */
  function makeSitemapContext(t, axiosGetImpl, settingsOverrides = {}) {
    const ctx = makeContext(t, {
      req: { url: '/sitemap.xml', method: 'GET', headers: {} },
    });

    // Sitemap-specific settings first, caller overrides last so they win.
    Object.assign(
      ctx.kme_CSA_settings,
      {
        searchApiBaseUrl: 'https://search.example.com/api',
        tenant: 'test-tenant',
        proxyBaseUrl: 'https://proxy.example.com',
      },
      settingsOverrides,
    );

    // Pre-seed token cache so getValidToken() returns immediately
    ctx._store['authorization:token'] = 'sitemap-token';
    ctx._store['authorization:expiry'] = '9999999999';

    // Replace axios.get with the provided implementation
    ctx._axios.get = t.mock.fn(axiosGetImpl ?? (async () => ({
      data: { 'hydra:member': [] },
    })));

    return ctx;
  }

  test('happy path — items present → 200 with correct XML and loc values', async (t) => {
    const ctx = makeSitemapContext(t, async () => ({
      data: {
        'hydra:member': [
          { 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/doc-1' }] },
          { 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/doc-2' }] },
        ],
      },
    }));

    await runScript(ctx);

    assert.strictEqual(ctx._res.statusCode, 200);
    assert.strictEqual(ctx._res.headers['Content-Type'], 'application/xml');
    // The assertion message promises "start with", so check the prefix rather
    // than mere containment.
    assert.ok(ctx._res.body.startsWith('<?xml'), 'body should start with XML declaration');
    assert.ok(ctx._res.body.includes('<urlset'), 'body should contain urlset');
    for (const doc of ['doc-1', 'doc-2']) {
      assert.ok(
        ctx._res.body.includes(`<loc>https://proxy.example.com?kmeURL=https%3A%2F%2Fkme.example.com%2F${doc}</loc>`),
        `body should contain encoded loc for ${doc}`,
      );
    }
  });

  test('happy path — zero items → 200 with empty urlset', async (t) => {
    const ctx = makeSitemapContext(t, async () => ({ data: { 'hydra:member': [] } }));

    await runScript(ctx);

    assert.strictEqual(ctx._res.statusCode, 200);
    assert.strictEqual(ctx._res.headers['Content-Type'], 'application/xml');
    assert.ok(ctx._res.body.includes('<urlset'), 'body should contain urlset');
    assert.ok(!ctx._res.body.includes('<url>'), 'body should not contain url elements');
  });

  test('items with empty vkm:url filtered — only valid items appear', async (t) => {
    const ctx = makeSitemapContext(t, async () => ({
      data: {
        'hydra:member': [
          { 'hydra:member': [{ 'vkm:url': '' }] },
          { 'hydra:member': [{ 'vkm:url': 'https://kme.example.com/valid' }] },
        ],
      },
    }));

    await runScript(ctx);

    assert.strictEqual(ctx._res.statusCode, 200);
    const locMatches = ctx._res.body.match(/<loc>/g);
    assert.strictEqual(locMatches?.length ?? 0, 1, 'exactly one <loc> element expected');
    assert.ok(ctx._res.body.includes('valid'), 'the valid URL should appear in the loc');
  });

  // US3 error scenarios (T011b)

  test('upstream 503 → 502 with Search service error message', async (t) => {
    const searchErr = Object.assign(new Error('Request failed with status code 503'), {
      response: { status: 503 },
    });
    const ctx = makeSitemapContext(t, async () => { throw searchErr; });

    await runScript(ctx);

    assert.strictEqual(ctx._res.statusCode, 502);
    assert.ok(ctx._res.body.includes('Search service error: HTTP 503'), `body was: ${ctx._res.body}`);
  });

  // Both error codes are expected to produce the same 504 response, so the
  // two cases share one test body.
  const timeoutCases = [
    { code: 'ECONNABORTED', message: 'timeout' },
    { code: 'ERR_CANCELED', message: 'canceled' },
  ];
  for (const { code, message } of timeoutCases) {
    test(`timeout ${code} → 504 Search service timeout`, async (t) => {
      const timeoutErr = Object.assign(new Error(message), { code });
      const ctx = makeSitemapContext(t, async () => { throw timeoutErr; });

      await runScript(ctx);

      assert.strictEqual(ctx._res.statusCode, 504);
      assert.ok(ctx._res.body.includes('Search service timeout'), `body was: ${ctx._res.body}`);
    });
  }

  // Each required setting, when undefined, is expected to yield a 500 that
  // names the missing field.
  for (const field of ['searchApiBaseUrl', 'tenant', 'proxyBaseUrl']) {
    test(`missing ${field} → 500 Configuration error`, async (t) => {
      const ctx = makeSitemapContext(t, null, { [field]: undefined });

      await runScript(ctx);

      assert.strictEqual(ctx._res.statusCode, 500);
      assert.strictEqual(ctx._res.body, `Configuration error: missing required field: ${field}`);
    });
  }
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Non-sitemap URL routing — regression guard (T009)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Regression guard for requests to `/` (not `/sitemap.xml`): checks the
 * response `runScript` writes to `ctx._res` on a token cache hit, a cache
 * miss, and a token-service timeout.
 */
describe('non-sitemap URL routing', () => {
  test('cache hit → no fetch → 200 Authorized', async (t) => {
    // Keep a handle on the mock so the "no fetch" half of the test name can be
    // asserted directly instead of relying on the throw surfacing elsewhere.
    const post = t.mock.fn(async () => { throw new Error('should not be called'); });
    const ctx = makeContext(t, {
      req: { url: '/', method: 'GET', headers: {} },
      axios: {
        post,
        get: t.mock.fn(),
      },
    });
    // Pre-seed valid token
    ctx._store['authorization:token'] = 'cached-tok';
    ctx._store['authorization:expiry'] = '9999999999';

    await runScript(ctx);

    assert.strictEqual(ctx._res.statusCode, 200);
    assert.strictEqual(ctx._res.body, 'Authorized');
    assert.strictEqual(post.mock.calls.length, 0, 'axios.post must not be called on a cache hit');
  });

  test('cache miss → fresh fetch → 200 Authorized', async (t) => {
    const ctx = makeContext(t, {
      req: { url: '/', method: 'GET', headers: {} },
    });
    // No pre-seeded token → cache miss

    await runScript(ctx);

    assert.strictEqual(ctx._res.statusCode, 200);
    assert.strictEqual(ctx._res.body, 'Authorized');
    // Verify token was written to Redis
    const hSetCalls = ctx._redis.hSet.mock.calls;
    const tokenCall = hSetCalls.find(
      ({ arguments: args }) => args[0] === 'authorization' && args[1] === 'token',
    );
    assert.ok(tokenCall, 'hSet should be called with token');
    // NOTE(review): 'mock-token' presumably comes from makeContext's default
    // axios.post stub — confirm against makeContext.
    assert.strictEqual(tokenCall.arguments[2], 'mock-token');
  });

  test('token service down (ECONNABORTED) → 401 Unauthorized', async (t) => {
    const timeoutErr = Object.assign(new Error('timeout'), { code: 'ECONNABORTED' });
    const ctx = makeContext(t, {
      req: { url: '/', method: 'GET', headers: {} },
      axios: {
        post: t.mock.fn(async () => { throw timeoutErr; }),
        get: t.mock.fn(),
      },
    });

    await runScript(ctx);

    assert.strictEqual(ctx._res.statusCode, 401);
    assert.ok(ctx._res.body.startsWith('Unauthorized:'), `body was: ${ctx._res.body}`);
  });
});
|
||||||
|
|||||||
Reference in New Issue
Block a user