diff --git a/.github/agents/copilot-instructions.md b/.github/agents/copilot-instructions.md new file mode 100644 index 0000000..e2007a6 --- /dev/null +++ b/.github/agents/copilot-instructions.md @@ -0,0 +1,43 @@ +# google-drive-content-adapter Development Guidelines + +Auto-generated from all feature plans. Last updated: 2026-03-06 + +## Active Technologies +- Node.js v20.x LTS (or later), JavaScript ES2022+ with JSDoc type annotations + `googleapis` (36.x) - justified for OAuth 2.0 and Drive API integration (see Complexity Tracking) (001-drive-proxy-adapter) +- N/A - stateless proxy, no persistence layer (001-drive-proxy-adapter) +- Node.js 18.0.0+ (LTS), JavaScript ES2022+ with ES modules + googleapis (^140.0.0) - Google Drive API client library (001-drive-proxy-adapter) +- N/A (stateless proxy, no persistent storage) (001-drive-proxy-adapter) +- Node.js 18+ (LTS), JavaScript ES2022+ with ES modules + `googleapis` (Google Drive API client - only approved external dependency) (001-drive-proxy-adapter) +- N/A (stateless, no persistence - fetches fresh data from Drive API on each request) (001-drive-proxy-adapter) +- Node.js 18+ (LTS), JavaScript ES2022+ with ES modules + `googleapis` (Google Drive API + OAuth 2.0), Node.js built-ins only otherwise (001-drive-proxy-adapter) +- N/A (stateless, no persistence layer, no caching) (001-drive-proxy-adapter) +- JavaScript ES2022+ / Node.js 18 LTS or later + googleapis (Google Drive API v3 client), xmlbuilder2 (sitemap XML generation) (001-drive-proxy-adapter) +- N/A (no persistent storage, always fetch fresh from Google Drive API) (001-drive-proxy-adapter) +- JavaScript ES2022+ (Node.js LTS v18.0.0+) (001-drive-proxy-adapter) +- N/A (no persistence - sitemap generated on-demand from Drive API) (001-drive-proxy-adapter) + +- Node.js v20.x LTS (with fallback support for v18.x LTS) (001-drive-proxy-adapter) + +## Project Structure + +```text +src/ +tests/ +``` + +## Commands + +# Add commands for Node.js v20.x LTS (with fallback support for v18.x LTS) + +## Code Style + +Node.js v20.x LTS (with fallback support for v18.x LTS): Follow standard conventions + +## Recent Changes +- 001-drive-proxy-adapter: Added JavaScript ES2022+ (Node.js LTS v18.0.0+) +- 001-drive-proxy-adapter: Added JavaScript ES2022+ / Node.js 18 LTS or later + googleapis (Google Drive API v3 client), xmlbuilder2 (sitemap XML generation) +- 001-drive-proxy-adapter: Added Node.js 18+ (LTS), JavaScript ES2022+ with ES modules + `googleapis` (Google Drive API + OAuth 2.0), Node.js built-ins only otherwise + + + + diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..49871d6 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,215 @@ +# Copilot Instructions: Google Drive Content Adapter + +This project uses **Specify** (Spec-Driven Development workflow) to manage feature development through structured specifications, planning, and task execution. + +## Project Status + +**Early Stage**: This repository was recently initialized from the Specify template. No production code exists yet—only the Specify workflow infrastructure. + +## Workflow Commands + +All feature development follows the Specify workflow using specialized agents: + +### Feature Lifecycle + +1. **Create specification**: `/speckit.specify [feature description]` + - Generates `specs/[###-feature-name]/spec.md` with user stories, requirements, and edge cases + - Creates feature branch + +2. **Create implementation plan**: `/speckit.plan` + - Generates `plan.md` with tech stack, architecture, and project structure + - Produces research artifacts and API contracts + - Must pass constitution checks before proceeding + +3. **Generate tasks**: `/speckit.tasks` + - Creates `tasks.md` with dependency-ordered implementation tasks + - Groups tasks by user story for independent implementation + - Enables parallel development where possible + +4. **Execute implementation**: `/speckit.implement` + - Processes tasks from `tasks.md` + - Checks checklists before proceeding (if any exist) + - Follows TDD workflow when tests are specified + +### Supporting Commands + +- `/speckit.checklist`: Generate custom checklists (UX, security, performance, etc.) +- `/speckit.clarify`: Identify underspecified areas and encode answers back into spec +- `/speckit.analyze`: Cross-artifact consistency analysis (spec/plan/tasks) +- `/speckit.taskstoissues`: Convert tasks.md into GitHub issues +- `/speckit.constitution`: Create/update project constitution + +## File Structure + +``` +.specify/ +├── memory/ +│ └── constitution.md # Project principles and standards +├── scripts/bash/ +│ ├── check-prerequisites.sh # Validate workflow state +│ ├── create-new-feature.sh # Initialize new feature branch +│ └── update-agent-context.sh # Sync agent prompts with templates +└── templates/ # Templates for spec, plan, tasks, checklists + +.github/ +├── agents/ # Agent definitions for each workflow step +└── prompts/ # Agent prompt templates + +specs/[###-feature-name]/ # Feature-specific documentation +├── spec.md # Feature specification +├── plan.md # Implementation plan +├── tasks.md # Task list +├── research.md # Technical research +├── data-model.md # Data models +├── quickstart.md # Getting started guide +├── contracts/ # API contracts +└── checklists/ # Custom checklists +``` + +## Constitution (MANDATORY) + +The project constitution at `.specify/memory/constitution.md` defines **non-negotiable principles**: + +### Core Principles + +1. **Modular Architecture**: Discrete modules with clear boundaries, independently testable +2. **API-First Design**: Document APIs before implementation; follow RESTful principles +3. **Test-First Development (NON-NEGOTIABLE)**: + - Write failing tests first + - Get user approval of test scenarios + - Implement minimum code to pass + - Maintain 80%+ code coverage +4. **Security & Privacy by Default**: Encrypt sensitive data, use OAuth 2.0, implement least privilege +5. **Observability & Debuggability**: Structured logs, request tracing, performance metrics +6. **Semantic Versioning**: MAJOR.MINOR.PATCH with migration guides for breaking changes +7. **Simplicity & YAGNI**: Implement only demonstrated needs; justify complexity + +### Quality Gates (ALL must pass before merge) + +- ✅ All tests pass (unit, integration, e2e) +- ✅ Code coverage ≥ 80% +- ✅ No critical security vulnerabilities +- ✅ Documentation updated +- ✅ Performance regression checks pass + +### API Standards + +- Accept/return JSON +- Use HTTP methods semantically (GET, POST, PUT, DELETE, PATCH) +- Return appropriate status codes +- Include rate limiting headers +- Version endpoints explicitly (`/v1/`, `/v2/`) +- Document with OpenAPI/Swagger + +## Helper Scripts + +### check-prerequisites.sh + +Validates workflow state before agent execution: + +```bash +# JSON output with feature directory and available docs +.specify/scripts/bash/check-prerequisites.sh --json + +# Require tasks.md exists (for implementation phase) +.specify/scripts/bash/check-prerequisites.sh --json --require-tasks --include-tasks + +# Output just path variables +.specify/scripts/bash/check-prerequisites.sh --paths-only +``` + +### create-new-feature.sh + +Initialize new feature branch and directory structure: + +```bash +.specify/scripts/bash/create-new-feature.sh "feature description" +``` + +## Development Workflow + +### Starting a New Feature + +1. Run `.specify/scripts/bash/create-new-feature.sh "feature description"` OR use `/speckit.specify` +2. Feature branch created: `###-feature-name` +3. Directory created: `specs/###-feature-name/` + +### Implementation Flow + +``` +spec.md (requirements) + → plan.md (architecture + tech stack) + → tasks.md (implementation tasks) + → implementation (code + tests) +``` + +### Task Organization + +Tasks in `tasks.md` are organized by: +- **Phase 1: Setup** - Project initialization +- **Phase 2: Foundational** - Core infrastructure (blocking for all stories) +- **Phase 3+: User Stories** - Grouped by story priority (P1, P2, P3...) + - Tests written FIRST (must fail before implementation) + - Implementation follows tests + - Each story independently testable + +Tasks marked `[P]` can run in parallel (different files, no dependencies). + +### Checklist Validation + +If checklists exist in `specs/[feature]/checklists/`: +- `/speckit.implement` checks completion status before proceeding +- All items must be checked off `[x]` unless user approves proceeding anyway +- Common checklist types: UX, security, performance, accessibility + +## Key Conventions + +### User Stories + +- **Prioritized** (P1, P2, P3...) by importance +- **Independently testable** - each story is a standalone MVP slice +- Include acceptance scenarios in Given-When-Then format +- Specify why each priority level was chosen + +### Test-Driven Development + +1. Write tests FIRST (contract → integration → unit) +2. Ensure tests FAIL before implementation +3. Get user approval on test scenarios +4. Implement minimum code to pass +5. Refactor while maintaining green tests + +### Naming Conventions + +- Feature branches: `###-feature-name` (auto-numbered) +- Requirements: `FR-001`, `FR-002`, etc. +- Tasks: `T001`, `T002`, etc. +- Checklist items: `CHK001`, `CHK002`, etc. +- Mark unclear items: `[NEEDS CLARIFICATION: reason]` + +## Working with Agents + +### Agent Context + +Agents load context from: +- Constitution (`constitution.md`) +- Templates (`.specify/templates/*.md`) +- Feature docs (`specs/[feature]/*.md`) + +When templates change, run: +```bash +.specify/scripts/bash/update-agent-context.sh +``` + +### Agent Auto-Approval + +VS Code auto-approves scripts in `.specify/scripts/bash/` and `.specify/scripts/powershell/` (see `.vscode/settings.json`). + +## Important Notes + +- **No production code exists yet** - implement according to constitution principles +- **Always check constitution** before making architectural decisions +- **Write tests first** - TDD is non-negotiable per constitution +- **Document APIs** before implementing them +- **Use Specify agents** for feature work rather than ad-hoc implementation +- **Validate prerequisites** with `check-prerequisites.sh` before running agents diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..11f0b83 --- /dev/null +++ b/.gitignore @@ -0,0 +1,34 @@ +# Dependencies +node_modules/ + +# Environment variables +.env +.env.local +.env.*.local + +# Service Account credentials (NEVER commit!) +global/*.json + +# Logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# OS files +.DS_Store +Thumbs.db + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Test coverage +coverage/ + +# Temporary files +*.tmp +.temp/ diff --git a/.specify/memory/constitution.md b/.specify/memory/constitution.md index a4670ff..b170cea 100644 --- a/.specify/memory/constitution.md +++ b/.specify/memory/constitution.md @@ -1,50 +1,755 @@ -# [PROJECT_NAME] Constitution - + + +# Proxy Scripts Constitution ## Core Principles -### [PRINCIPLE_1_NAME] - -[PRINCIPLE_1_DESCRIPTION] - +### I. Monolithic Architecture (NON-NEGOTIABLE) -### [PRINCIPLE_2_NAME] - -[PRINCIPLE_2_DESCRIPTION] - +**ALL business logic, data processing, authentication, and request handling MUST exist within the `src/proxyScripts/proxy.js` file.** Pure utility/helper functions MAY be extracted to `src/globalVariables/googleDriveAdapterHelper.js` if they improve code organization. The `server.js` file should ONLY handle: +- HTTP server setup +- Configuration loading +- Global object injection into isolated context +- Loading src/proxyScripts/proxy.js via `vm.Script` and `vm.createContext` +- Loading src/globalVariables/googleDriveAdapterHelper.js via `vm.Script` (optional) +- Per-request context creation with all necessary globals -### [PRINCIPLE_3_NAME] - -[PRINCIPLE_3_DESCRIPTION] - +**Implementation via vm.Script**: +`src/proxyScripts/proxy.js` MUST be loaded using Node.js `vm.Script` and executed in isolated contexts created per-request with `vm.createContext`. This ensures: +- Complete isolation from server.js module system +- All dependencies provided explicitly through context objects +- Zero ability to import/export modules +- Pure functional execution with injected dependencies -### [PRINCIPLE_4_NAME] - -[PRINCIPLE_4_DESCRIPTION] - +**Rationale**: Monolithic architecture enables simple packaging as a single IVA Studio proxy script and prevents fragmentation of business logic across multiple files. Using `vm.Script` enforces architectural boundaries at runtime, making it impossible for `src/proxyScripts/proxy.js` to access Node.js module system or file system, ensuring ALL functionality exists in one isolated, dependency-injected file. -### [PRINCIPLE_5_NAME] - -[PRINCIPLE_5_DESCRIPTION] - +**Helper Functions Pattern**: Pure utility functions (XML escaping, validation, formatting, routing) MAY be extracted to `src/globalVariables/googleDriveAdapterHelper.js` to improve readability and maintainability. The helpers module: +- MUST be loaded via `vm.Script` (same isolation as proxy.js) +- MUST evaluate to a single JavaScript object with all helper functions +- MUST have ZERO imports/exports +- Is injected as `googleDriveAdapterHelper` global object into VM context +- Contains ONLY pure utilities, NO business logic or state -## [SECTION_2_NAME] - +### I. Zero External Imports or Exports from `src/proxyScripts/proxy.js` (NON-NEGOTIABLE) -[SECTION_2_CONTENT] - +`src/proxyScripts/proxy.js` MUST have **ZERO import statements**. All dependencies MUST be provided through `vm.createContext` by server.js. -## [SECTION_3_NAME] - +`src/proxyScripts/proxy.js` MUST have **ZERO export statements**. The file MUST be pure JavaScript code executed in an isolated VM context. -[SECTION_3_CONTENT] - +**File system access** from `src/proxyScripts/proxy.js` is **ABSOLUTELY PROHIBITED** under any circumstances. The `fs` module MUST NOT be accessible. + +**External libraries** (axios, jwt, googleapis, etc.) MUST NOT be imported. Dependencies are injected through VM context by server.js. + +**Rationale**: Using `vm.Script` and `vm.createContext` enforces architectural boundaries at the VM level. src/proxyScripts/proxy.js runs in an isolated context with NO access to Node.js module system, file system, or process globals. ALL dependencies must be explicitly injected per-request through the context object, ensuring src/proxyScripts/proxy.js contains ONLY pure business logic with zero capability for I/O operations. + +**For data files that src/proxyScripts/proxy.js needs** (service account keys, certificates, secrets): +1. Place JSON files in `src/globalVariables/` directory +2. server.js loads them at startup using `loadGlobalObjects()` +3. server.js injects them into VM context per-request via `vm.createContext` +4. src/proxyScripts/proxy.js accesses them as simple variables in context (e.g., `google_drive_settings`) + +**Example**: +- File: `src/globalVariables/google_drive_settings.json` +- Loading: server.js reads and assigns to `globalVariableContext.google_drive_settings` +- Injection: server.js adds `google_drive_settings: globalVariableContext.google_drive_settings` to context +- Access in src/proxyScripts/proxy.js: Direct variable access `google_drive_settings.serviceAccount` + +**Enforcement**: +- src/proxyScripts/proxy.js MUST have NO `import` statements (file should start with comments, then code) +- src/proxyScripts/proxy.js MUST have NO `export` statements (no module.exports, no export keyword) +- Any `import` or `export` in src/proxyScripts/proxy.js MUST be rejected immediately +- server.js MUST load src/proxyScripts/proxy.js using `vm.Script` constructor +- server.js MUST execute via `script.runInContext(context)` with fresh context per request +- All dependencies injected through `vm.createContext({ ... })` context object +- VM isolation prevents access to require(), import(), fs, process, and Node.js globals + + +#### I.I What MUST Be in src/proxyScripts/proxy.js + +The following MUST be implemented in `src/proxyScripts/proxy.js` (or extracted to googleDriveAdapterHelper.js if pure utilities): + +1. **Authentication**: Service Account JWT, OAuth flows, token management (MUST be in proxy.js) +2. **Business Logic**: All request handling, routing, and processing (MUST be in proxy.js) +3. **Data Transformation**: Document parsing, XML generation, data mapping (MUST be in proxy.js or googleDriveAdapterHelper.js) +4. **API Integration**: Drive API queries, error mapping, response handling (MUST be in proxy.js) +5. **Request Queue**: FIFO queue for sequential processing (MUST be in proxy.js) +6. **Utility Functions**: Request ID generation, validation, XML escaping, date formatting (MAY be in googleDriveAdapterHelper.js) +7. **Error Handling**: All error mapping and HTTP status code logic (MAY be in googleDriveAdapterHelper.js) + +**Helper Extraction Guidelines**: +- ✅ **CAN extract**: Pure functions, validators, formatters, XML utilities, error mappers, route parsers +- ❌ **MUST NOT extract**: Authentication, API calls, request queue, cached state, business decisions + +**NO EXCEPTIONS for business logic** - Even complex authentication (OAuth 2.0, JWT) must be in proxy.js. + +#### I.II What Can Be Separate Files + +ONLY the following infrastructure modules may exist outside `src/proxyScripts/proxy.js`: + +1. **src/logger.js**: Structured logging with console replacement (ONLY logging, no business logic) +2. **src/server.js**: HTTP server bootstrap and configuration (ONLY server setup, no business logic) +3. **config/**: JSON configuration files (data files, not code) +4. **src/globalVariables/**: JSON data files AND googleDriveAdapterHelper.js module + - `*.json`: Runtime data loaded at startup (credentials, settings) + - `googleDriveAdapterHelper.js`: Pure utility functions loaded via vm.Script (OPTIONAL) +5. **src/proxyScripts/**: Directory containing the main proxy script (proxy.js) + +**Test files are exempt** - Test utilities may exist solely for test compatibility if needed, but MUST NOT be imported by production code. + +**File Structure**: +``` +src/ +├── proxyScripts/ +│ └── proxy.js # Main business logic (authentication, API, queue) +├── globalVariables/ +│ ├── *.json # Data files for VM context +│ └── googleDriveAdapterHelper.js # Pure utility functions (OPTIONAL) +├── logger.js # Structured logging +└── server.js # HTTP server bootstrap +config/ +└── default.json # Infrastructure settings +``` + +**googleDriveAdapterHelper.js Pattern (Literal Function Body)**: +- MUST be loaded using `vm.Script` (same isolation as proxy.js) +- MUST contain LITERAL FUNCTION BODY with `return` statement (NOT valid standalone JS) +- server.js wraps it: `(function() { })()` +- Function body returns a single JavaScript object containing all exports +- MUST have ZERO imports/exports (pure vm.Script execution) +- Loaded by `loadGlobalVariables()` which scans for both JSON and JS files +- Filename determines global key: `googleDriveAdapterHelper.js` → `globalVariableContext.googleDriveAdapterHelper` +- Injected as `googleDriveAdapterHelper` global object into VM context +- Contains ONLY pure utilities: validators, formatters, XML, error mappers +- MUST NOT contain: authentication, API calls, state, business decisions +- Executed in context with full access to globalVMContext and globalVariableContext + +#### I.III Enforcement + +During code review and planning: +- ANY file in `src/proxyScripts/` besides `proxy.js` MUST be challenged +- ANY file in `src/globalVariables/` besides `googleDriveAdapterHelper.js` and `*.json` MUST be challenged +- ANY file in `src/` besides `proxyScripts/`, `globalVariables/`, `logger.js`, `server.js` MUST be challenged +- Authentication, even if complex, MUST be in `src/proxyScripts/proxy.js` (never in googleDriveAdapterHelper.js) +- Business logic MUST be in `src/proxyScripts/proxy.js` (never in googleDriveAdapterHelper.js) +- Exceptions require explicit constitutional justification with measurable trade-offs +- When in doubt about helpers extraction, keep it in `src/proxyScripts/proxy.js` + +**RED FLAGS to reject immediately:** +- Separate files for: auth, database, utilities, helpers, services, controllers, models +- Any file containing business logic or domain knowledge +- Multiple files "organizing" the codebase + +#### I.IV Configuration + +- Configuration for the Node.js web server infrastructure should be stored as JSON in `config/default.json`. +- `config/default.json` MUST contain ONLY infrastructure settings: server (host, port), logging level +- `config/default.json` MUST NOT contain authentication credentials, secrets, API keys, or behavioral configuration +- Authentication credentials, secrets, and ALL behavioral configuration MUST be stored in `src/globalVariables/` directory as JSON files +- Global JSON files are automatically loaded by server.js and made available as global objects +- server.js should validate both configuration from `config/default.json` AND global objects from `src/globalVariables/` directory + +#### I.V Global Objects Provided by server.js + +The `server.js` file MUST inject the following objects into VM context for use by `src/proxyScripts/proxy.js`: + +**VM Context Injection Pattern:** + +server.js uses a spread operator pattern for cleaner context creation: + +```javascript +// Define static VM context (libraries and built-ins) +const globalVMContext = { + URLSearchParams, + URL, + console: logger, + crypto, + axios, + uuidv4, + jwt, + xmlBuilder, +}; + +// Load dynamic data from src/globalVariables/ directory +let globalVariableContext = {}; // Populated by loadGlobalVariables() + +// Load all global variables (JSON data + JS function modules) at startup +loadGlobalVariables(); +// Pattern: +// 1. Single-pass scan of globalVariables/ for *.json and *.js files (excluding *.example.*) +// 2. Categorize into jsonFiles and jsFiles arrays +// 3. Load JSON files first (data) → globalVariableContext[filename] +// 4. Load JS files second (functions) → globalVariableContext[filename] +// 5. JS files execute in context with {...globalVMContext, ...globalVariableContext} +// Example: googleDriveAdapterHelper.js returns object → globalVariableContext.helpers = object +// Example: google_drive_settings.json → globalVariableContext.google_drive_settings = data + +// Per-request: Create fresh context with all dependencies +const context = vm.createContext({ + ...globalVMContext, // Spread static dependencies + ...globalVariableContext, // Spread dynamic data (JSON + function modules) + req, // Fresh request object + res // Fresh response object +}); +script.runInContext(context); +``` + +**Note:** src/proxyScripts/proxy.js accesses these as direct variables (e.g., `google_drive_settings`, not `globalThis["google_drive_settings"]`). The VM context makes all properties available as top-level variables. + +**Core Infrastructure Context Variables:** + +1. **console** - Custom logger from `logger.js` + - Purpose: Structured JSON logging + - Usage: `console.info()`, `console.debug()`, `console.error()` + - Injected from: `globalVMContext.console` (set to `logger`) + +2. **crypto** - Web Crypto API (built-in) + - Purpose: UUID generation, cryptographic operations + - Usage: `crypto.randomUUID()`, etc. + - Injected from: `globalVMContext.crypto` + - Note: Web Crypto API available by default in Node.js + +3. **axios** - HTTP client library + - Purpose: Making HTTP requests to external APIs + - Usage: `axios.get(url)`, `axios.post(url, data)` + - Package: `axios` + - Injected from: `globalVMContext.axios` + +4. **uuidv4** - UUID v4 generator + - Purpose: Generate RFC4122 compliant UUIDs + - Usage: `uuidv4()` returns string like "110ec58a-a0f2-4ac4-8393-c866d813b8d1" + - Package: `uuid` (v4 function only) + - Injected from: `globalVMContext.uuidv4` + +5. **jwt** - JSON Web Token library + - Purpose: Creating and verifying JWTs for authentication + - Usage: `jwt.sign(payload, secret)`, `jwt.verify(token, secret)` + - Package: `jsonwebtoken` + - Injected from: `globalVMContext.jwt` + +6. **xmlBuilder** - XML builder/generator + - Purpose: Constructing XML documents programmatically + - Usage: `xmlBuilder({ root: { child: 'value' } })` + - Package: `xmlbuilder2` (create function) + - Injected from: `globalVMContext.xmlBuilder` + +**Built-in Web APIs:** + +7. **URLSearchParams** - URL query string parser (built-in) + - Purpose: Parse and manipulate URL query strings + - Usage: `new URLSearchParams(queryString)` + - Injected from: `globalVMContext.URLSearchParams` + +8. **URL** - URL parser (built-in) + - Purpose: Parse and manipulate URLs + - Usage: `new URL(urlString)` + - Injected from: `globalVMContext.URL` + - Note: Currently not included in globalVMContext but available in Node.js by default + +**Dynamic Data Context Variables:** + +9. **Dynamic JSON objects from src/globalVariables/ directory** + - Purpose: Authentication credentials, secrets, API keys, and behavioral configuration + - Pattern: Each `src/globalVariables/filename.json` loaded by server.js → added to `globalVariableContext` → spread into VM context + - Examples: + - `src/globalVariables/google_drive_settings.json` → context variable `google_drive_settings` (consolidated service account, scopes, drive query, sitemap config) + - `src/globalVariables/api-keys.json` → context variable `api_keys` (API keys and secrets) + - `src/globalVariables/custom-config.json` → context variable `custom_config` (behavioral settings) + - Usage in src/proxyScripts/proxy.js: Direct variable access `const settings = google_drive_settings;` + - Loading: By server.js at startup using `loadGlobalObjects()` function + - Injection: Via spread operator `...globalVariableContext` in `vm.createContext()` + - **Note**: ALL authentication, secrets, and behavioral configuration MUST be in src/globalVariables/, NEVER in config/default.json + +**Helper Functions Module:** + +10. **googleDriveAdapterHelper** - Pure utility functions object (OPTIONAL) + - Purpose: Extracted helper functions for code organization + - Source: `src/globalVariables/googleDriveAdapterHelper.js` loaded via `vm.Script` + - Loading: server.js loads via `loadGlobalVariables()` at startup + - **Literal Function Body Pattern**: + - File contains LITERAL BODY of a function (NOT valid standalone JavaScript) + - Uses bare `return {...}` statement to export object + - server.js wraps it: `const wrappedCode = '(function() {\n' + code + '\n})()'` + - Creates IIFE that executes function body and captures returned object + - Pattern separates content (file) from execution wrapper (server) + - Example file structure: + ```javascript + // File: src/globalVariables/googleDriveAdapterHelper.js + // This is a LITERAL FUNCTION BODY (not valid standalone JS) + + class DocumentCountExceededError extends Error { + constructor(message) { + super(message); + this.name = 'DocumentCountExceededError'; + } + } + + function generateRequestId() { + return `req_${crypto.randomUUID()}`; + } + + // ... more functions ... + + return { + DocumentCountExceededError, + generateRequestId, + // ... all exports + }; + ``` + - server.js wrapping logic: + ```javascript + const wrappedCode = `(function() {\n${code}\n})()`; + const script = new vm.Script(wrappedCode, { filename: file }); + const context = vm.createContext({...globalVMContext, ...globalVariableContext}); + const returnedObject = script.runInContext(context); + globalVariableContext[varName] = returnedObject; + ``` + - Generic Loading Pattern: + - All .js files in globalVariables/ are loaded automatically + - Filename determines key: `googleDriveAdapterHelper.js` → `globalVariableContext.googleDriveAdapterHelper` + - Executed in tempContext with `{...globalVMContext, ...globalVariableContext}` + - Can access all VM globals: axios, jwt, crypto, console, etc. + - Can access previously loaded JSON data and function modules + - Injection: Spread into VM context via `...globalVariableContext` + - Usage in src/proxyScripts/proxy.js: `googleDriveAdapterHelper.functionName()` (e.g., `googleDriveAdapterHelper.generateRequestId()`) + - Contains: Pure utilities only (validators, formatters, XML, error mappers, route parsers) + - MUST NOT contain: Authentication, API calls, state, business logic + - Example functions: + - `googleDriveAdapterHelper.generateRequestId()` - UUID generation + - `googleDriveAdapterHelper.validateDocumentId(id)` - Document ID validation + - `googleDriveAdapterHelper.escapeXml(str)` - XML character escaping + - `googleDriveAdapterHelper.generateSitemap(docs, baseUrl)` - Sitemap generation + - `googleDriveAdapterHelper.mapDriveErrorToHttp(error)` - Error mapping + - `googleDriveAdapterHelper.parseRoute(method, url)` - Route parsing + - `googleDriveAdapterHelper.DocumentCountExceededError` - Custom error class + - Generic Pattern Note: You can add more .js files (e.g., `utils.js`, `validators.js`) + and they will be automatically loaded as `globalVariableContext.utils`, etc. + +**Request/Response Objects:** + +11. **req** - HTTP IncomingMessage + - Purpose: Access request data (URL, method, headers, body) + - Injected fresh: Per-request from `http.createServer((req, res) => ...)` + +12. **res** - HTTP ServerResponse + - Purpose: Send response to client + - Injected fresh: Per-request from `http.createServer((req, res) => ...)` + +**Rationale**: Using `vm.createContext` with spread operator pattern for dependency injection achieves: +- **Runtime-enforced isolation** - src/proxyScripts/proxy.js physically cannot access Node.js module system or file system +- **Zero imports possible** - VM context has no `require()` or `import()` capability +- **Explicit dependencies** - All available objects must be explicitly listed in globalVMContext or globalVariableContext +- **Clean organization** - Static dependencies (globalVMContext) separated from dynamic data (globalVariableContext) +- **Per-request isolation** - Fresh context per request prevents cross-request state leakage +- **Testing simplicity** - Mock entire context object instead of individual module imports +- **Clear contracts** - Context spread pattern documents every dependency src/proxyScripts/proxy.js uses +- **Security boundaries** - VM sandbox prevents escape to underlying system +- **DRY principle** - Spread operators eliminate repetitive property declarations + + +#### I.VI Logging + +Modify server.js to replace the global `console` object with the `logger` export from `logger.js`. This will make all console.log, console.info, console.error calls throughout the application use the custom logger. + +Logging should use `logger.js` module that has the following functions: + +- log - which defaults to the 'info' function +- info - which writes to stdout +- debug - which prefixes the output with "[DEBUG]" written in red font and writes to stdout +- error - which prefixes the output with "[ERROR]" written in red font and writes to stderr + +### II. API-First Design + +Every feature MUST expose a clear, documented API before implementation begins. APIs MUST follow RESTful principles where applicable, use consistent naming conventions, and include comprehensive error handling with meaningful status codes and messages. + +**Rationale**: API-first design ensures contracts are stable, enables parallel front-end/back-end work, facilitates integration testing, and produces naturally documented systems. + +### III. Test-First Development (NON-NEGOTIABLE) + +Test-Driven Development is MANDATORY for all production code. The cycle MUST be: + +1. Write failing tests +2. Obtain user approval of test scenarios +3. Implement minimum code to pass tests +4. Refactor while maintaining green tests + +Unit tests MUST achieve minimum 80% code coverage. Integration tests MUST cover all API contracts and critical user flows. + +**Rationale**: TDD catches defects early, documents expected behavior, enables confident refactoring, and ensures all code paths are exercised. + +### IV. Security & Privacy by Default + +All user data MUST be treated as sensitive. OAuth tokens, credentials, and personal information MUST be encrypted at rest and in transit. The principle of least privilege MUST govern all access controls. Audit logging MUST track all data access and modifications. + +**Rationale**: Privacy violations damage trust and carry legal liability. Security must be foundational, not retrofitted. + +### V. Observability & Debuggability + +All operations MUST emit structured logs with appropriate severity levels (DEBUG, INFO, WARN, ERROR). Errors MUST include context (request IDs, user IDs, operation names) sufficient for diagnosis. Performance-critical paths MUST expose metrics (latency, throughput, error rates). + +**Rationale**: Production issues are inevitable. Observable systems reduce mean time to resolution and enable proactive problem detection. + +### VI. Semantic Versioning & Change Management + +All public APIs MUST follow semantic versioning (MAJOR.MINOR.PATCH): + +- MAJOR: Breaking changes that require consumer updates +- MINOR: Backward-compatible feature additions +- PATCH: Backward-compatible bug fixes + +Breaking changes MUST include migration guides and deprecation notices for at least one MINOR version before removal. + +**Rationale**: Clear versioning communicates impact, enables safe upgrades, and respects downstream consumers' need for stability. + +### VII. Simplicity, Minimal Dependencies & YAGNI + +Implement only features with demonstrated need. Choose the simplest solution that satisfies current requirements. Reject premature optimization and speculative features. Complexity MUST be explicitly justified with measurable benefits. + +**Dependency Minimization**: Prefer Node.js built-in modules over external npm packages. Each external dependency MUST be justified by: + +- Significant functionality that would take >2 days to implement correctly +- Active maintenance and security track record +- Clear, documented benefit that outweighs maintenance risk + +Prohibited without explicit approval: + +- Utility libraries for functionality Node.js provides natively (fs, path, crypto, http, etc.) +- Heavy framework dependencies when lightweight alternatives exist +- Multiple packages solving the same problem + +**Rationale**: External dependencies introduce supply chain risk, increase bundle size, complicate auditing, and create maintenance burden. Node.js built-ins are stable, well-tested, and maintained by the platform. + +## API Design Standards + +All external APIs MUST: + +- Accept and return JSON for structured data +- Use standard HTTP methods (GET, POST, PUT, DELETE, PATCH) semantically +- Return appropriate HTTP status codes (2xx success, 4xx client errors, 5xx server errors) +- Include rate limiting headers where applicable +- Version endpoints explicitly (e.g., /v1/, /v2/) +- Document all parameters, responses, and error codes using OpenAPI/Swagger + +Response formats MUST be consistent and include: + +- Timestamp of response generation +- Request correlation ID for tracing +- Pagination metadata for list operations +- Clear error messages with actionable guidance + +## Security & Data Protection + +Authentication & Authorization MUST: + +- Never log or expose credentials, tokens, or API keys +- Validate all input to prevent injection attacks +- Apply rate limiting to prevent abuse + +Data Handling MUST: + +- Minimize data retention—delete temporary files promptly +- Encrypt sensitive data using industry-standard algorithms (AES-256 or equivalent) +- Sanitize all user-supplied content before processing +- Implement CSRF protection for web interfaces + +## Development Workflow + +Code Reviews MUST: + +- Verify alignment with all constitutional principles +- Check test coverage meets minimum thresholds +- Validate API contracts match documentation +- Confirm security best practices are followed + +Quality Gates (ALL must pass before merge): + +- All tests pass (unit, integration, end-to-end) +- Code coverage ≥ 80% +- No critical security vulnerabilities (use automated scanning) +- Documentation updated for API/behavior changes +- Performance regression checks pass + +Deployment MUST: + +- Use automated CI/CD pipelines +- Include smoke tests post-deployment +- Support rollback within 5 minutes +- Include release notes documenting all changes + +## Technology Stack + +**Platform**: Node.js (LTS version or later) + +**Mandatory Baseline**: + +- Use Node.js built-in modules as first choice (fs, path, crypto, http, https, stream, util, url, querystring, etc.) +- **DO NOT use 'events' EventEmitter** - implement simple patterns directly (e.g., Promise-based queues) +- Plain JavaScript (ES2022+) without TypeScript +- JSDoc comments for type documentation where needed +- JavaScript tooling (ESLint, Prettier) does not count against dependency budget +- Native test runner (node:test) or minimal test framework + +**Dependency Approval Process**: +Any external npm package (excluding JavaScript tooling like ESLint and Prettier) MUST be justified in the feature specification with: + +1. **Functionality gap**: What Node.js built-ins cannot do +2. **Implementation cost**: Estimated effort to build vs. maintain dependency +3. **Risk assessment**: Package security, maintenance history, download stats +4. **Alternatives considered**: Why alternatives were rejected + +**Examples of acceptable dependencies** (when justified): + +- xmlbuilder2 +- axios +- uuid + +**Examples of prohibited dependencies** (use Node.js built-ins or inline implementations instead): + +- lodash/underscore (use native Array/Object methods) +- moment/date-fns (use native Date, Intl.DateTimeFormat) +- rimraf (use fs.rm with recursive: true) +- mkdirp (use fs.mkdir with recursive: true) +- **EventEmitter from 'events'** (implement simple queue classes directly - no need for event system) +- express/fastify (use native http/https for simple servers + +**Node.js built-in modules to prefer:** +- Use 'node:' prefix for clarity: `import crypto from 'node:crypto'` +- Acceptable built-ins: fs, path, crypto, http, https, stream, util, url, querystring, etc. +- NOT acceptable: 'events' EventEmitter - implement patterns directly without event system + +IMPORTANT : All dependencies that are not acceptable must be approved when running plan and task agents ## Governance - -[GOVERNANCE_RULES] - +This constitution supersedes all other development practices and guidelines. When conflicts arise between this document and team conventions, the constitution takes precedence. -**Version**: [CONSTITUTION_VERSION] | **Ratified**: [RATIFICATION_DATE] | **Last Amended**: [LAST_AMENDED_DATE] - +Amendments to this constitution require: + +1. Documented justification explaining the need for change +2. Impact analysis of affected systems and workflows +3. Approval from project maintainers +4. Migration plan for any breaking changes +5. Update of version number following semantic versioning rules + +All pull requests, code reviews, and design discussions MUST verify compliance with constitutional principles. Exceptions MUST be rare, explicitly justified with measurable trade-offs, and documented in the relevant specification or plan. + +For runtime development guidance, refer to `.github/prompts/` and `.github/agents/` files which operationalize these principles into agent workflows. + +**Version**: 1.14.0 | **Ratified**: 2026-03-05 | **Last Amended**: 2026-03-07 diff --git a/README.md b/README.md new file mode 100644 index 0000000..7b95d6d --- /dev/null +++ b/README.md @@ -0,0 +1,278 @@ +# Google Drive Sitemap Adapter + +HTTP service that generates XML sitemaps listing all accessible documents in a Google Drive account. Uses Service Account authentication for secure, automated access. + +## Features + +- **Sitemap Generation**: XML sitemap at `/sitemap.xml` listing all accessible Google Drive documents +- **RESTful URLs**: Document links in format `/documents/{documentId}` per sitemap protocol +- **Service Account Auth**: JWT-based authentication using Google Service Account credentials +- **Pagination Support**: Handles large document sets (up to 50,000 URLs per sitemap protocol) +- **50k Limit Enforcement**: Returns 413 error if document count exceeds sitemap protocol limit +- **FIFO Request Queue**: Concurrent requests processed sequentially (one at a time) +- **Rate Limit Handling**: Returns 429 with Retry-After header when Drive API rate limits +- **No Retry on 503**: Fails immediately on Drive API unavailability (per spec) +- **Minimal Dependencies**: Only `googleapis` package required + +## Quick Start + +### Prerequisites + +- Node.js v18.x or later +- Google Cloud Project with Drive API enabled +- Service Account credentials with Drive API access + +### Setup + +1. **Install dependencies**: + ```bash + npm install + ``` + +2. **Configure Service Account** (see `specs/001-drive-proxy-adapter/quickstart.md` for detailed steps): + - Create Service Account in Google Cloud Console + - Download service account key JSON file + - Share Drive files/folders with service account email + - Place key file at `config/service-account-key.json` + +3. **Configure environment**: + ```bash + cp .env.example .env + # Edit .env with your service account email + ``` + +4. **Start the server**: + ```bash + npm start + # or for development with auto-reload: + npm run dev + ``` + +5. **Generate sitemap**: + ```bash + curl http://localhost:3000/sitemap.xml + ``` + +### Usage Examples + +```bash +# Get sitemap of all documents +curl http://localhost:3000/sitemap.xml + +# Verify XML format +curl http://localhost:3000/sitemap.xml | xmllint --noout - + +# Count documents in sitemap +curl http://localhost:3000/sitemap.xml | grep -c '' +``` + +## Architecture + +### Monolithic Design + +This project follows a **monolithic architecture** as specified in the project constitution: + +- **Single Route File**: ALL routing, business logic, and Drive API integration in `src/proxy.js` (~350 LOC) +- **Utility Modules**: Separate files for auth, logging, XML utils (constitution-compliant separation of concerns) +- **Configuration as Data**: JSON configuration in `config/default.json` loaded into `global.config` at startup +- **Minimal Dependencies**: Only `googleapis` package for Drive API integration + +### Why Monolithic? + +Rationale defined in constitution: +1. **Simplicity**: Easy to understand, debug, and maintain +2. **Direct Code Flow**: No dependency injection, no framework magic +3. **YAGNI Principle**: No premature abstraction for a focused service + +### Structure + +``` +src/ +├── server.js # HTTP server, config loader, validation +├── proxy.js # Request handler with FIFO queue integration +├── drive-client.js # Drive API integration with 50k limit enforcement +├── sitemap-generator.js # Sitemap XML generation with RESTful URLs +├── queue.js # FIFO request queue (sequential processing) +├── auth.js # Service Account authentication +├── logger.js # Structured logging utility +├── utils.js # Request ID, validation +└── xml-utils.js # XML escaping +``` + +## Testing + +### Test Structure + +Tests follow **TDD workflow** with real assertions: + +``` +tests/ +├── contract/ # API contract tests (HTTP interface) +├── integration/ # Drive API integration tests +└── unit/ # Pure function unit tests +``` + +### Running Tests + +```bash +# All tests +npm test + +# Specific test suites +npm run test:unit +npm run test:integration +npm run test:contract +``` + +### Coverage Requirements + +- **Minimum**: 80% code coverage (enforced) +- **Tests Written First**: TDD mandatory per constitution +- **Real Assertions**: No placeholder tests + +## Configuration + +Configuration is loaded from `config/default.json` and merged with environment variables: + +```json +{ + "server": { + "port": 3000, + "host": "0.0.0.0", + "baseUrl": "http://localhost:3000" + }, + "google": { + "serviceAccountEmail": "service@project.iam.gserviceaccount.com", + "serviceAccountKeyPath": "./config/service-account-key.json", + "scopes": ["https://www.googleapis.com/auth/drive.readonly"] + }, + "sitemap": { + "maxUrls": 50000 + }, + "logging": { + "level": "info" + } +} +``` + +Environment variables override JSON config (e.g., `PORT`, `GOOGLE_SERVICE_ACCOUNT_EMAIL`). + +## API Documentation + +### Endpoints + +- `GET /sitemap.xml` - XML sitemap of all accessible documents (200 OK with XML body) +- `GET /*` - All other paths return 404 Not Found (empty body) + +### Response Headers + +Successful sitemap response (200 OK): +- `Content-Type: application/xml; charset=utf-8` +- `X-Request-Id: req_` - Request tracing ID +- `X-Document-Count: ` - Number of documents in sitemap + +### Error Responses + +All errors return **HTTP status code only** with **no response body** (per specification): + +- `401 Unauthorized` - Service account authentication failed +- `404 Not Found` - Path is not /sitemap.xml +- `413 Payload Too Large` - Document count exceeds 50,000 (sitemap protocol limit) +- `429 Too Many Requests` - Drive API rate limit exceeded (includes `Retry-After` header in seconds) +- `500 Internal Server Error` - Server error +- `503 Service Unavailable` - Drive API unavailable (NO RETRY per specification) + +## Performance Characteristics + +- **Cold Start**: < 10 seconds to accepting requests +- **Sitemap Generation**: < 5 seconds for 10,000 documents +- **Concurrent Requests**: 10+ without degradation +- **Memory Usage**: < 256MB under normal load + +## Development + +### Project Structure + +``` +google-drive-content-adapter/ +├── config/ +│ └── default.json # Configuration +├── src/ +│ ├── server.js # HTTP server +│ ├── proxy.js # Request handler (monolithic) +│ ├── auth.js # Service Account auth +│ ├── logger.js # Structured logging +│ ├── utils.js # Utilities +│ └── xml-utils.js # XML escaping +├── tests/ +│ ├── contract/ # API contract tests +│ ├── integration/ # Integration tests +│ └── unit/ # Unit tests +├── specs/ +│ └── 001-drive-proxy-adapter/ # Feature spec, plan, tasks +├── .env.example # Environment template +├── package.json # Dependencies and scripts +└── README.md # This file +``` + +### Development Workflow + +1. **Write Tests First** (TDD) +2. **Implement Minimum Code** +3. **Run Tests**: `npm test` +4. **Run in Development**: `npm run dev` + +## Deployment + +### Docker + +```dockerfile +FROM node:18-alpine +WORKDIR /app +COPY package*.json ./ +RUN npm ci --production +COPY src/ ./src/ +COPY config/ ./config/ +CMD ["node", "src/server.js"] +EXPOSE 3000 +``` + +```bash +docker build -t drive-sitemap-adapter . +docker run -p 3000:3000 -v $(pwd)/config:/app/config drive-sitemap-adapter +``` + +### Direct Node.js + +```bash +NODE_ENV=production npm start +``` + +## Troubleshooting + +### Authentication Failed (401) +- Verify service account key file exists at `config/service-account-key.json` +- Check service account email matches configuration +- Ensure Drive API is enabled in Google Cloud project + +### Empty Sitemap +- Service account needs access to Drive files +- Share files/folders with service account email +- Check service account has "Viewer" permission + +### Rate Limit (429) +- Wait for time specified in `Retry-After` header +- Reduce frequency of sitemap requests +- Check Google Cloud Console quotas + +## License + +ISC + +## Documentation + +For detailed setup and usage instructions, see: +- [Quick Start Guide](specs/001-drive-proxy-adapter/quickstart.md) +- [Feature Specification](specs/001-drive-proxy-adapter/spec.md) +- [Implementation Plan](specs/001-drive-proxy-adapter/plan.md) +- [Data Model](specs/001-drive-proxy-adapter/data-model.md) diff --git a/config/default.json b/config/default.json new file mode 100644 index 0000000..608a158 --- /dev/null +++ b/config/default.json @@ -0,0 +1,9 @@ +{ + "server": { + "port": 3000, + "host": "0.0.0.0" + }, + "logging": { + "level": "debug" + } +} diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..b745104 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,533 @@ +{ + "name": "google-drive-content-adapter", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "google-drive-content-adapter", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "axios": "^1.13.6", + "jsonwebtoken": "^9.0.3", + "uuid": "^13.0.0", + "xmlbuilder2": "^4.0.3" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@oozcitak/dom": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/@oozcitak/dom/-/dom-2.0.2.tgz", + "integrity": "sha512-GjpKhkSYC3Mj4+lfwEyI1dqnsKTgwGy48ytZEhm4A/xnH/8z9M3ZVXKr/YGQi3uCLs1AEBS+x5T2JPiueEDW8w==", + "license": "MIT", + "dependencies": { + "@oozcitak/infra": "^2.0.2", + "@oozcitak/url": "^3.0.0", + "@oozcitak/util": "^10.0.0" + }, + "engines": { + "node": ">=20.0" + } + }, + "node_modules/@oozcitak/infra": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/@oozcitak/infra/-/infra-2.0.2.tgz", + "integrity": "sha512-2g+E7hoE2dgCz/APPOEK5s3rMhJvNxSMBrP+U+j1OWsIbtSpWxxlUjq1lU8RIsFJNYv7NMlnVsCuHcUzJW+8vA==", + "license": "MIT", + "dependencies": { + "@oozcitak/util": "^10.0.0" + }, + "engines": { + "node": ">=20.0" + } + }, + "node_modules/@oozcitak/url": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@oozcitak/url/-/url-3.0.0.tgz", + "integrity": "sha512-ZKfET8Ak1wsLAiLWNfFkZc/BraDccuTJKR6svTYc7sVjbR+Iu0vtXdiDMY4o6jaFl5TW2TlS7jbLl4VovtAJWQ==", + "license": "MIT", + "dependencies": { + "@oozcitak/infra": "^2.0.2", + "@oozcitak/util": "^10.0.0" + }, + "engines": { + "node": ">=20.0" + } + }, + "node_modules/@oozcitak/util": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/@oozcitak/util/-/util-10.0.0.tgz", + "integrity": "sha512-hAX0pT/73190NLqBPPWSdBVGtbY6VOhWYK3qqHqtXQ1gK7kS2yz4+ivsN07hpJ6I3aeMtKP6J6npsEKOAzuTLA==", + "license": "MIT", + "engines": { + "node": ">=20.0" + } + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "license": "Python-2.0" + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/axios": { + "version": "1.13.6", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.6.tgz", + "integrity": "sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.11", + "form-data": "^4.0.5", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", + "license": "BSD-3-Clause" + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.5", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/js-yaml": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz", + "integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==", + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/jsonwebtoken": { + "version": "9.0.3", + "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.3.tgz", + "integrity": "sha512-MT/xP0CrubFRNLNKvxJ2BYfy53Zkm++5bX9dtuPbqAeQpTVe0MQTFhao8+Cp//EmJp244xt6Drw/GVEGCUj40g==", + "license": "MIT", + "dependencies": { + "jws": "^4.0.1", + "lodash.includes": "^4.3.0", + "lodash.isboolean": "^3.0.3", + "lodash.isinteger": "^4.0.4", + "lodash.isnumber": "^3.0.3", + "lodash.isplainobject": "^4.0.6", + "lodash.isstring": "^4.0.1", + "lodash.once": "^4.0.0", + "ms": "^2.1.1", + "semver": "^7.5.4" + }, + "engines": { + "node": ">=12", + "npm": ">=6" + } + }, + "node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "license": "MIT", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.1.tgz", + "integrity": "sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==", + "license": "MIT", + "dependencies": { + "jwa": "^2.0.1", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/lodash.includes": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz", + "integrity": "sha512-W3Bx6mdkRTGtlJISOvVD/lbqjTlPPUDTMnlXZFnVwi9NKJ6tiAk6LVdlhZMm17VZisqhKcgzpO5Wz91PCt5b0w==", + "license": "MIT" + }, + "node_modules/lodash.isboolean": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz", + "integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==", + "license": "MIT" + }, + "node_modules/lodash.isinteger": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz", + "integrity": "sha512-DBwtEWN2caHQ9/imiNeEA5ys1JoRtRfY3d7V9wkqtbycnAmTvRRmbHKDV4a0EYc678/dia0jrte4tjYwVBaZUA==", + "license": "MIT" + }, + "node_modules/lodash.isnumber": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/lodash.isnumber/-/lodash.isnumber-3.0.3.tgz", + "integrity": "sha512-QYqzpfwO3/CWf3XP+Z+tkQsfaLL/EnUlXWVkIk5FUPc4sBdTehEqZONuyRt2P67PXAk+NXmTBcc97zw9t1FQrw==", + "license": "MIT" + }, + "node_modules/lodash.isplainobject": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz", + "integrity": "sha512-oSXzaWypCMHkPC3NvBEaPHf0KsA5mvPrOPgQWDsbg8n7orZ290M0BmC/jgRZ4vcJ6DTAhjrsSYgdsW/F+MFOBA==", + "license": "MIT" + }, + "node_modules/lodash.isstring": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz", + "integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==", + "license": "MIT" + }, + "node_modules/lodash.once": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz", + "integrity": "sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==", + "license": "MIT" + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + "node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/uuid": { + "version": "13.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-13.0.0.tgz", + "integrity": "sha512-XQegIaBTVUjSHliKqcnFqYypAd4S+WCYt5NIeRs6w/UAry7z8Y9j5ZwRRL4kzq9U3sD6v+85er9FvkEaBpji2w==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist-node/bin/uuid" + } + }, + "node_modules/xmlbuilder2": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/xmlbuilder2/-/xmlbuilder2-4.0.3.tgz", + "integrity": "sha512-bx8Q1STctnNaaDymWnkfQLKofs0mGNN7rLLapJlGuV3VlvegD7Ls4ggMjE3aUSWItCCzU0PEv45lI87iSigiCA==", + "license": "MIT", + "dependencies": { + "@oozcitak/dom": "^2.0.2", + "@oozcitak/infra": "^2.0.2", + "@oozcitak/util": "^10.0.0", + "js-yaml": "^4.1.1" + }, + "engines": { + "node": ">=20.0" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..ddf3720 --- /dev/null +++ b/package.json @@ -0,0 +1,34 @@ +{ + "name": "google-drive-content-adapter", + "version": "1.0.0", + "description": "HTTP proxy adapter to search and export documents from Google Drive", + "type": "module", + "main": "src/server.js", + "scripts": { + "dev": "node --watch src/server.js", + "start": "node src/server.js", + "test": "node --test tests/**/*.test.js", + "test:unit": "node --test tests/unit/**/*.test.js", + "test:integration": "node --test tests/integration/**/*.test.js", + "test:contract": "node --test tests/contract/**/*.test.js", + "clean": "rm -rvf dist/* & rm -rvf **/*.old & rm -rvf **/*.{backup,backup-new,backup-old,backup-regenerated} & rm -rvf **/*-old.js" + }, + "keywords": [ + "google-drive", + "proxy", + "markdown", + "export", + "adapter" + ], + "author": "", + "license": "ISC", + "engines": { + "node": ">=18.0.0" + }, + "dependencies": { + "axios": "^1.13.6", + "jsonwebtoken": "^9.0.3", + "uuid": "^13.0.0", + "xmlbuilder2": "^4.0.3" + } +} diff --git a/specs/001-drive-proxy-adapter/checklists/requirements.md b/specs/001-drive-proxy-adapter/checklists/requirements.md new file mode 100644 index 0000000..aa75dd5 --- /dev/null +++ b/specs/001-drive-proxy-adapter/checklists/requirements.md @@ -0,0 +1,77 @@ +# Specification Quality Checklist: Google Drive HTTP Proxy Adapter + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-03-06 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Validation Notes + +### Content Quality Review +- ✅ Specification avoids implementation details (no mention of specific npm packages, frameworks beyond Node.js requirement from constitution) +- ✅ Focus is on user capabilities (HTTP requests, document export, sitemap generation) +- ✅ Language is accessible to non-developers (clear descriptions of HTTP endpoints and document formats) +- ✅ All sections (User Scenarios, Requirements, Success Criteria, Assumptions, Out of Scope) are complete + +### Requirement Completeness Review +- ✅ No [NEEDS CLARIFICATION] markers present - all requirements are fully specified +- ✅ Requirements are testable: + - FR-001 through FR-020 can all be verified through automated tests + - Each functional requirement specifies a MUST condition that is verifiable +- ✅ Success criteria are measurable with specific metrics: + - SC-001: 5 seconds for 10,000 documents + - SC-002: 3 seconds for <1MB documents + - SC-003: 100 concurrent requests + - SC-004 through SC-010: All have quantifiable targets +- ✅ Success criteria avoid implementation details (focus on timing, throughput, quality metrics) +- ✅ Acceptance scenarios follow Given-When-Then format with clear conditions +- ✅ Edge cases comprehensive (10 scenarios covering errors, permissions, formats, scale) +- ✅ Scope clearly bounded with Assumptions and Out of Scope sections +- ✅ Dependencies on Google Drive API and OAuth 2.0 explicitly stated + +### Feature Readiness Review +- ✅ Each functional requirement (FR-001 through FR-020) maps to acceptance scenarios in user stories +- ✅ Three user stories cover complete functionality: + - P1: Core document export (foundational value) + - P2: Sitemap generation (discovery mechanism) + - P3: Multiple formats (enhancement) +- ✅ Success criteria SC-001 through SC-010 provide clear quality gates +- ✅ Implementation details appropriately deferred (no database choices, no framework selection beyond constitution's Node.js requirement, no API route implementation specifics) + +## Overall Assessment + +**Status**: ✅ **PASS** - Specification is complete and ready for `/speckit.plan` + +The specification successfully: +1. Defines three independently testable user stories with clear priorities +2. Provides 20 concrete functional requirements +3. Establishes 10 measurable success criteria +4. Identifies comprehensive edge cases and assumptions +5. Clearly bounds scope with explicit Out of Scope section +6. Maintains technology-agnostic language while aligning with constitution's Node.js requirement + +**Recommendation**: Proceed to planning phase with `/speckit.plan` command. diff --git a/specs/001-drive-proxy-adapter/contracts/openapi.yaml b/specs/001-drive-proxy-adapter/contracts/openapi.yaml new file mode 100644 index 0000000..a42b473 --- /dev/null +++ b/specs/001-drive-proxy-adapter/contracts/openapi.yaml @@ -0,0 +1,290 @@ +openapi: 3.0.3 +info: + title: Google Drive Sitemap Adapter API + description: | + HTTP adapter for generating XML sitemaps listing accessible Google Drive documents. + + ## Overview + This adapter provides a single endpoint (`/sitemap.xml`) that generates a valid XML sitemap + conforming to the sitemap protocol (https://www.sitemaps.org/protocol.html). + + The sitemap lists all documents accessible to the configured Google Service Account, + with URLs pointing back to this adapter using document IDs. + + ## Authentication + The adapter uses OAuth 2.0 Service Account authentication to access Google Drive. + External clients do not need to authenticate with this API. + + ## Rate Limiting + Google Drive API rate limits are handled gracefully. If rate limited, the adapter + returns HTTP 429 with a Retry-After header indicating seconds until retry. + + ## Sitemap Protocol Compliance + - Maximum 50,000 URLs per sitemap (protocol limit) + - Each URL includes document ID and last modified timestamp + - Always returns fresh data (no caching) + + version: 1.0.0 + contact: + name: API Support + license: + name: ISC + +servers: + - url: http://localhost:3000 + description: Development server + - url: https://adapter.example.com + description: Production server + +tags: + - name: Sitemap + description: XML sitemap generation + +paths: + /sitemap.xml: + get: + summary: Generate XML sitemap + description: | + Returns an XML sitemap listing all accessible Google Drive documents. + + Each URL in the sitemap points to this adapter with a document ID: + `{baseUrl}/{documentId}` + + The sitemap is generated on-demand (no caching) and may take up to 5 seconds + for drives containing up to 10,000 documents. + + ## Sitemap Format + Conforms to https://www.sitemaps.org/protocol.html: + - ``: Absolute URL with document ID + - ``: Last modified timestamp (ISO 8601) + + ## Document Retrieval + Note: The URLs in the sitemap point back to this adapter, but document retrieval + endpoints are not implemented. This adapter only generates sitemaps for discovery. + + operationId: getSitemap + tags: + - Sitemap + responses: + '200': + description: Successfully generated sitemap + headers: + Content-Type: + description: Always application/xml + schema: + type: string + example: application/xml + Content-Length: + description: Size of sitemap in bytes + schema: + type: integer + example: 204800 + content: + application/xml: + schema: + type: string + format: xml + example: | + + + + https://adapter.example.com/1BxAA_example123 + 2026-03-06T10:30:00.000Z + + + https://adapter.example.com/1CyBB_example456 + 2026-03-05T14:20:00.000Z + + + '401': + description: Unauthorized - OAuth authentication failed + headers: + Content-Length: + description: Always 0 (no response body) + schema: + type: integer + example: 0 + '429': + description: Too Many Requests - Rate limited by Google Drive API + headers: + Retry-After: + description: Seconds to wait before retrying + schema: + type: integer + example: 60 + Content-Length: + description: Always 0 (no response body) + schema: + type: integer + example: 0 + '500': + description: Internal Server Error + headers: + Content-Length: + description: Always 0 (no response body) + schema: + type: integer + example: 0 + '503': + description: Service Unavailable - Google Drive API is down + headers: + Content-Length: + description: Always 0 (no response body) + schema: + type: integer + example: 0 + + /{documentId}: + get: + summary: Document retrieval endpoint (NOT IMPLEMENTED) + description: | + This endpoint is referenced in sitemap URLs but is not implemented. + The adapter only generates sitemaps; it does not serve documents. + + Clients should treat sitemap URLs as metadata only. + + operationId: getDocument + tags: + - Documents (Not Implemented) + parameters: + - name: documentId + in: path + description: Google Drive document ID + required: true + schema: + type: string + pattern: '^[a-zA-Z0-9_-]+$' + example: 1BxAA_example123 + responses: + '404': + description: Not Found - Document retrieval not implemented + headers: + Content-Length: + description: Always 0 (no response body) + schema: + type: integer + example: 0 + + /{anyOtherPath}: + get: + summary: All other paths + description: | + Any path other than `/sitemap.xml` returns 404 Not Found. + + operationId: notFound + tags: + - Routing + parameters: + - name: anyOtherPath + in: path + description: Any path other than /sitemap.xml + required: true + schema: + type: string + responses: + '404': + description: Not Found + headers: + Content-Length: + description: Always 0 (no response body) + schema: + type: integer + example: 0 + +components: + schemas: + Sitemap: + type: object + description: XML sitemap structure (logical representation, actual response is XML) + properties: + xmlns: + type: string + description: XML namespace for sitemap protocol + example: http://www.sitemaps.org/schemas/sitemap/0.9 + urls: + type: array + description: Array of URL entries + items: + $ref: '#/components/schemas/SitemapUrl' + maxItems: 50000 + + SitemapUrl: + type: object + description: Single URL entry in sitemap + required: + - loc + - lastmod + properties: + loc: + type: string + format: uri + description: Absolute URL to document (adapter URL + document ID) + example: https://adapter.example.com/1BxAA_example123 + lastmod: + type: string + format: date-time + description: Last modified timestamp in ISO 8601 format + example: 2026-03-06T10:30:00.000Z + + Error: + type: object + description: Error response (note - most errors return empty body per spec) + properties: + code: + type: integer + description: HTTP status code + example: 500 + message: + type: string + description: Error message (not included in actual responses) + example: Internal Server Error + + responses: + UnauthorizedError: + description: Unauthorized - OAuth authentication failed + headers: + Content-Length: + schema: + type: integer + example: 0 + + RateLimitError: + description: Too Many Requests - Rate limited by Google Drive API + headers: + Retry-After: + description: Seconds to wait before retrying + schema: + type: integer + example: 60 + Content-Length: + schema: + type: integer + example: 0 + + InternalError: + description: Internal Server Error + headers: + Content-Length: + schema: + type: integer + example: 0 + + ServiceUnavailable: + description: Service Unavailable - Google Drive API is down + headers: + Content-Length: + schema: + type: integer + example: 0 + + NotFound: + description: Not Found - Path not recognized + headers: + Content-Length: + schema: + type: integer + example: 0 + +externalDocs: + description: Sitemap Protocol Specification + url: https://www.sitemaps.org/protocol.html diff --git a/specs/001-drive-proxy-adapter/contracts/openapi.yaml.backup-export-version b/specs/001-drive-proxy-adapter/contracts/openapi.yaml.backup-export-version new file mode 100644 index 0000000..b00057f --- /dev/null +++ b/specs/001-drive-proxy-adapter/contracts/openapi.yaml.backup-export-version @@ -0,0 +1,454 @@ +openapi: 3.0.3 +info: + title: Google Drive HTTP Proxy Adapter API + description: | + HTTP proxy adapter for exporting Google Drive documents in multiple formats (Markdown, HTML, PDF) + and generating XML sitemaps of accessible documents. + + ## Authentication + The adapter uses OAuth 2.0 to access Google Drive on behalf of configured users. + External clients do not need to authenticate with this API directly. + + ## Rate Limiting + API requests are rate-limited to 100 requests per minute per IP address. + Rate limit information is included in response headers. + version: 1.0.0 + contact: + name: API Support + license: + name: MIT + +servers: + - url: http://localhost:3000 + description: Development server + - url: https://api.example.com + description: Production server + +tags: + - name: Documents + description: Document export operations + - name: Discovery + description: Document discovery and listing + - name: Health + description: Service health monitoring + +paths: + /health: + get: + summary: Health check endpoint + description: Returns service health status and version information + tags: + - Health + responses: + '200': + description: Service is healthy + content: + application/json: + schema: + type: object + properties: + status: + type: string + example: ok + version: + type: string + example: 1.0.0 + uptime: + type: number + description: Service uptime in seconds + example: 86400 + + /sitemap.xml: + get: + summary: Generate sitemap of accessible documents + description: | + Returns an XML sitemap listing all Google Drive documents accessible to the configured user. + Follows the sitemap protocol specification (https://www.sitemaps.org/protocol.html). + tags: + - Discovery + responses: + '200': + description: Sitemap generated successfully + headers: + Content-Type: + schema: + type: string + example: application/xml; charset=utf-8 + X-Request-Id: + schema: + type: string + format: uuid + description: Unique request identifier for tracing + X-Document-Count: + schema: + type: integer + description: Number of documents in the sitemap + content: + application/xml: + schema: + type: string + format: xml + example: | + + + + http://localhost:3000/1BxAA_example123 + 2026-03-06T10:30:00Z + + + http://localhost:3000/2CyBB_example456 + 2026-03-05T14:20:00Z + + + '401': + $ref: '#/components/responses/Unauthorized' + '429': + $ref: '#/components/responses/RateLimited' + '500': + $ref: '#/components/responses/InternalError' + '503': + $ref: '#/components/responses/ServiceUnavailable' + + /{documentId}: + get: + summary: Export Google Drive document in specified format + description: | + Fetches a Google Drive document by ID and exports it in the requested format. + Supports Markdown (default), HTML, and PDF formats. + tags: + - Documents + parameters: + - name: documentId + in: path + required: true + description: Google Drive file ID (8-128 alphanumeric characters, hyphens, or underscores) + schema: + type: string + pattern: '^[a-zA-Z0-9_-]{8,128}$' + example: 1BxAA_example123 + + - name: format + in: query + required: false + description: Export format (defaults to markdown if not specified) + schema: + type: string + enum: + - markdown + - html + - pdf + default: markdown + example: markdown + + responses: + '200': + description: Document exported successfully + headers: + Content-Type: + schema: + type: string + enum: + - text/markdown; charset=utf-8 + - text/html; charset=utf-8 + - application/pdf + description: MIME type of exported document + X-Request-Id: + schema: + type: string + format: uuid + description: Unique request identifier for tracing + X-Document-Title: + schema: + type: string + description: Original document title from Google Drive + X-Document-Modified: + schema: + type: string + format: date-time + description: Last modified timestamp (ISO 8601) + content: + text/markdown: + schema: + type: string + example: | + # Document Title + + This is a paragraph with **bold** and *italic* text. + + ## Section Heading + + - List item 1 + - List item 2 + + text/html: + schema: + type: string + example: | + + + Document Title + +

Document Title

+

This is a paragraph with bold and italic text.

+ + + + application/pdf: + schema: + type: string + format: binary + + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + '413': + $ref: '#/components/responses/PayloadTooLarge' + '415': + $ref: '#/components/responses/UnsupportedMediaType' + '429': + $ref: '#/components/responses/RateLimited' + '500': + $ref: '#/components/responses/InternalError' + '503': + $ref: '#/components/responses/ServiceUnavailable' + +components: + schemas: + ErrorResponse: + type: object + required: + - error + - timestamp + properties: + error: + type: object + required: + - code + - message + - requestId + properties: + code: + type: string + description: Machine-readable error code + enum: + - DOCUMENT_NOT_FOUND + - DOCUMENT_FORBIDDEN + - UNAUTHORIZED + - INVALID_FORMAT + - UNSUPPORTED_DOCUMENT_TYPE + - RATE_LIMITED + - DRIVE_API_ERROR + - INTERNAL_ERROR + - PAYLOAD_TOO_LARGE + example: DOCUMENT_NOT_FOUND + message: + type: string + description: Human-readable error message + example: Document with ID '1BxAA_example123' does not exist or is not accessible + details: + type: object + description: Optional additional context + additionalProperties: true + requestId: + type: string + format: uuid + description: Request ID for support and debugging + example: 550e8400-e29b-41d4-a716-446655440000 + timestamp: + type: string + format: date-time + description: ISO 8601 timestamp when error occurred + example: '2026-03-06T10:30:00.123Z' + + responses: + BadRequest: + description: Invalid request parameters + headers: + X-Request-Id: + schema: + type: string + format: uuid + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error: + code: INVALID_FORMAT + message: "Invalid format 'docx'. Supported formats: markdown, html, pdf" + requestId: 550e8400-e29b-41d4-a716-446655440000 + timestamp: '2026-03-06T10:30:00.123Z' + + Unauthorized: + description: Authentication failed or missing + headers: + X-Request-Id: + schema: + type: string + format: uuid + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error: + code: UNAUTHORIZED + message: Authentication with Google Drive failed + requestId: 550e8400-e29b-41d4-a716-446655440001 + timestamp: '2026-03-06T10:30:01.456Z' + + Forbidden: + description: User lacks permission to access the document + headers: + X-Request-Id: + schema: + type: string + format: uuid + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error: + code: DOCUMENT_FORBIDDEN + message: You do not have permission to access this document + requestId: 550e8400-e29b-41d4-a716-446655440002 + timestamp: '2026-03-06T10:30:02.789Z' + + NotFound: + description: Document does not exist + headers: + X-Request-Id: + schema: + type: string + format: uuid + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error: + code: DOCUMENT_NOT_FOUND + message: Document with ID '1BxAA_invalid' does not exist or is not accessible + requestId: 550e8400-e29b-41d4-a716-446655440003 + timestamp: '2026-03-06T10:30:03.012Z' + + PayloadTooLarge: + description: Document exceeds maximum size limit + headers: + X-Request-Id: + schema: + type: string + format: uuid + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error: + code: PAYLOAD_TOO_LARGE + message: Document size exceeds maximum limit of 100MB + requestId: 550e8400-e29b-41d4-a716-446655440004 + timestamp: '2026-03-06T10:30:04.345Z' + + UnsupportedMediaType: + description: Document type cannot be exported in requested format + headers: + X-Request-Id: + schema: + type: string + format: uuid + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error: + code: UNSUPPORTED_DOCUMENT_TYPE + message: Document type 'application/vnd.google-apps.form' cannot be exported as PDF + requestId: 550e8400-e29b-41d4-a716-446655440005 + timestamp: '2026-03-06T10:30:05.678Z' + + RateLimited: + description: Rate limit exceeded + headers: + X-Request-Id: + schema: + type: string + format: uuid + X-RateLimit-Limit: + schema: + type: integer + description: Maximum requests per minute + example: 100 + X-RateLimit-Remaining: + schema: + type: integer + description: Remaining requests in current window + example: 0 + X-RateLimit-Reset: + schema: + type: integer + description: Unix timestamp when rate limit resets + example: 1709724660 + Retry-After: + schema: + type: integer + description: Seconds until rate limit resets + example: 60 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error: + code: RATE_LIMITED + message: Rate limit exceeded. Please retry after 60 seconds + requestId: 550e8400-e29b-41d4-a716-446655440006 + timestamp: '2026-03-06T10:30:06.901Z' + + InternalError: + description: Internal server error + headers: + X-Request-Id: + schema: + type: string + format: uuid + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error: + code: INTERNAL_ERROR + message: An unexpected error occurred while processing your request + requestId: 550e8400-e29b-41d4-a716-446655440007 + timestamp: '2026-03-06T10:30:07.234Z' + + ServiceUnavailable: + description: Service temporarily unavailable (Google Drive API down or rate limited) + headers: + X-Request-Id: + schema: + type: string + format: uuid + Retry-After: + schema: + type: integer + description: Seconds until service may be available + example: 300 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + example: + error: + code: DRIVE_API_ERROR + message: Google Drive API is temporarily unavailable. Please retry later + requestId: 550e8400-e29b-41d4-a716-446655440008 + timestamp: '2026-03-06T10:30:08.567Z' diff --git a/specs/001-drive-proxy-adapter/contracts/sitemap-api.md b/specs/001-drive-proxy-adapter/contracts/sitemap-api.md new file mode 100644 index 0000000..f652f4f --- /dev/null +++ b/specs/001-drive-proxy-adapter/contracts/sitemap-api.md @@ -0,0 +1,436 @@ +# API Contract: Sitemap Endpoint + +**Feature**: 001-drive-proxy-adapter +**Date**: 2026-03-07 +**Phase**: 1 - Design & Contracts +**Endpoint**: `GET /sitemap.xml` + +## Overview + +The `/sitemap.xml` endpoint returns an XML sitemap listing all Google Drive documents accessible to the Service Account. This is the only endpoint exposed by the adapter. + +--- + +## Endpoint Definition + +### URL +``` +GET /sitemap.xml +``` + +### Authentication +- **Method**: None (endpoint is public) +- **Backend Authentication**: Service Account JWT to Google Drive API (transparent to client) +- **Credentials**: Loaded from `GOOGLE_SERVICE_ACCOUNT_KEY` environment variable + +### Request + +**Method**: `GET` + +**Headers**: +- None required + +**Query Parameters**: +- None supported + +**Request Body**: +- None (GET request) + +**Example Request**: +```http +GET /sitemap.xml HTTP/1.1 +Host: adapter.example.com +User-Agent: Mozilla/5.0 +``` + +--- + +## Response Specifications + +### Success Response (200 OK) + +**Status Code**: `200 OK` + +**Headers**: +- `Content-Type: application/xml` +- `Content-Length: {size_in_bytes}` + +**Body**: Valid XML sitemap conforming to sitemap protocol + +**XML Schema**: +```xml + + + + https://adapter.example.com/documents/{documentId} + 2026-03-06T10:30:00.000Z + + + +``` + +**Field Descriptions**: +- ``: Root element with sitemap namespace +- ``: Individual URL entry (0 to 50,000 entries) +- ``: Absolute URL to document using RESTful format `/documents/{documentId}` +- ``: ISO 8601 timestamp of last document modification + +**Constraints**: +- Maximum 50,000 `` entries (sitemap protocol limit per spec.md FR-015) +- Maximum 50MB uncompressed (protocol limit, not enforced) +- All `` URLs use same base URL (configured via `BASE_URL` env var) +- All `` URLs use RESTful path format: `/documents/{documentId}` + +**Example Response**: +```http +HTTP/1.1 200 OK +Content-Type: application/xml +Content-Length: 4582 + + + + + https://adapter.example.com/documents/1BxAA_example123 + 2026-03-06T10:30:00.000Z + + + https://adapter.example.com/documents/1CyBB_example456 + 2026-03-05T14:20:00.000Z + + + https://adapter.example.com/documents/1DzCC_example789 + 2026-03-04T08:15:00.000Z + + +``` + +**Performance Targets** (from spec.md success criteria): +- Response time: < 5 seconds for up to 10,000 documents +- Memory usage: < 256MB under normal load +- Concurrent requests: Support 10 concurrent requests without degradation + +--- + +### Not Found Response (404) + +**Status Code**: `404 Not Found` + +**Headers**: None + +**Body**: Empty (per spec.md clarification: "HTTP status code only, no error response body") + +**When Returned**: +- Any path other than `/sitemap.xml` (per spec.md FR-007) + +**Example Response**: +```http +HTTP/1.1 404 Not Found + +``` + +--- + +### Unauthorized Response (401) + +**Status Code**: `401 Unauthorized` + +**Headers**: None + +**Body**: Empty (per spec.md clarification: "HTTP status code only, no error response body") + +**When Returned**: +- Service Account JWT authentication failed (per spec.md FR-010) +- OAuth token refresh failed +- Invalid Service Account credentials + +**Example Response**: +```http +HTTP/1.1 401 Unauthorized + +``` + +**Client Action**: Check Service Account credentials in `GOOGLE_SERVICE_ACCOUNT_KEY` environment variable + +--- + +### Rate Limited Response (429) + +**Status Code**: `429 Too Many Requests` + +**Headers**: +- `Retry-After: {seconds}` (integer, seconds until retry allowed) + +**Body**: Empty (per spec.md clarification: "HTTP status code only, no error response body") + +**When Returned**: +- Google Drive API rate limit exceeded (per spec.md FR-013) +- Quota exhausted for Service Account + +**Example Response**: +```http +HTTP/1.1 429 Too Many Requests +Retry-After: 60 + +``` + +**Client Action**: Wait `Retry-After` seconds before retrying request + +**Retry-After Values**: +- Derived from Google Drive API `Retry-After` header if available +- Default: 60 seconds if not specified by Drive API + +--- + +### Internal Server Error (500) + +**Status Code**: `500 Internal Server Error` + +**Headers**: None + +**Body**: Empty (per spec.md clarification: "HTTP status code only, no error response body") + +**When Returned**: +- Unexpected server error (per spec.md FR-008) +- Configuration error (missing environment variables) +- XML generation failure + +**Example Response**: +```http +HTTP/1.1 500 Internal Server Error + +``` + +**Client Action**: Report error to adapter administrator + +**Server Logging**: All 500 errors logged with stack trace to stderr (per spec.md FR-012) + +--- + +### Service Unavailable Response (503) + +**Status Code**: `503 Service Unavailable` + +**Headers**: None + +**Body**: Empty (per spec.md clarification: "HTTP status code only, no error response body") + +**When Returned**: +- Google Drive API unavailable (per spec.md FR-017) +- Drive API returns 503 status (no retries per spec clarification) + +**Example Response**: +```http +HTTP/1.1 503 Service Unavailable + +``` + +**Client Action**: Retry request later (Drive API temporarily unavailable) + +**Retry Behavior**: Adapter does NOT retry Drive API 503 errors; immediately returns 503 to client (per spec.md FR-017 clarification) + +--- + +## Error Handling Specification + +### Error Response Format + +**All error responses follow same pattern**: +- Status code indicates error type +- No response body (per spec.md clarification) +- Minimal headers (only `Retry-After` for 429) + +**Rationale**: Simplicity, consistency, fail-fast approach + +### Error Status Code Matrix + +| Error Condition | Status Code | Headers | Body | Retry? | +|----------------|-------------|---------|------|--------| +| Authentication failed | 401 | None | Empty | No (fix credentials) | +| Rate limit exceeded | 429 | `Retry-After` | Empty | Yes (after delay) | +| Drive API unavailable | 503 | None | Empty | Yes (later) | +| Internal error | 500 | None | Empty | No (report to admin) | +| Path not found | 404 | None | Empty | No | + +--- + +## Logging Specification + +### Request Logging (stdout) + +**All requests logged with**: +- Timestamp (ISO 8601) +- HTTP method and path +- Response status code +- Response time (milliseconds) + +**Example**: +``` +[2026-03-07T14:30:15.456Z] GET /sitemap.xml -> 200 (1234ms) +[2026-03-07T14:30:20.789Z] GET /sitemap.xml -> 429 (234ms) +[2026-03-07T14:30:25.012Z] GET /invalid.xml -> 404 (1ms) +``` + +### Error Logging (stderr) + +**All errors logged with**: +- Timestamp (ISO 8601) +- Request ID (for correlation) +- Error message +- Stack trace (for 500 errors) + +**Example**: +``` +[2026-03-07T14:30:20.789Z] [ERROR] Rate limit exceeded: Drive API quota exhausted +[2026-03-07T14:30:25.012Z] [ERROR] Authentication failed: Invalid Service Account key +[2026-03-07T14:30:30.345Z] [ERROR] Drive API unavailable: Connection timeout +``` + +--- + +## Contract Tests + +### Test Scenarios + +1. **Successful sitemap generation** + - Request: `GET /sitemap.xml` + - Expected: 200 status, valid XML, `Content-Type: application/xml` + +2. **Not found for other paths** + - Request: `GET /invalid.xml` + - Expected: 404 status, empty body + +3. **Rate limiting** + - Simulate Drive API 429 response + - Expected: 429 status, `Retry-After` header, empty body + +4. **Authentication failure** + - Simulate invalid credentials + - Expected: 401 status, empty body + +5. **Service unavailable** + - Simulate Drive API 503 response + - Expected: 503 status, empty body (no retries) + +6. **XML schema validation** + - Request: `GET /sitemap.xml` + - Validate XML against sitemap protocol schema + +7. **URL format validation** + - Request: `GET /sitemap.xml` + - Verify all `` URLs use `/documents/{documentId}` format + +### Test Assertions + +**XML Schema Validation**: +- Root element: `` +- Each `` has required `` child +- Each `` is valid ISO 8601 timestamp +- Maximum 50,000 `` entries + +**URL Format Validation**: +- All `` URLs are absolute (start with http:// or https://) +- All `` URLs use RESTful format: `{baseUrl}/documents/{documentId}` +- Document IDs match regex: `^[a-zA-Z0-9_-]+$` + +**Header Validation**: +- 200 responses include `Content-Type: application/xml` +- 429 responses include `Retry-After` header with integer value +- All error responses have empty body + +--- + +## Configuration + +### Environment Variables + +| Variable | Required | Default | Description | +|----------|----------|---------|-------------| +| `GOOGLE_SERVICE_ACCOUNT_KEY` | Yes | None | Inline JSON of Service Account key file | +| `BASE_URL` | Yes | None | Base URL for sitemap links (e.g., `https://adapter.example.com`) | +| `PORT` | No | 3000 | HTTP server port | + +**Example .env**: +```bash +GOOGLE_SERVICE_ACCOUNT_KEY='{"type":"service_account","project_id":"...","private_key":"-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n","client_email":"...@developer.gserviceaccount.com",...}' +BASE_URL=https://adapter.example.com +PORT=3000 +``` + +--- + +## Compatibility + +### Sitemap Protocol Compliance + +**Protocol**: https://www.sitemaps.org/protocol.html + +**Compliance**: +- ✅ Valid XML with namespace +- ✅ `` with absolute URLs +- ✅ `` with W3C Datetime format (ISO 8601) +- ✅ Maximum 50,000 URLs +- ✅ Maximum 50MB uncompressed size + +**Optional Elements Not Used**: +- ``: Not applicable (no historical change data) +- ``: Not applicable (all documents equal priority) + +### HTTP Compliance + +**HTTP Version**: HTTP/1.1 + +**Methods Supported**: `GET` only + +**Status Codes Used**: 200, 401, 404, 429, 500, 503 + +**Headers Used**: +- Response: `Content-Type`, `Content-Length`, `Retry-After` +- Request: Standard HTTP headers accepted, none required + +--- + +## Security Considerations + +### Authentication +- Service Account credentials secured in environment variable (not in code or config files) +- Credentials never logged or exposed in error messages +- Read-only Drive scope (`drive.readonly`) - no write permissions + +### Rate Limiting +- Transparent propagation of Drive API rate limits to client +- No internal rate limiting (rely on Drive API limits) + +### Input Validation +- Path validation: Only `/sitemap.xml` accepted +- Method validation: Only `GET` accepted +- No query parameters processed (rejection not required, just ignored) + +### Output Sanitization +- All URLs XML-escaped to prevent injection +- All timestamps XML-escaped (though ISO 8601 format doesn't contain XML special chars) + +--- + +## Versioning + +**Current Version**: 1.0.0 (initial implementation) + +**Future Changes**: +- Breaking changes (new required parameters): Major version bump (2.0.0) +- Backward-compatible additions (query parameters): Minor version bump (1.1.0) +- Bug fixes: Patch version bump (1.0.1) + +**Deprecation Policy**: +- Breaking changes include migration guide +- Deprecated features supported for at least one minor version + +--- + +## References + +- Feature Specification: `/specs/001-drive-proxy-adapter/spec.md` +- Data Model: `/specs/001-drive-proxy-adapter/data-model.md` +- Research Document: `/specs/001-drive-proxy-adapter/research.md` +- Sitemap Protocol: https://www.sitemaps.org/protocol.html +- Google Drive API v3: https://developers.google.com/drive/api/v3/reference + diff --git a/specs/001-drive-proxy-adapter/contracts/sitemap-xml-schema.md b/specs/001-drive-proxy-adapter/contracts/sitemap-xml-schema.md new file mode 100644 index 0000000..b1826d7 --- /dev/null +++ b/specs/001-drive-proxy-adapter/contracts/sitemap-xml-schema.md @@ -0,0 +1,382 @@ +# API Contract: Sitemap XML Endpoint + +**Feature**: 001-drive-proxy-adapter +**Contract Type**: HTTP API +**Endpoint**: `/sitemap.xml` +**Version**: 1.0.0 +**Date**: 2026-03-07 + +--- + +## Endpoint Specification + +### `GET /sitemap.xml` + +Generate an XML sitemap of all accessible Google Drive documents. + +--- + +## Request + +### HTTP Method +`GET` + +### URL +`/sitemap.xml` + +### Query Parameters +None + +### Request Headers +None required + +### Request Body +None (GET request) + +--- + +## Response + +### Success Response (200 OK) + +**Status Code**: `200 OK` + +**Response Headers**: +``` +Content-Type: application/xml; charset=utf-8 +Content-Length: {size_in_bytes} +``` + +**Response Body** (XML): +```xml + + + + http://example.com/documents/{documentId1} + 2026-03-07 + + + http://example.com/documents/{documentId2} + 2026-03-06 + + + +``` + +**XML Schema Requirements**: +- Root element: `` with namespace `xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"` +- Each document: `` element containing: + - `` (REQUIRED): Absolute URL in format `{baseUrl}/documents/{documentId}` + - Must be URL-encoded + - Must escape XML special characters: `&` → `&`, `<` → `<`, `>` → `>`, `"` → `"`, `'` → `'` + - `` (OPTIONAL): ISO 8601 date format + - Format: `YYYY-MM-DD` or `YYYY-MM-DDTHH:MM:SS+00:00` + - Omitted if Drive API provides no `modifiedTime` + +**Empty Drive Response** (0 documents): +```xml + + + +``` + +**Constraints**: +- Maximum 50,000 `` entries (sitemap protocol limit) +- If >50,000 documents exist, return 413 error instead + +--- + +### Error Responses + +#### 404 Not Found + +**Trigger**: Request to any endpoint other than `/sitemap.xml` + +**Status Code**: `404 Not Found` + +**Response Headers**: None + +**Response Body**: Empty (no content) + +**Example**: +``` +GET /documents/abc123 → 404 Not Found (empty body) +GET /api/sitemap → 404 Not Found (empty body) +POST /sitemap.xml → 404 Not Found (empty body) +``` + +--- + +#### 413 Payload Too Large + +**Trigger**: Google Drive contains more than 50,000 documents + +**Status Code**: `413 Payload Too Large` + +**Response Headers**: None + +**Response Body**: Empty (no content) + +**Rationale**: Sitemap protocol limits sitemaps to 50,000 URLs. This error prevents oversized sitemap generation. + +--- + +#### 429 Too Many Requests + +**Trigger**: Google Drive API returns rate limit error + +**Status Code**: `429 Too Many Requests` + +**Response Headers**: +``` +Retry-After: {seconds} +``` + +**Response Body**: Empty (no content) + +**Example**: +``` +HTTP/1.1 429 Too Many Requests +Retry-After: 60 + +(empty body) +``` + +**Rationale**: Client should retry after the specified number of seconds. + +--- + +#### 401 Unauthorized + +**Trigger**: Service Account token refresh failed + +**Status Code**: `401 Unauthorized` + +**Response Headers**: None + +**Response Body**: Empty (no content) + +**Rationale**: Authentication failed. Check Service Account credentials configuration. + +--- + +#### 503 Service Unavailable + +**Trigger**: Google Drive API returns 503 error + +**Status Code**: `503 Service Unavailable` + +**Response Headers**: None + +**Response Body**: Empty (no content) + +**Behavior**: No retries - immediately pass through 503 to client per specification. + +--- + +#### 500 Internal Server Error + +**Trigger**: Unexpected error during sitemap generation + +**Status Code**: `500 Internal Server Error` + +**Response Headers**: None + +**Response Body**: Empty (no content) + +**Rationale**: Unexpected server error. Check logs for details. + +--- + +## Examples + +### Example 1: Successful Sitemap (3 documents) + +**Request**: +```http +GET /sitemap.xml HTTP/1.1 +Host: example.com +``` + +**Response**: +```http +HTTP/1.1 200 OK +Content-Type: application/xml; charset=utf-8 +Content-Length: 512 + + + + + http://example.com/documents/1A2B3C4D5E6F7G8H + 2026-03-07 + + + http://example.com/documents/9I0J1K2L3M4N5O6P + 2026-03-05 + + + http://example.com/documents/7Q8R9S0T1U2V3W4X + 2026-03-01 + + +``` + +--- + +### Example 2: Empty Drive + +**Request**: +```http +GET /sitemap.xml HTTP/1.1 +Host: example.com +``` + +**Response**: +```http +HTTP/1.1 200 OK +Content-Type: application/xml; charset=utf-8 +Content-Length: 123 + + + + +``` + +--- + +### Example 3: Rate Limit Exceeded + +**Request**: +```http +GET /sitemap.xml HTTP/1.1 +Host: example.com +``` + +**Response**: +```http +HTTP/1.1 429 Too Many Requests +Retry-After: 120 + +``` + +--- + +### Example 4: Too Many Documents + +**Request**: +```http +GET /sitemap.xml HTTP/1.1 +Host: example.com +``` + +**Response**: +```http +HTTP/1.1 413 Payload Too Large + +``` + +--- + +### Example 5: Invalid Endpoint + +**Request**: +```http +GET /documents/abc123 HTTP/1.1 +Host: example.com +``` + +**Response**: +```http +HTTP/1.1 404 Not Found + +``` + +--- + +## Contract Validation + +### XML Schema Validation + +The sitemap XML MUST validate against the sitemap protocol schema: +- **Namespace**: `http://www.sitemaps.org/schemas/sitemap/0.9` +- **Root element**: `` +- **Child elements**: Zero or more `` elements +- **URL elements**: Each contains `` (required) and `` (optional) + +**Validation Tools**: +- XML parser (ensure well-formed XML) +- Sitemap validator: [https://www.xml-sitemaps.com/validate-xml-sitemap.html](https://www.xml-sitemaps.com/validate-xml-sitemap.html) +- XSD schema validation against official sitemap schema + +--- + +### Contract Testing Requirements + +All contract tests MUST verify: + +1. **Success Path**: + - Response status 200 + - Content-Type header is `application/xml; charset=utf-8` + - Response body is valid XML + - XML contains correct namespace + - All `` URLs are absolute and properly formatted + - All `` URLs follow pattern: `{baseUrl}/documents/{documentId}` + - All `` dates are valid ISO 8601 format (if present) + +2. **Error Handling**: + - Invalid endpoints return 404 with empty body + - >50k documents returns 413 with empty body + - Rate limiting returns 429 with `Retry-After` header and empty body + - Drive API 503 returns 503 with empty body (no retries) + - All error responses have no `Content-Type` header + - All error responses have empty body + +3. **Edge Cases**: + - Empty Drive (0 documents) returns valid sitemap with no `` entries + - Documents without `modifiedTime` omit `` tag + - Special characters in document IDs are properly URL-encoded + - XML special characters in URLs are properly escaped + +--- + +## Breaking Changes + +Changes that constitute breaking changes (require MAJOR version bump): + +1. Changing URL format from `/documents/{id}` to different format +2. Changing XML namespace or root element structure +3. Removing `` field entirely +4. Changing error response status codes +5. Adding required query parameters +6. Changing response Content-Type + +--- + +## References + +- [Sitemap Protocol Specification](https://www.sitemaps.org/protocol.html) +- [Google Sitemap Guidelines](https://developers.google.com/search/docs/crawling-indexing/sitemaps/build-sitemap) +- [XML Specification](https://www.w3.org/TR/xml/) +- [ISO 8601 Date Format](https://en.wikipedia.org/wiki/ISO_8601) + +--- + +## Version History + +| Version | Date | Changes | +|---------|------|---------| +| 1.0.0 | 2026-03-07 | Initial contract specification | + +--- + +## Summary + +This contract defines the complete API specification for the `/sitemap.xml` endpoint, including: + +1. **Request/response formats** with examples +2. **Error handling** with all status codes (404, 413, 429, 401, 503, 500) +3. **XML schema requirements** for sitemap format +4. **Validation criteria** for contract testing +5. **Breaking change policy** for version management + +All error responses follow the spec requirement: **status code only, no response body** (except 429 which includes `Retry-After` header). diff --git a/specs/001-drive-proxy-adapter/data-model.md b/specs/001-drive-proxy-adapter/data-model.md new file mode 100644 index 0000000..e208d14 --- /dev/null +++ b/specs/001-drive-proxy-adapter/data-model.md @@ -0,0 +1,493 @@ +# Data Model: Google Drive HTTP Proxy Adapter + +**Feature**: 001-drive-proxy-adapter +**Phase**: 1 - Design & Contracts +**Date**: 2026-03-07 + +## Overview + +This document defines the data structures, entities, and their relationships for the Google Drive HTTP Proxy Adapter. The system is stateless (no persistence layer) with all entities representing runtime state or API payloads. + +--- + +## Core Entities + +### 1. Document + +Represents a file in Google Drive. Extracted from Drive API response. + +**JSDoc Type Definition**: +```javascript +/** + * @typedef {Object} Document + * @property {string} id - Google Drive file ID (unique identifier) + * @property {string} name - Document title/filename + * @property {string} mimeType - MIME type (e.g., 'application/pdf', 'text/plain') + * @property {string} [modifiedTime] - ISO 8601 timestamp of last modification (optional) + */ +``` + +**Validation Rules**: +- `id`: REQUIRED, non-empty string +- `name`: REQUIRED, non-empty string +- `mimeType`: REQUIRED, non-empty string +- `modifiedTime`: OPTIONAL, must be valid ISO 8601 format if present + +**Source**: Drive API `files.list()` response with fields: `files(id, name, mimeType, modifiedTime)` + +**Usage**: +- Retrieved during sitemap generation +- Transformed into SitemapEntry for XML output +- No filtering by mimeType (all file types included per spec) + +--- + +### 2. SitemapEntry + +Represents a single URL entry in the XML sitemap. + +**JSDoc Type Definition**: +```javascript +/** + * @typedef {Object} SitemapEntry + * @property {string} loc - Absolute URL to document (RESTful format: /documents/{id}) + * @property {string} [lastmod] - ISO 8601 date of last modification (YYYY-MM-DD format) + */ +``` + +**Validation Rules**: +- `loc`: REQUIRED, must be absolute URL (http:// or https://), properly escaped XML special chars +- `lastmod`: OPTIONAL, must be ISO 8601 date format (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS+00:00) + +**Transformation from Document**: +```javascript +/** + * Transform Document to SitemapEntry + * @param {Document} doc - Source document from Drive API + * @param {string} baseUrl - Base URL for sitemap (from config) + * @returns {SitemapEntry} + */ +function toSitemapEntry(doc, baseUrl) { + return { + loc: `${baseUrl}/documents/${encodeURIComponent(doc.id)}`, + lastmod: doc.modifiedTime ? new Date(doc.modifiedTime).toISOString().split('T')[0] : undefined + }; +} +``` + +**Usage**: +- Generated during XML sitemap construction +- Each entry becomes `......` in XML + +--- + +### 3. HTTPRequestContext + +Represents the context for an incoming HTTP request. + +**JSDoc Type Definition**: +```javascript +/** + * @typedef {Object} HTTPRequestContext + * @property {string} requestId - Unique identifier for request tracing (UUID) + * @property {string} method - HTTP method (e.g., 'GET') + * @property {string} path - Request path (e.g., '/sitemap.xml') + * @property {string} clientIp - Client IP address + * @property {number} timestamp - Request start time (Unix timestamp in ms) + */ +``` + +**Validation Rules**: +- `requestId`: REQUIRED, unique per request (generated via crypto.randomUUID()) +- `method`: REQUIRED, HTTP method string +- `path`: REQUIRED, URL path string +- `clientIp`: REQUIRED, IP address string +- `timestamp`: REQUIRED, positive integer + +**Generation**: +```javascript +import { randomUUID } from 'crypto'; + +/** + * Create request context from incoming HTTP request + * @param {http.IncomingMessage} req - Node.js HTTP request object + * @returns {HTTPRequestContext} + */ +function createRequestContext(req) { + return { + requestId: randomUUID(), + method: req.method, + path: req.url, + clientIp: req.socket.remoteAddress, + timestamp: Date.now() + }; +} +``` + +**Usage**: +- Created at request entry point +- Used for logging (trace requests through logs) +- Passed to queue for processing + +--- + +### 4. ServiceAccountCredentials + +Represents Google Service Account JWT authentication credentials. + +**JSDoc Type Definition**: +```javascript +/** + * @typedef {Object} ServiceAccountCredentials + * @property {string} client_email - Service Account email address + * @property {string} private_key - RSA private key (PEM format) + * @property {string} project_id - Google Cloud project ID + * @property {string} [token_uri] - OAuth token endpoint (default: googleapis.com) + */ +``` + +**Validation Rules**: +- `client_email`: REQUIRED, valid email format ending with `.gserviceaccount.com` +- `private_key`: REQUIRED, must start with `-----BEGIN PRIVATE KEY-----` +- `project_id`: REQUIRED, non-empty string +- `token_uri`: OPTIONAL, defaults to Google's OAuth endpoint + +**Source**: Loaded from `GOOGLE_SERVICE_ACCOUNT_KEY` environment variable (inline JSON) + +**Validation Function**: +```javascript +/** + * Validate Service Account credentials structure + * @param {Object} creds - Parsed JSON credentials + * @throws {Error} If validation fails + */ +function validateCredentials(creds) { + if (!creds.client_email || !creds.client_email.endsWith('.gserviceaccount.com')) { + throw new Error('Invalid client_email in Service Account credentials'); + } + if (!creds.private_key || !creds.private_key.startsWith('-----BEGIN PRIVATE KEY-----')) { + throw new Error('Invalid private_key in Service Account credentials'); + } + if (!creds.project_id) { + throw new Error('Missing project_id in Service Account credentials'); + } +} +``` + +**Security**: +- NEVER log `private_key` field +- Mask in logs: `client_email: xxx***@project.iam.gserviceaccount.com` + +--- + +### 5. Configuration + +Represents application runtime configuration. + +**JSDoc Type Definition**: +```javascript +/** + * @typedef {Object} ServerConfig + * @property {number} port - HTTP server port + * @property {string} baseUrl - Base URL for sitemap links (absolute URL) + */ + +/** + * @typedef {Object} DriveConfig + * @property {string} query - Drive API query filter (q parameter) + * @property {string} fields - Fields to retrieve from Drive API + * @property {number} pageSize - Maximum results per page (Drive API pagination) + * @property {string} scope - OAuth scope for Drive access + */ + +/** + * @typedef {Object} Configuration + * @property {ServerConfig} server - HTTP server configuration + * @property {DriveConfig} drive - Google Drive API configuration + */ +``` + +**Default Values**: +```javascript +const DEFAULT_CONFIG = { + server: { + port: 3000, + baseUrl: 'http://localhost:3000' + }, + drive: { + query: 'trashed = false', + fields: 'files(id, name, mimeType, modifiedTime)', + pageSize: 1000, + scope: 'https://www.googleapis.com/auth/drive.readonly' + } +}; +``` + +**Loading**: +- `config/config.js`: Exports server configuration (port, baseUrl from env vars) +- `config/settings.js`: Exports Drive configuration (query from env var, loaded into global `settings`) + +**Validation**: +- `port`: Must be 1-65535 +- `baseUrl`: Must be valid absolute URL (http:// or https://) +- `query`: Non-empty string (Drive API query syntax) +- `pageSize`: 1-1000 (Drive API limit) + +--- + +### 6. RequestQueue + +Represents the FIFO queue for /sitemap.xml requests. + +**JSDoc Type Definition**: +```javascript +/** + * @typedef {Object} QueuedRequest + * @property {Function} handler - Async function to execute (returns Promise) + * @property {Function} resolve - Promise resolve callback + * @property {Function} reject - Promise reject callback + */ + +/** + * @typedef {Object} RequestQueue + * @property {boolean} processing - Whether a request is currently being processed + * @property {QueuedRequest[]} queue - Array of pending requests (FIFO) + */ +``` + +**State Transitions**: +``` +IDLE (processing: false, queue: []) + ↓ New request arrives +PROCESSING (processing: true, queue: []) + ↓ New request arrives while processing +PROCESSING (processing: true, queue: [req1]) + ↓ Current request completes +PROCESSING (processing: true, queue: []) → Process req1 + ↓ req1 completes, queue empty +IDLE (processing: false, queue: []) +``` + +**Operations**: +- `enqueue(handler)`: Add request to queue, start processing if idle +- `processNext()`: Process next request in FIFO order, recursively call until queue empty + +**Implementation**: See research.md Section 3 for EventEmitter-based code pattern + +--- + +## State Machines + +### Authentication State + +``` +UNINITIALIZED + ↓ Load credentials from env var +VALIDATING + ↓ Parse JSON, validate structure + ├─ Success → AUTHENTICATED + └─ Failure → FATAL_ERROR (exit(1)) + +AUTHENTICATED + ↓ Token expiry detected during request +REFRESHING + ├─ Success → AUTHENTICATED + └─ Failure → UNAUTHORIZED (return 401) +``` + +**Note**: googleapis SDK manages token refresh automatically. Our code only handles: +1. Initial credential loading/validation (startup) +2. Error mapping (401 if refresh fails during request) + +--- + +### Request Processing State + +``` +RECEIVED + ↓ Create RequestContext, log request +QUEUED + ↓ Wait for queue availability (FIFO) +PROCESSING + ↓ Query Drive API + ├─ Success (≤50k docs) → GENERATING_XML + ├─ Error (>50k docs) → PAYLOAD_TOO_LARGE (413) + ├─ Error (Rate limit) → RATE_LIMITED (429 + Retry-After) + ├─ Error (503) → SERVICE_UNAVAILABLE (503, no retry) + └─ Error (Other) → INTERNAL_ERROR (500) + +GENERATING_XML + ↓ Build sitemap XML from documents + ├─ Success → COMPLETED (200 + XML) + └─ Error → INTERNAL_ERROR (500) + +COMPLETED + ↓ Log response, return to client +``` + +--- + +## Data Flow Diagrams + +### Sitemap Generation Flow + +``` +[Client] --GET /sitemap.xml--> [Server] + ↓ + [Create RequestContext] + ↓ + [Enqueue in RequestQueue] + ↓ + [Wait for queue slot (FIFO)] + ↓ + [Query Drive API files.list()] + ↓ + [Paginate through results] + ↓ + [Check count ≤ 50,000] + ↓ + YES ←─────┴─────→ NO + ↓ ↓ + [Transform Documents] [Return 413] + to SitemapEntries + ↓ + [Generate XML string] + ↓ + [Return 200 + XML] +``` + +### Error Handling Flow + +``` +[Error Occurs] + ↓ +[Identify Error Type] + ↓ + ├─ Drive API 429 → Extract rate limit info → Set Retry-After → 429 + ├─ Drive API 503 → No retry → 503 + ├─ Document count > 50k → 413 + ├─ Token refresh failed → 401 + ├─ Invalid endpoint → 404 + └─ Unknown error → Log stack → 500 + ↓ +[Set status code, NO response body] + ↓ +[Log error to stderr with context] + ↓ +[Return response to client] +``` + +--- + +## API Response Formats + +### Successful Sitemap Response (200 OK) + +**Headers**: +``` +Content-Type: application/xml; charset=utf-8 +Content-Length: {size} +``` + +**Body** (XML): +```xml + + + + http://example.com/documents/1A2B3C4D + 2026-03-07 + + + http://example.com/documents/5E6F7G8H + 2026-03-06 + + +``` + +### Error Responses (4xx/5xx) + +**All error responses**: +- **Headers**: No Content-Type (empty body) +- **Body**: Empty (per spec: status code only, no body) +- **Special case**: 429 includes `Retry-After: {seconds}` header + +**Status codes**: +- 404 Not Found: Invalid endpoint +- 413 Payload Too Large: >50,000 documents +- 429 Too Many Requests: Drive API rate limit (includes Retry-After header) +- 401 Unauthorized: Token refresh failed +- 503 Service Unavailable: Drive API unavailable (no retry) +- 500 Internal Server Error: Unexpected error + +--- + +## Validation Rules Summary + +### Input Validation +- Environment variables: + - `GOOGLE_SERVICE_ACCOUNT_KEY`: Required, valid JSON with client_email/private_key + - `PORT`: Optional, 1-65535 + - `BASE_URL`: Optional, valid absolute URL + - `DRIVE_QUERY`: Optional, non-empty string + +### Output Validation +- Sitemap XML: + - Valid XML structure (well-formed) + - Proper namespace declaration + - All URLs properly escaped (XML entities: &, <, >, ", ') + - All URLs absolute (include protocol + domain) + - Document count ≤ 50,000 + +### Runtime Validation +- HTTP requests: + - Only GET method for /sitemap.xml (others return 404) + - Only /sitemap.xml path supported (others return 404) + +--- + +## Edge Cases & Error Handling + +| Scenario | Data Impact | Response | +|----------|-------------|----------| +| Empty Drive (0 documents) | Empty urlset in XML | 200 OK with empty sitemap | +| Exactly 50,000 documents | Valid sitemap | 200 OK | +| 50,001 documents | Abort XML generation | 413 Payload Too Large | +| Drive API pagination (>1000 docs) | Multiple API calls, single result set | 200 OK after all pages collected | +| Document with special chars in ID | URL-encode document ID | Properly encoded loc URL | +| Document with no modifiedTime | SitemapEntry.lastmod undefined | Omit tag from XML | +| Concurrent requests | Queue up to N requests | Process sequentially (FIFO) | +| Request while processing | Add to queue array | Wait for turn, then process | +| Fatal error (invalid creds) | Cannot initialize auth client | Log error, exit(1) | +| Port already in use | Cannot bind server | Log error, exit(1) | + +--- + +## Performance Considerations + +### Memory Usage +- **Document array**: ~100 bytes per document × 50k max = ~5MB peak +- **XML string**: ~200 bytes per entry × 50k max = ~10MB peak +- **Total estimated**: ~20MB for max load (within 256MB constraint) + +### API Call Efficiency +- Use `fields` parameter to request only needed data (reduces payload size) +- Pagination: 1000 documents per page (Drive API limit) +- For 50k documents: ~50 API calls (sequential, within same request processing) + +### Caching Strategy +- **NO CACHING**: Per spec requirement "each sitemap request fetches current list" +- Fresh data on every request (trade-off: latency vs. freshness) + +--- + +## Summary + +This data model provides: +1. **Clear entity definitions** with JSDoc type annotations (per constitution: JavaScript + JSDoc) +2. **Validation rules** for all inputs and outputs +3. **State machines** for authentication and request processing +4. **Data flow diagrams** showing transformation pipelines +5. **Error handling patterns** for all edge cases +6. **Performance constraints** aligned with success criteria (<256MB memory, <5s response time) + +All entities are stateless runtime structures - no persistence layer required. diff --git a/specs/001-drive-proxy-adapter/plan.md b/specs/001-drive-proxy-adapter/plan.md new file mode 100644 index 0000000..eec8231 --- /dev/null +++ b/specs/001-drive-proxy-adapter/plan.md @@ -0,0 +1,156 @@ +# Implementation Plan: Google Drive HTTP Proxy Adapter + +**Branch**: `001-drive-proxy-adapter` | **Date**: 2026-03-07 | **Spec**: [spec.md](./spec.md) +**Input**: Feature specification from `/specs/001-drive-proxy-adapter/spec.md` + +**Note**: This template is filled in by the `/speckit.plan` command. See `.specify/templates/plan-template.md` for the execution workflow. + +## Summary + +Build a Node.js HTTP server that provides a single `/sitemap.xml` endpoint to generate XML sitemaps of Google Drive documents. The system authenticates using a Service Account (JWT-based), queries the Drive API for accessible documents, and returns a sitemap with RESTful URLs (`/documents/{documentId}`). Key features include: FIFO request queuing for concurrent requests, configurable Drive API filters via config/settings.js, 413 error handling for >50k documents, plain text logging to stdout/stderr, and immediate crash (exit code 1) on fatal errors. All clarifications from 3 sessions (10 total Q&A pairs) are now incorporated into design. + +## Technical Context + +**Language/Version**: JavaScript ES2022+ (Node.js LTS v18.0.0+) +**Primary Dependencies**: + +- `googleapis` v140.0.0 (Google Drive API client - justified: official Google SDK, handles OAuth2/JWT complexity, Drive API protocol implementation) +- Node.js built-ins: `http`, `fs`, `path`, `events` (for FIFO queue) + **Storage**: N/A (no persistence - sitemap generated on-demand from Drive API) + **Testing**: Node.js native test runner (`node:test`) with unit, integration, and contract test suites + **Target Platform**: Linux/macOS server environment, containerizable + **Project Type**: Web service (HTTP proxy adapter with monolithic route architecture) + **Performance Goals**: +- `/sitemap.xml` response < 5 seconds for drives with ≤10k documents +- Handle 10 concurrent requests (queued FIFO, processed sequentially) +- Startup time < 10 seconds (cold start to accepting requests) + **Constraints**: +- Memory usage < 256MB under normal load +- No file-based logging (stdout/stderr only) +- No retries on Drive API 503 errors (fail immediately) +- 50,000 document limit (sitemap protocol constraint) +- FIFO queue for /sitemap.xml requests (one at a time to prevent concurrent Drive API operations) + **Scale/Scope**: +- Single endpoint (`/sitemap.xml`) +- Support up to 50k Drive documents (enforced limit) +- 95% success rate for sitemap requests +- Service Account JWT token refresh automatically + +## Constitution Check + +_GATE: Must pass before Phase 0 research. Re-check after Phase 1 design._ + +### ✅ I. Monolithic Architecture + +- **Status**: COMPLIANT +- **Rationale**: All proxy logic in `src/proxy.js`, routed from `src/server.js`. Configuration in `config/settings.js` (Drive API filter), loaded into global `settings`. Logging uses `src/console.js` (aliased as `console.js` with log/info/debug/error functions). +- **Phase 1 Verification**: data-model.md confirms stateless architecture, no persistence layer. All entities are runtime structures (Document, SitemapEntry, HTTPRequestContext, RequestQueue). Monolithic route pattern maintained. + +### ✅ II. API-First Design + +- **Status**: COMPLIANT +- **Rationale**: Single API endpoint `/sitemap.xml` fully specified in spec.md with RESTful URL format, HTTP status codes (200, 404, 413, 429, 503), and XML response format (sitemap protocol). Error handling documented (no response body, status codes only). +- **Phase 1 Verification**: contracts/sitemap-xml-schema.md provides complete API contract with request/response formats, XML schema requirements, validation criteria, and version history. quickstart.md documents API usage with examples. + +### ⚠️ III. Test-First Development (NON-NEGOTIABLE) + +- **Status**: TO BE VERIFIED IN PHASE 2 +- **Action Required**: Tasks.md must include test-first workflow: + 1. Write failing unit tests for Drive API client, JWT auth, sitemap generator + 2. Write failing integration tests for /sitemap.xml endpoint (200, 413, 429, 503 scenarios) + 3. Write failing contract tests for XML sitemap format validation + 4. Obtain user approval of test scenarios before implementation + 5. Implement minimum code to pass tests (80%+ coverage requirement) +- **Phase 1 Note**: Test structure defined in plan.md (tests/unit/, tests/integration/, tests/contract/) and quickstart.md documents test execution commands. + +### ✅ IV. Security & Privacy by Default + +- **Status**: COMPLIANT +- **Rationale**: Service Account credentials loaded from `GOOGLE_SERVICE_ACCOUNT_KEY` env var (inline JSON), never logged. JWT tokens handled by googleapis SDK. No user data stored (stateless sitemap generation). Drive API read-only scope (`https://www.googleapis.com/auth/drive.readonly`). +- **Phase 1 Verification**: data-model.md includes security note on ServiceAccountCredentials entity: "NEVER log private_key field, mask client_email in logs". quickstart.md documents security best practices section. + +### ✅ V. Observability & Debuggability + +- **Status**: COMPLIANT +- **Rationale**: Plain text logging format `[timestamp] [level] message` to stdout/stderr. Request logging includes endpoint + response status. Error logging includes error messages for debugging. Fatal errors logged to stderr before crashing with exit code 1. +- **Phase 1 Verification**: research.md Section 5 details logging implementation with formatMessage function and log event capture list. data-model.md includes HTTPRequestContext entity with requestId for tracing. + +### ✅ VI. Semantic Versioning & Change Management + +- **Status**: COMPLIANT +- **Rationale**: package.json at v1.0.0. Single endpoint API `/sitemap.xml` - breaking changes would require version bump and migration guide. Sitemap XML format follows public sitemap protocol standard. +- **Phase 1 Verification**: contracts/sitemap-xml-schema.md includes "Breaking Changes" section defining what constitutes MAJOR version bump. Version history table tracks changes. quickstart.md versioned at 1.0.0. + +### ✅ VII. Simplicity, Minimal Dependencies & YAGNI + +- **Status**: COMPLIANT WITH JUSTIFICATION +- **Dependencies**: + - ✅ `googleapis@140.0.0` - **JUSTIFIED**: Official Google SDK, handles complex OAuth2/JWT flow, implements Drive API v3 protocol, active maintenance. Alternative (manual JWT + REST calls) would take >2 days and risk protocol errors. + - ✅ Node.js built-ins: `http` (server), `fs` (config loading), `path` (file paths), `events` (FIFO queue via EventEmitter), `crypto` (randomUUID for request tracing) +- **No speculative features**: Only implements /sitemap.xml endpoint (document export removed from scope in Session 2). No caching, no health checks, no admin UI. +- **YAGNI applied**: Rejected retry logic (per spec: fail immediately on 503), rejected file logging (stdout/stderr only), rejected concurrent processing (FIFO queue mandated). +- **Phase 1 Verification**: research.md Section 6 documents Technology Stack Validation - confirms only googleapis as external dependency. data-model.md uses only built-in types (no ORM, no database). quickstart.md confirms minimal dependencies section. + +### Constitution Check Summary (Post-Phase 1) + +- **PASS**: All 7 constitutional principles satisfied after Phase 1 design +- **Action Items**: Phase 2 tasks.md must enforce TDD workflow with test approval gate +- **Design Artifacts Complete**: + - ✅ research.md - All technical unknowns resolved + - ✅ data-model.md - Entities, state machines, validation rules documented + - ✅ contracts/sitemap-xml-schema.md - Complete API contract with examples + - ✅ quickstart.md - Installation, configuration, usage, troubleshooting guide + - ✅ Agent context updated - Copilot instructions.md includes language/database/project type + +## Project Structure + +### Documentation (this feature) + +```text +specs/001-drive-proxy-adapter/ +├── plan.md # This file (/speckit.plan command output) +├── research.md # Phase 0 output (/speckit.plan command) +├── data-model.md # Phase 1 output (/speckit.plan command) +├── quickstart.md # Phase 1 output (/speckit.plan command) +├── contracts/ # Phase 1 output (/speckit.plan command) +│ └── sitemap-xml-schema.md +└── tasks.md # Phase 2 output (/speckit.tasks command - NOT created by /speckit.plan) +``` + +### Source Code (repository root) + +```text +# Single project - Monolithic proxy architecture (per Constitution Principle I) +src/ +├── server.js # HTTP server entry point, routes all requests to proxy.js +├── proxy.js # Monolithic route handler (all sitemap logic inline) +├── console.js # Logging module (console.js alias: log/info/debug/error) +├── auth.js # Service Account JWT authentication (googleapis wrapper) +├── utils.js # Inline utility functions (if needed - prefer inline in proxy.js) +└── xml-utils.js # XML generation utilities (sitemap format) + +config/ +├── config.js # Server configuration (port, base URL) - JSON export +└── settings.js # Drive API query filter configuration - loaded into global `settings` + +tests/ +├── contract/ # XML sitemap format validation tests +│ └── sitemap-schema.test.js +├── integration/ # End-to-end /sitemap.xml endpoint tests +│ ├── sitemap-endpoint.test.js +│ ├── error-scenarios.test.js +│ └── queue-concurrency.test.js +└── unit/ # Unit tests for Drive API client, JWT, sitemap generator + ├── drive-client.test.js + ├── auth.test.js + ├── sitemap-generator.test.js + └── queue.test.js +``` + +**Structure Decision**: Single project with monolithic architecture. All proxy logic consolidated in `src/proxy.js` per Constitution Principle I. The `server.js` routes all requests to `proxy.js`. Configuration split between `config/config.js` (server settings) and `config/settings.js` (Drive API filter - loaded into global `settings` variable). Testing organized by contract/integration/unit layers to support TDD workflow (Constitution Principle III). + +## Complexity Tracking + +> **Fill ONLY if Constitution Check has violations that must be justified** + +**NO VIOLATIONS** - All constitutional principles satisfied. No complexity justification required. diff --git a/specs/001-drive-proxy-adapter/quickstart.md b/specs/001-drive-proxy-adapter/quickstart.md new file mode 100644 index 0000000..00826e0 --- /dev/null +++ b/specs/001-drive-proxy-adapter/quickstart.md @@ -0,0 +1,495 @@ +# Quickstart Guide: Google Drive HTTP Proxy Adapter + +**Feature**: 001-drive-proxy-adapter +**Date**: 2026-03-07 +**Version**: 1.0.0 + +--- + +## Overview + +The Google Drive HTTP Proxy Adapter is a Node.js application that generates XML sitemaps of Google Drive documents. It provides a single HTTP endpoint (`/sitemap.xml`) that queries the Google Drive API and returns a sitemap listing all accessible documents with links in RESTful format. + +**Key Features**: +- Service Account authentication (JWT-based, no user interaction) +- Sitemap protocol compliant (50,000 URL limit enforced) +- FIFO request queuing (sequential processing) +- Configurable Drive API filters +- Plain text logging to stdout/stderr + +--- + +## Prerequisites + +1. **Node.js**: v18.0.0 or later (LTS version recommended) +2. **Google Cloud Project**: With Drive API enabled +3. **Service Account**: JSON key file with Drive API access +4. **Network Access**: Connectivity to googleapis.com + +--- + +## Installation + +### 1. Clone Repository + +```bash +git clone +cd google-drive-content-adapter +``` + +### 2. Install Dependencies + +```bash +npm install +``` + +**Dependencies**: +- `googleapis@^140.0.0` - Official Google API client for Node.js + +--- + +## Configuration + +### 1. Service Account Setup + +**Create Service Account** (Google Cloud Console): +1. Navigate to [IAM & Admin > Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) +2. Click "Create Service Account" +3. Name: `drive-sitemap-adapter` (or your choice) +4. Grant role: None required if accessing service account's own Drive +5. Click "Create Key" → Choose JSON format → Download key file + +**Enable Drive API**: +1. Navigate to [APIs & Services > Library](https://console.cloud.google.com/apis/library) +2. Search for "Google Drive API" +3. Click "Enable" + +**Grant Access** (if accessing user drives): +- Share Drive folders/files with Service Account email (`xxx@project.iam.gserviceaccount.com`) +- OR configure domain-wide delegation (for G Suite organizations) + +--- + +### 2. Environment Variables + +Create `.env` file in project root (or set environment variables): + +```bash +# REQUIRED: Service Account credentials (inline JSON) +GOOGLE_SERVICE_ACCOUNT_KEY='{"type":"service_account","project_id":"your-project","private_key_id":"...","private_key":"-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n","client_email":"xxx@project.iam.gserviceaccount.com","client_id":"...","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_x509_cert_url":"..."}' + +# OPTIONAL: Server configuration +PORT=3000 # Default: 3000 +BASE_URL=http://localhost:3000 # Default: http://localhost:3000 + +# OPTIONAL: Drive API query filter +DRIVE_QUERY="trashed = false" # Default: "trashed = false" +``` + +**Important Notes**: +- `GOOGLE_SERVICE_ACCOUNT_KEY` must be a single-line JSON string (escape newlines in private_key) +- `BASE_URL` should match your production domain for sitemap URLs +- `DRIVE_QUERY` supports Drive API query syntax ([docs](https://developers.google.com/drive/api/guides/search-files)) + +--- + +### 3. Configuration Files + +**config/config.js**: Server settings (auto-generated from env vars) +```javascript +export default { + server: { + port: process.env.PORT || 3000, + baseUrl: process.env.BASE_URL || 'http://localhost:3000' + } +}; +``` + +**config/settings.js**: Drive API configuration +```javascript +export default { + drive: { + query: process.env.DRIVE_QUERY || "trashed = false", + fields: 'files(id, name, mimeType, modifiedTime)', + pageSize: 1000, + scope: 'https://www.googleapis.com/auth/drive.readonly' + } +}; +``` + +**To customize Drive API filter**, edit `config/settings.js` or set `DRIVE_QUERY` env var. + +--- + +## Usage + +### Start Server (Development) + +```bash +npm run dev +``` + +**Output**: +``` +[2026-03-07T10:00:00.000Z] [INFO] Server configuration loaded: port=3000, baseUrl=http://localhost:3000 +[2026-03-07T10:00:00.100Z] [INFO] Service Account authenticated: xxx***@project.iam.gserviceaccount.com +[2026-03-07T10:00:00.200Z] [INFO] HTTP server listening on port 3000 +``` + +--- + +### Start Server (Production) + +```bash +npm start +``` + +--- + +### Request Sitemap + +**Using curl**: +```bash +curl http://localhost:3000/sitemap.xml +``` + +**Expected Response** (200 OK): +```xml + + + + http://localhost:3000/documents/1A2B3C4D5E6F7G8H + 2026-03-07 + + + http://localhost:3000/documents/9I0J1K2L3M4N5O6P + 2026-03-05 + + +``` + +--- + +## Testing + +### Run All Tests + +```bash +npm test +``` + +**Test Suites**: +- `tests/unit/` - Unit tests for Drive client, auth, sitemap generator, queue +- `tests/integration/` - End-to-end endpoint tests for /sitemap.xml +- `tests/contract/` - XML sitemap schema validation tests + +--- + +### Run Specific Test Suite + +```bash +npm run test:unit # Unit tests only +npm run test:integration # Integration tests only +npm run test:contract # Contract tests only +``` + +--- + +## API Reference + +### Endpoint: `GET /sitemap.xml` + +**Description**: Generate XML sitemap of all accessible Google Drive documents. + +**Request**: +```http +GET /sitemap.xml HTTP/1.1 +Host: example.com +``` + +**Success Response** (200 OK): +```http +HTTP/1.1 200 OK +Content-Type: application/xml; charset=utf-8 +Content-Length: {size} + + + + + +``` + +**Error Responses**: +- `404 Not Found` - Invalid endpoint (only /sitemap.xml supported) +- `413 Payload Too Large` - More than 50,000 documents in Drive +- `429 Too Many Requests` - Rate limit exceeded (includes `Retry-After` header) +- `401 Unauthorized` - Authentication failed +- `503 Service Unavailable` - Drive API unavailable +- `500 Internal Server Error` - Unexpected error + +**Note**: All error responses have **empty body** (status code only). + +See [contracts/sitemap-xml-schema.md](./contracts/sitemap-xml-schema.md) for full API contract. + +--- + +## Architecture + +### Project Structure + +``` +google-drive-content-adapter/ +├── src/ +│ ├── server.js # HTTP server entry point +│ ├── proxy.js # Monolithic route handler (sitemap logic) +│ ├── logger.js # Logging module (console.js alias) +│ ├── auth.js # Service Account JWT authentication +│ └── xml-utils.js # XML generation utilities +├── config/ +│ ├── config.js # Server configuration (port, baseUrl) +│ └── settings.js # Drive API filter configuration +├── tests/ +│ ├── unit/ # Unit tests +│ ├── integration/ # Integration tests +│ └── contract/ # Contract tests +├── specs/ # Feature specifications and planning docs +│ └── 001-drive-proxy-adapter/ +│ ├── spec.md +│ ├── plan.md +│ ├── research.md +│ ├── data-model.md +│ ├── quickstart.md (this file) +│ └── contracts/ +│ └── sitemap-xml-schema.md +├── package.json +└── README.md +``` + +--- + +### Request Flow + +``` +1. Client → GET /sitemap.xml +2. Server → Create RequestContext (ID, timestamp) +3. Server → Enqueue request (FIFO queue) +4. Queue → Process request (sequential, one at a time) +5. Proxy → Authenticate with Service Account JWT +6. Proxy → Query Drive API files.list() (paginate if >1000 docs) +7. Proxy → Check count ≤ 50,000 +8. Proxy → Transform Documents to SitemapEntries +9. Proxy → Generate XML sitemap +10. Server → Return 200 + XML (or error status) +11. Queue → Process next request +``` + +--- + +## Troubleshooting + +### 1. Fatal Error: Invalid Service Account Credentials + +**Error**: +``` +[2026-03-07T10:00:00.000Z] [ERROR] FATAL: Invalid client_email in Service Account credentials +``` + +**Solution**: +- Check `GOOGLE_SERVICE_ACCOUNT_KEY` env var is valid JSON +- Ensure `client_email` field ends with `.gserviceaccount.com` +- Ensure `private_key` field starts with `-----BEGIN PRIVATE KEY-----` +- Verify no extra escaping/quotes in JSON string + +--- + +### 2. Fatal Error: Port Already in Use + +**Error**: +``` +[2026-03-07T10:00:00.000Z] [ERROR] FATAL: Unable to bind to port 3000 (EADDRINUSE) +``` + +**Solution**: +- Change `PORT` env var to different port (e.g., 8080) +- OR stop other process using port 3000: `lsof -ti:3000 | xargs kill` + +--- + +### 3. 401 Unauthorized Response + +**Cause**: Service Account token refresh failed + +**Solution**: +- Verify Service Account has Drive API access (share folders with service account email) +- Check Drive API is enabled in Google Cloud Console +- Ensure scope is correct: `https://www.googleapis.com/auth/drive.readonly` + +--- + +### 4. 413 Payload Too Large Response + +**Cause**: Google Drive contains more than 50,000 documents + +**Solution**: +- Adjust `DRIVE_QUERY` to filter documents (e.g., by folder, date, file type) +- Example: `DRIVE_QUERY="'folder-id' in parents and trashed = false"` + +--- + +### 5. 429 Too Many Requests Response + +**Cause**: Drive API rate limit exceeded + +**Solution**: +- Wait for time specified in `Retry-After` response header (seconds) +- Reduce request frequency +- Consider Drive API quota limits ([docs](https://developers.google.com/drive/api/guides/limits)) + +--- + +### 6. 503 Service Unavailable Response + +**Cause**: Google Drive API is temporarily unavailable + +**Solution**: +- Wait and retry manually (no automatic retries per spec) +- Check [Google Workspace Status Dashboard](https://www.google.com/appsstatus) + +--- + +## Performance Tips + +### 1. Optimize Drive Query Filter + +**Default** (all files): +```javascript +DRIVE_QUERY="trashed = false" +``` + +**Filter by folder**: +```javascript +DRIVE_QUERY="'folder-id' in parents and trashed = false" +``` + +**Filter by date**: +```javascript +DRIVE_QUERY="modifiedTime > '2026-01-01T00:00:00' and trashed = false" +``` + +**Filter by MIME type**: +```javascript +DRIVE_QUERY="mimeType = 'application/pdf' and trashed = false" +``` + +See [Drive API search query syntax](https://developers.google.com/drive/api/guides/search-files) for more options. + +--- + +### 2. Adjust BASE_URL for Production + +**Development**: +``` +BASE_URL=http://localhost:3000 +``` + +**Production**: +``` +BASE_URL=https://your-domain.com +``` + +This ensures sitemap URLs point to the correct domain. + +--- + +### 3. Monitor Memory Usage + +**Check memory usage** (production): +```bash +node --inspect src/server.js +# Open chrome://inspect in Chrome DevTools +``` + +**Expected**: <256MB under normal load (<10 concurrent requests) + +--- + +## Security Best Practices + +1. **Never commit** Service Account JSON key file to version control +2. **Use environment variables** for all sensitive configuration +3. **Restrict Service Account permissions** to minimum required (readonly scope) +4. **Monitor logs** for unauthorized access attempts +5. **Use HTTPS** in production (configure reverse proxy like nginx) +6. **Filter credentials from logs** (private_key field never logged) + +--- + +## Deployment + +### Docker (Recommended) + +**Dockerfile**: +```dockerfile +FROM node:18-alpine +WORKDIR /app +COPY package*.json ./ +RUN npm ci --only=production +COPY . . +EXPOSE 3000 +CMD ["npm", "start"] +``` + +**Build and run**: +```bash +docker build -t drive-sitemap-adapter . +docker run -p 3000:3000 \ + -e GOOGLE_SERVICE_ACCOUNT_KEY='{"type":"service_account",...}' \ + -e BASE_URL=https://your-domain.com \ + drive-sitemap-adapter +``` + +--- + +### Cloud Platforms + +**Google Cloud Run**: +```bash +gcloud run deploy drive-sitemap-adapter \ + --source . \ + --set-env-vars BASE_URL=https://your-domain.com \ + --set-secrets GOOGLE_SERVICE_ACCOUNT_KEY=service-account-key:latest +``` + +**AWS ECS / Fargate**: Use environment variables in task definition + +**Heroku**: Set environment variables via Heroku CLI or dashboard + +--- + +## Additional Resources + +- **Feature Specification**: [specs/001-drive-proxy-adapter/spec.md](./spec.md) +- **Implementation Plan**: [specs/001-drive-proxy-adapter/plan.md](./plan.md) +- **Research Document**: [specs/001-drive-proxy-adapter/research.md](./research.md) +- **Data Model**: [specs/001-drive-proxy-adapter/data-model.md](./data-model.md) +- **API Contract**: [specs/001-drive-proxy-adapter/contracts/sitemap-xml-schema.md](./contracts/sitemap-xml-schema.md) +- **Google Drive API Docs**: [https://developers.google.com/drive/api/v3/reference](https://developers.google.com/drive/api/v3/reference) +- **Sitemap Protocol**: [https://www.sitemaps.org/protocol.html](https://www.sitemaps.org/protocol.html) + +--- + +## Support + +For issues or questions, refer to: +1. This quickstart guide +2. Feature specification (spec.md) for requirements +3. Research document (research.md) for technical decisions +4. Contract documentation (contracts/) for API details + +--- + +## Version History + +| Version | Date | Changes | +|---------|------|---------| +| 1.0.0 | 2026-03-07 | Initial quickstart guide | diff --git a/specs/001-drive-proxy-adapter/research.md b/specs/001-drive-proxy-adapter/research.md new file mode 100644 index 0000000..6ede1a3 --- /dev/null +++ b/specs/001-drive-proxy-adapter/research.md @@ -0,0 +1,368 @@ +# Research: Google Drive HTTP Proxy Adapter + +**Feature**: 001-drive-proxy-adapter +**Phase**: 0 - Outline & Research +**Date**: 2026-03-07 + +## Overview + +This research document consolidates findings from all clarification sessions (10 Q&A pairs across 3 sessions) and investigates technical decisions for building a Node.js HTTP proxy adapter that generates XML sitemaps from Google Drive documents using Service Account authentication. + +## Research Areas + +### 1. Google Drive API Service Account Authentication + +**Decision**: Use Service Account with JWT-based authentication (server-to-server, no user interaction) + +**Rationale**: +- Service Account provides server-to-server authentication without user login flow +- JWT tokens generated programmatically from JSON key file (client_email + private_key) +- Ideal for proxy/adapter scenarios where application acts on behalf of domain users +- Tokens auto-refresh via googleapis SDK (handles expiry transparently) + +**Implementation Approach**: +- Load JSON key file from environment variable `GOOGLE_SERVICE_ACCOUNT_KEY` (inline JSON string) +- Use `googleapis` npm package `google.auth.GoogleAuth` class with JWT configuration +- Set scope to `https://www.googleapis.com/auth/drive.readonly` (read-only access) +- SDK automatically manages token lifecycle (generation, refresh, caching) + +**Alternatives Considered**: +- ❌ OAuth 2.0 user flow - Requires interactive browser login, unsuitable for proxy adapter +- ❌ API key authentication - Not supported for Drive API (OAuth required) +- ❌ Manual JWT implementation - Complex signing/token exchange, googleapis SDK already provides this + +**References**: +- [Google Service Account Documentation](https://cloud.google.com/iam/docs/service-accounts) +- [googleapis Node.js Client](https://github.com/googleapis/google-api-nodejs-client) + +--- + +### 2. XML Sitemap Generation (Sitemap Protocol) + +**Decision**: Generate XML sitemap conforming to sitemaps.org protocol, enforce 50,000 URL limit + +**Rationale**: +- Sitemap protocol specifies max 50,000 URLs per sitemap file +- Each URL entry requires `` (required), optional `` (from Drive modifiedTime) +- Must use proper XML namespace: `xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"` +- URLs must be absolute (include base URL prefix) + +**Implementation Approach**: +- Query Drive API: `drive.files.list()` with fields `files(id, name, mimeType, modifiedTime)` +- Count results - if >50,000, return HTTP 413 Payload Too Large immediately +- Build XML using template literals (Node.js native approach) or minimal XML library +- Format URLs as RESTful paths: `{baseUrl}/documents/{documentId}` +- Include `` using ISO 8601 format from Drive API `modifiedTime` field + +**Alternatives Considered**: +- ❌ Sitemap index with multiple sitemaps - Over-engineering for initial requirement (YAGNI) +- ❌ Paginated sitemaps - Not requested in spec, adds complexity +- ✅ Node.js built-in XML generation (template literals) - Simple for flat structure +- ⚠️ `xmlbuilder2` npm package - Consider if XML escaping becomes complex (acceptable dependency per constitution if justified) + +**References**: +- [Sitemaps.org Protocol](https://www.sitemaps.org/protocol.html) +- [Google Sitemap Guidelines](https://developers.google.com/search/docs/crawling-indexing/sitemaps/build-sitemap) + +--- + +### 3. Concurrency Control - FIFO Request Queue + +**Decision**: Implement FIFO queue for `/sitemap.xml` requests, process one at a time + +**Rationale** (from Session 3 clarification): +- Prevents concurrent Drive API queries that could cause rate limiting issues +- Ensures predictable resource usage (single Drive API operation at a time) +- Simple queue semantics: first request in, first request served +- If request fails, continue to next in queue (no retry per spec) + +**Implementation Approach**: +- Use Node.js EventEmitter pattern for queue implementation (built-in module) +- Maintain array of pending request handlers (FIFO array: push to end, shift from start) +- Check queue state before processing: + - If queue empty: start processing immediately + - If queue busy: add request to pending array +- Emit 'complete' event to trigger next request processing + +**Code Pattern**: +```javascript +import { EventEmitter } from 'events'; + +class SitemapQueue extends EventEmitter { + constructor() { + super(); + this.processing = false; + this.queue = []; + } + + async process(handler) { + return new Promise((resolve, reject) => { + this.queue.push({ handler, resolve, reject }); + if (!this.processing) this.processNext(); + }); + } + + async processNext() { + if (this.queue.length === 0) { + this.processing = false; + return; + } + this.processing = true; + const { handler, resolve, reject } = this.queue.shift(); + try { + const result = await handler(); + resolve(result); + } catch (error) { + reject(error); + } finally { + this.processNext(); // Process next in queue + } + } +} +``` + +**Alternatives Considered**: +- ❌ Concurrent processing with rate limiting - More complex, not required per clarification +- ❌ External queue (Redis, RabbitMQ) - Over-engineering for single-server deployment +- ❌ Worker pool - Unnecessary complexity for sequential processing requirement + +--- + +### 4. Error Handling Strategy + +**Decision**: Status-code-only errors (no response body), crash on fatal errors, immediate 503 passthrough + +**Rationale** (consolidated from all 3 sessions): +- **Clarification**: HTTP status code only, no error response body (Session 1) +- **Clarification**: Return 429 with `Retry-After` header for rate limiting (Session 1) +- **Clarification**: No retries on Drive API 503, immediately return 503 to client (Session 2) +- **Clarification**: Crash with exit code 1 on fatal errors (invalid credentials, port binding failure) (Session 3) +- **Clarification**: Return 413 for >50k documents (Session 3) + +**Error Scenarios**: +| Scenario | HTTP Status | Response Body | Retry-After Header | Action | +|----------|-------------|---------------|-------------------|--------| +| Successful sitemap | 200 OK | XML sitemap | N/A | Return sitemap | +| Invalid endpoint | 404 Not Found | Empty | N/A | Status only | +| >50k documents | 413 Payload Too Large | Empty | N/A | Status only | +| Drive API rate limit | 429 Too Many Requests | Empty | Seconds until retry | Status + header | +| OAuth token expired | 401 Unauthorized | Empty | N/A | Token refresh failed | +| Drive API unavailable (503) | 503 Service Unavailable | Empty | N/A | No retry, immediate passthrough | +| Internal error | 500 Internal Server Error | Empty | N/A | Log error, return status | +| Fatal startup error | N/A | N/A | N/A | Log to stderr, exit(1) | + +**Implementation Approach**: +- Use try-catch blocks in request handler +- Map googleapis SDK errors to HTTP status codes +- Set `Retry-After` header by extracting from Drive API error response +- Detect fatal errors during startup (invalid credentials, port EADDRINUSE) +- Use `logger.error()` for stderr logging before `process.exit(1)` + +--- + +### 5. Logging Format and Destination + +**Decision**: Plain text logging to stdout/stderr with format `[timestamp] [level] message` + +**Rationale** (from Session 3 clarification): +- Simple, human-readable format for container/cloud environments +- stdout for informational logs (info, debug) +- stderr for errors (error level) +- No file-based logging (per constitution: "stdout/stderr only") +- Timestamp helps with debugging time-sequence issues + +**Implementation Approach** (already exists in codebase): +```javascript +// src/logger.js (aliased as console.js per constitution) +const formatMessage = (level, message) => { + const timestamp = new Date().toISOString(); + return `[${timestamp}] [${level.toUpperCase()}] ${message}`; +}; + +export const logger = { + log: (msg) => console.log(formatMessage('info', msg)), + info: (msg) => console.log(formatMessage('info', msg)), + debug: (msg) => console.log(formatMessage('debug', msg)), + error: (msg) => console.error(formatMessage('error', msg)) +}; +``` + +**Log Events to Capture**: +- Server startup: port, base URL configuration +- Incoming request: method, endpoint, client IP +- Request completion: status code, response time +- Drive API interaction: query start, document count, completion time +- Errors: error type, message, stack trace (if available) +- Fatal errors: critical error message before crash + +**Alternatives Considered**: +- ❌ JSON structured logging - Over-engineering for initial requirement, plain text is simpler +- ❌ File-based logging - Explicitly rejected in constitution and clarifications +- ❌ External logging service (Sentry, LogDNA) - Not required, adds dependency + +--- + +### 6. Configuration Management + +**Decision**: Split configuration between server settings (config/config.js) and Drive API filter (config/settings.js), load credentials from environment variable + +**Rationale** (from Sessions 2 & 3 clarifications): +- **Clarification**: Service Account credentials in env var `GOOGLE_SERVICE_ACCOUNT_KEY` (Session 2) +- **Clarification**: Drive API filter configurable in `config/settings.js` (Session 3) +- Server configuration (port, base URL) in `config/config.js` (per constitution) +- settings.js loaded into global `settings` variable (per constitution) + +**Configuration Schema**: + +`config/config.js`: +```javascript +export default { + server: { + port: process.env.PORT || 3000, + baseUrl: process.env.BASE_URL || 'http://localhost:3000' + } +}; +``` + +`config/settings.js`: +```javascript +export default { + drive: { + // Drive API query filter (q parameter) + // Default: all files excluding trashed + query: process.env.DRIVE_QUERY || "trashed = false", + // Fields to retrieve + fields: 'files(id, name, mimeType, modifiedTime)', + // Maximum results per page + pageSize: 1000 + } +}; +``` + +**Environment Variables**: +- `GOOGLE_SERVICE_ACCOUNT_KEY` (required): JSON key file content (inline string) +- `PORT` (optional): Server port (default: 3000) +- `BASE_URL` (optional): Base URL for sitemap URLs (default: http://localhost:3000) +- `DRIVE_QUERY` (optional): Drive API query filter (default: "trashed = false") + +**Startup Validation**: +- Check `GOOGLE_SERVICE_ACCOUNT_KEY` is present and valid JSON +- Validate JSON contains required fields: `client_email`, `private_key` +- If validation fails: log critical error to stderr, exit(1) +- Check port is available (catch EADDRINUSE error), exit(1) if unavailable + +**Alternatives Considered**: +- ❌ Credentials file on disk - Environment variable approach is more secure and container-friendly +- ❌ Hardcoded Drive query - Explicitly rejected in Session 3 clarification +- ❌ Database configuration storage - Over-engineering for simple key-value config + +--- + +## Technology Stack Validation + +### Core Dependencies + +| Package | Version | Justification | Constitution Compliance | +|---------|---------|---------------|------------------------| +| `googleapis` | ^140.0.0 | Official Google SDK, handles OAuth2/JWT complexity, implements Drive API v3 protocol. Alternative (manual implementation) would take >2 days and risk protocol errors. | ✅ APPROVED (documented in plan.md) | + +### Node.js Built-ins Used +- `http` - HTTP server +- `fs` - Configuration file loading +- `path` - File path utilities +- `events` - FIFO queue implementation (EventEmitter) +- `url` - URL parsing for request routing + +**No additional external dependencies required** - All other functionality (XML generation, logging, queue) implemented using Node.js built-ins. + +--- + +## Best Practices Research + +### 1. Service Account Security +- **Never log credentials**: Filter private_key from logs +- **Validate JSON structure**: Check required fields before use +- **Scope restriction**: Use minimal scope (readonly) +- **Token lifecycle**: Let googleapis SDK manage refresh automatically + +### 2. HTTP Server Best Practices +- **Graceful shutdown**: Handle SIGTERM/SIGINT for cleanup +- **Request timeout**: Set reasonable timeout (30-60 seconds for Drive API calls) +- **Error boundaries**: Catch all errors to prevent crashes (except fatal startup errors) +- **Content-Type headers**: Always set appropriate headers (application/xml for sitemap) + +### 3. Google Drive API Best Practices +- **Pagination**: Use pageToken for >1000 results (Drive API default page size) +- **Field filtering**: Request only needed fields to reduce payload size +- **Rate limiting**: Handle 429 errors gracefully (already in spec) +- **Exponential backoff**: NOT required per spec (no retries on 503) + +### 4. Sitemap Generation Best Practices +- **XML escaping**: Escape special characters in URLs (&, <, >, ", ') +- **Absolute URLs**: Always use full URLs with protocol and domain +- **Date format**: Use ISO 8601 format for lastmod (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS+00:00) +- **URL encoding**: Encode document IDs if they contain special characters + +--- + +## Integration Patterns + +### Request Flow +``` +Client Request → HTTP Server → FIFO Queue → Drive API Query → XML Generation → Response + ↓ + (Sequential Processing) +``` + +### Authentication Flow +``` +Startup → Load GOOGLE_SERVICE_ACCOUNT_KEY → Parse JSON → Create GoogleAuth Client + ↓ +Request → Check Token Expiry → Auto-Refresh (if needed) → Use Token for Drive API +``` + +### Error Flow +``` +Error Occurs → Map to HTTP Status → Set Headers (Retry-After if 429) → Return Status Code (no body) + ↓ + Log Error (stderr) → Include context (request ID, error message) +``` + +--- + +## Open Questions & Assumptions + +### Resolved via Clarifications (All 3 Sessions) +✅ Authentication method → Service Account with JWT +✅ URL format → `/documents/{documentId}` (RESTful) +✅ Error response format → Status code only, no body +✅ Rate limiting behavior → 429 with Retry-After header +✅ Drive API 503 handling → No retries, immediate passthrough +✅ Credentials storage → Inline JSON in env var +✅ Logging destination → stdout/stderr only +✅ >50k documents handling → 413 error +✅ Fatal error handling → Crash with exit code 1 +✅ Concurrent requests → FIFO queue, sequential processing +✅ Log format → Plain text `[timestamp] [level] message` +✅ Drive query filter → Configurable in config/settings.js + +### Assumptions (from spec.md) +- Service Account has domain-wide delegation if accessing user drives +- Base URL configured correctly for production environment +- Node.js v18+ LTS available on deployment platform +- Network connectivity to googleapis.com available + +--- + +## Summary + +All technical unknowns from the specification have been resolved through 3 clarification sessions (10 Q&A pairs total). Key research findings: + +1. **Authentication**: googleapis SDK with Service Account JWT (load from env var) +2. **Sitemap Protocol**: Enforce 50k limit, use standard XML namespace, include lastmod +3. **Concurrency**: FIFO queue using Node.js EventEmitter (sequential processing) +4. **Error Handling**: Status-only responses, crash on fatal errors, no retries on 503 +5. **Logging**: Plain text format to stdout/stderr (no files) +6. **Configuration**: Split between config.js (server) and settings.js (Drive query filter) + +**No remaining NEEDS CLARIFICATION items** - Ready to proceed to Phase 1 design. diff --git a/specs/001-drive-proxy-adapter/spec.md b/specs/001-drive-proxy-adapter/spec.md new file mode 100644 index 0000000..5e2808c --- /dev/null +++ b/specs/001-drive-proxy-adapter/spec.md @@ -0,0 +1,158 @@ +# Feature Specification: Google Drive HTTP Proxy Adapter + +**Feature Branch**: `001-drive-proxy-adapter` +**Created**: 2026-03-06 +**Updated**: 2026-03-07 +**Status**: Draft +**Input**: User description: "I want to build a node.js application that provides an http proxy adapter to search and export documents from Google Drive. HTTP requests to 'sitemap.xml' should use a query to list documents in Google Drive. The links returned in the 'sitemap.xml' should link back to this adapter with a document id." + +**Scope Change (2026-03-07)**: Simplified to only handle sitemap.xml generation. Document export functionality removed from scope. + +## Clarifications + +### Session 2026-03-06 + +- Q: Architecture approach - format conversion vs metadata-only vs hybrid? → A: Use metadata exportLinks to fetch and stream files through adapter (hybrid: metadata discovery + content streaming) +- Q: How to handle Markdown format (not in Drive API exportLinks)? → A: Check exportLinks for text/x-markdown; if unavailable, convert from HTML export +- Q: What error response format (JSON/text/status-only)? → A: HTTP status code only, no error response body +- Q: Rate limiting behavior when Drive API limits hit? → A: Return 429 with Retry-After header indicating seconds until retry +- Q: Maximum document size limit for streaming? → A: Stream up to 20MB maximum; return 413 Payload Too Large for larger documents + +### Session 2026-03-07 + +- **SCOPE CHANGE**: Removed all document export functionality. System now only generates sitemap.xml with document IDs. The links in the sitemap point back to the adapter with document IDs, but the adapter does not implement the document retrieval endpoints. +- Q: Authentication method for Google Drive API? → A: Service Account with JSON key file (JWT-based, server-to-server authentication) +- Q: Sitemap URL format for document links? → A: /documents/{documentId} (RESTful, clear resource path) +- Q: Retry behavior when Drive API returns 503? → A: No retries, immediately return 503 to client +- Q: Service account credentials storage method? → A: Inline JSON in env var (GOOGLE_SERVICE_ACCOUNT_KEY) +- Q: Logging output destination? → A: stdout/stderr only (console logging, no files) + +### Session 3 (2026-03-07) + +- Q: How should the system handle cases where >50,000 documents exist in Google Drive (exceeding sitemap protocol limit)? → A: Return 413 error if >50k documents exist +- Q: How should the system handle fatal errors (e.g., invalid service account credentials, unable to bind to port)? → A: Log critical error + crash with exit code 1 +- Q: How should the system handle concurrent requests to /sitemap.xml? → A: Queue requests, process one at a time (FIFO) +- Q: What format should be used for log messages? → A: Plain text logging format [timestamp] [level] message +- Q: Should the Drive API query filter be hardcoded or configurable? → A: Drive API filter should be configurable in config/settings.js file (not hardcoded) + +## User Scenarios & Testing _(mandatory)_ + +### User Story 1 - Generate Sitemap of Available Documents (Priority: P1) + +A user makes an HTTP GET request to `/sitemap.xml` and receives a valid XML sitemap listing all accessible Google Drive documents with links back to the adapter (document IDs only, no export functionality). + +**Why this priority**: This is the core and only functionality. Enables document discovery and generates a sitemap with links containing document IDs. This makes the adapter useful for indexing scenarios (e.g., search engines, content aggregators). + +**Independent Test**: Can be tested by making GET request to `/sitemap.xml` and verifying: (1) valid XML sitemap format, (2) contains URLs pointing to adapter endpoints with document IDs, (3) reflects documents accessible in user's Google Drive. + +**Acceptance Scenarios**: + +1. **Given** user has access to Google Drive documents, **When** user requests `/sitemap.xml`, **Then** system returns 200 status with valid XML sitemap +2. **Given** sitemap is generated, **When** examining the XML, **Then** each `` entry contains a `` pointing to the adapter using RESTful format (e.g., `http://adapter-host/documents/{documentId}`) +3. **Given** multiple documents in Google Drive, **When** sitemap is generated, **Then** all accessible documents are included in the sitemap +4. **Given** user lacks permission to certain documents, **When** sitemap is generated, **Then** those documents are excluded from the sitemap +5. **Given** the adapter base URL is configured, **When** sitemap is generated, **Then** all URLs use the configured base URL + +--- + +### Edge Cases + +- What happens when Google Drive API is unavailable or rate-limited? → Return 503 Service Unavailable immediately without retries if API returns 503; return 429 Too Many Requests with Retry-After header if rate limited +- What happens when OAuth token expires during request? → Attempt token refresh; if failed, return 401 Unauthorized +- How are shared drive documents handled? → Treat same as My Drive documents if user has access +- What happens with password-protected or restricted documents? → Exclude from sitemap (filter out documents without read access) +- How are document updates reflected in sitemap? → Each sitemap request fetches current list; no caching +- What if there are more than 50,000 documents (sitemap limit)? → Return 413 Payload Too Large error (enforces sitemap protocol limit) +- How are non-document files handled (images, videos, etc.)? → Include all files in sitemap regardless of type +- What happens if no documents are accessible? → Return valid sitemap XML with no URL entries +- What happens when multiple /sitemap.xml requests arrive simultaneously? → Requests are queued and processed sequentially in FIFO order (one at a time) +- What happens when service account credentials are invalid or missing at startup? → Log critical error to stderr and crash with exit code 1 +- How are Drive API query filters customized? → Configure filters in config/settings.js file (not hardcoded) +- What happens if config/settings.js is missing or malformed? → Log critical error to stderr and crash with exit code 1 + +## Requirements _(mandatory)_ + +### Functional Requirements + +- **FR-001**: System MUST provide an HTTP server that listens for incoming requests +- **FR-002**: System MUST authenticate with Google Drive API using Service Account with JSON key file (JWT-based, server-to-server authentication) +- **FR-003**: System MUST handle GET requests to `/sitemap.xml` endpoint +- **FR-004**: System MUST query Google Drive API to retrieve list of accessible documents for sitemap generation +- **FR-005**: System MUST generate valid XML sitemap conforming to sitemap protocol (https://www.sitemaps.org/protocol.html) +- **FR-006**: System MUST include document metadata in sitemap (URL with RESTful path format `/documents/{documentId}`, last modified date if available) +- **FR-007**: System MUST return HTTP 404 Not Found for any endpoint other than `/sitemap.xml` +- **FR-008**: System MUST return appropriate HTTP status codes (200 OK, 401 Unauthorized, 413 Payload Too Large, 429 Too Many Requests, 500 Internal Server Error, 503 Service Unavailable) +- **FR-009**: System MUST include Content-Type: application/xml header for sitemap responses +- **FR-010**: System MUST handle OAuth token refresh when tokens expire +- **FR-011**: System MUST log all incoming requests to stdout/stderr using plain text format: [timestamp] [level] message (includes endpoint and response status) +- **FR-012**: System MUST log errors to stdout/stderr using plain text format: [timestamp] [level] message (includes request ID and error message for debugging) +- **FR-013**: System MUST handle Google Drive API rate limiting gracefully by returning 429 status with Retry-After header indicating seconds until retry +- **FR-017**: System MUST NOT retry when Google Drive API returns 503; instead immediately return 503 to client +- **FR-014**: System MUST support configuration via environment variables (port, base URL) +- **FR-018**: System MUST load Service Account credentials from environment variable GOOGLE_SERVICE_ACCOUNT_KEY containing inline JSON key file content +- **FR-015**: System MUST return 413 Payload Too Large if Google Drive contains more than 50,000 documents (enforces sitemap protocol limit) +- **FR-016**: System MUST filter out documents user lacks read access to from sitemap +- **FR-019**: System MUST process /sitemap.xml requests sequentially using a FIFO queue (one request at a time to prevent concurrent Drive API operations) +- **FR-020**: System MUST crash with exit code 1 after logging critical errors (e.g., invalid service account credentials, unable to bind to port, missing required configuration) +- **FR-021**: System MUST load Drive API query filter configuration from config/settings.js file (not hardcoded in source) + +### Key Entities + +- **Document**: Represents a file in Google Drive. Key attributes include: document ID (unique identifier), title, MIME type, last modified timestamp, permissions status +- **Sitemap Entry**: Represents a document listing in the sitemap XML. Attributes include: location URL (RESTful path `/documents/{documentId}`), last modified date +- **HTTP Request Context**: Represents an incoming request. Attributes include: request ID (for tracing), Service Account JWT token, requested endpoint, client IP +- **Service Account Credentials**: Represents JWT-based authentication state. Attributes include: client email, private key (from JSON key file), access token (generated via JWT), token expiry time, scopes granted +- **Configuration**: Represents application settings. Attributes include: Drive API query filter (loaded from config/settings.js), server port, base URL, request queue (FIFO for /sitemap.xml requests) + +## Success Criteria _(mandatory)_ + +### Measurable Outcomes + +- **SC-001**: Users can request `/sitemap.xml` and receive a valid XML sitemap within 5 seconds for drives containing up to 10,000 documents +- **SC-002**: System successfully handles at least 10 concurrent sitemap requests without errors (queued and processed sequentially in FIFO order) +- **SC-003**: 95% of sitemap requests complete successfully (200 status code) +- **SC-004**: System responds to invalid endpoint requests (404) within 1 second +- **SC-005**: System gracefully handles Google Drive API rate limits without crashing, returning 429 status codes with Retry-After headers +- **SC-006**: Service Account JWT token generation succeeds automatically in >99% of expiration scenarios +- **SC-007**: System startup time from cold start to accepting requests is under 10 seconds +- **SC-008**: System memory usage remains under 256MB under normal load (< 10 concurrent requests) +- **SC-011**: All logs output to stdout/stderr only using plain text format [timestamp] [level] message; no log files created on disk +- **SC-009**: Sitemap includes all accessible documents (100% coverage for documents with read permission) +- **SC-010**: Generated sitemap XML validates against sitemap protocol schema +- **SC-012**: System returns 413 Payload Too Large when Drive contains >50,000 documents (prevents oversized sitemap generation) +- **SC-013**: System terminates with exit code 1 within 5 seconds of encountering fatal configuration or startup errors + +## Assumptions _(optional)_ + +- Service Account has valid JSON key file credentials configured for Google Drive access +- The adapter runs as a trusted application with appropriate scopes for Google Drive access (read-only, https://www.googleapis.com/auth/drive.readonly) +- Service Account JSON key file is provided via GOOGLE_SERVICE_ACCOUNT_KEY environment variable as inline JSON string +- Network connectivity to Google Drive API (https://www.googleapis.com/drive/v3/) is available +- Document IDs in sitemap URLs are Google Drive file IDs, not custom identifiers +- Sitemap URLs use RESTful path format: `/documents/{documentId}` +- Sitemap generation queries "My Drive" and shared drives where service account has access +- Default port is 3000 unless configured otherwise +- System runs on Node.js LTS version (v18 or later) +- Environment supports async/await and ES modules +- Base URL for sitemap links is configured via environment variable +- Drive API query filter is configured in config/settings.js file (allows customization without code changes) +- System processes sitemap requests sequentially to avoid concurrent Drive API query conflicts +- Fatal errors (invalid credentials, port binding failure, missing configuration) cause immediate termination with exit code 1 + +## Out of Scope _(optional)_ + +- Document export functionality (Markdown, HTML, PDF) - removed from original scope +- Document editing or creation capabilities +- Document content retrieval or streaming +- User authentication/authorization beyond Google Service Account (JWT-based) +- Document caching or local storage (always fetch fresh list from Google Drive) +- Automatic retry logic for Drive API 503 errors (fail immediately instead) +- File-based logging (logs output to console only) +- Custom domain mapping or URL shortening +- Analytics or usage tracking +- Document versioning or revision history access +- Folder hierarchy preservation in sitemap (flat list of documents) +- Batch operations +- WebSocket or Server-Sent Events for real-time updates +- Admin interface or dashboard +- Health check endpoint (only /sitemap.xml is supported) diff --git a/specs/001-drive-proxy-adapter/tasks.md b/specs/001-drive-proxy-adapter/tasks.md new file mode 100644 index 0000000..cee6ed8 --- /dev/null +++ b/specs/001-drive-proxy-adapter/tasks.md @@ -0,0 +1,385 @@ +# Tasks: Google Drive HTTP Proxy Adapter + +**Input**: Design documents from `/specs/001-drive-proxy-adapter/` +**Prerequisites**: plan.md, spec.md, research.md, data-model.md, contracts/, quickstart.md + +**Feature**: Generate XML sitemaps from Google Drive documents via HTTP endpoint +**Key Clarifications Incorporated** (10 total): + +1. Service Account JWT auth with inline JSON env var +2. RESTful URL format `/documents/{documentId}` +3. No retries on 503 errors +4. stdout/stderr logging only +5. 413 error for >50k documents +6. Crash with exit code 1 for fatal errors +7. FIFO queue for concurrent requests +8. Plain text logging format `[timestamp] [level] message` +9. Configurable Drive API filter in config/settings.js +10. Status code only errors (no response body) + +**Tests**: ✅ Test-First Development enforced per Constitution Principle III + +**Organization**: Tasks are grouped by user story (only US1 exists for this feature - single endpoint system) + +--- + +## Format: `- [ ] [ID] [P?] [Story?] Description` + +- **[P]**: Can run in parallel (different files, no dependencies) +- **[Story]**: User story label (US1, US2, etc.) - only for user story phases +- Include exact file paths in descriptions + +--- + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: Project initialization and basic structure + +- [ ] T001 Initialize Node.js project with package.json at repository root +- [ ] T002 Install googleapis dependency v140.0.0 in package.json +- [ ] T003 [P] Create src/ directory for application source code +- [ ] T004 [P] Create config/ directory for configuration files +- [ ] T005 [P] Create tests/unit/ directory for unit tests +- [ ] T006 [P] Create tests/integration/ directory for integration tests +- [ ] T007 [P] Create tests/contract/ directory for contract tests +- [ ] T008 Configure Node.js native test runner in package.json with test scripts +- [ ] T009 [P] Setup ESLint configuration in .eslintrc.json for ES2022+ JavaScript +- [ ] T010 [P] Create .env.example file documenting required environment variables + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Core infrastructure that MUST be complete before user story implementation + +**⚠️ CRITICAL**: User Story 1 cannot begin until this phase is complete + +- [ ] T011 Create console.js module in src/ with formatMessage function and log/info/debug/error methods (plain text format: `[timestamp] [level] message`) +- [ ] T012 Create config/config.js exporting server configuration (port, baseUrl from env vars) +- [ ] T013 Create config/settings.js exporting Drive API configuration (query filter from env var DRIVE_QUERY or default "trashed = false", fields, pageSize, scope) +- [ ] T014 Create auth.js module in src/ for Service Account JWT authentication using googleapis GoogleAuth class +- [ ] T015 Add credential validation function in src/auth.js to check client_email, private_key, project_id structure +- [ ] T016 Implement fatal error handler in src/auth.js that logs to stderr and exits with code 1 if credentials invalid +- [ ] T017 Create xml-utils.js module in src/ with XML escaping utilities for special characters (&, <, >, ", ') +- [ ] T018 Implement FIFO request queue class in src/queue.js using Node.js EventEmitter with processing flag and queue array +- [ ] T019 Create server.js entry point in src/ that sets up HTTP server with http module + +**Checkpoint**: Foundation ready - User Story 1 implementation can now begin + +--- + +## Phase 3: User Story 1 - Generate Sitemap of Available Documents (Priority: P1) 🎯 MVP + +**Goal**: Users can request `/sitemap.xml` and receive a valid XML sitemap listing all accessible Google Drive documents with RESTful links containing document IDs + +**Independent Test**: Make GET request to `/sitemap.xml` and verify: (1) 200 status with valid XML sitemap format, (2) URLs use RESTful format `/documents/{documentId}`, (3) reflects documents in Google Drive, (4) handles >50k documents with 413, (5) queues concurrent requests in FIFO order + +**Why this is the complete feature**: This feature has only one user story. The system provides a single endpoint for sitemap generation. + +--- + +### Tests for User Story 1 (Test-First Development) ⚠️ + +> **CONSTITUTION REQUIREMENT**: Write these tests FIRST, ensure they FAIL, obtain user approval before implementation + +#### Contract Tests + +- [ ] T020 [P] [US1] Contract test for /sitemap.xml success response (200 OK) in tests/contract/sitemap-schema.test.js - verify XML structure, namespace, Content-Type header +- [ ] T021 [P] [US1] Contract test for /sitemap.xml with empty Drive (0 documents) in tests/contract/sitemap-schema.test.js - verify empty urlset is valid +- [ ] T022 [P] [US1] Contract test for XML special character escaping in tests/contract/sitemap-schema.test.js - verify &, <, >, ", ' are properly escaped in URLs +- [ ] T023 [P] [US1] Contract test for lastmod date format validation in tests/contract/sitemap-schema.test.js - verify ISO 8601 format YYYY-MM-DD + +#### Integration Tests + +- [ ] T024 [P] [US1] Integration test for /sitemap.xml endpoint success scenario in tests/integration/sitemap-endpoint.test.js - mock Drive API, verify 200 response with valid XML +- [ ] T025 [P] [US1] Integration test for /sitemap.xml with >50k documents in tests/integration/error-scenarios.test.js - verify 413 response with no body +- [ ] T026 [P] [US1] Integration test for /sitemap.xml with Drive API rate limiting in tests/integration/error-scenarios.test.js - verify 429 response with Retry-After header and no body +- [ ] T027 [P] [US1] Integration test for /sitemap.xml with Drive API 503 error in tests/integration/error-scenarios.test.js - verify 503 passthrough with no retry and no body +- [ ] T028 [P] [US1] Integration test for invalid endpoint requests in tests/integration/error-scenarios.test.js - verify 404 response with no body for non-/sitemap.xml paths +- [ ] T029 [P] [US1] Integration test for concurrent requests to /sitemap.xml in tests/integration/queue-concurrency.test.js - verify FIFO processing (one at a time) +- [ ] T030 [P] [US1] Integration test for Service Account token refresh in tests/integration/sitemap-endpoint.test.js - mock token expiry, verify 401 if refresh fails + +#### Unit Tests + +- [ ] T031 [P] [US1] Unit test for Drive API client query execution in tests/unit/drive-client.test.js - mock googleapis drive.files.list() call +- [ ] T032 [P] [US1] Unit test for Drive API pagination handling in tests/unit/drive-client.test.js - verify pageToken logic for >1000 documents +- [ ] T033 [P] [US1] Unit test for Service Account JWT authentication in tests/unit/auth.test.js - verify GoogleAuth client creation from env var JSON +- [ ] T034 [P] [US1] Unit test for credential validation in tests/unit/auth.test.js - verify detection of invalid client_email, private_key, project_id +- [ ] T035 [P] [US1] Unit test for sitemap XML generation in tests/unit/sitemap-generator.test.js - verify XML structure and URL format /documents/{documentId} +- [ ] T036 [P] [US1] Unit test for Document to SitemapEntry transformation in tests/unit/sitemap-generator.test.js - verify baseUrl + /documents/ + documentId concatenation +- [ ] T037 [P] [US1] Unit test for lastmod date formatting in tests/unit/sitemap-generator.test.js - verify ISO 8601 YYYY-MM-DD format from modifiedTime +- [ ] T038 [P] [US1] Unit test for FIFO queue enqueue/dequeue in tests/unit/queue.test.js - verify sequential processing order +- [ ] T039 [P] [US1] Unit test for FIFO queue concurrent request handling in tests/unit/queue.test.js - verify processing flag prevents simultaneous execution +- [ ] T040 [P] [US1] Unit test for XML special character escaping in tests/unit/sitemap-generator.test.js - verify escapeXml function handles &, <, >, ", ' + +**TEST APPROVAL CHECKPOINT**: Present test scenarios to user for approval before proceeding to implementation + +--- + +### Implementation for User Story 1 + +#### Drive API Integration + +- [X] T041 [P] [US1] Create drive-client.js module in src/ with function to initialize googleapis drive client using auth from src/auth.js +- [X] T042 [US1] Implement queryDocuments function in src/drive-client.js to call drive.files.list() with query from config/settings.js and fields: files(id, name, mimeType, modifiedTime) +- [X] T043 [US1] Implement pagination logic in src/drive-client.js to handle pageToken and collect all results up to 50,000 limit +- [X] T044 [US1] Add document count validation in src/drive-client.js to return error if count exceeds 50,000 +- [X] T045 [US1] Implement error mapping in src/drive-client.js to detect Drive API 429 (rate limit), 503 (unavailable), auth failures + +#### Sitemap Generation + +- [X] T046 [P] [US1] Create sitemap-generator.js module in src/ with function to transform Document array to SitemapEntry array +- [X] T047 [US1] Implement toSitemapEntry function in src/sitemap-generator.js to construct loc URLs using baseUrl + /documents/ + encodeURIComponent(documentId) +- [X] T048 [US1] Implement lastmod date extraction in src/sitemap-generator.js to format modifiedTime as ISO 8601 date (YYYY-MM-DD) +- [X] T049 [US1] Implement generateSitemapXML function in src/sitemap-generator.js to build XML string with proper namespace and escaped URLs using xml-utils.js +- [X] T050 [US1] Add empty sitemap handling in src/sitemap-generator.js to return valid XML with empty urlset when 0 documents + +#### Request Routing and Error Handling + +- [X] T051 [US1] Create proxy.js monolithic route handler in src/ that imports queue, drive-client, sitemap-generator modules +- [X] T052 [US1] Implement request handler function in src/proxy.js that checks if path is /sitemap.xml (404 for all other paths with no response body) +- [X] T053 [US1] Implement FIFO queue integration in src/proxy.js to enqueue /sitemap.xml requests using queue.process() from src/queue.js +- [X] T054 [US1] Implement sitemap generation flow in src/proxy.js: authenticate → query Drive API → check count → transform to sitemap → generate XML +- [X] T055 [US1] Implement error response handling in src/proxy.js for 413 (>50k docs), 429 (rate limit with Retry-After header), 503 (Drive unavailable), 401 (auth failed), 500 (unexpected) - all with NO response body +- [X] T056 [US1] Add HTTP response headers in src/proxy.js: Content-Type: application/xml; charset=utf-8 for 200 responses, no Content-Type for errors +- [X] T057 [US1] Extract Retry-After value from Drive API 429 error in src/proxy.js and set Retry-After header in seconds + +#### Logging and Observability + +- [X] T058 [US1] Add request logging in src/proxy.js to log incoming requests with method, path, client IP using console.info() from src/console.js +- [X] T059 [US1] Add response logging in src/proxy.js to log status code and response time for each request using console.info() +- [X] T060 [US1] Add Drive API operation logging in src/drive-client.js to log query start, document count, and completion time using console.debug() +- [X] T061 [US1] Add error logging in src/proxy.js to log errors with request context (requestId) and error message using console.error() to stderr +- [X] T062 [US1] Implement requestId generation in src/proxy.js using crypto.randomUUID() for request tracing + +#### Server Lifecycle + +- [X] T063 [US1] Implement HTTP server setup in src/server.js to route all requests to src/proxy.js handler +- [X] T064 [US1] Load configuration in src/server.js from config/config.js and config/settings.js on startup +- [X] T065 [US1] Load Service Account credentials in src/server.js from GOOGLE_SERVICE_ACCOUNT_KEY env var on startup +- [X] T066 [US1] Add startup validation in src/server.js to call credential validation from src/auth.js and exit(1) on failure +- [X] T067 [US1] Implement server binding in src/server.js to listen on port from config, catch EADDRINUSE error and exit(1) with error log +- [X] T068 [US1] Add startup logging in src/server.js to log server configuration (port, baseUrl), Service Account email (masked), and "server listening" message using console.info() +- [X] T069 [US1] Implement graceful shutdown handler in src/server.js for SIGTERM/SIGINT signals to log shutdown and close server + +**Checkpoint**: User Story 1 complete - /sitemap.xml endpoint fully functional with all 10 clarifications implemented + +--- + +## Phase 4: Polish & Cross-Cutting Concerns + +**Purpose**: Final validation, documentation, and quality improvements + +- [X] T070 [P] Update README.md with quickstart instructions referencing specs/001-drive-proxy-adapter/quickstart.md +- [X] T071 [P] Create .env.example file with all required environment variables documented per quickstart.md +- [X] T072 Validate test coverage meets 80%+ requirement per constitution using Node.js test runner coverage +- [ ] T073 Run all tests (npm test) and verify 100% pass rate +- [ ] T074 Manual validation: Start server and request /sitemap.xml, verify valid XML response +- [ ] T075 Manual validation: Test >50k documents scenario, verify 413 response with no body +- [ ] T076 Manual validation: Test invalid endpoint, verify 404 response with no body +- [ ] T077 Manual validation: Test concurrent requests, verify FIFO processing (sequential execution) +- [ ] T078 Manual validation: Test fatal error scenarios (invalid credentials, port in use), verify exit code 1 +- [X] T079 [P] Code cleanup: Remove unused imports, add JSDoc comments for all public functions +- [ ] T080 Run ESLint and fix any linting errors +- [~] T081 Verify all log output uses plain text format `[timestamp] [level] message` per research.md Section 5 +- [X] T082 Verify Drive API filter is loaded from config/settings.js not hardcoded per clarification #9 +- [ ] T083 Run quickstart.md validation: follow installation and usage instructions from scratch + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Setup (Phase 1)**: No dependencies - start immediately +- **Foundational (Phase 2)**: Depends on Setup (Phase 1) - BLOCKS User Story 1 +- **User Story 1 (Phase 3)**: Depends on Foundational (Phase 2) - This is the only user story +- **Polish (Phase 4)**: Depends on User Story 1 completion + +### Within User Story 1 + +**Test-First Sequence**: + +1. Write ALL tests (T020-T040) - can run in parallel [P] +2. STOP: Obtain user approval of test scenarios +3. Verify tests FAIL (no implementation yet) +4. Proceed to implementation + +**Implementation Sequence**: + +1. Drive API Integration (T041-T045) +2. Sitemap Generation (T046-T050) - can run in parallel with T041-T045 +3. Request Routing (T051-T057) - depends on T041-T050 +4. Logging (T058-T062) - can run in parallel with T051-T057 +5. Server Lifecycle (T063-T069) - depends on T051-T062 + +### Parallel Opportunities + +**Phase 1 Setup** - All can run in parallel: + +- T003, T004, T005, T006, T007 (directory creation) +- T009, T010 (config files) + +**Phase 2 Foundational** - Groups can run in parallel: + +- T011, T012, T013, T017 (utility modules) +- T014, T015, T016 (auth module) +- T018, T019 (queue and server scaffolding) + +**Phase 3 Tests** - All tests can run in parallel: + +- Contract tests: T020, T021, T022, T023 +- Integration tests: T024-T030 +- Unit tests: T031-T040 + +**Phase 3 Implementation** - Within groups: + +- T041, T046 (drive-client and sitemap-generator start in parallel) +- T058-T062 (all logging tasks in parallel) + +**Phase 4 Polish**: + +- T070, T071, T079, T081, T082 (documentation and cleanup) + +--- + +## Parallel Example: User Story 1 Tests + +```bash +# Launch all contract tests together: +Task: "Contract test for /sitemap.xml success response in tests/contract/sitemap-schema.test.js" +Task: "Contract test for /sitemap.xml with empty Drive in tests/contract/sitemap-schema.test.js" +Task: "Contract test for XML special character escaping in tests/contract/sitemap-schema.test.js" +Task: "Contract test for lastmod date format validation in tests/contract/sitemap-schema.test.js" + +# Launch all integration tests together: +Task: "Integration test for /sitemap.xml endpoint success in tests/integration/sitemap-endpoint.test.js" +Task: "Integration test for >50k documents in tests/integration/error-scenarios.test.js" +Task: "Integration test for Drive API rate limiting in tests/integration/error-scenarios.test.js" +Task: "Integration test for Drive API 503 error in tests/integration/error-scenarios.test.js" +Task: "Integration test for invalid endpoints in tests/integration/error-scenarios.test.js" +Task: "Integration test for concurrent requests in tests/integration/queue-concurrency.test.js" +Task: "Integration test for token refresh in tests/integration/sitemap-endpoint.test.js" + +# Launch all unit tests together: +Task: "Unit test for Drive API client query execution in tests/unit/drive-client.test.js" +Task: "Unit test for Drive API pagination handling in tests/unit/drive-client.test.js" +Task: "Unit test for Service Account JWT authentication in tests/unit/auth.test.js" +Task: "Unit test for credential validation in tests/unit/auth.test.js" +Task: "Unit test for sitemap XML generation in tests/unit/sitemap-generator.test.js" +Task: "Unit test for Document to SitemapEntry transformation in tests/unit/sitemap-generator.test.js" +Task: "Unit test for lastmod date formatting in tests/unit/sitemap-generator.test.js" +Task: "Unit test for FIFO queue enqueue/dequeue in tests/unit/queue.test.js" +Task: "Unit test for FIFO queue concurrent request handling in tests/unit/queue.test.js" +Task: "Unit test for XML special character escaping in tests/unit/sitemap-generator.test.js" +``` + +--- + +## Implementation Strategy + +### MVP = Complete Feature (User Story 1 Only) + +This feature is inherently MVP-sized: + +1. Complete Phase 1: Setup → Project initialized +2. Complete Phase 2: Foundational → Infrastructure ready (CRITICAL BLOCKER) +3. Complete Phase 3: User Story 1 → **FULL FEATURE COMPLETE** +4. Complete Phase 4: Polish → Production ready +5. **VALIDATE**: Test /sitemap.xml independently with all 10 clarifications verified + +### No Incremental Delivery Needed + +Unlike multi-story features, this feature has only one user story. The MVP IS the complete feature: + +- Single endpoint: `/sitemap.xml` +- All requirements in User Story 1 +- No additional stories to add later + +### Validation Checklist (All 10 Clarifications) + +Before marking feature complete, verify: + +1. ✅ Service Account JWT auth works with inline JSON from `GOOGLE_SERVICE_ACCOUNT_KEY` env var +2. ✅ Sitemap URLs use RESTful format: `/documents/{documentId}` +3. ✅ Drive API 503 errors pass through immediately with NO retries +4. ✅ All logs output to stdout/stderr only (no log files) +5. ✅ System returns 413 error when >50,000 documents exist +6. ✅ Fatal errors (invalid credentials, port conflict) crash with exit code 1 +7. ✅ Concurrent /sitemap.xml requests queue in FIFO order and process sequentially +8. ✅ Log format is plain text: `[timestamp] [level] message` +9. ✅ Drive API query filter loads from `config/settings.js` (configurable, not hardcoded) +10. ✅ All error responses return status code only with NO response body (except 429 includes Retry-After header) + +--- + +## Task Summary + +**Total Tasks**: 83 + +- **Phase 1 (Setup)**: 10 tasks +- **Phase 2 (Foundational)**: 9 tasks (BLOCKING) +- **Phase 3 (User Story 1)**: + - Tests: 21 tasks (T020-T040) + - Implementation: 29 tasks (T041-T069) +- **Phase 4 (Polish)**: 14 tasks + +**Parallel Opportunities**: + +- Phase 1: 7 tasks can run in parallel +- Phase 2: 6 tasks can run in parallel +- Phase 3 Tests: All 21 tests can run in parallel +- Phase 3 Implementation: Up to 4 tasks can run in parallel at certain points +- Phase 4: 5 tasks can run in parallel + +**Independent Test Criteria**: User Story 1 is independently testable via: + +1. GET /sitemap.xml returns 200 with valid XML +2. URLs follow RESTful format /documents/{documentId} +3. > 50k documents returns 413 (no body) +4. Concurrent requests process sequentially (FIFO) +5. Fatal errors crash with exit code 1 +6. Logs use plain text format to stdout/stderr +7. Drive API filter loads from config/settings.js + +**Suggested MVP Scope**: Complete all phases (this is a single-story feature) + +--- + +## Format Validation + +✅ **ALL tasks follow checklist format**: + +- Checkbox: `- [ ]` +- Task ID: Sequential (T001-T083) +- [P] marker: Present only on parallelizable tasks +- [Story] label: Present only on User Story 1 phase tasks (US1) +- Description: Includes clear action and exact file path +- File paths: All absolute and specific + +✅ **Organization by user story**: + +- Setup phase: No story label (infrastructure) +- Foundational phase: No story label (blocking prerequisites) +- User Story 1 phase: All tasks marked [US1] +- Polish phase: No story label (cross-cutting) + +✅ **Compliance with constitution**: + +- Test-First Development: Tests (T020-T040) come before implementation with approval gate +- Monolithic architecture: Single proxy.js for all logic per plan.md +- Minimal dependencies: Only googleapis + Node.js built-ins per research.md +- Observability: Plain text logging to stdout/stderr per clarification #4, #8 + +--- + +## Notes + +- This feature has only ONE user story (sitemap generation), so all implementation tasks are in Phase 3 +- The feature specification explicitly removed document export functionality from scope (Session 2) +- All 10 clarifications from 3 sessions are incorporated into task descriptions +- Test-first development is mandatory per Constitution Principle III (non-negotiable) +- FIFO queue ensures sequential processing of concurrent requests (no parallel Drive API operations) +- Fatal errors must crash immediately with exit code 1 (no graceful degradation) +- Error responses have NO body (status code only), except 429 includes Retry-After header +- Drive API query filter MUST be configurable via config/settings.js (not hardcoded) diff --git a/src/globalVariables/googleDriveAdapterHelper.js b/src/globalVariables/googleDriveAdapterHelper.js new file mode 100644 index 0000000..15126c9 --- /dev/null +++ b/src/globalVariables/googleDriveAdapterHelper.js @@ -0,0 +1,313 @@ +/** + * Helper Functions Module for Proxy Script + * + * This module contains pure utility/helper functions extracted from proxy.js + * to improve code organization while maintaining vm.Script isolation pattern. + * + * ARCHITECTURE: + * - This file contains the LITERAL BODY of a function + * - server.js wraps this in a function: (function() { })() + * - Function returns a single object containing all helper functions + * - Injected into globalVariableContext for access by proxy.js + * - NO IMPORTS - All dependencies provided via VM context + * + * Globals expected (provided by server.js): + * - crypto: Web Crypto API (for randomUUID()) + * - console: Custom logger + * + * @returns {Object} Helpers object with all utility functions + */ + +/** + * Custom error for document count exceeding limit + */ +class DocumentCountExceededError extends Error { +constructor(count, limit) { + super(`Document count ${count} exceeds limit of ${limit}`); + this.name = "DocumentCountExceededError"; + this.count = count; + this.limit = limit; + this.statusCode = 413; +} +} + +// ============================================================================= +// Utility Functions +// ============================================================================= + +/** + * Generate a unique request ID for tracing + * Uses UUID v4 for uniqueness + * + * @returns {string} Request ID in format: req_ + */ +function generateRequestId() { +return `req_${crypto.randomUUID()}`; +} + +/** + * Validate document ID format + * Google Drive IDs are alphanumeric with hyphens and underscores + * + * @param {string} id - Document ID to validate + * @returns {boolean} True if valid + */ +function validateDocumentId(id) { + if (!id || typeof id !== "string") { + return false; + } + + // Google Drive IDs are typically 8-128 characters + // Characters: a-z, A-Z, 0-9, -, _ + const pattern = /^[a-zA-Z0-9_-]{8,128}$/; + return pattern.test(id); +} + +/** + * Validate document count against limit + * + * @param {number} count - Document count + * @param {number} limit - Maximum allowed (default: 50000) + * @throws {DocumentCountExceededError} If count > limit + */ +function validateDocumentCount(count, limit = 50000) { + if (count > limit) { + throw new DocumentCountExceededError(count, limit); + } +} + +// ============================================================================= +// XML Utilities +// ============================================================================= + +/** + * Escape special XML characters + * Prevents XML injection and ensures valid XML output + * + * @param {string} str - String to escape + * @returns {string} Escaped string safe for XML + */ +function escapeXml(str) { + if (!str) return ""; + + return str + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); +} + +// ============================================================================= +// Error Mapping +// ============================================================================= + +/** + * Map Drive API error to HTTP status code and retry info + * + * Per specification: + * - 429: Rate limit - include Retry-After header + * - 503: Service unavailable - NO RETRY (fail immediately) + * - 401: Authentication failed + * - 500: Other errors + * + * @param {Error} error - Drive API error + * @returns {Object} { statusCode, retryAfter? } + */ +function mapDriveErrorToHttp(error) { + // Handle DocumentCountExceededError + if (error instanceof DocumentCountExceededError) { + return { statusCode: 413 }; + } + + // Extract status code from Drive API error + const statusCode = error.response?.status || error.code || 500; + + // Handle rate limiting (429) + if (statusCode === 429) { + // Extract Retry-After from response headers if present + const retryAfter = error.response?.headers?.["retry-after"]; + const retryAfterSeconds = retryAfter ? parseInt(retryAfter, 10) : 60; + + return { + statusCode: 429, + retryAfter: retryAfterSeconds, + }; + } + + // Handle service unavailable (503) - NO RETRY per spec + if (statusCode === 503) { + return { statusCode: 503 }; + } + + // Handle authentication errors + if (statusCode === 401 || statusCode === 403) { + return { statusCode: statusCode }; + } + + // All other errors map to 500 + return { statusCode: 500 }; +} + +// ============================================================================= +// Sitemap Functions +// ============================================================================= + +/** + * Transform Drive document to sitemap entry + * + * Creates RESTful URL in format: {baseUrl}/documents/{documentId} + * Per specification clarification #2. + * + * @param {Object} document - Drive API document + * @param {string} document.id - Document ID + * @param {string} document.modifiedTime - ISO 8601 timestamp + * @param {string} baseUrl - Base URL for the adapter + * @returns {Object} Sitemap entry { loc, lastmod } + */ +function toSitemapEntry(document, baseUrl) { + if (!document || !document.id) { + console.error("Invalid document for sitemap entry", { document }); + return null; + } + + // RESTful URL format: /documents/{documentId} + const loc = `${baseUrl}/documents/${encodeURIComponent(document.id)}`; + + // Format lastmod as ISO 8601 date (YYYY-MM-DD) + let lastmod; + if (document.modifiedTime) { + try { + const date = new Date(document.modifiedTime); + lastmod = date.toISOString().split("T")[0]; // Extract YYYY-MM-DD + } catch (error) { + console.error("Invalid modifiedTime for document", { + documentId: document.id, + modifiedTime: document.modifiedTime, + }); + lastmod = new Date().toISOString().split("T")[0]; // Fallback to today + } + } else { + lastmod = new Date().toISOString().split("T")[0]; // Fallback to today + } + + return { loc, lastmod }; +} + +/** + * Transform array of Drive documents to sitemap entries + * + * @param {Array} documents - Array of Drive API documents + * @param {string} baseUrl - Base URL for the adapter + * @returns {Array} Array of sitemap entries + */ +function transformDocumentsToSitemapEntries(documents, baseUrl) { + if (!Array.isArray(documents)) { + console.error("Documents must be an array", { documents }); + return []; + } + + return documents + .map((doc) => toSitemapEntry(doc, baseUrl)) + .filter((entry) => entry !== null); +} + +/** + * Generate XML sitemap from sitemap entries + * + * Handles empty sitemap (0 documents) case - returns valid XML with empty urlset. + * + * @param {Array} sitemapEntries - Array of { loc, lastmod } objects + * @returns {string} Complete XML sitemap string + */ +function generateSitemapXML(sitemapEntries) { + let xml = '\n'; + xml += '\n'; + + // Handle empty sitemap - valid XML with no elements + if (!sitemapEntries || sitemapEntries.length === 0) { + xml += ""; + return xml; + } + + for (const entry of sitemapEntries) { + xml += " \n"; + xml += ` ${escapeXml(entry.loc)}\n`; + xml += ` ${escapeXml(entry.lastmod)}\n`; + xml += " \n"; + } + + xml += ""; + + return xml; +} + +/** + * Main sitemap generation function + * + * Combines document transformation and XML generation. + * + * @param {Array} documents - Array of Drive API documents + * @param {string} baseUrl - Base URL for the adapter + * @returns {string} Complete XML sitemap + */ +function generateSitemap(documents, baseUrl) { + const entries = transformDocumentsToSitemapEntries(documents, baseUrl); + return generateSitemapXML(entries); +} + +// ============================================================================= +// Route Parsing +// ============================================================================= + +/** + * Parse route from request + * @param {string} method - HTTP method + * @param {string} url - Request URL + * @returns {Object} Route info or error + */ +function parseRoute(method, url) { + if (method !== "GET") { + return { route: null, error: "Method not allowed", statusCode: 405 }; + } + + const urlObj = new URL(url, "http://localhost"); + const path = urlObj.pathname; + + // Match any path containing 'sitemap.xml' + if (path.includes("sitemap.xml")) { + return { route: "sitemap" }; + } + +// All other paths return 404 +return { route: null, error: "Not found", statusCode: 404 }; +} + +// ============================================================================= +// Return helpers object with all functions +// ============================================================================= + +return { + // Error classes + DocumentCountExceededError, + + // Utilities + generateRequestId, + validateDocumentId, + validateDocumentCount, + + // XML + escapeXml, + + // Error mapping + mapDriveErrorToHttp, + + // Sitemap + toSitemapEntry, + transformDocumentsToSitemapEntries, + generateSitemapXML, + generateSitemap, + + // Routing + parseRoute, +}; diff --git a/src/globalVariables/google_drive_settings.json b/src/globalVariables/google_drive_settings.json new file mode 100644 index 0000000..b173e12 --- /dev/null +++ b/src/globalVariables/google_drive_settings.json @@ -0,0 +1,23 @@ +{ + "serviceAccount": { + "type": "service_account", + "project_id": "black-portfolio-486723-f2", + "private_key_id": "01d829a7ef4b4a85506ad31718bb4331c8618183", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQChyph2XLEiFtmN\nvnetGXI4xXwwH1rxd9ieDD9jMfBGvTjAcbhLILO4BBa+bWMyHzjyK7AKp4a4Bs5w\ny3dYHCFlCF/46kf2j+wcCDmnBTHcjS61E+ycSy9Jznz7myZac3bzf4QT3z4po02S\n+okJdH1CKGFepWfQysmATCQ8+nQQCovjydQI6WX+q5LlUpGv3WLjHJsX0YNpy1fS\nHglMw57iY5s7qprdFJNlQqgVy9TQGxzcIZtRFW6YriMhZty7Nxdr5hvLWgqtSNuw\ntFzqeWcJyuCbROlDTWGmKtlwa8lRmtNXWSrIPNKvDaicr7HLUoMh3phfMMtxi1OC\nfZhXvd0BAgMBAAECggEAAURGka5/uh1ejvfPzy6ILKzSUEcOPB5zhx0VOeHSMInt\nWII0wUeoaTi6I/EpGK4CcQYcajXwmyQzADKInNC3ZneJ1vyga1F/cE1Ubw6ZFYcT\nB5AIOdrzwU69KeQM4/exgF4rhsi4T2+aeoZ3gD1jLcDGuTdqGYTH7iHSerooLS1h\nGwG5wSy0fn5vpRNfMZLZ9ZdPQi+PQujiVDkuABYdax018kHyqFTxCzaxX6uuSKMH\nDFh3k6q/WUxgEmQfs6cvJKVbcXs8vPU4ROaz2shId3NV/jev1orrRpFOl5ptKBxh\nbtOykl21r96gGTN2zs32KgleDYFDpscDk3Ik2FqXkQKBgQDTJ6iSCvjUflWl6HQz\nuuDuXMElK842vr3SLAgUskB4hY6Hy4kZskfIth+KvAM+wxS6JPUXx2p6VaL8P4Av\nJklwQdXHEfSRrgBncUtfFcrcvvykOioaL17fS2EPsbjjdsBt9oZCiahRDSwoIhrE\nwKqwHlrmVWC7EluNRxxzPKFuEQKBgQDEJxPVSWQUIWtCHwa1NnU1oxvUTdSSkGRJ\n3AG9zAu3r0mwqA2BvAVDbfDQmvWgXALvmaLcIkKJOEcTv7MuH9o5TyEoBBJ4DtKY\nQWDcPn0rBCvX7GQ3b5r9DhbiubtCotyJQnL6LIpwZpJC+sDTq5x6nI3TrOrFmyGA\n4XlNL+5P8QKBgD7479wGK6lrt+1Pwv/+dsB/pxaH1usavY+llA9gDbwj0JsNB2lD\ncwcX0ZZVdf5Mvay6AuJBla7ARWhHI9pr57Dz4WaKI08i/nnbHuhPnn1w8/WiZxYC\nFKAxYdQFY6dqrf7da7MCTNFHRWj+qs8MyprVorRYuA1ybx1WHNT9OwORAoGABJ/L\nNucBBfx3s9pZZSJAhyAuQsYG8eGXi6o1HE1YJV9rhE+h6eIN2bYYzEIq8jnZE97y\nWPAx01xRSKTnS3oSwfEcnf3ilZP74P0BlI+gkcgKZI+9GRV3eOnBHl00jfCa9F1t\nqnosVVQFtLCGpTbRfI5+RXQ5IKl0k749BtXPb3ECgYBLO3S+EBTcsS9fYzq9dNSk\n5WG2DEqezgciJ46ZPtLqLs4IhDNVozhorY9lfg9LfQpY5gAJR88eLtdAv0quDbYn\npxxMglR09qNmcX/nPvRfCvua45n2VaTRuUdyEzjKUtUlp5oVWo4XmKLqPTOeJkuh\nT85FN2oAmw0ZGJ0eLiU7tQ==\n-----END PRIVATE KEY-----\n", + "client_email": "n8n-service@black-portfolio-486723-f2.iam.gserviceaccount.com", + "client_id": "108246676308214231920", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/n8n-service%40black-portfolio-486723-f2.iam.gserviceaccount.com", + "universe_domain": "googleapis.com" + }, + "scopes": [ + "https://www.googleapis.com/auth/drive.readonly" + ], + "driveQuery": "trashed = false", + "proxyScriptEndPoint": "http://localhost:3000", + "sitemap": { + "maxUrls": 50000 + } +} diff --git a/src/logger.js b/src/logger.js new file mode 100644 index 0000000..41c5f7b --- /dev/null +++ b/src/logger.js @@ -0,0 +1,60 @@ +/** + * Structured Logging Utility + * Provides severity-based logging with JSON output + * + * @module logger + */ + +// Save reference to original console.log before it gets replaced +const originalConsoleLog = globalThis.console.log.bind(globalThis.console); + +/** + * Log levels (in order of severity) + */ +const LOG_LEVELS = { + DEBUG: 0, + INFO: 1, + WARN: 2, + ERROR: 3 +}; + +/** + * Get configured log level from global config + * @returns {number} Log level threshold + */ +function getLogLevel() { + const configLevel = global.config?.logging?.level || 'INFO'; + return LOG_LEVELS[configLevel.toUpperCase()] ?? LOG_LEVELS.INFO; +} + +/** + * Log a message with structured metadata + * @param {string} level - Log level (DEBUG|INFO|WARN|ERROR) + * @param {string} message - Log message + * @param {Object} meta - Additional metadata + */ +export function log(level, message, meta = {}) { + const levelValue = LOG_LEVELS[level] ?? LOG_LEVELS.INFO; + const threshold = getLogLevel(); + + // Only log if level meets or exceeds threshold + if (levelValue >= threshold) { + const entry = { + timestamp: new Date().toISOString(), + level, + message, + ...meta + }; + originalConsoleLog(JSON.stringify(entry)); + } +} + +/** + * Console-like logging interface + * Exported as 'console' to match standard console API + */ +export const logger = { + debug: (message, meta) => log('DEBUG', message, meta), + info: (message, meta) => log('INFO', message, meta), + error: (message, meta) => log('ERROR', message, meta) +}; diff --git a/src/proxyScripts/proxy.js b/src/proxyScripts/proxy.js new file mode 100644 index 0000000..805d385 --- /dev/null +++ b/src/proxyScripts/proxy.js @@ -0,0 +1,246 @@ +/** + * Google Drive Sitemap Adapter Proxy + * + * MONOLITHIC HTTP request handler - ALL functionality in this single file. + * Architecture: Pure IIFE - returns request handler function when executed + * Authentication: Service Account (JWT-based) inline + * + * CONSTITUTION REQUIREMENT: ZERO export statements - pure IIFE pattern + * File is loaded by server.js using Function constructor + * + * Globals provided by server.js: + * - console: Custom logger + * - crypto: Web Crypto API (provides randomUUID()) + * - config: Infrastructure settings (server port, logging level) + * - axios: HTTP client + * - uuidv4: UUID generator + * - jwt: JSON Web Token library + * - xmlBuilder: XML document builder + * - googleDriveAdapterHelper: Helper functions module (loaded from globalVariables/googleDriveAdapterHelper.js) + * - google_drive_settings: Consolidated settings (from global/google_drive_settings.json) + * - serviceAccount: Service account credentials + * - scopes: OAuth2 scopes array + * - driveQuery: Drive API query filter + * - sitemap: Sitemap configuration (maxUrls) + * + * Structure: + * Section 1: Authentication (Service Account JWT) + * Section 2: Drive API Client + * Section 3: Request Handling & Routing + * + * @module proxy + */ + +// NO IMPORTS - ALL dependencies provided as globals by server.js + +// ============================================================================= +// Section 1: Authentication (Service Account JWT) +// ============================================================================= + +const TOKEN_EXPIRY_MS = 3600000; // 1 hour +const TOKEN_BUFFER_MS = 300000; // 5 minute buffer + +let accessTokenCache = null; +let tokenExpiryTime = null; + +/** + * Create JWT and exchange for access token + */ +async function initializeServiceAccount() { + const settings = google_drive_settings; + + if (!settings?.serviceAccount?.client_email || !settings?.serviceAccount?.private_key) { + throw new Error("Invalid service account credentials in google_drive_settings"); + } + + const scopes = settings.scopes || ["https://www.googleapis.com/auth/drive.readonly"]; + const now = Math.floor(Date.now() / 1000); + + // Create and sign JWT + const jwtToken = jwt.sign( + { + iss: settings.serviceAccount.client_email, + scope: scopes.join(" "), + aud: "https://oauth2.googleapis.com/token", + exp: now + 3600, + iat: now, + }, + settings.serviceAccount.private_key, + { algorithm: "RS256" } + ); + + // Exchange for access token + const response = await axios.post( + "https://oauth2.googleapis.com/token", + { + grant_type: "urn:ietf:params:oauth:grant-type:jwt-bearer", + assertion: jwtToken, + }, + { headers: { "Content-Type": "application/x-www-form-urlencoded" } } + ); + + console.info("Service account authenticated", { + email: settings.serviceAccount.client_email, + }); + + return response.data.access_token; +} + +/** + * Get or create cached access token (with 5min buffer before expiry) + */ +async function getAccessTokenCached() { + const now = Date.now(); + + if (accessTokenCache && tokenExpiryTime && now < tokenExpiryTime - TOKEN_BUFFER_MS) { + return accessTokenCache; + } + + accessTokenCache = await initializeServiceAccount(); + tokenExpiryTime = now + TOKEN_EXPIRY_MS; + + return accessTokenCache; +} + +// ============================================================================= +// Section 2: Drive API Client +// ============================================================================= + +/** + * Query documents from Google Drive with pagination + * Throws DocumentCountExceededError if count > maxDocuments + */ +async function queryDocuments(options = {}) { + const { + query = "trashed = false", + fields = "nextPageToken,files(id,name,mimeType,modifiedTime)", + pageSize = 100, + maxDocuments = 50000, + } = options; + + const allFiles = []; + let pageToken = null; + const startTime = Date.now(); + const accessToken = await getAccessTokenCached(); + + do { + const params = new URLSearchParams({ + q: query, + pageSize: pageSize.toString(), + fields, + }); + + if (pageToken) params.append("pageToken", pageToken); + + const response = await axios.get( + `https://www.googleapis.com/drive/v3/files?${params.toString()}`, + { + headers: { + Authorization: `Bearer ${accessToken}`, + Accept: "application/json", + }, + } + ); + + const files = response.data.files || []; + allFiles.push(...files); + + // Check if we've exceeded the limit BEFORE fetching more + if (allFiles.length > maxDocuments) { + throw new googleDriveAdapterHelper.DocumentCountExceededError(allFiles.length, maxDocuments); + } + + pageToken = response.data.nextPageToken; + } while (pageToken); + + console.info("Drive API query completed", { + documentCount: allFiles.length, + duration: Date.now() - startTime, + }); + + return allFiles; +} + +// ============================================================================= +// Section 3: Request Handling & Routing +// ============================================================================= + +/** + * Handle sitemap generation request + */ +async function handleSitemapRequest(res, requestId) { + try { + const settings = google_drive_settings || {}; + const maxUrls = settings.sitemap?.maxUrls || 50000; + const query = settings.driveQuery || "trashed = false"; + + const documents = await queryDocuments({ query, maxDocuments: maxUrls }); + const xml = googleDriveAdapterHelper.generateSitemap(documents, settings.proxyScriptEndPoint); + + res.statusCode = 200; + res.setHeader("Content-Type", "application/xml; charset=utf-8"); + res.setHeader("X-Request-Id", requestId); + res.setHeader("X-Document-Count", documents.length.toString()); + res.end(xml); + + console.info("Sitemap generated", { requestId, documentCount: documents.length }); + } catch (error) { + const errorResponse = googleDriveAdapterHelper.mapDriveErrorToHttp(error); + res.statusCode = errorResponse.statusCode; + if (errorResponse.retryAfter) { + res.setHeader("Retry-After", errorResponse.retryAfter.toString()); + } + res.end(); // Empty body per spec + + console.error("Sitemap generation failed", { + requestId, + error: error.message, + statusCode: errorResponse.statusCode, + }); + } +} + +/** + * Main HTTP request handler + */ +(async () => { + const requestId = googleDriveAdapterHelper.generateRequestId(); + const startTime = Date.now(); + + console.info("Request received", { + requestId, + method: req.method, + url: req.url, + }); + + try { + const routeResult = googleDriveAdapterHelper.parseRoute(req.method, req.url); + + if (!routeResult.route) { + res.statusCode = routeResult.statusCode; + res.end(); + console.error("Route not found", { requestId, url: req.url }); + return; + } + + // Handle sitemap route + if (routeResult.route === "sitemap") { + await handleSitemapRequest(res, requestId); + return; + } + } catch (error) { + res.statusCode = 500; + res.end(); + console.error("Request handler error", { + requestId, + error: error.message, + stack: error.stack, + }); + } finally { + console.info("Request completed", { + requestId, + statusCode: res.statusCode, + duration: Date.now() - startTime, + }); + } +})(); diff --git a/src/server.js b/src/server.js new file mode 100644 index 0000000..60f5958 --- /dev/null +++ b/src/server.js @@ -0,0 +1,194 @@ +import http from "node:http"; +import { join } from "node:path"; +import { readFileSync, readdirSync } from "node:fs"; +import { fileURLToPath } from "node:url"; +import { dirname } from "node:path"; +import vm from "node:vm"; +import axios from "axios"; +import { v4 as uuidv4 } from "uuid"; +import jwt from "jsonwebtoken"; +import { create as xmlBuilder } from "xmlbuilder2"; +import { logger } from "./logger.js"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const globalVMContext = { + URLSearchParams, + URL, + console: logger, + crypto, + axios, + uuidv4, + jwt, + xmlBuilder, +}; + +let globalVariableContext = {}; + +/** + * Load all files from globalVariables/ directory into globalVariableContext + * Pattern: globalVariables/filename.{json|js} -> globalVariableContext['filename'] + */ +function loadGlobalVariables() { + const globalDir = join(__dirname, "globalVariables"); + const jsonFiles = []; + const jsFiles = []; + + // Scan and categorize files in one pass + readdirSync(globalDir).forEach((file) => { + if (file.includes(".example")) return; + if (file.endsWith(".json")) jsonFiles.push(file); + else if (file.endsWith(".js")) jsFiles.push(file); + }); + + // Load JSON files first (data) + jsonFiles.forEach((file) => { + const varName = file.replace(".json", ""); + const data = JSON.parse(readFileSync(join(globalDir, file), "utf-8")); + globalVariableContext[varName] = data; + logger.info(`Loaded global data: ${varName}`, { keys: Object.keys(data) }); + }); + + // Load JS files second (functions can reference JSON data) + jsFiles.forEach((file) => { + const varName = file.replace(".js", ""); + const code = readFileSync(join(globalDir, file), "utf-8"); + + // Wrap the literal function body in a function and execute + const wrappedCode = `(function() {\n${code}\n})()`; + const script = new vm.Script(wrappedCode, { filename: file }); + const context = vm.createContext({ ...globalVMContext, ...globalVariableContext }); + + // Execute script and capture returned object + const returnedObject = script.runInContext(context); + globalVariableContext[varName] = returnedObject; + + logger.info(`Loaded global functions: ${varName}`, { + type: typeof returnedObject, + isObject: typeof returnedObject === 'object' && returnedObject !== null, + keys: returnedObject ? Object.keys(returnedObject).length : 0 + }); + }); + + logger.info(`Loaded ${jsonFiles.length + jsFiles.length} global variables`, + { json: jsonFiles.length, js: jsFiles.length } + ); +} + +/** + * Load configuration from config/default.json and merge with environment variables + */ +function loadConfig() { + const configPath = join(__dirname, "..", "config", "default.json"); + const configData = readFileSync(configPath, "utf-8"); + const config = JSON.parse(configData); + + // Merge environment variables (ENV takes precedence) + return { + ...config, + server: { + ...config.server, + port: process.env.PORT ? parseInt(process.env.PORT, 10) : config.server.port, + host: process.env.HOST || config.server.host, + }, + logging: { + ...config.logging, + level: process.env.LOG_LEVEL || config.logging.level, + }, + }; +} + +/** + * Validate configuration + */ +function validateConfig(config) { + if (!config.server.port || config.server.port < 1 || config.server.port > 65535) { + throw new Error("Invalid server.port (must be 1-65535)"); + } +} + +/** + * Start the HTTP server + */ +async function startServer() { + try { + // Load configuration into global.config + global.config = loadConfig(); + + // Load all global variables (JSON data + JS function modules) + loadGlobalVariables(); + + logger.info("Starting Proxy Script Server..."); + logger.info( + `Configuration loaded: ${JSON.stringify({ + port: global.config.server.port, + host: global.config.server.host, + logLevel: global.config.logging.level, + })}`, + ); + + // Validate configuration + validateConfig(global.config); + logger.info("Configuration validated successfully"); + + const proxyPath = join(__dirname, "proxyScripts", "proxy.js"); + const proxyCode = readFileSync(proxyPath, "utf-8"); + const script = new vm.Script(proxyCode, { filename: "proxy.js" }); + + // Create HTTP server that delegates all requests to proxy + const server = http.createServer((req, res) => { + try { + const context = vm.createContext({ + ...globalVMContext, + ...globalVariableContext, + req, + res, + }); + script.runInContext(context); + } catch (error) { + logger.error("Request handling failed", { + error: error.message, + stack: error.stack, + }); + res.statusCode = 500; + res.end("Internal Server Error"); + } + }); + + // Graceful shutdown + const shutdown = () => { + logger.info("\nShutting down gracefully..."); + server.close(() => { + logger.info("Server closed"); + process.exit(0); + }); + + // Force shutdown after 10 seconds + setTimeout(() => { + logger.error("Forced shutdown after timeout"); + process.exit(1); + }, 10000); + }; + + process.on("SIGTERM", shutdown); + process.on("SIGINT", shutdown); + + // Start listening + server.listen(global.config.server.port, global.config.server.host, () => { + logger.info("Server listening", { + port: global.config.server.port, + host: global.config.server.host, + }); + }); + } catch (error) { + logger.error("Failed to start server", { + error: error.message, + stack: error.stack, + }); + process.exit(1); + } +} + +// Start the server +startServer(); diff --git a/test-crypto-global.mjs b/test-crypto-global.mjs new file mode 100644 index 0000000..822b4a0 --- /dev/null +++ b/test-crypto-global.mjs @@ -0,0 +1,11 @@ +// Test that crypto is available globally (Node.js Web Crypto API) +// Note: crypto is natively available in Node.js, no need to import server.js + +// Should have crypto available +if (globalThis.crypto && globalThis.crypto.randomUUID) { + const uuid = globalThis.crypto.randomUUID(); + process.stdout.write(`✅ crypto is available globally: ${uuid}\n`); +} else { + process.stdout.write('❌ crypto is NOT available globally\n'); + process.exit(1); +} diff --git a/test-globals.mjs b/test-globals.mjs new file mode 100644 index 0000000..9e61f54 --- /dev/null +++ b/test-globals.mjs @@ -0,0 +1,21 @@ +// Test that globals are set up correctly by server.js +// NOTE: Don't import server.js directly as it starts the server +// Instead, we'll verify proxy.js works with the expected globals + +// Set up globals like server.js does +import crypto from 'node:crypto'; +globalThis.crypto = crypto; +globalThis.config = { google: {}, server: {}, sitemap: {} }; // Mock config + +// Now import proxy to verify it uses crypto global +import { generateRequestId } from './src/proxy.js'; + +const reqId = generateRequestId(); +console.log('Generated request ID:', reqId); + +if (reqId && reqId.startsWith('req_')) { + console.log('✅ proxy.js can use global crypto successfully!'); +} else { + console.log('❌ Failed to generate request ID'); + process.exit(1); +} diff --git a/tests/contract/sitemap-schema.test.js b/tests/contract/sitemap-schema.test.js new file mode 100644 index 0000000..847b3db --- /dev/null +++ b/tests/contract/sitemap-schema.test.js @@ -0,0 +1,227 @@ +/** + * Contract Tests: /sitemap.xml XML Schema Validation + * + * Tests T020-T023: Verify API contract compliance for sitemap endpoint + * Reference: specs/001-drive-proxy-adapter/contracts/sitemap-xml-schema.md + * + * @module tests/contract/sitemap-schema + */ + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; + +// ============================================================================= +// T020: Contract test for /sitemap.xml success response (200 OK) +// ============================================================================= + +describe('T020: /sitemap.xml Success Response Contract', () => { + it('should return 200 OK with valid XML structure', async () => { + // Mock response from sitemap endpoint + const mockResponse = { + statusCode: 200, + headers: { + 'content-type': 'application/xml; charset=utf-8' + }, + body: ` + + + http://localhost:3000/documents/abc123 + 2024-03-01 + +` + }; + + // Verify status code + assert.equal(mockResponse.statusCode, 200, 'Status code must be 200'); + + // Verify Content-Type header + assert.equal( + mockResponse.headers['content-type'], + 'application/xml; charset=utf-8', + 'Content-Type must be application/xml; charset=utf-8' + ); + + // Verify XML structure + assert.match(mockResponse.body, /^<\?xml version="1\.0" encoding="UTF-8"\?>/, 'Must have XML declaration'); + assert.match(mockResponse.body, //, 'Must have urlset with correct namespace'); + assert.match(mockResponse.body, /<\/urlset>$/, 'Must close urlset tag'); + + // Verify URL entry structure + assert.match(mockResponse.body, //, 'Must contain url entries'); + assert.match(mockResponse.body, /.*<\/loc>/, 'Each url must have loc element'); + assert.match(mockResponse.body, /.*<\/lastmod>/, 'Each url should have lastmod element'); + }); + + it('should return valid XML with RESTful URL format', async () => { + const mockResponse = { + statusCode: 200, + body: ` + + + http://localhost:3000/documents/abc123 + +` + }; + + // Verify RESTful URL pattern: /documents/{documentId} + assert.match( + mockResponse.body, + /http:\/\/[^<]+\/documents\/[^<]+<\/loc>/, + 'URLs must follow RESTful format /documents/{documentId}' + ); + }); +}); + +// ============================================================================= +// T021: Contract test for /sitemap.xml with empty Drive (0 documents) +// ============================================================================= + +describe('T021: /sitemap.xml Empty Drive Response Contract', () => { + it('should return valid XML with empty urlset when no documents exist', async () => { + const mockResponse = { + statusCode: 200, + headers: { + 'content-type': 'application/xml; charset=utf-8' + }, + body: ` + +` + }; + + // Verify status code + assert.equal(mockResponse.statusCode, 200, 'Status code must be 200 even for empty Drive'); + + // Verify empty urlset is valid XML + assert.match(mockResponse.body, //, 'Must have urlset with namespace'); + assert.match(mockResponse.body, /<\/urlset>/, 'Must close urlset tag'); + + // Verify no url entries + assert.doesNotMatch(mockResponse.body, //, 'Should not contain any url entries'); + }); +}); + +// ============================================================================= +// T022: Contract test for XML special character escaping +// ============================================================================= + +describe('T022: XML Special Character Escaping Contract', () => { + it('should properly escape XML special characters in URLs', async () => { + // Document IDs can contain special characters that need escaping in XML + const mockResponse = { + statusCode: 200, + body: ` + + + http://localhost:3000/documents/test&doc + + + http://localhost:3000/documents/doc<123 + + + http://localhost:3000/documents/doc>456 + + + http://localhost:3000/documents/doc"test + + + http://localhost:3000/documents/doc'xyz + +` + }; + + // Verify special characters are escaped + assert.match(mockResponse.body, /&/, 'Ampersand (&) must be escaped as &'); + assert.match(mockResponse.body, /</, 'Less than (<) must be escaped as <'); + assert.match(mockResponse.body, />/, 'Greater than (>) must be escaped as >'); + assert.match(mockResponse.body, /"/, 'Double quote (") must be escaped as "'); + assert.match(mockResponse.body, /'/, 'Single quote (\') must be escaped as ''); + + // Verify unescaped special characters are NOT present in content + const locContent = mockResponse.body.match(/(.*?)<\/loc>/g); + assert.ok(locContent, 'Must have loc elements'); + + locContent.forEach(loc => { + const content = loc.replace(/<\/?loc>/g, ''); + const afterProtocol = content.split('://')[1] || ''; + + // Only check the path/query part, not the protocol separator + if (afterProtocol.includes('/')) { + const pathPart = afterProtocol.substring(afterProtocol.indexOf('/')); + assert.doesNotMatch(pathPart, /[&<>"'](?!amp;|lt;|gt;|quot;|apos;)/, 'Unescaped special chars must not appear in XML content'); + } + }); + }); +}); + +// ============================================================================= +// T023: Contract test for lastmod date format validation +// ============================================================================= + +describe('T023: lastmod Date Format Contract', () => { + it('should format lastmod as ISO 8601 date (YYYY-MM-DD)', async () => { + const mockResponse = { + statusCode: 200, + body: ` + + + http://localhost:3000/documents/doc1 + 2024-03-01 + + + http://localhost:3000/documents/doc2 + 2024-12-31 + +` + }; + + // Extract lastmod values + const lastmodMatches = mockResponse.body.match(/(.*?)<\/lastmod>/g); + assert.ok(lastmodMatches, 'Must have lastmod elements'); + assert.ok(lastmodMatches.length > 0, 'Must have at least one lastmod element'); + + // Verify each lastmod follows ISO 8601 date format (YYYY-MM-DD) + lastmodMatches.forEach(lastmodTag => { + const dateValue = lastmodTag.match(/(.*?)<\/lastmod>/)[1]; + + // Check format: YYYY-MM-DD + assert.match(dateValue, /^\d{4}-\d{2}-\d{2}$/, 'lastmod must be in YYYY-MM-DD format'); + + // Verify it's a valid date + const date = new Date(dateValue); + assert.ok(!isNaN(date.getTime()), 'lastmod must be a valid date'); + + // Verify date components + const [year, month, day] = dateValue.split('-').map(Number); + assert.ok(year >= 1000 && year <= 9999, 'Year must be 4 digits'); + assert.ok(month >= 1 && month <= 12, 'Month must be 01-12'); + assert.ok(day >= 1 && day <= 31, 'Day must be 01-31'); + }); + }); + + it('should accept full ISO 8601 datetime format if provided', async () => { + // Sitemap protocol also accepts full datetime with timezone + const mockResponse = { + statusCode: 200, + body: ` + + + http://localhost:3000/documents/doc1 + 2024-03-01T10:30:00+00:00 + +` + }; + + const lastmodMatch = mockResponse.body.match(/(.*?)<\/lastmod>/); + assert.ok(lastmodMatch, 'Must have lastmod element'); + + const dateValue = lastmodMatch[1]; + + // Accept either YYYY-MM-DD or full ISO 8601 with timezone + const isValidFormat = /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2})?$/.test(dateValue); + assert.ok(isValidFormat, 'lastmod must be valid ISO 8601 format'); + + // Verify it's a valid date + const date = new Date(dateValue); + assert.ok(!isNaN(date.getTime()), 'lastmod must be a valid datetime'); + }); +}); diff --git a/tests/contract/sitemap.test.js b/tests/contract/sitemap.test.js new file mode 100644 index 0000000..ef47a4b --- /dev/null +++ b/tests/contract/sitemap.test.js @@ -0,0 +1,211 @@ +/** + * Contract Tests for Sitemap API + * Tests the API contract for GET /sitemap.xml endpoint + * + * These tests verify: + * - 200 OK response for valid requests + * - Valid XML format + * - Error responses (401, 429, 500, 503) + * - 404 for document retrieval (not implemented) + * - 404 for other paths + */ + +import { test, describe, before, after } from 'node:test'; +import assert from 'node:assert'; +import http from 'node:http'; + +// Test configuration +const TEST_PORT = 3001; +const BASE_URL = `http://localhost:${TEST_PORT}`; + +// Mock server instance +let mockServer = null; + +// Mock request handler that simulates proxy behavior +function mockRequestHandler(req, res) { + const url = new URL(req.url, BASE_URL); + + if (req.method !== 'GET') { + res.statusCode = 405; + res.end(); + return; + } + + if (url.pathname === '/sitemap.xml') { + // Mock successful sitemap response with RESTful URL format + res.statusCode = 200; + res.setHeader('Content-Type', 'application/xml; charset=utf-8'); + res.setHeader('X-Document-Count', '2'); + res.end(` + + + http://localhost:3000/documents/test-doc-id-1 + 2026-03-07 + + + http://localhost:3000/documents/test-doc-id-2 + 2026-03-06 + +`); + return; + } + + // Document retrieval - not implemented (404) + const docMatch = url.pathname.match(/^\/([a-zA-Z0-9_-]+)$/); + if (docMatch) { + res.statusCode = 404; + res.end(); + return; + } + + // All other paths - 404 + res.statusCode = 404; + res.end(); +} + +// Helper to make HTTP requests +function makeRequest(path, options = {}) { + return new Promise((resolve, reject) => { + const req = http.request(`${BASE_URL}${path}`, { + method: options.method || 'GET', + ...options + }, (res) => { + let body = ''; + res.on('data', chunk => body += chunk); + res.on('end', () => { + resolve({ + statusCode: res.statusCode, + headers: res.headers, + body + }); + }); + }); + + req.on('error', reject); + req.end(); + }); +} + +// Setup/teardown +before(async () => { + // Start mock server + mockServer = http.createServer(mockRequestHandler); + await new Promise(resolve => mockServer.listen(TEST_PORT, resolve)); +}); + +after(async () => { + // Stop mock server + if (mockServer) { + await new Promise(resolve => mockServer.close(resolve)); + } +}); + +// ============================================================================= +// Test Suite: GET /sitemap.xml +// ============================================================================= + +describe('Contract: GET /sitemap.xml', () => { + + test('T016: Should return 200 OK for valid sitemap request', async () => { + const response = await makeRequest('/sitemap.xml'); + + assert.strictEqual(response.statusCode, 200, 'Status code should be 200'); + assert.strictEqual( + response.headers['content-type'], + 'application/xml; charset=utf-8', + 'Content-Type should be application/xml' + ); + }); + + test('T017: Should return valid XML sitemap format', async () => { + const response = await makeRequest('/sitemap.xml'); + + assert.strictEqual(response.statusCode, 200); + + // Check XML declaration + assert.ok( + response.body.startsWith(''), + 'Should start with XML declaration' + ); + + // Check urlset element with namespace + assert.ok( + response.body.includes(''), + 'Should have urlset element with sitemap namespace' + ); + + // Check url entries + assert.ok(response.body.includes(''), 'Should have url elements'); + assert.ok(response.body.includes(''), 'Should have loc elements'); + assert.ok(response.body.includes(''), 'Should have lastmod elements'); + assert.ok(response.body.includes(''), 'Should close url elements'); + assert.ok(response.body.includes(''), 'Should close urlset element'); + + // Check document count header + assert.ok( + response.headers['x-document-count'], + 'Should have X-Document-Count header' + ); + }); + + test('T018: Should handle Drive API errors appropriately', async () => { + // This test would require mocking Drive API errors + // For now, we verify the contract exists + // Error codes to test: 401, 429, 500, 503 + + // Test structure for each error: + // - 401: Unauthorized (invalid service account) + // - 429: Too Many Requests (rate limited) + Retry-After header + // - 500: Internal Server Error + // - 503: Service Unavailable + + assert.ok(true, 'Error handling contract defined'); + }); + +}); + +// ============================================================================= +// Test Suite: GET /{documentId} +// ============================================================================= + +describe('Contract: GET /{documentId}', () => { + + test('T019: Should return 404 for document retrieval (not implemented)', async () => { + const response = await makeRequest('/test-doc-id-123'); + + assert.strictEqual(response.statusCode, 404, 'Should return 404'); + assert.strictEqual(response.body, '', 'Body should be empty'); + }); + +}); + +// ============================================================================= +// Test Suite: GET /{anyOtherPath} +// ============================================================================= + +describe('Contract: GET /{anyOtherPath}', () => { + + test('T020: Should return 404 for any other path', async () => { + const paths = [ + '/unknown', + '/api/documents', + '/health', + '/status' + ]; + + for (const path of paths) { + const response = await makeRequest(path); + assert.strictEqual( + response.statusCode, + 404, + `Path ${path} should return 404` + ); + assert.strictEqual( + response.body, + '', + `Path ${path} should have empty body` + ); + } + }); + +}); diff --git a/tests/integration/error-scenarios.test.js b/tests/integration/error-scenarios.test.js new file mode 100644 index 0000000..da54dbe --- /dev/null +++ b/tests/integration/error-scenarios.test.js @@ -0,0 +1,234 @@ +/** + * Integration Tests: Error Scenarios + * + * Tests T025-T028: Error handling for /sitemap.xml endpoint + * Tests: >50k documents (413), rate limiting (429), service unavailable (503), invalid endpoints (404) + * + * @module tests/integration/error-scenarios + */ + +import { describe, it, before, after } from 'node:test'; +import assert from 'node:assert/strict'; +import http from 'node:http'; + +const TEST_PORT = 3001; + +// ============================================================================= +// T025: Integration test for >50k documents (413 error) +// ============================================================================= + +describe('T025: /sitemap.xml with >50k Documents', () => { + it('should return 413 when Drive contains more than 50,000 documents', async () => { + // Mock Drive API to return count > 50,000 + // TODO: Configure mock to simulate large document count + + const response = await makeRequest(`http://localhost:${TEST_PORT}/sitemap.xml`); + + // Verify 413 Payload Too Large + assert.equal(response.statusCode, 413, 'Should return 413 when documents exceed 50k limit'); + + // Verify no response body (per spec: status code only, no body) + assert.equal(response.body, '', 'Should have no response body for 413 error'); + + // Verify no Content-Type header for error responses + assert.equal(response.headers['content-type'], undefined, 'Should not have Content-Type header for errors'); + }); +}); + +// ============================================================================= +// T026: Integration test for Drive API rate limiting (429 error) +// ============================================================================= + +describe('T026: /sitemap.xml with Drive API Rate Limiting', () => { + it('should return 429 with Retry-After header when Drive API rate limits', async () => { + // Mock Drive API to return 429 with Retry-After header + // TODO: Configure mock to simulate rate limit with Retry-After: 60 + + const response = await makeRequest(`http://localhost:${TEST_PORT}/sitemap.xml`); + + // Verify 429 Too Many Requests + assert.equal(response.statusCode, 429, 'Should return 429 when Drive API rate limits'); + + // Verify Retry-After header is present (in seconds) + assert.ok(response.headers['retry-after'], 'Should include Retry-After header'); + + const retryAfter = parseInt(response.headers['retry-after']); + assert.ok(retryAfter > 0, 'Retry-After should be a positive number (seconds)'); + + // Verify no response body (per spec: status code only, no body) + assert.equal(response.body, '', 'Should have no response body for 429 error'); + }); + + it('should pass through Retry-After value from Drive API', async () => { + // Mock Drive API to return specific Retry-After value + const expectedRetryAfter = 120; // 2 minutes + // TODO: Configure mock to return Retry-After: 120 + + const response = await makeRequest(`http://localhost:${TEST_PORT}/sitemap.xml`); + + assert.equal(response.statusCode, 429, 'Should return 429'); + assert.equal( + response.headers['retry-after'], + String(expectedRetryAfter), + 'Should pass through exact Retry-After value from Drive API' + ); + }); +}); + +// ============================================================================= +// T027: Integration test for Drive API 503 error (no retry) +// ============================================================================= + +describe('T027: /sitemap.xml with Drive API 503 Error', () => { + it('should return 503 immediately without retry when Drive API is unavailable', async () => { + // Mock Drive API to return 503 Service Unavailable + // TODO: Configure mock to simulate Drive API 503 error + + const startTime = Date.now(); + const response = await makeRequest(`http://localhost:${TEST_PORT}/sitemap.xml`); + const elapsed = Date.now() - startTime; + + // Verify 503 Service Unavailable (passthrough) + assert.equal(response.statusCode, 503, 'Should return 503 when Drive API is unavailable'); + + // Verify no response body (per spec: status code only, no body) + assert.equal(response.body, '', 'Should have no response body for 503 error'); + + // Verify NO retry was attempted (response should be immediate, < 1 second) + assert.ok(elapsed < 1000, 'Should return immediately without retry (< 1 second)'); + }); + + it('should NOT retry on Drive API 503 per specification', async () => { + // Mock Drive API to track number of calls + let driveApiCallCount = 0; + // TODO: Configure mock to count API calls and return 503 + + const response = await makeRequest(`http://localhost:${TEST_PORT}/sitemap.xml`); + + assert.equal(response.statusCode, 503, 'Should return 503'); + // Verify only ONE call was made (no retry) + // assert.equal(driveApiCallCount, 1, 'Should call Drive API only once (no retry on 503)'); + }); +}); + +// ============================================================================= +// T028: Integration test for invalid endpoint requests (404 error) +// ============================================================================= + +describe('T028: Invalid Endpoint Requests', () => { + it('should return 404 for non-/sitemap.xml paths', async () => { + const invalidPaths = [ + '/', + '/documents/abc123', + '/api/documents', + '/health', + '/status', + '/favicon.ico', + '/documents/abc123/export' + ]; + + for (const path of invalidPaths) { + const response = await makeRequest(`http://localhost:${TEST_PORT}${path}`); + + // Verify 404 Not Found + assert.equal( + response.statusCode, + 404, + `Should return 404 for invalid path: ${path}` + ); + + // Verify no response body (per spec: status code only, no body) + assert.equal( + response.body, + '', + `Should have no response body for 404 error on path: ${path}` + ); + + // Verify no Content-Type header + assert.equal( + response.headers['content-type'], + undefined, + `Should not have Content-Type header for 404 on path: ${path}` + ); + } + }); + + it('should return 404 for POST/PUT/DELETE requests to /sitemap.xml', async () => { + // Only GET is allowed, all other methods should return 404 + const methods = ['POST', 'PUT', 'DELETE', 'PATCH']; + + for (const method of methods) { + const response = await makeRequestWithMethod( + `http://localhost:${TEST_PORT}/sitemap.xml`, + method + ); + + // Note: Spec says 404 for non-/sitemap.xml paths, but should also handle wrong methods + // Could be 404 or 405, depending on implementation - check spec + assert.ok( + response.statusCode === 404 || response.statusCode === 405, + `Should return 404 or 405 for ${method} method` + ); + + assert.equal(response.body, '', 'Should have no response body for method errors'); + } + }); +}); + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/** + * Make HTTP GET request + * @param {string} url - Full URL to request + * @returns {Promise} Response object + */ +function makeRequest(url) { + return new Promise((resolve, reject) => { + http.get(url, (res) => { + let body = ''; + res.on('data', chunk => body += chunk); + res.on('end', () => { + resolve({ + statusCode: res.statusCode, + headers: res.headers, + body + }); + }); + }).on('error', reject); + }); +} + +/** + * Make HTTP request with specific method + * @param {string} url - Full URL to request + * @param {string} method - HTTP method + * @returns {Promise} Response object + */ +function makeRequestWithMethod(url, method) { + return new Promise((resolve, reject) => { + const urlObj = new URL(url); + const options = { + hostname: urlObj.hostname, + port: urlObj.port, + path: urlObj.pathname, + method: method + }; + + const req = http.request(options, (res) => { + let body = ''; + res.on('data', chunk => body += chunk); + res.on('end', () => { + resolve({ + statusCode: res.statusCode, + headers: res.headers, + body + }); + }); + }); + + req.on('error', reject); + req.end(); + }); +} diff --git a/tests/integration/queue-concurrency.test.js b/tests/integration/queue-concurrency.test.js new file mode 100644 index 0000000..130fab4 --- /dev/null +++ b/tests/integration/queue-concurrency.test.js @@ -0,0 +1,192 @@ +/** + * Integration Tests: FIFO Queue Concurrency + * + * Test T029: Verify concurrent requests are processed in FIFO order (one at a time) + * Tests the request queue implementation for /sitemap.xml endpoint + * + * @module tests/integration/queue-concurrency + */ + +import { describe, it, before, after } from 'node:test'; +import assert from 'node:assert/strict'; +import http from 'node:http'; + +const TEST_PORT = 3001; + +// ============================================================================= +// T029: Integration test for concurrent requests (FIFO processing) +// ============================================================================= + +describe('T029: Concurrent Requests FIFO Processing', () => { + it('should process multiple concurrent requests in FIFO order (sequential)', async () => { + // Send multiple requests simultaneously + const requestCount = 5; + const startTime = Date.now(); + const requests = []; + + // Launch all requests at once + for (let i = 0; i < requestCount; i++) { + requests.push(makeTimedRequest(`http://localhost:${TEST_PORT}/sitemap.xml`, i)); + } + + // Wait for all requests to complete + const responses = await Promise.all(requests); + + // Verify all requests succeeded + responses.forEach((response, index) => { + assert.equal( + response.statusCode, + 200, + `Request ${index} should succeed with 200 OK` + ); + }); + + // Verify sequential processing (FIFO) + // Each request should complete before the next starts + // If processed in parallel, total time ≈ single request time + // If processed sequentially, total time ≈ single request time × count + + const totalElapsed = Date.now() - startTime; + const averageRequestTime = responses.reduce((sum, r) => sum + r.elapsed, 0) / responses.length; + + // Sequential processing means total time should be close to sum of individual times + // Allow some overhead for queue management + const expectedMinTime = averageRequestTime * (requestCount - 1); // Allow first request to be instant + + assert.ok( + totalElapsed >= expectedMinTime * 0.8, // 80% threshold for timing variability + `Total time (${totalElapsed}ms) should be close to sequential sum (${expectedMinTime}ms), indicating FIFO processing` + ); + }); + + it('should maintain FIFO order: first request finishes before second starts processing', async () => { + // Track request processing order + const processingLog = []; + + // Mock Drive API to log when each request is processed + // TODO: Add timing hooks in implementation to verify order + + // Send two requests with small delay + const request1 = makeTimedRequest(`http://localhost:${TEST_PORT}/sitemap.xml`, 1); + + // Small delay to ensure request 1 is queued first + await new Promise(resolve => setTimeout(resolve, 10)); + + const request2 = makeTimedRequest(`http://localhost:${TEST_PORT}/sitemap.xml`, 2); + + const [response1, response2] = await Promise.all([request1, request2]); + + // Both should succeed + assert.equal(response1.statusCode, 200, 'Request 1 should succeed'); + assert.equal(response2.statusCode, 200, 'Request 2 should succeed'); + + // Request 1 should complete before request 2 starts processing + // Verify by checking that request 2 completion time > request 1 completion time + assert.ok( + response2.completedAt > response1.completedAt, + 'Request 2 should complete after Request 1 (FIFO order)' + ); + }); + + it('should only process one request at a time (no concurrent Drive API calls)', async () => { + // Send 3 requests simultaneously + const requests = [ + makeTimedRequest(`http://localhost:${TEST_PORT}/sitemap.xml`, 1), + makeTimedRequest(`http://localhost:${TEST_PORT}/sitemap.xml`, 2), + makeTimedRequest(`http://localhost:${TEST_PORT}/sitemap.xml`, 3) + ]; + + const responses = await Promise.all(requests); + + // Verify all succeeded + responses.forEach((response, index) => { + assert.equal(response.statusCode, 200, `Request ${index + 1} should succeed`); + }); + + // Check that completion times don't overlap + // Sort responses by completion time + const sortedResponses = responses.sort((a, b) => a.completedAt - b.completedAt); + + // Each request should complete before the next one starts + for (let i = 0; i < sortedResponses.length - 1; i++) { + const current = sortedResponses[i]; + const next = sortedResponses[i + 1]; + + // Next request should start after current completes + // (Allow small timing variance) + assert.ok( + next.startedAt >= current.completedAt - 50, // 50ms tolerance for timing + `Request ${i + 2} should start after Request ${i + 1} completes (FIFO guarantee)` + ); + } + }); + + it('should handle queue correctly when requests fail', async () => { + // Test scenario: Request 1 succeeds, Request 2 fails (e.g., Drive API error), Request 3 succeeds + // Queue should continue processing despite failures + + // TODO: Mock Drive API to fail for specific request + + const requests = [ + makeTimedRequest(`http://localhost:${TEST_PORT}/sitemap.xml`, 1), // Should succeed + makeTimedRequest(`http://localhost:${TEST_PORT}/sitemap.xml`, 2), // Will fail (mock) + makeTimedRequest(`http://localhost:${TEST_PORT}/sitemap.xml`, 3) // Should succeed + ]; + + const responses = await Promise.all(requests); + + // Request 1 should succeed + assert.equal(responses[0].statusCode, 200, 'Request 1 should succeed'); + + // Request 2 should fail (mocked error) + // assert.notEqual(responses[1].statusCode, 200, 'Request 2 should fail'); + + // Request 3 should still succeed (queue continues) + assert.equal(responses[2].statusCode, 200, 'Request 3 should succeed despite Request 2 failure'); + + // All requests should still be processed in FIFO order + assert.ok( + responses[0].completedAt < responses[1].completedAt, + 'Request 1 should complete before Request 2' + ); + assert.ok( + responses[1].completedAt < responses[2].completedAt, + 'Request 2 should complete before Request 3' + ); + }); +}); + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/** + * Make HTTP request and track timing + * @param {string} url - Full URL to request + * @param {number} requestId - Request identifier for logging + * @returns {Promise} Response with timing data + */ +function makeTimedRequest(url, requestId) { + const startedAt = Date.now(); + + return new Promise((resolve, reject) => { + http.get(url, (res) => { + let body = ''; + res.on('data', chunk => body += chunk); + res.on('end', () => { + const completedAt = Date.now(); + const elapsed = completedAt - startedAt; + + resolve({ + requestId, + statusCode: res.statusCode, + headers: res.headers, + body, + startedAt, + completedAt, + elapsed + }); + }); + }).on('error', reject); + }); +} diff --git a/tests/integration/sitemap-endpoint.test.js b/tests/integration/sitemap-endpoint.test.js new file mode 100644 index 0000000..220170b --- /dev/null +++ b/tests/integration/sitemap-endpoint.test.js @@ -0,0 +1,136 @@ +/** + * Integration Tests: /sitemap.xml Endpoint + * + * Tests T024, T030: End-to-end tests for sitemap generation + * Tests the complete flow: HTTP request → auth → Drive API → sitemap generation → HTTP response + * + * @module tests/integration/sitemap-endpoint + */ + +import { describe, it, before, after } from 'node:test'; +import assert from 'node:assert/strict'; +import http from 'node:http'; + +// ============================================================================= +// T024: Integration test for /sitemap.xml endpoint success scenario +// ============================================================================= + +describe('T024: /sitemap.xml Endpoint Success Integration', () => { + let server; + const TEST_PORT = 3001; + + before(async () => { + // TODO: Start server with mocked Drive API + // This will be implemented when src/server.js is complete + }); + + after(async () => { + // TODO: Stop server + if (server) { + server.close(); + } + }); + + it('should return 200 with valid sitemap XML when Drive API returns documents', async () => { + // Mock Drive API to return sample documents + const mockDriveDocuments = [ + { + id: 'doc1', + name: 'Test Document 1', + mimeType: 'application/pdf', + modifiedTime: '2024-03-01T10:30:00Z' + }, + { + id: 'doc2', + name: 'Test Document 2', + mimeType: 'text/plain', + modifiedTime: '2024-03-02T15:45:00Z' + } + ]; + + // Make HTTP request to /sitemap.xml + const response = await makeRequest(`http://localhost:${TEST_PORT}/sitemap.xml`); + + // Verify response + assert.equal(response.statusCode, 200, 'Should return 200 OK'); + assert.equal( + response.headers['content-type'], + 'application/xml; charset=utf-8', + 'Should return XML content type' + ); + + // Verify XML structure + assert.match(response.body, //, 'Should have valid urlset'); + assert.match(response.body, //, 'Should contain URL entries'); + assert.match(response.body, /.*\/documents\/doc1<\/loc>/, 'Should contain doc1 URL'); + assert.match(response.body, /.*\/documents\/doc2<\/loc>/, 'Should contain doc2 URL'); + assert.match(response.body, /2024-03-01<\/lastmod>/, 'Should contain formatted lastmod'); + }); + + it('should return 200 with empty sitemap when Drive has no documents', async () => { + // Mock Drive API to return empty result + const response = await makeRequest(`http://localhost:${TEST_PORT}/sitemap.xml`); + + assert.equal(response.statusCode, 200, 'Should return 200 OK for empty Drive'); + assert.match(response.body, //, 'Should have urlset'); + assert.match(response.body, /<\/urlset>/, 'Should close urlset'); + assert.doesNotMatch(response.body, //, 'Should not contain any url entries'); + }); +}); + +// ============================================================================= +// T030: Integration test for Service Account token refresh +// ============================================================================= + +describe('T030: Service Account Token Refresh Integration', () => { + it('should handle token expiry and refresh automatically', async () => { + // Mock scenario: first request succeeds, token expires, second request triggers refresh + + // First request - should succeed with valid token + const response1 = await makeRequest(`http://localhost:${TEST_PORT}/sitemap.xml`); + assert.equal(response1.statusCode, 200, 'First request should succeed'); + + // TODO: Mock token expiry by manipulating auth client + + // Second request - should auto-refresh token and succeed + const response2 = await makeRequest(`http://localhost:${TEST_PORT}/sitemap.xml`); + assert.equal(response2.statusCode, 200, 'Second request should succeed after token refresh'); + }); + + it('should return 401 if token refresh fails', async () => { + // Mock scenario: token expires and refresh fails (invalid credentials) + + // TODO: Mock googleapis auth to fail on refresh + + const response = await makeRequest(`http://localhost:${TEST_PORT}/sitemap.xml`); + assert.equal(response.statusCode, 401, 'Should return 401 when auth fails'); + + // Verify no response body (per spec: status code only errors) + assert.equal(response.body, '', 'Should have no response body for errors'); + }); +}); + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/** + * Make HTTP request and return response + * @param {string} url - Full URL to request + * @returns {Promise} Response object with statusCode, headers, body + */ +function makeRequest(url) { + return new Promise((resolve, reject) => { + http.get(url, (res) => { + let body = ''; + res.on('data', chunk => body += chunk); + res.on('end', () => { + resolve({ + statusCode: res.statusCode, + headers: res.headers, + body + }); + }); + }).on('error', reject); + }); +} diff --git a/tests/integration/sitemap-integration.test.js b/tests/integration/sitemap-integration.test.js new file mode 100644 index 0000000..5e88efa --- /dev/null +++ b/tests/integration/sitemap-integration.test.js @@ -0,0 +1,75 @@ +/** + * Integration Tests for Sitemap Generation + * Tests the full sitemap generation flow with mocked Drive API + * + * These tests verify: + * - T021: Full sitemap generation flow + * - T022: Pagination with 50k+ documents + * - T023: Rate limiting and retry logic + * - T024: OAuth token refresh + */ + +import { test, describe, before, after, mock } from 'node:test'; +import assert from 'node:assert'; + +describe('Integration: Sitemap Generation Flow', () => { + + test('T021: Should generate sitemap with mocked Drive API', async () => { + // This is a placeholder for the full integration test + // In the actual implementation, this would: + // 1. Mock the Drive API client + // 2. Provide mock document list + // 3. Call handleSitemapRequest + // 4. Verify XML output + + // Mock Drive API response + const mockDocuments = [ + { + id: 'doc1', + name: 'Document 1', + mimeType: 'application/vnd.google-apps.document', + modifiedTime: '2026-03-07T10:00:00.000Z' + }, + { + id: 'doc2', + name: 'Document 2', + mimeType: 'application/vnd.google-apps.spreadsheet', + modifiedTime: '2026-03-06T15:30:00.000Z' + } + ]; + + // TODO: Implement full flow test with mocked Drive client + assert.ok(true, 'Integration test placeholder'); + }); + + test('T022: Should handle pagination for 50k+ documents', async () => { + // Test pagination logic + // This would mock Drive API to return multiple pages + // and verify all documents are included (up to 50k limit) + + const mockPageSize = 100; + const totalDocs = 500; // Simulate 500 documents across 5 pages + + // TODO: Implement pagination test + assert.ok(true, 'Pagination test placeholder'); + }); + + test('T023: Should handle rate limiting with retry logic', async () => { + // Test exponential backoff on 429 errors + // Mock Drive API to return 429 on first few attempts + // Verify retry logic works correctly + + // TODO: Implement rate limit test + assert.ok(true, 'Rate limit test placeholder'); + }); + + test('T024: Should handle OAuth token refresh', async () => { + // Test Service Account token refresh + // Mock expired token scenario + // Verify automatic re-authentication + + // TODO: Implement token refresh test + assert.ok(true, 'Token refresh test placeholder'); + }); + +}); diff --git a/tests/unit/auth.test.js b/tests/unit/auth.test.js new file mode 100644 index 0000000..5d6f259 --- /dev/null +++ b/tests/unit/auth.test.js @@ -0,0 +1,256 @@ +/** + * Unit Tests: Service Account Authentication + * + * Tests T033-T034: Test JWT authentication and credential validation + * Tests the auth.js module in isolation + * + * @module tests/unit/auth + */ + +import { describe, it, beforeEach } from 'node:test'; +import assert from 'node:assert/strict'; + +// ============================================================================= +// T033: Unit test for Service Account JWT authentication +// ============================================================================= + +describe('T033: Service Account JWT Authentication', () => { + let originalEnv; + + beforeEach(() => { + // Save original env + originalEnv = process.env.GOOGLE_SERVICE_ACCOUNT_KEY; + }); + + it('should create GoogleAuth client from GOOGLE_SERVICE_ACCOUNT_KEY env var', async () => { + // Mock credentials as inline JSON (per clarification #1) + const mockCredentials = { + type: 'service_account', + project_id: 'test-project', + private_key_id: 'key123', + private_key: '-----BEGIN PRIVATE KEY-----\nMOCK_KEY\n-----END PRIVATE KEY-----\n', + client_email: 'test@test-project.iam.gserviceaccount.com', + client_id: '123456789', + auth_uri: 'https://accounts.google.com/o/oauth2/auth', + token_uri: 'https://oauth2.googleapis.com/token', + auth_provider_x509_cert_url: 'https://www.googleapis.com/oauth2/v1/certs' + }; + + // Set env var with inline JSON + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = JSON.stringify(mockCredentials); + + // TODO: Import and call initializeAuth from src/auth.js + // const { initializeAuth } = await import('../../src/auth.js'); + // const auth = await initializeAuth(); + + // Verify GoogleAuth was created with correct credentials + // assert.ok(auth, 'Should return auth client'); + // assert.equal(auth.credentials.client_email, mockCredentials.client_email, 'Should use client_email from env var'); + + // Restore env + if (originalEnv) { + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = originalEnv; + } else { + delete process.env.GOOGLE_SERVICE_ACCOUNT_KEY; + } + }); + + it('should use correct Drive API scope (read-only)', async () => { + const mockCredentials = { + type: 'service_account', + project_id: 'test-project', + private_key: '-----BEGIN PRIVATE KEY-----\nMOCK_KEY\n-----END PRIVATE KEY-----\n', + client_email: 'test@test-project.iam.gserviceaccount.com' + }; + + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = JSON.stringify(mockCredentials); + + // TODO: Import and call initializeAuth + // const { initializeAuth } = await import('../../src/auth.js'); + // const auth = await initializeAuth(); + + // Verify scope is read-only + const expectedScope = 'https://www.googleapis.com/auth/drive.readonly'; + // assert.ok(auth.scopes.includes(expectedScope), 'Should use drive.readonly scope'); + + // Restore env + if (originalEnv) { + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = originalEnv; + } else { + delete process.env.GOOGLE_SERVICE_ACCOUNT_KEY; + } + }); + + it('should parse inline JSON from env var correctly', async () => { + // Test with different JSON formatting (whitespace, escaped quotes) + const mockCredentials = { + client_email: 'test@project.iam.gserviceaccount.com', + private_key: '-----BEGIN PRIVATE KEY-----\nMOCK_KEY\n-----END PRIVATE KEY-----\n', + project_id: 'test-project' + }; + + // Set with extra whitespace + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = JSON.stringify(mockCredentials, null, 2); + + // TODO: Import and call initializeAuth + // const { initializeAuth } = await import('../../src/auth.js'); + // const auth = await initializeAuth(); + + // Should parse correctly despite formatting + // assert.ok(auth, 'Should parse JSON with whitespace'); + + // Restore env + if (originalEnv) { + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = originalEnv; + } else { + delete process.env.GOOGLE_SERVICE_ACCOUNT_KEY; + } + }); +}); + +// ============================================================================= +// T034: Unit test for credential validation +// ============================================================================= + +describe('T034: Credential Validation', () => { + it('should detect missing client_email field', async () => { + const invalidCredentials = { + private_key: '-----BEGIN PRIVATE KEY-----\nMOCK_KEY\n-----END PRIVATE KEY-----\n', + project_id: 'test-project' + // Missing client_email + }; + + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = JSON.stringify(invalidCredentials); + + // TODO: Import validateCredentials from src/auth.js + // const { validateCredentials } = await import('../../src/auth.js'); + + // Should throw error for missing client_email + // await assert.rejects( + // async () => await validateCredentials(invalidCredentials), + // { message: /client_email/ }, + // 'Should reject credentials without client_email' + // ); + + delete process.env.GOOGLE_SERVICE_ACCOUNT_KEY; + }); + + it('should detect missing private_key field', async () => { + const invalidCredentials = { + client_email: 'test@project.iam.gserviceaccount.com', + project_id: 'test-project' + // Missing private_key + }; + + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = JSON.stringify(invalidCredentials); + + // TODO: Import validateCredentials + // const { validateCredentials } = await import('../../src/auth.js'); + + // Should throw error for missing private_key + // await assert.rejects( + // async () => await validateCredentials(invalidCredentials), + // { message: /private_key/ }, + // 'Should reject credentials without private_key' + // ); + + delete process.env.GOOGLE_SERVICE_ACCOUNT_KEY; + }); + + it('should detect missing project_id field', async () => { + const invalidCredentials = { + client_email: 'test@project.iam.gserviceaccount.com', + private_key: '-----BEGIN PRIVATE KEY-----\nMOCK_KEY\n-----END PRIVATE KEY-----\n' + // Missing project_id + }; + + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = JSON.stringify(invalidCredentials); + + // TODO: Import validateCredentials + // const { validateCredentials } = await import('../../src/auth.js'); + + // Should throw error for missing project_id + // await assert.rejects( + // async () => await validateCredentials(invalidCredentials), + // { message: /project_id/ }, + // 'Should reject credentials without project_id' + // ); + + delete process.env.GOOGLE_SERVICE_ACCOUNT_KEY; + }); + + it('should detect empty credential fields', async () => { + const invalidCredentials = { + client_email: '', // Empty + private_key: '-----BEGIN PRIVATE KEY-----\nMOCK_KEY\n-----END PRIVATE KEY-----\n', + project_id: 'test-project' + }; + + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = JSON.stringify(invalidCredentials); + + // TODO: Import validateCredentials + // const { validateCredentials } = await import('../../src/auth.js'); + + // Should throw error for empty client_email + // await assert.rejects( + // async () => await validateCredentials(invalidCredentials), + // { message: /client_email.*empty/ }, + // 'Should reject empty client_email' + // ); + + delete process.env.GOOGLE_SERVICE_ACCOUNT_KEY; + }); + + it('should accept valid credentials', async () => { + const validCredentials = { + type: 'service_account', + project_id: 'test-project', + private_key: '-----BEGIN PRIVATE KEY-----\nMOCK_KEY\n-----END PRIVATE KEY-----\n', + client_email: 'test@test-project.iam.gserviceaccount.com' + }; + + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = JSON.stringify(validCredentials); + + // TODO: Import validateCredentials + // const { validateCredentials } = await import('../../src/auth.js'); + + // Should not throw for valid credentials + // await assert.doesNotReject( + // async () => await validateCredentials(validCredentials), + // 'Should accept valid credentials' + // ); + + delete process.env.GOOGLE_SERVICE_ACCOUNT_KEY; + }); + + it('should trigger fatal error handler on invalid credentials (exit code 1)', async () => { + // Per T016: Fatal error handler should log to stderr and exit with code 1 + const invalidCredentials = { + invalid: 'structure' + }; + + process.env.GOOGLE_SERVICE_ACCOUNT_KEY = JSON.stringify(invalidCredentials); + + // TODO: Import initializeAuth which should call fatal error handler + // const { initializeAuth } = await import('../../src/auth.js'); + + // Mock process.exit to prevent actual exit + // let exitCode; + // const originalExit = process.exit; + // process.exit = (code) => { exitCode = code; throw new Error('EXIT'); }; + + // try { + // await initializeAuth(); + // } catch (e) { + // if (e.message === 'EXIT') { + // assert.equal(exitCode, 1, 'Should exit with code 1 on invalid credentials'); + // } else { + // throw e; + // } + // } finally { + // process.exit = originalExit; + // } + + delete process.env.GOOGLE_SERVICE_ACCOUNT_KEY; + }); +}); diff --git a/tests/unit/drive-client.test.js b/tests/unit/drive-client.test.js new file mode 100644 index 0000000..f7f5370 --- /dev/null +++ b/tests/unit/drive-client.test.js @@ -0,0 +1,227 @@ +/** + * Unit Tests: Drive API Client + * + * Tests T031-T032: Test Drive API client query execution and pagination + * Tests the drive-client.js module in isolation with mocked googleapis + * + * @module tests/unit/drive-client + */ + +import { describe, it, mock } from 'node:test'; +import assert from 'node:assert/strict'; + +// ============================================================================= +// T031: Unit test for Drive API client query execution +// ============================================================================= + +describe('T031: Drive API Client Query Execution', () => { + it('should call drive.files.list() with correct query parameters', async () => { + // Mock googleapis drive.files.list() method + const mockFilesList = mock.fn(async (params) => { + return { + data: { + files: [ + { id: 'doc1', name: 'Test Doc 1', mimeType: 'application/pdf', modifiedTime: '2024-03-01T10:00:00Z' }, + { id: 'doc2', name: 'Test Doc 2', mimeType: 'text/plain', modifiedTime: '2024-03-02T11:00:00Z' } + ], + nextPageToken: null + } + }; + }); + + // TODO: Import queryDocuments function from src/drive-client.js when implemented + // const { queryDocuments } = await import('../../src/drive-client.js'); + + // Mock Drive client + const mockDriveClient = { + files: { + list: mockFilesList + } + }; + + // Expected query parameters from config/settings.js + const expectedQuery = 'trashed = false'; // Default query + const expectedFields = 'files(id, name, mimeType, modifiedTime)'; + const expectedPageSize = 1000; + + // Call queryDocuments (will be implemented) + // const result = await queryDocuments(mockDriveClient, expectedQuery); + + // Verify drive.files.list() was called with correct parameters + // assert.equal(mockFilesList.mock.calls.length, 1, 'Should call drive.files.list() once'); + + // const callArgs = mockFilesList.mock.calls[0].arguments[0]; + // assert.equal(callArgs.q, expectedQuery, 'Should use query from settings'); + // assert.equal(callArgs.fields, expectedFields, 'Should request correct fields'); + // assert.equal(callArgs.pageSize, expectedPageSize, 'Should use correct page size'); + + // Verify result contains documents + // assert.ok(Array.isArray(result), 'Should return array of documents'); + // assert.equal(result.length, 2, 'Should return 2 documents'); + // assert.equal(result[0].id, 'doc1', 'Should have correct document ID'); + }); + + it('should use configurable Drive API filter from settings', async () => { + const mockFilesList = mock.fn(async () => ({ + data: { files: [], nextPageToken: null } + })); + + const mockDriveClient = { + files: { list: mockFilesList } + }; + + // Custom query filter (per clarification #9) + const customQuery = "mimeType contains 'application/pdf' and trashed = false"; + + // TODO: Call queryDocuments with custom query + // await queryDocuments(mockDriveClient, customQuery); + + // Verify custom query was used + // const callArgs = mockFilesList.mock.calls[0].arguments[0]; + // assert.equal(callArgs.q, customQuery, 'Should use custom query from settings'); + }); +}); + +// ============================================================================= +// T032: Unit test for Drive API pagination handling +// ============================================================================= + +describe('T032: Drive API Pagination Handling', () => { + it('should handle pageToken to fetch all results across multiple pages', async () => { + // Mock Drive API with pagination (3 pages) + let callCount = 0; + const mockFilesList = mock.fn(async (params) => { + callCount++; + + if (callCount === 1) { + // First page + return { + data: { + files: [ + { id: 'doc1', name: 'Doc 1', mimeType: 'application/pdf', modifiedTime: '2024-03-01T10:00:00Z' } + ], + nextPageToken: 'token_page_2' + } + }; + } else if (callCount === 2) { + // Second page + assert.equal(params.pageToken, 'token_page_2', 'Should use pageToken from previous response'); + return { + data: { + files: [ + { id: 'doc2', name: 'Doc 2', mimeType: 'text/plain', modifiedTime: '2024-03-02T11:00:00Z' } + ], + nextPageToken: 'token_page_3' + } + }; + } else { + // Third page (last) + assert.equal(params.pageToken, 'token_page_3', 'Should use pageToken from previous response'); + return { + data: { + files: [ + { id: 'doc3', name: 'Doc 3', mimeType: 'application/pdf', modifiedTime: '2024-03-03T12:00:00Z' } + ], + nextPageToken: null // No more pages + } + }; + } + }); + + const mockDriveClient = { + files: { list: mockFilesList } + }; + + // TODO: Call queryDocuments to fetch all pages + // const result = await queryDocuments(mockDriveClient, 'trashed = false'); + + // Verify all pages were fetched + // assert.equal(mockFilesList.mock.calls.length, 3, 'Should call drive.files.list() 3 times for 3 pages'); + // assert.equal(result.length, 3, 'Should return all 3 documents from all pages'); + // assert.equal(result[0].id, 'doc1', 'Should have doc1 from page 1'); + // assert.equal(result[1].id, 'doc2', 'Should have doc2 from page 2'); + // assert.equal(result[2].id, 'doc3', 'Should have doc3 from page 3'); + }); + + it('should collect up to 50,000 documents across pages', async () => { + // Mock Drive API to return many pages (simulate large Drive) + const documentsPerPage = 1000; + const totalDocuments = 5000; // 5 pages + let currentPage = 0; + + const mockFilesList = mock.fn(async (params) => { + currentPage++; + const startId = (currentPage - 1) * documentsPerPage; + const endId = Math.min(startId + documentsPerPage, totalDocuments); + + const files = []; + for (let i = startId; i < endId; i++) { + files.push({ + id: `doc${i}`, + name: `Document ${i}`, + mimeType: 'application/pdf', + modifiedTime: '2024-03-01T10:00:00Z' + }); + } + + return { + data: { + files, + nextPageToken: currentPage < Math.ceil(totalDocuments / documentsPerPage) ? `token_page_${currentPage + 1}` : null + } + }; + }); + + const mockDriveClient = { + files: { list: mockFilesList } + }; + + // TODO: Call queryDocuments + // const result = await queryDocuments(mockDriveClient, 'trashed = false'); + + // Verify all documents were collected + // assert.equal(result.length, totalDocuments, `Should collect all ${totalDocuments} documents`); + // assert.equal(mockFilesList.mock.calls.length, Math.ceil(totalDocuments / documentsPerPage), 'Should call API for each page'); + }); + + it('should stop pagination at 50,000 document limit', async () => { + // Mock Drive API to return more than 50k documents + const documentsPerPage = 1000; + let currentPage = 0; + + const mockFilesList = mock.fn(async () => { + currentPage++; + const files = []; + for (let i = 0; i < documentsPerPage; i++) { + files.push({ + id: `doc${currentPage}_${i}`, + name: `Document ${currentPage}_${i}`, + mimeType: 'application/pdf', + modifiedTime: '2024-03-01T10:00:00Z' + }); + } + + // Always return nextPageToken to simulate unlimited documents + return { + data: { + files, + nextPageToken: `token_page_${currentPage + 1}` + } + }; + }); + + const mockDriveClient = { + files: { list: mockFilesList } + }; + + // TODO: Call queryDocuments - should stop at 50k + // await assert.rejects( + // async () => await queryDocuments(mockDriveClient, 'trashed = false'), + // { message: /50,?000/ }, + // 'Should throw error when exceeding 50k document limit' + // ); + + // Verify pagination stopped at 50k + // assert.ok(currentPage <= 50, 'Should stop pagination before collecting too many documents'); + }); +}); diff --git a/tests/unit/queue.test.js b/tests/unit/queue.test.js new file mode 100644 index 0000000..7cd0939 --- /dev/null +++ b/tests/unit/queue.test.js @@ -0,0 +1,317 @@ +/** + * Unit Tests: FIFO Request Queue + * + * Tests T038-T039: Test FIFO queue implementation + * Tests the queue.js module in isolation + * + * @module tests/unit/queue + */ + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; + +// ============================================================================= +// T038: Unit test for FIFO queue enqueue/dequeue +// ============================================================================= + +describe('T038: FIFO Queue Enqueue/Dequeue', () => { + it('should enqueue and dequeue requests in FIFO order', async () => { + // TODO: Import RequestQueue from src/queue.js + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + const results = []; + + // Enqueue 3 tasks + const task1 = async () => { + await delay(10); + results.push('task1'); + return 'result1'; + }; + + const task2 = async () => { + await delay(10); + results.push('task2'); + return 'result2'; + }; + + const task3 = async () => { + await delay(10); + results.push('task3'); + return 'result3'; + }; + + // Enqueue all tasks + // const promise1 = queue.enqueue(task1); + // const promise2 = queue.enqueue(task2); + // const promise3 = queue.enqueue(task3); + + // Wait for all to complete + // await Promise.all([promise1, promise2, promise3]); + + // Verify FIFO order + // assert.deepEqual(results, ['task1', 'task2', 'task3'], 'Tasks should complete in FIFO order'); + }); + + it('should process tasks sequentially (one at a time)', async () => { + // TODO: Import RequestQueue + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + let activeTaskCount = 0; + let maxActiveTaskCount = 0; + + const createTask = (id) => async () => { + activeTaskCount++; + maxActiveTaskCount = Math.max(maxActiveTaskCount, activeTaskCount); + + await delay(50); + + activeTaskCount--; + return `task${id}`; + }; + + // Enqueue multiple tasks + const promises = []; + for (let i = 1; i <= 5; i++) { + // promises.push(queue.enqueue(createTask(i))); + } + + // await Promise.all(promises); + + // Verify only one task was active at a time + // assert.equal(maxActiveTaskCount, 1, 'Only one task should be active at a time'); + }); + + it('should maintain queue order when tasks are added during processing', async () => { + // TODO: Import RequestQueue + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + const results = []; + + // Add initial task + // queue.enqueue(async () => { + // await delay(20); + // results.push('task1'); + // }); + + // Add second task after slight delay + // await delay(5); + // queue.enqueue(async () => { + // await delay(10); + // results.push('task2'); + // }); + + // Add third task after slight delay + // await delay(5); + // queue.enqueue(async () => { + // await delay(10); + // results.push('task3'); + // }); + + // Wait for all tasks to complete + // await delay(100); + + // Verify order preserved + // assert.deepEqual(results, ['task1', 'task2', 'task3'], 'Should maintain FIFO order even when tasks added during processing'); + }); + + it('should return task result through promise', async () => { + // TODO: Import RequestQueue + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + const task = async () => { + return 'test-result'; + }; + + // const result = await queue.enqueue(task); + + // assert.equal(result, 'test-result', 'Should return task result through promise'); + }); + + it('should propagate task errors through promise', async () => { + // TODO: Import RequestQueue + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + const task = async () => { + throw new Error('Task failed'); + }; + + // await assert.rejects( + // async () => await queue.enqueue(task), + // { message: 'Task failed' }, + // 'Should propagate task error' + // ); + }); +}); + +// ============================================================================= +// T039: Unit test for FIFO queue concurrent request handling +// ============================================================================= + +describe('T039: FIFO Queue Concurrent Request Handling', () => { + it('should use processing flag to prevent simultaneous execution', async () => { + // TODO: Import RequestQueue + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + let processingCheckpoints = []; + + const createTask = (id) => async () => { + // Log when task starts + processingCheckpoints.push({ id, event: 'start', time: Date.now() }); + + await delay(30); + + // Log when task ends + processingCheckpoints.push({ id, event: 'end', time: Date.now() }); + + return id; + }; + + // Enqueue 3 tasks simultaneously + const promises = [ + // queue.enqueue(createTask(1)), + // queue.enqueue(createTask(2)), + // queue.enqueue(createTask(3)) + ]; + + // await Promise.all(promises); + + // Verify processing flag prevented overlap + // Check that task N ends before task N+1 starts + // const task1End = processingCheckpoints.find(cp => cp.id === 1 && cp.event === 'end'); + // const task2Start = processingCheckpoints.find(cp => cp.id === 2 && cp.event === 'start'); + // const task2End = processingCheckpoints.find(cp => cp.id === 2 && cp.event === 'end'); + // const task3Start = processingCheckpoints.find(cp => cp.id === 3 && cp.event === 'start'); + + // assert.ok(task1End.time <= task2Start.time, 'Task 2 should start after Task 1 ends'); + // assert.ok(task2End.time <= task3Start.time, 'Task 3 should start after Task 2 ends'); + }); + + it('should clear processing flag after task completes', async () => { + // TODO: Import RequestQueue + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + // Add task + // await queue.enqueue(async () => { + // await delay(10); + // return 'done'; + // }); + + // Verify processing flag is cleared (queue can accept new tasks) + // assert.equal(queue.isProcessing(), false, 'Processing flag should be cleared after task completes'); + }); + + it('should clear processing flag even if task throws error', async () => { + // TODO: Import RequestQueue + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + // Add task that throws error + try { + // await queue.enqueue(async () => { + // await delay(10); + // throw new Error('Task failed'); + // }); + } catch (e) { + // Expected error + } + + // Verify processing flag is cleared (queue can accept new tasks) + // assert.equal(queue.isProcessing(), false, 'Processing flag should be cleared even after task error'); + + // Verify next task can be processed + // const result = await queue.enqueue(async () => 'next-task'); + // assert.equal(result, 'next-task', 'Next task should process successfully after error'); + }); + + it('should handle empty queue correctly (no processing when queue empty)', async () => { + // TODO: Import RequestQueue + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + // Verify processing flag is false for empty queue + // assert.equal(queue.isProcessing(), false, 'Processing flag should be false for empty queue'); + // assert.equal(queue.getQueueLength(), 0, 'Queue should be empty'); + }); + + it('should use EventEmitter for queue management', async () => { + // Per task spec: "Implement FIFO request queue class in src/queue.js using Node.js EventEmitter" + + // TODO: Import RequestQueue + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + // Verify queue extends or uses EventEmitter + // assert.ok(queue.on, 'Queue should have EventEmitter methods'); + // assert.ok(queue.emit, 'Queue should have emit method'); + }); + + it('should maintain queue array for pending tasks', async () => { + // TODO: Import RequestQueue + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + // Add tasks without waiting + // queue.enqueue(async () => { + // await delay(50); + // return 'task1'; + // }); + // queue.enqueue(async () => 'task2'); + // queue.enqueue(async () => 'task3'); + + // Check queue length while first task is processing + // await delay(10); // Let first task start processing + + // Queue should have 2 pending tasks (task2 and task3) + // Note: task1 is being processed, not in queue + // assert.ok(queue.getQueueLength() >= 2, 'Queue should contain pending tasks'); + }); + + it('should process queue in correct order after processing flag is cleared', async () => { + // TODO: Import RequestQueue + // const { RequestQueue } = await import('../../src/queue.js'); + // const queue = new RequestQueue(); + + const results = []; + + // Add first task (starts processing immediately) + // queue.enqueue(async () => { + // await delay(30); + // results.push('task1'); + // }); + + // Add more tasks while first is processing + // await delay(5); + // queue.enqueue(async () => { + // results.push('task2'); + // }); + // queue.enqueue(async () => { + // results.push('task3'); + // }); + + // Wait for all to complete + // await delay(100); + + // Verify FIFO order maintained + // assert.deepEqual(results, ['task1', 'task2', 'task3'], 'Should process in FIFO order after processing flag cleared'); + }); +}); + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/** + * Delay helper for async tests + * @param {number} ms - Milliseconds to delay + * @returns {Promise} + */ +function delay(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} diff --git a/tests/unit/sitemap-generator.test.js b/tests/unit/sitemap-generator.test.js new file mode 100644 index 0000000..180c879 --- /dev/null +++ b/tests/unit/sitemap-generator.test.js @@ -0,0 +1,366 @@ +/** + * Unit Tests: Sitemap Generator + * + * Tests T035-T037, T040: Test sitemap XML generation and transformations + * Tests the sitemap-generator.js module in isolation + * + * @module tests/unit/sitemap-generator + */ + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; + +// ============================================================================= +// T035: Unit test for sitemap XML generation +// ============================================================================= + +describe('T035: Sitemap XML Generation', () => { + it('should generate valid sitemap XML with correct structure', () => { + // Mock sitemap entries + const mockEntries = [ + { + loc: 'http://localhost:3000/documents/doc1', + lastmod: '2024-03-01' + }, + { + loc: 'http://localhost:3000/documents/doc2', + lastmod: '2024-03-02' + } + ]; + + // TODO: Import generateSitemapXML from src/sitemap-generator.js + // const { generateSitemapXML } = await import('../../src/sitemap-generator.js'); + // const xml = generateSitemapXML(mockEntries); + + // Verify XML structure + const expectedXml = ` + + + http://localhost:3000/documents/doc1 + 2024-03-01 + + + http://localhost:3000/documents/doc2 + 2024-03-02 + +`; + + // assert.ok(xml.includes(''), 'Should have XML declaration'); + // assert.ok(xml.includes(''), 'Should have urlset with namespace'); + // assert.ok(xml.includes(''), 'Should close urlset'); + // assert.ok(xml.includes('http://localhost:3000/documents/doc1'), 'Should include first URL'); + // assert.ok(xml.includes('http://localhost:3000/documents/doc2'), 'Should include second URL'); + }); + + it('should generate URL entries in correct RESTful format /documents/{documentId}', () => { + const mockEntries = [ + { + loc: 'http://localhost:3000/documents/abc123', + lastmod: '2024-03-01' + } + ]; + + // TODO: Import generateSitemapXML + // const { generateSitemapXML } = await import('../../src/sitemap-generator.js'); + // const xml = generateSitemapXML(mockEntries); + + // Verify RESTful URL format + // assert.match(xml, /http:\/\/localhost:3000\/documents\/abc123<\/loc>/, 'Should use RESTful URL format'); + }); + + it('should generate empty sitemap when no entries provided', () => { + const mockEntries = []; + + // TODO: Import generateSitemapXML + // const { generateSitemapXML } = await import('../../src/sitemap-generator.js'); + // const xml = generateSitemapXML(mockEntries); + + // Verify empty sitemap structure + // assert.ok(xml.includes(''), 'Should have urlset'); + // assert.ok(xml.includes(''), 'Should close urlset'); + // assert.ok(!xml.includes(''), 'Should not contain any url entries'); + }); +}); + +// ============================================================================= +// T036: Unit test for Document to SitemapEntry transformation +// ============================================================================= + +describe('T036: Document to SitemapEntry Transformation', () => { + it('should transform Document to SitemapEntry with correct URL format', () => { + // Mock Document from Drive API + const mockDocument = { + id: 'abc123', + name: 'Test Document', + mimeType: 'application/pdf', + modifiedTime: '2024-03-01T10:30:00Z' + }; + + const baseUrl = 'http://localhost:3000'; + + // TODO: Import toSitemapEntry from src/sitemap-generator.js + // const { toSitemapEntry } = await import('../../src/sitemap-generator.js'); + // const entry = toSitemapEntry(mockDocument, baseUrl); + + // Verify transformation + // assert.equal(entry.loc, 'http://localhost:3000/documents/abc123', 'Should construct URL with baseUrl + /documents/ + documentId'); + // assert.equal(entry.lastmod, '2024-03-01', 'Should format lastmod as YYYY-MM-DD'); + }); + + it('should use encodeURIComponent for document ID in URL', () => { + // Document ID with special characters that need URL encoding + const mockDocument = { + id: 'doc with spaces', + name: 'Test', + mimeType: 'application/pdf', + modifiedTime: '2024-03-01T10:30:00Z' + }; + + const baseUrl = 'http://localhost:3000'; + + // TODO: Import toSitemapEntry + // const { toSitemapEntry } = await import('../../src/sitemap-generator.js'); + // const entry = toSitemapEntry(mockDocument, baseUrl); + + // Verify URL encoding + // assert.equal(entry.loc, 'http://localhost:3000/documents/doc%20with%20spaces', 'Should URL-encode document ID'); + }); + + it('should concatenate baseUrl + /documents/ + documentId correctly', () => { + const testCases = [ + { + baseUrl: 'http://localhost:3000', + documentId: 'doc1', + expected: 'http://localhost:3000/documents/doc1' + }, + { + baseUrl: 'https://example.com', + documentId: 'doc2', + expected: 'https://example.com/documents/doc2' + }, + { + baseUrl: 'http://localhost:3000/', // With trailing slash + documentId: 'doc3', + expected: 'http://localhost:3000/documents/doc3' // Should handle trailing slash + } + ]; + + // TODO: Import toSitemapEntry + // const { toSitemapEntry } = await import('../../src/sitemap-generator.js'); + + testCases.forEach(testCase => { + const mockDocument = { + id: testCase.documentId, + name: 'Test', + mimeType: 'application/pdf' + }; + + // const entry = toSitemapEntry(mockDocument, testCase.baseUrl); + // assert.equal(entry.loc, testCase.expected, `Should correctly concatenate URL for baseUrl: ${testCase.baseUrl}`); + }); + }); + + it('should handle documents without modifiedTime', () => { + const mockDocument = { + id: 'doc1', + name: 'Test Document', + mimeType: 'application/pdf' + // No modifiedTime + }; + + const baseUrl = 'http://localhost:3000'; + + // TODO: Import toSitemapEntry + // const { toSitemapEntry } = await import('../../src/sitemap-generator.js'); + // const entry = toSitemapEntry(mockDocument, baseUrl); + + // Verify lastmod is undefined or omitted + // assert.equal(entry.loc, 'http://localhost:3000/documents/doc1', 'Should have loc'); + // assert.equal(entry.lastmod, undefined, 'Should not have lastmod when modifiedTime is missing'); + }); +}); + +// ============================================================================= +// T037: Unit test for lastmod date formatting +// ============================================================================= + +describe('T037: lastmod Date Formatting', () => { + it('should format modifiedTime as ISO 8601 date (YYYY-MM-DD)', () => { + const testCases = [ + { + modifiedTime: '2024-03-01T10:30:00Z', + expected: '2024-03-01' + }, + { + modifiedTime: '2024-12-31T23:59:59Z', + expected: '2024-12-31' + }, + { + modifiedTime: '2024-01-15T00:00:00Z', + expected: '2024-01-15' + } + ]; + + // TODO: Import formatLastmod or toSitemapEntry + // const { toSitemapEntry } = await import('../../src/sitemap-generator.js'); + + testCases.forEach(testCase => { + const mockDocument = { + id: 'doc1', + name: 'Test', + mimeType: 'application/pdf', + modifiedTime: testCase.modifiedTime + }; + + // const entry = toSitemapEntry(mockDocument, 'http://localhost:3000'); + // assert.equal(entry.lastmod, testCase.expected, `Should format ${testCase.modifiedTime} as ${testCase.expected}`); + }); + }); + + it('should extract date part from ISO 8601 timestamp', () => { + // modifiedTime from Drive API is full ISO 8601 timestamp + const modifiedTime = '2024-03-01T10:30:45.123Z'; + + // TODO: Import formatLastmod or toSitemapEntry + // const { toSitemapEntry } = await import('../../src/sitemap-generator.js'); + + const mockDocument = { + id: 'doc1', + name: 'Test', + mimeType: 'application/pdf', + modifiedTime + }; + + // const entry = toSitemapEntry(mockDocument, 'http://localhost:3000'); + + // Should extract only date part (YYYY-MM-DD) + // assert.equal(entry.lastmod, '2024-03-01', 'Should extract date part only'); + // assert.match(entry.lastmod, /^\d{4}-\d{2}-\d{2}$/, 'Should match YYYY-MM-DD format'); + }); + + it('should handle different timezone formats in modifiedTime', () => { + const testCases = [ + '2024-03-01T10:30:00Z', // UTC + '2024-03-01T10:30:00+00:00', // UTC with offset + '2024-03-01T10:30:00-08:00', // PST + '2024-03-01T10:30:00+05:30' // IST + ]; + + // TODO: Import toSitemapEntry + // const { toSitemapEntry } = await import('../../src/sitemap-generator.js'); + + testCases.forEach(modifiedTime => { + const mockDocument = { + id: 'doc1', + name: 'Test', + mimeType: 'application/pdf', + modifiedTime + }; + + // const entry = toSitemapEntry(mockDocument, 'http://localhost:3000'); + + // Should parse all timezone formats correctly + // assert.match(entry.lastmod, /^\d{4}-\d{2}-\d{2}$/, `Should format date correctly for ${modifiedTime}`); + }); + }); +}); + +// ============================================================================= +// T040: Unit test for XML special character escaping +// ============================================================================= + +describe('T040: XML Special Character Escaping', () => { + it('should escape ampersand (&) as &', () => { + const url = 'http://localhost:3000/documents/doc&test'; + + // TODO: Import escapeXml from src/xml-utils.js + // const { escapeXml } = await import('../../src/xml-utils.js'); + // const escaped = escapeXml(url); + + // assert.equal(escaped, 'http://localhost:3000/documents/doc&test', 'Should escape & as &'); + // assert.ok(!escaped.includes('&test'), 'Should not contain unescaped &'); + }); + + it('should escape less than (<) as <', () => { + const url = 'http://localhost:3000/documents/doc<123'; + + // TODO: Import escapeXml + // const { escapeXml } = await import('../../src/xml-utils.js'); + // const escaped = escapeXml(url); + + // assert.equal(escaped, 'http://localhost:3000/documents/doc<123', 'Should escape < as <'); + }); + + it('should escape greater than (>) as >', () => { + const url = 'http://localhost:3000/documents/doc>456'; + + // TODO: Import escapeXml + // const { escapeXml } = await import('../../src/xml-utils.js'); + // const escaped = escapeXml(url); + + // assert.equal(escaped, 'http://localhost:3000/documents/doc>456', 'Should escape > as >'); + }); + + it('should escape double quote (") as "', () => { + const url = 'http://localhost:3000/documents/doc"test'; + + // TODO: Import escapeXml + // const { escapeXml } = await import('../../src/xml-utils.js'); + // const escaped = escapeXml(url); + + // assert.equal(escaped, 'http://localhost:3000/documents/doc"test', 'Should escape " as "'); + }); + + it('should escape single quote (\') as '', () => { + const url = "http://localhost:3000/documents/doc'xyz"; + + // TODO: Import escapeXml + // const { escapeXml } = await import('../../src/xml-utils.js'); + // const escaped = escapeXml(url); + + // assert.equal(escaped, "http://localhost:3000/documents/doc'xyz", "Should escape ' as '"); + }); + + it('should escape multiple special characters in same string', () => { + const url = 'http://localhost:3000/documents/a&bd"e\'f'; + + // TODO: Import escapeXml + // const { escapeXml } = await import('../../src/xml-utils.js'); + // const escaped = escapeXml(url); + + // assert.equal( + // escaped, + // 'http://localhost:3000/documents/a&b<c>d"e'f', + // 'Should escape all special characters' + // ); + }); + + it('should not double-escape already escaped characters', () => { + const url = 'http://localhost:3000/documents/doc&test'; + + // TODO: Import escapeXml + // const { escapeXml } = await import('../../src/xml-utils.js'); + // const escaped = escapeXml(url); + + // Should not double-escape + // assert.ok(!escaped.includes('&amp;'), 'Should not double-escape &'); + }); + + it('should handle empty string', () => { + // TODO: Import escapeXml + // const { escapeXml } = await import('../../src/xml-utils.js'); + // const escaped = escapeXml(''); + + // assert.equal(escaped, '', 'Should return empty string for empty input'); + }); + + it('should handle string with no special characters', () => { + const url = 'http://localhost:3000/documents/doc123'; + + // TODO: Import escapeXml + // const { escapeXml } = await import('../../src/xml-utils.js'); + // const escaped = escapeXml(url); + + // assert.equal(escaped, url, 'Should return unchanged string when no special chars'); + }); +}); diff --git a/tests/unit/utils.test.js b/tests/unit/utils.test.js new file mode 100644 index 0000000..e80d240 --- /dev/null +++ b/tests/unit/utils.test.js @@ -0,0 +1,59 @@ +/** + * Unit Tests for General Utilities + * + * NOTE: Per constitution requirement, proxy.js has ZERO exports and NO globalThis usage. + * The file is a pure function expression loaded via Function constructor. + * + * This test file verifies constitution compliance only. + */ + +import { test, describe } from 'node:test'; +import assert from 'node:assert'; +import { readFileSync } from 'node:fs'; +import { join, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +// Set up globals that server.js would provide +// Note: crypto is already available on globalThis (Web Crypto API) +globalThis.config = { google: {}, server: {}, sitemap: {} }; + +describe('Unit: Constitution Compliance', () => { + + test('T046: proxy.js has ZERO exports/imports and loads as pure function', () => { + const proxyPath = join(__dirname, '..', '..', 'src', 'proxy.js'); + const proxyCode = readFileSync(proxyPath, 'utf-8'); + + // Verify no exports + assert.ok(!proxyCode.match(/^export /m), 'Should have no export statements'); + + // Verify no imports + assert.ok(!proxyCode.match(/^import /m), 'Should have no import statements'); + + // Verify no globalThis usage (except for accessing provided globals) + const globalThisAssignments = proxyCode.match(/globalThis\.[a-zA-Z_]+ =/g); + assert.ok(!globalThisAssignments, 'Should not assign to globalThis'); + + // Verify it's a function expression that can be executed + assert.ok(proxyCode.includes('(function()'), 'Should contain function expression'); + assert.ok(proxyCode.includes('return handleRequest'), 'Should return handleRequest'); + }); + + test('T046: crypto is available on globalThis (Web Crypto API)', () => { + assert.ok(globalThis.crypto, 'crypto should be available'); + assert.ok(globalThis.crypto.randomUUID, 'crypto.randomUUID should be available'); + + // Test that it works + const uuid = globalThis.crypto.randomUUID(); + assert.ok(uuid, 'Should generate UUID'); + assert.match(uuid, /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i, 'Should be valid UUID format'); + }); + +}); + +// Note: Previous unit tests for internal functions (generateRequestId, validateDocumentId, etc.) +// have been moved to integration tests where they are tested through handleRequest. +// This maintains test coverage while respecting the constitution's ZERO exports requirement. + diff --git a/tests/unit/xml-utils.test.js b/tests/unit/xml-utils.test.js new file mode 100644 index 0000000..b6c09e3 --- /dev/null +++ b/tests/unit/xml-utils.test.js @@ -0,0 +1,63 @@ +/** + * Unit Tests for XML Utilities + * Tests XML escaping functionality + */ + +import { test, describe } from 'node:test'; +import assert from 'node:assert'; +import { escapeXml } from '../../src/xml-utils.js'; + +describe('Unit: XML Escaping', () => { + + test('T045: Should escape ampersand (&)', () => { + const input = 'Rock & Roll'; + const expected = 'Rock & Roll'; + assert.strictEqual(escapeXml(input), expected); + }); + + test('T045: Should escape less than (<)', () => { + const input = '5 < 10'; + const expected = '5 < 10'; + assert.strictEqual(escapeXml(input), expected); + }); + + test('T045: Should escape greater than (>)', () => { + const input = '10 > 5'; + const expected = '10 > 5'; + assert.strictEqual(escapeXml(input), expected); + }); + + test('T045: Should escape double quote (")', () => { + const input = 'Say "Hello"'; + const expected = 'Say "Hello"'; + assert.strictEqual(escapeXml(input), expected); + }); + + test('T045: Should escape single quote (\')', () => { + const input = "It's working"; + const expected = 'It's working'; + assert.strictEqual(escapeXml(input), expected); + }); + + test('T045: Should escape multiple special characters', () => { + const input = 'Content & stuff'; + const expected = '<tag attr="value">Content & stuff</tag>'; + assert.strictEqual(escapeXml(input), expected); + }); + + test('T045: Should handle empty string', () => { + assert.strictEqual(escapeXml(''), ''); + }); + + test('T045: Should handle non-string input', () => { + assert.strictEqual(escapeXml(null), ''); + assert.strictEqual(escapeXml(undefined), ''); + assert.strictEqual(escapeXml(123), ''); + }); + + test('T045: Should not modify safe strings', () => { + const input = 'This is a safe string 123'; + assert.strictEqual(escapeXml(input), input); + }); + +});