Extract helper functions from proxy.js into helpers.js module

- Create src/globalVariables/helpers.js (315 lines)
- Extract 11 helpers (10 utility functions and the DocumentCountExceededError class) from proxy.js
- Reduce proxy.js from 752 to 493 lines (~34% reduction)
- Load helpers via vm.Script with same isolation pattern
- Update constitution to document helper extraction pattern

Extracted functions:
- generateRequestId, validateDocumentId, validateDocumentCount
- escapeXml, mapDriveErrorToHttp
- toSitemapEntry, transformDocumentsToSitemapEntries
- generateSitemapXML, generateSitemap
- parseRoute, DocumentCountExceededError class

Architecture:
- helpers.js loaded via vm.Script (IIFE returning object)
- Injected as 'helpers' global object into VM context
- proxy.js accesses via helpers.functionName() pattern
- Maintains zero-import isolation pattern

Constitution version: 1.16.0 → 1.17.0

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-03-07 10:52:49 -06:00
parent f6710203c7
commit b263311a43
3 changed files with 434 additions and 288 deletions

View File

@@ -0,0 +1,315 @@
/**
* Helper Functions Module for Proxy Script
*
* This module contains pure utility/helper functions extracted from proxy.js
* to improve code organization while maintaining vm.Script isolation pattern.
*
* ARCHITECTURE:
* - Loaded by server.js using vm.Script (same as proxy.js)
* - Returns a single object containing all helper functions
* - Injected into globalVariableContext for access by proxy.js
* - NO IMPORTS - All dependencies provided via VM context
*
* Globals expected (provided by server.js):
* - crypto: Web Crypto API (for randomUUID())
* - console: Custom logger
*
* @returns {Object} Helpers object with all utility functions
*/
// Wrap in IIFE that returns helpers object
(function createHelpers() {
/**
 * Error thrown when the number of documents is larger than the allowed limit.
 *
 * Besides the message, each instance exposes the offending `count`, the
 * `limit` it was checked against and a `statusCode` of 413.
 */
class DocumentCountExceededError extends Error {
  /**
   * @param {number} count - Number of documents that was found
   * @param {number} limit - Maximum number of documents allowed
   */
  constructor(count, limit) {
    const message = `Document count ${count} exceeds limit of ${limit}`;
    super(message);
    // Assigned in one step; the property order matches plain sequential assignment.
    Object.assign(this, {
      name: "DocumentCountExceededError",
      count,
      limit,
      statusCode: 413,
    });
  }
}
// =============================================================================
// Utility Functions
// =============================================================================
/**
 * Build a fresh identifier used to trace a single request.
 *
 * The identifier is the literal prefix `req_` followed by a UUID obtained
 * from `crypto.randomUUID()`.
 *
 * @returns {string} Request ID in format: req_<uuid>
 */
function generateRequestId() {
  const uuid = crypto.randomUUID();
  return ["req", uuid].join("_");
}
/**
 * Check whether a value looks like a document ID.
 *
 * A valid ID is a string of 8 to 128 characters drawn from
 * a-z, A-Z, 0-9, hyphen and underscore. Anything else, including
 * non-string values and the empty string, is rejected.
 *
 * @param {string} id - Document ID to validate
 * @returns {boolean} True if valid
 */
function validateDocumentId(id) {
  const idShape = /^[a-zA-Z0-9_-]{8,128}$/;
  // The type check comes first so non-strings are never coerced by the regex.
  return typeof id === "string" && idShape.test(id);
}
/**
 * Ensure a document count does not go beyond the permitted maximum.
 *
 * Returns nothing when the count is acceptable; a count equal to the
 * limit is still allowed.
 *
 * @param {number} count - Document count
 * @param {number} limit - Maximum allowed (default: 50000)
 * @throws {DocumentCountExceededError} If count > limit
 */
function validateDocumentCount(count, limit = 50000) {
  const overLimit = count > limit;
  if (!overLimit) {
    return;
  }
  throw new DocumentCountExceededError(count, limit);
}
// =============================================================================
// XML Utilities
// =============================================================================
/**
 * Escape the five XML special characters (&, <, >, ", ') in a value.
 *
 * Only `null` and `undefined` map to the empty string. Every other value is
 * converted with `String()` first, so falsy-but-meaningful values such as `0`
 * or `false` are preserved and non-string input no longer throws.
 *
 * @param {*} str - Value to escape (normally a string)
 * @returns {string} Escaped string safe for XML text and attribute values
 */
function escapeXml(str) {
  if (str === null || str === undefined) {
    return "";
  }
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };
  // Single pass: each special character is replaced exactly once, so the
  // ampersands introduced by the entities themselves are never re-escaped.
  return String(str).replace(/[&<>"']/g, (ch) => entities[ch]);
}
// =============================================================================
// Error Mapping
// =============================================================================
/**
 * Convert a Retry-After header value into a number of seconds.
 *
 * The header may carry either a delay in seconds or an HTTP-date. A missing
 * or unparseable value yields the fallback instead of NaN.
 *
 * @param {string|number|undefined} value - Raw Retry-After header value
 * @param {number} fallbackSeconds - Value used when nothing usable is present
 * @returns {number} Seconds to wait before retrying
 */
function parseRetryAfterSeconds(value, fallbackSeconds = 60) {
  if (!value) {
    return fallbackSeconds;
  }
  const seconds = Number.parseInt(value, 10);
  if (!Number.isNaN(seconds)) {
    return seconds;
  }
  // Not a number: try the HTTP-date form and turn it into a delay from now.
  const retryAt = Date.parse(String(value));
  if (Number.isNaN(retryAt)) {
    return fallbackSeconds;
  }
  return Math.max(0, Math.ceil((retryAt - Date.now()) / 1000));
}

/**
 * Map Drive API error to HTTP status code and retry info
 *
 * Mapping:
 * - DocumentCountExceededError: 413
 * - 429: Rate limit - result includes retryAfter (seconds, default 60)
 * - 503: Service unavailable - passed through, no retry info
 * - 401 / 403: Authentication or authorization failure - passed through
 * - anything else (including a missing error or non-numeric code): 500
 *
 * The status is read from `error.response.status`, falling back to
 * `error.code`. It is converted to a number before comparison so that
 * string codes such as "429" are recognised.
 *
 * @param {Error} error - Drive API error
 * @returns {Object} { statusCode, retryAfter? }
 */
function mapDriveErrorToHttp(error) {
  if (error instanceof DocumentCountExceededError) {
    return { statusCode: 413 };
  }

  // Optional chaining on `error` itself keeps a null/undefined error from throwing.
  const rawStatus = error?.response?.status || error?.code;
  const statusCode = Number(rawStatus);

  if (statusCode === 429) {
    const headers = error?.response?.headers;
    // Headers may be a plain object or expose a get() accessor (Fetch Headers, Map).
    const retryAfter =
      typeof headers?.get === "function"
        ? headers.get("retry-after")
        : headers?.["retry-after"];
    return {
      statusCode: 429,
      retryAfter: parseRetryAfterSeconds(retryAfter),
    };
  }

  // 503 is reported as-is (no retry); 401/403 are reported as-is.
  if (statusCode === 503 || statusCode === 401 || statusCode === 403) {
    return { statusCode };
  }

  // All other errors map to 500
  return { statusCode: 500 };
}
// =============================================================================
// Sitemap Functions
// =============================================================================
/**
 * Build a sitemap entry for a single Drive document.
 *
 * The entry URL has the form {baseUrl}/documents/{documentId}, with the ID
 * URI-encoded. `lastmod` is the date part (YYYY-MM-DD) of the document's
 * modifiedTime; when modifiedTime is missing or cannot be converted, the
 * current date is used instead (a conversion failure is also logged).
 *
 * @param {Object} document - Drive API document
 * @param {string} document.id - Document ID
 * @param {string} document.modifiedTime - ISO 8601 timestamp
 * @param {string} baseUrl - Base URL for the adapter
 * @returns {Object|null} Sitemap entry { loc, lastmod }, or null (after
 *   logging) when the document or its id is missing
 */
function toSitemapEntry(document, baseUrl) {
  const usable = Boolean(document && document.id);
  if (!usable) {
    console.error("Invalid document for sitemap entry", { document });
    return null;
  }

  // Keep only the YYYY-MM-DD part of an ISO timestamp.
  const dayOf = (moment) => moment.toISOString().split("T")[0];

  const loc = `${baseUrl}/documents/${encodeURIComponent(document.id)}`;

  if (!document.modifiedTime) {
    return { loc, lastmod: dayOf(new Date()) };
  }

  let lastmod;
  try {
    // toISOString() throws for an invalid date, which lands in the catch below.
    lastmod = dayOf(new Date(document.modifiedTime));
  } catch {
    console.error("Invalid modifiedTime for document", {
      documentId: document.id,
      modifiedTime: document.modifiedTime,
    });
    lastmod = dayOf(new Date());
  }

  return { loc, lastmod };
}
/**
 * Convert a list of Drive documents into sitemap entries.
 *
 * Each document is passed to toSitemapEntry; documents for which it returns
 * null are left out. A non-array argument is logged and yields an empty list.
 *
 * @param {Array<Object>} documents - Array of Drive API documents
 * @param {string} baseUrl - Base URL for the adapter
 * @returns {Array<Object>} Array of sitemap entries
 */
function transformDocumentsToSitemapEntries(documents, baseUrl) {
  if (!Array.isArray(documents)) {
    console.error("Documents must be an array", { documents });
    return [];
  }

  const entries = [];
  documents.forEach((doc) => {
    const entry = toSitemapEntry(doc, baseUrl);
    if (entry !== null) {
      entries.push(entry);
    }
  });
  return entries;
}
/**
 * Serialize sitemap entries into an XML sitemap document.
 *
 * The output always starts with the XML declaration and the opening
 * <urlset> tag and ends with </urlset>. A missing or empty entry list
 * produces a valid document with no <url> elements. The loc and lastmod
 * values are passed through escapeXml before being written.
 *
 * @param {Array<Object>} sitemapEntries - Array of { loc, lastmod } objects
 * @returns {string} Complete XML sitemap string
 */
function generateSitemapXML(sitemapEntries) {
  const parts = [
    '<?xml version="1.0" encoding="UTF-8"?>\n',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n',
  ];

  const hasEntries = Boolean(sitemapEntries) && sitemapEntries.length !== 0;
  if (hasEntries) {
    for (const item of sitemapEntries) {
      parts.push(
        " <url>\n",
        ` <loc>${escapeXml(item.loc)}</loc>\n`,
        ` <lastmod>${escapeXml(item.lastmod)}</lastmod>\n`,
        " </url>\n",
      );
    }
  }

  parts.push("</urlset>");
  return parts.join("");
}
/**
 * Produce the XML sitemap for a list of Drive documents.
 *
 * Thin composition: the documents are first turned into sitemap entries
 * (transformDocumentsToSitemapEntries) and those entries are then
 * serialized (generateSitemapXML).
 *
 * @param {Array<Object>} documents - Array of Drive API documents
 * @param {string} baseUrl - Base URL for the adapter
 * @returns {string} Complete XML sitemap
 */
function generateSitemap(documents, baseUrl) {
  return generateSitemapXML(
    transformDocumentsToSitemapEntries(documents, baseUrl),
  );
}
// =============================================================================
// Route Parsing
// =============================================================================
/**
 * Decide which route a request maps to.
 *
 * Only GET is accepted. The URL is resolved against http://localhost so
 * that relative request URLs can be parsed; only the pathname (not the
 * query string) is inspected, and any pathname containing 'sitemap.xml'
 * selects the sitemap route.
 *
 * @param {string} method - HTTP method
 * @param {string} url - Request URL
 * @returns {Object} { route: "sitemap" } on a match, otherwise
 *   { route: null, error, statusCode } with 405 (wrong method) or 404
 */
function parseRoute(method, url) {
  const reject = (error, statusCode) => ({ route: null, error, statusCode });

  if (method !== "GET") {
    return reject("Method not allowed", 405);
  }

  const { pathname } = new URL(url, "http://localhost");
  return pathname.includes("sitemap.xml")
    ? { route: "sitemap" }
    : reject("Not found", 404);
}
// =============================================================================
// Return helpers object with all functions
// =============================================================================
// This object is the value the enclosing IIFE evaluates to. Each helper
// defined above is exposed under its own name via shorthand properties,
// so consumers call them as members of the returned object.
return {
// Error classes
DocumentCountExceededError,
// Utilities
generateRequestId,
validateDocumentId,
validateDocumentCount,
// XML
escapeXml,
// Error mapping
mapDriveErrorToHttp,
// Sitemap
toSitemapEntry,
transformDocumentsToSitemapEntries,
generateSitemapXML,
generateSitemap,
// Routing
parseRoute,
};
})();

View File

@@ -16,6 +16,7 @@
* - uuidv4: UUID generator
* - jwt: JSON Web Token library
* - xmlBuilder: XML document builder
* - helpers: Helper functions module (loaded from globalVariables/helpers.js)
* - google_drive_settings: Consolidated settings (from global/google_drive_settings.json)
* - serviceAccount: Service account credentials
* - scopes: OAuth2 scopes array
@@ -24,12 +25,9 @@
*
* Structure:
* Section 1: Authentication (Service Account JWT)
* Section 2: Utility Functions
* Section 3: XML Utilities
* Section 4: Request Queue (FIFO)
* Section 5: Drive API Client
* Section 6: Sitemap Generation
* Section 7: Request Handling & Routing
* Section 2: Request Queue (FIFO)
* Section 3: Drive API Client
* Section 4: Request Handling & Routing
*
* @module proxy
*/
@@ -173,61 +171,7 @@ function clearAuthCache() {
}
// =============================================================================
// Section 2: Utility Functions
// =============================================================================
/**
* Generate a unique request ID for tracing
* Uses UUID v4 for uniqueness
*
* @returns {string} Request ID in format: req_<uuid>
*/
function generateRequestId() {
return `req_${crypto.randomUUID()}`;
}
/**
* Validate document ID format
* Google Drive IDs are alphanumeric with hyphens and underscores
*
* @param {string} id - Document ID to validate
* @returns {boolean} True if valid
*/
function validateDocumentId(id) {
if (!id || typeof id !== "string") {
return false;
}
// Google Drive IDs are typically 8-128 characters
// Characters: a-z, A-Z, 0-9, -, _
const pattern = /^[a-zA-Z0-9_-]{8,128}$/;
return pattern.test(id);
}
// =============================================================================
// Section 3: XML Utilities
// =============================================================================
/**
* Escape special XML characters
* Prevents XML injection and ensures valid XML output
*
* @param {string} str - String to escape
* @returns {string} Escaped string safe for XML
*/
function escapeXml(str) {
if (!str) return "";
return str
.replace(/&/g, "&amp;")
.replace(/</g, "&lt;")
.replace(/>/g, "&gt;")
.replace(/"/g, "&quot;")
.replace(/'/g, "&apos;");
}
// =============================================================================
// Section 4: Request Queue (FIFO)
// Section 2: Request Queue (FIFO)
// =============================================================================
/**
@@ -314,22 +258,9 @@ class RequestQueue {
const requestQueue = new RequestQueue();
// =============================================================================
// Section 5: Drive API Client
// Section 3: Drive API Client
// =============================================================================
/**
* Custom error for document count exceeding limit
*/
class DocumentCountExceededError extends Error {
constructor(count, limit) {
super(`Document count ${count} exceeds limit of ${limit}`);
this.name = "DocumentCountExceededError";
this.count = count;
this.limit = limit;
this.statusCode = 413;
}
}
/**
* Query documents from Google Drive with pagination
*
@@ -405,7 +336,7 @@ async function queryDocuments(options = {}) {
count: allFiles.length,
limit: maxDocuments,
});
throw new DocumentCountExceededError(allFiles.length, maxDocuments);
throw new helpers.DocumentCountExceededError(allFiles.length, maxDocuments);
}
pageToken = response.data.nextPageToken;
@@ -421,7 +352,7 @@ async function queryDocuments(options = {}) {
return allFiles;
} catch (error) {
// Re-throw DocumentCountExceededError as-is
if (error instanceof DocumentCountExceededError) {
if (error instanceof helpers.DocumentCountExceededError) {
throw error;
}
@@ -436,200 +367,10 @@ async function queryDocuments(options = {}) {
}
}
/**
* Map Drive API error to HTTP status code and retry info
*
* Per specification:
* - 429: Rate limit - include Retry-After header
* - 503: Service unavailable - NO RETRY (fail immediately)
* - 401: Authentication failed
* - 500: Other errors
*
* @param {Error} error - Drive API error
* @returns {Object} { statusCode, retryAfter? }
*/
function mapDriveErrorToHttp(error) {
// Handle DocumentCountExceededError
if (error instanceof DocumentCountExceededError) {
return { statusCode: 413 };
}
// Extract status code from Drive API error
const statusCode = error.response?.status || error.code || 500;
// Handle rate limiting (429)
if (statusCode === 429) {
// Extract Retry-After from response headers if present
const retryAfter = error.response?.headers?.["retry-after"];
const retryAfterSeconds = retryAfter ? parseInt(retryAfter, 10) : 60;
return {
statusCode: 429,
retryAfter: retryAfterSeconds,
};
}
// Handle service unavailable (503) - NO RETRY per spec
if (statusCode === 503) {
return { statusCode: 503 };
}
// Handle authentication errors
if (statusCode === 401 || statusCode === 403) {
return { statusCode: statusCode };
}
// All other errors map to 500
return { statusCode: 500 };
}
/**
* Validate document count against limit
*
* @param {number} count - Document count
* @param {number} limit - Maximum allowed (default: 50000)
* @throws {DocumentCountExceededError} If count > limit
*/
function validateDocumentCount(count, limit = 50000) {
if (count > limit) {
throw new DocumentCountExceededError(count, limit);
}
}
// =============================================================================
// Section 6: Sitemap Generation
// Section 4: Request Handling & Routing
// =============================================================================
/**
* Transform Drive document to sitemap entry
*
* Creates RESTful URL in format: {baseUrl}/documents/{documentId}
* Per specification clarification #2.
*
* @param {Object} document - Drive API document
* @param {string} document.id - Document ID
* @param {string} document.modifiedTime - ISO 8601 timestamp
* @param {string} baseUrl - Base URL for the adapter
* @returns {Object} Sitemap entry { loc, lastmod }
*/
function toSitemapEntry(document, baseUrl) {
if (!document || !document.id) {
console.error("Invalid document for sitemap entry", { document });
return null;
}
// RESTful URL format: /documents/{documentId}
const loc = `${baseUrl}/documents/${encodeURIComponent(document.id)}`;
// Format lastmod as ISO 8601 date (YYYY-MM-DD)
let lastmod;
if (document.modifiedTime) {
try {
const date = new Date(document.modifiedTime);
lastmod = date.toISOString().split("T")[0]; // Extract YYYY-MM-DD
} catch (error) {
console.error("Invalid modifiedTime for document", {
documentId: document.id,
modifiedTime: document.modifiedTime,
});
lastmod = new Date().toISOString().split("T")[0]; // Fallback to today
}
} else {
lastmod = new Date().toISOString().split("T")[0]; // Fallback to today
}
return { loc, lastmod };
}
/**
* Transform array of Drive documents to sitemap entries
*
* @param {Array<Object>} documents - Array of Drive API documents
* @param {string} baseUrl - Base URL for the adapter
* @returns {Array<Object>} Array of sitemap entries
*/
function transformDocumentsToSitemapEntries(documents, baseUrl) {
if (!Array.isArray(documents)) {
console.error("Documents must be an array", { documents });
return [];
}
return documents
.map((doc) => toSitemapEntry(doc, baseUrl))
.filter((entry) => entry !== null);
}
/**
* Generate XML sitemap from sitemap entries
*
* Handles empty sitemap (0 documents) case - returns valid XML with empty urlset.
*
* @param {Array<Object>} sitemapEntries - Array of { loc, lastmod } objects
* @returns {string} Complete XML sitemap string
*/
function generateSitemapXML(sitemapEntries) {
let xml = '<?xml version="1.0" encoding="UTF-8"?>\n';
xml += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n';
// Handle empty sitemap - valid XML with no <url> elements
if (!sitemapEntries || sitemapEntries.length === 0) {
xml += "</urlset>";
return xml;
}
for (const entry of sitemapEntries) {
xml += " <url>\n";
xml += ` <loc>${escapeXml(entry.loc)}</loc>\n`;
xml += ` <lastmod>${escapeXml(entry.lastmod)}</lastmod>\n`;
xml += " </url>\n";
}
xml += "</urlset>";
return xml;
}
/**
* Main sitemap generation function
*
* Combines document transformation and XML generation.
*
* @param {Array<Object>} documents - Array of Drive API documents
* @param {string} baseUrl - Base URL for the adapter
* @returns {string} Complete XML sitemap
*/
function generateSitemap(documents, baseUrl) {
const entries = transformDocumentsToSitemapEntries(documents, baseUrl);
return generateSitemapXML(entries);
}
// =============================================================================
// Section 7: Request Handling & Routing
// =============================================================================
/**
* Parse route from request
* @param {string} method - HTTP method
* @param {string} url - Request URL
* @returns {Object} Route info or error
*/
function parseRoute(method, url) {
if (method !== "GET") {
return { route: null, error: "Method not allowed", statusCode: 405 };
}
const urlObj = new URL(url, "http://localhost");
const path = urlObj.pathname;
// Match any path containing 'sitemap.xml'
if (path.includes("sitemap.xml")) {
return { route: "sitemap" };
}
// All other paths return 404
return { route: null, error: "Not found", statusCode: 404 };
}
/**
* Handle sitemap generation request
* Wrapped in FIFO queue to ensure sequential processing.
@@ -653,7 +394,7 @@ async function handleSitemapRequest(res, requestId) {
});
// Generate sitemap XML with RESTful URLs
const xml = generateSitemap(documents, settings.proxyScriptEndPoint);
const xml = helpers.generateSitemap(documents, settings.proxyScriptEndPoint);
// Send successful response
res.statusCode = 200;
@@ -668,7 +409,7 @@ async function handleSitemapRequest(res, requestId) {
});
} catch (error) {
// Map Drive API error to HTTP status code
const errorResponse = mapDriveErrorToHttp(error);
const errorResponse = helpers.mapDriveErrorToHttp(error);
res.statusCode = errorResponse.statusCode;
@@ -697,7 +438,7 @@ async function handleSitemapRequest(res, requestId) {
* @param {Object} res - HTTP response object
*/
(async () => {
const requestId = generateRequestId();
const requestId = helpers.generateRequestId();
const startTime = Date.now();
console.info("Request received", {
@@ -708,7 +449,7 @@ async function handleSitemapRequest(res, requestId) {
try {
// Parse route
const routeResult = parseRoute(req.method, req.url);
const routeResult = helpers.parseRoute(req.method, req.url);
if (!routeResult.route) {
res.statusCode = routeResult.statusCode;