/**
 * Execute a CLI command with a timeout, collecting stdout as a raw Buffer.
 * Use this when the output may be binary (e.g. reading non-text vault files).
 *
 * The returned promise never rejects: a spawn failure is reported through the
 * result (`exitCode: 1`, `stderr` set to the error message).
 *
 * @param cmd - Command, arguments, and optional `cwd` / `timeout`. When
 *   `cmd.timeout` is unset (or 0), `getCommandTimeout(cmd.command)` supplies it.
 * @returns A result whose `stdoutBuffer` holds the untouched stdout bytes and
 *   whose `stdout` is the same data decoded as UTF-8 and trimmed. `timedOut`
 *   is true when the timer fired and the child was sent SIGTERM.
 */
export async function executeCommandBinary(cmd: CLICommand): Promise<CLIResult> {
  const timeout = cmd.timeout || getCommandTimeout(cmd.command);

  return new Promise<CLIResult>((resolve) => {
    // NOTE(review): with `shell: true` Node joins command and args into a single
    // shell command line, so every argument must already be shell-safe — confirm
    // that callers quote/escape user-supplied values.
    const child = spawn(cmd.command, cmd.args, {
      cwd: cmd.cwd || process.cwd(),
      shell: true,
    });

    const stdoutChunks: Buffer[] = [];
    let stderr = '';
    let timedOut = false;

    const timeoutId = setTimeout(() => {
      timedOut = true;
      child.kill('SIGTERM');
      logger.warn('CLI command timed out', { command: cmd.command, timeout });
    }, timeout);

    // Keep stdout as raw chunks; decoding each chunk to a string would corrupt
    // binary data and multi-byte sequences split across chunk boundaries.
    child.stdout?.on('data', (data: Buffer) => {
      stdoutChunks.push(Buffer.isBuffer(data) ? data : Buffer.from(data));
    });

    child.stderr?.on('data', (data) => {
      stderr += data.toString();
    });

    child.on('close', (code) => {
      clearTimeout(timeoutId);
      const stdoutBuffer = Buffer.concat(stdoutChunks);
      resolve({
        stdout: stdoutBuffer.toString('utf8').trim(),
        stdoutBuffer,
        stderr: stderr.trim(),
        // `code` is null when the child was terminated by a signal (including
        // the SIGTERM sent on timeout). That must not be reported as success,
        // so only a real numeric exit status of 0 maps to exitCode 0.
        exitCode: code ?? 1,
        timedOut,
      });
    });

    child.on('error', (error) => {
      clearTimeout(timeoutId);
      logger.error('CLI command spawn error', { error: error.message });
      // If 'close' also fires afterwards, its resolve() call is a no-op.
      resolve({
        stdout: '',
        stdoutBuffer: Buffer.alloc(0),
        stderr: error.message,
        exitCode: 1,
        timedOut: false,
      });
    });
  });
}
/**
 * Execute an Obsidian CLI subcommand with vault context, collecting stdout as
 * a raw Buffer.
 *
 * The vault name is read from the `OBSIDIAN_VAULT` environment variable and the
 * final argument list is `[subcommand, '--vault', <vault>, ...args]`, which is
 * handed to `executeCommandBinary`.
 *
 * @param subcommand - Obsidian CLI subcommand, placed first in the argument list.
 * @param args - Extra arguments appended after the `--vault` pair.
 * @param options - Optional settings; `timeout` is forwarded unchanged.
 * @returns The result produced by `executeCommandBinary`.
 * @throws Error if `OBSIDIAN_VAULT` is unset or empty. Because the function is
 *   `async`, callers observe this as a rejected promise.
 */
export async function executeObsidianCommandBinary(
  subcommand: string,
  args: string[] = [],
  options?: { timeout?: number }
): Promise<CLIResult> {
  const vaultName = process.env.OBSIDIAN_VAULT;
  if (!vaultName) {
    throw new Error('OBSIDIAN_VAULT environment variable not set');
  }

  const fullArgs = [subcommand, '--vault', vaultName, ...args];

  // NOTE(review): the executable path is the macOS application bundle location;
  // other platforms would need a different path — confirm intended scope.
  return executeCommandBinary({
    command: '/Applications/Obsidian.app/Contents/MacOS/obsidian',
    args: fullArgs,
    timeout: options?.timeout,
  });
}
*/ -function isBinaryContent(content: string): boolean { - if (content.length === 0) return false; +function isBinaryContent(buf: Buffer): boolean { + if (buf.length === 0) return false; + const sample = buf.slice(0, 8192); // Null bytes are never present in valid UTF-8 text - if (content.includes('\0')) return true; - // Sample the first 8KB for performance on large files - const sample = content.slice(0, 8192); + if (sample.includes(0x00)) return true; let nonPrintable = 0; for (let i = 0; i < sample.length; i++) { - const code = sample.charCodeAt(i); - // Allow tab (9), newline (10), carriage return (13), and standard printable range - if (code !== 9 && code !== 10 && code !== 13 && (code < 32 || code > 126) && (code < 128)) { + const byte = sample[i]; + // Allow tab (9), newline (10), carriage return (13), and standard printable ASCII + if (byte !== 9 && byte !== 10 && byte !== 13 && (byte < 32 || byte === 127)) { nonPrintable++; } } @@ -152,7 +151,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro // T031: Read note tool server.registerTool( 'obsidian_read_note', - 'Read the content of a note from the Obsidian vault. Specify either the note name (file) or full path (path). For large files (e.g. PDFs), use max_chars and offset to read in chunks and avoid exceeding context limits.', + 'Read the content of a note from the Obsidian vault. Specify either the note name (file) or full path (path). For large files (e.g. PDFs), use max_chars and offset to read in chunks and avoid exceeding context limits. Binary files (ZIP, images, compiled files, etc.) 
are automatically detected and returned as a base64-encoded string prefixed with "BASE64:" — the client must base64-decode the value to recover the original binary content.', { type: 'object', properties: { @@ -175,7 +174,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro }, }, createToolHandler( - 'Read the content of a note', + 'Read the content of a note. Binary files are returned as a base64-encoded string prefixed with "BASE64:" — decode it to recover the original binary content.', { type: 'object', properties: { @@ -205,19 +204,18 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro if (sanitized.file) cmdArgs.push(formatParam('file', sanitized.file as string)); if (sanitized.path) cmdArgs.push(formatParam('path', sanitized.path as string)); - const result = await executeObsidianCommand('read', cmdArgs); + // Use binary-safe executor so stdout is collected as a raw Buffer, + // preventing UTF-8 decoding from corrupting binary file content. + const result = await executeObsidianCommandBinary('read', cmdArgs); handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path }); - // Detect binary content — non-text files (ZIP, PDF raw bytes, images, etc.) - // will contain null bytes or a high proportion of non-printable characters. - const raw = result.stdout; - if (isBinaryContent(raw)) { - const identifier = sanitized.file || sanitized.path; + // Detect binary content from the raw buffer and return as base64 + if (result.stdoutBuffer && isBinaryContent(result.stdoutBuffer)) { return { content: [ { type: 'text', - text: `Cannot read "${identifier}": file appears to be binary (e.g. ZIP, image, compiled file). 
obsidian_read_note only supports text-based files such as Markdown, plain text, and PDF notes exported as text.`, + text: `BASE64:${result.stdoutBuffer.toString('base64')}`, }, ], }; @@ -225,6 +223,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro const offset: number = validated.offset ?? 0; const maxChars: number = validated.max_chars ?? 50000; + const raw = result.stdout; const totalChars = raw.length; const chunk = raw.slice(offset, offset + maxChars); const isTruncated = offset + maxChars < totalChars; diff --git a/src/utils/types.ts b/src/utils/types.ts index 405cd7d..888eff6 100644 --- a/src/utils/types.ts +++ b/src/utils/types.ts @@ -63,6 +63,7 @@ export enum ObsidianErrorType { */ export interface CLIResult { stdout: string; + stdoutBuffer?: Buffer; stderr: string; exitCode: number; timedOut?: boolean;