fix: return binary vault files as base64 in obsidian_read_note (fixes #9)

Previously, binary files (ZIP, images, compiled files) were read via
data.toString() which corrupted the bytes through UTF-8 decoding,
making the content unrecoverable on the client side.

Changes:
- executor.ts: add executeCommandBinary / executeObsidianCommandBinary
  that collect stdout chunks as raw Buffers instead of strings
- types.ts: add optional stdoutBuffer field to CLIResult
- file-operations.ts:
  - obsidian_read_note now uses executeObsidianCommandBinary so the
    raw bytes are preserved before any decoding happens
  - isBinaryContent() now operates on the raw Buffer (null-byte check
    plus a >10% non-printable-byte ratio, both on the first 8KB sample)
  - Binary files are returned as "BASE64:<base64string>" so the client
    can reliably decode back to the original binary
  - Tool descriptions updated to document the BASE64: prefix convention

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-04-30 18:43:00 -05:00
parent 4067520cd8
commit ef02d14f18
3 changed files with 94 additions and 21 deletions

View File

@@ -93,8 +93,81 @@ export async function executeCommand(cmd: CLICommand): Promise<CLIResult> {
} }
/** /**
* Execute Obsidian CLI command with vault context * Execute an Obsidian CLI command with timeout, collecting stdout as a raw Buffer.
* Use this when the output may be binary (e.g. reading non-text vault files).
*/ */
export async function executeCommandBinary(cmd: CLICommand): Promise<CLIResult> {
  // Spawns `cmd.command` with `cmd.args` through a shell and resolves (never
  // rejects) once the child closes or fails to spawn.
  //
  // Result fields:
  //   stdoutBuffer - the exact bytes written to stdout, untouched by decoding
  //   stdout       - the same bytes decoded as UTF-8 and trimmed
  //   stderr       - stderr decoded as UTF-8 and trimmed (spawn error message on spawn failure)
  //   exitCode     - the child's exit code; 1 on spawn failure or external signal kill
  //   timedOut     - true when the timeout fired and SIGTERM was sent
  const timeout = cmd.timeout || getCommandTimeout(cmd.command);

  return new Promise((resolve) => {
    const child = spawn(cmd.command, cmd.args, {
      cwd: cmd.cwd || process.cwd(),
      shell: true,
    });

    const stdoutChunks: Buffer[] = [];
    // stderr is buffered as raw bytes too: decoding each chunk separately would
    // corrupt a multi-byte UTF-8 character that straddles two chunks.
    const stderrChunks: Buffer[] = [];
    let timedOut = false;

    const timeoutId = setTimeout(() => {
      timedOut = true;
      child.kill('SIGTERM');
      logger.warn('CLI command timed out', { command: cmd.command, timeout });
    }, timeout);

    child.stdout?.on('data', (data: Buffer) => {
      stdoutChunks.push(Buffer.isBuffer(data) ? data : Buffer.from(data));
    });

    child.stderr?.on('data', (data) => {
      stderrChunks.push(Buffer.isBuffer(data) ? data : Buffer.from(data));
    });

    child.on('close', (code, signal) => {
      clearTimeout(timeoutId);
      const stdoutBuffer = Buffer.concat(stdoutChunks);
      // `code` is null when the child was terminated by a signal. If that was
      // not our own timeout kill, the output may be truncated, so report a
      // failure instead of letting `code || 0` masquerade as success.
      const killedExternally = code === null && signal !== null && !timedOut;
      resolve({
        stdout: stdoutBuffer.toString('utf8').trim(),
        stdoutBuffer,
        stderr: Buffer.concat(stderrChunks).toString('utf8').trim(),
        exitCode: killedExternally ? 1 : code || 0,
        timedOut,
      });
    });

    child.on('error', (error) => {
      clearTimeout(timeoutId);
      logger.error('CLI command spawn error', { error: error.message });
      resolve({
        stdout: '',
        stdoutBuffer: Buffer.alloc(0),
        stderr: error.message,
        exitCode: 1,
        timedOut: false,
      });
    });
  });
}
/**
 * Run an Obsidian CLI subcommand against the configured vault through the
 * binary-safe executor (`executeCommandBinary`).
 *
 * The final argument list is `[subcommand, '--vault', <OBSIDIAN_VAULT>, ...args]`.
 *
 * @param subcommand - CLI subcommand placed first in the argument list.
 * @param args - Extra arguments appended after the vault selector.
 * @param options - Optional settings; `timeout` is forwarded to the executor as-is.
 * @returns The result produced by `executeCommandBinary`.
 * @throws Error when the `OBSIDIAN_VAULT` environment variable is unset or empty.
 */
export async function executeObsidianCommandBinary(
  subcommand: string,
  args: string[] = [],
  options?: { timeout?: number }
): Promise<CLIResult> {
  const { OBSIDIAN_VAULT: vault } = process.env;
  if (!vault) {
    throw new Error('OBSIDIAN_VAULT environment variable not set');
  }

  const { timeout } = options ?? {};
  const argv = [subcommand, '--vault', vault].concat(args);

  return executeCommandBinary({
    command: '/Applications/Obsidian.app/Contents/MacOS/obsidian',
    args: argv,
    timeout,
  });
}
export async function executeObsidianCommand( export async function executeObsidianCommand(
subcommand: string, subcommand: string,
args: string[] = [], args: string[] = [],

View File

@@ -4,7 +4,7 @@
*/ */
import { ObsidianMCPServer, createToolHandler } from '../server.js'; import { ObsidianMCPServer, createToolHandler } from '../server.js';
import { executeObsidianCommand } from '../cli/executor.js'; import { executeObsidianCommand, executeObsidianCommandBinary } from '../cli/executor.js';
import { formatForMCP } from '../cli/parser.js'; import { formatForMCP } from '../cli/parser.js';
import { handleCLIResult } from '../utils/error-handler.js'; import { handleCLIResult } from '../utils/error-handler.js';
import { logger } from '../utils/logger.js'; import { logger } from '../utils/logger.js';
@@ -20,21 +20,20 @@ import { sanitizeParameters } from '../validation/sanitizer.js';
import { formatParam } from '../utils/cli-helpers.js'; import { formatParam } from '../utils/cli-helpers.js';
/** /**
* Detect binary content that cannot be meaningfully returned as text. * Detect binary content from a raw Buffer.
* Checks for null bytes (definitive binary marker) or a high ratio of * Checks for null bytes (definitive binary marker) or a high ratio of
* non-printable characters (catches ZIP, images, compiled files, etc.). * non-printable characters in the first 8KB (catches ZIP, images, compiled files, etc.).
*/ */
function isBinaryContent(content: string): boolean { function isBinaryContent(buf: Buffer): boolean {
if (content.length === 0) return false; if (buf.length === 0) return false;
const sample = buf.slice(0, 8192);
// Null bytes are never present in valid UTF-8 text // Null bytes are never present in valid UTF-8 text
if (content.includes('\0')) return true; if (sample.includes(0x00)) return true;
// Sample the first 8KB for performance on large files
const sample = content.slice(0, 8192);
let nonPrintable = 0; let nonPrintable = 0;
for (let i = 0; i < sample.length; i++) { for (let i = 0; i < sample.length; i++) {
const code = sample.charCodeAt(i); const byte = sample[i];
// Allow tab (9), newline (10), carriage return (13), and standard printable range // Allow tab (9), newline (10), carriage return (13), and standard printable ASCII
if (code !== 9 && code !== 10 && code !== 13 && (code < 32 || code > 126) && (code < 128)) { if (byte !== 9 && byte !== 10 && byte !== 13 && (byte < 32 || byte === 127)) {
nonPrintable++; nonPrintable++;
} }
} }
@@ -152,7 +151,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
// T031: Read note tool // T031: Read note tool
server.registerTool( server.registerTool(
'obsidian_read_note', 'obsidian_read_note',
'Read the content of a note from the Obsidian vault. Specify either the note name (file) or full path (path). For large files (e.g. PDFs), use max_chars and offset to read in chunks and avoid exceeding context limits.', 'Read the content of a note from the Obsidian vault. Specify either the note name (file) or full path (path). For large files (e.g. PDFs), use max_chars and offset to read in chunks and avoid exceeding context limits. Binary files (ZIP, images, compiled files, etc.) are automatically detected and returned as a base64-encoded string prefixed with "BASE64:" — the client must base64-decode the value to recover the original binary content.',
{ {
type: 'object', type: 'object',
properties: { properties: {
@@ -175,7 +174,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
}, },
}, },
createToolHandler( createToolHandler(
'Read the content of a note', 'Read the content of a note. Binary files are returned as a base64-encoded string prefixed with "BASE64:" — decode it to recover the original binary content.',
{ {
type: 'object', type: 'object',
properties: { properties: {
@@ -205,19 +204,18 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
if (sanitized.file) cmdArgs.push(formatParam('file', sanitized.file as string)); if (sanitized.file) cmdArgs.push(formatParam('file', sanitized.file as string));
if (sanitized.path) cmdArgs.push(formatParam('path', sanitized.path as string)); if (sanitized.path) cmdArgs.push(formatParam('path', sanitized.path as string));
const result = await executeObsidianCommand('read', cmdArgs); // Use binary-safe executor so stdout is collected as a raw Buffer,
// preventing UTF-8 decoding from corrupting binary file content.
const result = await executeObsidianCommandBinary('read', cmdArgs);
handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path }); handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path });
// Detect binary content — non-text files (ZIP, PDF raw bytes, images, etc.) // Detect binary content from the raw buffer and return as base64
// will contain null bytes or a high proportion of non-printable characters. if (result.stdoutBuffer && isBinaryContent(result.stdoutBuffer)) {
const raw = result.stdout;
if (isBinaryContent(raw)) {
const identifier = sanitized.file || sanitized.path;
return { return {
content: [ content: [
{ {
type: 'text', type: 'text',
text: `Cannot read "${identifier}": file appears to be binary (e.g. ZIP, image, compiled file). obsidian_read_note only supports text-based files such as Markdown, plain text, and PDF notes exported as text.`, text: `BASE64:${result.stdoutBuffer.toString('base64')}`,
}, },
], ],
}; };
@@ -225,6 +223,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
const offset: number = validated.offset ?? 0; const offset: number = validated.offset ?? 0;
const maxChars: number = validated.max_chars ?? 50000; const maxChars: number = validated.max_chars ?? 50000;
const raw = result.stdout;
const totalChars = raw.length; const totalChars = raw.length;
const chunk = raw.slice(offset, offset + maxChars); const chunk = raw.slice(offset, offset + maxChars);
const isTruncated = offset + maxChars < totalChars; const isTruncated = offset + maxChars < totalChars;

View File

@@ -63,6 +63,7 @@ export enum ObsidianErrorType {
*/ */
export interface CLIResult { export interface CLIResult {
stdout: string; stdout: string;
stdoutBuffer?: Buffer;
stderr: string; stderr: string;
exitCode: number; exitCode: number;
timedOut?: boolean; timedOut?: boolean;