fix: return binary vault files as base64 in obsidian_read_note fixes #9

Previously, binary files (ZIP, images, compiled files) were read via
data.toString() which corrupted the bytes through UTF-8 decoding,
making the content unrecoverable on the client side.

Changes:
- executor.ts: add executeCommandBinary / executeObsidianCommandBinary
  that collect stdout chunks as raw Buffers instead of strings
- types.ts: add optional stdoutBuffer field to CLIResult
- file-operations.ts:
  - obsidian_read_note now uses executeObsidianCommandBinary so the
    raw bytes are preserved before any decoding happens
  - isBinaryContent() now operates on the raw Buffer (null byte check
    + >10% non-printable byte ratio on first 8KB sample)
  - Binary files are returned as "BASE64:<base64string>" so the client
    can reliably decode back to the original binary
  - Tool descriptions updated to document the BASE64: prefix convention

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-04-30 18:43:00 -05:00
parent 4067520cd8
commit ef02d14f18
3 changed files with 94 additions and 21 deletions

View File

@@ -93,8 +93,81 @@ export async function executeCommand(cmd: CLICommand): Promise<CLIResult> {
}
/**
* Execute Obsidian CLI command with vault context
* Execute an Obsidian CLI command with timeout, collecting stdout as a raw Buffer.
* Use this when the output may be binary (e.g. reading non-text vault files).
*/
export async function executeCommandBinary(cmd: CLICommand): Promise<CLIResult> {
const timeout = cmd.timeout || getCommandTimeout(cmd.command);
return new Promise((resolve) => {
const child = spawn(cmd.command, cmd.args, {
cwd: cmd.cwd || process.cwd(),
shell: true,
});
const stdoutChunks: Buffer[] = [];
let stderr = '';
let timedOut = false;
const timeoutId = setTimeout(() => {
timedOut = true;
child.kill('SIGTERM');
logger.warn('CLI command timed out', { command: cmd.command, timeout });
}, timeout);
child.stdout?.on('data', (data: Buffer) => {
stdoutChunks.push(Buffer.isBuffer(data) ? data : Buffer.from(data));
});
child.stderr?.on('data', (data) => {
stderr += data.toString();
});
child.on('close', (code) => {
clearTimeout(timeoutId);
const stdoutBuffer = Buffer.concat(stdoutChunks);
resolve({
stdout: stdoutBuffer.toString('utf8').trim(),
stdoutBuffer,
stderr: stderr.trim(),
exitCode: code || 0,
timedOut,
});
});
child.on('error', (error) => {
clearTimeout(timeoutId);
logger.error('CLI command spawn error', { error: error.message });
resolve({
stdout: '',
stdoutBuffer: Buffer.alloc(0),
stderr: error.message,
exitCode: 1,
timedOut: false,
});
});
});
}
/**
* Execute Obsidian CLI command with vault context, collecting stdout as a raw Buffer.
*/
export async function executeObsidianCommandBinary(
subcommand: string,
args: string[] = [],
options?: { timeout?: number }
): Promise<CLIResult> {
const vaultName = process.env.OBSIDIAN_VAULT;
if (!vaultName) {
throw new Error('OBSIDIAN_VAULT environment variable not set');
}
const fullArgs = [subcommand, '--vault', vaultName, ...args];
return executeCommandBinary({
command: '/Applications/Obsidian.app/Contents/MacOS/obsidian',
args: fullArgs,
timeout: options?.timeout,
});
}
export async function executeObsidianCommand(
subcommand: string,
args: string[] = [],

View File

@@ -4,7 +4,7 @@
*/
import { ObsidianMCPServer, createToolHandler } from '../server.js';
import { executeObsidianCommand } from '../cli/executor.js';
import { executeObsidianCommand, executeObsidianCommandBinary } from '../cli/executor.js';
import { formatForMCP } from '../cli/parser.js';
import { handleCLIResult } from '../utils/error-handler.js';
import { logger } from '../utils/logger.js';
@@ -20,21 +20,20 @@ import { sanitizeParameters } from '../validation/sanitizer.js';
import { formatParam } from '../utils/cli-helpers.js';
/**
* Detect binary content that cannot be meaningfully returned as text.
* Detect binary content from a raw Buffer.
* Checks for null bytes (definitive binary marker) or a high ratio of
* non-printable characters (catches ZIP, images, compiled files, etc.).
* non-printable characters in the first 8KB (catches ZIP, images, compiled files, etc.).
*/
function isBinaryContent(content: string): boolean {
if (content.length === 0) return false;
function isBinaryContent(buf: Buffer): boolean {
if (buf.length === 0) return false;
const sample = buf.slice(0, 8192);
// Null bytes are never present in valid UTF-8 text
if (content.includes('\0')) return true;
// Sample the first 8KB for performance on large files
const sample = content.slice(0, 8192);
if (sample.includes(0x00)) return true;
let nonPrintable = 0;
for (let i = 0; i < sample.length; i++) {
const code = sample.charCodeAt(i);
// Allow tab (9), newline (10), carriage return (13), and standard printable range
if (code !== 9 && code !== 10 && code !== 13 && (code < 32 || code > 126) && (code < 128)) {
const byte = sample[i];
// Allow tab (9), newline (10), carriage return (13), and standard printable ASCII
if (byte !== 9 && byte !== 10 && byte !== 13 && (byte < 32 || byte === 127)) {
nonPrintable++;
}
}
@@ -152,7 +151,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
// T031: Read note tool
server.registerTool(
'obsidian_read_note',
'Read the content of a note from the Obsidian vault. Specify either the note name (file) or full path (path). For large files (e.g. PDFs), use max_chars and offset to read in chunks and avoid exceeding context limits.',
'Read the content of a note from the Obsidian vault. Specify either the note name (file) or full path (path). For large files (e.g. PDFs), use max_chars and offset to read in chunks and avoid exceeding context limits. Binary files (ZIP, images, compiled files, etc.) are automatically detected and returned as a base64-encoded string prefixed with "BASE64:" — the client must base64-decode the value to recover the original binary content.',
{
type: 'object',
properties: {
@@ -175,7 +174,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
},
},
createToolHandler(
'Read the content of a note',
'Read the content of a note. Binary files are returned as a base64-encoded string prefixed with "BASE64:" — decode it to recover the original binary content.',
{
type: 'object',
properties: {
@@ -205,19 +204,18 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
if (sanitized.file) cmdArgs.push(formatParam('file', sanitized.file as string));
if (sanitized.path) cmdArgs.push(formatParam('path', sanitized.path as string));
const result = await executeObsidianCommand('read', cmdArgs);
// Use binary-safe executor so stdout is collected as a raw Buffer,
// preventing UTF-8 decoding from corrupting binary file content.
const result = await executeObsidianCommandBinary('read', cmdArgs);
handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path });
// Detect binary content — non-text files (ZIP, PDF raw bytes, images, etc.)
// will contain null bytes or a high proportion of non-printable characters.
const raw = result.stdout;
if (isBinaryContent(raw)) {
const identifier = sanitized.file || sanitized.path;
// Detect binary content from the raw buffer and return as base64
if (result.stdoutBuffer && isBinaryContent(result.stdoutBuffer)) {
return {
content: [
{
type: 'text',
text: `Cannot read "${identifier}": file appears to be binary (e.g. ZIP, image, compiled file). obsidian_read_note only supports text-based files such as Markdown, plain text, and PDF notes exported as text.`,
text: `BASE64:${result.stdoutBuffer.toString('base64')}`,
},
],
};
@@ -225,6 +223,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
const offset: number = validated.offset ?? 0;
const maxChars: number = validated.max_chars ?? 50000;
const raw = result.stdout;
const totalChars = raw.length;
const chunk = raw.slice(offset, offset + maxChars);
const isTruncated = offset + maxChars < totalChars;

View File

@@ -63,6 +63,7 @@ export enum ObsidianErrorType {
*/
export interface CLIResult {
stdout: string;
stdoutBuffer?: Buffer;
stderr: string;
exitCode: number;
timedOut?: boolean;