fix: return binary vault files as base64 in obsidian_read_note (fixes #9)
Previously, binary files (ZIP, images, compiled files) were read via
data.toString(), which corrupted the bytes through UTF-8 decoding and
made the content unrecoverable on the client side.
Changes:
- executor.ts: add executeCommandBinary / executeObsidianCommandBinary
that collect stdout chunks as raw Buffers instead of strings
- types.ts: add optional stdoutBuffer field to CLIResult
- file-operations.ts:
- obsidian_read_note now uses executeObsidianCommandBinary so the
raw bytes are preserved before any decoding happens
- isBinaryContent() now operates on the raw Buffer (null byte check
+ >10% non-printable byte ratio on first 8KB sample)
- Binary files are returned as "BASE64:<base64string>" so the client
can reliably decode back to the original binary
- Tool descriptions updated to document the BASE64: prefix convention
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -93,8 +93,81 @@ export async function executeCommand(cmd: CLICommand): Promise<CLIResult> {
|
||||
}
|
||||
|
||||
/**
 * Execute a CLI command with a timeout, collecting stdout as a raw Buffer.
 *
 * Use this when the output may be binary (e.g. reading non-text vault files):
 * stdout chunks are concatenated as bytes, so nothing is lost to UTF-8
 * decoding before the caller has a chance to inspect the data.
 *
 * The returned promise never rejects. Spawn failures are reported through
 * `stderr` with `exitCode` 1 and an empty `stdoutBuffer`.
 *
 * @param cmd - Command, arguments, optional working directory and optional
 *   timeout in milliseconds. When `timeout` is unset (or 0), the value from
 *   `getCommandTimeout(cmd.command)` is used instead.
 * @returns The UTF-8 decoded and trimmed `stdout`, the untouched
 *   `stdoutBuffer`, the trimmed `stderr`, the `exitCode`, and whether the
 *   timeout fired (`timedOut`).
 */
export async function executeCommandBinary(cmd: CLICommand): Promise<CLIResult> {
  const timeout = cmd.timeout || getCommandTimeout(cmd.command);

  return new Promise((resolve) => {
    // NOTE(review): with `shell: true` the command and args are joined into a
    // single shell string without escaping, so args containing spaces or shell
    // metacharacters are interpreted by the shell. Left unchanged because
    // callers may already pre-quote their arguments — confirm before switching
    // to `shell: false`.
    const child = spawn(cmd.command, cmd.args, {
      cwd: cmd.cwd || process.cwd(),
      shell: true,
    });

    const stdoutChunks: Buffer[] = [];
    let stderr = '';
    let timedOut = false;

    const timeoutId = setTimeout(() => {
      timedOut = true;
      child.kill('SIGTERM');
      logger.warn('CLI command timed out', { command: cmd.command, timeout });
    }, timeout);

    // Keep stdout as bytes; decoding here would corrupt binary payloads.
    child.stdout?.on('data', (data: Buffer) => {
      stdoutChunks.push(Buffer.isBuffer(data) ? data : Buffer.from(data));
    });

    // stderr is diagnostic text only, so decoding it chunk by chunk is fine.
    child.stderr?.on('data', (data) => {
      stderr += data.toString();
    });

    child.on('close', (code) => {
      clearTimeout(timeoutId);
      const stdoutBuffer = Buffer.concat(stdoutChunks);
      resolve({
        stdout: stdoutBuffer.toString('utf8').trim(),
        stdoutBuffer,
        stderr: stderr.trim(),
        // `code` is null when the child was terminated by a signal (including
        // our own SIGTERM on timeout). Report that as a failure rather than
        // letting it collapse to 0, which would look like a clean exit with
        // possibly truncated output.
        exitCode: code ?? 1,
        timedOut,
      });
    });

    child.on('error', (error) => {
      clearTimeout(timeoutId);
      logger.error('CLI command spawn error', { error: error.message });
      resolve({
        stdout: '',
        stdoutBuffer: Buffer.alloc(0),
        stderr: error.message,
        exitCode: 1,
        timedOut: false,
      });
    });
  });
}
|
||||
|
||||
/**
 * Run an Obsidian CLI subcommand against the configured vault and return the
 * result with stdout preserved as a raw Buffer.
 *
 * The vault name is read from the `OBSIDIAN_VAULT` environment variable and
 * passed to the CLI as `--vault <name>` directly after the subcommand; any
 * extra `args` follow it.
 *
 * @param subcommand - Obsidian CLI subcommand to run.
 * @param args - Additional arguments appended after the vault selection.
 * @param options - Optional settings; `timeout` is forwarded to
 *   `executeCommandBinary`.
 * @returns The result produced by `executeCommandBinary`.
 * @throws Error when `OBSIDIAN_VAULT` is not set (or is empty).
 */
export async function executeObsidianCommandBinary(
  subcommand: string,
  args: string[] = [],
  options?: { timeout?: number }
): Promise<CLIResult> {
  const { OBSIDIAN_VAULT: vault } = process.env;

  if (!vault) {
    throw new Error('OBSIDIAN_VAULT environment variable not set');
  }

  // Subcommand first, then the vault selector, then caller-supplied arguments.
  const cliArgs = [subcommand, '--vault', vault].concat(args);

  return executeCommandBinary({
    command: '/Applications/Obsidian.app/Contents/MacOS/obsidian',
    args: cliArgs,
    timeout: options?.timeout,
  });
}
|
||||
export async function executeObsidianCommand(
|
||||
subcommand: string,
|
||||
args: string[] = [],
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
*/
|
||||
|
||||
import { ObsidianMCPServer, createToolHandler } from '../server.js';
|
||||
import { executeObsidianCommand } from '../cli/executor.js';
|
||||
import { executeObsidianCommand, executeObsidianCommandBinary } from '../cli/executor.js';
|
||||
import { formatForMCP } from '../cli/parser.js';
|
||||
import { handleCLIResult } from '../utils/error-handler.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
@@ -20,21 +20,20 @@ import { sanitizeParameters } from '../validation/sanitizer.js';
|
||||
import { formatParam } from '../utils/cli-helpers.js';
|
||||
|
||||
/**
|
||||
* Detect binary content that cannot be meaningfully returned as text.
|
||||
* Detect binary content from a raw Buffer.
|
||||
* Checks for null bytes (definitive binary marker) or a high ratio of
|
||||
* non-printable characters (catches ZIP, images, compiled files, etc.).
|
||||
* non-printable characters in the first 8KB (catches ZIP, images, compiled files, etc.).
|
||||
*/
|
||||
function isBinaryContent(content: string): boolean {
|
||||
if (content.length === 0) return false;
|
||||
function isBinaryContent(buf: Buffer): boolean {
|
||||
if (buf.length === 0) return false;
|
||||
const sample = buf.slice(0, 8192);
|
||||
// Null bytes are never present in valid UTF-8 text
|
||||
if (content.includes('\0')) return true;
|
||||
// Sample the first 8KB for performance on large files
|
||||
const sample = content.slice(0, 8192);
|
||||
if (sample.includes(0x00)) return true;
|
||||
let nonPrintable = 0;
|
||||
for (let i = 0; i < sample.length; i++) {
|
||||
const code = sample.charCodeAt(i);
|
||||
// Allow tab (9), newline (10), carriage return (13), and standard printable range
|
||||
if (code !== 9 && code !== 10 && code !== 13 && (code < 32 || code > 126) && (code < 128)) {
|
||||
const byte = sample[i];
|
||||
// Allow tab (9), newline (10), carriage return (13), and standard printable ASCII
|
||||
if (byte !== 9 && byte !== 10 && byte !== 13 && (byte < 32 || byte === 127)) {
|
||||
nonPrintable++;
|
||||
}
|
||||
}
|
||||
@@ -152,7 +151,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
|
||||
// T031: Read note tool
|
||||
server.registerTool(
|
||||
'obsidian_read_note',
|
||||
'Read the content of a note from the Obsidian vault. Specify either the note name (file) or full path (path). For large files (e.g. PDFs), use max_chars and offset to read in chunks and avoid exceeding context limits.',
|
||||
'Read the content of a note from the Obsidian vault. Specify either the note name (file) or full path (path). For large files (e.g. PDFs), use max_chars and offset to read in chunks and avoid exceeding context limits. Binary files (ZIP, images, compiled files, etc.) are automatically detected and returned as a base64-encoded string prefixed with "BASE64:" — the client must base64-decode the value to recover the original binary content.',
|
||||
{
|
||||
type: 'object',
|
||||
properties: {
|
||||
@@ -175,7 +174,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
|
||||
},
|
||||
},
|
||||
createToolHandler(
|
||||
'Read the content of a note',
|
||||
'Read the content of a note. Binary files are returned as a base64-encoded string prefixed with "BASE64:" — decode it to recover the original binary content.',
|
||||
{
|
||||
type: 'object',
|
||||
properties: {
|
||||
@@ -205,19 +204,18 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
|
||||
if (sanitized.file) cmdArgs.push(formatParam('file', sanitized.file as string));
|
||||
if (sanitized.path) cmdArgs.push(formatParam('path', sanitized.path as string));
|
||||
|
||||
const result = await executeObsidianCommand('read', cmdArgs);
|
||||
// Use binary-safe executor so stdout is collected as a raw Buffer,
|
||||
// preventing UTF-8 decoding from corrupting binary file content.
|
||||
const result = await executeObsidianCommandBinary('read', cmdArgs);
|
||||
handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path });
|
||||
|
||||
// Detect binary content — non-text files (ZIP, PDF raw bytes, images, etc.)
|
||||
// will contain null bytes or a high proportion of non-printable characters.
|
||||
const raw = result.stdout;
|
||||
if (isBinaryContent(raw)) {
|
||||
const identifier = sanitized.file || sanitized.path;
|
||||
// Detect binary content from the raw buffer and return as base64
|
||||
if (result.stdoutBuffer && isBinaryContent(result.stdoutBuffer)) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: `Cannot read "${identifier}": file appears to be binary (e.g. ZIP, image, compiled file). obsidian_read_note only supports text-based files such as Markdown, plain text, and PDF notes exported as text.`,
|
||||
text: `BASE64:${result.stdoutBuffer.toString('base64')}`,
|
||||
},
|
||||
],
|
||||
};
|
||||
@@ -225,6 +223,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
|
||||
|
||||
const offset: number = validated.offset ?? 0;
|
||||
const maxChars: number = validated.max_chars ?? 50000;
|
||||
const raw = result.stdout;
|
||||
const totalChars = raw.length;
|
||||
const chunk = raw.slice(offset, offset + maxChars);
|
||||
const isTruncated = offset + maxChars < totalChars;
|
||||
|
||||
@@ -63,6 +63,7 @@ export enum ObsidianErrorType {
|
||||
*/
|
||||
export interface CLIResult {
|
||||
stdout: string;
|
||||
stdoutBuffer?: Buffer;
|
||||
stderr: string;
|
||||
exitCode: number;
|
||||
timedOut?: boolean;
|
||||
|
||||
Reference in New Issue
Block a user