fix: return binary vault files as base64 in obsidian_read_note (fixes #9)
Previously, binary files (ZIP, images, compiled files) were read via
data.toString() which corrupted the bytes through UTF-8 decoding,
making the content unrecoverable on the client side.
Changes:
- executor.ts: add executeCommandBinary / executeObsidianCommandBinary
that collect stdout chunks as raw Buffers instead of strings
- types.ts: add optional stdoutBuffer field to CLIResult
- file-operations.ts:
- obsidian_read_note now uses executeObsidianCommandBinary so the
raw bytes are preserved before any decoding happens
- isBinaryContent() now operates on the raw Buffer (null byte check
+ >10% non-printable byte ratio on first 8KB sample)
- Binary files are returned as "BASE64:<base64string>" so the client
can reliably decode back to the original binary
- Tool descriptions updated to document the BASE64: prefix convention
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -93,8 +93,81 @@ export async function executeCommand(cmd: CLICommand): Promise<CLIResult> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Execute Obsidian CLI command with vault context
|
* Execute an Obsidian CLI command with timeout, collecting stdout as a raw Buffer.
|
||||||
|
* Use this when the output may be binary (e.g. reading non-text vault files).
|
||||||
*/
|
*/
|
||||||
|
/**
 * Execute a CLI command with a timeout, collecting stdout as a raw Buffer.
 *
 * Use this when the output may be binary (e.g. reading non-text vault files):
 * stdout chunks are concatenated as bytes and exposed as `stdoutBuffer`, so no
 * UTF-8 decoding touches them. `stdout` still carries the UTF-8 decoded,
 * trimmed text for callers that expect a string.
 *
 * @param cmd - Command, arguments, optional working directory and optional
 *   timeout in milliseconds. When `cmd.timeout` is falsy the timeout comes
 *   from `getCommandTimeout(cmd.command)`.
 * @returns A promise that always resolves (spawn failures are reported through
 *   `exitCode: 1` and `stderr`, not through rejection). `exitCode` is non-zero
 *   whenever the child did not exit normally with status 0.
 */
export async function executeCommandBinary(cmd: CLICommand): Promise<CLIResult> {
  const timeout = cmd.timeout || getCommandTimeout(cmd.command);

  return new Promise((resolve) => {
    // NOTE(review): `shell: true` means the arguments are interpreted by a
    // shell, so they must already be quoted/escaped by the caller — confirm
    // that every caller sanitizes its arguments before reaching this point.
    const child = spawn(cmd.command, cmd.args, {
      cwd: cmd.cwd || process.cwd(),
      shell: true,
    });

    const stdoutChunks: Buffer[] = [];
    let stderr = '';
    let timedOut = false;

    const timeoutId = setTimeout(() => {
      timedOut = true;
      child.kill('SIGTERM');
      logger.warn('CLI command timed out', { command: cmd.command, timeout });
    }, timeout);

    // Keep stdout as bytes; decoding each chunk separately would corrupt
    // binary data and multi-byte sequences split across chunk boundaries.
    child.stdout?.on('data', (data: Buffer) => {
      stdoutChunks.push(Buffer.isBuffer(data) ? data : Buffer.from(data));
    });

    child.stderr?.on('data', (data) => {
      stderr += data.toString();
    });

    child.on('close', (code) => {
      clearTimeout(timeoutId);
      const stdoutBuffer = Buffer.concat(stdoutChunks);
      resolve({
        stdout: stdoutBuffer.toString('utf8').trim(),
        stdoutBuffer,
        stderr: stderr.trim(),
        // `code` is null when the child was terminated by a signal (including
        // the SIGTERM sent on timeout). The previous `code || 0` reported that
        // as success; report a failure status instead.
        exitCode: code ?? 1,
        timedOut,
      });
    });

    child.on('error', (error) => {
      clearTimeout(timeoutId);
      logger.error('CLI command spawn error', { error: error.message });
      resolve({
        stdout: '',
        stdoutBuffer: Buffer.alloc(0),
        stderr: error.message,
        exitCode: 1,
        timedOut: false,
      });
    });
  });
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute Obsidian CLI command with vault context, collecting stdout as a raw Buffer.
|
||||||
|
*/
|
||||||
|
/**
 * Run an Obsidian CLI subcommand against the vault named by the
 * `OBSIDIAN_VAULT` environment variable, keeping stdout as raw bytes.
 *
 * The final argument list is `[subcommand, '--vault', <vault>, ...args]` and
 * the call is delegated to `executeCommandBinary`.
 *
 * @param subcommand - Obsidian CLI subcommand placed first in the argument list.
 * @param args - Extra arguments appended after the vault selection.
 * @param options - Optional overrides; `timeout` is forwarded unchanged.
 * @returns The result produced by `executeCommandBinary`.
 * @throws Error if `OBSIDIAN_VAULT` is unset or empty.
 */
export async function executeObsidianCommandBinary(
  subcommand: string,
  args: string[] = [],
  options?: { timeout?: number }
): Promise<CLIResult> {
  const vault = process.env.OBSIDIAN_VAULT;
  if (vault === undefined || vault === '') {
    throw new Error('OBSIDIAN_VAULT environment variable not set');
  }

  // Vault selection always precedes the caller-supplied arguments.
  const argv = [subcommand, '--vault', vault].concat(args);
  const requestedTimeout = options?.timeout;

  return executeCommandBinary({
    command: '/Applications/Obsidian.app/Contents/MacOS/obsidian',
    args: argv,
    timeout: requestedTimeout,
  });
}
|
||||||
export async function executeObsidianCommand(
|
export async function executeObsidianCommand(
|
||||||
subcommand: string,
|
subcommand: string,
|
||||||
args: string[] = [],
|
args: string[] = [],
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { ObsidianMCPServer, createToolHandler } from '../server.js';
|
import { ObsidianMCPServer, createToolHandler } from '../server.js';
|
||||||
import { executeObsidianCommand } from '../cli/executor.js';
|
import { executeObsidianCommand, executeObsidianCommandBinary } from '../cli/executor.js';
|
||||||
import { formatForMCP } from '../cli/parser.js';
|
import { formatForMCP } from '../cli/parser.js';
|
||||||
import { handleCLIResult } from '../utils/error-handler.js';
|
import { handleCLIResult } from '../utils/error-handler.js';
|
||||||
import { logger } from '../utils/logger.js';
|
import { logger } from '../utils/logger.js';
|
||||||
@@ -20,21 +20,20 @@ import { sanitizeParameters } from '../validation/sanitizer.js';
|
|||||||
import { formatParam } from '../utils/cli-helpers.js';
|
import { formatParam } from '../utils/cli-helpers.js';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Detect binary content that cannot be meaningfully returned as text.
|
* Detect binary content from a raw Buffer.
|
||||||
* Checks for null bytes (definitive binary marker) or a high ratio of
|
* Checks for null bytes (definitive binary marker) or a high ratio of
|
||||||
* non-printable characters (catches ZIP, images, compiled files, etc.).
|
* non-printable characters in the first 8KB (catches ZIP, images, compiled files, etc.).
|
||||||
*/
|
*/
|
||||||
function isBinaryContent(content: string): boolean {
|
function isBinaryContent(buf: Buffer): boolean {
|
||||||
if (content.length === 0) return false;
|
if (buf.length === 0) return false;
|
||||||
|
const sample = buf.slice(0, 8192);
|
||||||
// Null bytes are never present in valid UTF-8 text
|
// Null bytes are never present in valid UTF-8 text
|
||||||
if (content.includes('\0')) return true;
|
if (sample.includes(0x00)) return true;
|
||||||
// Sample the first 8KB for performance on large files
|
|
||||||
const sample = content.slice(0, 8192);
|
|
||||||
let nonPrintable = 0;
|
let nonPrintable = 0;
|
||||||
for (let i = 0; i < sample.length; i++) {
|
for (let i = 0; i < sample.length; i++) {
|
||||||
const code = sample.charCodeAt(i);
|
const byte = sample[i];
|
||||||
// Allow tab (9), newline (10), carriage return (13), and standard printable range
|
// Allow tab (9), newline (10), carriage return (13), and standard printable ASCII
|
||||||
if (code !== 9 && code !== 10 && code !== 13 && (code < 32 || code > 126) && (code < 128)) {
|
if (byte !== 9 && byte !== 10 && byte !== 13 && (byte < 32 || byte === 127)) {
|
||||||
nonPrintable++;
|
nonPrintable++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -152,7 +151,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
|
|||||||
// T031: Read note tool
|
// T031: Read note tool
|
||||||
server.registerTool(
|
server.registerTool(
|
||||||
'obsidian_read_note',
|
'obsidian_read_note',
|
||||||
'Read the content of a note from the Obsidian vault. Specify either the note name (file) or full path (path). For large files (e.g. PDFs), use max_chars and offset to read in chunks and avoid exceeding context limits.',
|
'Read the content of a note from the Obsidian vault. Specify either the note name (file) or full path (path). For large files (e.g. PDFs), use max_chars and offset to read in chunks and avoid exceeding context limits. Binary files (ZIP, images, compiled files, etc.) are automatically detected and returned as a base64-encoded string prefixed with "BASE64:" — the client must base64-decode the value to recover the original binary content.',
|
||||||
{
|
{
|
||||||
type: 'object',
|
type: 'object',
|
||||||
properties: {
|
properties: {
|
||||||
@@ -175,7 +174,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
createToolHandler(
|
createToolHandler(
|
||||||
'Read the content of a note',
|
'Read the content of a note. Binary files are returned as a base64-encoded string prefixed with "BASE64:" — decode it to recover the original binary content.',
|
||||||
{
|
{
|
||||||
type: 'object',
|
type: 'object',
|
||||||
properties: {
|
properties: {
|
||||||
@@ -205,19 +204,18 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
|
|||||||
if (sanitized.file) cmdArgs.push(formatParam('file', sanitized.file as string));
|
if (sanitized.file) cmdArgs.push(formatParam('file', sanitized.file as string));
|
||||||
if (sanitized.path) cmdArgs.push(formatParam('path', sanitized.path as string));
|
if (sanitized.path) cmdArgs.push(formatParam('path', sanitized.path as string));
|
||||||
|
|
||||||
const result = await executeObsidianCommand('read', cmdArgs);
|
// Use binary-safe executor so stdout is collected as a raw Buffer,
|
||||||
|
// preventing UTF-8 decoding from corrupting binary file content.
|
||||||
|
const result = await executeObsidianCommandBinary('read', cmdArgs);
|
||||||
handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path });
|
handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path });
|
||||||
|
|
||||||
// Detect binary content — non-text files (ZIP, PDF raw bytes, images, etc.)
|
// Detect binary content from the raw buffer and return as base64
|
||||||
// will contain null bytes or a high proportion of non-printable characters.
|
if (result.stdoutBuffer && isBinaryContent(result.stdoutBuffer)) {
|
||||||
const raw = result.stdout;
|
|
||||||
if (isBinaryContent(raw)) {
|
|
||||||
const identifier = sanitized.file || sanitized.path;
|
|
||||||
return {
|
return {
|
||||||
content: [
|
content: [
|
||||||
{
|
{
|
||||||
type: 'text',
|
type: 'text',
|
||||||
text: `Cannot read "${identifier}": file appears to be binary (e.g. ZIP, image, compiled file). obsidian_read_note only supports text-based files such as Markdown, plain text, and PDF notes exported as text.`,
|
text: `BASE64:${result.stdoutBuffer.toString('base64')}`,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
};
|
};
|
||||||
@@ -225,6 +223,7 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
|
|||||||
|
|
||||||
const offset: number = validated.offset ?? 0;
|
const offset: number = validated.offset ?? 0;
|
||||||
const maxChars: number = validated.max_chars ?? 50000;
|
const maxChars: number = validated.max_chars ?? 50000;
|
||||||
|
const raw = result.stdout;
|
||||||
const totalChars = raw.length;
|
const totalChars = raw.length;
|
||||||
const chunk = raw.slice(offset, offset + maxChars);
|
const chunk = raw.slice(offset, offset + maxChars);
|
||||||
const isTruncated = offset + maxChars < totalChars;
|
const isTruncated = offset + maxChars < totalChars;
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ export enum ObsidianErrorType {
|
|||||||
*/
|
*/
|
||||||
export interface CLIResult {
|
export interface CLIResult {
|
||||||
stdout: string;
|
stdout: string;
|
||||||
|
stdoutBuffer?: Buffer;
|
||||||
stderr: string;
|
stderr: string;
|
||||||
exitCode: number;
|
exitCode: number;
|
||||||
timedOut?: boolean;
|
timedOut?: boolean;
|
||||||
|
|||||||
Reference in New Issue
Block a user