From 4067520cd81ff29753bc604e185a2c3a3cb60c33 Mon Sep 17 00:00:00 2001 From: "Peter.Morton" Date: Thu, 30 Apr 2026 18:39:32 -0500 Subject: [PATCH] fix: detect and reject binary files in obsidian_read_note fixes #9 When obsidian_read_note read a binary file (ZIP, image, compiled binary, etc.) the CLI returned raw bytes that were corrupted by text decoding, producing an unusable response. Added isBinaryContent() helper that checks for null bytes (definitive binary marker) and a >10% ratio of non-printable characters in the first 8KB of content. When binary content is detected the tool returns a clear error message instead of garbled bytes. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/tools/file-operations.ts | 42 +++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/tools/file-operations.ts b/src/tools/file-operations.ts index 8f9d484..b0b6c6b 100644 --- a/src/tools/file-operations.ts +++ b/src/tools/file-operations.ts @@ -19,6 +19,28 @@ import { import { sanitizeParameters } from '../validation/sanitizer.js'; import { formatParam } from '../utils/cli-helpers.js'; +/** + * Detect binary content that cannot be meaningfully returned as text. + * Checks for null bytes (definitive binary marker) or a high ratio of + * non-printable characters (catches ZIP, images, compiled files, etc.). 
/**
 * Detect binary content that cannot be meaningfully returned as text.
 *
 * Two heuristics are applied:
 *  1. Any NUL character anywhere in the content is treated as a definitive
 *     binary marker.
 *  2. Otherwise the first 8192 UTF-16 code units are sampled and the content
 *     is considered binary when more than 10% of them are non-printable.
 *
 * "Non-printable" means C0 control characters other than tab, line feed and
 * carriage return, DEL (U+007F), C1 control characters (U+0080–U+009F), and
 * the Unicode replacement character (U+FFFD). The replacement character is
 * what invalid byte sequences turn into when raw bytes are decoded as UTF-8,
 * so NUL-free binary data (e.g. a compressed stream) shows up as long runs of
 * U+FFFD rather than as ASCII control characters.
 *
 * All other non-ASCII characters are accepted so that ordinary Unicode text
 * (accents, CJK, emoji, …) is never misclassified.
 *
 * @param content - Already-decoded file content to inspect.
 * @returns `true` when the content looks binary, `false` for empty or
 *   text-like content.
 */
function isBinaryContent(content: string): boolean {
  if (content.length === 0) return false;

  // NUL is technically a valid code point, but it practically never occurs in
  // real text files while being ubiquitous in binary formats.
  if (content.includes('\0')) return true;

  // Only inspect a bounded prefix so very large notes stay cheap to check.
  const sampleSize = 8192;
  const maxNonPrintableRatio = 0.1;
  const sampleLength = Math.min(content.length, sampleSize);

  let nonPrintable = 0;
  for (let i = 0; i < sampleLength; i++) {
    const code = content.charCodeAt(i);

    // Tab (9), line feed (10) and carriage return (13) are normal in text.
    if (code === 9 || code === 10 || code === 13) continue;

    const isC0ControlOrDel = code < 32 || code === 127;
    const isC1Control = code >= 128 && code <= 159;
    // U+FFFD marks bytes that could not be decoded as text.
    const isReplacementChar = code === 0xfffd;

    if (isC0ControlOrDel || isC1Control || isReplacementChar) {
      nonPrintable++;
    }
  }

  return nonPrintable / sampleLength > maxNonPrintableRatio;
}