fix: detect and reject binary files in obsidian_read_note fixes #9

When obsidian_read_note read a binary file (ZIP, image, compiled binary, etc.), the CLI returned raw bytes that were corrupted by text decoding, producing an unusable response. Added an isBinaryContent() helper that flags content containing null bytes (a definitive binary marker) or a >10% ratio of non-printable characters in the first 8KB of content. When binary content is detected, the tool returns a clear error message instead of garbled bytes. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-30 18:39:32 -05:00
parent 8bc0094604
commit 4067520cd8
1 changed files with 39 additions and 3 deletions
--- a/src/tools/file-operations.ts
+++ b/src/tools/file-operations.ts
@@ -19,6 +19,28 @@ import {
 import { sanitizeParameters } from '../validation/sanitizer.js';
 import { formatParam } from '../utils/cli-helpers.js';
 /**
 * Detect binary content that cannot be meaningfully returned as text.
 *
 * The input is text that has already been decoded, so binary data shows up
 * in one of three ways:
 *  - NUL characters (treated as a definitive binary marker),
 *  - control characters (C0 other than tab/LF/CR, DEL, and the C1 range),
 *  - U+FFFD replacement characters left behind by a lossy decode of bytes
 *    that were not valid text.
 *
 * Only the first 8192 UTF-16 code units are inspected for the ratio check.
 *
 * @param content Decoded text to inspect.
 * @returns `true` if `content` contains a NUL anywhere, or if more than 10%
 *   of the sampled code units are control/replacement characters; `false`
 *   otherwise (including for the empty string).
 */
 function isBinaryContent(content: string): boolean {
  if (content.length === 0) return false;
  // NUL is technically encodable in UTF-8, but real text files do not contain
  // it, so a single occurrence anywhere is treated as conclusive.
  if (content.includes('\0')) return true;
  // Sample the first 8KB for performance on large files
  const sample = content.slice(0, 8192);
  let suspicious = 0;
  for (let i = 0; i < sample.length; i++) {
    const code = sample.charCodeAt(i);
    // Tab (9), newline (10) and carriage return (13) are ordinary text.
    if (code === 9 || code === 10 || code === 13) continue;
    const isC0OrDel = code < 32 || code === 127;
    // C1 controls appear when high bytes are decoded one-to-one (Latin-1 style).
    const isC1Control = code >= 0x80 && code <= 0x9f;
    // U+FFFD marks bytes that could not be decoded as text.
    const isReplacementChar = code === 0xfffd;
    if (isC0OrDel || isC1Control || isReplacementChar) {
      suspicious++;
    }
  }
  // Printable ASCII and all other non-ASCII characters (accents, CJK, emoji
  // surrogates, ...) count as text, so legitimate Unicode notes are accepted.
  return suspicious / sample.length > 0.1;
 }
 /**
 * Register all file operation tools
 */
@@ -186,11 +208,25 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
        const result = await executeObsidianCommand('read', cmdArgs);
        handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path });
        // Detect binary content — non-text files (ZIP, PDF raw bytes, images, etc.)
        // will contain null bytes or a high proportion of non-printable characters.
        const raw = result.stdout;
        if (isBinaryContent(raw)) {
          const identifier = sanitized.file || sanitized.path;
          return {
            content: [
              {
                type: 'text',
                text: `Cannot read "${identifier}": file appears to be binary (e.g. ZIP, image, compiled file). obsidian_read_note only supports text-based files such as Markdown, plain text, and PDF notes exported as text.`,
              },
            ],
          };
        }
        const offset: number = validated.offset ?? 0;
        const maxChars: number = validated.max_chars ?? 50000;
-        const fullContent = result.stdout;
+        const totalChars = raw.length;
-        const totalChars = fullContent.length;
+        const chunk = raw.slice(offset, offset + maxChars);
        const chunk = fullContent.slice(offset, offset + maxChars);
        const isTruncated = offset + maxChars < totalChars;
        let text = chunk;