fix: detect and reject binary files in obsidian_read_note (fixes #9)

When obsidian_read_note read a binary file (ZIP, image, compiled
binary, etc.) the CLI returned raw bytes that were corrupted by text
decoding, producing an unusable response.

Added an isBinaryContent() helper that flags content as binary when it
contains a null byte anywhere (definitive binary marker) or when more
than 10% of the characters in the first 8KB are non-printable. When
binary content is detected the tool returns a clear error message
instead of garbled bytes.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-04-30 18:39:32 -05:00
parent 8bc0094604
commit 4067520cd8

View File

@@ -19,6 +19,28 @@ import {
import { sanitizeParameters } from '../validation/sanitizer.js'; import { sanitizeParameters } from '../validation/sanitizer.js';
import { formatParam } from '../utils/cli-helpers.js'; import { formatParam } from '../utils/cli-helpers.js';
/**
 * Detect binary content that cannot be meaningfully returned as text.
 *
 * Content is classified as binary when either of the following holds:
 *  - it contains a NUL character anywhere in the string, or
 *  - more than 10% of the first 8192 characters are "suspicious", meaning
 *    an ASCII control character other than tab/LF/CR, DEL (127), or the
 *    Unicode replacement character U+FFFD.
 *
 * U+FFFD is counted because a UTF-8 decoder substitutes it for every invalid
 * byte sequence, so binary data that has already been decoded to a string
 * (ZIP, images, compiled files, ...) consists largely of U+FFFD rather than
 * raw control bytes. NOTE(review): this assumes the caller's string was
 * produced by a UTF-8 decode — confirm against the CLI wrapper.
 *
 * All other characters at or above code 128 are treated as text so that
 * non-English notes are not rejected.
 *
 * @param content - Decoded text to inspect.
 * @returns `true` when the content looks binary; `false` for empty or textual content.
 */
function isBinaryContent(content: string): boolean {
  // Empty content is not binary; this also avoids a 0/0 ratio below.
  if (content.length === 0) return false;

  // NUL is technically encodable in UTF-8 but effectively never appears in
  // real text files, so a single occurrence is treated as conclusive.
  if (content.includes('\0')) return true;

  // Sample the first 8KB for performance on large files
  const sample = content.slice(0, 8192);
  let suspicious = 0;
  for (let i = 0; i < sample.length; i++) {
    const code = sample.charCodeAt(i);
    const isAllowedWhitespace = code === 9 || code === 10 || code === 13;
    const isAsciiControl = code < 32 || code === 127;
    const isReplacementChar = code === 0xfffd;
    if ((isAsciiControl && !isAllowedWhitespace) || isReplacementChar) {
      suspicious++;
    }
  }
  // Strictly greater than 10%: exactly 10% is still considered text.
  return suspicious / sample.length > 0.1;
}
/** /**
* Register all file operation tools * Register all file operation tools
*/ */
@@ -186,11 +208,25 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
const result = await executeObsidianCommand('read', cmdArgs); const result = await executeObsidianCommand('read', cmdArgs);
handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path }); handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path });
// Detect binary content — non-text files (ZIP, PDF raw bytes, images, etc.)
// will contain null bytes or a high proportion of non-printable characters.
const raw = result.stdout;
if (isBinaryContent(raw)) {
const identifier = sanitized.file || sanitized.path;
return {
content: [
{
type: 'text',
text: `Cannot read "${identifier}": file appears to be binary (e.g. ZIP, image, compiled file). obsidian_read_note only supports text-based files such as Markdown, plain text, and PDF notes exported as text.`,
},
],
};
}
const offset: number = validated.offset ?? 0; const offset: number = validated.offset ?? 0;
const maxChars: number = validated.max_chars ?? 50000; const maxChars: number = validated.max_chars ?? 50000;
const fullContent = result.stdout; const totalChars = raw.length;
const totalChars = fullContent.length; const chunk = raw.slice(offset, offset + maxChars);
const chunk = fullContent.slice(offset, offset + maxChars);
const isTruncated = offset + maxChars < totalChars; const isTruncated = offset + maxChars < totalChars;
let text = chunk; let text = chunk;