fix: detect and reject binary files in obsidian_read_note (fixes #9)
When obsidian_read_note read a binary file (ZIP, image, compiled binary, etc.), the CLI returned raw bytes that were corrupted by text decoding, producing an unusable response. This commit adds an isBinaryContent() helper that flags content containing null bytes (a definitive binary marker) or a >10% ratio of non-printable characters in the first 8,192 characters of content. When binary content is detected, the tool returns a clear error message instead of garbled bytes. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -19,6 +19,28 @@ import {
|
|||||||
import { sanitizeParameters } from '../validation/sanitizer.js';
|
import { sanitizeParameters } from '../validation/sanitizer.js';
|
||||||
import { formatParam } from '../utils/cli-helpers.js';
|
import { formatParam } from '../utils/cli-helpers.js';
|
||||||
|
|
||||||
|
/**
 * Detect binary content that cannot be meaningfully returned as text.
 *
 * The input is text that has already been decoded from the file's bytes, so
 * the check looks for the traces binary data leaves behind after decoding:
 *
 * - a NUL character anywhere in the content (treated as decisive), or
 * - more than 10% of the first 8192 UTF-16 code units being "suspicious":
 *   ASCII control characters other than tab / line feed / carriage return,
 *   DEL (127), or U+FFFD, the replacement character a decoder substitutes
 *   for byte sequences that are not valid in the expected encoding.
 *
 * Ordinary non-ASCII text (accented letters, CJK, emoji) is never counted.
 *
 * @param content - Decoded file content to inspect.
 * @returns `true` when the content looks binary; `false` otherwise,
 *          including for the empty string.
 */
function isBinaryContent(content: string): boolean {
  if (content.length === 0) return false;

  // NUL practically never occurs in real text files, so a single occurrence
  // anywhere in the content is taken as proof of binary data.
  if (content.includes('\0')) return true;

  // Only sample the start of the content to keep the scan cheap on large files.
  const sample = content.slice(0, 8192);

  let suspicious = 0;
  for (let i = 0; i < sample.length; i++) {
    const code = sample.charCodeAt(i);
    const isAllowedWhitespace = code === 9 || code === 10 || code === 13;
    const isAsciiControl = code < 32 || code === 127;
    // U+FFFD marks bytes that failed to decode. Binary formats without NUL
    // bytes (or with mostly high bytes) show up as long runs of it, which a
    // control-character-only check would miss entirely.
    const isReplacementChar = code === 0xfffd;
    if ((isAsciiControl && !isAllowedWhitespace) || isReplacementChar) {
      suspicious++;
    }
  }

  return suspicious / sample.length > 0.1;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Register all file operation tools
|
* Register all file operation tools
|
||||||
*/
|
*/
|
||||||
@@ -186,11 +208,25 @@ export async function registerFileOperationTools(server: ObsidianMCPServer): Pro
|
|||||||
const result = await executeObsidianCommand('read', cmdArgs);
|
const result = await executeObsidianCommand('read', cmdArgs);
|
||||||
handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path });
|
handleCLIResult(result, { operation: 'read_note', identifier: sanitized.file || sanitized.path });
|
||||||
|
|
||||||
|
// Detect binary content — non-text files (ZIP, PDF raw bytes, images, etc.)
|
||||||
|
// will contain null bytes or a high proportion of non-printable characters.
|
||||||
|
const raw = result.stdout;
|
||||||
|
if (isBinaryContent(raw)) {
|
||||||
|
const identifier = sanitized.file || sanitized.path;
|
||||||
|
return {
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'text',
|
||||||
|
text: `Cannot read "${identifier}": file appears to be binary (e.g. ZIP, image, compiled file). obsidian_read_note only supports text-based files such as Markdown, plain text, and PDF notes exported as text.`,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
const offset: number = validated.offset ?? 0;
|
const offset: number = validated.offset ?? 0;
|
||||||
const maxChars: number = validated.max_chars ?? 50000;
|
const maxChars: number = validated.max_chars ?? 50000;
|
||||||
const fullContent = result.stdout;
|
const totalChars = raw.length;
|
||||||
const totalChars = fullContent.length;
|
const chunk = raw.slice(offset, offset + maxChars);
|
||||||
const chunk = fullContent.slice(offset, offset + maxChars);
|
|
||||||
const isTruncated = offset + maxChars < totalChars;
|
const isTruncated = offset + maxChars < totalChars;
|
||||||
|
|
||||||
let text = chunk;
|
let text = chunk;
|
||||||
|
|||||||
Reference in New Issue
Block a user