From dc777e3be0ca183ad9d2d1a9799e8e859cd0a8ba Mon Sep 17 00:00:00 2001
From: Ning
Date: Fri, 6 Mar 2026 14:24:24 +0800
Subject: [PATCH] fix(store): handle emoji-only filenames in handelize (#302)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert emoji codepoints to hex representation (e.g. 🐘 → 1f418) instead of
crashing, so files like 🐘.md can be indexed without halting the entire
update process.

The conversion helper is defined inside handelize so that handelize's
existing doc comment stays directly above it. Skin-tone modifiers are
matched with \p{Emoji_Modifier} rather than \p{Sk}, because \p{Sk} also
covers ASCII "^" and "`", which are not emoji.

Fixes #302
---
 src/store.ts                    | 20 +++++++++++++++++++-
 test/store.helpers.unit.test.ts | 15 +++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/src/store.ts b/src/store.ts
index ea07b3c..ef37a06 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -958,16 +958,31 @@ export function getDocid(hash: string): string {
  * - Preserve folder structure (a/b/c/d.md stays structured)
  * - Preserve file extension
  */
 export function handelize(path: string): string {
   if (!path || path.trim() === '') {
     throw new Error('handelize: path cannot be empty');
   }
 
+  // Emoji are "Other Symbol" codepoints, optionally followed by one combining mark
+  // (e.g. the U+FE0F variation selector), plus skin-tone modifiers. The modifiers are
+  // matched with \p{Emoji_Modifier} rather than \p{Sk}: \p{Sk} also covers ASCII "^"
+  // and "`", which are not emoji.
+  const emojiRun = /(?:\p{So}\p{Mn}?|\p{Emoji_Modifier})+/gu;
+  const emojiChar = /\p{So}|\p{Emoji_Modifier}/u;
+  // Replace each emoji run with dash-separated hex codepoints (e.g. 🐘🎉 → 1f418-1f389).
+  const emojiToHex = (text: string): string =>
+    text.replace(emojiRun, (run) =>
+      [...run]
+        .filter((ch) => emojiChar.test(ch))
+        .map((ch) => ch.codePointAt(0)!.toString(16))
+        .join('-'));
+
   // Allow route-style "$" filenames while still rejecting paths with no usable content.
+  // Emoji count as valid content — they get converted to hex codepoints below.
   const segments = path.split('/').filter(Boolean);
   const lastSegment = segments[segments.length - 1] || '';
   const filenameWithoutExt = lastSegment.replace(/\.[^.]+$/, '');
-  const hasValidContent = /[\p{L}\p{N}$]/u.test(filenameWithoutExt);
+  const hasValidContent = /[\p{L}\p{N}\p{So}\p{Emoji_Modifier}$]/u.test(filenameWithoutExt);
   if (!hasValidContent) {
     throw new Error(`handelize: path "${path}" has no valid filename content`);
   }
@@ -979,6 +994,9 @@ export function handelize(path: string): string {
     .map((segment, idx, arr) => {
       const isLastSegment = idx === arr.length - 1;
 
+      // Convert emoji to hex codepoints before cleaning
+      segment = emojiToHex(segment);
+
       if (isLastSegment) {
         // For the filename (last segment), preserve the extension
         const extMatch = segment.match(/(\.[a-z0-9]+)$/i);
diff --git a/test/store.helpers.unit.test.ts b/test/store.helpers.unit.test.ts
index 3303187..4fd1414 100644
--- a/test/store.helpers.unit.test.ts
+++ b/test/store.helpers.unit.test.ts
@@ -137,6 +137,21 @@ describe("handelize", () => {
     expect(handelize("日本語-notes.md")).toBe("日本語-notes.md");
   });
 
+  test("handles emoji filenames (issue #302)", () => {
+    // Emoji-only filenames should convert to hex codepoints
+    expect(handelize("🐘.md")).toBe("1f418.md");
+    expect(handelize("🎉.md")).toBe("1f389.md");
+    // Emoji mixed with text
+    expect(handelize("notes 🐘.md")).toBe("notes-1f418.md");
+    expect(handelize("🐘 elephant.md")).toBe("1f418-elephant.md");
+    // Multiple emojis
+    expect(handelize("🐘🎉.md")).toBe("1f418-1f389.md");
+    // Emoji followed by a skin-tone modifier
+    expect(handelize("👍🏽.md")).toBe("1f44d-1f3fd.md");
+    // Emoji in directory names
+    expect(handelize("🐘/notes.md")).toBe("1f418/notes.md");
+  });
+
   test("handles dates and times in filenames", () => {
     expect(handelize("meeting-2025-01-15.md")).toBe("meeting-2025-01-15.md");
     expect(handelize("notes 2025/01/15.md")).toBe("notes-2025/01/15.md");