fix(store): handle emoji-only filenames in handelize (#302)

Convert emoji codepoints to hex representation (e.g. 🐘 → 1f418) instead
of crashing, so files like 🐘.md can be indexed without halting the
entire update process.

Fixes #302
This commit is contained in:
Ning 2026-03-06 14:24:24 +08:00
parent 40610c3aa6
commit dc777e3be0
No known key found for this signature in database
GPG Key ID: 667900F0E65B2259
2 changed files with 27 additions and 1 deletions

View File

@ -958,16 +958,26 @@ export function getDocid(hash: string): string {
* - Preserve folder structure (a/b/c/d.md stays structured)
* - Preserve file extension
*/
/**
 * Rewrite symbol codepoints in `str` as lowercase hexadecimal text
 * (e.g. 🐘 → 1f418).
 *
 * A "run" is a maximal sequence of `\p{So}` characters (each optionally
 * followed by a single `\p{Mn}` combining mark) and/or `\p{Sk}` characters.
 * Every run is replaced by the hex codepoints of its `\p{So}` / `\p{Sk}`
 * members joined with `-`; combining marks captured inside a run are dropped.
 * Text outside of runs is returned unchanged.
 *
 * @param str - Text (typically one path segment) that may contain symbols.
 * @returns The text with each symbol run replaced by dash-separated hex.
 */
function emojiToHex(str: string): string {
  const symbolRun = /(?:\p{So}\p{Mn}?|\p{Sk})+/gu;
  // Not global on purpose: a stateless `test` is needed per codepoint.
  const isSymbol = /\p{So}|\p{Sk}/u;

  return str.replace(symbolRun, (match) => {
    const hexParts: string[] = [];
    // `for...of` walks the string by codepoint, so astral symbols stay whole.
    for (const ch of match) {
      if (isSymbol.test(ch)) {
        hexParts.push(ch.codePointAt(0)!.toString(16));
      }
    }
    return hexParts.join('-');
  });
}
export function handelize(path: string): string {
if (!path || path.trim() === '') {
throw new Error('handelize: path cannot be empty');
}
// Allow route-style "$" filenames while still rejecting paths with no usable content.
// Emoji and other symbols (\p{So}, \p{Sk}) count as valid content — they get converted to hex codepoints below.
const segments = path.split('/').filter(Boolean);
const lastSegment = segments[segments.length - 1] || '';
const filenameWithoutExt = lastSegment.replace(/\.[^.]+$/, '');
const hasValidContent = /[\p{L}\p{N}$]/u.test(filenameWithoutExt);
const hasValidContent = /[\p{L}\p{N}\p{So}\p{Sk}$]/u.test(filenameWithoutExt);
if (!hasValidContent) {
throw new Error(`handelize: path "${path}" has no valid filename content`);
}
@ -979,6 +989,9 @@ export function handelize(path: string): string {
.map((segment, idx, arr) => {
const isLastSegment = idx === arr.length - 1;
// Convert emoji to hex codepoints before cleaning
segment = emojiToHex(segment);
if (isLastSegment) {
// For the filename (last segment), preserve the extension
const extMatch = segment.match(/(\.[a-z0-9]+)$/i);

View File

@ -137,6 +137,19 @@ describe("handelize", () => {
expect(handelize("日本語-notes.md")).toBe("日本語-notes.md");
});
test("handles emoji filenames (issue #302)", () => {
  // Each entry is [input path, expected handle]; entries are checked in order.
  const cases: Array<[string, string]> = [
    // Emoji-only filenames become their hex codepoints
    ["🐘.md", "1f418.md"],
    ["🎉.md", "1f389.md"],
    // Emoji next to ordinary text
    ["notes 🐘.md", "notes-1f418.md"],
    ["🐘 elephant.md", "1f418-elephant.md"],
    // Consecutive emoji are dash-separated
    ["🐘🎉.md", "1f418-1f389.md"],
    // Emoji used as a directory name
    ["🐘/notes.md", "1f418/notes.md"],
  ];

  for (const [input, expected] of cases) {
    expect(handelize(input)).toBe(expected);
  }
});
test("handles dates and times in filenames", () => {
expect(handelize("meeting-2025-01-15.md")).toBe("meeting-2025-01-15.md");
expect(handelize("notes 2025/01/15.md")).toBe("notes-2025/01/15.md");