fix(playlists): better M3U paths matching across different UTF representations (#4890)

* fix: improve playlist path normalization for cross-platform compatibility

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: log normalized path when playlist path is not found

Signed-off-by: Deluan <deluan@navidrome.org>

* test: enhance Unicode normalization tests for playlist paths

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: enhance playlist path normalization for cross-platform compatibility

See https://github.com/navidrome/navidrome/pull/4789#issuecomment-3645724780

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: improve playlist path normalization to handle fullwidth characters and enhance cross-platform compatibility

Signed-off-by: Deluan <deluan@navidrome.org>

* formatting

Signed-off-by: Deluan <deluan@navidrome.org>

* fix: adjust chunk size for M3U parsing to optimize SQLite expression tree depth

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
This commit is contained in:
Deluan Quintão 2026-01-24 12:47:43 -05:00 committed by GitHub
parent c6c1c16923
commit b455546fdf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 275 additions and 34 deletions

View File

@ -179,7 +179,9 @@ func (s *playlists) parseNSP(_ context.Context, pls *model.Playlist, reader io.R
func (s *playlists) parseM3U(ctx context.Context, pls *model.Playlist, folder *model.Folder, reader io.Reader) error {
mediaFileRepository := s.ds.MediaFile(ctx)
var mfs model.MediaFiles
for lines := range slice.CollectChunks(slice.LinesFrom(reader), 400) {
// Chunk size of 100 lines, as each line can generate up to 4 lookup candidates
// (NFC/NFD × raw/lowercase), and SQLite has a max expression tree depth of 1000.
for lines := range slice.CollectChunks(slice.LinesFrom(reader), 100) {
filteredLines := make([]string, 0, len(lines))
for _, line := range lines {
line := strings.TrimSpace(line)
@ -206,33 +208,66 @@ func (s *playlists) parseM3U(ctx context.Context, pls *model.Playlist, folder *m
continue
}
// Normalize to NFD for filesystem compatibility (macOS). Database stores paths in NFD.
// See https://github.com/navidrome/navidrome/issues/4663
resolvedPaths = slice.Map(resolvedPaths, func(path string) string {
return strings.ToLower(norm.NFD.String(path))
})
// SQLite comparisons do not perform Unicode normalization, and filesystem normalization
// differs across platforms (macOS often yields NFD, while Linux/Windows typically use NFC).
// Generate lookup candidates for both forms so playlist entries match DB paths regardless
// of the original normalization. See https://github.com/navidrome/navidrome/issues/4884
//
// We also include the original (non-lowercased) paths because SQLite's COLLATE NOCASE
// only handles ASCII case-insensitivity. Non-ASCII characters like fullwidth letters
// (e.g., vs ) are not matched case-insensitively by NOCASE.
lookupCandidates := make([]string, 0, len(resolvedPaths)*4)
seen := make(map[string]struct{}, len(resolvedPaths)*4)
for _, path := range resolvedPaths {
// Add original paths first (for exact matching of non-ASCII characters)
nfcRaw := norm.NFC.String(path)
if _, ok := seen[nfcRaw]; !ok {
seen[nfcRaw] = struct{}{}
lookupCandidates = append(lookupCandidates, nfcRaw)
}
nfdRaw := norm.NFD.String(path)
if _, ok := seen[nfdRaw]; !ok {
seen[nfdRaw] = struct{}{}
lookupCandidates = append(lookupCandidates, nfdRaw)
}
found, err := mediaFileRepository.FindByPaths(resolvedPaths)
// Add lowercased paths (for ASCII case-insensitive matching via NOCASE)
nfc := strings.ToLower(nfcRaw)
if _, ok := seen[nfc]; !ok {
seen[nfc] = struct{}{}
lookupCandidates = append(lookupCandidates, nfc)
}
nfd := strings.ToLower(nfdRaw)
if _, ok := seen[nfd]; !ok {
seen[nfd] = struct{}{}
lookupCandidates = append(lookupCandidates, nfd)
}
}
found, err := mediaFileRepository.FindByPaths(lookupCandidates)
if err != nil {
log.Warn(ctx, "Error reading files from DB", "playlist", pls.Name, err)
continue
}
// Build lookup map with library-qualified keys, normalized for comparison
// Build lookup map with library-qualified keys, normalized for comparison.
// Canonicalize to NFC so NFD/NFC become comparable.
existing := make(map[string]int, len(found))
for idx := range found {
// Normalize to lowercase for case-insensitive comparison
// Key format: "libraryID:path"
key := fmt.Sprintf("%d:%s", found[idx].LibraryID, strings.ToLower(found[idx].Path))
key := fmt.Sprintf("%d:%s", found[idx].LibraryID, strings.ToLower(norm.NFC.String(found[idx].Path)))
existing[key] = idx
}
// Find media files in the order of the resolved paths, to keep playlist order
for _, path := range resolvedPaths {
idx, ok := existing[path]
key := strings.ToLower(norm.NFC.String(path))
idx, ok := existing[key]
if ok {
mfs = append(mfs, found[idx])
} else {
log.Warn(ctx, "Path in playlist not found", "playlist", pls.Name, "path", path)
// Prefer logging a composed representation when possible to avoid confusing output
// with decomposed combining marks.
log.Warn(ctx, "Path in playlist not found", "playlist", pls.Name, "path", norm.NFC.String(path))
}
}
}
@ -394,7 +429,20 @@ func (s *playlists) resolvePaths(ctx context.Context, folder *model.Folder, line
func (s *playlists) updatePlaylist(ctx context.Context, newPls *model.Playlist) error {
owner, _ := request.UserFrom(ctx)
// Try to find existing playlist by path. Since filesystem normalization differs across
// platforms (macOS uses NFD, Linux/Windows use NFC), we try both forms to match
// playlists that may have been imported on a different platform.
pls, err := s.ds.Playlist(ctx).FindByPath(newPls.Path)
if errors.Is(err, model.ErrNotFound) {
// Try alternate normalization form
altPath := norm.NFD.String(newPls.Path)
if altPath == newPls.Path {
altPath = norm.NFC.String(newPls.Path)
}
if altPath != newPls.Path {
pls, err = s.ds.Playlist(ctx).FindByPath(altPath)
}
}
if err != nil && !errors.Is(err, model.ErrNotFound) {
return err
}

View File

@ -135,6 +135,55 @@ var _ = Describe("Playlists", func() {
})
})
DescribeTable("Playlist filename Unicode normalization (regression fix-playlist-filename-normalization)",
func(storedForm, filesystemForm string) {
// Use Polish characters that decompose: ó (U+00F3) -> o + combining acute (U+006F + U+0301)
plsNameNFC := "Piosenki_Polskie_zółć" // NFC form (composed)
plsNameNFD := norm.NFD.String(plsNameNFC)
Expect(plsNameNFD).ToNot(Equal(plsNameNFC)) // Verify they differ
nameByForm := map[string]string{"NFC": plsNameNFC, "NFD": plsNameNFD}
storedName := nameByForm[storedForm]
filesystemName := nameByForm[filesystemForm]
tmpDir := GinkgoT().TempDir()
mockLibRepo.SetData([]model.Library{{ID: 1, Path: tmpDir}})
ds.MockedMediaFile = &mockedMediaFileFromListRepo{data: []string{}}
ps = core.NewPlaylists(ds)
// Create the playlist file on disk with the filesystem's normalization form
plsFile := tmpDir + "/" + filesystemName + ".m3u"
Expect(os.WriteFile(plsFile, []byte("#PLAYLIST:Test\n"), 0600)).To(Succeed())
// Pre-populate mock repo with the stored normalization form
storedPath := tmpDir + "/" + storedName + ".m3u"
existingPls := &model.Playlist{
ID: "existing-id",
Name: "Existing Playlist",
Path: storedPath,
Sync: true,
}
mockPlsRepo.data = map[string]*model.Playlist{storedPath: existingPls}
// Import using the filesystem's normalization form
plsFolder := &model.Folder{
ID: "1",
LibraryID: 1,
LibraryPath: tmpDir,
Path: "",
Name: "",
}
pls, err := ps.ImportFile(ctx, plsFolder, filesystemName+".m3u")
Expect(err).ToNot(HaveOccurred())
// Should update existing playlist, not create new one
Expect(pls.ID).To(Equal("existing-id"))
Expect(pls.Name).To(Equal("Existing Playlist"))
},
Entry("finds NFD-stored playlist when filesystem provides NFC path", "NFD", "NFC"),
Entry("finds NFC-stored playlist when filesystem provides NFD path", "NFC", "NFD"),
)
Describe("Cross-library relative paths", func() {
var tmpDir, plsDir, songsDir string
@ -446,23 +495,79 @@ var _ = Describe("Playlists", func() {
Expect(pls.Tracks[0].Path).To(Equal("abc/tEsT1.Mp3"))
})
It("handles Unicode normalization when comparing paths (NFD vs NFC)", func() {
// Simulate macOS filesystem: stores paths in NFD (decomposed) form
// "è" (U+00E8) in NFC becomes "e" + "◌̀" (U+0065 + U+0300) in NFD
nfdPath := "artist/Mich" + string([]rune{'e', '\u0300'}) + "le/song.mp3" // NFD: e + combining grave
repo.data = []string{nfdPath}
// Simulate Apple Music M3U: uses NFC (composed) form
nfcPath := "/music/artist/Mich\u00E8le/song.mp3" // NFC: single è character
m3u := nfcPath + "\n"
// Fullwidth characters (e.g., ) are not handled by SQLite's NOCASE collation,
// so we need exact matching for non-ASCII characters.
It("matches fullwidth characters exactly (SQLite NOCASE limitation)", func() {
// Fullwidth uppercase (U+FF21, U+FF23, U+FF32, U+FF2F, U+FF33, U+FF33)
repo.data = []string{
"plex/02 - .flac",
}
m3u := "/music/plex/02 - .flac\n"
f := strings.NewReader(m3u)
pls, err := ps.ImportM3U(ctx, f)
Expect(err).ToNot(HaveOccurred())
Expect(pls.Tracks).To(HaveLen(1))
// Should match despite different Unicode normalization forms
Expect(pls.Tracks[0].Path).To(Equal(nfdPath))
Expect(pls.Tracks[0].Path).To(Equal("plex/02 - .flac"))
})
// Unicode normalization tests: NFC (composed) vs NFD (decomposed) forms
// macOS stores paths in NFD, Linux/Windows use NFC. Playlists may use either form.
DescribeTable("matches paths across Unicode NFC/NFD normalization",
func(description, pathNFC string, dbForm, playlistForm norm.Form) {
pathNFD := norm.NFD.String(pathNFC)
Expect(pathNFD).ToNot(Equal(pathNFC), "test path should have decomposable characters")
// Set up DB with specified normalization form
var dbPath string
if dbForm == norm.NFC {
dbPath = pathNFC
} else {
dbPath = pathNFD
}
repo.data = []string{dbPath}
// Set up playlist with specified normalization form
var playlistPath string
if playlistForm == norm.NFC {
playlistPath = pathNFC
} else {
playlistPath = pathNFD
}
m3u := "/music/" + playlistPath + "\n"
f := strings.NewReader(m3u)
pls, err := ps.ImportM3U(ctx, f)
Expect(err).ToNot(HaveOccurred())
Expect(pls.Tracks).To(HaveLen(1))
Expect(pls.Tracks[0].Path).To(Equal(dbPath))
},
// French: è (U+00E8) decomposes to e + combining grave (U+0065 + U+0300)
Entry("French diacritics - DB:NFD, playlist:NFC",
"macOS DB with Apple Music playlist",
"artist/Michèle/song.mp3", norm.NFD, norm.NFC),
// Japanese Katakana: ド (U+30C9) decomposes to ト (U+30C8) + combining dakuten (U+3099)
Entry("Japanese Katakana with dakuten - DB:NFC, playlist:NFC (#4884)",
"Linux/Windows DB with NFC playlist",
"artist/\u30a2\u30a4\u30c9\u30eb/\u30c9\u30ea\u30fc\u30e0\u30bd\u30f3\u30b0.mp3", norm.NFC, norm.NFC),
Entry("Japanese Katakana with dakuten - DB:NFD, playlist:NFC (#4884)",
"macOS DB with NFC playlist",
"artist/\u30a2\u30a4\u30c9\u30eb/\u30c9\u30ea\u30fc\u30e0\u30bd\u30f3\u30b0.mp3", norm.NFD, norm.NFC),
// Cyrillic: й (U+0439) decomposes to и (U+0438) + combining breve (U+0306)
Entry("Cyrillic characters - DB:NFD, playlist:NFC (#4791)",
"macOS DB with NFC playlist",
"Жуки/Батарейка/01 - Разлюбила.mp3", norm.NFD, norm.NFC),
// Polish: ó (U+00F3) decomposes to o + combining acute (U+0301)
Entry("Polish diacritics - DB:NFD, playlist:NFC (#4663)",
"macOS DB with NFC playlist",
"Zespół/Człowiek/Piosenka o miłości.mp3", norm.NFD, norm.NFC),
Entry("Polish diacritics - DB:NFC, playlist:NFD",
"Linux/Windows DB with macOS-exported playlist",
"Zespół/Człowiek/Piosenka o miłości.mp3", norm.NFC, norm.NFD),
)
})
Describe("InPlaylistsPath", func() {
@ -563,9 +668,6 @@ func (r *mockedMediaFileFromListRepo) FindByPaths(paths []string) (model.MediaFi
var mfs model.MediaFiles
for idx, dataPath := range r.data {
// Normalize the data path to NFD (simulates macOS filesystem storage)
normalizedDataPath := norm.NFD.String(dataPath)
for _, requestPath := range paths {
// Strip library qualifier if present (format: "libraryID:path")
actualPath := requestPath
@ -577,12 +679,9 @@ func (r *mockedMediaFileFromListRepo) FindByPaths(paths []string) (model.MediaFi
}
}
// The request path should already be normalized to NFD by production code
// before calling FindByPaths (to match DB storage)
normalizedRequestPath := norm.NFD.String(actualPath)
// Case-insensitive comparison (like SQL's "collate nocase")
if strings.EqualFold(normalizedRequestPath, normalizedDataPath) {
// Case-insensitive comparison (like SQL's "collate nocase"), but with no
// implicit Unicode normalization (SQLite does not normalize NFC/NFD).
if strings.EqualFold(actualPath, dataPath) {
mfs = append(mfs, model.MediaFile{
ID: strconv.Itoa(idx),
Path: dataPath, // Return original path from DB
@ -597,10 +696,16 @@ func (r *mockedMediaFileFromListRepo) FindByPaths(paths []string) (model.MediaFi
type mockedPlaylistRepo struct {
last *model.Playlist
data map[string]*model.Playlist // keyed by path
model.PlaylistRepository
}
func (r *mockedPlaylistRepo) FindByPath(string) (*model.Playlist, error) {
func (r *mockedPlaylistRepo) FindByPath(path string) (*model.Playlist, error) {
if r.data != nil {
if pls, ok := r.data[path]; ok {
return pls, nil
}
}
return nil, model.ErrNotFound
}

View File

@ -561,4 +561,92 @@ var _ = Describe("MediaRepository", func() {
})
})
})
Describe("FindByPaths", func() {
// Test fixtures for Unicode and case-sensitivity tests
var testFiles []model.MediaFile
BeforeEach(func() {
testFiles = []model.MediaFile{
{ID: "findpath-1", LibraryID: 1, Path: "artist/Album/track.mp3", Title: "Track"},
{ID: "findpath-2", LibraryID: 1, Path: "artist/Album/UPPER.mp3", Title: "Upper"},
// Fullwidth uppercase: (U+FF21 U+FF23 U+FF32 U+FF2F U+FF33 U+FF33)
{ID: "findpath-3", LibraryID: 1, Path: "plex/02 - .flac", Title: "Fullwidth"},
// French diacritic: è (U+00E8, can decompose to e + combining grave)
{ID: "findpath-4", LibraryID: 1, Path: "artist/Michèle/song.mp3", Title: "French"},
}
for _, mf := range testFiles {
Expect(mr.Put(&mf)).To(Succeed())
}
})
AfterEach(func() {
for _, mf := range testFiles {
_ = mr.Delete(mf.ID)
}
})
It("finds files by exact path", func() {
results, err := mr.FindByPaths([]string{"1:artist/Album/track.mp3"})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(HaveLen(1))
Expect(results[0].ID).To(Equal("findpath-1"))
})
It("finds files case-insensitively for ASCII characters (NOCASE)", func() {
// SQLite's COLLATE NOCASE handles ASCII case-insensitivity
results, err := mr.FindByPaths([]string{"1:ARTIST/ALBUM/TRACK.MP3"})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(HaveLen(1))
Expect(results[0].ID).To(Equal("findpath-1"))
})
It("finds fullwidth characters only with exact case match (SQLite NOCASE limitation)", func() {
// SQLite's NOCASE does NOT handle fullwidth uppercase/lowercase equivalence
// The DB has fullwidth uppercase , searching with exact match should work
results, err := mr.FindByPaths([]string{"1:plex/02 - .flac"})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(HaveLen(1))
Expect(results[0].ID).To(Equal("findpath-3"))
// Searching with fullwidth lowercase should NOT match
// (this is the SQLite limitation that requires exact matching for non-ASCII)
results, err = mr.FindByPaths([]string{"1:plex/02 - .flac"})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(BeEmpty())
})
It("returns multiple files when querying multiple paths", func() {
results, err := mr.FindByPaths([]string{
"1:artist/Album/track.mp3",
"1:artist/Album/UPPER.mp3",
})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(HaveLen(2))
})
It("returns empty slice for non-existent paths", func() {
results, err := mr.FindByPaths([]string{"1:nonexistent/path.mp3"})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(BeEmpty())
})
It("returns empty slice for empty input", func() {
results, err := mr.FindByPaths([]string{})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(BeEmpty())
})
It("handles library-qualified paths correctly", func() {
// Library 1 should find the file
results, err := mr.FindByPaths([]string{"1:artist/Album/track.mp3"})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(HaveLen(1))
// Library 2 should NOT find it (file is in library 1)
results, err = mr.FindByPaths([]string{"2:artist/Album/track.mp3"})
Expect(err).ToNot(HaveOccurred())
Expect(results).To(BeEmpty())
})
})
})