diff --git a/Makefile b/Makefile index 1cfc86aa6..f29c27981 100644 --- a/Makefile +++ b/Makefile @@ -36,8 +36,9 @@ watch: ##@Development Start Go tests in watch mode (re-run when code changes) go tool ginkgo watch -tags=netgo -notify ./... .PHONY: watch +PKG ?= ./... test: ##@Development Run Go tests - go test -tags netgo ./... + go test -tags netgo $(PKG) .PHONY: test testrace: ##@Development Run Go tests with race detector diff --git a/conf/configuration.go b/conf/configuration.go index d15b0de3e..1223ffedf 100644 --- a/conf/configuration.go +++ b/conf/configuration.go @@ -132,6 +132,7 @@ type scannerOptions struct { ArtistJoiner string GenreSeparators string // Deprecated: Use Tags.genre.Split instead GroupAlbumReleases bool // Deprecated: Use PID.Album instead + FollowSymlinks bool // Whether to follow symlinks when scanning directories } type subsonicOptions struct { @@ -499,6 +500,7 @@ func init() { viper.SetDefault("scanner.artistjoiner", consts.ArtistJoiner) viper.SetDefault("scanner.genreseparators", "") viper.SetDefault("scanner.groupalbumreleases", false) + viper.SetDefault("scanner.followsymlinks", true) viper.SetDefault("subsonic.appendsubtitle", true) viper.SetDefault("subsonic.artistparticipations", false) diff --git a/persistence/album_repository.go b/persistence/album_repository.go index be2af3ee3..3f238ee23 100644 --- a/persistence/album_repository.go +++ b/persistence/album_repository.go @@ -315,7 +315,7 @@ func (r *albumRepository) GetTouchedAlbums(libID int) (model.AlbumCursor, error) // RefreshPlayCounts updates the play count and last play date annotations for all albums, based // on the media files associated with them. func (r *albumRepository) RefreshPlayCounts() (int64, error) { - query := rawSQL(` + query := Expr(` with play_counts as ( select user_id, album_id, sum(play_count) as total_play_count, max(play_date) as last_play_date from media_file diff --git a/persistence/artist_repository.go b/persistence/artist_repository.go index 7602be381..ecb8f8bf6 100644 --- a/persistence/artist_repository.go +++ b/persistence/artist_repository.go @@ -239,7 +239,7 @@ func (r *artistRepository) purgeEmpty() error { // RefreshPlayCounts updates the play count and last play date annotations for all artists, based // on the media files associated with them. func (r *artistRepository) RefreshPlayCounts() (int64, error) { - query := rawSQL(` + query := Expr(` with play_counts as ( select user_id, atom as artist_id, sum(play_count) as total_play_count, max(play_date) as last_play_date from media_file @@ -259,76 +259,123 @@ on conflict (user_id, item_id, item_type) do update return r.executeSQL(query) } -// RefreshStats updates the stats field for all artists, based on the media files associated with them. -// BFR Maybe filter by "touched" artists? +// RefreshStats updates the stats field for artists whose associated media files were updated after the oldest recorded library scan time. +// It processes artists in batches to handle potentially large updates. func (r *artistRepository) RefreshStats() (int64, error) { - // First get all counters, one query groups by artist/role, and another with totals per artist. - // Union both queries and group by artist to get a single row of counters per artist/role. - // Then format the counters in a JSON object, one key for each role. - // Finally update the artist table with the new counters - // In all queries, atom is the artist ID and path is the role (or "total" for the totals) - query := rawSQL(` --- CTE to get counters for each artist, grouped by role -with artist_role_counters as ( - -- Get counters for each artist, grouped by role - -- (remove the index from the role: composer[0] => composer - select atom as artist_id, - substr( - replace(jt.path, '$.', ''), - 1, - case when instr(replace(jt.path, '$.', ''), '[') > 0 - then instr(replace(jt.path, '$.', ''), '[') - 1 - else length(replace(jt.path, '$.', '')) - end - ) as role, - count(distinct album_id) as album_count, - count(mf.id) as count, - sum(size) as size - from media_file mf - left join json_tree(participants) jt - where atom is not null and key = 'id' - group by atom, role -), + touchedArtistsQuerySQL := ` + SELECT DISTINCT mfa.artist_id + FROM media_file_artists mfa + JOIN media_file mf ON mfa.media_file_id = mf.id + WHERE mf.updated_at > (SELECT last_scan_at FROM library ORDER BY last_scan_at ASC LIMIT 1) + ` --- CTE to get the totals for each artist -artist_total_counters as ( - select mfa.artist_id, - 'total' as role, - count(distinct mf.album_id) as album_count, - count(distinct mf.id) as count, - sum(mf.size) as size - from (select artist_id, media_file_id - from main.media_file_artists) as mfa - join main.media_file mf on mfa.media_file_id = mf.id - group by mfa.artist_id -), + var allTouchedArtistIDs []string + if err := r.db.NewQuery(touchedArtistsQuerySQL).Column(&allTouchedArtistIDs); err != nil { + return 0, fmt.Errorf("fetching touched artist IDs: %w", err) + } --- CTE to combine role and total counters -combined_counters as ( - select artist_id, role, album_count, count, size - from artist_role_counters - union - select artist_id, role, album_count, count, size - from artist_total_counters -), + if len(allTouchedArtistIDs) == 0 { + log.Debug(r.ctx, "RefreshStats: No artists to update.") + return 0, nil + } + log.Debug(r.ctx, "RefreshStats: Found artists to update.", "count", len(allTouchedArtistIDs)) --- CTE to format the counters in a JSON object -artist_counters as ( - select artist_id as id, - json_group_object( - replace(role, '"', ''), - json_object('a', album_count, 'm', count, 's', size) - ) as counters - from combined_counters - group by artist_id -) + // Template for the batch update with placeholder markers that we'll replace + batchUpdateStatsSQL := ` + WITH artist_role_counters AS ( + SELECT jt.atom AS artist_id, + substr( + replace(jt.path, '$.', ''), + 1, + CASE WHEN instr(replace(jt.path, '$.', ''), '[') > 0 + THEN instr(replace(jt.path, '$.', ''), '[') - 1 + ELSE length(replace(jt.path, '$.', '')) + END + ) AS role, + count(DISTINCT mf.album_id) AS album_count, + count(mf.id) AS count, + sum(mf.size) AS size + FROM media_file mf + JOIN json_tree(mf.participants) jt ON jt.key = 'id' AND jt.atom IS NOT NULL + WHERE jt.atom IN (ROLE_IDS_PLACEHOLDER) -- Will replace with actual placeholders + GROUP BY jt.atom, role + ), + artist_total_counters AS ( + SELECT mfa.artist_id, + 'total' AS role, + count(DISTINCT mf.album_id) AS album_count, + count(DISTINCT mf.id) AS count, + sum(mf.size) AS size + FROM media_file_artists mfa + JOIN media_file mf ON mfa.media_file_id = mf.id + WHERE mfa.artist_id IN (TOTAL_IDS_PLACEHOLDER) -- Will replace with actual placeholders + GROUP BY mfa.artist_id + ), + combined_counters AS ( + SELECT artist_id, role, album_count, count, size FROM artist_role_counters + UNION + SELECT artist_id, role, album_count, count, size FROM artist_total_counters + ), + artist_counters AS ( + SELECT artist_id AS id, + json_group_object( + replace(role, '"', ''), + json_object('a', album_count, 'm', count, 's', size) + ) AS counters + FROM combined_counters + GROUP BY artist_id + ) + UPDATE artist + SET stats = coalesce((SELECT counters FROM artist_counters ac WHERE ac.id = artist.id), '{}'), + updated_at = datetime(current_timestamp, 'localtime') + WHERE artist.id IN (UPDATE_IDS_PLACEHOLDER) AND artist.id <> '';` // Will replace with actual placeholders --- Update the artist table with the new counters -update artist -set stats = coalesce((select counters from artist_counters where artist_counters.id = artist.id), '{}'), - updated_at = datetime(current_timestamp, 'localtime') -where id <> ''; -- always true, to avoid warnings`) - return r.executeSQL(query) + var totalRowsAffected int64 = 0 + const batchSize = 1000 + + batchCounter := 0 + for artistIDBatch := range slice.CollectChunks(slices.Values(allTouchedArtistIDs), batchSize) { + batchCounter++ + log.Trace(r.ctx, "RefreshStats: Processing batch", "batchNum", batchCounter, "batchSize", len(artistIDBatch)) + + // Create placeholders for each ID in the IN clauses + placeholders := make([]string, len(artistIDBatch)) + for i := range artistIDBatch { + placeholders[i] = "?" + } + // Don't add extra parentheses, the IN clause already expects them in SQL syntax + inClause := strings.Join(placeholders, ",") + + // Replace the placeholder markers with actual SQL placeholders + batchSQL := strings.Replace(batchUpdateStatsSQL, "ROLE_IDS_PLACEHOLDER", inClause, 1) + batchSQL = strings.Replace(batchSQL, "TOTAL_IDS_PLACEHOLDER", inClause, 1) + batchSQL = strings.Replace(batchSQL, "UPDATE_IDS_PLACEHOLDER", inClause, 1) + + // Create a single parameter array with all IDs (repeated 3 times for each IN clause) + // We need to repeat each ID 3 times (once for each IN clause) + var args []interface{} + for _, id := range artistIDBatch { + args = append(args, id) // For ROLE_IDS_PLACEHOLDER + } + for _, id := range artistIDBatch { + args = append(args, id) // For TOTAL_IDS_PLACEHOLDER + } + for _, id := range artistIDBatch { + args = append(args, id) // For UPDATE_IDS_PLACEHOLDER + } + + // Now use Expr with the expanded SQL and all parameters + sqlizer := Expr(batchSQL, args...) + + rowsAffected, err := r.executeSQL(sqlizer) + if err != nil { + return totalRowsAffected, fmt.Errorf("executing batch update for artist stats (batch %d): %w", batchCounter, err) + } + totalRowsAffected += rowsAffected + } + + log.Debug(r.ctx, "RefreshStats: Successfully updated stats.", "totalArtistsProcessed", len(allTouchedArtistIDs), "totalDBRowsAffected", totalRowsAffected) + return totalRowsAffected, nil } func (r *artistRepository) Search(q string, offset int, size int, includeMissing bool) (model.Artists, error) { diff --git a/persistence/helpers.go b/persistence/helpers.go index a1bc85b86..73815ae45 100644 --- a/persistence/helpers.go +++ b/persistence/helpers.go @@ -57,14 +57,6 @@ func toCamelCase(str string) string { }) } -// rawSQL is a string that will be used as is in the SQL query executor -// It does not support arguments -type rawSQL string - -func (r rawSQL) ToSql() (string, []interface{}, error) { - return string(r), nil, nil -} - func Exists(subTable string, cond squirrel.Sqlizer) existsCond { return existsCond{subTable: subTable, cond: cond, not: false} } diff --git a/persistence/library_repository.go b/persistence/library_repository.go index 6fa4f4dea..5ec54b964 100644 --- a/persistence/library_repository.go +++ b/persistence/library_repository.go @@ -136,7 +136,7 @@ func (r *libraryRepository) ScanEnd(id int) error { return err } // https://www.sqlite.org/pragma.html#pragma_optimize - _, err = r.executeSQL(rawSQL("PRAGMA optimize=0x10012;")) + _, err = r.executeSQL(Expr("PRAGMA optimize=0x10012;")) return err } diff --git a/persistence/tag_repository.go b/persistence/tag_repository.go index fcbad6ab3..d63584af0 100644 --- a/persistence/tag_repository.go +++ b/persistence/tag_repository.go @@ -60,7 +60,7 @@ where tag.id = updated_values.id; ` for _, table := range []string{"album", "media_file"} { start := time.Now() - query := rawSQL(fmt.Sprintf(template, table)) + query := Expr(fmt.Sprintf(template, table)) c, err := r.executeSQL(query) log.Debug(r.ctx, "Updated tag counts", "table", table, "elapsed", time.Since(start), "updated", c) if err != nil { diff --git a/scanner/walk_dir_tree.go b/scanner/walk_dir_tree.go index ba87f2628..4f9f26b1b 100644 --- a/scanner/walk_dir_tree.go +++ b/scanner/walk_dir_tree.go @@ -11,6 +11,7 @@ import ( "strings" "time" + "github.com/navidrome/navidrome/conf" "github.com/navidrome/navidrome/consts" "github.com/navidrome/navidrome/core" "github.com/navidrome/navidrome/log" @@ -266,6 +267,10 @@ func isDirOrSymlinkToDir(fsys fs.FS, baseDir string, dirEnt fs.DirEntry) (bool, if dirEnt.Type()&fs.ModeSymlink == 0 { return false, nil } + // If symlinks are disabled, return false for symlinks + if !conf.Server.Scanner.FollowSymlinks { + return false, nil + } // Does this symlink point to a directory? fileInfo, err := fs.Stat(fsys, path.Join(baseDir, dirEnt.Name())) if err != nil { diff --git a/scanner/walk_dir_tree_test.go b/scanner/walk_dir_tree_test.go index 9a21b4a92..c4278ef82 100644 --- a/scanner/walk_dir_tree_test.go +++ b/scanner/walk_dir_tree_test.go @@ -8,6 +8,8 @@ import ( "path/filepath" "testing/fstest" + "github.com/navidrome/navidrome/conf" + "github.com/navidrome/navidrome/conf/configtest" "github.com/navidrome/navidrome/core/storage" "github.com/navidrome/navidrome/model" . "github.com/onsi/ginkgo/v2" @@ -17,8 +19,15 @@ import ( var _ = Describe("walk_dir_tree", func() { Describe("walkDirTree", func() { - var fsys storage.MusicFS + var ( + fsys storage.MusicFS + job *scanJob + ctx context.Context + ) + BeforeEach(func() { + DeferCleanup(configtest.SetupConfig()) + ctx = GinkgoT().Context() fsys = &mockMusicFS{ FS: fstest.MapFS{ "root/a/.ndignore": {Data: []byte("ignored/*")}, @@ -32,21 +41,22 @@ var _ = Describe("walk_dir_tree", func() { "root/d/f1.mp3": {}, "root/d/f2.mp3": {}, "root/d/f3.mp3": {}, + "root/e/original/f1.mp3": {}, + "root/e/symlink": {Mode: fs.ModeSymlink, Data: []byte("root/e/original")}, }, } - }) - - It("walks all directories", func() { - job := &scanJob{ + job = &scanJob{ fs: fsys, lib: model.Library{Path: "/music"}, } - ctx := context.Background() + }) + + // Helper function to call walkDirTree and collect folders from the results channel + getFolders := func() map[string]*folderEntry { results, err := walkDirTree(ctx, job) Expect(err).ToNot(HaveOccurred()) folders := map[string]*folderEntry{} - g := errgroup.Group{} g.Go(func() error { for folder := range results { @@ -55,24 +65,42 @@ var _ = Describe("walk_dir_tree", func() { return nil }) _ = g.Wait() + return folders + } - Expect(folders).To(HaveLen(6)) - Expect(folders["root/a/ignored"].audioFiles).To(BeEmpty()) - Expect(folders["root/a"].audioFiles).To(SatisfyAll( - HaveLen(2), - HaveKey("f1.mp3"), - HaveKey("f2.mp3"), - )) - Expect(folders["root/a"].imageFiles).To(BeEmpty()) - Expect(folders["root/b"].audioFiles).To(BeEmpty()) - Expect(folders["root/b"].imageFiles).To(SatisfyAll( - HaveLen(1), - HaveKey("cover.jpg"), - )) - Expect(folders["root/c"].audioFiles).To(BeEmpty()) - Expect(folders["root/c"].imageFiles).To(BeEmpty()) - Expect(folders).ToNot(HaveKey("root/d")) - }) + DescribeTable("symlink handling", + func(followSymlinks bool, expectedFolderCount int) { + conf.Server.Scanner.FollowSymlinks = followSymlinks + folders := getFolders() + + Expect(folders).To(HaveLen(expectedFolderCount + 2)) // +2 for `.` and `root` + + // Basic folder structure checks + Expect(folders["root/a"].audioFiles).To(SatisfyAll( + HaveLen(2), + HaveKey("f1.mp3"), + HaveKey("f2.mp3"), + )) + Expect(folders["root/a"].imageFiles).To(BeEmpty()) + Expect(folders["root/b"].audioFiles).To(BeEmpty()) + Expect(folders["root/b"].imageFiles).To(SatisfyAll( + HaveLen(1), + HaveKey("cover.jpg"), + )) + Expect(folders["root/c"].audioFiles).To(BeEmpty()) + Expect(folders["root/c"].imageFiles).To(BeEmpty()) + Expect(folders).ToNot(HaveKey("root/d")) + + // Symlink specific checks + if followSymlinks { + Expect(folders["root/e/symlink"].audioFiles).To(HaveLen(1)) + } else { + Expect(folders).ToNot(HaveKey("root/e/symlink")) + } + }, + Entry("with symlinks enabled", true, 7), + Entry("with symlinks disabled", false, 6), + ) }) Describe("helper functions", func() { @@ -81,74 +109,88 @@ var _ = Describe("walk_dir_tree", func() { baseDir := filepath.Join("tests", "fixtures") Describe("isDirOrSymlinkToDir", func() { - It("returns true for normal dirs", func() { - dirEntry := getDirEntry("tests", "fixtures") - Expect(isDirOrSymlinkToDir(fsys, baseDir, dirEntry)).To(BeTrue()) + BeforeEach(func() { + DeferCleanup(configtest.SetupConfig()) }) - It("returns true for symlinks to dirs", func() { - dirEntry := getDirEntry(baseDir, "symlink2dir") - Expect(isDirOrSymlinkToDir(fsys, baseDir, dirEntry)).To(BeTrue()) - }) - It("returns false for files", func() { - dirEntry := getDirEntry(baseDir, "test.mp3") - Expect(isDirOrSymlinkToDir(fsys, baseDir, dirEntry)).To(BeFalse()) - }) - It("returns false for symlinks to files", func() { - dirEntry := getDirEntry(baseDir, "symlink") - Expect(isDirOrSymlinkToDir(fsys, baseDir, dirEntry)).To(BeFalse()) - }) - }) - Describe("isDirIgnored", func() { - It("returns false for normal dirs", func() { - Expect(isDirIgnored("empty_folder")).To(BeFalse()) - }) - It("returns true when folder name starts with a `.`", func() { - Expect(isDirIgnored(".hidden_folder")).To(BeTrue()) - }) - It("returns false when folder name starts with ellipses", func() { - Expect(isDirIgnored("...unhidden_folder")).To(BeFalse()) - }) - It("returns true when folder name is $Recycle.Bin", func() { - Expect(isDirIgnored("$Recycle.Bin")).To(BeTrue()) - }) - It("returns true when folder name is #snapshot", func() { - Expect(isDirIgnored("#snapshot")).To(BeTrue()) - }) - }) - }) - Describe("fullReadDir", func() { - var fsys fakeFS - var ctx context.Context - BeforeEach(func() { - ctx = context.Background() - fsys = fakeFS{MapFS: fstest.MapFS{ - "root/a/f1": {}, - "root/b/f2": {}, - "root/c/f3": {}, - }} + Context("with symlinks enabled", func() { + BeforeEach(func() { + conf.Server.Scanner.FollowSymlinks = true + }) + + DescribeTable("returns expected result", + func(dirName string, expected bool) { + dirEntry := getDirEntry("tests/fixtures", dirName) + Expect(isDirOrSymlinkToDir(fsys, baseDir, dirEntry)).To(Equal(expected)) + }, + Entry("normal dir", "empty_folder", true), + Entry("symlink to dir", "symlink2dir", true), + Entry("regular file", "test.mp3", false), + Entry("symlink to file", "symlink", false), + ) + }) + + Context("with symlinks disabled", func() { + BeforeEach(func() { + conf.Server.Scanner.FollowSymlinks = false + }) + + DescribeTable("returns expected result", + func(dirName string, expected bool) { + dirEntry := getDirEntry("tests/fixtures", dirName) + Expect(isDirOrSymlinkToDir(fsys, baseDir, dirEntry)).To(Equal(expected)) + }, + Entry("normal dir", "empty_folder", true), + Entry("symlink to dir", "symlink2dir", false), + Entry("regular file", "test.mp3", false), + Entry("symlink to file", "symlink", false), + ) + }) }) - It("reads all entries", func() { - dir, _ := fsys.Open("root") - entries := fullReadDir(ctx, dir.(fs.ReadDirFile)) - Expect(entries).To(HaveLen(3)) - Expect(entries[0].Name()).To(Equal("a")) - Expect(entries[1].Name()).To(Equal("b")) - Expect(entries[2].Name()).To(Equal("c")) + + Describe("isDirIgnored", func() { + DescribeTable("returns expected result", + func(dirName string, expected bool) { + Expect(isDirIgnored(dirName)).To(Equal(expected)) + }, + Entry("normal dir", "empty_folder", false), + Entry("hidden dir", ".hidden_folder", true), + Entry("dir starting with ellipsis", "...unhidden_folder", false), + Entry("recycle bin", "$Recycle.Bin", true), + Entry("snapshot dir", "#snapshot", true), + ) }) - It("skips entries with permission error", func() { - fsys.failOn = "b" - dir, _ := fsys.Open("root") - entries := fullReadDir(ctx, dir.(fs.ReadDirFile)) - Expect(entries).To(HaveLen(2)) - Expect(entries[0].Name()).To(Equal("a")) - Expect(entries[1].Name()).To(Equal("c")) - }) - It("aborts if it keeps getting 'readdirent: no such file or directory'", func() { - fsys.err = fs.ErrNotExist - dir, _ := fsys.Open("root") - entries := fullReadDir(ctx, dir.(fs.ReadDirFile)) - Expect(entries).To(BeEmpty()) + + Describe("fullReadDir", func() { + var ( + fsys fakeFS + ctx context.Context + ) + + BeforeEach(func() { + ctx = GinkgoT().Context() + fsys = fakeFS{MapFS: fstest.MapFS{ + "root/a/f1": {}, + "root/b/f2": {}, + "root/c/f3": {}, + }} + }) + + DescribeTable("reading directory entries", + func(failOn string, expectedErr error, expectedNames []string) { + fsys.failOn = failOn + fsys.err = expectedErr + dir, _ := fsys.Open("root") + entries := fullReadDir(ctx, dir.(fs.ReadDirFile)) + Expect(entries).To(HaveLen(len(expectedNames))) + for i, name := range expectedNames { + Expect(entries[i].Name()).To(Equal(name)) + } + }, + Entry("reads all entries", "", nil, []string{"a", "b", "c"}), + Entry("skips entries with permission error", "b", nil, []string{"a", "c"}), + Entry("aborts on fs.ErrNotExist", "", fs.ErrNotExist, []string{}), + ) }) }) }) @@ -205,11 +247,54 @@ func getDirEntry(baseDir, name string) os.DirEntry { panic(fmt.Sprintf("Could not find %s in %s", name, baseDir)) } +// mockMusicFS is a mock implementation of the MusicFS interface that supports symlinks type mockMusicFS struct { storage.MusicFS fs.FS } +// Open resolves symlinks func (m *mockMusicFS) Open(name string) (fs.File, error) { - return m.FS.Open(name) + f, err := m.FS.Open(name) + if err != nil { + return nil, err + } + + info, err := f.Stat() + if err != nil { + f.Close() + return nil, err + } + + if info.Mode()&fs.ModeSymlink != 0 { + // For symlinks, read the target path from the Data field + target := string(m.FS.(fstest.MapFS)[name].Data) + f.Close() + return m.FS.Open(target) + } + + return f, nil +} + +// Stat uses Open to resolve symlinks +func (m *mockMusicFS) Stat(name string) (fs.FileInfo, error) { + f, err := m.Open(name) + if err != nil { + return nil, err + } + defer f.Close() + return f.Stat() +} + +// ReadDir uses Open to resolve symlinks +func (m *mockMusicFS) ReadDir(name string) ([]fs.DirEntry, error) { + f, err := m.Open(name) + if err != nil { + return nil, err + } + defer f.Close() + if dirFile, ok := f.(fs.ReadDirFile); ok { + return dirFile.ReadDir(-1) + } + return nil, fmt.Errorf("not a directory") }