navidrome/persistence/folder_repository.go
Deluan Quintão 28d5299ffc
feat(scanner): implement selective folder scanning and file system watcher improvements (#4674)
* feat: Add selective folder scanning capability

Implement targeted scanning of specific library/folder pairs without
full recursion. This enables efficient rescanning of individual folders
when changes are detected, significantly reducing scan time for large
libraries.

Key changes:
- Add ScanTarget struct and ScanFolders API to Scanner interface
- Implement CLI flag --targets for specifying libraryID:folderPath pairs
- Add FolderRepository.GetByPaths() for batch folder info retrieval
- Create loadSpecificFolders() for non-recursive directory loading
- Scope GC operations to affected libraries only (with TODO for full impl)
- Add comprehensive tests for selective scanning behavior

The selective scan:
- Only processes specified folders (no subdirectory recursion)
- Maintains library isolation
- Runs full maintenance pipeline scoped to affected libraries
- Supports both full and quick scan modes

Examples:
  navidrome scan --targets "1:Music/Rock,1:Music/Jazz"
  navidrome scan --full --targets "2:Classical"

* feat(folder): replace GetByPaths with GetFolderUpdateInfo for improved folder updates retrieval

Signed-off-by: Deluan <deluan@navidrome.org>

* test: update parseTargets test to handle folder names with spaces

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(folder): remove unused LibraryPath struct and update GC logging message

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(folder): enhance external scanner to support target-specific scanning

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): simplify scanner methods

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(watcher): implement folder scanning notifications with deduplication

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(watcher): add resolveFolderPath function for testability

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(watcher): implement path ignoring based on .ndignore patterns

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): implement IgnoreChecker for managing .ndignore patterns

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(ignore_checker): rename scanner to lineScanner for clarity

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): enhance ScanTarget struct with String method for better target representation

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): validate library ID to prevent negative values

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): simplify GC method by removing library ID parameter

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(scanner): update folder scanning to include all descendants of specified folders

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(subsonic): allow selective scan in the /startScan endpoint

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): update CallScan to handle specific library/folder pairs

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): streamline scanning logic by removing scanAll method

Signed-off-by: Deluan <deluan@navidrome.org>

* test: enhance mockScanner for thread safety and improve test reliability

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): move scanner.ScanTarget to model.ScanTarget

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: move scanner types to model,implement MockScanner

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): update scanner interface and implementations to use model.Scanner

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(folder_repository): normalize target path handling by using filepath.Clean

Signed-off-by: Deluan <deluan@navidrome.org>

* test(folder_repository): add comprehensive tests for folder retrieval and child exclusion

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): simplify selective scan logic using slice.Filter

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): streamline phase folder and album creation by removing unnecessary library parameter

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): move initialization logic from phase_1 to the scanner itself

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(tests): rename selective scan test file to scanner_selective_test.go

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(configuration): add DevSelectiveWatcher configuration option

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(watcher): enhance .ndignore handling for folder deletions and file changes

Signed-off-by: Deluan <deluan@navidrome.org>

* docs(scanner): comments

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor(scanner): enhance walkDirTree to support target folder scanning

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner, watcher): handle errors when pushing ignore patterns for folders

Signed-off-by: Deluan <deluan@navidrome.org>

* Update scanner/phase_1_folders.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* refactor(scanner): replace parseTargets function with direct call to scanner.ParseTargets

Signed-off-by: Deluan <deluan@navidrome.org>

* test(scanner): add tests for ScanBegin and ScanEnd functionality

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(library): update PRAGMA optimize to check table sizes without ANALYZE

Signed-off-by: Deluan <deluan@navidrome.org>

* test(scanner): refactor tests

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): add selective scan options and update translations

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): add quick and full scan options for individual libraries

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(ui): add Scan buttonsto the LibraryList

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(scan): update scanning parameters from 'path' to 'target' for selective scans.

* refactor(scan): move ParseTargets function to model package

* test(scan): suppress unused return value from SetUserLibraries in tests

* feat(gc): enhance garbage collection to support selective library purging

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(scanner): prevent race condition when scanning deleted folders

When the watcher detects changes in a folder that gets deleted before
the scanner runs (due to the 10-second delay), the scanner was
prematurely removing these folders from the tracking map, preventing
them from being marked as missing.

The issue occurred because `newFolderEntry` was calling `popLastUpdate`
before verifying the folder actually exists on the filesystem.

Changes:
- Move fs.Stat check before newFolderEntry creation in loadDir to
  ensure deleted folders remain in lastUpdates for finalize() to handle
- Add early existence check in walkDirTree to skip non-existent target
  folders with a warning log
- Add unit test verifying non-existent folders aren't removed from
  lastUpdates prematurely
- Add integration test for deleted folder scenario with ScanFolders

Fixes the issue where deleting entire folders (e.g., /music/AC_DC)
wouldn't mark tracks as missing when using selective folder scanning.

* refactor(scan): streamline folder entry creation and update handling

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(scan): add '@Recycle' (QNAP) to ignored directories list

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(log): improve thread safety in logging level management

* test(scan): move unit tests for ParseTargets function

Signed-off-by: Deluan <deluan@navidrome.org>

* review

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: deluan <deluan.quintao@mechanical-orchard.com>
2025-11-14 22:15:43 -05:00

217 lines
6.0 KiB
Go

package persistence
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"slices"
"strings"
"time"
. "github.com/Masterminds/squirrel"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/utils/slice"
"github.com/pocketbase/dbx"
)
type folderRepository struct {
sqlRepository
}
type dbFolder struct {
*model.Folder `structs:",flatten"`
ImageFiles string `structs:"-" json:"-"`
}
func (f *dbFolder) PostScan() error {
var err error
if f.ImageFiles != "" {
if err = json.Unmarshal([]byte(f.ImageFiles), &f.Folder.ImageFiles); err != nil {
return fmt.Errorf("parsing folder image files from db: %w", err)
}
}
return nil
}
func (f *dbFolder) PostMapArgs(args map[string]any) error {
if f.Folder.ImageFiles == nil {
args["image_files"] = "[]"
} else {
imgFiles, err := json.Marshal(f.Folder.ImageFiles)
if err != nil {
return fmt.Errorf("marshalling image files: %w", err)
}
args["image_files"] = string(imgFiles)
}
return nil
}
type dbFolders []dbFolder
func (fs dbFolders) toModels() []model.Folder {
return slice.Map(fs, func(f dbFolder) model.Folder { return *f.Folder })
}
func newFolderRepository(ctx context.Context, db dbx.Builder) model.FolderRepository {
r := &folderRepository{}
r.ctx = ctx
r.db = db
r.tableName = "folder"
return r
}
func (r folderRepository) selectFolder(options ...model.QueryOptions) SelectBuilder {
sql := r.newSelect(options...).Columns("folder.*", "library.path as library_path").
Join("library on library.id = folder.library_id")
return r.applyLibraryFilter(sql)
}
func (r folderRepository) Get(id string) (*model.Folder, error) {
sq := r.selectFolder().Where(Eq{"folder.id": id})
var res dbFolder
err := r.queryOne(sq, &res)
return res.Folder, err
}
func (r folderRepository) GetByPath(lib model.Library, path string) (*model.Folder, error) {
id := model.NewFolder(lib, path).ID
return r.Get(id)
}
func (r folderRepository) GetAll(opt ...model.QueryOptions) ([]model.Folder, error) {
sq := r.selectFolder(opt...)
var res dbFolders
err := r.queryAll(sq, &res)
return res.toModels(), err
}
func (r folderRepository) CountAll(opt ...model.QueryOptions) (int64, error) {
query := r.newSelect(opt...).Columns("count(*)")
query = r.applyLibraryFilter(query)
return r.count(query)
}
func (r folderRepository) GetFolderUpdateInfo(lib model.Library, targetPaths ...string) (map[string]model.FolderUpdateInfo, error) {
where := And{
Eq{"library_id": lib.ID},
Eq{"missing": false},
}
// If specific paths are requested, include those folders and all their descendants
if len(targetPaths) > 0 {
// Collect folder IDs for exact target folders and path conditions for descendants
folderIDs := make([]string, 0, len(targetPaths))
pathConditions := make(Or, 0, len(targetPaths)*2)
for _, targetPath := range targetPaths {
if targetPath == "" || targetPath == "." {
// Root path - include everything in this library
pathConditions = Or{}
folderIDs = nil
break
}
// Clean the path to normalize it. Paths stored in the folder table do not have leading/trailing slashes.
cleanPath := strings.TrimPrefix(targetPath, string(os.PathSeparator))
cleanPath = filepath.Clean(cleanPath)
// Include the target folder itself by ID
folderIDs = append(folderIDs, model.FolderID(lib, cleanPath))
// Include all descendants: folders whose path field equals or starts with the target path
// Note: Folder.Path is the directory path, so children have path = targetPath
pathConditions = append(pathConditions, Eq{"path": cleanPath})
pathConditions = append(pathConditions, Like{"path": cleanPath + "/%"})
}
// Combine conditions: exact folder IDs OR descendant path patterns
if len(folderIDs) > 0 {
where = append(where, Or{Eq{"id": folderIDs}, pathConditions})
} else if len(pathConditions) > 0 {
where = append(where, pathConditions)
}
}
sq := r.newSelect().Columns("id", "updated_at", "hash").Where(where)
var res []struct {
ID string
UpdatedAt time.Time
Hash string
}
err := r.queryAll(sq, &res)
if err != nil {
return nil, err
}
m := make(map[string]model.FolderUpdateInfo, len(res))
for _, f := range res {
m[f.ID] = model.FolderUpdateInfo{UpdatedAt: f.UpdatedAt, Hash: f.Hash}
}
return m, nil
}
func (r folderRepository) Put(f *model.Folder) error {
dbf := dbFolder{Folder: f}
_, err := r.put(dbf.ID, &dbf)
return err
}
func (r folderRepository) MarkMissing(missing bool, ids ...string) error {
log.Debug(r.ctx, "Marking folders as missing", "ids", ids, "missing", missing)
for chunk := range slices.Chunk(ids, 200) {
sq := Update(r.tableName).
Set("missing", missing).
Set("updated_at", time.Now()).
Where(Eq{"id": chunk})
_, err := r.executeSQL(sq)
if err != nil {
return err
}
}
return nil
}
func (r folderRepository) GetTouchedWithPlaylists() (model.FolderCursor, error) {
query := r.selectFolder().Where(And{
Eq{"missing": false},
Gt{"num_playlists": 0},
ConcatExpr("folder.updated_at > library.last_scan_at"),
})
cursor, err := queryWithStableResults[dbFolder](r.sqlRepository, query)
if err != nil {
return nil, err
}
return func(yield func(model.Folder, error) bool) {
for f, err := range cursor {
if !yield(*f.Folder, err) || err != nil {
return
}
}
}, nil
}
func (r folderRepository) purgeEmpty(libraryIDs ...int) error {
sq := Delete(r.tableName).Where(And{
Eq{"num_audio_files": 0},
Eq{"num_playlists": 0},
Eq{"image_files": "[]"},
ConcatExpr("id not in (select parent_id from folder)"),
ConcatExpr("id not in (select folder_id from media_file)"),
})
// If libraryIDs are specified, only purge folders from those libraries
if len(libraryIDs) > 0 {
sq = sq.Where(Eq{"library_id": libraryIDs})
}
c, err := r.executeSQL(sq)
if err != nil {
return fmt.Errorf("purging empty folders: %w", err)
}
if c > 0 {
log.Debug(r.ctx, "Purging empty folders", "totalDeleted", c)
}
return nil
}
var _ model.FolderRepository = (*folderRepository)(nil)