navidrome/persistence/criteria_sql_benchmark_test.go
Deluan Quintão 74185dc6d1
fix(smartplaylists): optimize smart playlist performance for role and tag criteria (#5515)
* fix(server): optimize smart playlist role queries for large criteria (#5511)

Role-based smart playlist criteria (artist, composer, etc.) now query
the indexed media_file_artists join table instead of parsing JSON via
json_tree() on every row. Multiple conditions for the same role within
an OR group are merged into a single EXISTS subquery (batched at 200
to stay under SQLite's expression tree depth limit).

A composite index (media_file_id, role) replaces the now-redundant
single-column (media_file_id) index on media_file_artists.

Benchmark (40k tracks, 500 patterns, 3 artists/track):
- Merged join-table: 15ms  (9.3x faster)
- Merged json_tree:  30ms  (4.6x faster)
- Unmerged baseline: 137ms

* refactor: simplify role condition SQL generation and benchmark

Extract shared roleCondSQL/roleExistsSQL helpers to deduplicate the
EXISTS template between roleCond and roleCondGroup. Use slices.Chunk
for batching per project convention. Extract runBenchQuery helper to
eliminate triplicated benchmark execution loop.

* chore: raise roleCondBatchSize to 350

The empirical SQLite limit is 496 conditions per merged EXISTS
subquery. Raising from 200 to 350 reduces the number of batches
(e.g. 500 patterns now splits into 2 batches instead of 3).

* fix(server): apply OR-merge optimization to tag conditions too

Generalize mergeRoleConds into mergeJsonConds to also collapse multiple
tag conditions for the same tag (e.g. genre) within OR groups. This
gives the same ~5x speedup for tag-heavy smart playlists as the role
optimization gives for artist-heavy ones.

* refactor: benchmark uses real criteria pipeline instead of hand-built SQL

The "Current" sub-benchmark now builds criteria.Criteria expressions and
runs them through the actual newSmartPlaylistCriteria → Where() → ToSql()
pipeline, validating the real production code path. The baseline still
uses hand-built SQL representing the old json_tree approach.

* fix: stabilize merged group ordering and close rows before error check

Sort group keys in mergeJsonConds so the merged additions have
deterministic order across runs, improving SQLite statement cache reuse.
Move rows.Close() before rows.Err() in benchmark helper.
2026-05-22 18:00:13 -03:00

237 lines
6.7 KiB
Go

package persistence
import (
"context"
"encoding/json"
"fmt"
"path/filepath"
"strings"
"testing"
"github.com/Masterminds/squirrel"
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/conf/configtest"
"github.com/navidrome/navidrome/db"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/model/criteria"
"github.com/navidrome/navidrome/model/request"
"github.com/pocketbase/dbx"
)
const (
benchNumArtists = 1_000
benchNumTracks = 40_000
benchNumPatterns = 500
benchArtistsPerTrack = 3
)
// BenchmarkSmartPlaylistRole compares role-based smart playlist query performance
// between the current implementation (merged join-table via criteria pipeline) and
// the old baseline (unmerged json_tree subqueries).
func BenchmarkSmartPlaylistRole(b *testing.B) {
configtest.SetupConfig()
tmpDir := b.TempDir()
conf.Server.DbPath = filepath.Join(tmpDir, "bench-smartpl.db")
cleanup := db.Init(context.Background())
defer cleanup()
log.SetLevel(log.LevelFatal)
conn := dbx.NewFromDB(db.Db(), db.Dialect)
ctx := log.NewContext(context.Background())
user := model.User{ID: "bench-user", UserName: "bench", Name: "Bench User", IsAdmin: true}
ctx = request.WithUser(ctx, user)
setupBenchData(b, ctx, conn, user)
criteria.AddRoles([]string{"artist"})
// Build the criteria expression: 500 "contains artist" patterns in an OR group
anyExprs := make(criteria.Any, benchNumPatterns)
for i := range benchNumPatterns {
anyExprs[i] = criteria.Contains{"artist": fmt.Sprintf("Artist %04d", i)}
}
expr := criteria.Criteria{Expression: anyExprs, Sort: "title", Limit: 500}
b.Run("Current", func(b *testing.B) {
benchmarkCriteriaPipeline(b, ctx, expr)
})
b.Run("Baseline_UnmergedJSONTree", func(b *testing.B) {
benchmarkUnmergedJSONTree(b, ctx)
})
}
// benchmarkCriteriaPipeline runs the criteria through the actual production code path:
// newSmartPlaylistCriteria → Where() → ToSql(), then executes the resulting query.
func benchmarkCriteriaPipeline(b *testing.B, ctx context.Context, expr criteria.Criteria) {
b.Helper()
cSQL := newSmartPlaylistCriteria(expr)
// Build the full query matching buildSmartPlaylistQuery + addCriteria
sq := squirrel.Select("media_file.id").From("media_file")
cond, err := cSQL.Where()
if err != nil {
b.Fatal(err)
}
sq = sq.Where(cond)
if expr.Limit > 0 {
sq = sq.Limit(uint64(expr.Limit))
}
if order := cSQL.OrderBy(); order != "" {
sq = sq.OrderBy(order)
}
query, args, err := sq.PlaceholderFormat(squirrel.Question).ToSql()
if err != nil {
b.Fatal(err)
}
runBenchQuery(b, ctx, query, args)
}
// benchmarkUnmergedJSONTree builds the old-style query with N separate json_tree EXISTS
// subqueries (the pre-optimization baseline).
func benchmarkUnmergedJSONTree(b *testing.B, ctx context.Context) {
b.Helper()
var sb strings.Builder
sb.WriteString("SELECT media_file.id FROM media_file WHERE (")
args := make([]any, 0, benchNumPatterns)
for i := range benchNumPatterns {
if i > 0 {
sb.WriteString(" OR ")
}
sb.WriteString("exists (select 1 from json_tree(media_file.participants, '$.artist') where key='name' and value LIKE ?)")
args = append(args, fmt.Sprintf("%%Artist %04d%%", i))
}
sb.WriteString(") ORDER BY media_file.title LIMIT 500")
runBenchQuery(b, ctx, sb.String(), args)
}
func runBenchQuery(b *testing.B, ctx context.Context, query string, args []any) {
b.Helper()
sqlDB := db.Db()
b.ResetTimer()
for range b.N {
rows, err := sqlDB.QueryContext(ctx, query, args...)
if err != nil {
b.Fatal(err)
}
for rows.Next() {
var id string
_ = rows.Scan(&id)
}
rows.Close()
if err := rows.Err(); err != nil {
b.Fatal(err)
}
}
}
func setupBenchData(b *testing.B, ctx context.Context, conn *dbx.DB, user model.User) {
b.Helper()
sqlDB := db.Db()
ur := NewUserRepository(ctx, conn)
if err := ur.Put(&user); err != nil {
b.Fatal(err)
}
if err := ur.SetUserLibraries(user.ID, []int{1}); err != nil {
b.Fatal(err)
}
tx, err := sqlDB.Begin()
if err != nil {
b.Fatal(err)
}
// Create artists
artistStmt, err := tx.Prepare("INSERT INTO artist (id, name) VALUES (?, ?)")
if err != nil {
b.Fatal(err)
}
for i := range benchNumArtists {
if _, err := artistStmt.Exec(fmt.Sprintf("artist-%04d", i), fmt.Sprintf("Artist %04d", i)); err != nil {
b.Fatal(err)
}
}
artistStmt.Close()
// Ensure folder exists
folderID := "bench-folder"
if _, err := tx.Exec("INSERT OR IGNORE INTO folder (id, library_id, path, name, parent_id) VALUES (?, 1, '.', '.', '')", folderID); err != nil {
b.Fatal(err)
}
// Create media files with participants JSON, cycling through artists
mfStmt, err := tx.Prepare(`INSERT INTO media_file (id, path, title, album, artist, artist_id, album_id,
duration, year, size, suffix, tags, participants, lyrics, library_id, folder_id, pid, codec)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`)
if err != nil {
b.Fatal(err)
}
// Populate media_file_artists join table
mfaStmt, err := tx.Prepare("INSERT INTO media_file_artists (media_file_id, artist_id, role, sub_role) VALUES (?, ?, ?, ?)")
if err != nil {
b.Fatal(err)
}
for i := range benchNumTracks {
trackID := fmt.Sprintf("track-%05d", i)
// Assign benchArtistsPerTrack artists to each track, cycling through the pool
artistEntries := make([]map[string]string, benchArtistsPerTrack)
for a := range benchArtistsPerTrack {
artistIdx := (i + a) % benchNumArtists
artistEntries[a] = map[string]string{
"id": fmt.Sprintf("artist-%04d", artistIdx),
"name": fmt.Sprintf("Artist %04d", artistIdx),
}
}
primaryArtistIdx := i % benchNumArtists
primaryArtistID := fmt.Sprintf("artist-%04d", primaryArtistIdx)
primaryArtistName := fmt.Sprintf("Artist %04d", primaryArtistIdx)
participants := map[string][]map[string]string{"artist": artistEntries}
participantsJSON, _ := json.Marshal(participants)
if _, err := mfStmt.Exec(
trackID,
fmt.Sprintf("music/%s.mp3", trackID),
fmt.Sprintf("Track %05d", i),
"Bench Album",
primaryArtistName,
primaryArtistID,
"bench-album",
180, 2024, 5000000, "mp3",
"{}",
string(participantsJSON),
"[]",
1, folderID, trackID, "mp3",
); err != nil {
b.Fatal(err)
}
// Insert all artist associations into the join table
for a := range benchArtistsPerTrack {
artistIdx := (i + a) % benchNumArtists
artistID := fmt.Sprintf("artist-%04d", artistIdx)
if _, err := mfaStmt.Exec(trackID, artistID, "artist", ""); err != nil {
b.Fatal(err)
}
}
}
mfStmt.Close()
mfaStmt.Close()
if err := tx.Commit(); err != nil {
b.Fatal(err)
}
b.Logf("Setup complete: %d artists, %d tracks (%d artists/track), %d patterns",
benchNumArtists, benchNumTracks, benchArtistsPerTrack, benchNumPatterns)
}