Merge 70f43437b5da8a1cd7440b3d3922267591cc1026 into 94eb6c522b63198bdc4565442d86918ad43156e5

This commit is contained in:
Mignonne Patterson 2026-05-01 12:19:11 -04:00 committed by GitHub
commit 5ed4704044
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 663 additions and 2 deletions

View File

@ -110,6 +110,7 @@ type configOptions struct {
PID pidOptions `json:",omitzero"`
Inspect inspectOptions `json:",omitzero"`
Subsonic subsonicOptions `json:",omitzero"`
SQLite sqliteOptions `json:",omitzero"`
LastFM lastfmOptions `json:",omitzero"`
Deezer deezerOptions `json:",omitzero"`
ListenBrainz listenBrainzOptions `json:",omitzero"`

20
conf/sqlite_options.go Normal file
View File

@ -0,0 +1,20 @@
package conf
// sqliteOptions groups the user-facing tunables for the SQLite connection.
// Every field is optional; the documented defaults quoted below are not
// assigned in this file (NOTE(review): confirm where they are registered).
type sqliteOptions struct {
	// JournalMode is the value handed to SQLite's journal_mode setting, for
	// example WAL or DELETE. Documented default: WAL, which improves
	// concurrency but may not work on network filesystems.
	JournalMode string `json:",omitzero"`

	// BusyTimeout is how long, in milliseconds, SQLite waits for a lock to
	// clear before reporting "database is locked". Documented default: 5000.
	BusyTimeout int `json:",omitzero"`

	// SyncMode selects how aggressively SQLite flushes writes to disk.
	// Documented default: NORMAL, a balance of safety and performance.
	SyncMode string `json:",omitzero"`

	// MaxConnections caps the number of concurrent database connections.
	// The default of 0 means max(4, runtime.NumCPU()) is used instead.
	MaxConnections int `json:",omitzero"`
}

View File

@ -0,0 +1,311 @@
{
"annotations": {
"list": []
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"hiddenSeries": false,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.4.0",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(sqlite_lock_wait_duration_seconds_sum[5m])",
"interval": "",
"legendFormat": "{{operation}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "SQLite Lock Wait Duration (5m rate)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"hiddenSeries": false,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.4.0",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(sqlite_lock_errors_total[5m])",
"interval": "",
"legendFormat": "{{operation}} - {{type}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "SQLite Lock Errors (5m rate)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"hiddenSeries": false,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.4.0",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(sqlite_operation_retries_total[5m])",
"interval": "",
"legendFormat": "{{operation}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "SQLite Operation Retries (5m rate)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"schemaVersion": 27,
"style": "dark",
"tags": ["navidrome", "sqlite"],
"templating": {
"list": []
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Navidrome SQLite Monitoring",
"version": 1
}

53
core/metrics/sqlite.go Normal file
View File

@ -0,0 +1,53 @@
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
)
// sqliteLockWaitDuration is a histogram of lock-wait times in seconds,
// partitioned by the "operation" label.
var sqliteLockWaitDuration = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:    "sqlite_lock_wait_duration_seconds",
		Help:    "Time spent waiting for SQLite locks to be released",
		Buckets: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10},
	},
	[]string{"operation"},
)

// sqliteLockErrors counts lock-related errors, partitioned by the
// "operation" and "type" labels.
var sqliteLockErrors = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name: "sqlite_lock_errors_total",
		Help: "Number of SQLite lock-related errors",
	},
	[]string{"operation", "type"},
)

// sqliteRetries counts retried operations, partitioned by the "operation"
// label.
var sqliteRetries = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Name: "sqlite_operation_retries_total",
		Help: "Number of retried SQLite operations",
	},
	[]string{"operation"},
)
// init registers the three SQLite collectors with the default Prometheus
// registry at package load; MustRegister panics if any registration fails.
func init() {
	prometheus.MustRegister(
		sqliteLockWaitDuration,
		sqliteLockErrors,
		sqliteRetries,
	)
}
// ObserveSQLiteLockWait adds one sample to the lock-wait histogram.
//
// operation is the value of the "operation" label; duration is the wait in
// seconds, matching the metric's "_seconds" suffix.
func ObserveSQLiteLockWait(operation string, duration float64) {
	histogram := sqliteLockWaitDuration.WithLabelValues(operation)
	histogram.Observe(duration)
}
// IncrementSQLiteLockError adds one to the lock-error counter.
//
// operation is the value of the "operation" label and errType the value of
// the "type" label.
func IncrementSQLiteLockError(operation, errType string) {
	counter := sqliteLockErrors.WithLabelValues(operation, errType)
	counter.Inc()
}
// IncrementSQLiteRetry adds one to the retry counter for the given
// operation label.
func IncrementSQLiteRetry(operation string) {
	counter := sqliteRetries.WithLabelValues(operation)
	counter.Inc()
}

View File

@ -41,7 +41,32 @@ func Db() *sql.DB {
}
log.Debug("Opening DataBase", "dbPath", Path, "driver", Driver)
db, err := sql.Open(Driver, Path)
db.SetMaxOpenConns(max(4, runtime.NumCPU()))
maxConns := max(4, runtime.NumCPU())
if conf.Server.SQLite.MaxConnections > 0 {
maxConns = conf.Server.SQLite.MaxConnections
}
db.SetMaxOpenConns(maxConns)
// Configure SQLite PRAGMAs to improve concurrency and reduce "database is locked" errors
// WAL allows concurrent readers while a writer is active
// busy_timeout tells SQLite how long to wait for a lock before error
// Note: some network filesystems (NFS/CIFS) may not fully support WAL
if conf.Server.SQLite.JournalMode != "" {
if _, err := db.Exec("PRAGMA journal_mode=" + conf.Server.SQLite.JournalMode + ";"); err != nil {
log.Error("Error setting PRAGMA journal_mode", "mode", conf.Server.SQLite.JournalMode, err)
}
}
if conf.Server.SQLite.BusyTimeout > 0 {
if _, err := db.Exec(fmt.Sprintf("PRAGMA busy_timeout=%d;", conf.Server.SQLite.BusyTimeout)); err != nil {
log.Error("Error setting PRAGMA busy_timeout", "timeout", conf.Server.SQLite.BusyTimeout, err)
}
}
if conf.Server.SQLite.SyncMode != "" {
if _, err := db.Exec("PRAGMA synchronous=" + conf.Server.SQLite.SyncMode + ";"); err != nil {
log.Error("Error setting PRAGMA synchronous", "mode", conf.Server.SQLite.SyncMode, err)
}
}
if err != nil {
log.Fatal("Error opening database", err)
}

View File

@ -0,0 +1,25 @@
-- +migrate Up
-- Seed the SQLite tuning properties. Each row is inserted only when no row
-- with the same name exists yet, so values that are already present are kept.

-- The journal mode is seeded with an INSERT like the other two properties:
-- an UPDATE guarded by NOT EXISTS on the same row can never match anything.
INSERT INTO user_property (name, value)
SELECT 'sqlite_journal_mode', 'WAL'
WHERE NOT EXISTS (
    SELECT 1 FROM user_property WHERE name = 'sqlite_journal_mode'
);

INSERT INTO user_property (name, value)
SELECT 'sqlite_busy_timeout', '5000'
WHERE NOT EXISTS (
    SELECT 1 FROM user_property WHERE name = 'sqlite_busy_timeout'
);

INSERT INTO user_property (name, value)
SELECT 'sqlite_sync_mode', 'NORMAL'
WHERE NOT EXISTS (
    SELECT 1 FROM user_property WHERE name = 'sqlite_sync_mode'
);

-- +migrate Down
-- Remove the three properties handled by the Up step.
DELETE FROM user_property WHERE name IN (
    'sqlite_journal_mode',
    'sqlite_busy_timeout',
    'sqlite_sync_mode'
);

47
docs/config/sqlite.md Normal file
View File

@ -0,0 +1,47 @@
# SQLite Configuration Options
The following SQLite-specific configuration options are available under the `SQLite` section:
## Basic Options
### JournalMode
- **Default:** `"WAL"`
- **Options:** `"DELETE"`, `"TRUNCATE"`, `"PERSIST"`, `"MEMORY"`, `"WAL"`, `"OFF"`
- **Description:** Controls how SQLite manages its journal file. WAL (Write-Ahead Logging) mode generally provides better concurrency and performance but may not work on some network filesystems.
### BusyTimeout
- **Default:** `5000` (milliseconds)
- **Description:** How long SQLite should wait when the database is locked before returning a "database is locked" error. Higher values let operations wait longer for a lock instead of failing, but requests may respond more slowly under contention.
### SyncMode
- **Default:** `"NORMAL"`
- **Options:** `"OFF"`, `"NORMAL"`, `"FULL"`, `"EXTRA"`
- **Description:** Controls how aggressively SQLite writes data to disk. NORMAL provides a good balance between safety and performance.
### MaxConnections
- **Default:** `0` (uses max(4, number of CPU cores))
- **Description:** Maximum number of concurrent database connections. Lower this if you experience "database is locked" errors, especially on network filesystems.
## Example Configuration
```toml
[SQLite]
JournalMode = "WAL" # Enable Write-Ahead Logging for better concurrency
BusyTimeout = 5000 # Wait up to 5 seconds for locks to clear
SyncMode = "NORMAL" # Good balance of durability and performance
MaxConnections = 4 # Limit concurrent connections if needed
```
## Network Filesystem Considerations
If your database is on a network filesystem (NFS, CIFS, etc.):
1. Consider moving the database to local storage
2. If using network storage is required:
   - Set `JournalMode = "DELETE"`
   - Lower `MaxConnections` to reduce contention
   - Increase `BusyTimeout` for better reliability

View File

@ -0,0 +1,92 @@
# SQLite Monitoring
Navidrome provides several Prometheus metrics to monitor SQLite database performance and lock contention:
## Available Metrics
### Lock Wait Duration
- **Metric**: `sqlite_lock_wait_duration_seconds`
- **Type**: Histogram
- **Labels**: `operation`
- **Description**: Time spent waiting for SQLite locks to be released, recorded as the backoff delay slept before each retry of an operation
- **Use Case**: Identify operations that are frequently blocked by locks
### Lock Errors
- **Metric**: `sqlite_lock_errors_total`
- **Type**: Counter
- **Labels**: `operation`, `type`
- **Description**: Number of SQLite lock-related errors
- **Types**:
- `retryable`: Temporary lock errors that can be retried
- `non_retryable`: Fatal errors that cannot be retried
### Operation Retries
- **Metric**: `sqlite_operation_retries_total`
- **Type**: Counter
- **Labels**: `operation`
- **Description**: Number of SQLite operations that succeeded after at least one retry
- **Use Case**: Track which operations require frequent retries
## Grafana Dashboard
A pre-configured Grafana dashboard is available at `contrib/grafana/sqlite-dashboard.json`.
This dashboard provides visualizations for:
- Lock wait duration trends
- Lock error rates by operation
- Retry rates by operation
## Alerting Recommendations
Consider setting up alerts for:
1. High lock wait durations (> 1s)
2. Increasing error rates
3. Frequent retries on specific operations
Example Prometheus alert rules:
```yaml
groups:
  - name: SQLiteAlerts
    rules:
      - alert: SQLiteLongLockWaits
        # Average wait per observation over the last 5 minutes, in seconds
        expr: rate(sqlite_lock_wait_duration_seconds_sum[5m]) / rate(sqlite_lock_wait_duration_seconds_count[5m]) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          description: "SQLite operations are waiting long for locks"
      - alert: SQLiteHighErrorRate
        expr: rate(sqlite_lock_errors_total[5m]) > 0.1
        for: 5m
        labels:
          severity: warning
        annotations:
          description: "High rate of SQLite lock errors"
```
## Troubleshooting with Metrics
If you see:
1. High lock wait durations:
   - Consider adjusting `SQLite.BusyTimeout`
   - Review concurrent operations
   - Check if database is on network storage
2. Many retryable errors:
   - Increase `SQLite.BusyTimeout`
   - Consider reducing `SQLite.MaxConnections`
   - Enable WAL mode if not using network storage
3. High retry rates:
   - Review operations causing contention
   - Consider batching updates
   - Check for long-running transactions

View File

@ -111,7 +111,13 @@ func (r *playlistRepository) Put(p *model.Playlist, cols ...string) error {
}
pls.UpdatedAt = time.Now()
id, err := r.put(pls.ID, pls)
var id string
err := RetryWithBackoff(r.ctx, "playlist_put", func() error {
var putErr error
id, putErr = r.put(pls.ID, pls)
return putErr
}, 3, 100*time.Millisecond, 2*time.Second)
if err != nil {
return err
}

81
persistence/retry.go Normal file
View File

@ -0,0 +1,81 @@
package persistence
import (
"context"
"database/sql"
"errors"
"math/rand"
"strings"
"time"
"github.com/navidrome/navidrome/core/metrics"
"github.com/navidrome/navidrome/log"
)
// RetryWithBackoff calls op and, while it keeps failing with an error that
// isRetryableError accepts, calls it again after a jittered, exponentially
// growing delay.
//
// Parameters:
//   - ctx: cancelling it aborts the wait between two attempts.
//   - operation: label used for the metrics and the debug log entry.
//   - op: the work to attempt; a nil return means success.
//   - maxAttempts: total number of calls to op; values below 1 are treated
//     as 1 so that op always runs at least once.
//   - initialDelay: base delay before the first retry; the base doubles
//     after every retry.
//   - maxDelay: upper bound for any single wait.
//
// Returns nil as soon as op succeeds, the error itself as soon as op fails
// with a non-retryable error, the last error once maxAttempts is exhausted,
// or ctx.Err() when ctx is done while waiting.
//
// Metrics: every failed attempt increments the lock-error counter with type
// "retryable" or "non_retryable"; every completed wait is observed in the
// lock-wait histogram; the retry counter is incremented once when op
// succeeds after at least one failed attempt.
func RetryWithBackoff(ctx context.Context, operation string, op func() error, maxAttempts int, initialDelay, maxDelay time.Duration) error {
	// Enforce the documented minimum: without this a non-positive value would
	// skip op entirely and report success.
	if maxAttempts < 1 {
		maxAttempts = 1
	}

	var lastErr error
	delay := initialDelay

	for attempt := 1; attempt <= maxAttempts; attempt++ {
		err := op()
		if err == nil {
			if attempt > 1 {
				// Record successful retry
				metrics.IncrementSQLiteRetry(operation)
			}
			return nil
		}
		lastErr = err

		if !isRetryableError(err) {
			// Record non-retryable error
			metrics.IncrementSQLiteLockError(operation, "non_retryable")
			return err
		}
		metrics.IncrementSQLiteLockError(operation, "retryable")

		if attempt == maxAttempts {
			break
		}

		// Use exponential backoff with jitter: 50-150% of the base delay,
		// never more than maxDelay.
		jitter := time.Duration(float64(delay) * (0.5 + rand.Float64()))
		if jitter > maxDelay {
			jitter = maxDelay
		}

		log.Debug(ctx, "Retrying operation after error",
			"operation", operation,
			"attempt", attempt,
			"maxAttempts", maxAttempts,
			"delay", jitter,
			"error", err)

		// A stoppable timer lets the cancellation path release it right away.
		timer := time.NewTimer(jitter)
		select {
		case <-timer.C:
			metrics.ObserveSQLiteLockWait(operation, jitter.Seconds())
		case <-ctx.Done():
			timer.Stop()
			return ctx.Err()
		}

		// Stop doubling once even the smallest jittered wait (50% of delay)
		// already reaches maxDelay: growing further cannot change the wait
		// and would eventually overflow time.Duration.
		if delay/2 < maxDelay {
			delay *= 2
		}
	}

	return lastErr
}
// isRetryableError reports whether err is treated as transient and worth
// another attempt: it is (or wraps) sql.ErrConnDone, or its message contains
// "database is locked" or "busy". A nil error is never retryable.
func isRetryableError(err error) bool {
	switch {
	case err == nil:
		return false
	case errors.Is(err, sql.ErrConnDone):
		return true
	}

	msg := err.Error()
	for _, marker := range []string{"database is locked", "busy"} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}