refactor(transcoding): enhance AAC command handling and support for audio channels in streaming

Signed-off-by: Deluan <deluan@navidrome.org>
This commit is contained in:
Deluan 2026-02-08 20:03:50 -05:00
parent 4a50142dd6
commit 01b1fc90a9
12 changed files with 111 additions and 41 deletions

View File

@ -151,7 +151,7 @@ var (
Name: "aac audio",
TargetFormat: "aac",
DefaultBitRate: 256,
Command: "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f adts -",
Command: "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f ipod -movflags frag_keyframe+empty_moov -",
},
{
Name: "flac audio",

View File

@ -176,7 +176,7 @@ func (a *archiver) addFileToZip(ctx context.Context, z *zip.Writer, mf model.Med
var r io.ReadCloser
if format != "raw" && format != "" {
r, err = a.ms.DoStream(ctx, &mf, format, bitrate, 0, 0, 0)
r, err = a.ms.DoStream(ctx, &mf, format, bitrate, 0, 0, 0, 0)
} else {
r, err = os.Open(path)
}

View File

@ -44,7 +44,7 @@ var _ = Describe("Archiver", func() {
}}).Return(mfs, nil)
ds.On("MediaFile", mock.Anything).Return(mfRepo)
ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(3)
ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(3)
out := new(bytes.Buffer)
err := arch.ZipAlbum(context.Background(), "1", "mp3", 128, out)
@ -73,7 +73,7 @@ var _ = Describe("Archiver", func() {
}}).Return(mfs, nil)
ds.On("MediaFile", mock.Anything).Return(mfRepo)
ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2)
ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2)
out := new(bytes.Buffer)
err := arch.ZipArtist(context.Background(), "1", "mp3", 128, out)
@ -104,7 +104,7 @@ var _ = Describe("Archiver", func() {
}
sh.On("Load", mock.Anything, "1").Return(share, nil)
ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2)
ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2)
out := new(bytes.Buffer)
err := arch.ZipShare(context.Background(), "1", out)
@ -136,7 +136,7 @@ var _ = Describe("Archiver", func() {
plRepo := &mockPlaylistRepository{}
plRepo.On("GetWithTracks", "1", true, false).Return(pls, nil)
ds.On("Playlist", mock.Anything).Return(plRepo)
ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2)
ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2)
out := new(bytes.Buffer)
err := arch.ZipPlaylist(context.Background(), "1", "mp3", 128, out)
@ -217,8 +217,8 @@ type mockMediaStreamer struct {
core.MediaStreamer
}
func (m *mockMediaStreamer) DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqOffset int) (*core.Stream, error) {
args := m.Called(ctx, mf, reqFormat, reqBitRate, reqSampleRate, reqBitDepth, reqOffset)
func (m *mockMediaStreamer) DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqChannels int, reqOffset int) (*core.Stream, error) {
args := m.Called(ctx, mf, reqFormat, reqBitRate, reqSampleRate, reqBitDepth, reqChannels, reqOffset)
if args.Error(1) != nil {
return nil, args.Error(1)
}

View File

@ -171,13 +171,17 @@ func (j *ffCmd) wait() {
_ = j.out.Close()
}
// defaultCommands maps format to the known default command template.
// defaultCommands maps format to the known default command templates.
// Used to detect whether a user has customized their transcoding command.
var defaultCommands = map[string]string{
"mp3": "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -f mp3 -",
"opus": "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a libopus -f opus -",
"aac": "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f adts -",
"flac": "ffmpeg -i %s -ss %t -map 0:a:0 -v 0 -c:a flac -f flac -",
// Multiple entries per format support smooth upgrades (e.g. aac changed from adts to ipod).
var defaultCommands = map[string][]string{
// Keyed by target format. Each value lists every command template that
// isDefaultCommand accepts as "a default" for that format.
"mp3": {"ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -f mp3 -"},
"opus": {"ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a libopus -f opus -"},
"aac": {
// Current default: AAC written with the ipod output format plus fragmented-MP4 movflags.
"ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f ipod -movflags frag_keyframe+empty_moov -",
// Previous default (ADTS output); kept so it is still recognized as a default.
"ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f adts -", // legacy default
},
// flac has no bitrate placeholder (%bk) in its template.
"flac": {"ffmpeg -i %s -ss %t -map 0:a:0 -v 0 -c:a flac -f flac -"},
}
// formatCodecMap maps target format to ffmpeg codec flag.
@ -192,14 +196,22 @@ var formatCodecMap = map[string]string{
var formatOutputMap = map[string]string{
"mp3": "mp3",
"opus": "opus",
"aac": "adts",
"aac": "ipod",
"flac": "flac",
}
// isDefaultCommand returns true if the command matches the known default for this format.
// isDefaultCommand returns true if the command matches any known default for this format.
func isDefaultCommand(format, command string) bool {
defaultCmd, ok := defaultCommands[format]
return ok && command == defaultCmd
defaults, ok := defaultCommands[format]
if !ok {
return false
}
for _, d := range defaults {
if command == d {
return true
}
}
return false
}
// buildDynamicArgs programmatically constructs ffmpeg arguments for known formats,
@ -240,6 +252,11 @@ func buildDynamicArgs(opts TranscodeOptions) []string {
args = append(args, "-f", outputFmt)
}
// For AAC in MP4 container, enable fragmented MP4 for pipe-safe streaming
if opts.Format == "aac" {
args = append(args, "-movflags", "frag_keyframe+empty_moov")
}
args = append(args, "-")
return args
}

View File

@ -86,6 +86,9 @@ var _ = Describe("ffmpeg", func() {
Expect(isDefaultCommand("opus", "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a libopus -f opus -")).To(BeTrue())
})
It("returns true for known default aac command", func() {
Expect(isDefaultCommand("aac", "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f ipod -movflags frag_keyframe+empty_moov -")).To(BeTrue())
})
It("returns true for legacy default aac command", func() {
Expect(isDefaultCommand("aac", "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f adts -")).To(BeTrue())
})
It("returns true for known default flac command", func() {
@ -174,7 +177,7 @@ var _ = Describe("ffmpeg", func() {
}))
})
It("builds aac args correctly", func() {
It("builds aac args with fragmented MP4 container", func() {
args := buildDynamicArgs(TranscodeOptions{
Format: "aac",
FilePath: "/music/file.flac",
@ -186,7 +189,8 @@ var _ = Describe("ffmpeg", func() {
"-c:a", "aac",
"-b:a", "256k",
"-v", "0",
"-f", "adts",
"-f", "ipod",
"-movflags", "frag_keyframe+empty_moov",
"-",
}))
})

View File

@ -19,8 +19,8 @@ import (
)
type MediaStreamer interface {
NewStream(ctx context.Context, id string, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, offset int) (*Stream, error)
DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqOffset int) (*Stream, error)
NewStream(ctx context.Context, id string, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqChannels int, offset int) (*Stream, error)
DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqChannels int, reqOffset int) (*Stream, error)
}
type TranscodingCache cache.FileCache
@ -43,23 +43,24 @@ type streamJob struct {
bitRate int
sampleRate int
bitDepth int
channels int
offset int
}
func (j *streamJob) Key() string {
return fmt.Sprintf("%s.%s.%d.%d.%d.%s.%d", j.mf.ID, j.mf.UpdatedAt.Format(time.RFC3339Nano), j.bitRate, j.sampleRate, j.bitDepth, j.format, j.offset)
return fmt.Sprintf("%s.%s.%d.%d.%d.%d.%s.%d", j.mf.ID, j.mf.UpdatedAt.Format(time.RFC3339Nano), j.bitRate, j.sampleRate, j.bitDepth, j.channels, j.format, j.offset)
}
func (ms *mediaStreamer) NewStream(ctx context.Context, id string, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqOffset int) (*Stream, error) {
func (ms *mediaStreamer) NewStream(ctx context.Context, id string, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqChannels int, reqOffset int) (*Stream, error) {
mf, err := ms.ds.MediaFile(ctx).Get(id)
if err != nil {
return nil, err
}
return ms.DoStream(ctx, mf, reqFormat, reqBitRate, reqSampleRate, reqBitDepth, reqOffset)
return ms.DoStream(ctx, mf, reqFormat, reqBitRate, reqSampleRate, reqBitDepth, reqChannels, reqOffset)
}
func (ms *mediaStreamer) DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqOffset int) (*Stream, error) {
func (ms *mediaStreamer) DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqChannels int, reqOffset int) (*Stream, error) {
var format string
var bitRate int
var cached bool
@ -96,6 +97,7 @@ func (ms *mediaStreamer) DoStream(ctx context.Context, mf *model.MediaFile, reqF
bitRate: bitRate,
sampleRate: reqSampleRate,
bitDepth: reqBitDepth,
channels: reqChannels,
offset: reqOffset,
}
r, err := ms.cache.Get(ctx, job)
@ -229,6 +231,7 @@ func NewTranscodingCache() TranscodingCache {
BitRate: job.bitRate,
SampleRate: job.sampleRate,
BitDepth: job.bitDepth,
Channels: job.channels,
Offset: job.offset,
})
if err != nil {

View File

@ -39,34 +39,34 @@ var _ = Describe("MediaStreamer", func() {
Context("NewStream", func() {
It("returns a seekable stream if format is 'raw'", func() {
s, err := streamer.NewStream(ctx, "123", "raw", 0, 0, 0, 0)
s, err := streamer.NewStream(ctx, "123", "raw", 0, 0, 0, 0, 0)
Expect(err).ToNot(HaveOccurred())
Expect(s.Seekable()).To(BeTrue())
})
It("returns a seekable stream if maxBitRate is 0", func() {
s, err := streamer.NewStream(ctx, "123", "mp3", 0, 0, 0, 0)
s, err := streamer.NewStream(ctx, "123", "mp3", 0, 0, 0, 0, 0)
Expect(err).ToNot(HaveOccurred())
Expect(s.Seekable()).To(BeTrue())
})
It("returns a seekable stream if maxBitRate is higher than file bitRate", func() {
s, err := streamer.NewStream(ctx, "123", "mp3", 320, 0, 0, 0)
s, err := streamer.NewStream(ctx, "123", "mp3", 320, 0, 0, 0, 0)
Expect(err).ToNot(HaveOccurred())
Expect(s.Seekable()).To(BeTrue())
})
It("returns a NON seekable stream if transcode is required", func() {
s, err := streamer.NewStream(ctx, "123", "mp3", 64, 0, 0, 0)
s, err := streamer.NewStream(ctx, "123", "mp3", 64, 0, 0, 0, 0)
Expect(err).To(BeNil())
Expect(s.Seekable()).To(BeFalse())
Expect(s.Duration()).To(Equal(float32(257.0)))
})
It("returns a seekable stream if the file is complete in the cache", func() {
s, err := streamer.NewStream(ctx, "123", "mp3", 32, 0, 0, 0)
s, err := streamer.NewStream(ctx, "123", "mp3", 32, 0, 0, 0, 0)
Expect(err).To(BeNil())
_, _ = io.ReadAll(s)
_ = s.Close()
Eventually(func() bool { return ffmpeg.IsClosed() }, "3s").Should(BeTrue())
s, err = streamer.NewStream(ctx, "123", "mp3", 32, 0, 0, 0)
s, err = streamer.NewStream(ctx, "123", "mp3", 32, 0, 0, 0, 0)
Expect(err).To(BeNil())
Expect(s.Seekable()).To(BeTrue())
})

View File

@ -334,9 +334,9 @@ func (s *deciderService) computeTranscodedStream(ctx context.Context, mf *model.
ts := &StreamDetails{
Container: responseContainer,
Codec: strings.ToLower(profile.AudioCodec),
SampleRate: dsdToPCMSampleRate(mf.SampleRate, mf.AudioCodec()),
SampleRate: normalizeSourceSampleRate(mf.SampleRate, mf.AudioCodec()),
Channels: mf.Channels,
BitDepth: mf.BitDepth,
BitDepth: normalizeSourceBitDepth(mf.BitDepth, mf.AudioCodec()),
IsLossless: targetIsLossless,
}
if ts.Codec == "" {
@ -750,16 +750,27 @@ func isLosslessFormat(format string) bool {
return false
}
// dsdToPCMSampleRate converts a DSD sample rate to its PCM-equivalent rate (÷8).
// normalizeSourceSampleRate adjusts the source sample rate for codecs that store
// it differently than PCM. Currently handles DSD (÷8):
// DSD64=2822400→352800, DSD128=5644800→705600, etc.
// For non-DSD codecs, returns the rate unchanged.
func dsdToPCMSampleRate(sampleRate int, codec string) int {
// For other codecs, returns the rate unchanged.
func normalizeSourceSampleRate(sampleRate int, codec string) int {
	// Only a positive rate on a DSD source is rescaled; anything else passes
	// through untouched (including zero/negative "unknown" rates).
	if sampleRate <= 0 || !strings.EqualFold(codec, "dsd") {
		return sampleRate
	}
	// DSD rate divided by 8, e.g. DSD64 2822400 -> 352800.
	return sampleRate / 8
}
// normalizeSourceBitDepth returns the bit depth to report for a source stream.
// A DSD source declared as 1-bit is reported as 24-bit, the PCM depth ffmpeg
// produces for it; every other codec/depth combination is returned unchanged.
// The codec comparison is case-insensitive.
func normalizeSourceBitDepth(bitDepth int, codec string) int {
	isOneBitDSD := bitDepth == 1 && strings.EqualFold(codec, "dsd")
	if !isOneBitDSD {
		return bitDepth
	}
	return 24
}
// codecFixedOutputSampleRate returns the mandatory output sample rate for codecs
// that always resample regardless of input (e.g., Opus always outputs 48000Hz).
// Returns 0 if the codec has no fixed output rate.

View File

@ -621,6 +621,9 @@ var _ = Describe("Decider", func() {
// DSD64 2822400 / 8 = 352800, capped by MP3 max of 48000
Expect(decision.TranscodeStream.SampleRate).To(Equal(48000))
Expect(decision.TargetSampleRate).To(Equal(48000))
// DSD 1-bit → 24-bit PCM
Expect(decision.TranscodeStream.BitDepth).To(Equal(24))
Expect(decision.TargetBitDepth).To(Equal(24))
})
It("converts DSD sample rate for FLAC target without codec limit", func() {
@ -637,6 +640,9 @@ var _ = Describe("Decider", func() {
// DSD64 2822400 / 8 = 352800, FLAC has no hard max
Expect(decision.TranscodeStream.SampleRate).To(Equal(352800))
Expect(decision.TargetSampleRate).To(Equal(352800))
// DSD 1-bit → 24-bit PCM
Expect(decision.TranscodeStream.BitDepth).To(Equal(24))
Expect(decision.TargetBitDepth).To(Equal(24))
})
It("applies codec profile limit to DSD-converted FLAC sample rate", func() {
@ -661,6 +667,33 @@ var _ = Describe("Decider", func() {
// DSD64 2822400 / 8 = 352800, capped by codec profile limit of 48000
Expect(decision.TranscodeStream.SampleRate).To(Equal(48000))
Expect(decision.TargetSampleRate).To(Equal(48000))
// DSD 1-bit → 24-bit PCM
Expect(decision.TranscodeStream.BitDepth).To(Equal(24))
Expect(decision.TargetBitDepth).To(Equal(24))
})
It("applies audioBitdepth limitation to DSD-converted bit depth", func() {
mf := &model.MediaFile{ID: "1", Suffix: "dsf", Codec: "DSD", BitRate: 5644, Channels: 2, SampleRate: 2822400, BitDepth: 1}
ci := &ClientInfo{
TranscodingProfiles: []Profile{
{Container: "flac", AudioCodec: "flac", Protocol: "http"},
},
CodecProfiles: []CodecProfile{
{
Type: CodecProfileTypeAudio,
Name: "flac",
Limitations: []Limitation{
{Name: LimitationAudioBitdepth, Comparison: ComparisonLessThanEqual, Values: []string{"16"}, Required: true},
},
},
},
}
decision, err := svc.MakeDecision(ctx, mf, ci)
Expect(err).ToNot(HaveOccurred())
Expect(decision.CanTranscode).To(BeTrue())
// DSD 1-bit → 24-bit PCM, then capped by codec profile limit to 16-bit
Expect(decision.TranscodeStream.BitDepth).To(Equal(16))
Expect(decision.TargetBitDepth).To(Equal(16))
})
})

View File

@ -24,7 +24,7 @@ func (pub *Router) handleStream(w http.ResponseWriter, r *http.Request) {
return
}
stream, err := pub.streamer.NewStream(ctx, info.id, info.format, info.bitrate, 0, 0, 0)
stream, err := pub.streamer.NewStream(ctx, info.id, info.format, info.bitrate, 0, 0, 0, 0)
if err != nil {
log.Error(ctx, "Error starting shared stream", err)
http.Error(w, "invalid request", http.StatusInternalServerError)

View File

@ -60,7 +60,7 @@ func (api *Router) Stream(w http.ResponseWriter, r *http.Request) (*responses.Su
format, _ := p.String("format")
timeOffset := p.IntOr("timeOffset", 0)
stream, err := api.streamer.NewStream(ctx, id, format, maxBitRate, 0, 0, timeOffset)
stream, err := api.streamer.NewStream(ctx, id, format, maxBitRate, 0, 0, 0, timeOffset)
if err != nil {
return nil, err
}
@ -129,7 +129,7 @@ func (api *Router) Download(w http.ResponseWriter, r *http.Request) (*responses.
switch v := entity.(type) {
case *model.MediaFile:
stream, err := api.streamer.NewStream(ctx, id, format, maxBitRate, 0, 0, 0)
stream, err := api.streamer.NewStream(ctx, id, format, maxBitRate, 0, 0, 0, 0)
if err != nil {
return nil, err
}

View File

@ -325,18 +325,20 @@ func (api *Router) GetTranscodeStream(w http.ResponseWriter, r *http.Request) (*
maxBitRate := 0
sampleRate := 0
bitDepth := 0
channels := 0
if !params.DirectPlay && params.TargetFormat != "" {
format = params.TargetFormat
maxBitRate = params.TargetBitrate // Already in kbps, matching the streamer
sampleRate = params.TargetSampleRate
bitDepth = params.TargetBitDepth
channels = params.TargetChannels
}
// Get offset parameter
offset := p.IntOr("offset", 0)
// Create stream
stream, err := api.streamer.NewStream(ctx, mediaID, format, maxBitRate, sampleRate, bitDepth, offset)
stream, err := api.streamer.NewStream(ctx, mediaID, format, maxBitRate, sampleRate, bitDepth, channels, offset)
if err != nil {
return nil, err
}