From 01b1fc90a9423fba7bf64bb3295ea16b57e70b64 Mon Sep 17 00:00:00 2001 From: Deluan Date: Sun, 8 Feb 2026 20:03:50 -0500 Subject: [PATCH] refactor(transcoding): enhance AAC command handling and support for audio channels in streaming Signed-off-by: Deluan --- consts/consts.go | 2 +- core/archiver.go | 2 +- core/archiver_test.go | 12 +++++------ core/ffmpeg/ffmpeg.go | 37 +++++++++++++++++++++++--------- core/ffmpeg/ffmpeg_test.go | 8 +++++-- core/media_streamer.go | 15 +++++++------ core/media_streamer_test.go | 12 +++++------ core/transcode/transcode.go | 21 +++++++++++++----- core/transcode/transcode_test.go | 33 ++++++++++++++++++++++++++++ server/public/handle_streams.go | 2 +- server/subsonic/stream.go | 4 ++-- server/subsonic/transcode.go | 4 +++- 12 files changed, 111 insertions(+), 41 deletions(-) diff --git a/consts/consts.go b/consts/consts.go index 3b26038ac..7064f8c86 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -151,7 +151,7 @@ var ( Name: "aac audio", TargetFormat: "aac", DefaultBitRate: 256, - Command: "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f adts -", + Command: "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f ipod -movflags frag_keyframe+empty_moov -", }, { Name: "flac audio", diff --git a/core/archiver.go b/core/archiver.go index d6bdaaf00..f2fd108b7 100644 --- a/core/archiver.go +++ b/core/archiver.go @@ -176,7 +176,7 @@ func (a *archiver) addFileToZip(ctx context.Context, z *zip.Writer, mf model.Med var r io.ReadCloser if format != "raw" && format != "" { - r, err = a.ms.DoStream(ctx, &mf, format, bitrate, 0, 0, 0) + r, err = a.ms.DoStream(ctx, &mf, format, bitrate, 0, 0, 0, 0) } else { r, err = os.Open(path) } diff --git a/core/archiver_test.go b/core/archiver_test.go index bf0ee3672..1291dce38 100644 --- a/core/archiver_test.go +++ b/core/archiver_test.go @@ -44,7 +44,7 @@ var _ = Describe("Archiver", func() { }}).Return(mfs, nil) ds.On("MediaFile", mock.Anything).Return(mfRepo) - ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(3) + ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(3) out := new(bytes.Buffer) err := arch.ZipAlbum(context.Background(), "1", "mp3", 128, out) @@ -73,7 +73,7 @@ var _ = Describe("Archiver", func() { }}).Return(mfs, nil) ds.On("MediaFile", mock.Anything).Return(mfRepo) - ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2) + ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2) out := new(bytes.Buffer) err := arch.ZipArtist(context.Background(), "1", "mp3", 128, out) @@ -104,7 +104,7 @@ var _ = Describe("Archiver", func() { } sh.On("Load", mock.Anything, "1").Return(share, nil) - ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2) + ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2) out := new(bytes.Buffer) err := arch.ZipShare(context.Background(), "1", out) @@ -136,7 +136,7 @@ var _ = Describe("Archiver", func() { plRepo := &mockPlaylistRepository{} plRepo.On("GetWithTracks", "1", true, false).Return(pls, nil) ds.On("Playlist", mock.Anything).Return(plRepo) - ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2) + ms.On("DoStream", mock.Anything, mock.Anything, "mp3", 128, 0, 0, 0, 0).Return(io.NopCloser(strings.NewReader("test")), nil).Times(2) out := new(bytes.Buffer) err := arch.ZipPlaylist(context.Background(), "1", "mp3", 128, out) @@ -217,8 +217,8 @@ type mockMediaStreamer struct { core.MediaStreamer } -func (m *mockMediaStreamer) DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqOffset int) (*core.Stream, error) { - args := m.Called(ctx, mf, reqFormat, reqBitRate, reqSampleRate, reqBitDepth, reqOffset) +func (m *mockMediaStreamer) DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqChannels int, reqOffset int) (*core.Stream, error) { + args := m.Called(ctx, mf, reqFormat, reqBitRate, reqSampleRate, reqBitDepth, reqChannels, reqOffset) if args.Error(1) != nil { return nil, args.Error(1) } diff --git a/core/ffmpeg/ffmpeg.go b/core/ffmpeg/ffmpeg.go index 19d2480f8..bed0eff0e 100644 --- a/core/ffmpeg/ffmpeg.go +++ b/core/ffmpeg/ffmpeg.go @@ -171,13 +171,17 @@ func (j *ffCmd) wait() { _ = j.out.Close() } -// defaultCommands maps format to the known default command template. +// defaultCommands maps format to the known default command templates. // Used to detect whether a user has customized their transcoding command. -var defaultCommands = map[string]string{ - "mp3": "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -f mp3 -", - "opus": "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a libopus -f opus -", - "aac": "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f adts -", - "flac": "ffmpeg -i %s -ss %t -map 0:a:0 -v 0 -c:a flac -f flac -", +// Multiple entries per format support smooth upgrades (e.g. aac changed from adts to ipod). +var defaultCommands = map[string][]string{ + "mp3": {"ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -f mp3 -"}, + "opus": {"ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a libopus -f opus -"}, + "aac": { + "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f ipod -movflags frag_keyframe+empty_moov -", + "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f adts -", // legacy default + }, + "flac": {"ffmpeg -i %s -ss %t -map 0:a:0 -v 0 -c:a flac -f flac -"}, } // formatCodecMap maps target format to ffmpeg codec flag. @@ -192,14 +196,22 @@ var formatCodecMap = map[string]string{ var formatOutputMap = map[string]string{ "mp3": "mp3", "opus": "opus", - "aac": "adts", + "aac": "ipod", "flac": "flac", } -// isDefaultCommand returns true if the command matches the known default for this format. +// isDefaultCommand returns true if the command matches any known default for this format. func isDefaultCommand(format, command string) bool { - defaultCmd, ok := defaultCommands[format] - return ok && command == defaultCmd + defaults, ok := defaultCommands[format] + if !ok { + return false + } + for _, d := range defaults { + if command == d { + return true + } + } + return false } // buildDynamicArgs programmatically constructs ffmpeg arguments for known formats, @@ -240,6 +252,11 @@ func buildDynamicArgs(opts TranscodeOptions) []string { args = append(args, "-f", outputFmt) } + // For AAC in MP4 container, enable fragmented MP4 for pipe-safe streaming + if opts.Format == "aac" { + args = append(args, "-movflags", "frag_keyframe+empty_moov") + } + args = append(args, "-") return args } diff --git a/core/ffmpeg/ffmpeg_test.go b/core/ffmpeg/ffmpeg_test.go index 88abaf4cc..dbd4c1123 100644 --- a/core/ffmpeg/ffmpeg_test.go +++ b/core/ffmpeg/ffmpeg_test.go @@ -86,6 +86,9 @@ var _ = Describe("ffmpeg", func() { Expect(isDefaultCommand("opus", "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a libopus -f opus -")).To(BeTrue()) }) It("returns true for known default aac command", func() { + Expect(isDefaultCommand("aac", "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f ipod -movflags frag_keyframe+empty_moov -")).To(BeTrue()) + }) + It("returns true for legacy default aac command", func() { Expect(isDefaultCommand("aac", "ffmpeg -i %s -ss %t -map 0:a:0 -b:a %bk -v 0 -c:a aac -f adts -")).To(BeTrue()) }) It("returns true for known default flac command", func() { @@ -174,7 +177,7 @@ var _ = Describe("ffmpeg", func() { })) }) - It("builds aac args correctly", func() { + It("builds aac args with fragmented MP4 container", func() { args := buildDynamicArgs(TranscodeOptions{ Format: "aac", FilePath: "/music/file.flac", @@ -186,7 +189,8 @@ var _ = Describe("ffmpeg", func() { "-c:a", "aac", "-b:a", "256k", "-v", "0", - "-f", "adts", + "-f", "ipod", + "-movflags", "frag_keyframe+empty_moov", "-", })) }) diff --git a/core/media_streamer.go b/core/media_streamer.go index 8f620a46d..6c154256f 100644 --- a/core/media_streamer.go +++ b/core/media_streamer.go @@ -19,8 +19,8 @@ import ( ) type MediaStreamer interface { - NewStream(ctx context.Context, id string, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, offset int) (*Stream, error) - DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqOffset int) (*Stream, error) + NewStream(ctx context.Context, id string, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqChannels int, offset int) (*Stream, error) + DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqChannels int, reqOffset int) (*Stream, error) } type TranscodingCache cache.FileCache @@ -43,23 +43,24 @@ type streamJob struct { bitRate int sampleRate int bitDepth int + channels int offset int } func (j *streamJob) Key() string { - return fmt.Sprintf("%s.%s.%d.%d.%d.%s.%d", j.mf.ID, j.mf.UpdatedAt.Format(time.RFC3339Nano), j.bitRate, j.sampleRate, j.bitDepth, j.format, j.offset) + return fmt.Sprintf("%s.%s.%d.%d.%d.%d.%s.%d", j.mf.ID, j.mf.UpdatedAt.Format(time.RFC3339Nano), j.bitRate, j.sampleRate, j.bitDepth, j.channels, j.format, j.offset) } -func (ms *mediaStreamer) NewStream(ctx context.Context, id string, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqOffset int) (*Stream, error) { +func (ms *mediaStreamer) NewStream(ctx context.Context, id string, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqChannels int, reqOffset int) (*Stream, error) { mf, err := ms.ds.MediaFile(ctx).Get(id) if err != nil { return nil, err } - return ms.DoStream(ctx, mf, reqFormat, reqBitRate, reqSampleRate, reqBitDepth, reqOffset) + return ms.DoStream(ctx, mf, reqFormat, reqBitRate, reqSampleRate, reqBitDepth, reqChannels, reqOffset) } -func (ms *mediaStreamer) DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqOffset int) (*Stream, error) { +func (ms *mediaStreamer) DoStream(ctx context.Context, mf *model.MediaFile, reqFormat string, reqBitRate int, reqSampleRate int, reqBitDepth int, reqChannels int, reqOffset int) (*Stream, error) { var format string var bitRate int var cached bool @@ -96,6 +97,7 @@ func (ms *mediaStreamer) DoStream(ctx context.Context, mf *model.MediaFile, reqF bitRate: bitRate, sampleRate: reqSampleRate, bitDepth: reqBitDepth, + channels: reqChannels, offset: reqOffset, } r, err := ms.cache.Get(ctx, job) @@ -229,6 +231,7 @@ func NewTranscodingCache() TranscodingCache { BitRate: job.bitRate, SampleRate: job.sampleRate, BitDepth: job.bitDepth, + Channels: job.channels, Offset: job.offset, }) if err != nil { diff --git a/core/media_streamer_test.go b/core/media_streamer_test.go index 204f90ac0..700877df1 100644 --- a/core/media_streamer_test.go +++ b/core/media_streamer_test.go @@ -39,34 +39,34 @@ var _ = Describe("MediaStreamer", func() { Context("NewStream", func() { It("returns a seekable stream if format is 'raw'", func() { - s, err := streamer.NewStream(ctx, "123", "raw", 0, 0, 0, 0) + s, err := streamer.NewStream(ctx, "123", "raw", 0, 0, 0, 0, 0) Expect(err).ToNot(HaveOccurred()) Expect(s.Seekable()).To(BeTrue()) }) It("returns a seekable stream if maxBitRate is 0", func() { - s, err := streamer.NewStream(ctx, "123", "mp3", 0, 0, 0, 0) + s, err := streamer.NewStream(ctx, "123", "mp3", 0, 0, 0, 0, 0) Expect(err).ToNot(HaveOccurred()) Expect(s.Seekable()).To(BeTrue()) }) It("returns a seekable stream if maxBitRate is higher than file bitRate", func() { - s, err := streamer.NewStream(ctx, "123", "mp3", 320, 0, 0, 0) + s, err := streamer.NewStream(ctx, "123", "mp3", 320, 0, 0, 0, 0) Expect(err).ToNot(HaveOccurred()) Expect(s.Seekable()).To(BeTrue()) }) It("returns a NON seekable stream if transcode is required", func() { - s, err := streamer.NewStream(ctx, "123", "mp3", 64, 0, 0, 0) + s, err := streamer.NewStream(ctx, "123", "mp3", 64, 0, 0, 0, 0) Expect(err).To(BeNil()) Expect(s.Seekable()).To(BeFalse()) Expect(s.Duration()).To(Equal(float32(257.0))) }) It("returns a seekable stream if the file is complete in the cache", func() { - s, err := streamer.NewStream(ctx, "123", "mp3", 32, 0, 0, 0) + s, err := streamer.NewStream(ctx, "123", "mp3", 32, 0, 0, 0, 0) Expect(err).To(BeNil()) _, _ = io.ReadAll(s) _ = s.Close() Eventually(func() bool { return ffmpeg.IsClosed() }, "3s").Should(BeTrue()) - s, err = streamer.NewStream(ctx, "123", "mp3", 32, 0, 0, 0) + s, err = streamer.NewStream(ctx, "123", "mp3", 32, 0, 0, 0, 0) Expect(err).To(BeNil()) Expect(s.Seekable()).To(BeTrue()) }) diff --git a/core/transcode/transcode.go b/core/transcode/transcode.go index 01098eeb3..98f652a0b 100644 --- a/core/transcode/transcode.go +++ b/core/transcode/transcode.go @@ -334,9 +334,9 @@ func (s *deciderService) computeTranscodedStream(ctx context.Context, mf *model. ts := &StreamDetails{ Container: responseContainer, Codec: strings.ToLower(profile.AudioCodec), - SampleRate: dsdToPCMSampleRate(mf.SampleRate, mf.AudioCodec()), + SampleRate: normalizeSourceSampleRate(mf.SampleRate, mf.AudioCodec()), Channels: mf.Channels, - BitDepth: mf.BitDepth, + BitDepth: normalizeSourceBitDepth(mf.BitDepth, mf.AudioCodec()), IsLossless: targetIsLossless, } if ts.Codec == "" { @@ -750,16 +750,27 @@ func isLosslessFormat(format string) bool { return false } -// dsdToPCMSampleRate converts a DSD sample rate to its PCM-equivalent rate (÷8). +// normalizeSourceSampleRate adjusts the source sample rate for codecs that store +// it differently than PCM. Currently handles DSD (÷8): // DSD64=2822400→352800, DSD128=5644800→705600, etc. -// For non-DSD codecs, returns the rate unchanged. -func dsdToPCMSampleRate(sampleRate int, codec string) int { +// For other codecs, returns the rate unchanged. +func normalizeSourceSampleRate(sampleRate int, codec string) int { if strings.EqualFold(codec, "dsd") && sampleRate > 0 { return sampleRate / 8 } return sampleRate } +// normalizeSourceBitDepth adjusts the source bit depth for codecs that use +// non-standard bit depths. Currently handles DSD (1-bit → 24-bit PCM, which is +// what ffmpeg produces). For other codecs, returns the depth unchanged. +func normalizeSourceBitDepth(bitDepth int, codec string) int { + if strings.EqualFold(codec, "dsd") && bitDepth == 1 { + return 24 + } + return bitDepth +} + // codecFixedOutputSampleRate returns the mandatory output sample rate for codecs // that always resample regardless of input (e.g., Opus always outputs 48000Hz). // Returns 0 if the codec has no fixed output rate. diff --git a/core/transcode/transcode_test.go b/core/transcode/transcode_test.go index a2ccba2e6..0ee3aba77 100644 --- a/core/transcode/transcode_test.go +++ b/core/transcode/transcode_test.go @@ -621,6 +621,9 @@ var _ = Describe("Decider", func() { // DSD64 2822400 / 8 = 352800, capped by MP3 max of 48000 Expect(decision.TranscodeStream.SampleRate).To(Equal(48000)) Expect(decision.TargetSampleRate).To(Equal(48000)) + // DSD 1-bit → 24-bit PCM + Expect(decision.TranscodeStream.BitDepth).To(Equal(24)) + Expect(decision.TargetBitDepth).To(Equal(24)) }) It("converts DSD sample rate for FLAC target without codec limit", func() { @@ -637,6 +640,9 @@ var _ = Describe("Decider", func() { // DSD64 2822400 / 8 = 352800, FLAC has no hard max Expect(decision.TranscodeStream.SampleRate).To(Equal(352800)) Expect(decision.TargetSampleRate).To(Equal(352800)) + // DSD 1-bit → 24-bit PCM + Expect(decision.TranscodeStream.BitDepth).To(Equal(24)) + Expect(decision.TargetBitDepth).To(Equal(24)) }) It("applies codec profile limit to DSD-converted FLAC sample rate", func() { @@ -661,6 +667,33 @@ var _ = Describe("Decider", func() { // DSD64 2822400 / 8 = 352800, capped by codec profile limit of 48000 Expect(decision.TranscodeStream.SampleRate).To(Equal(48000)) Expect(decision.TargetSampleRate).To(Equal(48000)) + // DSD 1-bit → 24-bit PCM + Expect(decision.TranscodeStream.BitDepth).To(Equal(24)) + Expect(decision.TargetBitDepth).To(Equal(24)) + }) + + It("applies audioBitdepth limitation to DSD-converted bit depth", func() { + mf := &model.MediaFile{ID: "1", Suffix: "dsf", Codec: "DSD", BitRate: 5644, Channels: 2, SampleRate: 2822400, BitDepth: 1} + ci := &ClientInfo{ + TranscodingProfiles: []Profile{ + {Container: "flac", AudioCodec: "flac", Protocol: "http"}, + }, + CodecProfiles: []CodecProfile{ + { + Type: CodecProfileTypeAudio, + Name: "flac", + Limitations: []Limitation{ + {Name: LimitationAudioBitdepth, Comparison: ComparisonLessThanEqual, Values: []string{"16"}, Required: true}, + }, + }, + }, + } + decision, err := svc.MakeDecision(ctx, mf, ci) + Expect(err).ToNot(HaveOccurred()) + Expect(decision.CanTranscode).To(BeTrue()) + // DSD 1-bit → 24-bit PCM, then capped by codec profile limit to 16-bit + Expect(decision.TranscodeStream.BitDepth).To(Equal(16)) + Expect(decision.TargetBitDepth).To(Equal(16)) }) }) diff --git a/server/public/handle_streams.go b/server/public/handle_streams.go index 7ceca76e1..c6ab66307 100644 --- a/server/public/handle_streams.go +++ b/server/public/handle_streams.go @@ -24,7 +24,7 @@ func (pub *Router) handleStream(w http.ResponseWriter, r *http.Request) { return } - stream, err := pub.streamer.NewStream(ctx, info.id, info.format, info.bitrate, 0, 0, 0) + stream, err := pub.streamer.NewStream(ctx, info.id, info.format, info.bitrate, 0, 0, 0, 0) if err != nil { log.Error(ctx, "Error starting shared stream", err) http.Error(w, "invalid request", http.StatusInternalServerError) diff --git a/server/subsonic/stream.go b/server/subsonic/stream.go index 64a61e57a..e7b5701ce 100644 --- a/server/subsonic/stream.go +++ b/server/subsonic/stream.go @@ -60,7 +60,7 @@ func (api *Router) Stream(w http.ResponseWriter, r *http.Request) (*responses.Su format, _ := p.String("format") timeOffset := p.IntOr("timeOffset", 0) - stream, err := api.streamer.NewStream(ctx, id, format, maxBitRate, 0, 0, timeOffset) + stream, err := api.streamer.NewStream(ctx, id, format, maxBitRate, 0, 0, 0, timeOffset) if err != nil { return nil, err } @@ -129,7 +129,7 @@ func (api *Router) Download(w http.ResponseWriter, r *http.Request) (*responses. switch v := entity.(type) { case *model.MediaFile: - stream, err := api.streamer.NewStream(ctx, id, format, maxBitRate, 0, 0, 0) + stream, err := api.streamer.NewStream(ctx, id, format, maxBitRate, 0, 0, 0, 0) if err != nil { return nil, err } diff --git a/server/subsonic/transcode.go b/server/subsonic/transcode.go index 64a097794..81a927d8c 100644 --- a/server/subsonic/transcode.go +++ b/server/subsonic/transcode.go @@ -325,18 +325,20 @@ func (api *Router) GetTranscodeStream(w http.ResponseWriter, r *http.Request) (* maxBitRate := 0 sampleRate := 0 bitDepth := 0 + channels := 0 if !params.DirectPlay && params.TargetFormat != "" { format = params.TargetFormat maxBitRate = params.TargetBitrate // Already in kbps, matching the streamer sampleRate = params.TargetSampleRate bitDepth = params.TargetBitDepth + channels = params.TargetChannels } // Get offset parameter offset := p.IntOr("offset", 0) // Create stream - stream, err := api.streamer.NewStream(ctx, mediaID, format, maxBitRate, sampleRate, bitDepth, offset) + stream, err := api.streamer.NewStream(ctx, mediaID, format, maxBitRate, sampleRate, bitDepth, channels, offset) if err != nil { return nil, err }