From 719fa5dc992185698243ecdd6d8133b4440cc96d Mon Sep 17 00:00:00 2001 From: ranokay Date: Tue, 26 May 2026 19:59:41 +0300 Subject: [PATCH] feat: parse embedded rich lyrics --- README.md | 2 +- core/lyrics/embedded.go | 64 +++++++++++ core/lyrics/embedded_test.go | 157 +++++++++++++++++++++++++++ core/lyrics/srt.go | 6 +- core/lyrics/ttml.go | 6 +- model/metadata/map_mediafile.go | 9 +- model/metadata/map_mediafile_test.go | 45 ++++++++ 7 files changed, 283 insertions(+), 6 deletions(-) create mode 100644 core/lyrics/embedded.go create mode 100644 core/lyrics/embedded_test.go diff --git a/README.md b/README.md index 645f1580d..efecfdbcb 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ A share of the revenue helps fund the development of Navidrome at no additional - **Multi-platform**, runs on macOS, Linux and Windows. **Docker** images are also provided - Ready to use binaries for all major platforms, including **Raspberry Pi** - Automatically **monitors your library** for changes, importing new files and reloading new metadata - - Supports lyrics from sidecar **.ttml**, **.elrc**, **.lrc**, **.srt**, **.txt** files and embedded tags (via `lyricspriority`) + - Supports lyrics from sidecar **.ttml**, **.elrc**, **.lrc**, **.srt**, **.txt** files and embedded **TTML**, **Enhanced LRC**, **LRC**, **SRT**, and plain-text tags (via `lyricspriority`) - **Themeable**, modern and responsive **Web interface** based on [Material UI](https://material-ui.com) - **Compatible** with all Subsonic/Madsonic/Airsonic [clients](https://www.navidrome.org/docs/overview/#apps) - **Transcoding** on the fly. Can be set per user/player. 
**Opus encoding is supported**
diff --git a/core/lyrics/embedded.go b/core/lyrics/embedded.go
new file mode 100644
index 000000000..4cdd898ee
--- /dev/null
+++ b/core/lyrics/embedded.go
@@ -0,0 +1,85 @@
+package lyrics
+
+import (
+	"encoding/xml"
+	"io"
+	"strings"
+
+	"github.com/navidrome/navidrome/log"
+	"github.com/navidrome/navidrome/model"
+)
+
+// ParseEmbedded parses lyrics read from media-file metadata tags. It detects rich
+// payloads before falling back to the generic LRC/plain-text parser, because
+// text sanitization would otherwise strip TTML XML markup.
+//
+// Formats are tried in order: TTML (only when the first XML element is tt),
+// then SRT, then model.ToLyrics. A rich-format failure is logged and the next
+// format is tried; only an error from model.ToLyrics is returned. A nil list
+// with a nil error means no non-empty lyrics were found.
+func ParseEmbedded(language, text string) (model.LyricList, error) {
+	// Drop a leading byte-order mark, if present.
+	text = strings.TrimPrefix(text, "\ufeff")
+
+	if isTTMLDocument(text) {
+		list, err := parseTTMLWithDefaultLang([]byte(text), language)
+		if err == nil && len(list) > 0 {
+			return list, nil
+		}
+		if err != nil {
+			log.Warn("Error parsing embedded TTML lyrics, falling back to plain lyrics", "error", err)
+		}
+	}
+
+	list, err := parseSRTWithLanguage([]byte(text), language)
+	if err == nil && len(list) > 0 {
+		return list, nil
+	}
+	// Warn only when the text contains "-->", the SRT timing separator; any
+	// other text falls through to the plain parser silently.
+	if err != nil && strings.Contains(text, "-->") {
+		log.Warn("Error parsing embedded SRT lyrics, falling back to plain lyrics", "error", err)
+	}
+
+	lyric, err := model.ToLyrics(language, text)
+	if err != nil {
+		return nil, err
+	}
+	if lyric == nil || lyric.IsEmpty() {
+		return nil, nil
+	}
+	return model.LyricList{*lyric}, nil
+}
+
+// isTTMLDocument reports whether the first XML element in text is named tt
+// (case-insensitive; only the local name is compared). It returns false if the
+// decoder fails before reaching a start element.
+func isTTMLDocument(text string) bool {
+	decoder := xml.NewDecoder(strings.NewReader(strings.TrimSpace(text)))
+	// With a nil CharsetReader the decoder rejects any XML declaration naming a
+	// non-UTF-8 encoding, which would keep such documents away from the TTML
+	// parser. text is already a Go string, so the declared encoding is treated
+	// as stale and the input is passed through unchanged.
+	decoder.CharsetReader = func(_ string, input io.Reader) (io.Reader, error) {
+		return input, nil
+	}
+	for {
+		token, err := decoder.Token()
+		if err != nil {
+			return false
+		}
+		if start, ok := token.(xml.StartElement); ok {
+			return strings.EqualFold(start.Name.Local, "tt")
+		}
+	}
+}
+
+// normalizeEmbeddedLanguage trims and lower-cases a language code, returning
+// "xxx" when nothing is left.
+func normalizeEmbeddedLanguage(language string) string {
+	language = strings.ToLower(strings.TrimSpace(language))
+	if language == "" {
+		return "xxx"
+	}
+	return language
+}
diff --git a/core/lyrics/embedded_test.go b/core/lyrics/embedded_test.go
new file mode 100644
index
000000000..3243cf5d0 --- /dev/null +++ b/core/lyrics/embedded_test.go @@ -0,0 +1,157 @@ +package lyrics + +import ( + "strings" + + "github.com/navidrome/navidrome/model" + "github.com/navidrome/navidrome/utils/gg" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("ParseEmbedded", func() { + It("should parse embedded TTML with the tag language as the default", func() { + content := ` + + + + Lead Vocal + + + + +
+

+ Hello world +

+
+ +
` + + list, err := ParseEmbedded("ENG", content) + + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Kind).To(Equal("main")) + Expect(list[0].Lang).To(Equal("eng")) + Expect(list[0].Synced).To(BeTrue()) + Expect(list[0].Agents).To(Equal([]model.Agent{{ID: "lead", Role: "main", Name: "Lead Vocal"}})) + Expect(list[0].Line).To(HaveLen(1)) + Expect(list[0].Line[0].Start).To(Equal(gg.P(int64(1000)))) + Expect(list[0].Line[0].End).To(Equal(gg.P(int64(3000)))) + Expect(list[0].Line[0].Value).To(Equal("Hello world")) + Expect(list[0].Line[0].Cue).To(HaveLen(2)) + Expect(list[0].Line[0].Cue[0].AgentID).To(Equal("lead")) + Expect(list[0].Line[0].Cue[0].ByteStart).To(Equal(0)) + Expect(list[0].Line[0].Cue[0].ByteEnd).To(Equal(5)) + Expect(list[0].Line[0].Cue[1].ByteStart).To(Equal(6)) + Expect(list[0].Line[0].Cue[1].ByteEnd).To(Equal(10)) + }) + + It("should preserve embedded TTML translation and pronunciation tracks", func() { + content := ` + + + + + + Hola + + + + + konni + + + + + + +
+

こんにちは

+
+ +
` + + list, err := ParseEmbedded("eng", content) + + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(3)) + Expect(list[0].Kind).To(Equal("main")) + Expect(list[0].Lang).To(Equal("ja")) + Expect(list[0].Line[0].Value).To(Equal("こんにちは")) + Expect(list[1].Kind).To(Equal("translation")) + Expect(list[1].Lang).To(Equal("es")) + Expect(list[1].Line[0].Value).To(Equal("Hola")) + Expect(list[2].Kind).To(Equal("pronunciation")) + Expect(list[2].Lang).To(Equal("ja-latn")) + Expect(list[2].Line[0].Value).To(Equal("konni")) + Expect(list[2].Line[0].Cue).To(HaveLen(2)) + }) + + It("should parse embedded SRT with the tag language", func() { + content := `1 +00:00:18,800 --> 00:00:22,800 +We're from subtitles + +2 +00:00:22,801 --> 00:00:26,000 +Another subtitle line` + + list, err := ParseEmbedded("POR", content) + + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(Equal(model.LyricList{ + { + Lang: "por", + Line: []model.Line{ + { + Start: gg.P(int64(18800)), + End: gg.P(int64(22800)), + Value: "We're from subtitles", + }, + { + Start: gg.P(int64(22801)), + End: gg.P(int64(26000)), + Value: "Another subtitle line", + }, + }, + Synced: true, + }, + })) + }) + + It("should keep embedded enhanced LRC cues", func() { + content := "[00:01.00]<00:01.00>Lead <00:01.50>words" + + list, err := ParseEmbedded("eng", content) + + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Lang).To(Equal("eng")) + Expect(list[0].Synced).To(BeTrue()) + Expect(list[0].Line[0].Value).To(Equal("Lead words")) + Expect(list[0].Line[0].Cue).To(HaveLen(2)) + }) + + It("should fall back to plain lyrics when embedded TTML is invalid", func() { + content := ` + +

Broken

+ +
` + + list, err := ParseEmbedded("eng", content) + + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Lang).To(Equal("eng")) + Expect(list[0].Synced).To(BeFalse()) + Expect(list[0].Line).ToNot(BeEmpty()) + values := make([]string, 0, len(list[0].Line)) + for _, line := range list[0].Line { + values = append(values, line.Value) + } + Expect(strings.Join(values, "\n")).To(ContainSubstring("Broken")) + }) +}) diff --git a/core/lyrics/srt.go b/core/lyrics/srt.go index 8fd77abb4..e16c405d5 100644 --- a/core/lyrics/srt.go +++ b/core/lyrics/srt.go @@ -13,6 +13,10 @@ import ( var srtTimeRegex = regexp.MustCompile(`^\s*(\d{1,2}):(\d{2}):(\d{2})[,.](\d{1,3})\s*$`) func parseSRT(contents []byte) (model.LyricList, error) { + return parseSRTWithLanguage(contents, "xxx") +} + +func parseSRTWithLanguage(contents []byte, language string) (model.LyricList, error) { raw := strings.ReplaceAll(string(contents), "\r\n", "\n") raw = strings.ReplaceAll(raw, "\r", "\n") @@ -34,7 +38,7 @@ func parseSRT(contents []byte) (model.LyricList, error) { } lyrics := model.NormalizeLyrics(model.Lyrics{ - Lang: "xxx", + Lang: normalizeEmbeddedLanguage(language), Line: lines, Synced: true, }) diff --git a/core/lyrics/ttml.go b/core/lyrics/ttml.go index 576d2ca3d..8df7f930a 100644 --- a/core/lyrics/ttml.go +++ b/core/lyrics/ttml.go @@ -106,6 +106,10 @@ type ttmlParser struct { } func parseTTML(contents []byte) (model.LyricList, error) { + return parseTTMLWithDefaultLang(contents, "xxx") +} + +func parseTTMLWithDefaultLang(contents []byte, defaultLang string) (model.LyricList, error) { contents = xmlEncodingRegex.ReplaceAll(contents, []byte(``)) p := ttmlParser{ @@ -122,7 +126,7 @@ func parseTTML(contents []byte) (model.LyricList, error) { definedAgents: make(map[string]ttmlDefinedAgent), } - root := ttmlTimingContext{lang: "xxx"} + root := ttmlTimingContext{lang: normalizeTTMLLang(defaultLang)} for { token, err := p.decoder.Token() diff --git 
a/model/metadata/map_mediafile.go b/model/metadata/map_mediafile.go index 824cad7c2..2fbf893ac 100644 --- a/model/metadata/map_mediafile.go +++ b/model/metadata/map_mediafile.go @@ -8,6 +8,7 @@ import ( "strconv" "github.com/navidrome/navidrome/conf" + lyricssvc "github.com/navidrome/navidrome/core/lyrics" "github.com/navidrome/navidrome/log" "github.com/navidrome/navidrome/model" "github.com/navidrome/navidrome/utils/str" @@ -137,13 +138,15 @@ func (md Metadata) mapLyrics() string { lang := raw.Key() text := raw.Value() - lyrics, err := model.ToLyrics(lang, text) + lyrics, err := lyricssvc.ParseEmbedded(lang, text) if err != nil { log.Warn("Unexpected failure occurred when parsing lyrics", "file", md.filePath, err) continue } - if !lyrics.IsEmpty() { - lyricList = append(lyricList, *lyrics) + for _, lyric := range lyrics { + if !lyric.IsEmpty() { + lyricList = append(lyricList, lyric) + } } } diff --git a/model/metadata/map_mediafile_test.go b/model/metadata/map_mediafile_test.go index 16142f526..15565111a 100644 --- a/model/metadata/map_mediafile_test.go +++ b/model/metadata/map_mediafile_test.go @@ -116,5 +116,50 @@ var _ = Describe("ToMediaFile", func() { sort.Slice(expected, func(i, j int) bool { return expected[i].Lang < expected[j].Lang }) Expect(actual).To(Equal(expected)) }) + + It("should parse embedded TTML lyrics before sanitizing XML tags", func() { + mf = toMediaFile(model.RawTags{ + "LYRICS:ENG": {` + +
+

Embedded TTML line

+
+ +
`}, + }) + var actual model.LyricList + err := json.Unmarshal([]byte(mf.Lyrics), &actual) + Expect(err).ToNot(HaveOccurred()) + + Expect(actual).To(Equal(model.LyricList{ + { + Kind: "main", + Lang: "eng", + Line: []model.Line{{Start: P(int64(1000)), End: P(int64(2500)), Value: "Embedded TTML line"}}, + Synced: true, + }, + })) + }) + + It("should parse embedded SRT lyrics with the tag language", func() { + mf = toMediaFile(model.RawTags{ + "LYRICS:POR": {`1 +00:00:18,800 --> 00:00:22,800 +Estamos nas legendas`}, + }) + var actual model.LyricList + err := json.Unmarshal([]byte(mf.Lyrics), &actual) + Expect(err).ToNot(HaveOccurred()) + + Expect(actual).To(Equal(model.LyricList{ + { + Lang: "por", + Line: []model.Line{ + {Start: P(int64(18800)), End: P(int64(22800)), Value: "Estamos nas legendas"}, + }, + Synced: true, + }, + })) + }) }) })