mirror of
https://github.com/navidrome/navidrome.git
synced 2026-06-02 07:01:36 +00:00
feat: parse embedded rich lyrics
This commit is contained in:
parent
e9f969e3d2
commit
719fa5dc99
@ -52,7 +52,7 @@ A share of the revenue helps fund the development of Navidrome at no additional
|
||||
- **Multi-platform**, runs on macOS, Linux and Windows. **Docker** images are also provided
|
||||
- Ready to use binaries for all major platforms, including **Raspberry Pi**
|
||||
- Automatically **monitors your library** for changes, importing new files and reloading new metadata
|
||||
- Supports lyrics from sidecar **.ttml**, **.elrc**, **.lrc**, **.srt**, **.txt** files and embedded tags (via `lyricspriority`)
|
||||
- Supports lyrics from sidecar **.ttml**, **.elrc**, **.lrc**, **.srt**, **.txt** files and embedded **TTML**, **Enhanced LRC**, **LRC**, **SRT**, and plain-text tags (via `lyricspriority`)
|
||||
- **Themeable**, modern and responsive **Web interface** based on [Material UI](https://material-ui.com)
|
||||
- **Compatible** with all Subsonic/Madsonic/Airsonic [clients](https://www.navidrome.org/docs/overview/#apps)
|
||||
- **Transcoding** on the fly. Can be set per user/player. **Opus encoding is supported**
|
||||
|
||||
64
core/lyrics/embedded.go
Normal file
64
core/lyrics/embedded.go
Normal file
@ -0,0 +1,64 @@
|
||||
package lyrics
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"strings"
|
||||
|
||||
"github.com/navidrome/navidrome/log"
|
||||
"github.com/navidrome/navidrome/model"
|
||||
)
|
||||
|
||||
// ParseEmbedded parses lyrics read from media-file metadata tags. It detects rich
|
||||
// payloads before falling back to the generic LRC/plain-text parser, because
|
||||
// text sanitization would otherwise strip TTML XML markup.
|
||||
func ParseEmbedded(language, text string) (model.LyricList, error) {
|
||||
text = strings.TrimPrefix(text, "\ufeff")
|
||||
|
||||
if isTTMLDocument(text) {
|
||||
list, err := parseTTMLWithDefaultLang([]byte(text), language)
|
||||
if err == nil && len(list) > 0 {
|
||||
return list, nil
|
||||
}
|
||||
if err != nil {
|
||||
log.Warn("Error parsing embedded TTML lyrics, falling back to plain lyrics", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
list, err := parseSRTWithLanguage([]byte(text), language)
|
||||
if err == nil && len(list) > 0 {
|
||||
return list, nil
|
||||
}
|
||||
if err != nil && strings.Contains(text, "-->") {
|
||||
log.Warn("Error parsing embedded SRT lyrics, falling back to plain lyrics", "error", err)
|
||||
}
|
||||
|
||||
lyric, err := model.ToLyrics(language, text)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if lyric == nil || lyric.IsEmpty() {
|
||||
return nil, nil
|
||||
}
|
||||
return model.LyricList{*lyric}, nil
|
||||
}
|
||||
|
||||
// isTTMLDocument reports whether text is an XML document whose first element is
// <tt> (compared case-insensitively on the local name, so any namespace prefix
// is ignored). Anything that fails to tokenize as XML before a start element is
// found is reported as not TTML.
func isTTMLDocument(text string) bool {
	doc := strings.TrimSpace(text)

	// encoding/xml rejects a declaration such as
	// <?xml version="1.0" encoding="UTF-16"?> when no CharsetReader is set, even
	// though tag text has already been decoded into a Go string. The TTML parser
	// rewrites that declaration before decoding, so detection must not be
	// stricter than parsing: skip the leading declaration instead of letting the
	// decoder validate it.
	if strings.HasPrefix(doc, "<?xml") {
		if _, rest, found := strings.Cut(doc, "?>"); found {
			doc = rest
		}
	}

	decoder := xml.NewDecoder(strings.NewReader(doc))
	for {
		token, err := decoder.Token()
		if err != nil {
			// Covers both malformed XML and reaching EOF without any element.
			return false
		}
		// Comments, processing instructions and character data before the
		// root element are skipped; only the first start element decides.
		if start, ok := token.(xml.StartElement); ok {
			return strings.EqualFold(start.Name.Local, "tt")
		}
	}
}
|
||||
|
||||
// normalizeEmbeddedLanguage returns language trimmed of surrounding whitespace
// and lower-cased. When nothing remains it returns the placeholder code "xxx".
func normalizeEmbeddedLanguage(language string) string {
	if code := strings.ToLower(strings.TrimSpace(language)); code != "" {
		return code
	}
	return "xxx"
}
|
||||
157
core/lyrics/embedded_test.go
Normal file
157
core/lyrics/embedded_test.go
Normal file
@ -0,0 +1,157 @@
|
||||
package lyrics
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/navidrome/navidrome/model"
|
||||
"github.com/navidrome/navidrome/utils/gg"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Specs for ParseEmbedded covering each embedded payload shape exercised here:
// TTML (single track and multi-track), SRT, enhanced LRC, and the fallback taken
// when a TTML payload cannot be parsed.
var _ = Describe("ParseEmbedded", func() {
|
||||
// The document carries no xml:lang, so the tag language ("ENG") is expected to
// come back lower-cased as the track language. Agent metadata and one cue per
// <span> are expected on the single parsed line.
It("should parse embedded TTML with the tag language as the default", func() {
|
||||
content := `<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
|
||||
<head>
|
||||
<metadata>
|
||||
<ttm:agent xml:id="lead" ttm:type="person">
|
||||
<ttm:name>Lead Vocal</ttm:name>
|
||||
</ttm:agent>
|
||||
</metadata>
|
||||
</head>
|
||||
<body>
|
||||
<div>
|
||||
<p begin="00:00:01.000" end="00:00:03.000">
|
||||
<span begin="00:00:01.000" end="00:00:02.000" ttm:agent="lead">Hello </span><span begin="00:00:02.000" end="00:00:03.000" ttm:agent="lead">world</span>
|
||||
</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`
|
||||
|
||||
list, err := ParseEmbedded("ENG", content)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(list).To(HaveLen(1))
|
||||
Expect(list[0].Kind).To(Equal("main"))
|
||||
Expect(list[0].Lang).To(Equal("eng"))
|
||||
Expect(list[0].Synced).To(BeTrue())
|
||||
Expect(list[0].Agents).To(Equal([]model.Agent{{ID: "lead", Role: "main", Name: "Lead Vocal"}}))
|
||||
Expect(list[0].Line).To(HaveLen(1))
|
||||
Expect(list[0].Line[0].Start).To(Equal(gg.P(int64(1000))))
|
||||
Expect(list[0].Line[0].End).To(Equal(gg.P(int64(3000))))
|
||||
Expect(list[0].Line[0].Value).To(Equal("Hello world"))
|
||||
Expect(list[0].Line[0].Cue).To(HaveLen(2))
|
||||
Expect(list[0].Line[0].Cue[0].AgentID).To(Equal("lead"))
|
||||
Expect(list[0].Line[0].Cue[0].ByteStart).To(Equal(0))
|
||||
Expect(list[0].Line[0].Cue[0].ByteEnd).To(Equal(5))
|
||||
Expect(list[0].Line[0].Cue[1].ByteStart).To(Equal(6))
|
||||
Expect(list[0].Line[0].Cue[1].ByteEnd).To(Equal(10))
|
||||
})
|
||||
|
||||
// The <body> declares xml:lang="ja", which is expected to win over the "eng"
// tag language for the main track. The iTunes metadata is expected to yield
// two further tracks: a translation ("es") and a pronunciation ("ja-latn").
It("should preserve embedded TTML translation and pronunciation tracks", func() {
|
||||
content := `<tt xmlns="http://www.w3.org/ns/ttml" xmlns:itunes="http://music.apple.com/lyric-ttml-internal">
|
||||
<head>
|
||||
<metadata>
|
||||
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
|
||||
<translations>
|
||||
<translation xml:lang="es">
|
||||
<text for="L1">Hola</text>
|
||||
</translation>
|
||||
</translations>
|
||||
<transliterations>
|
||||
<transliteration xml:lang="ja-Latn">
|
||||
<text for="L1"><span begin="00:00:01.000" end="00:00:01.300" xmlns="http://www.w3.org/ns/ttml">ko</span><span begin="00:00:01.300" end="00:00:01.600" xmlns="http://www.w3.org/ns/ttml">nni</span></text>
|
||||
</transliteration>
|
||||
</transliterations>
|
||||
</iTunesMetadata>
|
||||
</metadata>
|
||||
</head>
|
||||
<body xml:lang="ja">
|
||||
<div>
|
||||
<p begin="00:00:01.000" end="00:00:02.000" itunes:key="L1">こんにちは</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`
|
||||
|
||||
list, err := ParseEmbedded("eng", content)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(list).To(HaveLen(3))
|
||||
Expect(list[0].Kind).To(Equal("main"))
|
||||
Expect(list[0].Lang).To(Equal("ja"))
|
||||
Expect(list[0].Line[0].Value).To(Equal("こんにちは"))
|
||||
Expect(list[1].Kind).To(Equal("translation"))
|
||||
Expect(list[1].Lang).To(Equal("es"))
|
||||
Expect(list[1].Line[0].Value).To(Equal("Hola"))
|
||||
Expect(list[2].Kind).To(Equal("pronunciation"))
|
||||
Expect(list[2].Lang).To(Equal("ja-latn"))
|
||||
Expect(list[2].Line[0].Value).To(Equal("konni"))
|
||||
Expect(list[2].Line[0].Cue).To(HaveLen(2))
|
||||
})
|
||||
|
||||
// Two SRT entries are expected to become two synced lines with millisecond
// start/end times, tagged with the lower-cased tag language ("por").
It("should parse embedded SRT with the tag language", func() {
|
||||
content := `1
|
||||
00:00:18,800 --> 00:00:22,800
|
||||
We're from subtitles
|
||||
|
||||
2
|
||||
00:00:22,801 --> 00:00:26,000
|
||||
Another subtitle line`
|
||||
|
||||
list, err := ParseEmbedded("POR", content)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(list).To(Equal(model.LyricList{
|
||||
{
|
||||
Lang: "por",
|
||||
Line: []model.Line{
|
||||
{
|
||||
Start: gg.P(int64(18800)),
|
||||
End: gg.P(int64(22800)),
|
||||
Value: "We're from subtitles",
|
||||
},
|
||||
{
|
||||
Start: gg.P(int64(22801)),
|
||||
End: gg.P(int64(26000)),
|
||||
Value: "Another subtitle line",
|
||||
},
|
||||
},
|
||||
Synced: true,
|
||||
},
|
||||
}))
|
||||
})
|
||||
|
||||
// Inline <mm:ss.xx> word timestamps are expected to be removed from the line
// text and surfaced as two cues.
It("should keep embedded enhanced LRC cues", func() {
|
||||
content := "[00:01.00]<00:01.00>Lead <00:01.50>words"
|
||||
|
||||
list, err := ParseEmbedded("eng", content)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(list).To(HaveLen(1))
|
||||
Expect(list[0].Lang).To(Equal("eng"))
|
||||
Expect(list[0].Synced).To(BeTrue())
|
||||
Expect(list[0].Line[0].Value).To(Equal("Lead words"))
|
||||
Expect(list[0].Line[0].Cue).To(HaveLen(2))
|
||||
})
|
||||
|
||||
// begin="not-a-time" makes the TTML payload unparseable; the call is still
// expected to succeed and return one unsynced lyric containing the text "Broken".
It("should fall back to plain lyrics when embedded TTML is invalid", func() {
|
||||
content := `<tt xmlns="http://www.w3.org/ns/ttml">
|
||||
<body>
|
||||
<p begin="not-a-time">Broken</p>
|
||||
</body>
|
||||
</tt>`
|
||||
|
||||
list, err := ParseEmbedded("eng", content)
|
||||
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(list).To(HaveLen(1))
|
||||
Expect(list[0].Lang).To(Equal("eng"))
|
||||
Expect(list[0].Synced).To(BeFalse())
|
||||
Expect(list[0].Line).ToNot(BeEmpty())
|
||||
values := make([]string, 0, len(list[0].Line))
|
||||
for _, line := range list[0].Line {
|
||||
values = append(values, line.Value)
|
||||
}
|
||||
Expect(strings.Join(values, "\n")).To(ContainSubstring("Broken"))
|
||||
})
|
||||
})
|
||||
@ -13,6 +13,10 @@ import (
|
||||
// srtTimeRegex matches one timestamp of the form H:MM:SS,mmm on its own,
// allowing surrounding whitespace. It captures, in order: hours (1-2 digits),
// minutes (2 digits), seconds (2 digits) and the fractional part (1-3 digits).
// Either "," or "." is accepted as the separator before the fraction.
var srtTimeRegex = regexp.MustCompile(`^\s*(\d{1,2}):(\d{2}):(\d{2})[,.](\d{1,3})\s*$`)
|
||||
|
||||
func parseSRT(contents []byte) (model.LyricList, error) {
|
||||
return parseSRTWithLanguage(contents, "xxx")
|
||||
}
|
||||
|
||||
func parseSRTWithLanguage(contents []byte, language string) (model.LyricList, error) {
|
||||
raw := strings.ReplaceAll(string(contents), "\r\n", "\n")
|
||||
raw = strings.ReplaceAll(raw, "\r", "\n")
|
||||
|
||||
@ -34,7 +38,7 @@ func parseSRT(contents []byte) (model.LyricList, error) {
|
||||
}
|
||||
|
||||
lyrics := model.NormalizeLyrics(model.Lyrics{
|
||||
Lang: "xxx",
|
||||
Lang: normalizeEmbeddedLanguage(language),
|
||||
Line: lines,
|
||||
Synced: true,
|
||||
})
|
||||
|
||||
@ -106,6 +106,10 @@ type ttmlParser struct {
|
||||
}
|
||||
|
||||
func parseTTML(contents []byte) (model.LyricList, error) {
|
||||
return parseTTMLWithDefaultLang(contents, "xxx")
|
||||
}
|
||||
|
||||
func parseTTMLWithDefaultLang(contents []byte, defaultLang string) (model.LyricList, error) {
|
||||
contents = xmlEncodingRegex.ReplaceAll(contents, []byte(`<?xml$1encoding="UTF-8"$2?>`))
|
||||
|
||||
p := ttmlParser{
|
||||
@ -122,7 +126,7 @@ func parseTTML(contents []byte) (model.LyricList, error) {
|
||||
definedAgents: make(map[string]ttmlDefinedAgent),
|
||||
}
|
||||
|
||||
root := ttmlTimingContext{lang: "xxx"}
|
||||
root := ttmlTimingContext{lang: normalizeTTMLLang(defaultLang)}
|
||||
|
||||
for {
|
||||
token, err := p.decoder.Token()
|
||||
|
||||
@ -8,6 +8,7 @@ import (
|
||||
"strconv"
|
||||
|
||||
"github.com/navidrome/navidrome/conf"
|
||||
lyricssvc "github.com/navidrome/navidrome/core/lyrics"
|
||||
"github.com/navidrome/navidrome/log"
|
||||
"github.com/navidrome/navidrome/model"
|
||||
"github.com/navidrome/navidrome/utils/str"
|
||||
@ -137,13 +138,15 @@ func (md Metadata) mapLyrics() string {
|
||||
lang := raw.Key()
|
||||
text := raw.Value()
|
||||
|
||||
lyrics, err := model.ToLyrics(lang, text)
|
||||
lyrics, err := lyricssvc.ParseEmbedded(lang, text)
|
||||
if err != nil {
|
||||
log.Warn("Unexpected failure occurred when parsing lyrics", "file", md.filePath, err)
|
||||
continue
|
||||
}
|
||||
if !lyrics.IsEmpty() {
|
||||
lyricList = append(lyricList, *lyrics)
|
||||
for _, lyric := range lyrics {
|
||||
if !lyric.IsEmpty() {
|
||||
lyricList = append(lyricList, lyric)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -116,5 +116,50 @@ var _ = Describe("ToMediaFile", func() {
|
||||
sort.Slice(expected, func(i, j int) bool { return expected[i].Lang < expected[j].Lang })
|
||||
Expect(actual).To(Equal(expected))
|
||||
})
|
||||
|
||||
It("should parse embedded TTML lyrics before sanitizing XML tags", func() {
|
||||
mf = toMediaFile(model.RawTags{
|
||||
"LYRICS:ENG": {`<tt xmlns="http://www.w3.org/ns/ttml">
|
||||
<body>
|
||||
<div>
|
||||
<p begin="00:00:01.000" end="00:00:02.500">Embedded TTML line</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`},
|
||||
})
|
||||
var actual model.LyricList
|
||||
err := json.Unmarshal([]byte(mf.Lyrics), &actual)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
Expect(actual).To(Equal(model.LyricList{
|
||||
{
|
||||
Kind: "main",
|
||||
Lang: "eng",
|
||||
Line: []model.Line{{Start: P(int64(1000)), End: P(int64(2500)), Value: "Embedded TTML line"}},
|
||||
Synced: true,
|
||||
},
|
||||
}))
|
||||
})
|
||||
|
||||
It("should parse embedded SRT lyrics with the tag language", func() {
|
||||
mf = toMediaFile(model.RawTags{
|
||||
"LYRICS:POR": {`1
|
||||
00:00:18,800 --> 00:00:22,800
|
||||
Estamos nas legendas`},
|
||||
})
|
||||
var actual model.LyricList
|
||||
err := json.Unmarshal([]byte(mf.Lyrics), &actual)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
Expect(actual).To(Equal(model.LyricList{
|
||||
{
|
||||
Lang: "por",
|
||||
Line: []model.Line{
|
||||
{Start: P(int64(18800)), End: P(int64(22800)), Value: "Estamos nas legendas"},
|
||||
},
|
||||
Synced: true,
|
||||
},
|
||||
}))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user