feat: add TTML lyrics support with token-level karaoke and translation/pronunciation layers

Add a full TTML (Timed Text Markup Language) sidecar lyrics parser that extracts
word/syllable-level timing from <span> elements, plus translation and pronunciation
(transliteration) tracks from Apple Music TTML metadata sections.

Backend changes:
- TTML parser (core/lyrics/ttml.go) with support for all TTML time formats,
  nested timing contexts, and bare decimal second offsets
- Translation/pronunciation tracks resolved via key-based metadata linking
- Line timing hydration from token-level start/end values
- 'kind' field added to Lyrics model and StructuredLyric API response
  (main/translation/pronunciation)
- 'tokenLine' array in API response for word-level timing data
- UTF-8 BOM and UTF-16 LE encoding support for TTML files
- Fix for ambiguous time resolution in pronunciation spans (pre-1-minute)

Frontend changes:
- KaraokeLyricsOverlay rewritten with scrollable multi-line layout,
  word-level wipe highlighting with eased alpha transitions,
  rAF-driven playback clock with drift correction
- Inline translation (above) and pronunciation (below) for each main line,
  with smart filtering to hide redundant lines (same normalized text)
- TR/PR toggle buttons and layer selection via selectLyricLayers()
- Click-to-seek: click any lyric line to jump to that position
- Customization popover with font-size sliders and color presets
  for each line type (TR/Default/PR), persisted to localStorage
- Smooth font-size transition between active and inactive lines
- Resizable overlay height via drag handle
- lyrics.js: resolveKaraokeTokenWindow, buildSyntheticWordTokens,
  findLayerLineIndexForMain, token sorting, collapsed timing detection

API extension (non-breaking, additive):
- tokenLine[].token[] provides per-word start/end timing (ms)
- tokenLine[].index maps back to the corresponding line[] entry
- kind field: 'main', 'translation', 'pronunciation'
- Clients ignoring tokenLine/kind continue to work unchanged
This commit is contained in:
ranokay 2026-02-20 16:54:45 +02:00
parent ccee33f474
commit c77e0de976
No known key found for this signature in database
30 changed files with 4644 additions and 59 deletions

View File

@ -52,6 +52,7 @@ A share of the revenue helps fund the development of Navidrome at no additional
- **Multi-platform**, runs on macOS, Linux and Windows. **Docker** images are also provided - **Multi-platform**, runs on macOS, Linux and Windows. **Docker** images are also provided
- Ready to use binaries for all major platforms, including **Raspberry Pi** - Ready to use binaries for all major platforms, including **Raspberry Pi**
- Automatically **monitors your library** for changes, importing new files and reloading new metadata - Automatically **monitors your library** for changes, importing new files and reloading new metadata
- Supports synchronized lyrics from sidecar **.lrc** and **.ttml** files (via `lyricspriority`)
- **Themeable**, modern and responsive **Web interface** based on [Material UI](https://material-ui.com) - **Themeable**, modern and responsive **Web interface** based on [Material UI](https://material-ui.com)
- **Compatible** with all Subsonic/Madsonic/Airsonic [clients](https://www.navidrome.org/docs/overview/#apps) - **Compatible** with all Subsonic/Madsonic/Airsonic [clients](https://www.navidrome.org/docs/overview/#apps)
- **Transcoding** on the fly. Can be set per user/player. **Opus encoding is supported** - **Transcoding** on the fly. Can be set per user/player. **Opus encoding is supported**

View File

@ -677,7 +677,7 @@ func setViperDefaults() {
viper.SetDefault("coverartquality", 75) viper.SetDefault("coverartquality", 75)
viper.SetDefault("artistartpriority", "artist.*, album/artist.*, external") viper.SetDefault("artistartpriority", "artist.*, album/artist.*, external")
viper.SetDefault("discartpriority", "disc*.*, cd*.*, cover.*, folder.*, front.*, discsubtitle, embedded") viper.SetDefault("discartpriority", "disc*.*, cd*.*, cover.*, folder.*, front.*, discsubtitle, embedded")
viper.SetDefault("lyricspriority", ".lrc,.txt,embedded") viper.SetDefault("lyricspriority", ".lrc,.ttml,.txt,embedded")
viper.SetDefault("enablegravatar", false) viper.SetDefault("enablegravatar", false)
viper.SetDefault("enablefavourites", true) viper.SetDefault("enablefavourites", true)
viper.SetDefault("enablestarrating", true) viper.SetDefault("enablestarrating", true)

View File

@ -44,6 +44,35 @@ var _ = Describe("sources", func() {
}, },
} }
ttmlLyrics := model.LyricList{
model.Lyrics{
Kind: "main",
Lang: "eng",
Line: []model.Line{
{
Start: gg.P(int64(18800)),
Value: "We're no strangers to love",
},
{
Start: gg.P(int64(22800)),
Value: "You know the rules and so do I",
},
},
Synced: true,
},
model.Lyrics{
Kind: "main",
Lang: "por",
Line: []model.Line{
{
Start: gg.P(int64(18800)),
Value: "Nao somos estranhos ao amor",
},
},
Synced: true,
},
}
unsyncedLyrics := model.LyricList{ unsyncedLyrics := model.LyricList{
model.Lyrics{ model.Lyrics{
Lang: "xxx", Lang: "xxx",
@ -80,7 +109,8 @@ var _ = Describe("sources", func() {
}, },
Entry("embedded > lrc > txt", "embedded,.lrc,.txt", embeddedLyrics), Entry("embedded > lrc > txt", "embedded,.lrc,.txt", embeddedLyrics),
Entry("lrc > embedded > txt", ".lrc,embedded,.txt", syncedLyrics), Entry("lrc > embedded > txt", ".lrc,embedded,.txt", syncedLyrics),
Entry("txt > lrc > embedded", ".txt,.lrc,embedded", unsyncedLyrics)) Entry("txt > lrc > embedded", ".txt,.lrc,embedded", unsyncedLyrics),
Entry("ttml > lrc > embedded", ".ttml,.lrc,embedded", ttmlLyrics))
Context("Errors", func() { Context("Errors", func() {
var RegularUserContext = XContext var RegularUserContext = XContext

View File

@ -5,6 +5,7 @@ import (
"errors" "errors"
"os" "os"
"path" "path"
"strings"
"github.com/navidrome/navidrome/log" "github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model" "github.com/navidrome/navidrome/model"
@ -36,18 +37,31 @@ func fromExternalFile(ctx context.Context, mf *model.MediaFile, suffix string) (
return nil, err return nil, err
} }
lyrics, err := model.ToLyrics("xxx", string(contents)) var list model.LyricList
if err != nil { if strings.EqualFold(suffix, ".ttml") {
log.Error(ctx, "error parsing lyric external file", "path", externalLyric, err) list, err = parseTTML(contents)
return nil, err if err != nil {
} else if lyrics == nil { log.Error(ctx, "error parsing ttml external file", "path", externalLyric, err)
return nil, err
}
} else {
lyrics, err := model.ToLyrics("xxx", string(contents))
if err != nil {
log.Error(ctx, "error parsing lyric external file", "path", externalLyric, err)
return nil, err
}
if lyrics != nil {
list = model.LyricList{*lyrics}
}
}
if len(list) == 0 {
log.Trace(ctx, "empty lyrics from external file", "path", externalLyric) log.Trace(ctx, "empty lyrics from external file", "path", externalLyric)
return nil, nil return nil, nil
} }
log.Trace(ctx, "retrieved lyrics from external file", "path", externalLyric) log.Trace(ctx, "retrieved lyrics from external file", "path", externalLyric)
return list, nil
return model.LyricList{*lyrics}, nil
} }
// fromPlugin attempts to load lyrics from a plugin with the given name. // fromPlugin attempts to load lyrics from a plugin with the given name.

View File

@ -109,6 +109,41 @@ var _ = Describe("sources", func() {
})) }))
}) })
It("should return synchronized multilingual lyrics from a TTML file", func() {
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
Expect(err).To(BeNil())
Expect(lyrics).To(Equal(model.LyricList{
{
Kind: "main",
Lang: "eng",
Line: []model.Line{
{
Start: gg.P(int64(18800)),
Value: "We're no strangers to love",
},
{
Start: gg.P(int64(22800)),
Value: "You know the rules and so do I",
},
},
Synced: true,
},
{
Kind: "main",
Lang: "por",
Line: []model.Line{
{
Start: gg.P(int64(18800)),
Value: "Nao somos estranhos ao amor",
},
},
Synced: true,
},
}))
})
It("should handle LRC files with UTF-8 BOM marker (issue #4631)", func() { It("should handle LRC files with UTF-8 BOM marker (issue #4631)", func() {
// The function looks for <basePath-without-ext><suffix>, so we need to pass // The function looks for <basePath-without-ext><suffix>, so we need to pass
// a MediaFile with .mp3 path and look for .lrc suffix // a MediaFile with .mp3 path and look for .lrc suffix
@ -142,5 +177,33 @@ var _ = Describe("sources", func() {
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801)))) Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801))))
Expect(lyrics[0].Line[1].Value).To(Equal("You know the rules and so do I")) Expect(lyrics[0].Line[1].Value).To(Equal("You know the rules and so do I"))
}) })
It("should handle TTML files with UTF-8 BOM marker", func() {
mf := model.MediaFile{Path: "tests/fixtures/bom-test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
Expect(err).To(BeNil())
Expect(lyrics).To(HaveLen(1))
Expect(lyrics[0].Kind).To(Equal("main"))
Expect(lyrics[0].Synced).To(BeTrue())
Expect(lyrics[0].Line).To(HaveLen(1))
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(0))))
Expect(lyrics[0].Line[0].Value).To(Equal("BOM test line"))
})
It("should handle UTF-16 LE encoded TTML files", func() {
mf := model.MediaFile{Path: "tests/fixtures/bom-utf16-test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
Expect(err).To(BeNil())
Expect(lyrics).To(HaveLen(1))
Expect(lyrics[0].Kind).To(Equal("main"))
Expect(lyrics[0].Synced).To(BeTrue())
Expect(lyrics[0].Line).To(HaveLen(2))
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(18800))))
Expect(lyrics[0].Line[0].Value).To(Equal("UTF16 line one"))
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801))))
Expect(lyrics[0].Line[1].Value).To(Equal("UTF16 line two"))
})
}) })
}) })

View File

@ -0,0 +1,92 @@
package lyrics
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/navidrome/navidrome/model"
)
// TestFromExternalFileTTML loads the TTML sidecar that sits next to the
// test.mp3 fixture and checks the track count, the language of the first
// track, and the timing of its first line.
func TestFromExternalFileTTML(t *testing.T) {
	mediaFile := model.MediaFile{Path: fixturePath("test.mp3")}

	tracks, err := fromExternalFile(context.Background(), &mediaFile, ".ttml")
	if err != nil {
		t.Fatalf("fromExternalFile returned error: %v", err)
	}
	if count := len(tracks); count != 2 {
		t.Fatalf("expected 2 lyric tracks, got %d", count)
	}

	english := tracks[0]
	if english.Lang != "eng" {
		t.Fatalf("expected first language 'eng', got %q", english.Lang)
	}
	if count := len(english.Line); count != 2 {
		t.Fatalf("expected 2 english lines, got %d", count)
	}
	if start := english.Line[0].Start; start == nil || *start != 18800 {
		t.Fatalf("expected first english line start to be 18800, got %v", start)
	}
}
// TestFromExternalFileTTMLWithUTF8BOM checks that the bom-test.ttml fixture
// yields a single synced track whose only line starts at 0 ms.
func TestFromExternalFileTTMLWithUTF8BOM(t *testing.T) {
	mediaFile := model.MediaFile{Path: fixturePath("bom-test.ttml")}

	tracks, err := fromExternalFile(context.Background(), &mediaFile, ".ttml")
	if err != nil {
		t.Fatalf("fromExternalFile returned error: %v", err)
	}
	if count := len(tracks); count != 1 {
		t.Fatalf("expected 1 lyric track, got %d", count)
	}

	track := tracks[0]
	if !track.Synced {
		t.Fatal("expected BOM TTML lyrics to be synced")
	}
	if count := len(track.Line); count != 1 {
		t.Fatalf("expected 1 lyric line, got %d", count)
	}
	if start := track.Line[0].Start; start == nil || *start != 0 {
		t.Fatalf("expected first line start 0, got %v", start)
	}
}
// TestFromExternalFileTTMLUTF16 checks that the bom-utf16-test.ttml fixture
// yields a single synced track with two lines at the expected start times.
func TestFromExternalFileTTMLUTF16(t *testing.T) {
	mediaFile := model.MediaFile{Path: fixturePath("bom-utf16-test.ttml")}

	tracks, err := fromExternalFile(context.Background(), &mediaFile, ".ttml")
	if err != nil {
		t.Fatalf("fromExternalFile returned error: %v", err)
	}
	if count := len(tracks); count != 1 {
		t.Fatalf("expected 1 lyric track, got %d", count)
	}

	track := tracks[0]
	if !track.Synced {
		t.Fatal("expected UTF16 TTML lyrics to be synced")
	}
	if count := len(track.Line); count != 2 {
		t.Fatalf("expected 2 lyric lines, got %d", count)
	}
	if start := track.Line[0].Start; start == nil || *start != 18800 {
		t.Fatalf("expected first line start 18800, got %v", start)
	}
	if start := track.Line[1].Start; start == nil || *start != 22801 {
		t.Fatalf("expected second line start 22801, got %v", start)
	}
}
// fixturePath locates a fixture by name. It first looks under
// tests/fixtures relative to the working directory, then two directories
// up. When neither location exists it returns the working-directory-relative
// path so the caller fails with a recognizable file name.
func fixturePath(name string) string {
	fromCwd := filepath.Join("tests", "fixtures", name)
	if _, err := os.Stat(fromCwd); err == nil {
		return fromCwd
	}

	fromTwoLevelsUp := filepath.Join("..", "..", "tests", "fixtures", name)
	if _, err := os.Stat(fromTwoLevelsUp); err == nil {
		return fromTwoLevelsUp
	}

	return fromCwd
}

886
core/lyrics/ttml.go Normal file
View File

@ -0,0 +1,886 @@
package lyrics
import (
"bytes"
"encoding/xml"
"errors"
"io"
"math"
"regexp"
"sort"
"strconv"
"strings"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/utils/str"
)
// Defaults for the TTML timing parameters and the kind labels attached to
// the lyric tracks produced by the parser.
const (
// defaultTTMLFrameRate is the frame rate the parser starts with; it is also
// the fallback when a computed frame rate is not positive.
defaultTTMLFrameRate = 30.0
// defaultTTMLSubFrameRate is the initial/fallback number of sub-frames per frame.
defaultTTMLSubFrameRate = 1.0
// defaultTTMLTickRate is the initial/fallback number of ticks per second.
defaultTTMLTickRate = 1.0
// ttmlLyricKindMain labels tracks built from <p> elements.
ttmlLyricKindMain = "main"
// ttmlLyricKindTranslation labels tracks built from <translation> elements.
ttmlLyricKindTranslation = "translation"
// ttmlLyricKindPronunciation labels tracks built from <transliteration> elements.
ttmlLyricKindPronunciation = "pronunciation"
)
// offsetTimeRegex matches an unsigned decimal number followed by one of the
// unit suffixes h, m, s, ms, f or t. It is applied to the lower-cased
// expression, capturing the number and the unit separately.
var offsetTimeRegex = regexp.MustCompile(`^([0-9]+(?:\.[0-9]+)?)(h|m|s|ms|f|t)$`)
// xmlEncodingRegex matches (case-insensitively) an XML declaration carrying
// an encoding pseudo-attribute, capturing the text before and after it so the
// declared encoding can be rewritten.
var xmlEncodingRegex = regexp.MustCompile(`(?i)<\?xml([^>]*?)encoding\s*=\s*["'][^"']+["']([^>]*)\?>`)
// ttmlTimeKind classifies a parsed time expression so that it can later be
// resolved against the enclosing timing context.
type ttmlTimeKind int
const (
// ttmlTimeAbsolute is reported for colon-separated clock and frame times;
// such values are used as-is.
ttmlTimeAbsolute ttmlTimeKind = iota
// ttmlTimeOffset is reported for unit-suffixed values (e.g. "2s"); such
// values are added to a base time.
ttmlTimeOffset
// ttmlTimeAmbiguous is reported for bare decimal values, which may be
// either absolute or relative and are resolved heuristically.
ttmlTimeAmbiguous
)
// ttmlTimingParams holds the rates used to convert frame- and tick-based
// time expressions into seconds.
type ttmlTimingParams struct {
// frameRate divides frame counts to obtain seconds.
frameRate float64
// subFrameRate, multiplied by frameRate, divides sub-frame counts.
subFrameRate float64
// tickRate divides tick counts ("t" unit) to obtain seconds.
tickRate float64
}
// ttmlTimingContext is the state inherited from ancestor elements while
// walking the document: language, role and the resolved timing window.
type ttmlTimingContext struct {
// lang is the normalized language in effect ("xxx" at the root).
lang string
// role is the accumulated, space-separated role value.
role string
// begin is the resolved begin time in milliseconds; valid only if hasBegin.
begin int64
hasBegin bool
// end is the resolved end time in milliseconds; valid only if hasEnd.
end int64
hasEnd bool
// invalid is set when a begin/end/dur attribute could not be parsed.
invalid bool
}
// ttmlLineRef records a keyed main line so that metadata entries can be
// linked back to it.
type ttmlLineRef struct {
// order is the running main-line counter at the time the line was added.
order int
// line is a copy of the main line, used as a timing fallback.
line model.Line
}
// ttmlMetadataEntry is one <text> element collected from a translation or
// transliteration section.
type ttmlMetadataEntry struct {
// key is the trimmed value of the element's "for" attribute.
key string
// line is the parsed text and timing of the entry.
line model.Line
// seq is the document-wide sequence number assigned when the entry is stored.
seq int
}
// ttmlResolvedMetadataLine is a metadata entry that has been matched to a
// main line; it carries the keys used to sort the resulting track.
type ttmlResolvedMetadataLine struct {
// order is the position of the referenced main line (primary sort key).
order int
// seq is the entry's own sequence number (secondary sort key).
seq int
// line is the entry's line, with missing timing filled from the main line.
line model.Line
}
// ttmlParser accumulates everything extracted from a single TTML document
// while its token stream is consumed.
type ttmlParser struct {
// decoder is the XML token source shared by all parse methods.
decoder *xml.Decoder
// params holds the current frame/sub-frame/tick rates.
params ttmlTimingParams
// mainLangOrder lists main-track languages in first-seen order.
mainLangOrder []string
// mainLinesByLang holds the main lines of each language.
mainLinesByLang map[string][]model.Line
// mainLineRefsByKey maps a line key to the first main line that declared it.
mainLineRefsByKey map[string]ttmlLineRef
// mainLineOrder counts main lines added so far.
mainLineOrder int
// translationLangOrder lists translation languages in first-seen order.
translationLangOrder []string
// translationEntriesByLg holds translation entries per language.
translationEntriesByLg map[string][]ttmlMetadataEntry
// pronunciationLangOrder lists pronunciation languages in first-seen order.
pronunciationLangOrder []string
// pronunciationEntriesByLg holds pronunciation entries per language.
pronunciationEntriesByLg map[string][]ttmlMetadataEntry
// metadataSeq counts metadata entries stored so far (all kinds and languages).
metadataSeq int
}
// parseTTML parses a TTML document and returns its lyric tracks: main tracks
// first (one per language), then translation tracks, then pronunciation
// tracks.
//
// Any encoding named in the XML declaration is rewritten to UTF-8 before
// decoding. A decoder error other than end-of-input aborts parsing and is
// returned unchanged.
func parseTTML(contents []byte) (model.LyricList, error) {
	normalized := xmlEncodingRegex.ReplaceAll(contents, []byte(`<?xml$1encoding="UTF-8"$2?>`))

	parser := ttmlParser{
		decoder: xml.NewDecoder(bytes.NewReader(normalized)),
		params: ttmlTimingParams{
			frameRate:    defaultTTMLFrameRate,
			subFrameRate: defaultTTMLSubFrameRate,
			tickRate:     defaultTTMLTickRate,
		},
		mainLinesByLang:          make(map[string][]model.Line),
		mainLineRefsByKey:        make(map[string]ttmlLineRef),
		translationEntriesByLg:   make(map[string][]ttmlMetadataEntry),
		pronunciationEntriesByLg: make(map[string][]ttmlMetadataEntry),
	}
	rootCtx := ttmlTimingContext{lang: "xxx"}

	for {
		tok, err := parser.decoder.Token()
		switch {
		case errors.Is(err, io.EOF):
			return parser.toLyricList(), nil
		case err != nil:
			return nil, err
		}

		// Only element starts matter at the top level; parseElement consumes
		// everything up to the matching end tag.
		element, isStart := tok.(xml.StartElement)
		if !isStart {
			continue
		}
		if err := parser.parseElement(element, rootCtx); err != nil {
			return nil, err
		}
	}
}
// parseElement consumes one element, up to and including its end tag.
//
//   - <tt> updates the timing parameters and is then walked like any container.
//   - <translation> and <transliteration> are handed to parseMetadataTrack.
//   - <p> becomes a main lyric line, unless its timing is invalid or its text
//     is empty.
//   - Any other element is a container whose children are parsed recursively.
func (p *ttmlParser) parseElement(start xml.StartElement, parent ttmlTimingContext) error {
	name := strings.ToLower(start.Name.Local)
	switch name {
	case "tt":
		p.updateTimingParams(start.Attr)
	case "translation":
		return p.parseMetadataTrack(start, parent, ttmlLyricKindTranslation)
	case "transliteration":
		return p.parseMetadataTrack(start, parent, ttmlLyricKindPronunciation)
	}

	ctx := p.childContext(start.Attr, parent)

	if name == "p" {
		text, tokens, err := p.parseParagraph(ctx)
		if err != nil {
			return err
		}
		if ctx.invalid || text == "" {
			return nil
		}

		line := model.Line{Value: text}
		if ctx.hasBegin {
			begin := ctx.begin
			line.Start = &begin
		}
		if ctx.hasEnd {
			end := ctx.end
			line.End = &end
		}
		if len(tokens) > 0 {
			line.Token = tokens
		}

		key, _ := attrValue(start.Attr, "key")
		p.addMainLine(ctx.lang, key, hydrateLineTimingFromTokens(line))
		return nil
	}

	// Best effort: ignore invalid timing in container elements, and
	// continue traversing descendants with parent context.
	inherited := ctx
	if ctx.invalid {
		inherited = parent
	}

	for {
		tok, err := p.decoder.Token()
		if err != nil {
			return err
		}
		switch node := tok.(type) {
		case xml.EndElement:
			if strings.EqualFold(node.Name.Local, start.Name.Local) {
				return nil
			}
		case xml.StartElement:
			if err := p.parseElement(node, inherited); err != nil {
				return err
			}
		}
	}
}
// parseMetadataTrack consumes a <translation> or <transliteration> element.
// Each <text> child is parsed as a metadata entry and stored under the
// track's language for the given kind; any other child is parsed as a regular
// element.
func (p *ttmlParser) parseMetadataTrack(start xml.StartElement, parent ttmlTimingContext, kind string) error {
	trackCtx := p.childContext(start.Attr, parent)
	trackLang := normalizeTTMLLang(trackCtx.lang)

	// Non-<text> children fall back to the parent's context when the track's
	// own timing attributes could not be parsed.
	inherited := trackCtx
	if trackCtx.invalid {
		inherited = parent
	}

	for {
		tok, err := p.decoder.Token()
		if err != nil {
			return err
		}

		if closing, isEnd := tok.(xml.EndElement); isEnd {
			if strings.EqualFold(closing.Name.Local, start.Name.Local) {
				return nil
			}
			continue
		}

		child, isStart := tok.(xml.StartElement)
		if !isStart {
			continue
		}

		if !strings.EqualFold(child.Name.Local, "text") {
			if err := p.parseElement(child, inherited); err != nil {
				return err
			}
			continue
		}

		entry, ok, err := p.parseMetadataText(child, trackCtx)
		if err != nil {
			return err
		}
		if ok {
			p.addMetadataEntry(kind, trackLang, entry)
		}
	}
}
// parseMetadataText consumes a <text> element inside a metadata track and
// turns it into an entry keyed by its "for" attribute.
//
// The element is always consumed in full. The boolean result is false (with a
// nil error) when the entry must be ignored: missing or blank "for", invalid
// timing, or neither text nor tokens.
func (p *ttmlParser) parseMetadataText(start xml.StartElement, parent ttmlTimingContext) (ttmlMetadataEntry, bool, error) {
	var none ttmlMetadataEntry

	rawKey, hasFor := attrValue(start.Attr, "for")
	key := strings.TrimSpace(rawKey)

	// Consume the element before any validation so the decoder is always left
	// positioned after its end tag.
	text, tokens, err := p.parseInlineElement(start, parent)
	if err != nil {
		return none, false, err
	}
	if !hasFor || key == "" {
		return none, false, nil
	}

	ctx := p.childContext(start.Attr, parent)
	if ctx.invalid {
		return none, false, nil
	}

	entryLine := model.Line{Value: sanitizeTTMLText(text)}
	if ctx.hasBegin {
		begin := ctx.begin
		entryLine.Start = &begin
	}
	if ctx.hasEnd {
		end := ctx.end
		entryLine.End = &end
	}
	if len(tokens) > 0 {
		entryLine.Token = tokens
	}
	entryLine = hydrateLineTimingFromTokens(entryLine)

	if entryLine.Value == "" && len(entryLine.Token) == 0 {
		return none, false, nil
	}
	return ttmlMetadataEntry{key: key, line: entryLine}, true, nil
}
// parseParagraph consumes the content of a <p> element whose start tag has
// already been read. It returns the sanitized text of the paragraph and the
// timed tokens collected from its inline children.
func (p *ttmlParser) parseParagraph(parent ttmlTimingContext) (string, []model.Token, error) {
	var (
		buf       strings.Builder
		collected []model.Token
	)

	for {
		tok, err := p.decoder.Token()
		if err != nil {
			return "", nil, err
		}

		switch node := tok.(type) {
		case xml.CharData:
			buf.Write(node)
		case xml.StartElement:
			childText, childTokens, err := p.parseInlineElement(node, parent)
			if err != nil {
				return "", nil, err
			}
			buf.WriteString(childText)
			collected = append(collected, childTokens...)
		case xml.EndElement:
			// End tags other than </p> (e.g. the one following <br/>) are skipped.
			if strings.EqualFold(node.Name.Local, "p") {
				return sanitizeTTMLText(buf.String()), collected, nil
			}
		}
	}
}
// parseInlineElement consumes an inline element and returns its raw
// (unsanitized) text together with the timed tokens found inside it.
//
// <br> yields a newline immediately; its end tag is left for the caller's
// loop to skip. A <span> becomes a token when it declares its own begin, end
// or dur, has valid timing, contains non-empty text, and has no token-bearing
// descendants.
func (p *ttmlParser) parseInlineElement(start xml.StartElement, parent ttmlTimingContext) (string, []model.Token, error) {
	name := strings.ToLower(start.Name.Local)
	if name == "br" {
		return "\n", nil, nil
	}

	ctx := p.childContext(start.Attr, parent)

	ownTiming := false
	for _, attrName := range []string{"begin", "end", "dur"} {
		if _, present := attrValue(start.Attr, attrName); present {
			ownTiming = true
			break
		}
	}

	var (
		buf       strings.Builder
		collected []model.Token
	)

	for {
		tok, err := p.decoder.Token()
		if err != nil {
			return "", nil, err
		}

		switch node := tok.(type) {
		case xml.CharData:
			buf.Write(node)

		case xml.StartElement:
			childText, childTokens, err := p.parseInlineElement(node, ctx)
			if err != nil {
				return "", nil, err
			}
			buf.WriteString(childText)
			collected = append(collected, childTokens...)

		case xml.EndElement:
			if !strings.EqualFold(node.Name.Local, start.Name.Local) {
				continue
			}

			raw := buf.String()
			cleaned := sanitizeTTMLText(raw)
			isLeafToken := name == "span" && ownTiming && !ctx.invalid && len(collected) == 0
			if isLeafToken && cleaned != "" {
				leaf := model.Token{Value: cleaned, Role: ctx.role}
				if ctx.hasBegin {
					begin := ctx.begin
					leaf.Start = &begin
				}
				if ctx.hasEnd {
					end := ctx.end
					leaf.End = &end
				}
				collected = append(collected, leaf)
			}
			return raw, collected, nil
		}
	}
}
// toLyricList assembles the final result: one main track per language in
// first-seen order, followed by the translation tracks and then the
// pronunciation tracks. Languages without lines are omitted.
func (p *ttmlParser) toLyricList() model.LyricList {
	capacity := len(p.mainLangOrder) + len(p.translationLangOrder) + len(p.pronunciationLangOrder)
	tracks := make(model.LyricList, 0, capacity)

	for _, lang := range p.mainLangOrder {
		if lines := p.mainLinesByLang[lang]; len(lines) > 0 {
			tracks = append(tracks, model.Lyrics{
				Kind:   ttmlLyricKindMain,
				Lang:   lang,
				Line:   lines,
				Synced: linesAreSynced(lines),
			})
		}
	}

	translations := p.buildMetadataLyrics(ttmlLyricKindTranslation, p.translationLangOrder, p.translationEntriesByLg)
	tracks = append(tracks, translations...)

	pronunciations := p.buildMetadataLyrics(ttmlLyricKindPronunciation, p.pronunciationLangOrder, p.pronunciationEntriesByLg)
	tracks = append(tracks, pronunciations...)

	return tracks
}
// buildMetadataLyrics turns the collected metadata entries of one kind into
// lyric tracks, one per language in langOrder.
//
// Within a language only the first entry for each key is considered. Entries
// whose key matches no main line are skipped with a warning. Missing start or
// end times are copied from the referenced main line, and the resulting lines
// are ordered by the main line's position, then by entry sequence.
func (p *ttmlParser) buildMetadataLyrics(kind string, langOrder []string, entriesByLang map[string][]ttmlMetadataEntry) model.LyricList {
	tracks := make(model.LyricList, 0, len(langOrder))

	for _, lang := range langOrder {
		entries := entriesByLang[lang]
		if len(entries) == 0 {
			continue
		}

		handled := make(map[string]bool, len(entries))
		matched := make([]ttmlResolvedMetadataLine, 0, len(entries))

		for _, entry := range entries {
			if handled[entry.key] {
				continue
			}
			handled[entry.key] = true

			ref, found := p.mainLineRefsByKey[entry.key]
			if !found {
				log.Warn("Skipping TTML metadata line without matching key", "kind", kind, "lang", lang, "key", entry.key)
				continue
			}

			candidate := entry.line
			if candidate.Start == nil && ref.line.Start != nil {
				begin := *ref.line.Start
				candidate.Start = &begin
			}
			if candidate.End == nil && ref.line.End != nil {
				end := *ref.line.End
				candidate.End = &end
			}
			candidate = hydrateLineTimingFromTokens(candidate)
			if candidate.Value == "" && len(candidate.Token) == 0 {
				continue
			}

			matched = append(matched, ttmlResolvedMetadataLine{
				order: ref.order,
				seq:   entry.seq,
				line:  candidate,
			})
		}

		if len(matched) == 0 {
			continue
		}

		sort.SliceStable(matched, func(a, b int) bool {
			left, right := matched[a], matched[b]
			if left.order == right.order {
				return left.seq < right.seq
			}
			return left.order < right.order
		})

		lines := make([]model.Line, 0, len(matched))
		for _, m := range matched {
			lines = append(lines, m.line)
		}

		tracks = append(tracks, model.Lyrics{
			Kind:   kind,
			Lang:   lang,
			Line:   lines,
			Synced: linesAreSynced(lines),
		})
	}

	return tracks
}
// addMainLine appends line to the main track of the given language,
// registering the language on first use. When lineKey is non-blank and not
// yet known, the line is also recorded under that key so metadata entries can
// refer to it. The main-line counter advances for every line, keyed or not.
func (p *ttmlParser) addMainLine(lang string, lineKey string, line model.Line) {
	lang = normalizeTTMLLang(lang)

	existing, known := p.mainLinesByLang[lang]
	if !known {
		p.mainLangOrder = append(p.mainLangOrder, lang)
	}
	p.mainLinesByLang[lang] = append(existing, line)

	position := p.mainLineOrder
	p.mainLineOrder++

	key := strings.TrimSpace(lineKey)
	if key == "" {
		return
	}
	// The first line to claim a key wins.
	if _, taken := p.mainLineRefsByKey[key]; taken {
		return
	}
	p.mainLineRefsByKey[key] = ttmlLineRef{order: position, line: line}
}
// addMetadataEntry stamps entry with the next sequence number and stores it
// under lang in the collection for the given kind, registering the language on
// first use. An unrecognized kind still consumes a sequence number but stores
// nothing.
func (p *ttmlParser) addMetadataEntry(kind string, lang string, entry ttmlMetadataEntry) {
	lang = normalizeTTMLLang(lang)

	entry.seq = p.metadataSeq
	p.metadataSeq++

	var (
		langOrder *[]string
		byLang    map[string][]ttmlMetadataEntry
	)
	switch kind {
	case ttmlLyricKindTranslation:
		langOrder, byLang = &p.translationLangOrder, p.translationEntriesByLg
	case ttmlLyricKindPronunciation:
		langOrder, byLang = &p.pronunciationLangOrder, p.pronunciationEntriesByLg
	default:
		return
	}

	if _, known := byLang[lang]; !known {
		*langOrder = append(*langOrder, lang)
	}
	byLang[lang] = append(byLang[lang], entry)
}
// childContext derives the timing context of an element from its attributes
// and the context of its parent.
//
//   - lang replaces the inherited language.
//   - role tokens are appended to the inherited role, skipping tokens that are
//     already present. Comparison is per whitespace-separated token, so a new
//     token is not dropped just because it is a substring of an existing one.
//   - begin is resolved against the parent's begin (0 when the parent has
//     none); without a begin attribute the parent's begin is inherited.
//   - end is resolved against the element's own begin; dur yields begin+dur.
//     When both are given the earlier of the two wins. Without either, the
//     parent's end is inherited.
//
// If begin, end or dur cannot be parsed the returned context has invalid set
// and its timing fields must not be relied upon.
func (p *ttmlParser) childContext(attrs []xml.Attr, parent ttmlTimingContext) ttmlTimingContext {
	ctx := parent

	if lang, ok := attrValue(attrs, "lang"); ok {
		ctx.lang = normalizeTTMLLang(lang)
	}

	if role, ok := attrValue(attrs, "role"); ok {
		role = strings.TrimSpace(role)
		switch {
		case role == "":
			// Nothing to merge.
		case ctx.role == "":
			ctx.role = role
		default:
			known := strings.Fields(ctx.role)
			for _, candidate := range strings.Fields(role) {
				duplicate := false
				for _, existing := range known {
					if existing == candidate {
						duplicate = true
						break
					}
				}
				if duplicate {
					continue
				}
				known = append(known, candidate)
				ctx.role += " " + candidate
			}
		}
	}

	beginExpr, hasBegin := attrValue(attrs, "begin")
	endExpr, hasEnd := attrValue(attrs, "end")
	durExpr, hasDur := attrValue(attrs, "dur")

	if hasBegin {
		begin, kind, ok := parseTTMLTimeExpression(beginExpr, p.params)
		if !ok {
			ctx.invalid = true
			return ctx
		}
		base := int64(0)
		if parent.hasBegin {
			base = parent.begin
		}
		ctx.begin = resolveTTMLTime(begin, kind, base, parent)
		ctx.hasBegin = true
	} else {
		ctx.begin = parent.begin
		ctx.hasBegin = parent.hasBegin
	}

	var calculatedEnd int64
	calculatedHasEnd := false

	if hasEnd {
		end, kind, ok := parseTTMLTimeExpression(endExpr, p.params)
		if !ok {
			ctx.invalid = true
			return ctx
		}
		// Relative end values are measured from this element's begin.
		base := ctx.begin
		if !ctx.hasBegin {
			base = parent.begin
		}
		calculatedEnd = resolveTTMLTime(end, kind, base, parent)
		calculatedHasEnd = true
	}

	if hasDur {
		dur, ok := parseTTMLDurationExpression(durExpr, p.params)
		if !ok {
			ctx.invalid = true
			return ctx
		}
		// A duration only makes sense relative to a known begin; it may
		// shorten, but never extend, an explicit end.
		if ctx.hasBegin {
			durEnd := ctx.begin + dur
			if !calculatedHasEnd || durEnd < calculatedEnd {
				calculatedEnd = durEnd
				calculatedHasEnd = true
			}
		}
	}

	if !calculatedHasEnd && parent.hasEnd {
		calculatedEnd = parent.end
		calculatedHasEnd = true
	}

	ctx.end = calculatedEnd
	ctx.hasEnd = calculatedHasEnd
	return ctx
}
// updateTimingParams refreshes the parser's timing rates from the attributes
// frameRate, frameRateMultiplier, subFrameRate and tickRate.
//
// An attribute that is absent, unparsable or not positive leaves the current
// value in place. frameRateMultiplier must be two whitespace-separated numbers
// ("numerator denominator") with a positive denominator. Any resulting rate
// that is not positive falls back to its default.
func (p *ttmlParser) updateTimingParams(attrs []xml.Attr) {
	// positiveAttr returns the attribute's value when it parses to a positive
	// number, and current otherwise.
	positiveAttr := func(name string, current float64) float64 {
		raw, found := attrValue(attrs, name)
		if !found {
			return current
		}
		parsed, err := strconv.ParseFloat(raw, 64)
		if err != nil || !(parsed > 0) {
			return current
		}
		return parsed
	}

	frameRate := positiveAttr("frameRate", p.params.frameRate)
	if raw, found := attrValue(attrs, "frameRateMultiplier"); found {
		if fields := strings.Fields(raw); len(fields) == 2 {
			numerator, numErr := strconv.ParseFloat(fields[0], 64)
			denominator, denErr := strconv.ParseFloat(fields[1], 64)
			if numErr == nil && denErr == nil && denominator > 0 {
				frameRate = frameRate * (numerator / denominator)
			}
		}
	}
	subFrameRate := positiveAttr("subFrameRate", p.params.subFrameRate)
	tickRate := positiveAttr("tickRate", p.params.tickRate)

	p.params.frameRate = max(frameRate, defaultTTMLFrameRate)
	p.params.subFrameRate = max(subFrameRate, defaultTTMLSubFrameRate)
	p.params.tickRate = max(tickRate, defaultTTMLTickRate)
}
// parseTTMLDurationExpression parses a duration using the same grammar as a
// time expression and returns its length in milliseconds. The expression's
// kind is irrelevant for a duration and is discarded.
func parseTTMLDurationExpression(expr string, params ttmlTimingParams) (int64, bool) {
	millis, _, valid := parseTTMLTimeExpression(expr, params)
	return millis, valid
}
// resolveTTMLTime turns a parsed time value into a document time in
// milliseconds. Absolute values are returned unchanged and offsets are added
// to base. Ambiguous values (bare decimals) are resolved heuristically using
// the parent's timing window; the checks below are order-dependent and the
// first one that decides wins, with "offset" as the final fallback.
func resolveTTMLTime(value int64, kind ttmlTimeKind, base int64, parent ttmlTimingContext) int64 {
switch kind {
case ttmlTimeAbsolute:
return value
case ttmlTimeOffset:
return base + value
case ttmlTimeAmbiguous:
// The two candidate interpretations of the value.
absolute := value
offset := base + value
// No parent timing context → no reference frame for offsets.
// Prefer absolute when offset differs (i.e., base > 0).
if !parent.hasBegin && !parent.hasEnd && base != 0 {
return absolute
}
// With a complete parent window, pick the interpretation that falls
// inside it when exactly one of them does.
if parent.hasBegin && parent.hasEnd {
absoluteInParent := absolute >= parent.begin && absolute <= parent.end
offsetInParent := offset >= parent.begin && offset <= parent.end
if absoluteInParent && !offsetInParent {
return absolute
}
if offsetInParent && !absoluteInParent {
return offset
}
}
// With only the parent's begin to go on: an absolute reading that would
// precede the parent is rejected in favor of the offset; an absolute
// reading at or after the parent's begin wins over a later offset.
if parent.hasBegin {
if absolute < parent.begin && offset >= parent.begin {
return offset
}
if absolute >= parent.begin && offset > absolute {
return absolute
}
}
return offset
default:
// Unknown kinds are treated like offsets.
return base + value
}
}
// parseTTMLTimeExpression parses a TTML time expression into milliseconds and
// reports how the value must be interpreted.
//
// Supported forms:
//   - bare non-negative decimal seconds, e.g. "0.170" (ttmlTimeAmbiguous)
//   - unit-suffixed offsets with h, m, s, ms, f or t (ttmlTimeOffset)
//   - clock times with one or two colons (ttmlTimeAbsolute)
//   - frame times with three colons (ttmlTimeAbsolute)
//
// wallclock(...) and event-based (".begin"/".end") expressions are not
// supported. The boolean result is false for empty, unsupported or malformed
// input, and for values that cannot be represented as a non-negative int64
// number of milliseconds (for example "inf" or "1e300").
func parseTTMLTimeExpression(expr string, params ttmlTimingParams) (int64, ttmlTimeKind, bool) {
	expr = strings.TrimSpace(expr)
	if expr == "" {
		return 0, ttmlTimeOffset, false
	}

	lower := strings.ToLower(expr)
	if strings.Contains(lower, "wallclock(") ||
		strings.Contains(lower, ".begin") ||
		strings.Contains(lower, ".end") {
		log.Warn("Unsupported TTML time expression", "value", expr)
		return 0, ttmlTimeOffset, false
	}

	// toMillis converts seconds to whole milliseconds, refusing values for
	// which the float-to-int64 conversion would not be well defined.
	toMillis := func(seconds float64) (int64, bool) {
		ms := math.Round(seconds * 1000)
		if math.IsNaN(ms) || ms < 0 || ms >= math.MaxInt64 {
			return 0, false
		}
		return int64(ms), true
	}

	// Best-effort support for non-standard TTML seen in the wild where a
	// bare decimal value is used (implicitly seconds), e.g. "0.170".
	if value, err := strconv.ParseFloat(lower, 64); err == nil && value >= 0 {
		// ParseFloat also accepts "inf"/"infinity" and very large exponents;
		// those are not usable timestamps.
		ms, ok := toMillis(value)
		if !ok {
			log.Warn("Unsupported TTML time expression", "value", expr)
			return 0, ttmlTimeOffset, false
		}
		return ms, ttmlTimeAmbiguous, true
	}

	if matches := offsetTimeRegex.FindStringSubmatch(lower); len(matches) == 3 {
		value, err := strconv.ParseFloat(matches[1], 64)
		if err != nil {
			return 0, ttmlTimeOffset, false
		}

		var seconds float64
		switch matches[2] {
		case "h":
			seconds = value * 60 * 60
		case "m":
			seconds = value * 60
		case "s":
			seconds = value
		case "ms":
			seconds = value / 1000
		case "f":
			seconds = value / params.frameRate
		case "t":
			seconds = value / params.tickRate
		default:
			return 0, ttmlTimeOffset, false
		}

		ms, ok := toMillis(seconds)
		if !ok {
			return 0, ttmlTimeOffset, false
		}
		return ms, ttmlTimeOffset, true
	}

	switch strings.Count(expr, ":") {
	case 1, 2:
		clockMs, ok := parseTTMLClockTime(expr)
		if !ok {
			return 0, ttmlTimeAbsolute, false
		}
		return clockMs, ttmlTimeAbsolute, true
	case 3:
		framesMs, ok := parseTTMLFrameTime(expr, params)
		if !ok {
			return 0, ttmlTimeAbsolute, false
		}
		return framesMs, ttmlTimeAbsolute, true
	default:
		log.Warn("Unsupported TTML time expression", "value", expr)
		return 0, ttmlTimeOffset, false
	}
}
// parseTTMLClockTime parses a clock time of the form "mm:ss[.fff]" or
// "hh:mm:ss[.fff]" and returns it in milliseconds.
//
// The boolean result is false when the value does not have two or three
// colon-separated fields, when a field is not numeric, when any field is
// negative, when the seconds field is NaN or infinite, or when the total does
// not fit in an int64 number of milliseconds.
func parseTTMLClockTime(value string) (int64, bool) {
	parts := strings.Split(value, ":")
	if len(parts) != 2 && len(parts) != 3 {
		return 0, false
	}

	var hours int64
	if len(parts) == 3 {
		h, err := strconv.ParseInt(parts[0], 10, 64)
		if err != nil || h < 0 {
			return 0, false
		}
		hours = h
		parts = parts[1:]
	}

	minutes, err := strconv.ParseInt(parts[0], 10, 64)
	if err != nil || minutes < 0 {
		return 0, false
	}

	// ParseFloat accepts "NaN", "Inf" and signed values, none of which are
	// meaningful as a seconds field.
	seconds, err := strconv.ParseFloat(parts[1], 64)
	if err != nil || math.IsNaN(seconds) || math.IsInf(seconds, 0) || seconds < 0 {
		return 0, false
	}

	// Sum in floating point so that absurdly large hour values cannot wrap
	// around in integer arithmetic.
	totalSeconds := float64(hours)*60*60 + float64(minutes)*60 + seconds
	totalMs := math.Round(totalSeconds * 1000)
	if totalMs >= math.MaxInt64 {
		return 0, false
	}
	return int64(totalMs), true
}
// parseTTMLFrameTime parses a frame-based time of the form
// "hh:mm:ss:ff[.sub]" and returns it in milliseconds. Frames are converted
// with params.frameRate and sub-frames with params.subFrameRate*params.frameRate.
//
// The boolean result is false when the value does not have exactly four
// colon-separated fields, when a field is not numeric or is negative, or when
// the result is not a finite, non-negative number of milliseconds that fits in
// an int64 (for instance because a rate in params is zero).
func parseTTMLFrameTime(value string, params ttmlTimingParams) (int64, bool) {
	parts := strings.Split(value, ":")
	if len(parts) != 4 {
		return 0, false
	}

	// hours, minutes, seconds
	var clock [3]float64
	for i := range clock {
		n, err := strconv.ParseInt(parts[i], 10, 64)
		if err != nil || n < 0 {
			return 0, false
		}
		clock[i] = float64(n)
	}

	frameParts := strings.SplitN(parts[3], ".", 2)
	frames, err := strconv.ParseFloat(frameParts[0], 64)
	if err != nil || frames < 0 {
		return 0, false
	}
	subFrames := 0.0
	if len(frameParts) == 2 {
		subFrames, err = strconv.ParseFloat(frameParts[1], 64)
		if err != nil || subFrames < 0 {
			return 0, false
		}
	}

	totalSeconds := clock[0]*60*60 + clock[1]*60 + clock[2]
	totalSeconds += frames / params.frameRate
	totalSeconds += subFrames / (params.subFrameRate * params.frameRate)

	// Catches NaN/Inf inputs and divisions by a zero rate, for which the
	// conversion to int64 would not be well defined.
	totalMs := math.Round(totalSeconds * 1000)
	if math.IsNaN(totalMs) || totalMs < 0 || totalMs >= math.MaxInt64 {
		return 0, false
	}
	return int64(totalMs), true
}
// attrValue returns the whitespace-trimmed value of the first attribute whose
// local name equals key, ignoring case and namespace. The boolean result
// reports whether such an attribute exists.
func attrValue(attrs []xml.Attr, key string) (string, bool) {
	for i := range attrs {
		if !strings.EqualFold(attrs[i].Name.Local, key) {
			continue
		}
		return strings.TrimSpace(attrs[i].Value), true
	}
	return "", false
}
// normalizeTTMLLang trims and lower-cases a language tag. A blank tag is
// mapped to the placeholder "xxx".
func normalizeTTMLLang(lang string) string {
	if normalized := strings.ToLower(strings.TrimSpace(lang)); normalized != "" {
		return normalized
	}
	return "xxx"
}
// sanitizeTTMLText cleans text extracted from a TTML element: it applies
// str.SanitizeText, converts CRLF and CR line endings to LF, trims every line,
// and finally trims the result as a whole.
func sanitizeTTMLText(raw string) string {
	unified := strings.NewReplacer("\r\n", "\n", "\r", "\n").Replace(str.SanitizeText(raw))

	parts := strings.Split(unified, "\n")
	trimmed := make([]string, 0, len(parts))
	for _, part := range parts {
		trimmed = append(trimmed, strings.TrimSpace(part))
	}
	return strings.TrimSpace(strings.Join(trimmed, "\n"))
}
// linesAreSynced reports whether any line, or any token of any line, carries
// a start time.
func linesAreSynced(lines []model.Line) bool {
	for _, line := range lines {
		if line.Start != nil {
			return true
		}
		for _, tok := range line.Token {
			if tok.Start != nil {
				return true
			}
		}
	}
	return false
}
// hydrateLineTimingFromTokens fills in a line's missing Start and End from its
// tokens: Start becomes the earliest token start, End the latest token end
// (a token without an end contributes its start instead). Timing already
// present on the line is never overwritten, and a line without tokens is
// returned unchanged.
func hydrateLineTimingFromTokens(line model.Line) model.Line {
	if len(line.Token) == 0 {
		return line
	}

	var (
		firstStart, lastEnd int64
		haveStart, haveEnd  bool
	)
	for _, tok := range line.Token {
		if tok.Start != nil && (!haveStart || *tok.Start < firstStart) {
			firstStart, haveStart = *tok.Start, true
		}

		endRef := tok.End
		if endRef == nil {
			endRef = tok.Start
		}
		if endRef != nil && (!haveEnd || *endRef > lastEnd) {
			lastEnd, haveEnd = *endRef, true
		}
	}

	if haveStart && line.Start == nil {
		line.Start = &firstStart
	}
	if haveEnd && line.End == nil {
		line.End = &lastEnd
	}
	return line
}
// max returns v when it is positive and fallback otherwise.
//
// Despite its name this is not a numeric maximum: a positive v smaller than
// fallback is returned as-is. Within this package it shadows the built-in max.
func max(v float64, fallback float64) float64 {
	chosen := v
	if v <= 0 {
		chosen = fallback
	}
	return chosen
}

398
core/lyrics/ttml_test.go Normal file
View File

@ -0,0 +1,398 @@
package lyrics
import (
"testing"
"github.com/navidrome/navidrome/model"
)
// TestParseTTML_MultiLanguageAndTiming parses a document with two <div>
// elements in different languages and checks that each becomes its own track,
// in document order, with start times resolved from three different time
// expressions (offset seconds, clock time with frames/sub-frames, and ticks).
func TestParseTTML_MultiLanguageAndTiming(t *testing.T) {
	content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttp="http://www.w3.org/ns/ttml#parameter" ttp:frameRate="30" ttp:subFrameRate="2" ttp:tickRate="10">
<body>
<div xml:lang="eng" begin="1s">
<p begin="2s">Line one</p>
<p begin="00:00:04:15.1"><span>Line two</span><br/>with break</p>
</div>
<div xml:lang="por">
<p begin="45t">Linha</p>
</div>
</body>
</tt>`)

	list, err := parseTTML(content)
	if err != nil {
		t.Fatalf("parseTTML returned error: %v", err)
	}
	if len(list) != 2 {
		t.Fatalf("expected 2 lyric tracks, got %d", len(list))
	}

	eng := list[0]
	if eng.Lang != "eng" {
		t.Fatalf("expected first track language 'eng', got %q", eng.Lang)
	}
	if !eng.Synced {
		t.Fatal("expected first track to be synced")
	}
	// Guard the indexing below so a parser regression fails the test with a
	// message instead of panicking with an index-out-of-range.
	if len(eng.Line) != 2 {
		t.Fatalf("expected 2 lines in 'eng' track, got %d", len(eng.Line))
	}
	// 3000 = the div's 1s begin plus the paragraph's 2s offset.
	assertTimedLine(t, eng.Line[0], 3000, "Line one")
	// 4517 = 4s + 15 frames at 30fps + 1 sub-frame at 2 per frame, rounded;
	// the expected value does not include the div's 1s begin.
	assertTimedLine(t, eng.Line[1], 4517, "Line two\nwith break")

	por := list[1]
	if por.Lang != "por" {
		t.Fatalf("expected second track language 'por', got %q", por.Lang)
	}
	if len(por.Line) != 1 {
		t.Fatalf("expected 1 line in 'por' track, got %d", len(por.Line))
	}
	// 45 ticks at tickRate 10 is 4.5s.
	assertTimedLine(t, por.Line[0], 4500, "Linha")
}
// TestParseTTML_UnsupportedCueSkipped checks that a <p> whose begin attribute
// uses the wallclock(...) form is left out of the parsed track without an
// error, while the sibling cue with a plain "1s" offset is kept.
func TestParseTTML_UnsupportedCueSkipped(t *testing.T) {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml">
<body xml:lang="eng">
<div>
<p begin="wallclock(2026-01-01T00:00:00Z)">Skip me</p>
<p begin="1s">Keep me</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
if err != nil {
t.Fatalf("parseTTML returned error: %v", err)
}
if len(list) != 1 {
t.Fatalf("expected 1 lyric track, got %d", len(list))
}
// Only the "Keep me" cue is expected to survive.
if len(list[0].Line) != 1 {
t.Fatalf("expected 1 line in lyric track, got %d", len(list[0].Line))
}
assertTimedLine(t, list[0].Line[0], 1000, "Keep me")
}
// TestParseTTML_BeginEndDurWithInheritance checks that begin offsets on
// <body>, <div> and <p> accumulate, and that the language declared on <body>
// is inherited by the resulting track. The fixture also carries dur/end
// attributes, but only line start times are asserted here.
func TestParseTTML_BeginEndDurWithInheritance(t *testing.T) {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml">
<body xml:lang="eng" begin="10s">
<div begin="5s" dur="8s">
<p begin="1s" dur="2s">First line</p>
<p begin="3s" end="5s">Second line</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
if err != nil {
t.Fatalf("parseTTML returned error: %v", err)
}
if len(list) != 1 {
t.Fatalf("expected 1 lyric track, got %d", len(list))
}
if list[0].Lang != "eng" {
t.Fatalf("expected language 'eng', got %q", list[0].Lang)
}
if len(list[0].Line) != 2 {
t.Fatalf("expected 2 lines, got %d", len(list[0].Line))
}
// 16000 = 10s (body) + 5s (div) + 1s (p); 18000 = 10s + 5s + 3s.
assertTimedLine(t, list[0].Line[0], 16000, "First line")
assertTimedLine(t, list[0].Line[1], 18000, "Second line")
}
// TestParseTTML_NonStandardBareSecondOffsets checks that time values written
// as bare numbers without a unit suffix ("10", "0.170") are read as seconds
// and still accumulate with the parent's begin.
func TestParseTTML_NonStandardBareSecondOffsets(t *testing.T) {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml">
<body xml:lang="eng" begin="10">
<div>
<p begin="0.170">First line</p>
<p begin="3.710">Second line</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
if err != nil {
t.Fatalf("parseTTML returned error: %v", err)
}
if len(list) != 1 {
t.Fatalf("expected 1 lyric track, got %d", len(list))
}
if len(list[0].Line) != 2 {
t.Fatalf("expected 2 lines, got %d", len(list[0].Line))
}
// 10170 = body begin 10s + 0.170s; 13710 = 10s + 3.710s.
assertTimedLine(t, list[0].Line[0], 10170, "First line")
assertTimedLine(t, list[0].Line[1], 13710, "Second line")
}
// TestParseTTML_WordTimingTokens checks that timed <span> elements inside a
// <p> become per-token timing entries on the line, and that a ttm:role set on
// a wrapping span is reported as the role of the nested token.
func TestParseTTML_WordTimingTokens(t *testing.T) {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
<body xml:lang="eng">
<div>
<p begin="00:01.000" end="00:03.000">
<span begin="00:01.000" end="00:01.400">He</span><span begin="00:01.400" end="00:01.800">llo</span>
<span ttm:role="x-bg"><span begin="00:02.000" end="00:02.500">echo</span></span>
</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
if err != nil {
t.Fatalf("parseTTML returned error: %v", err)
}
if len(list) != 1 {
t.Fatalf("expected 1 lyric track, got %d", len(list))
}
if len(list[0].Line) != 1 {
t.Fatalf("expected 1 line, got %d", len(list[0].Line))
}
line := list[0].Line[0]
// Adjacent spans join without a separator ("He"+"llo"); the line break
// between the span groups in the fixture shows up as "\n" in the value.
assertTimedLine(t, line, 1000, "Hello\necho")
// The line end is the <p> end (3000), not the last token's end (2500).
if line.End == nil || *line.End != 3000 {
t.Fatalf("expected line end 3000, got %v", line.End)
}
if len(line.Token) != 3 {
t.Fatalf("expected 3 timed tokens, got %d", len(line.Token))
}
assertToken(t, line.Token[0], 1000, 1400, "He", "")
assertToken(t, line.Token[1], 1400, 1800, "llo", "")
assertToken(t, line.Token[2], 2000, 2500, "echo", "x-bg")
}
// TestParseTTML_AmbiguousDecimalTimingPrefersAbsoluteWhenInsideParentWindow
// covers bare decimal times that could be read either as offsets from the
// parent or as absolute positions. Here the child values fall inside the
// parent's begin/end window, and the expected results equal the literal
// values (43.444s -> 43444) rather than the parent begin plus the value.
func TestParseTTML_AmbiguousDecimalTimingPrefersAbsoluteWhenInsideParentWindow(t *testing.T) {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml">
<body xml:lang="eng">
<div begin="37.870" end="45.570">
<p begin="43.444" end="45.570">
<span begin="43.444" end="43.716">go</span>
<span begin="43.716" end="43.887">go</span>
</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
if err != nil {
t.Fatalf("parseTTML returned error: %v", err)
}
if len(list) != 1 || len(list[0].Line) != 1 {
t.Fatalf("expected one parsed lyric line, got %#v", list)
}
line := list[0].Line[0]
assertTimedLine(t, line, 43444, "go\ngo")
if line.End == nil || *line.End != 45570 {
t.Fatalf("expected line end 45570, got %v", line.End)
}
if len(line.Token) != 2 {
t.Fatalf("expected 2 timed tokens, got %d", len(line.Token))
}
// Token times are likewise expected as the literal span values.
assertToken(t, line.Token[0], 43444, 43716, "go", "")
assertToken(t, line.Token[1], 43716, 43887, "go", "")
}
// TestParseTTML_UnsyncedFallback checks the result for a document with no
// timing and no language: one unsynced track with the placeholder language
// "xxx", whose single line has a nil Start and the paragraph text as value.
func TestParseTTML_UnsyncedFallback(t *testing.T) {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml">
<body>
<div>
<p>No timing here</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
if err != nil {
t.Fatalf("parseTTML returned error: %v", err)
}
if len(list) != 1 {
t.Fatalf("expected 1 lyric track, got %d", len(list))
}
if list[0].Lang != "xxx" {
t.Fatalf("expected default language 'xxx', got %q", list[0].Lang)
}
if list[0].Synced {
t.Fatal("expected lyric track to be unsynced")
}
if len(list[0].Line) != 1 {
t.Fatalf("expected 1 line, got %d", len(list[0].Line))
}
if list[0].Line[0].Start != nil {
t.Fatalf("expected line start to be nil, got %v", *list[0].Line[0].Start)
}
if list[0].Line[0].Value != "No timing here" {
t.Fatalf("expected line value %q, got %q", "No timing here", list[0].Line[0].Value)
}
}
// TestParseTTML_MetadataTracksByKey checks that <translation> and
// <transliteration> entries in the iTunes metadata section become separate
// tracks (kinds "translation" and "pronunciation") after the main track, and
// that each <text for="..."> is linked to the body <p> with the matching
// itunes:key. An entry whose key matches no paragraph ("MISSING") is dropped.
func TestParseTTML_MetadataTracksByKey(t *testing.T) {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:itunes="http://music.apple.com/lyric-ttml-internal">
<head>
<metadata>
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
<translations>
<translation xml:lang="es">
<text for="L1">Hola</text>
<text for="MISSING">Skip me</text>
</translation>
</translations>
<transliterations>
<transliteration xml:lang="ja-Latn">
<text for="L2"><span begin="00:02.000" end="00:02.300" xmlns="http://www.w3.org/ns/ttml">ko</span><span begin="00:02.300" end="00:02.600" xmlns="http://www.w3.org/ns/ttml">nni</span></text>
</transliteration>
</transliterations>
</iTunesMetadata>
</metadata>
</head>
<body xml:lang="ja">
<div>
<p begin="00:01.000" end="00:01.500" itunes:key="L1">こんにちは</p>
<p begin="00:02.000" end="00:02.700" itunes:key="L2">こんばんは</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
if err != nil {
t.Fatalf("parseTTML returned error: %v", err)
}
// Expected order: main, translation, pronunciation.
if len(list) != 3 {
t.Fatalf("expected 3 lyric tracks, got %d", len(list))
}
main := list[0]
if main.Kind != "main" {
t.Fatalf("expected main track kind %q, got %q", "main", main.Kind)
}
if main.Lang != "ja" {
t.Fatalf("expected main track language %q, got %q", "ja", main.Lang)
}
if len(main.Line) != 2 {
t.Fatalf("expected 2 lines in main track, got %d", len(main.Line))
}
translation := list[1]
if translation.Kind != "translation" {
t.Fatalf("expected translation kind %q, got %q", "translation", translation.Kind)
}
if translation.Lang != "es" {
t.Fatalf("expected translation language %q, got %q", "es", translation.Lang)
}
// Only the entry keyed "L1" resolves; the "MISSING" one is skipped.
if len(translation.Line) != 1 {
t.Fatalf("expected 1 translation line, got %d", len(translation.Line))
}
// The untimed translation text takes its start/end from the "L1" paragraph.
assertTimedLine(t, translation.Line[0], 1000, "Hola")
if translation.Line[0].End == nil || *translation.Line[0].End != 1500 {
t.Fatalf("expected translation line end %d, got %v", 1500, translation.Line[0].End)
}
pronunciation := list[2]
if pronunciation.Kind != "pronunciation" {
t.Fatalf("expected pronunciation kind %q, got %q", "pronunciation", pronunciation.Kind)
}
// The language tag is expected lower-cased ("ja-Latn" -> "ja-latn").
if pronunciation.Lang != "ja-latn" {
t.Fatalf("expected pronunciation language %q, got %q", "ja-latn", pronunciation.Lang)
}
if len(pronunciation.Line) != 1 {
t.Fatalf("expected 1 pronunciation line, got %d", len(pronunciation.Line))
}
assertTimedLine(t, pronunciation.Line[0], 2000, "konni")
// The expected end is the last span's end (2600), not the "L2"
// paragraph's end (2700).
if pronunciation.Line[0].End == nil || *pronunciation.Line[0].End != 2600 {
t.Fatalf("expected pronunciation line end %d, got %v", 2600, pronunciation.Line[0].End)
}
if len(pronunciation.Line[0].Token) != 2 {
t.Fatalf("expected 2 pronunciation tokens, got %d", len(pronunciation.Line[0].Token))
}
assertToken(t, pronunciation.Line[0].Token[0], 2000, 2300, "ko", "")
assertToken(t, pronunciation.Line[0].Token[1], 2300, 2600, "nni", "")
}
// TestParseTTML_PronunciationBareDecimalEndTimes checks transliteration spans
// whose begin/end are bare decimal seconds (e.g. "2.747") under one minute.
// The expected token times equal the literal values in milliseconds, and the
// spaces between the spans are kept in the line value.
func TestParseTTML_PronunciationBareDecimalEndTimes(t *testing.T) {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:itunes="http://music.apple.com/lyric-ttml-internal">
<head>
<metadata>
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
<transliterations>
<transliteration xml:lang="ja-Latn">
<text for="L1"><span begin="2.747" end="3.018" xmlns="http://www.w3.org/ns/ttml">I</span> <span begin="3.018" end="3.179" xmlns="http://www.w3.org/ns/ttml">woke</span> <span begin="3.179" end="3.582" xmlns="http://www.w3.org/ns/ttml">up</span></text>
</transliteration>
</transliterations>
</iTunesMetadata>
</metadata>
</head>
<body xml:lang="ja">
<div>
<p begin="00:02.747" end="00:04.000" itunes:key="L1">起きた</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
if err != nil {
t.Fatalf("parseTTML returned error: %v", err)
}
// Locate the pronunciation track by kind rather than by position.
var pronunciation *model.Lyrics
for i := range list {
if list[i].Kind == "pronunciation" {
pronunciation = &list[i]
break
}
}
if pronunciation == nil {
t.Fatal("expected a pronunciation track")
}
if len(pronunciation.Line) != 1 {
t.Fatalf("expected 1 pronunciation line, got %d", len(pronunciation.Line))
}
line := pronunciation.Line[0]
assertTimedLine(t, line, 2747, "I woke up")
if len(line.Token) != 3 {
t.Fatalf("expected 3 tokens, got %d", len(line.Token))
}
assertToken(t, line.Token[0], 2747, 3018, "I", "")
assertToken(t, line.Token[1], 3018, 3179, "woke", "")
assertToken(t, line.Token[2], 3179, 3582, "up", "")
}
// assertTimedLine fails the test immediately unless line has a non-nil Start
// equal to expectedStart and a Value equal to expectedValue. Checks run in
// that order and the first mismatch is the one reported.
func assertTimedLine(t *testing.T, line model.Line, expectedStart int64, expectedValue string) {
	t.Helper()

	// The nil case is listed first, so the dereference in the second case is
	// only evaluated for a non-nil Start.
	switch {
	case line.Start == nil:
		t.Fatal("expected line start to be set, got nil")
	case *line.Start != expectedStart:
		t.Fatalf("expected line start %d, got %d", expectedStart, *line.Start)
	}

	if got := line.Value; got != expectedValue {
		t.Fatalf("expected line value %q, got %q", expectedValue, got)
	}
}
// assertToken fails the test immediately unless token has the expected start,
// end, value and role. Start and End must both be non-nil. Checks run in the
// order start, end, value, role, and only the first mismatch is reported.
func assertToken(t *testing.T, token model.Token, expectedStart int64, expectedEnd int64, expectedValue string, expectedRole string) {
	t.Helper()

	// Cases are evaluated top to bottom, so each pointer is nil-checked
	// before the case that dereferences it.
	switch {
	case token.Start == nil:
		t.Fatal("expected token start to be set, got nil")
	case *token.Start != expectedStart:
		t.Fatalf("expected token start %d, got %d", expectedStart, *token.Start)
	case token.End == nil:
		t.Fatal("expected token end to be set, got nil")
	case *token.End != expectedEnd:
		t.Fatalf("expected token end %d, got %d", expectedEnd, *token.End)
	case token.Value != expectedValue:
		t.Fatalf("expected token value %q, got %q", expectedValue, token.Value)
	case token.Role != expectedRole:
		t.Fatalf("expected token role %q, got %q", expectedRole, token.Role)
	}
}

View File

@ -11,14 +11,24 @@ import (
"github.com/navidrome/navidrome/utils/str" "github.com/navidrome/navidrome/utils/str"
) )
type Line struct { type Token struct {
Start *int64 `structs:"start,omitempty" json:"start,omitempty"` Start *int64 `structs:"start,omitempty" json:"start,omitempty"`
End *int64 `structs:"end,omitempty" json:"end,omitempty"`
Value string `structs:"value" json:"value"` Value string `structs:"value" json:"value"`
Role string `structs:"role,omitempty" json:"role,omitempty"`
}
type Line struct {
Start *int64 `structs:"start,omitempty" json:"start,omitempty"`
End *int64 `structs:"end,omitempty" json:"end,omitempty"`
Value string `structs:"value" json:"value"`
Token []Token `structs:"token,omitempty" json:"token,omitempty"`
} }
type Lyrics struct { type Lyrics struct {
DisplayArtist string `structs:"displayArtist,omitempty" json:"displayArtist,omitempty"` DisplayArtist string `structs:"displayArtist,omitempty" json:"displayArtist,omitempty"`
DisplayTitle string `structs:"displayTitle,omitempty" json:"displayTitle,omitempty"` DisplayTitle string `structs:"displayTitle,omitempty" json:"displayTitle,omitempty"`
Kind string `structs:"kind,omitempty" json:"kind,omitempty"`
Lang string `structs:"lang" json:"lang"` Lang string `structs:"lang" json:"lang"`
Line []Line `structs:"line" json:"line"` Line []Line `structs:"line" json:"line"`
Offset *int64 `structs:"offset,omitempty" json:"offset,omitempty"` Offset *int64 `structs:"offset,omitempty" json:"offset,omitempty"`

View File

@ -478,19 +478,47 @@ func mapExplicitStatus(explicitStatus string) string {
func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics) responses.StructuredLyric { func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics) responses.StructuredLyric {
lines := make([]responses.Line, len(lyrics.Line)) lines := make([]responses.Line, len(lyrics.Line))
tokenLines := make([]responses.TokenLine, 0, len(lyrics.Line))
for i, line := range lyrics.Line { for i, line := range lyrics.Line {
lines[i] = responses.Line{ lines[i] = responses.Line{
Start: line.Start, Start: line.Start,
Value: line.Value, Value: line.Value,
} }
if len(line.Token) == 0 {
continue
}
tokens := make([]responses.LyricToken, len(line.Token))
for j, token := range line.Token {
tokens[j] = responses.LyricToken{
Start: token.Start,
End: token.End,
Value: token.Value,
Role: token.Role,
}
}
tokenLines = append(tokenLines, responses.TokenLine{
Index: int32(i),
Start: line.Start,
End: line.End,
Value: line.Value,
Token: tokens,
})
}
kind := strings.TrimSpace(lyrics.Kind)
if kind == "" {
kind = "main"
} }
structured := responses.StructuredLyric{ structured := responses.StructuredLyric{
DisplayArtist: lyrics.DisplayArtist, DisplayArtist: lyrics.DisplayArtist,
DisplayTitle: lyrics.DisplayTitle, DisplayTitle: lyrics.DisplayTitle,
Kind: kind,
Lang: lyrics.Lang, Lang: lyrics.Lang,
Line: lines, Line: lines,
TokenLine: tokenLines,
Offset: lyrics.Offset, Offset: lyrics.Offset,
Synced: lyrics.Synced, Synced: lyrics.Synced,
} }

View File

@ -98,7 +98,9 @@ func (api *Router) GetLyrics(r *http.Request) (*responses.Subsonic, error) {
response := newResponse() response := newResponse()
lyricsResponse := responses.Lyrics{} lyricsResponse := responses.Lyrics{}
response.Lyrics = &lyricsResponse response.Lyrics = &lyricsResponse
mediaFiles, err := api.ds.MediaFile(r.Context()).GetAll(filter.SongsByArtistTitleWithLyricsFirst(artist, title)) opts := filter.SongsByArtistTitleWithLyricsFirst(artist, title)
opts.Max = 0
mediaFiles, err := api.ds.MediaFile(r.Context()).GetAll(opts)
if err != nil { if err != nil {
return nil, err return nil, err
@ -108,25 +110,26 @@ func (api *Router) GetLyrics(r *http.Request) (*responses.Subsonic, error) {
return response, nil return response, nil
} }
structuredLyrics, err := api.lyrics.GetLyrics(r.Context(), &mediaFiles[0]) for i := range mediaFiles {
if err != nil { structuredLyrics, err := api.lyrics.GetLyrics(r.Context(), &mediaFiles[i])
return nil, err if err != nil {
return nil, err
}
if len(structuredLyrics) == 0 {
continue
}
lyricsResponse.Artist = artist
lyricsResponse.Title = title
var lyricsText strings.Builder
for _, line := range structuredLyrics[0].Line {
lyricsText.WriteString(line.Value + "\n")
}
lyricsResponse.Value = lyricsText.String()
break
} }
if len(structuredLyrics) == 0 {
return response, nil
}
lyricsResponse.Artist = artist
lyricsResponse.Title = title
var lyricsText strings.Builder
for _, line := range structuredLyrics[0].Line {
lyricsText.WriteString(line.Value + "\n")
}
lyricsResponse.Value = lyricsText.String()
return response, nil return response, nil
} }

View File

@ -186,6 +186,36 @@ var _ = Describe("MediaRetrievalController", func() {
Expect(response.Lyrics.Title).To(Equal("Never Gonna Give You Up")) Expect(response.Lyrics.Title).To(Equal("Never Gonna Give You Up"))
Expect(response.Lyrics.Value).To(Equal("We're no strangers to love\nYou know the rules and so do I\n")) Expect(response.Lyrics.Value).To(Equal("We're no strangers to love\nYou know the rules and so do I\n"))
}) })
It("should continue searching candidates for sidecar lyrics", func() {
conf.Server.LyricsPriority = ".ttml,embedded"
r := newGetRequest("artist=Rick+Astley", "title=Never+Gonna+Give+You+Up")
baseTime := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
mockRepo.SetData(model.MediaFiles{
{
ID: "1",
Path: "tests/fixtures/01 Invisible (RED) Edit Version.mp3",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: "[]",
UpdatedAt: baseTime.Add(2 * time.Hour), // Newer, but no TTML sidecar
},
{
ID: "2",
Path: "tests/fixtures/test.mp3",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: "[]",
UpdatedAt: baseTime.Add(1 * time.Hour), // Older, but has TTML sidecar
},
})
response, err := router.GetLyrics(r)
Expect(err).ToNot(HaveOccurred())
Expect(response.Lyrics.Artist).To(Equal("Rick Astley"))
Expect(response.Lyrics.Title).To(Equal("Never Gonna Give You Up"))
Expect(response.Lyrics.Value).To(Equal("We're no strangers to love\nYou know the rules and so do I\n"))
})
}) })
Describe("GetLyricsBySongId", func() { Describe("GetLyricsBySongId", func() {
@ -202,6 +232,11 @@ var _ = Describe("MediaRetrievalController", func() {
Expect(realLyric.DisplayArtist).To(Equal(expectedLyric.DisplayArtist)) Expect(realLyric.DisplayArtist).To(Equal(expectedLyric.DisplayArtist))
Expect(realLyric.DisplayTitle).To(Equal(expectedLyric.DisplayTitle)) Expect(realLyric.DisplayTitle).To(Equal(expectedLyric.DisplayTitle))
expectedKind := expectedLyric.Kind
if expectedKind == "" {
expectedKind = "main"
}
Expect(realLyric.Kind).To(Equal(expectedKind))
Expect(realLyric.Lang).To(Equal(expectedLyric.Lang)) Expect(realLyric.Lang).To(Equal(expectedLyric.Lang))
Expect(realLyric.Synced).To(Equal(expectedLyric.Synced)) Expect(realLyric.Synced).To(Equal(expectedLyric.Synced))
@ -222,6 +257,40 @@ var _ = Describe("MediaRetrievalController", func() {
Expect(*realLine.Start).To(Equal(*expectedLine.Start)) Expect(*realLine.Start).To(Equal(*expectedLine.Start))
} }
} }
Expect(realLyric.TokenLine).To(HaveLen(len(expectedLyric.TokenLine)))
for j, realTokenLine := range realLyric.TokenLine {
expectedTokenLine := expectedLyric.TokenLine[j]
Expect(realTokenLine.Index).To(Equal(expectedTokenLine.Index))
Expect(realTokenLine.Value).To(Equal(expectedTokenLine.Value))
if expectedTokenLine.Start == nil {
Expect(realTokenLine.Start).To(BeNil())
} else {
Expect(*realTokenLine.Start).To(Equal(*expectedTokenLine.Start))
}
if expectedTokenLine.End == nil {
Expect(realTokenLine.End).To(BeNil())
} else {
Expect(*realTokenLine.End).To(Equal(*expectedTokenLine.End))
}
Expect(realTokenLine.Token).To(HaveLen(len(expectedTokenLine.Token)))
for k, realToken := range realTokenLine.Token {
expectedToken := expectedTokenLine.Token[k]
Expect(realToken.Value).To(Equal(expectedToken.Value))
Expect(realToken.Role).To(Equal(expectedToken.Role))
if expectedToken.Start == nil {
Expect(realToken.Start).To(BeNil())
} else {
Expect(*realToken.Start).To(Equal(*expectedToken.Start))
}
if expectedToken.End == nil {
Expect(realToken.End).To(BeNil())
} else {
Expect(*realToken.End).To(Equal(*expectedToken.End))
}
}
}
} }
} }
@ -323,6 +392,238 @@ var _ = Describe("MediaRetrievalController", func() {
}, },
}) })
}) })
It("should return multilingual TTML sidecar lyrics", func() {
conf.Server.LyricsPriority = ".ttml,embedded"
r := newGetRequest("id=1")
mockRepo.SetData(model.MediaFiles{
{
ID: "1",
Path: "tests/fixtures/test.mp3",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: "[]",
},
})
response, err := router.GetLyricsBySongId(r)
Expect(err).ToNot(HaveOccurred())
porTime := int64(18800)
ttmlTime := int64(22800)
compareResponses(response.LyricsList, responses.LyricsList{
StructuredLyrics: responses.StructuredLyrics{
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Lang: "eng",
Synced: true,
Line: []responses.Line{
{
Start: &times[0],
Value: "We're no strangers to love",
},
{
Start: &ttmlTime,
Value: "You know the rules and so do I",
},
},
},
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Lang: "por",
Synced: true,
Line: []responses.Line{
{
Start: &porTime,
Value: "Nao somos estranhos ao amor",
},
},
},
},
})
})
It("should return metadata-linked translation and pronunciation tracks from TTML", func() {
conf.Server.LyricsPriority = ".ttml,embedded"
r := newGetRequest("id=1")
mockRepo.SetData(model.MediaFiles{
{
ID: "1",
Path: "tests/fixtures/test-metadata.mp3",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: "[]",
},
})
response, err := router.GetLyricsBySongId(r)
Expect(err).ToNot(HaveOccurred())
mainStartA := int64(1000)
mainStartB := int64(2000)
tokenStartA := int64(2000)
tokenEndA := int64(2300)
tokenStartB := int64(2300)
tokenEndB := int64(2600)
compareResponses(response.LyricsList, responses.LyricsList{
StructuredLyrics: responses.StructuredLyrics{
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Kind: "main",
Lang: "ja",
Synced: true,
Line: []responses.Line{
{
Start: &mainStartA,
Value: "こんにちは",
},
{
Start: &mainStartB,
Value: "こんばんは",
},
},
},
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Kind: "translation",
Lang: "es",
Synced: true,
Line: []responses.Line{
{
Start: &mainStartA,
Value: "Hola",
},
},
},
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Kind: "pronunciation",
Lang: "ja-latn",
Synced: true,
Line: []responses.Line{
{
Start: &mainStartB,
Value: "konni",
},
},
TokenLine: []responses.TokenLine{
{
Index: 0,
Start: &mainStartB,
End: &tokenEndB,
Value: "konni",
Token: []responses.LyricToken{
{
Start: &tokenStartA,
End: &tokenEndA,
Value: "ko",
},
{
Start: &tokenStartB,
End: &tokenEndB,
Value: "nni",
},
},
},
},
},
},
})
})
It("should return tokenized lines for songLyrics v2 clients", func() {
r := newGetRequest("id=1")
lineStart := int64(1000)
lineEnd := int64(3000)
tokenStartA := int64(1000)
tokenEndA := int64(1400)
tokenStartB := int64(2000)
tokenEndB := int64(2500)
lyricsJson, err := json.Marshal(model.LyricList{
{
Lang: "eng",
Synced: true,
Line: []model.Line{
{
Start: &lineStart,
End: &lineEnd,
Value: "Hello echo",
Token: []model.Token{
{
Start: &tokenStartA,
End: &tokenEndA,
Value: "Hello",
},
{
Start: &tokenStartB,
End: &tokenEndB,
Value: "echo",
Role: "x-bg",
},
},
},
},
},
})
Expect(err).ToNot(HaveOccurred())
mockRepo.SetData(model.MediaFiles{
{
ID: "1",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: string(lyricsJson),
},
})
response, err := router.GetLyricsBySongId(r)
Expect(err).ToNot(HaveOccurred())
compareResponses(response.LyricsList, responses.LyricsList{
StructuredLyrics: responses.StructuredLyrics{
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Lang: "eng",
Synced: true,
Line: []responses.Line{
{
Start: &lineStart,
Value: "Hello echo",
},
},
TokenLine: []responses.TokenLine{
{
Index: 0,
Start: &lineStart,
End: &lineEnd,
Value: "Hello echo",
Token: []responses.LyricToken{
{
Start: &tokenStartA,
End: &tokenEndA,
Value: "Hello",
},
{
Start: &tokenStartB,
End: &tokenEndB,
Value: "echo",
Role: "x-bg",
},
},
},
},
},
},
})
})
}) })
}) })

View File

@ -11,7 +11,7 @@ func (api *Router) GetOpenSubsonicExtensions(_ *http.Request) (*responses.Subson
response.OpenSubsonicExtensions = &responses.OpenSubsonicExtensions{ response.OpenSubsonicExtensions = &responses.OpenSubsonicExtensions{
{Name: "transcodeOffset", Versions: []int32{1}}, {Name: "transcodeOffset", Versions: []int32{1}},
{Name: "formPost", Versions: []int32{1}}, {Name: "formPost", Versions: []int32{1}},
{Name: "songLyrics", Versions: []int32{1}}, {Name: "songLyrics", Versions: []int32{1, 2}},
{Name: "indexBasedQueue", Versions: []int32{1}}, {Name: "indexBasedQueue", Versions: []int32{1}},
{Name: "transcoding", Versions: []int32{1}}, {Name: "transcoding", Versions: []int32{1}},
} }

View File

@ -38,7 +38,7 @@ var _ = Describe("GetOpenSubsonicExtensions", func() {
HaveLen(5), HaveLen(5),
ContainElement(responses.OpenSubsonicExtension{Name: "transcodeOffset", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "transcodeOffset", Versions: []int32{1}}),
ContainElement(responses.OpenSubsonicExtension{Name: "formPost", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "formPost", Versions: []int32{1}}),
ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1, 2}}),
ContainElement(responses.OpenSubsonicExtension{Name: "indexBasedQueue", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "indexBasedQueue", Versions: []int32{1}}),
ContainElement(responses.OpenSubsonicExtension{Name: "transcoding", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "transcoding", Versions: []int32{1}}),
)) ))

View File

@ -537,13 +537,30 @@ type Line struct {
Value string `xml:",chardata" json:"value"` Value string `xml:",chardata" json:"value"`
} }
// LyricToken is one timed word/syllable of a lyric line in the API response.
// All fields are serialised as XML attributes; Start, End and Role are omitted
// when unset.
type LyricToken struct {
// Start and End are the token's timing; the API describes them as milliseconds.
Start *int64 `xml:"start,attr,omitempty" json:"start,omitempty"`
End *int64 `xml:"end,attr,omitempty" json:"end,omitempty"`
// Value is the token text. Unlike Start/End/Role it is always emitted.
Value string `xml:"value,attr" json:"value"`
// Role is an optional role label for the token (e.g. "x-bg").
Role string `xml:"role,attr,omitempty" json:"role,omitempty"`
}
// TokenLine carries the token-level timing for one lyric line in the API
// response. It is emitted alongside the plain line list rather than replacing it.
type TokenLine struct {
// Index is the position of the corresponding entry in the line list.
Index int32 `xml:"index,attr" json:"index"`
// Start and End are the line's timing; omitted when unset.
Start *int64 `xml:"start,attr,omitempty" json:"start,omitempty"`
End *int64 `xml:"end,attr,omitempty" json:"end,omitempty"`
// Value is the full text of the line; omitted when empty.
Value string `xml:"value,attr,omitempty" json:"value,omitempty"`
// Token lists the line's timed tokens as child <token> elements.
Token []LyricToken `xml:"token,omitempty" json:"token,omitempty"`
}
type StructuredLyric struct { type StructuredLyric struct {
DisplayArtist string `xml:"displayArtist,attr,omitempty" json:"displayArtist,omitempty"` DisplayArtist string `xml:"displayArtist,attr,omitempty" json:"displayArtist,omitempty"`
DisplayTitle string `xml:"displayTitle,attr,omitempty" json:"displayTitle,omitempty"` DisplayTitle string `xml:"displayTitle,attr,omitempty" json:"displayTitle,omitempty"`
Lang string `xml:"lang,attr" json:"lang"` Kind string `xml:"kind,attr,omitempty" json:"kind,omitempty"`
Line []Line `xml:"line" json:"line"` Lang string `xml:"lang,attr" json:"lang"`
Offset *int64 `xml:"offset,attr,omitempty" json:"offset,omitempty"` Line []Line `xml:"line" json:"line"`
Synced bool `xml:"synced,attr" json:"synced"` TokenLine []TokenLine `xml:"tokenLine,omitempty" json:"tokenLine,omitempty"`
Offset *int64 `xml:"offset,attr,omitempty" json:"offset,omitempty"`
Synced bool `xml:"synced,attr" json:"synced"`
} }
type StructuredLyrics []StructuredLyric type StructuredLyrics []StructuredLyric

2
tests/fixtures/bom-test.ttml vendored Normal file
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml"><body><div xml:lang="eng"><p begin="00:00:00.00">BOM test line</p></div></body></tt>

BIN
tests/fixtures/bom-utf16-test.ttml vendored Normal file

Binary file not shown.

25
tests/fixtures/test-metadata.ttml vendored Normal file
View File

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:itunes="http://music.apple.com/lyric-ttml-internal">
<head>
<metadata>
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
<translations>
<translation xml:lang="es">
<text for="L1">Hola</text>
</translation>
</translations>
<transliterations>
<transliteration xml:lang="ja-Latn">
<text for="L2"><span begin="00:02.000" end="00:02.300" xmlns="http://www.w3.org/ns/ttml">ko</span><span begin="00:02.300" end="00:02.600" xmlns="http://www.w3.org/ns/ttml">nni</span></text>
</transliteration>
</transliterations>
</iTunesMetadata>
</metadata>
</head>
<body xml:lang="ja">
<div>
<p begin="00:01.000" end="00:01.500" itunes:key="L1">こんにちは</p>
<p begin="00:02.000" end="00:02.700" itunes:key="L2">こんばんは</p>
</div>
</body>
</tt>

12
tests/fixtures/test.ttml vendored Normal file
View File

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttp="http://www.w3.org/ns/ttml#parameter" ttp:frameRate="30" ttp:subFrameRate="2" ttp:tickRate="10">
<body>
<div xml:lang="eng">
<p begin="00:00:18.80">We're no strangers to love</p>
<p begin="00:00:22:24">You know the rules and so do I</p>
</div>
<div xml:lang="por">
<p begin="188t">Nao somos estranhos ao amor</p>
</div>
</body>
</tt>

View File

@ -9,6 +9,7 @@ export const PLAYER_SET_VOLUME = 'PLAYER_SET_VOLUME'
export const PLAYER_SET_MODE = 'PLAYER_SET_MODE' export const PLAYER_SET_MODE = 'PLAYER_SET_MODE'
export const TRANSCODING_SET_PROFILE = 'TRANSCODING_SET_PROFILE' export const TRANSCODING_SET_PROFILE = 'TRANSCODING_SET_PROFILE'
export const PLAYER_REFRESH_QUEUE = 'PLAYER_REFRESH_QUEUE' export const PLAYER_REFRESH_QUEUE = 'PLAYER_REFRESH_QUEUE'
export const PLAYER_UPDATE_LYRIC = 'PLAYER_UPDATE_LYRIC'
export const setTrack = (data) => ({ export const setTrack = (data) => ({
type: PLAYER_SET_TRACK, type: PLAYER_SET_TRACK,
@ -114,3 +115,8 @@ export const refreshQueue = (resolvedUrls) => ({
type: PLAYER_REFRESH_QUEUE, type: PLAYER_REFRESH_QUEUE,
data: resolvedUrls, data: resolvedUrls,
}) })
/**
 * Action creator: builds a PLAYER_UPDATE_LYRIC action whose payload pairs a
 * track id with a lyric value.
 *
 * @param trackId id of the queue track the lyric belongs to
 * @param lyric lyric payload to attach (passed through unchanged)
 * @returns {{type: string, data: {trackId: *, lyric: *}}}
 */
export const updateQueueLyric = (trackId, lyric) => {
  const data = { trackId, lyric }
  return { type: PLAYER_UPDATE_LYRIC, data }
}

File diff suppressed because it is too large Load Diff

View File

@ -22,6 +22,7 @@ import {
refreshQueue, refreshQueue,
setPlayMode, setPlayMode,
setTranscodingProfile, setTranscodingProfile,
updateQueueLyric,
setVolume, setVolume,
syncQueue, syncQueue,
} from '../actions' } from '../actions'
@ -33,6 +34,25 @@ import { keyMap } from '../hotkeys'
import keyHandlers from './keyHandlers' import keyHandlers from './keyHandlers'
import { calculateGain } from '../utils/calculateReplayGain' import { calculateGain } from '../utils/calculateReplayGain'
import { detectBrowserProfile, decisionService } from '../transcode' import { detectBrowserProfile, decisionService } from '../transcode'
import {
getPreferredLyricLanguage,
hasStructuredLyricContent,
selectLyricLayers,
structuredLyricToLrc,
} from './lyrics'
import KaraokeLyricsOverlay from './KaraokeLyricsOverlay'
// Shared "no lyrics selected" value: every layer slot (main, translation,
// pronunciation) is null.
const emptyLyricLayers = {
main: null,
translation: null,
pronunciation: null,
}
/**
 * Returns a fresh layers object that always has the keys `main`,
 * `translation` and `pronunciation`. A missing or falsy layer becomes null,
 * and a null/undefined `layers` argument yields all-null slots.
 */
const normalizeLyricLayers = (layers) => {
  const { main, translation, pronunciation } = layers ?? {}
  return {
    main: main || null,
    translation: translation || null,
    pronunciation: pronunciation || null,
  }
}
const Player = () => { const Player = () => {
const theme = useCurrentTheme() const theme = useCurrentTheme()
@ -120,6 +140,72 @@ const Player = () => {
const gainInfo = useSelector((state) => state.replayGain) const gainInfo = useSelector((state) => state.replayGain)
const [context, setContext] = useState(null) const [context, setContext] = useState(null)
const [gainNode, setGainNode] = useState(null) const [gainNode, setGainNode] = useState(null)
const lyricCacheRef = useRef(new Map())
const lyricRequestIdRef = useRef(0)
const playerRef = useRef(null)
const [karaokeVisible, setKaraokeVisible] = useState(false)
const [selectedLyricLayers, setSelectedLyricLayers] =
useState(emptyLyricLayers)
const [showTranslation, setShowTranslation] = useState(false)
const [showPronunciation, setShowPronunciation] = useState(false)
const currentTrackId = playerState.current?.trackId
const currentTrackIsRadio = playerState.current?.isRadio
const selectedStructuredLyric = selectedLyricLayers.main
const hasKaraokeLyric = hasStructuredLyricContent(selectedStructuredLyric)
const hasTranslationLyric = hasStructuredLyricContent(
selectedLyricLayers.translation,
)
const hasPronunciationLyric = hasStructuredLyricContent(
selectedLyricLayers.pronunciation,
)
// Pushes LRC text for `trackId` directly into the mounted player instance
// (via playerRef) by calling its setState: every audioLists entry with that
// trackId gets the new `lyric`, and the player's top-level `lyric` is synced
// to the entry whose musicSrc matches the one currently loaded.
// Does nothing when trackId is falsy or when the ref does not expose setState.
const applyLyricToRuntimePlayer = useCallback((trackId, lyric) => {
if (!trackId) {
return
}
const player = playerRef.current
if (!player || typeof player.setState !== 'function') {
return
}
player.setState((prevState) => {
const prevLists = Array.isArray(prevState.audioLists)
? prevState.audioLists
: []
// Tracks whether any list entry was actually replaced.
let changed = false
const audioLists = prevLists.map((item) => {
if (item.trackId !== trackId) {
return item
}
if (item.lyric === lyric) {
return item
}
changed = true
return {
...item,
lyric,
}
})
// The entry being played is identified by its musicSrc, not by trackId.
const currentItem = audioLists.find(
(item) => item.musicSrc === prevState.musicSrc,
)
// Keep the previous lyric when the current entry has no string lyric.
const currentLyric =
typeof currentItem?.lyric === 'string'
? currentItem.lyric
: prevState.lyric
// Returning null from the updater means "no state change" for setState.
if (!changed && currentLyric === prevState.lyric) {
return null
}
return {
audioLists,
lyric: currentLyric,
}
})
}, [])
useEffect(() => { useEffect(() => {
if ( if (
@ -166,6 +252,107 @@ const Player = () => {
return () => window.removeEventListener('beforeunload', handleBeforeUnload) return () => window.removeEventListener('beforeunload', handleBeforeUnload)
}, [playerState, audioInstance]) }, [playerState, audioInstance])
// Resets the lyric overlay state whenever the current track (or its radio
// flag) changes. With no track or a radio stream everything is cleared and the
// overlay is hidden. Otherwise layers are restored from the per-track cache if
// present, the translation toggle is turned off, and the pronunciation toggle
// is turned on only when a pronunciation layer has content.
useEffect(() => {
if (!currentTrackId || currentTrackIsRadio) {
setSelectedLyricLayers(emptyLyricLayers)
setShowTranslation(false)
setShowPronunciation(false)
setKaraokeVisible(false)
return
}
const cached = lyricCacheRef.current.get(currentTrackId)
let layers = emptyLyricLayers
// Cache entries may be a plain LRC string (no layers), an object with
// `layers`, or an object with only `structuredLyric` (treated as main).
if (cached && typeof cached !== 'string') {
if (cached.layers) {
layers = normalizeLyricLayers(cached.layers)
} else if (cached.structuredLyric) {
layers = normalizeLyricLayers({
main: cached.structuredLyric,
})
}
}
setSelectedLyricLayers(layers)
setShowTranslation(false)
setShowPronunciation(hasStructuredLyricContent(layers.pronunciation))
}, [currentTrackId, currentTrackIsRadio])
// Loads lyrics for the current track. A cached entry is applied immediately;
// otherwise the structured lyrics are requested via
// subsonic.getLyricsBySongId, split into layers, converted to LRC, cached,
// and pushed to both the redux queue and the running player instance.
useEffect(() => {
// Bumping the counter on every run invalidates responses of requests that
// were started for a previous track (see the requestId checks below).
lyricRequestIdRef.current += 1
const requestId = lyricRequestIdRef.current
if (!currentTrackId || currentTrackIsRadio) {
return
}
const cached = lyricCacheRef.current.get(currentTrackId)
if (cached !== undefined) {
// Cache entries may be a plain LRC string or an object carrying `lrc`
// plus either `layers` or a single `structuredLyric`.
const cachedLyric =
typeof cached === 'string' ? cached : cached?.lrc || ''
const cachedLayers =
typeof cached === 'string'
? emptyLyricLayers
: cached?.layers
? normalizeLyricLayers(cached.layers)
: normalizeLyricLayers({ main: cached?.structuredLyric })
setSelectedLyricLayers(cachedLayers)
setShowTranslation(false)
setShowPronunciation(
hasStructuredLyricContent(cachedLayers.pronunciation),
)
if (cachedLyric) {
dispatch(updateQueueLyric(currentTrackId, cachedLyric))
applyLyricToRuntimePlayer(currentTrackId, cachedLyric)
}
return
}
subsonic
.getLyricsBySongId(currentTrackId)
.then((resp) => {
// Ignore the response if a newer run of this effect has started.
if (lyricRequestIdRef.current !== requestId) {
return
}
const structuredLyrics =
resp?.json?.['subsonic-response']?.lyricsList?.structuredLyrics || []
const layers = selectLyricLayers(
structuredLyrics,
getPreferredLyricLanguage(),
)
const lyric = layers.main ? structuredLyricToLrc(layers.main) : ''
// Successful responses are cached even when they contain no usable lyric.
lyricCacheRef.current.set(currentTrackId, {
lrc: lyric,
layers,
})
setSelectedLyricLayers(layers)
setShowTranslation(false)
setShowPronunciation(hasStructuredLyricContent(layers.pronunciation))
if (lyric !== '') {
dispatch(updateQueueLyric(currentTrackId, lyric))
applyLyricToRuntimePlayer(currentTrackId, lyric)
}
})
.catch(() => {
if (lyricRequestIdRef.current !== requestId) {
return
}
setSelectedLyricLayers(emptyLyricLayers)
setShowTranslation(false)
setShowPronunciation(false)
// Do not cache network/request failures as empty lyrics, so we can retry.
lyricCacheRef.current.delete(currentTrackId)
})
}, [dispatch, currentTrackId, currentTrackIsRadio, applyLyricToRuntimePlayer])
// Closes the karaoke overlay automatically when the selected main lyric has
// no displayable content (for example after switching to a track without
// lyrics).
useEffect(() => {
if (!hasKaraokeLyric && karaokeVisible) {
setKaraokeVisible(false)
}
}, [hasKaraokeLyric, karaokeVisible])
const defaultOptions = useMemo( const defaultOptions = useMemo(
() => ({ () => ({
theme: playerTheme, theme: playerTheme,
@ -177,7 +364,7 @@ const Player = () => {
clearPriorAudioLists: false, clearPriorAudioLists: false,
showDestroy: true, showDestroy: true,
showDownload: false, showDownload: false,
showLyric: true, showLyric: false,
showReload: false, showReload: false,
toggleMode: !isDesktop, toggleMode: !isDesktop,
glassBg: false, glassBg: false,
@ -214,12 +401,24 @@ const Player = () => {
(playerState.clear || playerState.playIndex === 0), (playerState.clear || playerState.playIndex === 0),
clearPriorAudioLists: playerState.clear, clearPriorAudioLists: playerState.clear,
extendsContent: ( extendsContent: (
<PlayerToolbar id={current.trackId} isRadio={current.isRadio} /> <PlayerToolbar
id={current.trackId}
isRadio={current.isRadio}
onToggleLyrics={() => setKaraokeVisible((visible) => !visible)}
lyricsActive={karaokeVisible}
lyricsDisabled={!hasKaraokeLyric}
/>
), ),
defaultVolume: isMobilePlayer ? 1 : playerState.volume, defaultVolume: isMobilePlayer ? 1 : playerState.volume,
showMediaSession: !current.isRadio, showMediaSession: !current.isRadio,
} }
}, [playerState, defaultOptions, isMobilePlayer]) }, [
playerState,
defaultOptions,
isMobilePlayer,
karaokeVisible,
hasKaraokeLyric,
])
const onAudioListsChange = useCallback( const onAudioListsChange = useCallback(
(_, audioLists, audioInfo) => dispatch(syncQueue(audioInfo, audioLists)), (_, audioLists, audioInfo) => dispatch(syncQueue(audioInfo, audioLists)),
@ -391,6 +590,7 @@ const Player = () => {
return ( return (
<ThemeProvider theme={createMuiTheme(theme)}> <ThemeProvider theme={createMuiTheme(theme)}>
<ReactJkMusicPlayer <ReactJkMusicPlayer
ref={playerRef}
{...options} {...options}
className={classes.player} className={classes.player}
onAudioListsChange={onAudioListsChange} onAudioListsChange={onAudioListsChange}
@ -406,6 +606,28 @@ const Player = () => {
onBeforeDestroy={onBeforeDestroy} onBeforeDestroy={onBeforeDestroy}
getAudioInstance={setAudioInstance} getAudioInstance={setAudioInstance}
/> />
<KaraokeLyricsOverlay
visible={karaokeVisible}
mainLyric={selectedLyricLayers.main}
translationLyric={selectedLyricLayers.translation}
pronunciationLyric={selectedLyricLayers.pronunciation}
showTranslation={showTranslation}
showPronunciation={showPronunciation}
translationEnabled={hasTranslationLyric}
pronunciationEnabled={hasPronunciationLyric}
onToggleTranslation={() =>
setShowTranslation((previous) =>
hasTranslationLyric ? !previous : false,
)
}
onTogglePronunciation={() =>
setShowPronunciation((previous) =>
hasPronunciationLyric ? !previous : false,
)
}
audioInstance={audioInstance}
onClose={() => setKaraokeVisible(false)}
/>
<GlobalHotKeys handlers={handlers} keyMap={keyMap} allowChanges /> <GlobalHotKeys handlers={handlers} keyMap={keyMap} allowChanges />
</ThemeProvider> </ThemeProvider>
) )

View File

@ -4,7 +4,9 @@ import { useGetOne } from 'react-admin'
import { GlobalHotKeys } from 'react-hotkeys' import { GlobalHotKeys } from 'react-hotkeys'
import IconButton from '@material-ui/core/IconButton' import IconButton from '@material-ui/core/IconButton'
import { useMediaQuery } from '@material-ui/core' import { useMediaQuery } from '@material-ui/core'
import Tooltip from '@material-ui/core/Tooltip'
import { RiSaveLine } from 'react-icons/ri' import { RiSaveLine } from 'react-icons/ri'
import { RiFileMusicLine } from 'react-icons/ri'
import { LoveButton, useToggleLove } from '../common' import { LoveButton, useToggleLove } from '../common'
import { openSaveQueueDialog } from '../actions' import { openSaveQueueDialog } from '../actions'
import { keyMap } from '../hotkeys' import { keyMap } from '../hotkeys'
@ -55,7 +57,13 @@ const useStyles = makeStyles((theme) => ({
}, },
})) }))
const PlayerToolbar = ({ id, isRadio }) => { const PlayerToolbar = ({
id,
isRadio,
onToggleLyrics,
lyricsActive = false,
lyricsDisabled = false,
}) => {
const dispatch = useDispatch() const dispatch = useDispatch()
const { data, loading } = useGetOne('song', id, { enabled: !!id && !isRadio }) const { data, loading } = useGetOne('song', id, { enabled: !!id && !isRadio })
const [toggleLove, toggling] = useToggleLove('song', data) const [toggleLove, toggling] = useToggleLove('song', data)
@ -99,6 +107,25 @@ const PlayerToolbar = ({ id, isRadio }) => {
/> />
) )
// Toolbar button that toggles the synchronized-lyrics overlay. It is disabled
// when no onToggleLyrics callback was supplied or when lyricsDisabled is set,
// and uses the primary color while lyricsActive is true.
// NOTE(review): the <span> wrapper is presumably there so the Tooltip still
// receives pointer events while the button is disabled; confirm against the
// Material-UI Tooltip docs.
const toggleLyricsButton = (
<Tooltip title="Toggle synchronized lyrics">
<span>
<IconButton
size={isDesktop ? 'small' : undefined}
onClick={onToggleLyrics}
disabled={!onToggleLyrics || lyricsDisabled}
data-testid="toggle-lyrics-button"
className={buttonClass}
color={lyricsActive ? 'primary' : 'default'}
>
<RiFileMusicLine
className={!isDesktop ? classes.mobileIcon : undefined}
/>
</IconButton>
</span>
</Tooltip>
)
return ( return (
<> <>
<GlobalHotKeys keyMap={keyMap} handlers={handlers} allowChanges /> <GlobalHotKeys keyMap={keyMap} handlers={handlers} allowChanges />
@ -106,11 +133,13 @@ const PlayerToolbar = ({ id, isRadio }) => {
<li className={`${listItemClass} item`}> <li className={`${listItemClass} item`}>
{saveQueueButton} {saveQueueButton}
{loveButton} {loveButton}
{toggleLyricsButton}
</li> </li>
) : ( ) : (
<> <>
<li className={`${listItemClass} item`}>{saveQueueButton}</li> <li className={`${listItemClass} item`}>{saveQueueButton}</li>
<li className={`${listItemClass} item`}>{loveButton}</li> <li className={`${listItemClass} item`}>{loveButton}</li>
<li className={`${listItemClass} item`}>{toggleLyricsButton}</li>
</> </>
)} )}
</> </>

View File

@ -71,6 +71,7 @@ describe('<PlayerToolbar />', () => {
// Verify both buttons are rendered // Verify both buttons are rendered
expect(screen.getByTestId('save-queue-button')).toBeInTheDocument() expect(screen.getByTestId('save-queue-button')).toBeInTheDocument()
expect(screen.getByTestId('love-button')).toBeInTheDocument() expect(screen.getByTestId('love-button')).toBeInTheDocument()
expect(screen.getByTestId('toggle-lyrics-button')).toBeInTheDocument()
// Verify desktop classes are applied // Verify desktop classes are applied
expect(listItems[0].className).toContain('toolbar') expect(listItems[0].className).toContain('toolbar')
@ -102,6 +103,14 @@ describe('<PlayerToolbar />', () => {
type: 'OPEN_SAVE_QUEUE_DIALOG', type: 'OPEN_SAVE_QUEUE_DIALOG',
}) })
}) })
it('triggers lyric toggle callback when lyrics button is clicked', () => {
const onToggleLyrics = vi.fn()
render(<PlayerToolbar id="song-1" onToggleLyrics={onToggleLyrics} />)
fireEvent.click(screen.getByTestId('toggle-lyrics-button'))
expect(onToggleLyrics).toHaveBeenCalledTimes(1)
})
}) })
describe('Mobile layout', () => { describe('Mobile layout', () => {
@ -114,11 +123,12 @@ describe('<PlayerToolbar />', () => {
// Each button should be in its own list item // Each button should be in its own list item
const listItems = screen.getAllByRole('listitem') const listItems = screen.getAllByRole('listitem')
expect(listItems).toHaveLength(2) expect(listItems).toHaveLength(3)
// Verify both buttons are rendered // Verify both buttons are rendered
expect(screen.getByTestId('save-queue-button')).toBeInTheDocument() expect(screen.getByTestId('save-queue-button')).toBeInTheDocument()
expect(screen.getByTestId('love-button')).toBeInTheDocument() expect(screen.getByTestId('love-button')).toBeInTheDocument()
expect(screen.getByTestId('toggle-lyrics-button')).toBeInTheDocument()
// Verify mobile classes are applied // Verify mobile classes are applied
expect(listItems[0].className).toContain('mobileListItem') expect(listItems[0].className).toContain('mobileListItem')
@ -140,6 +150,13 @@ describe('<PlayerToolbar />', () => {
const loveButton = screen.getByTestId('love-button') const loveButton = screen.getByTestId('love-button')
expect(loveButton).toBeDisabled() expect(loveButton).toBeDisabled()
}) })
it('disables lyrics button when lyrics are unavailable', () => {
render(<PlayerToolbar id="song-1" lyricsDisabled={true} />)
const lyricsButton = screen.getByTestId('toggle-lyrics-button')
expect(lyricsButton).toBeDisabled()
})
}) })
describe('Common behavior', () => { describe('Common behavior', () => {

View File

@ -0,0 +1,617 @@
/**
 * Normalizes a language tag for comparison: lower-cases it and converts
 * every underscore separator to a hyphen (e.g. "zh_Hans_CN" -> "zh-hans-cn").
 * Nullish or empty input yields an empty string.
 */
const normalizeLanguageTag = (language) =>
  // A string pattern would only replace the first underscore, so use a
  // global regex to handle tags with more than one separator.
  (language || '').toLowerCase().replace(/_/g, '-')
// Tolerance, in milliseconds, applied when comparing the playback position
// against line/token boundaries in getActiveKaraokeState.
const KARAOKE_SWITCH_EPSILON_MS = 18
// Values of the structured lyric `kind` field recognized by this module.
// Anything else (including a missing kind) is treated as LYRIC_KIND_MAIN.
const LYRIC_KIND_MAIN = 'main'
const LYRIC_KIND_TRANSLATION = 'translation'
const LYRIC_KIND_PRONUNCIATION = 'pronunciation'
/**
 * Renders a value as text, prefixing a single-character result with "0"
 * so LRC time fields are at least two characters wide.
 */
const padTime = (value) => {
  const digits = value.toString()
  if (digits.length !== 1) {
    return digits
  }
  return `0${digits}`
}
/**
 * Converts a raw time value to a finite number, or null when the value is
 * missing or not numeric.
 *
 * null, undefined, booleans and blank strings are treated as "missing".
 * Number() would coerce null/''/false to 0, which would turn an absent
 * timestamp into time zero and stop `toTime(x) ?? fallback` chains from ever
 * reaching their fallback (normalized lines and tokens carry explicit nulls).
 */
const toTime = (value) => {
  if (value == null || typeof value === 'boolean') {
    return null
  }
  if (typeof value === 'string' && value.trim() === '') {
    return null
  }
  const numeric = Number(value)
  return Number.isFinite(numeric) ? numeric : null
}
/**
 * Comparator for times that may be null/undefined. Missing values sort after
 * present ones, two missing values compare equal, and two present values
 * compare numerically (ascending).
 */
const compareNullableTime = (a, b) => {
  const aMissing = a == null
  const bMissing = b == null
  if (aMissing && bMissing) {
    return 0
  }
  if (aMissing || bMissing) {
    return aMissing ? 1 : -1
  }
  return a - b
}
/**
 * Returns a new array of token copies ordered by start time, then end time,
 * then original position. Tokens with a missing time sort after those with
 * one (see compareNullableTime). The input array is not modified.
 */
const sortTokensByStart = (tokens) => {
  // Remember each token's original position so ties keep their input order.
  const decorated = tokens.map((token, order) => ({ ...token, order }))
  decorated.sort((left, right) => {
    const startDiff = compareNullableTime(left.start, right.start)
    if (startDiff !== 0) {
      return startDiff
    }
    const endDiff = compareNullableTime(left.end, right.end)
    return endDiff !== 0 ? endDiff : left.order - right.order
  })
  // Strip the helper field again before returning.
  return decorated.map(({ order, ...token }) => token)
}
/**
 * Tells whether two normalized language tags refer to the same language:
 * either they are identical, or one is a "-"-separated extension of the
 * other (e.g. "pt" and "pt-br"). Empty or missing tags never match.
 */
const languageMatch = (candidate, preferred) => {
  if (!candidate || !preferred) {
    return false
  }
  if (candidate === preferred) {
    return true
  }
  const candidateExtendsPreferred = candidate.startsWith(`${preferred}-`)
  return candidateExtendsPreferred || preferred.startsWith(`${candidate}-`)
}
/**
 * Truthy when the lyric is flagged as synced and at least one of its lines
 * has a start value that converts to a finite number. Used as a filter
 * predicate, so a falsy input or falsy `synced` flag is passed straight back.
 */
const hasTimedLines = (lyric) => {
  if (!lyric) {
    return lyric
  }
  if (!lyric.synced) {
    return lyric.synced
  }
  if (!Array.isArray(lyric.line)) {
    return false
  }
  return lyric.line.some((line) => Number.isFinite(Number(line.start)))
}
/**
 * Converts a raw token into { start, end, value, role }.
 * Returns null for a missing token or one whose value is not a string or is
 * only whitespace. Times go through toTime; a non-string role becomes ''.
 */
const normalizeToken = (token) => {
  if (!token) {
    return null
  }
  const text = typeof token.value === 'string' ? token.value : ''
  if (text.trim() === '') {
    return null
  }
  const role = typeof token.role === 'string' ? token.role : ''
  return {
    start: toTime(token.start),
    end: toTime(token.end),
    value: text,
    role,
  }
}
/**
 * Converts a raw tokenLine entry into { index, start, end, value, tokens }.
 * `index` falls back to `fallbackIndex` when the entry's own index does not
 * convert to a finite number. Tokens are normalized, invalid ones dropped,
 * and the rest sorted by start time.
 */
const normalizeTokenLine = (tokenLine, fallbackIndex) => {
  const numericIndex = Number(tokenLine?.index)
  const rawTokens = Array.isArray(tokenLine?.token) ? tokenLine.token : []
  const usableTokens = rawTokens.map(normalizeToken).filter(Boolean)
  return {
    index: Number.isFinite(numericIndex) ? numericIndex : fallbackIndex,
    start: toTime(tokenLine?.start),
    end: toTime(tokenLine?.end),
    value: typeof tokenLine?.value === 'string' ? tokenLine.value : '',
    tokens: sortTokensByStart(usableTokens),
  }
}
/**
 * Maps a raw `kind` value to one of the LYRIC_KIND_* constants.
 * Matching ignores case and surrounding whitespace; anything that is not a
 * translation or pronunciation kind (including a missing kind) is "main".
 */
const normalizeLyricKind = (kind) => {
  const normalized = (kind || '').toLowerCase().trim()
  const isSecondaryKind =
    normalized === LYRIC_KIND_TRANSLATION ||
    normalized === LYRIC_KIND_PRONUNCIATION
  return isSecondaryKind ? normalized : LYRIC_KIND_MAIN
}
/**
 * Picks one lyric from a list by language preference.
 * Tries, in order: the full preferred tag, its base language (the part
 * before the first "-"), English ("en"), and finally the first entry.
 * Returns null for a non-array or empty list.
 */
const pickLyricByLanguage = (lyrics, preferredLanguage) => {
  if (!Array.isArray(lyrics) || lyrics.length === 0) {
    return null
  }
  const preferred = normalizeLanguageTag(preferredLanguage)
  const [preferredBase] = preferred.split('-')
  for (const wanted of [preferred, preferredBase, 'en']) {
    const hit = lyrics.find((lyric) =>
      languageMatch(normalizeLanguageTag(lyric.lang), wanted),
    )
    if (hit) {
      return hit
    }
  }
  return lyrics[0]
}
/**
 * Returns the { start, end } window of lines[index]. When the line has no
 * end of its own, the start of the following line is used instead.
 * Both fields are null when there is no line at that index.
 */
const lineTimeWindow = (lines, index) => {
  const current = lines[index]
  if (!current) {
    return { start: null, end: null }
  }
  const ownEnd = toTime(current.end)
  return {
    start: toTime(current.start),
    end: ownEnd ?? toTime(lines[index + 1]?.start),
  }
}
// Splits a line that is covered by a single "whole line" token into one
// synthetic token per whitespace-delimited word, dividing the available time
// window evenly between them.
//
// Returns null (meaning "keep the original token") when:
//  - the line text is empty or has fewer than two words,
//  - the token text is empty, or it neither equals the line text nor covers
//    at least 80% of its non-whitespace length,
//  - no window with a positive duration can be determined.
// Otherwise returns an array of { start, end, value, role } tokens.
const buildSyntheticWordTokens = (line, token) => {
const text = typeof line?.value === 'string' ? line.value : ''
if (!text.trim()) {
return null
}
// Each chunk is a run of non-whitespace plus the whitespace that follows it,
// so joining the chunks reproduces the text from its first word onwards.
const chunks = text.match(/\S+\s*/g) || []
if (chunks.length < 2) {
return null
}
const normalizedLine = text.replace(/\s+/g, ' ').trim().toLowerCase()
const normalizedTokenValue = (token?.value || '')
.replace(/\s+/g, ' ')
.trim()
.toLowerCase()
if (!normalizedTokenValue || !normalizedLine) {
return null
}
// Compare with all whitespace removed so spacing differences do not matter.
const compressedLine = normalizedLine.replace(/\s+/g, '')
const compressedToken = normalizedTokenValue.replace(/\s+/g, '')
const tokenLooksLikeWholeLine =
compressedToken === compressedLine ||
compressedToken.length >= Math.floor(compressedLine.length * 0.8)
if (!tokenLooksLikeWholeLine) {
return null
}
const tokenStart = toTime(token?.start)
const tokenEnd = toTime(token?.end)
const lineStart = toTime(line?.start)
const lineEnd = toTime(line?.end)
// The token's own timing wins; the line's timing is the fallback.
const baseStart = tokenStart ?? lineStart
const baseEnd = tokenEnd ?? lineEnd
if (
baseStart == null ||
baseEnd == null ||
!Number.isFinite(baseStart) ||
!Number.isFinite(baseEnd) ||
baseEnd <= baseStart
) {
return null
}
const duration = baseEnd - baseStart
// Every chunk gets an equal share of the window, regardless of its length.
return chunks.map((chunk, idx) => ({
start: baseStart + (duration * idx) / chunks.length,
end: baseStart + (duration * (idx + 1)) / chunks.length,
value: chunk,
role: typeof token?.role === 'string' ? token.role : '',
}))
}
/**
 * True when the structured lyric carries at least one tokenLine entry with a
 * token whose start converts to a finite number.
 */
export const hasTokenTiming = (structuredLyric) => {
  const tokenLines = structuredLyric?.tokenLine
  if (!Array.isArray(tokenLines)) {
    return false
  }
  const isTimed = (token) => Number.isFinite(Number(token?.start))
  return tokenLines.some(
    (entry) =>
      Array.isArray(entry?.token) && entry.token.some((token) => isTimed(token)),
  )
}
/**
 * True when the structured lyric has something to display: either a line
 * with non-blank text, or token-level timing (see hasTokenTiming).
 */
export const hasStructuredLyricContent = (structuredLyric) => {
  if (!structuredLyric) {
    return false
  }
  const lines = structuredLyric.line
  const hasText =
    Array.isArray(lines) &&
    lines.some(
      (line) => typeof line?.value === 'string' && line.value.trim() !== '',
    )
  return hasText || hasTokenTiming(structuredLyric)
}
/**
 * Resolves the language used to pick lyric tracks.
 * Order of precedence: the `locale` entry in localStorage, then
 * navigator.language, then 'en'.
 */
export const getPreferredLyricLanguage = () => {
  try {
    if (typeof window !== 'undefined' && window.localStorage) {
      const stored = window.localStorage.getItem('locale')
      if (stored) {
        return stored
      }
    }
  } catch (error) {
    // Reading localStorage can throw (e.g. a SecurityError when storage is
    // blocked by browser policy). Treat that as "no stored preference" so
    // lyric selection still works with the fallbacks below.
  }
  if (typeof navigator !== 'undefined' && navigator.language) {
    return navigator.language
  }
  return 'en'
}
/**
 * Chooses the main, translation and pronunciation lyric tracks from a list
 * of structured lyrics.
 *
 * Only synced tracks with at least one timed line are considered. Tracks are
 * grouped by their normalized `kind` and the best language match is taken
 * from each group. When no track is of kind "main", the main layer is chosen
 * from all synced tracks instead.
 *
 * @returns {{main: object|null, translation: object|null, pronunciation: object|null}}
 */
export const selectLyricLayers = (structuredLyrics, preferredLanguage) => {
  const noLayers = { main: null, translation: null, pronunciation: null }
  if (!Array.isArray(structuredLyrics)) {
    return noLayers
  }
  const synced = structuredLyrics.filter(hasTimedLines)
  if (synced.length === 0) {
    return noLayers
  }

  const grouped = {
    [LYRIC_KIND_MAIN]: [],
    [LYRIC_KIND_TRANSLATION]: [],
    [LYRIC_KIND_PRONUNCIATION]: [],
  }
  synced.forEach((lyric) => {
    grouped[normalizeLyricKind(lyric?.kind)].push(lyric)
  })

  const mainPool =
    grouped[LYRIC_KIND_MAIN].length > 0 ? grouped[LYRIC_KIND_MAIN] : synced
  const pick = (candidates) => pickLyricByLanguage(candidates, preferredLanguage)
  return {
    main: pick(mainPool),
    translation: pick(grouped[LYRIC_KIND_TRANSLATION]),
    pronunciation: pick(grouped[LYRIC_KIND_PRONUNCIATION]),
  }
}
/**
 * Convenience wrapper returning only the main layer chosen by
 * selectLyricLayers (null when no suitable track exists).
 */
export const pickStructuredLyric = (structuredLyrics, preferredLanguage) => {
  const { main } = selectLyricLayers(structuredLyrics, preferredLanguage)
  return main
}
/**
 * Serializes one structured lyric track to LRC text.
 *
 * Each line with a finite, non-negative start (milliseconds) becomes
 * "[mm:ss.cc] text\n", where cc is hundredths of a second (truncated).
 * Lines without a usable start are skipped. Returns '' when the track is
 * missing or has no `line` array.
 */
export const structuredLyricToLrc = (structuredLyric) => {
  if (!structuredLyric || !Array.isArray(structuredLyric.line)) {
    return ''
  }
  let lyricText = ''
  for (const line of structuredLyric.line) {
    // Optional chaining so a null/undefined entry is skipped, not a crash.
    const start = Number(line?.start)
    if (!Number.isFinite(start) || start < 0) {
      continue
    }
    const totalCentiseconds = Math.floor(start / 10)
    const centiseconds = totalCentiseconds % 100
    const totalSeconds = Math.floor(totalCentiseconds / 100)
    const sec = totalSeconds % 60
    // Minutes are NOT wrapped at 60: LRC has no hour field, so a line at
    // 61 minutes must be written as [61:..] rather than wrapping to [01:..].
    const min = Math.floor(totalSeconds / 60)
    lyricText += `[${padTime(min)}:${padTime(sec)}.${padTime(centiseconds)}] ${line.value || ''}\n`
  }
  return lyricText
}
/**
 * Picks the main lyric track for the preferred language and serializes it to
 * LRC. Returns '' when no suitable synced track exists.
 */
export const structuredLyricsToLrc = (structuredLyrics, preferredLanguage) => {
  const selected = pickStructuredLyric(structuredLyrics, preferredLanguage)
  return selected ? structuredLyricToLrc(selected) : ''
}
// Builds the normalized, time-ordered list of lines used by the karaoke
// overlay from a structured lyric.
//
// Each returned entry is { index, start, end, value, tokens }. When the lyric
// has tokenLine entries, one line is produced per tokenLine entry (its
// `index` selects the matching entry of `line` for fallback values);
// otherwise one line is produced per `line` entry with an empty token list.
// Returns [] for a missing lyric.
export const buildKaraokeLines = (structuredLyric) => {
if (!structuredLyric) {
return []
}
const baseLines = Array.isArray(structuredLyric.line)
? structuredLyric.line
: []
const rawTokenLines = Array.isArray(structuredLyric.tokenLine)
? structuredLyric.tokenLine
: []
const lines =
rawTokenLines.length > 0
? rawTokenLines.map((tokenLine, fallbackIndex) => {
const normalized = normalizeTokenLine(tokenLine, fallbackIndex)
const baseLine = baseLines[normalized.index] || {}
const tokens = normalized.tokens
// Last-resort line window: first token start / last token end.
const fallbackStart =
tokens.find((token) => token.start != null)?.start ?? null
const fallbackEnd =
[...tokens].reverse().find((token) => token.end != null)?.end ??
null
// Text precedence: tokenLine value, base line value, joined token values.
const value =
normalized.value ||
(typeof baseLine.value === 'string' ? baseLine.value : '') ||
tokens.map((token) => token.value).join('')
return {
index: normalized.index,
// Timing precedence: tokenLine, then base line, then token-derived.
start: normalized.start ?? toTime(baseLine.start) ?? fallbackStart,
end: normalized.end ?? toTime(baseLine.end) ?? fallbackEnd,
value,
tokens,
}
})
: baseLines.map((line, index) => ({
index,
start: toTime(line.start),
end: toTime(line.end),
value: typeof line.value === 'string' ? line.value : '',
tokens: [],
}))
const normalized = lines
// Drop lines that have neither text nor tokens.
.filter((line) => line.value || line.tokens.length > 0)
// Order by start time; lines without a start go last; ties (and pairs of
// untimed lines) are ordered by index.
.sort((a, b) => {
if (a.start == null && b.start == null) {
return a.index - b.index
}
if (a.start == null) {
return 1
}
if (b.start == null) {
return -1
}
if (a.start !== b.start) {
return a.start - b.start
}
return a.index - b.index
})
.map((line) => {
const nextLine = { ...line }
// A line with exactly one token may really be an untimed line wrapped in
// a single span; try to split it into evenly timed word tokens.
if (nextLine.tokens.length === 1) {
const syntheticTokens = buildSyntheticWordTokens(
nextLine,
nextLine.tokens[0],
)
if (syntheticTokens) {
nextLine.tokens = syntheticTokens
}
}
return nextLine
})
// Lines still lacking an end inherit the start of the following line.
for (let i = 0; i < normalized.length; i += 1) {
if (normalized[i].end == null) {
const nextStart = normalized[i + 1]?.start
if (nextStart != null) {
normalized[i].end = nextStart
}
}
}
return normalized
}
// Resolves the { start, end } highlight window of the token at `tokenIndex`
// within `line`, filling in missing boundaries from neighboring tokens or
// from an even split of the line's own window.
//
// `lineEndFallback` is used as the line end when the line has none.
// Returns { start: null, end: null } when there is no token at that index;
// either field may also be null when nothing allows it to be inferred.
export const resolveKaraokeTokenWindow = (
line,
tokenIndex,
lineEndFallback = null,
) => {
const tokens = Array.isArray(line?.tokens) ? line.tokens : []
const token = tokens[tokenIndex]
if (!token) {
return { start: null, end: null }
}
const prevToken = tokenIndex > 0 ? tokens[tokenIndex - 1] : null
const nextToken =
tokenIndex + 1 < tokens.length ? tokens[tokenIndex + 1] : null
const lineStart = toTime(line?.start)
const lineEnd = toTime(line?.end) ?? toTime(lineEndFallback)
const tokenCount = tokens.length
// The line window is usable only when both ends are known and it has a
// positive duration.
const hasLineWindow =
lineStart != null &&
lineEnd != null &&
Number.isFinite(lineStart) &&
Number.isFinite(lineEnd) &&
lineEnd > lineStart
// Estimated boundaries: the line window divided evenly among all tokens.
const estimatedStart =
hasLineWindow && tokenCount > 0
? lineStart + ((lineEnd - lineStart) * tokenIndex) / tokenCount
: null
const estimatedEnd =
hasLineWindow && tokenCount > 0
? lineStart + ((lineEnd - lineStart) * (tokenIndex + 1)) / tokenCount
: null
// Count explicit and distinct start/end values to detect "collapsed" timing,
// i.e. many tokens sharing the same few timestamps.
let explicitStartCount = 0
let explicitEndCount = 0
const uniqueStarts = new Set()
const uniqueEnds = new Set()
for (let i = 0; i < tokenCount; i += 1) {
const explicitStart = toTime(tokens[i]?.start)
if (explicitStart != null) {
explicitStartCount += 1
uniqueStarts.add(explicitStart)
}
const explicitEnd = toTime(tokens[i]?.end)
if (explicitEnd != null) {
explicitEndCount += 1
uniqueEnds.add(explicitEnd)
}
}
// Collapsed: more than one explicit value, but at most max(1, tokenCount / 4)
// distinct ones.
const collapsedStarts =
explicitStartCount > 1 && uniqueStarts.size <= Math.max(1, tokenCount / 4)
const collapsedEnds =
explicitEndCount > 1 && uniqueEnds.size <= Math.max(1, tokenCount / 4)
// With collapsed timing the explicit values are ignored entirely in favor of
// the even split.
const shouldForceEstimated =
hasLineWindow && tokenCount > 1 && (collapsedStarts || collapsedEnds)
if (shouldForceEstimated) {
return {
start: estimatedStart,
end: estimatedEnd,
}
}
// Missing start: previous token's end (or start), then estimate, then line start.
const prevEnd = toTime(prevToken?.end) ?? toTime(prevToken?.start)
let start = toTime(token.start)
if (start == null) {
start = prevEnd ?? estimatedStart ?? lineStart
}
// Missing end: next token's start, then estimate, then line end.
let end = toTime(token.end)
if (end == null) {
const nextDirectStart = toTime(nextToken?.start)
const nextEstimatedStart =
hasLineWindow && tokenIndex + 1 < tokenCount
? lineStart + ((lineEnd - lineStart) * (tokenIndex + 1)) / tokenCount
: null
end = nextDirectStart ?? nextEstimatedStart ?? estimatedEnd ?? lineEnd
}
// A lone token with a missing or (almost) zero-length window spans the
// whole line instead.
if (
tokenCount === 1 &&
hasLineWindow &&
(start == null || end == null || end <= start + 1)
) {
start = lineStart
end = lineEnd
}
// Never return a window that ends before it starts.
if (start != null && end != null && end < start) {
end = start
}
return { start, end }
}
// Determines which line and which token are active at `currentTimeMs`.
//
// Returns { lineIndex, tokenIndex }. Both are -1 when `lines` is empty or not
// an array; tokenIndex is -1 when the active line has no token that has
// started yet. A non-numeric time is treated as 0. All comparisons allow a
// tolerance of KARAOKE_SWITCH_EPSILON_MS.
export const getActiveKaraokeState = (lines, currentTimeMs) => {
if (!Array.isArray(lines) || lines.length === 0) {
return { lineIndex: -1, tokenIndex: -1 }
}
const current = Number.isFinite(Number(currentTimeMs))
? Number(currentTimeMs)
: 0
// Pass 1: advance to the last line that has started (or has no start),
// stopping at the first line that starts in the future.
let lineIndex = 0
for (let i = 0; i < lines.length; i += 1) {
const lineStart = toTime(lines[i]?.start)
if (lineStart == null || lineStart <= current + KARAOKE_SWITCH_EPSILON_MS) {
lineIndex = i
continue
}
break
}
// Pass 2: walk backwards from that candidate looking for a line whose window
// still contains the current time. If none does, the pass-1 candidate stays.
for (let i = lineIndex; i >= 0; i -= 1) {
const lineStart = toTime(lines[i]?.start)
const lineEnd = toTime(lines[i]?.end) ?? toTime(lines[i + 1]?.start)
if (lineStart != null && current + KARAOKE_SWITCH_EPSILON_MS < lineStart) {
continue
}
if (lineEnd == null || current <= lineEnd + KARAOKE_SWITCH_EPSILON_MS) {
lineIndex = i
break
}
}
const activeLine = lines[lineIndex] || null
const tokens = Array.isArray(activeLine?.tokens) ? activeLine.tokens : []
// Token scan: keep the latest token that has started; stop at the first one
// that has not ended yet, or at the first one that has not started.
let tokenIndex = -1
for (let i = 0; i < tokens.length; i += 1) {
const { start: tokenStart, end: tokenEnd } = resolveKaraokeTokenWindow(
activeLine,
i,
lines[lineIndex + 1]?.start,
)
if (
tokenStart == null ||
tokenStart <= current + KARAOKE_SWITCH_EPSILON_MS
) {
tokenIndex = i
if (tokenEnd != null && current <= tokenEnd + KARAOKE_SWITCH_EPSILON_MS) {
break
}
continue
}
break
}
return { lineIndex, tokenIndex }
}
// Finds the line in a secondary layer (translation/pronunciation) that best
// corresponds, by timing, to mainLines[mainIndex].
//
// Returns the index into `layerLines`, or -1 when the inputs are invalid,
// the main line has no start time, or no layer line is close enough.
// Candidates are scored by start-time distance plus a small penalty per
// position of index difference; the lowest score wins.
export const findLayerLineIndexForMain = (mainLines, layerLines, mainIndex) => {
if (
!Array.isArray(mainLines) ||
!Array.isArray(layerLines) ||
mainLines.length === 0 ||
layerLines.length === 0 ||
mainIndex < 0 ||
mainIndex >= mainLines.length
) {
return -1
}
const { start: mainStart, end: mainEnd } = lineTimeWindow(
mainLines,
mainIndex,
)
if (mainStart == null) {
return -1
}
const mainWindowEnd = mainEnd ?? mainStart
const mainWindowDuration = Math.max(0, mainWindowEnd - mainStart)
// Largest start-time distance accepted for a non-overlapping candidate:
// line duration + 420 ms, clamped to the range 550..1400 ms.
const maxDelta = Math.max(550, Math.min(1400, mainWindowDuration + 420))
let bestIdx = -1
let bestScore = Number.POSITIVE_INFINITY
for (let i = 0; i < layerLines.length; i += 1) {
const { start, end } = lineTimeWindow(layerLines, i)
// Preferred case: the candidate's window overlaps (or touches) the main
// line's window. These use the lower index penalty (30 per position).
if (start != null && end != null) {
const overlap = Math.min(end, mainEnd ?? end) - Math.max(start, mainStart)
if (overlap >= 0) {
const score = Math.abs(start - mainStart) + Math.abs(i - mainIndex) * 30
if (score < bestScore) {
bestScore = score
bestIdx = i
}
continue
}
}
// Fallback: no overlap (or no end); accept only if the start is within
// maxDelta, with a higher index penalty (45 per position).
if (start != null) {
if (Math.abs(start - mainStart) > maxDelta) {
continue
}
const score = Math.abs(start - mainStart) + Math.abs(i - mainIndex) * 45
if (score < bestScore) {
bestScore = score
bestIdx = i
}
}
}
return bestIdx
}
/**
 * Like findLayerLineIndexForMain, but also returns the matched line.
 *
 * @returns {{index: number, line: object|null}} index is -1 and line is null
 *   when no layer line matches.
 */
export const resolveLayerLineForMain = (mainLines, layerLines, mainIndex) => {
  const index = findLayerLineIndexForMain(mainLines, layerLines, mainIndex)
  if (index < 0) {
    return { index, line: null }
  }
  return { index, line: layerLines[index] }
}

View File

@ -0,0 +1,416 @@
import {
buildKaraokeLines,
findLayerLineIndexForMain,
getPreferredLyricLanguage,
getActiveKaraokeState,
hasStructuredLyricContent,
pickStructuredLyric,
resolveKaraokeTokenWindow,
resolveLayerLineForMain,
selectLyricLayers,
structuredLyricToLrc,
structuredLyricsToLrc,
} from './lyrics'
describe('lyrics helpers', () => {
beforeEach(() => {
localStorage.clear()
})
it('prefers a lyric track that matches the locale', () => {
const selected = pickStructuredLyric(
[
{
lang: 'eng',
synced: true,
line: [{ start: 1000, value: 'English line' }],
},
{
lang: 'pt-BR',
synced: true,
line: [{ start: 1000, value: 'Linha em portugues' }],
},
],
'pt-BR',
)
expect(selected.lang).toBe('pt-BR')
})
it('falls back to english when preferred locale is not available', () => {
const selected = pickStructuredLyric(
[
{
lang: 'eng',
synced: true,
line: [{ start: 1000, value: 'English line' }],
},
{
lang: 'deu',
synced: true,
line: [{ start: 1000, value: 'Deutsche Zeile' }],
},
],
'pt-BR',
)
expect(selected.lang).toBe('eng')
})
it('falls back to first synced track when english is missing', () => {
const selected = pickStructuredLyric(
[
{
lang: 'jpn',
synced: true,
line: [{ start: 1000, value: 'Nihongo' }],
},
{
lang: 'deu',
synced: true,
line: [{ start: 1000, value: 'Deutsch' }],
},
],
'pt-BR',
)
expect(selected.lang).toBe('jpn')
})
it('selects translation and pronunciation layers by kind', () => {
const layers = selectLyricLayers(
[
{
kind: 'main',
lang: 'ja',
synced: true,
line: [{ start: 1000, value: 'こんにちは' }],
},
{
kind: 'translation',
lang: 'es',
synced: true,
line: [{ start: 1000, value: 'Hola' }],
},
{
kind: 'pronunciation',
lang: 'ja-Latn',
synced: true,
line: [{ start: 1000, value: 'konnichiwa' }],
},
],
'es-MX',
)
expect(layers.main.lang).toBe('ja')
expect(layers.translation.lang).toBe('es')
expect(layers.pronunciation.lang).toBe('ja-Latn')
})
it('treats missing kind as main for backward compatibility', () => {
const layers = selectLyricLayers(
[
{
lang: 'eng',
synced: true,
line: [{ start: 1000, value: 'Main' }],
},
],
'eng',
)
expect(layers.main.lang).toBe('eng')
expect(layers.translation).toBeNull()
expect(layers.pronunciation).toBeNull()
})
it('matches layer line by timing for the active main line', () => {
const mainLines = [
{ index: 0, start: 1000, end: 1800, value: 'Line A', tokens: [] },
{ index: 1, start: 2000, end: 2800, value: 'Line B', tokens: [] },
]
const layerLines = [
{ index: 0, start: 900, end: 1750, value: 'A2', tokens: [] },
{ index: 1, start: 2050, end: 2900, value: 'B2', tokens: [] },
]
expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(1)
expect(resolveLayerLineForMain(mainLines, layerLines, 0).line.value).toBe(
'A2',
)
})
it('matches metadata layers by nearest timing even when indexes differ', () => {
const mainLines = [
{ index: 0, start: 1000, end: 1800, value: 'Line A', tokens: [] },
{ index: 1, start: 2000, end: 2800, value: 'Line B', tokens: [] },
{ index: 2, start: 3000, end: 3800, value: 'Line C', tokens: [] },
]
const layerLines = [
{ index: 2, start: 3020, end: 3820, value: 'C2', tokens: [] },
{ index: 0, start: 980, end: 1760, value: 'A2', tokens: [] },
{ index: 1, start: 2010, end: 2810, value: 'B2', tokens: [] },
]
expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(2)
expect(resolveLayerLineForMain(mainLines, layerLines, 2).line.value).toBe(
'C2',
)
})
it('returns no layer match when the nearest line is too far in time', () => {
const mainLines = [
{ index: 0, start: 1000, end: 1800, value: 'Line A', tokens: [] },
{ index: 1, start: 2000, end: 2800, value: 'Line B', tokens: [] },
]
const layerLines = [
{ index: 0, start: 60000, end: 60800, value: 'Far line', tokens: [] },
]
expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(-1)
expect(resolveLayerLineForMain(mainLines, layerLines, 1).line).toBeNull()
})
it('converts a structured lyric track to LRC', () => {
const lrc = structuredLyricToLrc({
lang: 'eng',
synced: true,
line: [
{ start: 18800, value: "We're no strangers to love" },
{ start: 22801, value: 'You know the rules and so do I' },
],
})
expect(lrc).toBe(
"[00:18.80] We're no strangers to love\n[00:22.80] You know the rules and so do I\n",
)
})
it('returns empty text when no synced lyrics are available', () => {
const lrc = structuredLyricsToLrc(
[{ lang: 'eng', synced: false, line: [{ value: 'Unsynced line' }] }],
'eng',
)
expect(lrc).toBe('')
})
it('reads preferred language from localStorage first', () => {
localStorage.setItem('locale', 'pt-BR')
expect(getPreferredLyricLanguage()).toBe('pt-BR')
})
it('builds karaoke lines from tokenLine payload', () => {
const lines = buildKaraokeLines({
lang: 'eng',
synced: true,
line: [{ start: 1000, end: 3000, value: 'Hello world' }],
tokenLine: [
{
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
token: [
{ start: 1000, end: 1500, value: 'Hello' },
{ start: 2000, end: 2500, value: 'world', role: 'x-bg' },
],
},
],
})
expect(lines).toEqual([
{
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
tokens: [
{ start: 1000, end: 1500, value: 'Hello', role: '' },
{ start: 2000, end: 2500, value: 'world', role: 'x-bg' },
],
},
])
})
it('sorts token timing by start to keep playback stable', () => {
const lines = buildKaraokeLines({
lang: 'eng',
synced: true,
line: [{ start: 1000, end: 3000, value: 'Hello world' }],
tokenLine: [
{
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
token: [
{ start: 2000, end: 2500, value: 'world', role: '' },
{ start: 1000, end: 1500, value: 'Hello', role: '' },
],
},
],
})
expect(lines[0].tokens.map((token) => token.value)).toEqual([
'Hello',
'world',
])
})
it('splits a single full-line token into synthetic word tokens', () => {
  // The only token spans the whole line (1000-2000 ms) and repeats the line text.
  const text = 'Da-la-lun, dun'
  const structuredLyric = {
    lang: 'ko-Latn',
    synced: true,
    line: [{ start: 1000, end: 2000, value: text }],
    tokenLine: [
      {
        index: 0,
        start: 1000,
        end: 2000,
        value: text,
        token: [{ start: 1000, end: 2000, value: text }],
      },
    ],
  }

  const lines = buildKaraokeLines(structuredLyric)

  expect(lines).toHaveLength(1)
  const [karaokeLine] = lines
  expect(karaokeLine.tokens).toHaveLength(2)
  // The first synthetic token keeps its trailing separator text.
  expect(karaokeLine.tokens[0].value).toBe('Da-la-lun, ')
  expect(karaokeLine.tokens[1].value).toBe('dun')

  // The two synthetic tokens are expected to split the line span in half.
  const firstWindow = resolveKaraokeTokenWindow(karaokeLine, 0)
  const secondWindow = resolveKaraokeTokenWindow(karaokeLine, 1)
  expect(firstWindow.start).toBeCloseTo(1000)
  expect(firstWindow.end).toBeCloseTo(1500)
  expect(secondWindow.start).toBeCloseTo(1500)
  expect(secondWindow.end).toBeCloseTo(2000)
})
it('detects active line and token for karaoke timing', () => {
  const firstLine = {
    index: 0,
    start: 1000,
    end: 3000,
    value: 'Hello world',
    tokens: [
      { start: 1000, end: 1500, value: 'Hello', role: '' },
      { start: 2000, end: 2500, value: 'world', role: '' },
    ],
  }
  const secondLine = {
    index: 1,
    start: 3500,
    end: 5000,
    value: 'Second line',
    tokens: [],
  }
  // 2200 ms falls inside the first line and inside its second token.
  const playbackMs = 2200

  const state = getActiveKaraokeState([firstLine, secondLine], playbackMs)

  expect(state).toEqual({ lineIndex: 0, tokenIndex: 1 })
})
it('resolves token window fallback boundaries from neighboring tokens', () => {
  // Tokens carry only a start time; the end must come from the next token or the line end.
  const line = {
    start: 1000,
    end: 3000,
    value: 'Hello world',
    tokens: [
      { start: 1200, value: 'Hello', role: '' },
      { start: 1800, value: 'world', role: '' },
    ],
  }

  const helloWindow = resolveKaraokeTokenWindow(line, 0)
  expect(helloWindow).toEqual({ start: 1200, end: 1800 })

  const worldWindow = resolveKaraokeTokenWindow(line, 1)
  expect(worldWindow).toEqual({ start: 1800, end: 3000 })
})
it('infers sequential token windows when token timings are missing', () => {
  // None of the tokens has start/end, so each is expected to get one third of the line span.
  const line = {
    start: 1000,
    end: 2000,
    value: 'A B C',
    tokens: ['A', 'B', 'C'].map((value) => ({ value, role: '' })),
  }
  const expectedWindows = [
    [1000, 1333.3333333333333],
    [1333.3333333333333, 1666.6666666666667],
    [1666.6666666666667, 2000],
  ]

  const resolved = expectedWindows.map((_, tokenIndex) =>
    resolveKaraokeTokenWindow(line, tokenIndex),
  )

  expectedWindows.forEach(([expectedStart, expectedEnd], tokenIndex) => {
    expect(resolved[tokenIndex].start).toBeCloseTo(expectedStart)
    expect(resolved[tokenIndex].end).toBeCloseTo(expectedEnd)
  })
})
it('falls back to sequential windows when token timings are collapsed', () => {
  // Every token repeats the full line span (1000-2000 ms), so per-token timing is unusable.
  const line = {
    start: 1000,
    end: 2000,
    value: 'A B C',
    tokens: ['A', 'B', 'C'].map((value) => ({
      start: 1000,
      end: 2000,
      value,
      role: '',
    })),
  }
  const expectedWindows = [
    [1000, 1333.3333333333333],
    [1333.3333333333333, 1666.6666666666667],
    [1666.6666666666667, 2000],
  ]

  const resolved = expectedWindows.map((_, tokenIndex) =>
    resolveKaraokeTokenWindow(line, tokenIndex),
  )

  expectedWindows.forEach(([expectedStart, expectedEnd], tokenIndex) => {
    expect(resolved[tokenIndex].start).toBeCloseTo(expectedStart)
    expect(resolved[tokenIndex].end).toBeCloseTo(expectedEnd)
  })
})
it('keeps token selection stable near tight token boundaries', () => {
  const line = {
    index: 0,
    start: 1000,
    end: 2000,
    value: 'A B',
    tokens: [
      { start: 1000, end: 1100, value: 'A', role: '' },
      { start: 1110, end: 1300, value: 'B', role: '' },
    ],
  }
  // 1108 ms sits in the 10 ms gap after 'A' ends and before 'B' starts;
  // the first token is expected to remain selected.
  const playbackMs = 1108

  const state = getActiveKaraokeState([line], playbackMs)

  expect(state).toEqual({ lineIndex: 0, tokenIndex: 0 })
})
it('reports structured lyric content when token timing exists', () => {
  // The payload has no `line` array at all, only a single timed token.
  const tokenOnlyLyric = {
    tokenLine: [{ token: [{ start: 100, value: 'a' }] }],
  }

  const hasContent = hasStructuredLyricContent(tokenOnlyLyric)

  expect(hasContent).toBe(true)
})
})

View File

@ -7,6 +7,7 @@ import {
PLAYER_CURRENT, PLAYER_CURRENT,
PLAYER_PLAY_NEXT, PLAYER_PLAY_NEXT,
PLAYER_PLAY_TRACKS, PLAYER_PLAY_TRACKS,
PLAYER_UPDATE_LYRIC,
PLAYER_SET_TRACK, PLAYER_SET_TRACK,
PLAYER_SET_VOLUME, PLAYER_SET_VOLUME,
PLAYER_SYNC_QUEUE, PLAYER_SYNC_QUEUE,
@ -60,21 +61,25 @@ const mapToAudioLists = (item) => {
let lyricText = '' let lyricText = ''
if (lyrics) { if (lyrics) {
const structured = JSON.parse(lyrics) try {
for (const structuredLyric of structured) { const structured = JSON.parse(lyrics)
if (structuredLyric.synced) { for (const structuredLyric of structured) {
for (const line of structuredLyric.line) { if (structuredLyric.synced) {
let time = Math.floor(line.start / 10) for (const line of structuredLyric.line) {
const ms = time % 100 let time = Math.floor(line.start / 10)
time = Math.floor(time / 100) const ms = time % 100
const sec = time % 60 time = Math.floor(time / 100)
time = Math.floor(time / 60) const sec = time % 60
const min = time % 60 time = Math.floor(time / 60)
const min = time % 60
ms.toString() ms.toString()
lyricText += `[${pad(min)}:${pad(sec)}.${pad(ms)}] ${line.value}\n` lyricText += `[${pad(min)}:${pad(sec)}.${pad(ms)}] ${line.value}\n`
}
} }
} }
} catch {
lyricText = ''
} }
} }
@ -206,6 +211,45 @@ const reduceMode = (state, { data: { mode } }) => {
} }
} }
/**
 * Reducer step for PLAYER_UPDATE_LYRIC: stores a new lyric string on every
 * queue entry whose `trackId` matches, and on `state.current` when it refers
 * to the same track.
 *
 * The queue and the current entry are checked independently, so a `current`
 * entry holding a stale lyric is refreshed even when the queue itself needs
 * no change (or does not contain the track).
 *
 * @param {object} state - Player state; reads `state.queue` (array) and `state.current`.
 * @param {object} action - Action whose `data` carries `trackId` and `lyric`.
 * @returns {object} The same `state` reference when nothing changed, otherwise
 *   a shallow copy with the updated `queue` and/or `current`.
 */
const reduceUpdateLyric = (state, { data: { trackId, lyric } }) => {
  if (!trackId) {
    return state
  }

  let queueChanged = false
  const queue = state.queue.map((item) => {
    if (item.trackId !== trackId || item.lyric === lyric) {
      return item
    }
    queueChanged = true
    return {
      ...item,
      lyric,
    }
  })

  const currentChanged =
    state.current?.trackId === trackId && state.current.lyric !== lyric

  // Preserve reference equality so subscribers do not see a spurious update.
  if (!queueChanged && !currentChanged) {
    return state
  }

  return {
    ...state,
    // Keep the original array reference when no queue entry was modified.
    queue: queueChanged ? queue : state.queue,
    current: currentChanged ? { ...state.current, lyric } : state.current,
  }
}
export const playerReducer = (previousState = initialState, payload) => { export const playerReducer = (previousState = initialState, payload) => {
const { type } = payload const { type } = payload
switch (type) { switch (type) {
@ -243,6 +287,8 @@ export const playerReducer = (previousState = initialState, payload) => {
previousState.savedPlayIndex >= 0 ? previousState.savedPlayIndex : 0, previousState.savedPlayIndex >= 0 ? previousState.savedPlayIndex : 0,
} }
} }
case PLAYER_UPDATE_LYRIC:
return reduceUpdateLyric(previousState, payload)
default: default:
return previousState return previousState
} }

View File

@ -1,11 +1,24 @@
import { describe, it, expect } from 'vitest' import { describe, expect, it, vi } from 'vitest'
import { playerReducer } from './playerReducer' import { playerReducer } from './playerReducer'
import { import {
PLAYER_SYNC_QUEUE,
PLAYER_CURRENT, PLAYER_CURRENT,
PLAYER_REFRESH_QUEUE, PLAYER_REFRESH_QUEUE,
PLAYER_SET_TRACK,
PLAYER_SYNC_QUEUE,
PLAYER_UPDATE_LYRIC,
} from '../actions' } from '../actions'
vi.mock('uuid', () => ({
v4: () => 'test-uuid',
}))
vi.mock('../subsonic', () => ({
default: {
streamUrl: vi.fn((id) => `/rest/stream?id=${id}`),
getCoverArtUrl: vi.fn(() => '/rest/getCoverArt?id=test'),
},
}))
describe('playerReducer', () => { describe('playerReducer', () => {
describe('pending track selection survives SYNC_QUEUE and premature CURRENT', () => { describe('pending track selection survives SYNC_QUEUE and premature CURRENT', () => {
// Simulates the real sequence when clicking a new song while one is playing: // Simulates the real sequence when clicking a new song while one is playing:
@ -54,8 +67,6 @@ describe('playerReducer', () => {
}) })
it('CURRENT for old track preserves pending playIndex', () => { it('CURRENT for old track preserves pending playIndex', () => {
// After SYNC_QUEUE, queue has new UUIDs. The old track's UUID (zzz)
// is at index 2, but playIndex is 0. This is a premature callback.
const stateAfterSync = { const stateAfterSync = {
...stateAfterPlayTracks, ...stateAfterPlayTracks,
queue: [ queue: [
@ -71,7 +82,7 @@ describe('playerReducer', () => {
const result = playerReducer(stateAfterSync, action) const result = playerReducer(stateAfterSync, action)
expect(result.playIndex).toBe(0) expect(result.playIndex).toBe(0)
expect(result.clear).toBe(true) expect(result.clear).toBe(true)
expect(result.savedPlayIndex).toBe(2) // preserved from before expect(result.savedPlayIndex).toBe(2)
}) })
it('CURRENT for correct track consumes pending playIndex', () => { it('CURRENT for correct track consumes pending playIndex', () => {
@ -83,7 +94,6 @@ describe('playerReducer', () => {
{ trackId: 's3', uuid: 'zzz', name: 'Song 3' }, { trackId: 's3', uuid: 'zzz', name: 'Song 3' },
], ],
} }
// Player switched to Song 1 (uuid 'xxx', index 0 == playIndex)
const action = { const action = {
type: PLAYER_CURRENT, type: PLAYER_CURRENT,
data: { uuid: 'xxx', name: 'Song 1', volume: 1 }, data: { uuid: 'xxx', name: 'Song 1', volume: 1 },
@ -142,4 +152,80 @@ describe('playerReducer', () => {
expect(result.playIndex).toBe(0) expect(result.playIndex).toBe(0)
}) })
}) })
it('maps embedded synced lyrics to LRC text', () => {
  const syncedTrack = {
    lang: 'eng',
    synced: true,
    line: [{ start: 1000, value: 'Line one' }],
  }
  const unsyncedTrack = {
    lang: 'eng',
    synced: false,
    line: [{ value: 'Unsynced line' }],
  }
  // The song carries its lyrics as a JSON string, not as a parsed array.
  const action = {
    type: PLAYER_SET_TRACK,
    data: {
      id: 'song-1',
      title: 'Test Song',
      artist: 'Test Artist',
      album: 'Test Album',
      duration: 60,
      lyrics: JSON.stringify([syncedTrack, unsyncedTrack]),
    },
  }

  const { queue } = playerReducer(undefined, action)

  expect(queue).toHaveLength(1)
  // Only the synced track contributes to the LRC text.
  expect(queue[0].lyric).toBe('[00:01.00] Line one\n')
})
it('updates queue lyric by track id', () => {
  // Seed the queue with a single track that has no lyrics.
  const song = {
    id: 'song-1',
    title: 'Test Song',
    artist: 'Test Artist',
    album: 'Test Album',
    duration: 60,
  }
  const seeded = playerReducer(undefined, {
    type: PLAYER_SET_TRACK,
    data: song,
  })
  const newLyric = '[00:01.00] Updated lyric\n'

  const { queue } = playerReducer(seeded, {
    type: PLAYER_UPDATE_LYRIC,
    data: { trackId: 'song-1', lyric: newLyric },
  })

  expect(queue[0].lyric).toBe('[00:01.00] Updated lyric\n')
})
it('returns same state when lyric update does not match any track', () => {
  const song = {
    id: 'song-1',
    title: 'Test Song',
    artist: 'Test Artist',
    album: 'Test Album',
    duration: 60,
  }
  const seeded = playerReducer(undefined, {
    type: PLAYER_SET_TRACK,
    data: song,
  })
  const unmatchedUpdate = {
    type: PLAYER_UPDATE_LYRIC,
    data: {
      trackId: 'missing-track',
      lyric: '[00:01.00] Updated lyric\n',
    },
  }

  const result = playerReducer(seeded, unmatchedUpdate)

  // toBe checks reference identity: the reducer must hand back the same object.
  expect(result).toBe(seeded)
})
}) })

View File

@ -120,6 +120,10 @@ const getTopSongs = (artist, count = 50) => {
return httpClient(url('getTopSongs', null, { artist, count })) return httpClient(url('getTopSongs', null, { artist, count }))
} }
// Issues a `getLyricsBySongId` request for the given song id through
// httpClient and returns whatever httpClient returns.
const getLyricsBySongId = (id) => httpClient(url('getLyricsBySongId', id))
const streamUrl = (id, options) => { const streamUrl = (id, options) => {
return baseUrl( return baseUrl(
url('stream', id, { url('stream', id, {
@ -149,4 +153,5 @@ export default {
getArtistInfo, getArtistInfo,
getTopSongs, getTopSongs,
getSimilarSongs2, getSimilarSongs2,
getLyricsBySongId,
} }

View File

@ -1,7 +1,12 @@
import { vi } from 'vitest' import { vi } from 'vitest'
import { COVER_ART_SIZE } from '../consts' import { COVER_ART_SIZE } from '../consts'
import { httpClient } from '../dataProvider'
import subsonic from './index' import subsonic from './index'
vi.mock('../dataProvider', () => ({
httpClient: vi.fn(() => Promise.resolve({})),
}))
describe('getCoverArtUrl', () => { describe('getCoverArtUrl', () => {
beforeEach(() => { beforeEach(() => {
// Mock window.location // Mock window.location
@ -178,3 +183,29 @@ describe('getAvatarUrl', () => {
expect(url).toContain('username=john') expect(url).toContain('username=john')
}) })
}) })
describe('getLyricsBySongId', () => {
  // Values served by the stubbed localStorage; any other key yields null.
  const storedValues = {
    username: 'testuser',
    'subsonic-token': 'testtoken',
    'subsonic-salt': 'testsalt',
  }

  beforeEach(() => {
    // Reset call history so the call-count assertion below only sees this test.
    vi.clearAllMocks()
    Object.defineProperty(window, 'localStorage', {
      value: {
        getItem: vi.fn((key) => storedValues[key] || null),
      },
    })
  })

  it('calls the getLyricsBySongId endpoint', async () => {
    await subsonic.getLyricsBySongId('song-1')

    expect(httpClient).toHaveBeenCalledTimes(1)
    // The URL is the first argument of the single recorded call.
    const [requestedUrl] = httpClient.mock.calls[0]
    expect(requestedUrl).toContain('/rest/getLyricsBySongId?')
    expect(requestedUrl).toContain('id=song-1')
  })
})