Merge 656a673eed0c6b251fdae0554a223cf0e5f4f221 into 2b9f32699348d520fc96acbd74be24b12702b02a

This commit is contained in:
Yuuta 2026-05-01 02:25:15 +03:00 committed by GitHub
commit 80bf5d94e9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
41 changed files with 7806 additions and 99 deletions

View File

@ -52,6 +52,7 @@ A share of the revenue helps fund the development of Navidrome at no additional
- **Multi-platform**, runs on macOS, Linux and Windows. **Docker** images are also provided - **Multi-platform**, runs on macOS, Linux and Windows. **Docker** images are also provided
- Ready to use binaries for all major platforms, including **Raspberry Pi** - Ready to use binaries for all major platforms, including **Raspberry Pi**
- Automatically **monitors your library** for changes, importing new files and reloading new metadata - Automatically **monitors your library** for changes, importing new files and reloading new metadata
- Supports lyrics from sidecar **.ttml**, **.elrc**, **.lrc**, **.srt** and **.txt** files as well as from embedded tags (the lookup order is configurable via `lyricspriority`)
- **Themeable**, modern and responsive **Web interface** based on [Material UI](https://material-ui.com) - **Themeable**, modern and responsive **Web interface** based on [Material UI](https://material-ui.com)
- **Compatible** with all Subsonic/Madsonic/Airsonic [clients](https://www.navidrome.org/docs/overview/#apps) - **Compatible** with all Subsonic/Madsonic/Airsonic [clients](https://www.navidrome.org/docs/overview/#apps)
- **Transcoding** on the fly. Can be set per user/player. **Opus encoding is supported** - **Transcoding** on the fly. Can be set per user/player. **Opus encoding is supported**

View File

@ -763,7 +763,7 @@ func setViperDefaults() {
viper.SetDefault("artistartpriority", "artist.*, album/artist.*, external") viper.SetDefault("artistartpriority", "artist.*, album/artist.*, external")
viper.SetDefault("artistimagefolder", "") viper.SetDefault("artistimagefolder", "")
viper.SetDefault("discartpriority", "disc*.*, cd*.*, cover.*, folder.*, front.*, discsubtitle, embedded") viper.SetDefault("discartpriority", "disc*.*, cd*.*, cover.*, folder.*, front.*, discsubtitle, embedded")
viper.SetDefault("lyricspriority", ".lrc,.txt,embedded") viper.SetDefault("lyricspriority", ".ttml,.elrc,.lrc,.srt,.txt,embedded")
viper.SetDefault("enablegravatar", false) viper.SetDefault("enablegravatar", false)
viper.SetDefault("enablefavourites", true) viper.SetDefault("enablefavourites", true)
viper.SetDefault("enablestarrating", true) viper.SetDefault("enablestarrating", true)

View File

@ -14,6 +14,12 @@ type Lyrics interface {
GetLyrics(ctx context.Context, mf *model.MediaFile) (model.LyricList, error) GetLyrics(ctx context.Context, mf *model.MediaFile) (model.LyricList, error)
} }
// BatchLyrics can resolve lyrics across multiple candidate media files while
// still honoring the configured source priority globally.
//
// It is an optional capability: callers holding a Lyrics value can type-assert
// to BatchLyrics to find out whether the implementation supports it.
type BatchLyrics interface {
// GetLyricsForMediaFiles returns the lyrics found for the given set of
// candidate media files, or an error.
GetLyricsForMediaFiles(ctx context.Context, mediaFiles []model.MediaFile) (model.LyricList, error)
}
// PluginLoader discovers and loads lyrics provider plugins. // PluginLoader discovers and loads lyrics provider plugins.
type PluginLoader interface { type PluginLoader interface {
LoadLyricsProvider(name string) (Lyrics, bool) LoadLyricsProvider(name string) (Lyrics, bool)
@ -32,28 +38,53 @@ func NewLyrics(pluginLoader PluginLoader) Lyrics {
// GetLyrics returns lyrics for the given media file, trying sources in the // GetLyrics returns lyrics for the given media file, trying sources in the
// order specified by conf.Server.LyricsPriority. // order specified by conf.Server.LyricsPriority.
func (l *lyricsService) GetLyrics(ctx context.Context, mf *model.MediaFile) (model.LyricList, error) { func (l *lyricsService) GetLyrics(ctx context.Context, mf *model.MediaFile) (model.LyricList, error) {
var lyricsList model.LyricList return l.getLyricsForCandidates(ctx, []*model.MediaFile{mf})
var err error }
// GetLyricsForMediaFiles looks up lyrics for a group of media files (for
// example duplicates of the same track). The slice elements are handed to
// getLyricsForCandidates as pointers, in their original order, so the source
// priority is evaluated over the whole group rather than file by file.
func (l *lyricsService) GetLyricsForMediaFiles(ctx context.Context, mediaFiles []model.MediaFile) (model.LyricList, error) {
	refs := make([]*model.MediaFile, len(mediaFiles))
	for idx := range mediaFiles {
		// Point at the slice element itself instead of a loop copy.
		refs[idx] = &mediaFiles[idx]
	}
	return l.getLyricsForCandidates(ctx, refs)
}
func (l *lyricsService) getLyricsForCandidates(ctx context.Context, mediaFiles []*model.MediaFile) (model.LyricList, error) {
for pattern := range strings.SplitSeq(conf.Server.LyricsPriority, ",") { for pattern := range strings.SplitSeq(conf.Server.LyricsPriority, ",") {
pattern = strings.TrimSpace(pattern) pattern = strings.TrimSpace(pattern)
switch { if pattern == "" {
case strings.EqualFold(pattern, "embedded"): continue
lyricsList, err = fromEmbedded(ctx, mf)
case strings.HasPrefix(pattern, "."):
lyricsList, err = fromExternalFile(ctx, mf, strings.ToLower(pattern))
default:
lyricsList, err = l.fromPlugin(ctx, mf, pattern)
} }
for _, mf := range mediaFiles {
if mf == nil {
continue
}
lyricsList, err := l.getLyricsFromSource(ctx, mf, pattern)
if err != nil { if err != nil {
log.Error(ctx, "error getting lyrics", "source", pattern, err) log.Error(ctx, "error getting lyrics", "source", pattern, err)
continue
} }
if len(lyricsList) > 0 { if len(lyricsList) > 0 {
return lyricsList, nil return lyricsList, nil
} }
} }
}
return nil, nil return nil, nil
} }
// getLyricsFromSource fetches lyrics for mf from the single source named by
// pattern:
//   - "embedded" (compared case-insensitively) reads the embedded lyrics;
//   - a pattern starting with "." is treated as a sidecar file suffix and is
//     lower-cased before the lookup;
//   - anything else is treated as the name of a lyrics plugin.
func (l *lyricsService) getLyricsFromSource(ctx context.Context, mf *model.MediaFile, pattern string) (model.LyricList, error) {
	if strings.EqualFold(pattern, "embedded") {
		return fromEmbedded(ctx, mf)
	}
	if strings.HasPrefix(pattern, ".") {
		return fromExternalFile(ctx, mf, strings.ToLower(pattern))
	}
	return l.fromPlugin(ctx, mf, pattern)
}

View File

@ -45,6 +45,71 @@ var _ = Describe("sources", func() {
}, },
} }
elrcLyrics := model.LyricList{
model.Lyrics{
DisplayArtist: "ELRC Artist",
DisplayTitle: "ELRC Song",
Lang: "eng",
Line: []model.Line{
{
Start: gg.P(int64(1000)),
End: gg.P(int64(3000)),
Value: "Lead words",
Cue: []model.Cue{
{
Start: gg.P(int64(1000)),
End: gg.P(int64(1500)),
Value: "Lead ",
ByteStart: 0,
ByteEnd: 4,
},
{
Start: gg.P(int64(1500)),
End: gg.P(int64(3000)),
Value: "words",
ByteStart: 5,
ByteEnd: 9,
},
},
},
{
Start: gg.P(int64(3000)),
Value: "Fallback line",
},
},
Synced: true,
},
}
ttmlLyrics := model.LyricList{
model.Lyrics{
Kind: "main",
Lang: "eng",
Line: []model.Line{
{
Start: gg.P(int64(18800)),
Value: "We're no strangers to love",
},
{
Start: gg.P(int64(22800)),
Value: "You know the rules and so do I",
},
},
Synced: true,
},
model.Lyrics{
Kind: "main",
Lang: "por",
Line: []model.Line{
{
Start: gg.P(int64(18800)),
Value: "Nao somos estranhos ao amor",
},
},
Synced: true,
},
}
unsyncedLyrics := model.LyricList{ unsyncedLyrics := model.LyricList{
model.Lyrics{ model.Lyrics{
Lang: "xxx", Lang: "xxx",
@ -60,6 +125,25 @@ var _ = Describe("sources", func() {
}, },
} }
srtLyrics := model.LyricList{
model.Lyrics{
Lang: "xxx",
Line: []model.Line{
{
Start: gg.P(int64(18800)),
End: gg.P(int64(22800)),
Value: "We're from subtitles",
},
{
Start: gg.P(int64(22801)),
End: gg.P(int64(26000)),
Value: "Another subtitle line",
},
},
Synced: true,
},
}
BeforeEach(func() { BeforeEach(func() {
DeferCleanup(configtest.SetupConfig()) DeferCleanup(configtest.SetupConfig())
@ -81,7 +165,33 @@ var _ = Describe("sources", func() {
}, },
Entry("embedded > lrc > txt", "embedded,.lrc,.txt", embeddedLyrics), Entry("embedded > lrc > txt", "embedded,.lrc,.txt", embeddedLyrics),
Entry("lrc > embedded > txt", ".lrc,embedded,.txt", syncedLyrics), Entry("lrc > embedded > txt", ".lrc,embedded,.txt", syncedLyrics),
Entry("txt > lrc > embedded", ".txt,.lrc,embedded", unsyncedLyrics)) Entry("elrc > lrc > embedded", ".elrc,.lrc,embedded", elrcLyrics),
Entry("srt > txt > embedded", ".srt,.txt,embedded", srtLyrics),
Entry("txt > lrc > embedded", ".txt,.lrc,embedded", unsyncedLyrics),
Entry("ttml > elrc > lrc > srt > embedded", ".ttml,.elrc,.lrc,.srt,embedded", ttmlLyrics))
It("resolves source priority across duplicate media files", func() {
conf.Server.LyricsPriority = ".ttml,embedded"
embeddedJSON, err := json.Marshal(embeddedLyrics)
Expect(err).To(BeNil())
svc := lyrics.NewLyrics(nil)
batchSvc, ok := svc.(lyrics.BatchLyrics)
Expect(ok).To(BeTrue())
list, err := batchSvc.GetLyricsForMediaFiles(ctx, []model.MediaFile{
{
Lyrics: string(embeddedJSON),
Path: "tests/fixtures/01 Invisible (RED) Edit Version.mp3",
},
{
Lyrics: "[]",
Path: "tests/fixtures/test.mp3",
},
})
Expect(err).To(BeNil())
Expect(list).To(Equal(ttmlLyrics))
})
Context("Errors", func() { Context("Errors", func() {
var RegularUserContext = XContext var RegularUserContext = XContext

View File

@ -5,6 +5,7 @@ import (
"errors" "errors"
"os" "os"
"path" "path"
"strings"
"github.com/navidrome/navidrome/log" "github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model" "github.com/navidrome/navidrome/model"
@ -36,18 +37,38 @@ func fromExternalFile(ctx context.Context, mf *model.MediaFile, suffix string) (
return nil, err return nil, err
} }
var list model.LyricList
switch {
case strings.EqualFold(suffix, ".ttml"):
list, err = parseTTML(contents)
if err != nil {
log.Error(ctx, "error parsing ttml external file", "path", externalLyric, err)
return nil, err
}
case strings.EqualFold(suffix, ".srt"):
list, err = parseSRT(contents)
if err != nil {
log.Error(ctx, "error parsing srt external file", "path", externalLyric, err)
return nil, err
}
default:
lyrics, err := model.ToLyrics("xxx", string(contents)) lyrics, err := model.ToLyrics("xxx", string(contents))
if err != nil { if err != nil {
log.Error(ctx, "error parsing lyric external file", "path", externalLyric, err) log.Error(ctx, "error parsing lyric external file", "path", externalLyric, err)
return nil, err return nil, err
} else if lyrics == nil { }
if lyrics != nil {
list = model.LyricList{*lyrics}
}
}
if len(list) == 0 {
log.Trace(ctx, "empty lyrics from external file", "path", externalLyric) log.Trace(ctx, "empty lyrics from external file", "path", externalLyric)
return nil, nil return nil, nil
} }
log.Trace(ctx, "retrieved lyrics from external file", "path", externalLyric) log.Trace(ctx, "retrieved lyrics from external file", "path", externalLyric)
return list, nil
return model.LyricList{*lyrics}, nil
} }
// fromPlugin attempts to load lyrics from a plugin with the given name. // fromPlugin attempts to load lyrics from a plugin with the given name.

View File

@ -88,6 +88,89 @@ var _ = Describe("sources", func() {
})) }))
}) })
It("should return Enhanced LRC lyrics with word-level cues from a file", func() {
mf := model.MediaFile{Path: "tests/fixtures/test-enhanced.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".lrc")
Expect(err).To(BeNil())
Expect(lyrics).To(HaveLen(1))
Expect(lyrics[0].DisplayArtist).To(Equal("Test Artist"))
Expect(lyrics[0].DisplayTitle).To(Equal("Enhanced Test"))
Expect(lyrics[0].Lang).To(Equal("eng"))
Expect(lyrics[0].Synced).To(BeTrue())
Expect(lyrics[0].Line).To(HaveLen(3))
// Line 1: has inline markers → Cue array populated
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(1000))))
Expect(lyrics[0].Line[0].End).To(Equal(gg.P(int64(3000))))
Expect(lyrics[0].Line[0].Value).To(Equal("Some lyrics here"))
Expect(lyrics[0].Line[0].Cue).To(HaveLen(3))
Expect(*lyrics[0].Line[0].Cue[0].Start).To(Equal(int64(1000)))
Expect(lyrics[0].Line[0].Cue[0].Value).To(Equal("Some "))
Expect(lyrics[0].Line[0].Cue[0].End).To(Equal(gg.P(int64(1500))))
Expect(lyrics[0].Line[0].Cue[0].ByteStart).To(Equal(0))
Expect(lyrics[0].Line[0].Cue[0].ByteEnd).To(Equal(4))
Expect(*lyrics[0].Line[0].Cue[1].Start).To(Equal(int64(1500)))
Expect(lyrics[0].Line[0].Cue[1].Value).To(Equal("lyrics "))
Expect(lyrics[0].Line[0].Cue[1].End).To(Equal(gg.P(int64(2000))))
Expect(lyrics[0].Line[0].Cue[1].ByteStart).To(Equal(5))
Expect(lyrics[0].Line[0].Cue[1].ByteEnd).To(Equal(11))
Expect(*lyrics[0].Line[0].Cue[2].Start).To(Equal(int64(2000)))
Expect(lyrics[0].Line[0].Cue[2].Value).To(Equal("here"))
Expect(lyrics[0].Line[0].Cue[2].End).To(Equal(gg.P(int64(3000))))
Expect(lyrics[0].Line[0].Cue[2].ByteStart).To(Equal(12))
Expect(lyrics[0].Line[0].Cue[2].ByteEnd).To(Equal(15))
// Line 2: has inline markers
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(3000))))
Expect(lyrics[0].Line[1].End).To(Equal(gg.P(int64(5000))))
Expect(lyrics[0].Line[1].Value).To(Equal("More words"))
Expect(lyrics[0].Line[1].Cue).To(HaveLen(2))
Expect(lyrics[0].Line[1].Cue[0].End).To(Equal(gg.P(int64(3500))))
Expect(lyrics[0].Line[1].Cue[1].End).To(Equal(gg.P(int64(5000))))
Expect(lyrics[0].Line[1].Cue[0].ByteStart).To(Equal(0))
Expect(lyrics[0].Line[1].Cue[0].ByteEnd).To(Equal(4))
Expect(lyrics[0].Line[1].Cue[1].ByteStart).To(Equal(5))
Expect(lyrics[0].Line[1].Cue[1].ByteEnd).To(Equal(9))
// Line 3: plain line, no cues
Expect(lyrics[0].Line[2].Start).To(Equal(gg.P(int64(5000))))
Expect(lyrics[0].Line[2].Value).To(Equal("Plain line without inline markers"))
Expect(lyrics[0].Line[2].Cue).To(BeNil())
})
It("should return Enhanced LRC lyrics from an ELRC file", func() {
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".elrc")
Expect(err).To(BeNil())
Expect(lyrics).To(HaveLen(1))
Expect(lyrics[0].DisplayArtist).To(Equal("ELRC Artist"))
Expect(lyrics[0].DisplayTitle).To(Equal("ELRC Song"))
Expect(lyrics[0].Lang).To(Equal("eng"))
Expect(lyrics[0].Synced).To(BeTrue())
Expect(lyrics[0].Line).To(HaveLen(2))
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(1000))))
Expect(lyrics[0].Line[0].End).To(Equal(gg.P(int64(3000))))
Expect(lyrics[0].Line[0].Value).To(Equal("Lead words"))
Expect(lyrics[0].Line[0].Cue).To(HaveLen(2))
Expect(*lyrics[0].Line[0].Cue[0].Start).To(Equal(int64(1000)))
Expect(lyrics[0].Line[0].Cue[0].Value).To(Equal("Lead "))
Expect(lyrics[0].Line[0].Cue[0].End).To(Equal(gg.P(int64(1500))))
Expect(lyrics[0].Line[0].Cue[0].ByteStart).To(Equal(0))
Expect(lyrics[0].Line[0].Cue[0].ByteEnd).To(Equal(4))
Expect(*lyrics[0].Line[0].Cue[1].Start).To(Equal(int64(1500)))
Expect(lyrics[0].Line[0].Cue[1].Value).To(Equal("words"))
Expect(lyrics[0].Line[0].Cue[1].End).To(Equal(gg.P(int64(3000))))
Expect(lyrics[0].Line[0].Cue[1].ByteStart).To(Equal(5))
Expect(lyrics[0].Line[0].Cue[1].ByteEnd).To(Equal(9))
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(3000))))
Expect(lyrics[0].Line[1].Value).To(Equal("Fallback line"))
Expect(lyrics[0].Line[1].Cue).To(BeNil())
})
It("should return unsynchronized lyrics from a file", func() { It("should return unsynchronized lyrics from a file", func() {
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"} mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".txt") lyrics, err := fromExternalFile(ctx, &mf, ".txt")
@ -109,6 +192,66 @@ var _ = Describe("sources", func() {
})) }))
}) })
It("should return synchronized lyrics from an SRT file", func() {
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".srt")
Expect(err).To(BeNil())
Expect(lyrics).To(Equal(model.LyricList{
model.Lyrics{
Lang: "xxx",
Line: []model.Line{
{
Start: gg.P(int64(18800)),
End: gg.P(int64(22800)),
Value: "We're from subtitles",
},
{
Start: gg.P(int64(22801)),
End: gg.P(int64(26000)),
Value: "Another subtitle line",
},
},
Synced: true,
},
}))
})
It("should return synchronized multilingual lyrics from a TTML file", func() {
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
Expect(err).To(BeNil())
Expect(lyrics).To(Equal(model.LyricList{
{
Kind: "main",
Lang: "eng",
Line: []model.Line{
{
Start: gg.P(int64(18800)),
Value: "We're no strangers to love",
},
{
Start: gg.P(int64(22800)),
Value: "You know the rules and so do I",
},
},
Synced: true,
},
{
Kind: "main",
Lang: "por",
Line: []model.Line{
{
Start: gg.P(int64(18800)),
Value: "Nao somos estranhos ao amor",
},
},
Synced: true,
},
}))
})
It("should handle LRC files with UTF-8 BOM marker (issue #4631)", func() { It("should handle LRC files with UTF-8 BOM marker (issue #4631)", func() {
// The function looks for <basePath-without-ext><suffix>, so we need to pass // The function looks for <basePath-without-ext><suffix>, so we need to pass
// a MediaFile with .mp3 path and look for .lrc suffix // a MediaFile with .mp3 path and look for .lrc suffix
@ -142,5 +285,33 @@ var _ = Describe("sources", func() {
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801)))) Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801))))
Expect(lyrics[0].Line[1].Value).To(Equal("You know the rules and so do I")) Expect(lyrics[0].Line[1].Value).To(Equal("You know the rules and so do I"))
}) })
It("should handle TTML files with UTF-8 BOM marker", func() {
mf := model.MediaFile{Path: "tests/fixtures/bom-test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
Expect(err).To(BeNil())
Expect(lyrics).To(HaveLen(1))
Expect(lyrics[0].Kind).To(Equal("main"))
Expect(lyrics[0].Synced).To(BeTrue())
Expect(lyrics[0].Line).To(HaveLen(1))
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(0))))
Expect(lyrics[0].Line[0].Value).To(Equal("BOM test line"))
})
It("should handle UTF-16 BE encoded TTML files", func() {
mf := model.MediaFile{Path: "tests/fixtures/bom-utf16-test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
Expect(err).To(BeNil())
Expect(lyrics).To(HaveLen(1))
Expect(lyrics[0].Kind).To(Equal("main"))
Expect(lyrics[0].Synced).To(BeTrue())
Expect(lyrics[0].Line).To(HaveLen(2))
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(18800))))
Expect(lyrics[0].Line[0].Value).To(Equal("UTF16 line one"))
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801))))
Expect(lyrics[0].Line[1].Value).To(Equal("UTF16 line two"))
})
}) })
}) })

161
core/lyrics/srt.go Normal file
View File

@ -0,0 +1,161 @@
package lyrics
import (
"bytes"
"regexp"
"strconv"
"strings"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/utils/str"
)
// srtTimeRegex matches one SRT timestamp such as "00:01:02,345". The hours
// field has one or two digits, minutes and seconds have exactly two, the
// separator before the fraction may be "," or ".", and the fraction has one to
// three digits. Leading and trailing whitespace is tolerated.
// Capture groups: 1=hours, 2=minutes, 3=seconds, 4=fraction.
var srtTimeRegex = regexp.MustCompile(`^\s*(\d{1,2}):(\d{2}):(\d{2})[,.](\d{1,3})\s*$`)
// parseSRT converts the contents of a SubRip (.srt) file into a lyrics list
// holding a single synced entry with language "xxx".
//
// Line endings are normalized to "\n" first, the text is split into cue blocks
// and each block is parsed with parseSRTBlock. Blocks reported as not usable
// are skipped; an error from any block aborts the whole parse. When no block
// yields a line, parseSRT returns (nil, nil).
func parseSRT(contents []byte) (model.LyricList, error) {
	// CRLF must be collapsed before lone CR so "\r\n" does not become "\n\n".
	normalized := strings.ReplaceAll(strings.ReplaceAll(string(contents), "\r\n", "\n"), "\r", "\n")

	cueBlocks := splitSRTBlocks(normalized)
	parsed := make([]model.Line, 0, len(cueBlocks))
	for _, cue := range cueBlocks {
		entry, usable, err := parseSRTBlock(cue)
		switch {
		case err != nil:
			return nil, err
		case usable:
			parsed = append(parsed, entry)
		}
	}

	if len(parsed) == 0 {
		return nil, nil
	}

	return model.LyricList{
		model.NormalizeLyrics(model.Lyrics{
			Lang:   "xxx",
			Line:   parsed,
			Synced: true,
		}),
	}, nil
}
// splitSRTBlocks splits SRT text (already normalized to "\n" line endings)
// into cue blocks. A block is a maximal run of consecutive non-blank lines;
// any line that is empty or contains only whitespace acts as a separator.
//
// Treating whitespace-only lines as separators matters for files whose "blank"
// lines carry stray spaces or tabs: splitting on the exact string "\n\n" would
// glue such cues together, and the index and timing lines of the following
// cue would end up as lyric text of the previous one.
//
// Each returned block is trimmed of surrounding whitespace. The result is nil
// when raw contains no non-blank content.
func splitSRTBlocks(raw string) []string {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil
	}

	var (
		blocks  []string
		current []string
	)
	// flush emits the lines collected so far as one block and resets the buffer.
	flush := func() {
		if len(current) == 0 {
			return
		}
		blocks = append(blocks, strings.TrimSpace(strings.Join(current, "\n")))
		current = current[:0]
	}

	for _, line := range strings.Split(raw, "\n") {
		if strings.TrimSpace(line) == "" {
			flush()
			continue
		}
		current = append(current, line)
	}
	flush()

	return blocks
}
// parseSRTBlock parses one SRT cue block into a lyrics line.
//
// The expected layout is an optional all-digits index line, a timing line of
// the form "<start> --> <end>", and one or more text lines. Every line is
// trimmed; empty text lines are dropped and the rest are joined with "\n" and
// passed through str.SanitizeText.
//
// The boolean result is false (with a nil error) when the block should simply
// be skipped: there is no timing line, the timing line does not contain exactly
// one "-->", or no text remains. An error is returned only when a timestamp on
// the timing line cannot be parsed.
func parseSRTBlock(block string) (model.Line, bool, error) {
	skip := model.Line{}

	rawLines := bytes.Split([]byte(block), []byte("\n"))
	lines := make([]string, len(rawLines))
	for i, raw := range rawLines {
		lines[i] = strings.TrimSpace(string(raw))
	}
	if len(lines) == 0 {
		return skip, false, nil
	}

	// The numeric cue index is optional; when present the timing line follows it.
	timingIdx := 0
	if digitsOnly(lines[0]) {
		timingIdx = 1
	}
	if timingIdx >= len(lines) {
		return skip, false, nil
	}

	timing := strings.Split(lines[timingIdx], "-->")
	if len(timing) != 2 {
		return skip, false, nil
	}

	startMs, err := parseSRTTime(timing[0])
	if err != nil {
		return skip, false, err
	}
	endMs, err := parseSRTTime(timing[1])
	if err != nil {
		return skip, false, err
	}

	var text []string
	for _, candidate := range lines[timingIdx+1:] {
		if candidate != "" {
			text = append(text, candidate)
		}
	}

	value := str.SanitizeText(strings.Join(text, "\n"))
	if value == "" {
		return skip, false, nil
	}

	return model.Line{Start: &startMs, End: &endMs, Value: value}, true, nil
}
// parseSRTTime converts an SRT timestamp (as matched by srtTimeRegex) into a
// number of milliseconds. A fraction with fewer than three digits is scaled up,
// so "1,5" means 1500 ms and "1,05" means 1050 ms.
//
// It returns strconv.ErrSyntax when value does not match the timestamp
// pattern, or the strconv error if one of the numeric fields cannot be parsed.
func parseSRTTime(value string) (int64, error) {
	groups := srtTimeRegex.FindStringSubmatch(strings.TrimSpace(value))
	if groups == nil {
		return 0, strconv.ErrSyntax
	}

	// groups[1..4] hold hours, minutes, seconds and the fractional part.
	var fields [4]int64
	for i := range fields {
		n, err := strconv.ParseInt(groups[i+1], 10, 64)
		if err != nil {
			return 0, err
		}
		fields[i] = n
	}
	hours, minutes, seconds, millis := fields[0], fields[1], fields[2], fields[3]

	// Pad the fraction to millisecond precision: one digit is tenths, two
	// digits are hundredths of a second.
	for digits := len(groups[4]); digits < 3; digits++ {
		millis *= 10
	}

	totalSeconds := (hours*60+minutes)*60 + seconds
	return totalSeconds*1000 + millis, nil
}
// digitsOnly reports whether value is non-empty and made up exclusively of the
// ASCII digits '0' through '9'.
func digitsOnly(value string) bool {
	// Checking bytes is sufficient: every byte of a multi-byte UTF-8 sequence
	// is >= 0x80 and therefore outside the '0'..'9' range.
	for i := 0; i < len(value); i++ {
		if c := value[i]; c < '0' || c > '9' {
			return false
		}
	}
	return value != ""
}

1264
core/lyrics/ttml.go Normal file

File diff suppressed because it is too large Load Diff

407
core/lyrics/ttml_test.go Normal file
View File

@ -0,0 +1,407 @@
package lyrics
import (
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/utils/gg"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("parseTTML", func() {
// Multi-language and timing: a document with two <div> elements carrying
// different xml:lang values is expected to yield two lyrics entries, in
// document order, with begin values given as offsets, frame-based clock times
// and ticks all converted to milliseconds.
Describe("Multi-language and timing", func() {
It("should parse multiple language divs with inherited offsets and frame/tick timing", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttp="http://www.w3.org/ns/ttml#parameter" ttp:frameRate="30" ttp:subFrameRate="2" ttp:tickRate="10">
<body>
<div xml:lang="eng" begin="1s">
<p begin="2s">Line one</p>
<p begin="00:00:04:15.1"><span>Line two</span><br/>with break</p>
</div>
<div xml:lang="por">
<p begin="45t">Linha</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
Expect(list).To(HaveLen(2))
By("parsing the English track")
eng := list[0]
Expect(eng.Lang).To(Equal("eng"))
Expect(eng.Synced).To(BeTrue())
// 1s (div) + 2s (p) = 3000 ms.
Expect(eng.Line[0].Start).To(Equal(gg.P(int64(3000))))
Expect(eng.Line[0].Value).To(Equal("Line one"))
// 4 s + 15 frames at 30 fps + 1 sub-frame at 2 per frame ≈ 4517 ms.
// NOTE(review): the div's 1s offset is not part of this expected value —
// presumably the clock-time form is treated as absolute; confirm in parseTTML.
Expect(eng.Line[1].Start).To(Equal(gg.P(int64(4517))))
// <br/> is expected to become a newline in the line value.
Expect(eng.Line[1].Value).To(Equal("Line two\nwith break"))
By("parsing the Portuguese track")
por := list[1]
Expect(por.Lang).To(Equal("por"))
// 45 ticks at ttp:tickRate=10 = 4500 ms.
Expect(por.Line[0].Start).To(Equal(gg.P(int64(4500))))
Expect(por.Line[0].Value).To(Equal("Linha"))
})
})
// Unsupported cue handling: a <p> whose begin uses the wallclock(...) form is
// expected to be dropped without an error, while sibling cues with supported
// timing are kept.
Describe("Unsupported cue handling", func() {
It("should skip wallclock cues and keep valid ones", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml">
<body xml:lang="eng">
<div>
<p begin="wallclock(2026-01-01T00:00:00Z)">Skip me</p>
<p begin="1s">Keep me</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
Expect(list).To(HaveLen(1))
Expect(list[0].Line).To(HaveLen(1))
Expect(list[0].Line[0].Start).To(Equal(gg.P(int64(1000))))
Expect(list[0].Line[0].Value).To(Equal("Keep me"))
})
})
// Begin/End/Dur with inheritance: begin offsets on <body>, <div> and <p> are
// expected to add up when computing each line's start time.
Describe("Begin/End/Dur with inheritance", func() {
It("should correctly accumulate nested timing from body, div, and p elements", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml">
<body xml:lang="eng" begin="10s">
<div begin="5s" dur="8s">
<p begin="1s" dur="2s">First line</p>
<p begin="3s" end="5s">Second line</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
Expect(list).To(HaveLen(1))
Expect(list[0].Lang).To(Equal("eng"))
Expect(list[0].Line).To(HaveLen(2))
// 10s (body) + 5s (div) + 1s (p) = 16000 ms.
Expect(list[0].Line[0].Start).To(Equal(gg.P(int64(16000))))
Expect(list[0].Line[0].Value).To(Equal("First line"))
// 10s (body) + 5s (div) + 3s (p) = 18000 ms.
Expect(list[0].Line[1].Start).To(Equal(gg.P(int64(18000))))
Expect(list[0].Line[1].Value).To(Equal("Second line"))
})
})
// Non-standard bare second offsets: begin values written as plain numbers
// without a unit ("10", "0.170") are expected to be read as seconds.
Describe("Non-standard bare second offsets", func() {
It("should parse bare decimal numbers as seconds", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml">
<body xml:lang="eng" begin="10">
<div>
<p begin="0.170">First line</p>
<p begin="3.710">Second line</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
Expect(list).To(HaveLen(1))
Expect(list[0].Line).To(HaveLen(2))
// 10 s (body) + 0.170 s (p) = 10170 ms.
Expect(list[0].Line[0].Start).To(Equal(gg.P(int64(10170))))
Expect(list[0].Line[0].Value).To(Equal("First line"))
// 10 s (body) + 3.710 s (p) = 13710 ms.
Expect(list[0].Line[1].Start).To(Equal(gg.P(int64(13710))))
Expect(list[0].Line[1].Value).To(Equal("Second line"))
})
})
// Word timing tokens: timed <span> elements inside a <p> are expected to become
// per-word cues on the line, each tagged with the agent that sings it. The
// specs below pin the agent list, the cue byte ranges and the agent ids.
Describe("Word timing tokens", func() {
// Spans wrapped in ttm:role="x-bg" are expected to be attributed to a derived
// background agent ("__nd_bg__|main") and placed on their own text row.
It("should extract timed tokens from spans including background role", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
<body xml:lang="eng">
<div>
<p begin="00:01.000" end="00:03.000">
<span begin="00:01.000" end="00:01.400">He</span><span begin="00:01.400" end="00:01.800">llo</span>
<span ttm:role="x-bg"><span begin="00:02.000" end="00:02.500">echo</span></span>
</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
Expect(list).To(HaveLen(1))
Expect(list[0].Agents).To(Equal([]model.Agent{
{ID: "main", Role: "main"},
{ID: "__nd_bg__|main", Role: "bg"},
}))
Expect(list[0].Line).To(HaveLen(1))
line := list[0].Line[0]
Expect(line.Start).To(Equal(gg.P(int64(1000))))
Expect(line.Value).To(Equal("Hello\necho"))
Expect(line.End).To(Equal(gg.P(int64(3000))))
Expect(line.Cue).To(HaveLen(3))
// ByteStart/ByteEnd index into line.Value ("Hello\necho"); ByteEnd is the
// offset of the cue's last byte (inclusive).
Expect(line.Cue[0]).To(Equal(model.Cue{Start: gg.P(int64(1000)), End: gg.P(int64(1400)), Value: "He", ByteStart: 0, ByteEnd: 1, AgentID: "main"}))
Expect(line.Cue[1]).To(Equal(model.Cue{Start: gg.P(int64(1400)), End: gg.P(int64(1800)), Value: "llo", ByteStart: 2, ByteEnd: 4, AgentID: "main"}))
Expect(line.Cue[2]).To(Equal(model.Cue{Start: gg.P(int64(2000)), End: gg.P(int64(2500)), Value: "echo", ByteStart: 6, ByteEnd: 9, AgentID: "__nd_bg__|main"}))
})
// Agents declared in <head><metadata> are expected to keep their ids and
// names; the expected roles are "main" for the first person, "voice" for the
// second person and "group" for the agent of type "group".
It("should parse named TTML agents into main, voice, and group roles", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
<head>
<metadata>
<ttm:agent xml:id="v1" type="person"><ttm:name>Chris Martin</ttm:name></ttm:agent>
<ttm:agent xml:id="v2" type="person"><ttm:name>Jin</ttm:name></ttm:agent>
<ttm:agent xml:id="v1000" type="group"><ttm:name>All</ttm:name></ttm:agent>
</metadata>
</head>
<body xml:lang="eng">
<div>
<p begin="1s" end="2s" ttm:agent="v1"><span begin="1s" end="1.5s">You</span></p>
<p begin="2s" end="3s" ttm:agent="v2"><span begin="2s" end="2.5s">and</span></p>
<p begin="3s" end="4s" ttm:agent="v1000"><span begin="3s" end="3.5s">All</span></p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
Expect(list).To(HaveLen(1))
Expect(list[0].Agents).To(Equal([]model.Agent{
{ID: "v1", Role: "main", Name: "Chris Martin"},
{ID: "v2", Role: "voice", Name: "Jin"},
{ID: "v1000", Role: "group", Name: "All"},
}))
Expect(list[0].Line[0].Cue[0].AgentID).To(Equal("v1"))
Expect(list[0].Line[1].Cue[0].AgentID).To(Equal("v2"))
Expect(list[0].Line[2].Cue[0].AgentID).To(Equal("v1000"))
})
// The derived background agent id ("__nd_bg__|lead") must not clash with an
// explicitly declared agent whose id merely looks like a background id
// ("lead__bg").
It("should avoid collisions between derived background agents and explicit TTML agent ids", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
<head>
<metadata>
<ttm:agent xml:id="lead" type="person"><ttm:name>Lead</ttm:name></ttm:agent>
<ttm:agent xml:id="lead__bg" type="person"><ttm:name>Existing Background Id</ttm:name></ttm:agent>
</metadata>
</head>
<body xml:lang="eng">
<div>
<p begin="1s" end="2s" ttm:agent="lead">
<span begin="1s" end="1.4s">Lead</span>
<span ttm:role="x-bg"><span begin="1.5s" end="1.8s">Echo</span></span>
</p>
<p begin="2s" end="3s" ttm:agent="lead__bg">
<span begin="2s" end="2.5s">Named</span>
</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
Expect(list).To(HaveLen(1))
Expect(list[0].Agents).To(Equal([]model.Agent{
{ID: "lead", Role: "main", Name: "Lead"},
{ID: "__nd_bg__|lead", Role: "bg", Name: "Lead"},
{ID: "lead__bg", Role: "voice", Name: "Existing Background Id"},
}))
Expect(list[0].Line).To(HaveLen(2))
Expect(list[0].Line[0].Cue).To(HaveLen(2))
Expect(list[0].Line[0].Cue[0].AgentID).To(Equal("lead"))
Expect(list[0].Line[0].Cue[1].AgentID).To(Equal("__nd_bg__|lead"))
Expect(list[0].Line[1].Cue).To(HaveLen(1))
Expect(list[0].Line[1].Cue[0].AgentID).To(Equal("lead__bg"))
})
// A span without ttm:agent inside a <p> without ttm:agent is expected to get
// the id of the agent resolved as "main" (here the only declared agent).
It("should fill missing cue agent ids with the resolved main agent", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
<head>
<metadata>
<ttm:agent xml:id="guest" type="person"><ttm:name>Guest Vocal</ttm:name></ttm:agent>
</metadata>
</head>
<body xml:lang="eng">
<div>
<p begin="1s" end="3s">
<span begin="1s" end="1.4s">Lead</span>
<span begin="2s" end="2.4s" ttm:agent="guest">Guest</span>
</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
Expect(list).To(HaveLen(1))
Expect(list[0].Agents).To(Equal([]model.Agent{
{ID: "guest", Role: "main", Name: "Guest Vocal"},
}))
Expect(list[0].Line).To(HaveLen(1))
Expect(list[0].Line[0].Cue).To(HaveLen(2))
Expect(list[0].Line[0].Cue[0].AgentID).To(Equal("guest"))
Expect(list[0].Line[0].Cue[1].AgentID).To(Equal("guest"))
})
})
// Ambiguous decimal timing: when a child's bare decimal begin/end values
// already lie inside the parent's [begin, end] window, they are expected to be
// taken as absolute times rather than added to the parent's offset.
Describe("Ambiguous decimal timing", func() {
It("should prefer absolute timing when values fall inside parent window", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml">
<body xml:lang="eng">
<div begin="37.870" end="45.570">
<p begin="43.444" end="45.570">
<span begin="43.444" end="43.716">go</span>
<span begin="43.716" end="43.887">go</span>
</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
Expect(list).To(HaveLen(1))
Expect(list[0].Line).To(HaveLen(1))
line := list[0].Line[0]
// 43444 ms, not 37870 + 43444: the div offset is not added.
Expect(line.Start).To(Equal(gg.P(int64(43444))))
Expect(line.Value).To(Equal("go\ngo"))
Expect(line.End).To(Equal(gg.P(int64(45570))))
Expect(line.Cue).To(HaveLen(2))
Expect(line.Cue[0]).To(Equal(model.Cue{Start: gg.P(int64(43444)), End: gg.P(int64(43716)), Value: "go", ByteStart: 0, ByteEnd: 1}))
Expect(line.Cue[1]).To(Equal(model.Cue{Start: gg.P(int64(43716)), End: gg.P(int64(43887)), Value: "go", ByteStart: 3, ByteEnd: 4}))
})
})
// Unsynced fallback: a document without any timing attributes and without
// xml:lang is expected to produce one unsynced entry with language "xxx" and
// lines that have no start time.
Describe("Unsynced fallback", func() {
It("should return unsynced lyrics when no timing is present", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml">
<body>
<div>
<p>No timing here</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
Expect(list).To(HaveLen(1))
Expect(list[0].Lang).To(Equal("xxx"))
Expect(list[0].Synced).To(BeFalse())
Expect(list[0].Line).To(HaveLen(1))
Expect(list[0].Line[0].Start).To(BeNil())
Expect(list[0].Line[0].Value).To(Equal("No timing here"))
})
})
// Metadata tracks: <translation> and <transliteration> entries inside
// iTunesMetadata are expected to become additional lyrics entries (kinds
// "translation" and "pronunciation") after the "main" entry. Each <text for=…>
// is matched to the body <p> with the same itunes:key; a text whose key has no
// matching <p> ("MISSING") is expected to be dropped.
Describe("Metadata tracks", func() {
It("should produce main, translation, and pronunciation tracks from iTunesMetadata", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:itunes="http://music.apple.com/lyric-ttml-internal">
<head>
<metadata>
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
<translations>
<translation xml:lang="es">
<text for="L1">Hola</text>
<text for="MISSING">Skip me</text>
</translation>
</translations>
<transliterations>
<transliteration xml:lang="ja-Latn">
<text for="L2"><span begin="00:02.000" end="00:02.300" xmlns="http://www.w3.org/ns/ttml">ko</span><span begin="00:02.300" end="00:02.600" xmlns="http://www.w3.org/ns/ttml">nni</span></text>
</transliteration>
</transliterations>
</iTunesMetadata>
</metadata>
</head>
<body xml:lang="ja">
<div>
<p begin="00:01.000" end="00:01.500" itunes:key="L1">こんにちは</p>
<p begin="00:02.000" end="00:02.700" itunes:key="L2">こんばんは</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
Expect(list).To(HaveLen(3))
By("checking the main track")
main := list[0]
Expect(main.Kind).To(Equal("main"))
Expect(main.Lang).To(Equal("ja"))
Expect(main.Line).To(HaveLen(2))
By("checking the translation track")
translation := list[1]
Expect(translation.Kind).To(Equal("translation"))
Expect(translation.Lang).To(Equal("es"))
Expect(translation.Line).To(HaveLen(1))
// The untimed translation text takes the timing of the <p> keyed "L1".
Expect(translation.Line[0].Start).To(Equal(gg.P(int64(1000))))
Expect(translation.Line[0].Value).To(Equal("Hola"))
Expect(translation.Line[0].End).To(Equal(gg.P(int64(1500))))
By("checking the pronunciation track")
pronunciation := list[2]
Expect(pronunciation.Kind).To(Equal("pronunciation"))
// The language tag "ja-Latn" is expected in lower case.
Expect(pronunciation.Lang).To(Equal("ja-latn"))
Expect(pronunciation.Line).To(HaveLen(1))
Expect(pronunciation.Line[0].Start).To(Equal(gg.P(int64(2000))))
Expect(pronunciation.Line[0].Value).To(Equal("konni"))
// The end comes from the last span (2.600 s), not from the <p> (2.700 s).
Expect(pronunciation.Line[0].End).To(Equal(gg.P(int64(2600))))
Expect(pronunciation.Line[0].Cue).To(HaveLen(2))
Expect(pronunciation.Line[0].Cue[0]).To(Equal(model.Cue{Start: gg.P(int64(2000)), End: gg.P(int64(2300)), Value: "ko", ByteStart: 0, ByteEnd: 1}))
Expect(pronunciation.Line[0].Cue[1]).To(Equal(model.Cue{Start: gg.P(int64(2300)), End: gg.P(int64(2600)), Value: "nni", ByteStart: 2, ByteEnd: 4}))
})
})
// Pronunciation with bare decimal end times: transliteration spans whose
// begin/end are bare decimals ("2.747") are expected to be read as seconds, and
// the spaces between spans are expected to be kept in the line value.
Describe("Pronunciation with bare decimal end times", func() {
It("should correctly parse bare decimal times in transliteration spans", func() {
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:itunes="http://music.apple.com/lyric-ttml-internal">
<head>
<metadata>
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
<transliterations>
<transliteration xml:lang="ja-Latn">
<text for="L1"><span begin="2.747" end="3.018" xmlns="http://www.w3.org/ns/ttml">I</span> <span begin="3.018" end="3.179" xmlns="http://www.w3.org/ns/ttml">woke</span> <span begin="3.179" end="3.582" xmlns="http://www.w3.org/ns/ttml">up</span></text>
</transliteration>
</transliterations>
</iTunesMetadata>
</metadata>
</head>
<body xml:lang="ja">
<div>
<p begin="00:02.747" end="00:04.000" itunes:key="L1">起きた</p>
</div>
</body>
</tt>`)
list, err := parseTTML(content)
Expect(err).ToNot(HaveOccurred())
// Locate the pronunciation entry by kind instead of relying on its position.
var pronunciation *model.Lyrics
for i := range list {
if list[i].Kind == "pronunciation" {
pronunciation = &list[i]
break
}
}
Expect(pronunciation).ToNot(BeNil())
Expect(pronunciation.Line).To(HaveLen(1))
line := pronunciation.Line[0]
Expect(line.Start).To(Equal(gg.P(int64(2747))))
Expect(line.Value).To(Equal("I woke up"))
Expect(line.Cue).To(HaveLen(3))
// Byte ranges skip the separating spaces: "I"=0..0, "woke"=2..5, "up"=7..8.
Expect(line.Cue[0]).To(Equal(model.Cue{Start: gg.P(int64(2747)), End: gg.P(int64(3018)), Value: "I", ByteStart: 0, ByteEnd: 0}))
Expect(line.Cue[1]).To(Equal(model.Cue{Start: gg.P(int64(3018)), End: gg.P(int64(3179)), Value: "woke", ByteStart: 2, ByteEnd: 5}))
Expect(line.Cue[2]).To(Equal(model.Cue{Start: gg.P(int64(3179)), End: gg.P(int64(3582)), Value: "up", ByteStart: 7, ByteEnd: 8}))
})
})
})

View File

@ -6,20 +6,40 @@ import (
"slices" "slices"
"strconv" "strconv"
"strings" "strings"
"unicode"
"github.com/navidrome/navidrome/log" "github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/utils/str" "github.com/navidrome/navidrome/utils/str"
) )
// Cue is a timed fragment of a lyric line (for example a single word or
// syllable) used for word-level synchronization.
type Cue struct {
// Start is the time at which the fragment begins. Optional.
// NOTE(review): presumably milliseconds, like the line timestamps — confirm.
Start *int64 `structs:"start,omitempty" json:"start,omitempty"`
// End is the time at which the fragment ends. Optional; the normalization
// helpers in this file may fill it in from the next cue or the line end.
End *int64 `structs:"end,omitempty" json:"end,omitempty"`
// Value is the text of the fragment.
Value string `structs:"value" json:"value"`
// ByteStart is the UTF-8 byte offset of the fragment's first byte within the
// owning line's Value.
ByteStart int `structs:"byteStart" json:"byteStart"`
// ByteEnd is the UTF-8 byte offset of the fragment's last byte within the
// owning line's Value. As produced by parseEnhancedLine it is inclusive.
ByteEnd int `structs:"byteEnd" json:"byteEnd"`
// AgentID optionally links the cue to an Agent by its ID.
AgentID string `structs:"agentId,omitempty" json:"agentId,omitempty"`
}
// Agent describes a party that cues can be attributed to through Cue.AgentID.
// NOTE(review): presumably a singer/voice of the track — confirm against the
// lyric parsers that populate it.
type Agent struct {
// ID is the identifier that Cue.AgentID refers to.
ID string `structs:"id" json:"id"`
// Role is the role label of the agent.
// NOTE(review): values such as "main" and "bg" appear in the tests — confirm
// the full set against the producers.
Role string `structs:"role" json:"role"`
// Name is an optional display name.
Name string `structs:"name,omitempty" json:"name,omitempty"`
}
type Line struct { type Line struct {
Start *int64 `structs:"start,omitempty" json:"start,omitempty"` Start *int64 `structs:"start,omitempty" json:"start,omitempty"`
End *int64 `structs:"end,omitempty" json:"end,omitempty"`
Value string `structs:"value" json:"value"` Value string `structs:"value" json:"value"`
Cue []Cue `structs:"cue,omitempty" json:"cue,omitempty"`
} }
type Lyrics struct { type Lyrics struct {
DisplayArtist string `structs:"displayArtist,omitempty" json:"displayArtist,omitempty"` DisplayArtist string `structs:"displayArtist,omitempty" json:"displayArtist,omitempty"`
DisplayTitle string `structs:"displayTitle,omitempty" json:"displayTitle,omitempty"` DisplayTitle string `structs:"displayTitle,omitempty" json:"displayTitle,omitempty"`
Kind string `structs:"kind,omitempty" json:"kind,omitempty"`
Lang string `structs:"lang" json:"lang"` Lang string `structs:"lang" json:"lang"`
Agents []Agent `structs:"agents,omitempty" json:"agents,omitempty"`
Line []Line `structs:"line" json:"line"` Line []Line `structs:"line" json:"line"`
Offset *int64 `structs:"offset,omitempty" json:"offset,omitempty"` Offset *int64 `structs:"offset,omitempty" json:"offset,omitempty"`
Synced bool `structs:"synced" json:"synced"` Synced bool `structs:"synced" json:"synced"`
@ -33,6 +53,10 @@ var (
syncRegex = regexp.MustCompile(`(^|\n)\s*` + timeRegexString) syncRegex = regexp.MustCompile(`(^|\n)\s*` + timeRegexString)
timeRegex = regexp.MustCompile(timeRegexString) timeRegex = regexp.MustCompile(timeRegexString)
lrcIdRegex = regexp.MustCompile(`\[(ar|ti|offset|lang):([^]]+)]`) lrcIdRegex = regexp.MustCompile(`\[(ar|ti|offset|lang):([^]]+)]`)
// Enhanced LRC: inline word-level timing markers like <00:12.34>
enhancedLRCTimeString = `<([0-9]{1,2}:)?([0-9]{1,2}):([0-9]{1,2})(.[0-9]{1,3})?>`
enhancedLRCRegex = regexp.MustCompile(enhancedLRCTimeString)
) )
func (l Lyrics) IsEmpty() bool { func (l Lyrics) IsEmpty() bool {
@ -106,9 +130,11 @@ func ToLyrics(language, text string) (*Lyrics, error) {
if validLine { if validLine {
for idx := range timestamps { for idx := range timestamps {
value, cues := parseEnhancedLine(priorLine)
structuredLines = append(structuredLines, Line{ structuredLines = append(structuredLines, Line{
Start: &timestamps[idx], Start: &timestamps[idx],
Value: strings.TrimSpace(priorLine), Value: value,
Cue: cues,
}) })
} }
timestamps = nil timestamps = nil
@ -154,9 +180,11 @@ func ToLyrics(language, text string) (*Lyrics, error) {
if validLine { if validLine {
for idx := range timestamps { for idx := range timestamps {
value, cues := parseEnhancedLine(priorLine)
structuredLines = append(structuredLines, Line{ structuredLines = append(structuredLines, Line{
Start: &timestamps[idx], Start: &timestamps[idx],
Value: strings.TrimSpace(priorLine), Value: value,
Cue: cues,
}) })
} }
} }
@ -173,13 +201,118 @@ func ToLyrics(language, text string) (*Lyrics, error) {
DisplayArtist: artist, DisplayArtist: artist,
DisplayTitle: title, DisplayTitle: title,
Lang: language, Lang: language,
Line: structuredLines, Line: NormalizeCueLines(structuredLines),
Offset: offset, Offset: offset,
Synced: synced, Synced: synced,
} }
return &lyrics, nil return &lyrics, nil
} }
// parseEnhancedLine extracts word-level timing cues from Enhanced LRC inline
// markers (for example <00:12.34>) and returns the lyric text with every marker
// removed and the surrounding whitespace trimmed.
//
// Each marker times the run of text that follows it, up to the next marker or
// the end of the line. A marker that is followed by no text produces no cue.
// Text that precedes the first marker is kept in the returned value (it simply
// has no cue), so the value is the same whether or not any cue could be built.
//
// Cue.ByteStart and Cue.ByteEnd are UTF-8 byte offsets into the returned
// (trimmed) value; ByteEnd is inclusive. Cue.End is left nil by this function.
// The returned cue slice is nil when no cue could be produced.
func parseEnhancedLine(text string) (string, []Cue) {
	matches := enhancedLRCRegex.FindAllStringSubmatchIndex(text, -1)
	if len(matches) == 0 {
		return strings.TrimSpace(text), nil
	}

	type segment struct {
		start    int64 // marker time as returned by parseTime
		rawStart int   // byte offset of the run inside the untrimmed value
		rawEnd   int   // exclusive end offset of the run inside the untrimmed value
	}

	segments := make([]segment, 0, len(matches))
	var rawValue strings.Builder
	rawValue.Grow(len(text))

	// Keep untimed text that appears before the first marker; dropping it would
	// make the line value depend on whether the line happens to carry cues.
	rawValue.WriteString(text[:matches[0][0]])

	for i, match := range matches {
		// Text runs from after this marker to the start of the next marker
		// (or to the end of the string for the last marker).
		textEnd := len(text)
		if i+1 < len(matches) {
			textEnd = matches[i+1][0]
		}
		word := text[match[1]:textEnd]
		if word == "" {
			continue
		}

		rawStart := rawValue.Len()
		rawValue.WriteString(word)

		timeMs, err := parseTime(
			// Rewrite <...> as [...] so parseTime can handle it with the same logic.
			"["+text[match[0]+1:match[1]-1]+"]",
			// Re-base the match indices onto the rewritten string: one
			// start/end pair for the whole match and one per capture group.
			[]int{
				0, match[1] - match[0],
				adjustGroup(match, 2), adjustGroup(match, 3),
				adjustGroup(match, 4), adjustGroup(match, 5),
				adjustGroup(match, 6), adjustGroup(match, 7),
				adjustGroup(match, 8), adjustGroup(match, 9),
			},
		)
		if err != nil {
			// The text is still part of the line; it just gets no cue.
			continue
		}

		segments = append(segments, segment{
			start:    timeMs,
			rawStart: rawStart,
			rawEnd:   rawValue.Len(),
		})
	}

	finalRaw := rawValue.String()
	value := strings.TrimSpace(finalRaw)
	if len(segments) == 0 {
		return value, nil
	}

	// Offsets were recorded against the untrimmed text; compute how much is
	// trimmed on each side so they can be re-based onto the trimmed value.
	leftTrimBytes := len(finalRaw) - len(strings.TrimLeftFunc(finalRaw, unicode.IsSpace))
	// For an all-whitespace value the right trim would end before the left
	// trim starts; clamp so the window is empty rather than inverted.
	trimmedEnd := max(len(strings.TrimRightFunc(finalRaw, unicode.IsSpace)), leftTrimBytes)

	cues := make([]Cue, 0, len(segments))
	for _, seg := range segments {
		start := seg.start
		byteStart := max(seg.rawStart, leftTrimBytes)
		byteEnd := min(seg.rawEnd, trimmedEnd)
		if byteStart >= byteEnd {
			// The whole run was trimmed away.
			continue
		}

		cues = append(cues, Cue{
			Start:     &start,
			Value:     finalRaw[byteStart:byteEnd],
			ByteStart: byteStart - leftTrimBytes,
			ByteEnd:   byteEnd - leftTrimBytes - 1, // inclusive
		})
	}
	if len(cues) == 0 {
		return value, nil
	}

	return value, cues
}
// adjustGroup re-bases one entry of a regexp submatch index slice so that it is
// relative to the start of the whole match (match[0]) instead of the start of
// the searched string.
//
// parseEnhancedLine rewrites a "<...>" marker as a standalone "[...]" string of
// the same length, so a position p in the original text corresponds to
// p - match[0] in the rewritten string. An unmatched optional group (-1) is
// passed through unchanged.
func adjustGroup(match []int, groupIdx int) int {
	if pos := match[groupIdx]; pos != -1 {
		return pos - match[0]
	}
	return -1
}
// stripEnhancedMarkers returns text with every Enhanced LRC inline time marker
// (anything matched by enhancedLRCRegex, e.g. <mm:ss.xx>) deleted, leaving only
// the plain lyric text. Whitespace around the markers is not touched.
func stripEnhancedMarkers(text string) string {
	plain := enhancedLRCRegex.ReplaceAllLiteralString(text, "")
	return plain
}
func parseTime(line string, match []int) (int64, error) { func parseTime(line string, match []int) (int64, error) {
var hours, millis int64 var hours, millis int64
var err error var err error
@ -227,3 +360,115 @@ func parseTime(line string, match []int) (int64, error) {
} }
type LyricList []Lyrics type LyricList []Lyrics
// NormalizeLyrics returns lyrics with its lines passed through
// NormalizeCueLines and with an empty Agents slice collapsed to nil.
func NormalizeLyrics(lyrics Lyrics) Lyrics {
	if len(lyrics.Agents) == 0 {
		lyrics.Agents = nil
	}
	lyrics.Line = NormalizeCueLines(lyrics.Line)
	return lyrics
}
// NormalizeCueLines returns a copy of lines in which every line has been run
// through normalizeCueLine.
//
// The fallback end time handed to normalizeCueLine for a line is the line's own
// End when set, otherwise the Start of the following line when that is set,
// otherwise nil. An empty or nil input is returned as is.
func NormalizeCueLines(lines []Line) []Line {
	count := len(lines)
	if count == 0 {
		return lines
	}

	result := append(make([]Line, 0, count), lines...)
	for idx := range result {
		var fallbackEnd *int64
		switch {
		case result[idx].End != nil:
			own := *result[idx].End
			fallbackEnd = &own
		case idx+1 < count && result[idx+1].Start != nil:
			next := *result[idx+1].Start
			fallbackEnd = &next
		}
		result[idx] = normalizeCueLine(result[idx], fallbackEnd)
	}
	return result
}
// NormalizeLineTiming fills in a line's missing Start and End from its cues.
//
// Start defaults to the earliest cue Start. End defaults to the latest cue end,
// where a cue without an End contributes its Start instead. Values already
// present on the line are never overwritten, and a line without cues is
// returned unchanged. The pointers stored on the line are fresh copies and do
// not alias the cues.
func NormalizeLineTiming(line Line) Line {
	if len(line.Cue) == 0 {
		return line
	}

	var (
		minStart, maxEnd   int64
		haveStart, haveEnd bool
	)
	for _, cue := range line.Cue {
		if cue.Start != nil && (!haveStart || *cue.Start < minStart) {
			minStart, haveStart = *cue.Start, true
		}

		// A cue with no explicit end still occupies the line at its start time.
		end := cue.End
		if end == nil {
			end = cue.Start
		}
		if end != nil && (!haveEnd || *end > maxEnd) {
			maxEnd, haveEnd = *end, true
		}
	}

	if line.Start == nil && haveStart {
		line.Start = &minStart
	}
	if line.End == nil && haveEnd {
		line.End = &maxEnd
	}
	return line
}
// normalizeCueLine makes the End times of a line's cues consistent and then
// derives missing line timing via NormalizeLineTiming.
//
// A cue without an End receives the Start of the next cue, or fallbackEnd when
// there is no next cue with a Start. End times are all-or-nothing: if any cue
// still has no End after that, the End of every cue is cleared. A line without
// cues is returned unchanged.
//
// The cues are normalized on a private copy, so the slice passed in through
// line.Cue (which shares its backing array with the caller) is never modified.
func normalizeCueLine(line Line, fallbackEnd *int64) Line {
	if len(line.Cue) == 0 {
		return line
	}

	cues := slices.Clone(line.Cue)
	complete := true
	for i := range cues {
		if cues[i].End != nil {
			continue
		}
		switch {
		case i+1 < len(cues) && cues[i+1].Start != nil:
			v := *cues[i+1].Start
			cues[i].End = &v
		case fallbackEnd != nil:
			v := *fallbackEnd
			cues[i].End = &v
		default:
			complete = false
		}
	}

	if !complete {
		// Partially timed cues are not exposed: drop every End instead.
		for i := range cues {
			cues[i].End = nil
		}
	}

	line.Cue = cues
	return NormalizeLineTiming(line)
}
// clearCueEnds returns a new slice holding a copy of every cue with its End set
// to nil. The input slice is not modified.
func clearCueEnds(cues []Cue) []Cue {
	cleared := make([]Cue, 0, len(cues))
	for _, cue := range cues {
		cue.End = nil
		cleared = append(cleared, cue)
	}
	return cleared
}

View File

@ -116,4 +116,85 @@ var _ = Describe("ToLyrics", func() {
{Start: &e, Value: "Test"}, {Start: &e, Value: "Test"},
})) }))
}) })
It("should parse Enhanced LRC with word-level timing", func() {
lyrics, err := ToLyrics("xxx", "[00:01.00]<00:01.00>Some <00:01.50>lyrics <00:02.00>here\n[00:03.00]<00:03.00>More <00:03.50>words")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Synced).To(BeTrue())
Expect(lyrics.Line).To(HaveLen(2))
t1000, t1500, t2000, t3000, t3500 := int64(1000), int64(1500), int64(2000), int64(3000), int64(3500)
line0 := lyrics.Line[0]
Expect(line0.Start).To(Equal(&t1000))
Expect(line0.End).To(Equal(&t3000))
Expect(line0.Value).To(Equal("Some lyrics here"))
Expect(line0.Cue).To(Equal([]Cue{
{Start: &t1000, End: &t1500, Value: "Some ", ByteStart: 0, ByteEnd: 4},
{Start: &t1500, End: &t2000, Value: "lyrics ", ByteStart: 5, ByteEnd: 11},
{Start: &t2000, End: &t3000, Value: "here", ByteStart: 12, ByteEnd: 15},
}))
line1 := lyrics.Line[1]
Expect(line1.Start).To(Equal(&t3000))
Expect(line1.End).To(Equal(&t3500))
Expect(line1.Value).To(Equal("More words"))
Expect(line1.Cue).To(Equal([]Cue{
{Start: &t3000, Value: "More ", ByteStart: 0, ByteEnd: 4},
{Start: &t3500, Value: "words", ByteStart: 5, ByteEnd: 9},
}))
Expect(line1.Cue[1].End).To(BeNil())
})
It("should ignore Enhanced LRC markers and return plain lines when no markers present", func() {
a, b := int64(1000), int64(3000)
lyrics, err := ToLyrics("xxx", "[00:01.00]Plain line\n[00:03.00]Another plain line")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Line).To(Equal([]Line{
{Start: &a, Value: "Plain line"},
{Start: &b, Value: "Another plain line"},
}))
})
It("should handle mixed Enhanced and plain LRC lines", func() {
lyrics, err := ToLyrics("xxx", "[00:01.00]<00:01.00>Some <00:01.50>lyrics\n[00:03.00]Plain line\n[00:05.00]<00:05.00>More <00:05.50>words")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Line).To(HaveLen(3))
t1000, t1500, t5000, t5500 := int64(1000), int64(1500), int64(5000), int64(5500)
t3000 := int64(3000)
Expect(lyrics.Line[0].Cue).To(Equal([]Cue{
{Start: &t1000, End: &t1500, Value: "Some ", ByteStart: 0, ByteEnd: 4},
{Start: &t1500, End: &t3000, Value: "lyrics", ByteStart: 5, ByteEnd: 10},
}))
Expect(lyrics.Line[0].Value).To(Equal("Some lyrics"))
Expect(lyrics.Line[0].End).To(Equal(&t3000))
Expect(lyrics.Line[1].Cue).To(BeNil())
Expect(lyrics.Line[1].Value).To(Equal("Plain line"))
Expect(lyrics.Line[2].Cue).To(Equal([]Cue{
{Start: &t5000, Value: "More ", ByteStart: 0, ByteEnd: 4},
{Start: &t5500, Value: "words", ByteStart: 5, ByteEnd: 9},
}))
Expect(lyrics.Line[2].Value).To(Equal("More words"))
})
It("should preserve byte offsets for Enhanced LRC cues", func() {
lyrics, err := ToLyrics("xxx", "[00:00.00]<00:00.00>Oh <00:00.90>love<00:01.30> me <00:01.60>tonight")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Line).To(HaveLen(1))
t0, t900, t1300, t1600 := int64(0), int64(900), int64(1300), int64(1600)
line := lyrics.Line[0]
Expect(line.Value).To(Equal("Oh love me tonight"))
Expect(line.Cue).To(Equal([]Cue{
{Start: &t0, Value: "Oh ", ByteStart: 0, ByteEnd: 2},
{Start: &t900, Value: "love", ByteStart: 3, ByteEnd: 6},
{Start: &t1300, Value: " me ", ByteStart: 7, ByteEnd: 10},
{Start: &t1600, Value: "tonight", ByteStart: 11, ByteEnd: 17},
}))
})
}) })

View File

@ -493,14 +493,79 @@ func mapExplicitStatus(explicitStatus string) string {
return "" return ""
} }
func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics) responses.StructuredLyric { func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics, enhanced bool) responses.StructuredLyric {
lines := make([]responses.Line, len(lyrics.Line)) lines := make([]responses.Line, len(lyrics.Line))
var cueLines []responses.CueLine
agentOrderByID := make(map[string]int, len(lyrics.Agents))
agentRoleByID := make(map[string]string, len(lyrics.Agents))
responseAgents := make([]responses.Agent, 0, len(lyrics.Agents))
for i, agent := range lyrics.Agents {
agentOrderByID[agent.ID] = i
agentRoleByID[agent.ID] = agent.Role
responseAgents = append(responseAgents, responses.Agent{
ID: agent.ID,
Role: agent.Role,
Name: agent.Name,
})
}
for i, line := range lyrics.Line { for i, line := range lyrics.Line {
lines[i] = responses.Line{ lines[i] = responses.Line{
Start: line.Start, Start: line.Start,
Value: line.Value, Value: line.Value,
} }
if !enhanced || len(line.Cue) == 0 {
continue
}
agentOrder := make([]string, 0, 2)
cuesByAgent := make(map[string][]model.Cue)
for _, cue := range line.Cue {
if cue.Start == nil {
continue
}
agentID := strings.TrimSpace(cue.AgentID)
if _, exists := cuesByAgent[agentID]; !exists {
agentOrder = append(agentOrder, agentID)
}
cuesByAgent[agentID] = append(cuesByAgent[agentID], cue)
}
sort.SliceStable(agentOrder, func(i, j int) bool {
leftRole := agentRoleByID[agentOrder[i]]
rightRole := agentRoleByID[agentOrder[j]]
if leftRole == "main" && rightRole != "main" {
return true
}
if rightRole == "main" && leftRole != "main" {
return false
}
leftOrder, leftOK := agentOrderByID[agentOrder[i]]
rightOrder, rightOK := agentOrderByID[agentOrder[j]]
if leftOK && rightOK && leftOrder != rightOrder {
return leftOrder < rightOrder
}
if leftOK != rightOK {
return leftOK
}
return i < j
})
for _, agentID := range agentOrder {
cueLine := responses.CueLine{
Index: int32(i),
Start: line.Start,
End: line.End,
Value: line.Value,
Cue: buildLyricCues(cuesByAgent[agentID], line.End),
}
if agentID != "" {
cueLine.AgentID = agentID
}
cueLines = append(cueLines, cueLine)
}
} }
structured := responses.StructuredLyric{ structured := responses.StructuredLyric{
@ -508,10 +573,22 @@ func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics) responses.St
DisplayTitle: lyrics.DisplayTitle, DisplayTitle: lyrics.DisplayTitle,
Lang: lyrics.Lang, Lang: lyrics.Lang,
Line: lines, Line: lines,
CueLine: cueLines,
Offset: lyrics.Offset, Offset: lyrics.Offset,
Synced: lyrics.Synced, Synced: lyrics.Synced,
} }
if enhanced {
kind := strings.TrimSpace(lyrics.Kind)
if kind == "" {
kind = "main"
}
structured.Kind = kind
if len(cueLines) > 0 && len(responseAgents) > 0 {
structured.Agents = responseAgents
}
}
if structured.DisplayArtist == "" { if structured.DisplayArtist == "" {
structured.DisplayArtist = mf.Artist structured.DisplayArtist = mf.Artist
} }
@ -522,11 +599,86 @@ func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics) responses.St
return structured return structured
} }
func buildLyricsList(mf *model.MediaFile, lyricsList model.LyricList) *responses.LyricsList { func buildLyricCues(cues []model.Cue, lineEnd *int64) []responses.LyricCue {
lyricList := make(responses.StructuredLyrics, len(lyricsList)) if len(cues) == 0 {
return nil
}
for i, lyrics := range lyricsList { hasAnyEnd := false
lyricList[i] = buildStructuredLyric(mf, lyrics) for i := range cues {
if cues[i].End != nil {
hasAnyEnd = true
break
}
}
normalized := make([]responses.LyricCue, 0, len(cues))
for i := range cues {
if cues[i].Start == nil {
continue
}
cue := responses.LyricCue{
Start: *cues[i].Start,
Value: cues[i].Value,
ByteStart: cues[i].ByteStart,
ByteEnd: cues[i].ByteEnd,
}
if hasAnyEnd {
end := cues[i].End
if end == nil {
if i+1 < len(cues) && cues[i+1].Start != nil {
v := *cues[i+1].Start
end = &v
} else if lineEnd != nil {
v := *lineEnd
end = &v
}
}
if end != nil && i+1 < len(cues) && cues[i+1].Start != nil && *end > *cues[i+1].Start {
v := *cues[i+1].Start
end = &v
}
if end != nil && *end < cue.Start {
v := cue.Start
end = &v
}
cue.End = end
}
normalized = append(normalized, cue)
}
if hasAnyEnd {
for i := range normalized {
if normalized[i].End == nil {
for j := range normalized {
normalized[j].End = nil
}
break
}
}
}
return normalized
}
func buildLyricsList(mf *model.MediaFile, lyricsList model.LyricList, enhanced bool) *responses.LyricsList {
var filtered model.LyricList
if enhanced {
filtered = lyricsList
} else {
// Without enhanced, only return "main" kind entries
for _, l := range lyricsList {
kind := strings.TrimSpace(l.Kind)
if kind == "" || kind == "main" {
filtered = append(filtered, l)
}
}
}
lyricList := make(responses.StructuredLyrics, len(filtered))
for i, lyrics := range filtered {
lyricList[i] = buildStructuredLyric(mf, lyrics, enhanced)
} }
res := &responses.LyricsList{ res := &responses.LyricsList{

View File

@ -10,6 +10,7 @@ import (
"github.com/navidrome/navidrome/conf" "github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/consts" "github.com/navidrome/navidrome/consts"
lyricssvc "github.com/navidrome/navidrome/core/lyrics"
"github.com/navidrome/navidrome/log" "github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model" "github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/resources" "github.com/navidrome/navidrome/resources"
@ -19,6 +20,8 @@ import (
"github.com/navidrome/navidrome/utils/req" "github.com/navidrome/navidrome/utils/req"
) )
const maxLegacyLyricsCandidates = 10
func (api *Router) GetAvatar(w http.ResponseWriter, r *http.Request) (*responses.Subsonic, error) { func (api *Router) GetAvatar(w http.ResponseWriter, r *http.Request) (*responses.Subsonic, error) {
if !conf.Server.EnableGravatar { if !conf.Server.EnableGravatar {
return api.getPlaceHolderAvatar(w, r) return api.getPlaceHolderAvatar(w, r)
@ -98,7 +101,11 @@ func (api *Router) GetLyrics(r *http.Request) (*responses.Subsonic, error) {
response := newResponse() response := newResponse()
lyricsResponse := responses.Lyrics{} lyricsResponse := responses.Lyrics{}
response.Lyrics = &lyricsResponse response.Lyrics = &lyricsResponse
mediaFiles, err := api.ds.MediaFile(r.Context()).GetAll(filter.SongsByArtistTitleWithLyricsFirst(artist, title)) opts := filter.SongsByArtistTitleWithLyricsFirst(artist, title)
// Search a bounded duplicate window so source-priority fallback can still
// reach older matches without turning legacy getLyrics into an unbounded scan.
opts.Max = maxLegacyLyricsCandidates
mediaFiles, err := api.ds.MediaFile(r.Context()).GetAll(opts)
if err != nil { if err != nil {
return nil, err return nil, err
@ -108,10 +115,23 @@ func (api *Router) GetLyrics(r *http.Request) (*responses.Subsonic, error) {
return response, nil return response, nil
} }
structuredLyrics, err := api.lyrics.GetLyrics(r.Context(), &mediaFiles[0]) var structuredLyrics model.LyricList
if batchLyrics, ok := api.lyrics.(lyricssvc.BatchLyrics); ok {
structuredLyrics, err = batchLyrics.GetLyricsForMediaFiles(r.Context(), mediaFiles)
if err != nil { if err != nil {
return nil, err return nil, err
} }
} else {
for i := range mediaFiles {
structuredLyrics, err = api.lyrics.GetLyrics(r.Context(), &mediaFiles[i])
if err != nil {
return nil, err
}
if len(structuredLyrics) > 0 {
break
}
}
}
if len(structuredLyrics) == 0 { if len(structuredLyrics) == 0 {
return response, nil return response, nil
@ -124,7 +144,6 @@ func (api *Router) GetLyrics(r *http.Request) (*responses.Subsonic, error) {
for _, line := range structuredLyrics[0].Line { for _, line := range structuredLyrics[0].Line {
lyricsText.WriteString(line.Value + "\n") lyricsText.WriteString(line.Value + "\n")
} }
lyricsResponse.Value = lyricsText.String() lyricsResponse.Value = lyricsText.String()
return response, nil return response, nil
@ -146,8 +165,10 @@ func (api *Router) GetLyricsBySongId(r *http.Request) (*responses.Subsonic, erro
return nil, err return nil, err
} }
enhanced, _ := req.Params(r).Bool("enhanced")
response := newResponse() response := newResponse()
response.LyricsList = buildLyricsList(mediaFile, structuredLyrics) response.LyricsList = buildLyricsList(mediaFile, structuredLyrics, enhanced)
return response, nil return response, nil
} }

View File

@ -186,6 +186,41 @@ var _ = Describe("MediaRetrievalController", func() {
Expect(response.Lyrics.Title).To(Equal("Never Gonna Give You Up")) Expect(response.Lyrics.Title).To(Equal("Never Gonna Give You Up"))
Expect(response.Lyrics.Value).To(Equal("We're no strangers to love\nYou know the rules and so do I\n")) Expect(response.Lyrics.Value).To(Equal("We're no strangers to love\nYou know the rules and so do I\n"))
}) })
It("should prefer higher-priority sidecar lyrics across duplicate candidates", func() {
conf.Server.LyricsPriority = ".ttml,embedded"
r := newGetRequest("artist=Rick+Astley", "title=Never+Gonna+Give+You+Up")
baseTime := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
embedded, err := model.ToLyrics("eng", "Newest duplicate embedded lyrics")
Expect(err).ToNot(HaveOccurred())
embeddedJSON, err := json.Marshal(model.LyricList{*embedded})
Expect(err).ToNot(HaveOccurred())
mockRepo.SetData(model.MediaFiles{
{
ID: "1",
Path: "tests/fixtures/01 Invisible (RED) Edit Version.mp3",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: string(embeddedJSON),
UpdatedAt: baseTime.Add(2 * time.Hour), // Newer duplicate with embedded lyrics only
},
{
ID: "2",
Path: "tests/fixtures/test.mp3",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: "[]",
UpdatedAt: baseTime.Add(1 * time.Hour), // Older, but has TTML sidecar
},
})
response, err := router.GetLyrics(r)
Expect(err).ToNot(HaveOccurred())
Expect(response.Lyrics.Artist).To(Equal("Rick Astley"))
Expect(response.Lyrics.Title).To(Equal("Never Gonna Give You Up"))
Expect(response.Lyrics.Value).To(Equal("We're no strangers to love\nYou know the rules and so do I\n"))
Expect(mockRepo.Options.Max).To(Equal(maxLegacyLyricsCandidates))
})
}) })
Describe("GetLyricsBySongId", func() { Describe("GetLyricsBySongId", func() {
@ -202,8 +237,10 @@ var _ = Describe("MediaRetrievalController", func() {
Expect(realLyric.DisplayArtist).To(Equal(expectedLyric.DisplayArtist)) Expect(realLyric.DisplayArtist).To(Equal(expectedLyric.DisplayArtist))
Expect(realLyric.DisplayTitle).To(Equal(expectedLyric.DisplayTitle)) Expect(realLyric.DisplayTitle).To(Equal(expectedLyric.DisplayTitle))
Expect(realLyric.Kind).To(Equal(expectedLyric.Kind))
Expect(realLyric.Lang).To(Equal(expectedLyric.Lang)) Expect(realLyric.Lang).To(Equal(expectedLyric.Lang))
Expect(realLyric.Synced).To(Equal(expectedLyric.Synced)) Expect(realLyric.Synced).To(Equal(expectedLyric.Synced))
Expect(realLyric.Agents).To(Equal(expectedLyric.Agents))
if expectedLyric.Offset == nil { if expectedLyric.Offset == nil {
Expect(realLyric.Offset).To(BeNil()) Expect(realLyric.Offset).To(BeNil())
@ -222,6 +259,38 @@ var _ = Describe("MediaRetrievalController", func() {
Expect(*realLine.Start).To(Equal(*expectedLine.Start)) Expect(*realLine.Start).To(Equal(*expectedLine.Start))
} }
} }
Expect(realLyric.CueLine).To(HaveLen(len(expectedLyric.CueLine)))
for j, realCueLine := range realLyric.CueLine {
expectedCueLine := expectedLyric.CueLine[j]
Expect(realCueLine.Index).To(Equal(expectedCueLine.Index))
Expect(realCueLine.Value).To(Equal(expectedCueLine.Value))
Expect(realCueLine.AgentID).To(Equal(expectedCueLine.AgentID))
if expectedCueLine.Start == nil {
Expect(realCueLine.Start).To(BeNil())
} else {
Expect(*realCueLine.Start).To(Equal(*expectedCueLine.Start))
}
if expectedCueLine.End == nil {
Expect(realCueLine.End).To(BeNil())
} else {
Expect(*realCueLine.End).To(Equal(*expectedCueLine.End))
}
Expect(realCueLine.Cue).To(HaveLen(len(expectedCueLine.Cue)))
for k, realCue := range realCueLine.Cue {
expectedCue := expectedCueLine.Cue[k]
Expect(realCue.Value).To(Equal(expectedCue.Value))
Expect(realCue.Start).To(Equal(expectedCue.Start))
Expect(realCue.ByteStart).To(Equal(expectedCue.ByteStart))
Expect(realCue.ByteEnd).To(Equal(expectedCue.ByteEnd))
if expectedCue.End == nil {
Expect(realCue.End).To(BeNil())
} else {
Expect(*realCue.End).To(Equal(*expectedCue.End))
}
}
}
} }
} }
@ -323,6 +392,427 @@ var _ = Describe("MediaRetrievalController", func() {
}, },
}) })
}) })
It("should return multilingual TTML sidecar lyrics", func() {
conf.Server.LyricsPriority = ".ttml,embedded"
r := newGetRequest("id=1")
mockRepo.SetData(model.MediaFiles{
{
ID: "1",
Path: "tests/fixtures/test.mp3",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: "[]",
},
})
response, err := router.GetLyricsBySongId(r)
Expect(err).ToNot(HaveOccurred())
porTime := int64(18800)
ttmlTime := int64(22800)
compareResponses(response.LyricsList, responses.LyricsList{
StructuredLyrics: responses.StructuredLyrics{
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Lang: "eng",
Synced: true,
Line: []responses.Line{
{
Start: &times[0],
Value: "We're no strangers to love",
},
{
Start: &ttmlTime,
Value: "You know the rules and so do I",
},
},
},
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Lang: "por",
Synced: true,
Line: []responses.Line{
{
Start: &porTime,
Value: "Nao somos estranhos ao amor",
},
},
},
},
})
})
It("should return metadata-linked translation and pronunciation tracks from TTML", func() {
conf.Server.LyricsPriority = ".ttml,embedded"
r := newGetRequest("id=1&enhanced=true")
mockRepo.SetData(model.MediaFiles{
{
ID: "1",
Path: "tests/fixtures/test-metadata.mp3",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: "[]",
},
})
response, err := router.GetLyricsBySongId(r)
Expect(err).ToNot(HaveOccurred())
mainStartA := int64(1000)
mainStartB := int64(2000)
tokenStartA := int64(2000)
tokenEndA := int64(2300)
tokenStartB := int64(2300)
tokenEndB := int64(2600)
compareResponses(response.LyricsList, responses.LyricsList{
StructuredLyrics: responses.StructuredLyrics{
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Kind: "main",
Lang: "ja",
Synced: true,
Line: []responses.Line{
{
Start: &mainStartA,
Value: "こんにちは",
},
{
Start: &mainStartB,
Value: "こんばんは",
},
},
},
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Kind: "translation",
Lang: "es",
Synced: true,
Line: []responses.Line{
{
Start: &mainStartA,
Value: "Hola",
},
},
},
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Kind: "pronunciation",
Lang: "ja-latn",
Synced: true,
Line: []responses.Line{
{
Start: &mainStartB,
Value: "konni",
},
},
CueLine: []responses.CueLine{
{
Index: 0,
Start: &mainStartB,
End: &tokenEndB,
Value: "konni",
Cue: []responses.LyricCue{
{
Start: tokenStartA,
End: &tokenEndA,
ByteStart: 0,
ByteEnd: 1,
Value: "ko",
},
{
Start: tokenStartB,
End: &tokenEndB,
ByteStart: 2,
ByteEnd: 4,
Value: "nni",
},
},
},
},
},
},
})
})
It("should return cue lines for songLyrics v2 clients with enhanced=true", func() {
r := newGetRequest("id=1&enhanced=true")
lineStart := int64(1000)
lineEnd := int64(3000)
tokenStartA := int64(1000)
tokenEndA := int64(1400)
tokenStartB := int64(2000)
tokenEndB := int64(2500)
lyricsJson, err := json.Marshal(model.LyricList{
{
Lang: "eng",
Agents: []model.Agent{{ID: "lead", Role: "main"}, {ID: "__nd_bg__|lead", Role: "bg"}},
Synced: true,
Line: []model.Line{
{
Start: &lineStart,
End: &lineEnd,
Value: "Hello echo",
Cue: []model.Cue{
{
Start: &tokenStartA,
End: &tokenEndA,
Value: "Hello",
ByteStart: 0,
ByteEnd: 4,
AgentID: "lead",
},
{
Start: &tokenStartB,
End: &tokenEndB,
Value: "echo",
ByteStart: 6,
ByteEnd: 9,
AgentID: "__nd_bg__|lead",
},
},
},
},
},
})
Expect(err).ToNot(HaveOccurred())
mockRepo.SetData(model.MediaFiles{
{
ID: "1",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: string(lyricsJson),
},
})
response, err := router.GetLyricsBySongId(r)
Expect(err).ToNot(HaveOccurred())
compareResponses(response.LyricsList, responses.LyricsList{
StructuredLyrics: responses.StructuredLyrics{
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Kind: "main",
Lang: "eng",
Synced: true,
Agents: []responses.Agent{
{ID: "lead", Role: "main"},
{ID: "__nd_bg__|lead", Role: "bg"},
},
Line: []responses.Line{
{
Start: &lineStart,
Value: "Hello echo",
},
},
CueLine: []responses.CueLine{
{
Index: 0,
Start: &lineStart,
End: &lineEnd,
Value: "Hello echo",
AgentID: "lead",
Cue: []responses.LyricCue{
{
Start: tokenStartA,
End: &tokenEndA,
ByteStart: 0,
ByteEnd: 4,
Value: "Hello",
},
},
},
{
Index: 0,
Start: &lineStart,
End: &lineEnd,
Value: "Hello echo",
AgentID: "__nd_bg__|lead",
Cue: []responses.LyricCue{
{
Start: tokenStartB,
End: &tokenEndB,
ByteStart: 6,
ByteEnd: 9,
Value: "echo",
},
},
},
},
},
},
})
})
It("should keep enhanced line-level lyrics when no cue data is available", func() {
r := newGetRequest("id=1&enhanced=true")
lineStart := int64(1000)
lineEnd := int64(3000)
lyricsJSON, err := json.Marshal(model.LyricList{
{
Kind: "main",
Lang: "eng",
Synced: true,
Line: []model.Line{
{
Start: &lineStart,
End: &lineEnd,
Value: "Line without word timing",
},
},
},
})
Expect(err).ToNot(HaveOccurred())
mockRepo.SetData(model.MediaFiles{
{
ID: "1",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: string(lyricsJSON),
},
})
response, err := router.GetLyricsBySongId(r)
Expect(err).ToNot(HaveOccurred())
compareResponses(response.LyricsList, responses.LyricsList{
StructuredLyrics: responses.StructuredLyrics{
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Kind: "main",
Lang: "eng",
Synced: true,
Line: []responses.Line{
{
Start: &lineStart,
Value: "Line without word timing",
},
},
},
},
})
})
It("should return required cue byte offsets for ambiguous and multibyte cue lines", func() {
r := newGetRequest("id=1&enhanced=true")
asciiLineStart := int64(0)
asciiLineEnd := int64(2400)
asciiCueStartA := int64(0)
asciiCueEndA := int64(300)
asciiCueStartB := int64(900)
asciiCueEndB := int64(1300)
asciiCueStartC := int64(1300)
asciiCueEndC := int64(1600)
asciiCueStartD := int64(1600)
utfLineStart := int64(2747)
utfLineEnd := int64(6214)
utfCueStartA := int64(2747)
utfCueEndA := int64(3018)
utfCueStartB := int64(3018)
utfCueEndB := int64(3179)
utfCueStartC := int64(3582)
utfCueEndC := int64(4100)
utfCueStartD := int64(4500)
utfCueEndD := int64(6214)
lyricsJSON, err := json.Marshal(model.LyricList{
{
Lang: "eng",
Synced: true,
Line: []model.Line{
{
Start: &asciiLineStart,
End: &asciiLineEnd,
Value: "Oh love love me tonight",
Cue: []model.Cue{
{Start: &asciiCueStartA, End: &asciiCueEndA, Value: "Oh", ByteStart: 0, ByteEnd: 1},
{Start: &asciiCueStartB, End: &asciiCueEndB, Value: "love", ByteStart: 8, ByteEnd: 11},
{Start: &asciiCueStartC, End: &asciiCueEndC, Value: "me", ByteStart: 13, ByteEnd: 14},
{Start: &asciiCueStartD, Value: "tonight", ByteStart: 16, ByteEnd: 22},
},
},
{
Start: &utfLineStart,
End: &utfLineEnd,
Value: "눈을 뜬 순간",
Cue: []model.Cue{
{Start: &utfCueStartA, End: &utfCueEndA, Value: "눈", ByteStart: 0, ByteEnd: 2},
{Start: &utfCueStartB, End: &utfCueEndB, Value: "을", ByteStart: 3, ByteEnd: 5},
{Start: &utfCueStartC, End: &utfCueEndC, Value: "뜬", ByteStart: 7, ByteEnd: 9},
{Start: &utfCueStartD, End: &utfCueEndD, Value: "순간", ByteStart: 11, ByteEnd: 16},
},
},
},
},
})
Expect(err).ToNot(HaveOccurred())
mockRepo.SetData(model.MediaFiles{
{
ID: "1",
Artist: "Rick Astley",
Title: "Never Gonna Give You Up",
Lyrics: string(lyricsJSON),
},
})
response, err := router.GetLyricsBySongId(r)
Expect(err).ToNot(HaveOccurred())
compareResponses(response.LyricsList, responses.LyricsList{
StructuredLyrics: responses.StructuredLyrics{
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Kind: "main",
Lang: "eng",
Synced: true,
Line: []responses.Line{
{Start: &asciiLineStart, Value: "Oh love love me tonight"},
{Start: &utfLineStart, Value: "눈을 뜬 순간"},
},
CueLine: []responses.CueLine{
{
Index: 0,
Start: &asciiLineStart,
End: &asciiLineEnd,
Value: "Oh love love me tonight",
Cue: []responses.LyricCue{
{Start: asciiCueStartA, End: &asciiCueEndA, Value: "Oh", ByteStart: 0, ByteEnd: 1},
{Start: asciiCueStartB, End: &asciiCueEndB, Value: "love", ByteStart: 8, ByteEnd: 11},
{Start: asciiCueStartC, End: &asciiCueEndC, Value: "me", ByteStart: 13, ByteEnd: 14},
{Start: asciiCueStartD, End: &asciiLineEnd, Value: "tonight", ByteStart: 16, ByteEnd: 22},
},
},
{
Index: 1,
Start: &utfLineStart,
End: &utfLineEnd,
Value: "눈을 뜬 순간",
Cue: []responses.LyricCue{
{Start: utfCueStartA, End: &utfCueEndA, Value: "눈", ByteStart: 0, ByteEnd: 2},
{Start: utfCueStartB, End: &utfCueEndB, Value: "을", ByteStart: 3, ByteEnd: 5},
{Start: utfCueStartC, End: &utfCueEndC, Value: "뜬", ByteStart: 7, ByteEnd: 9},
{Start: utfCueStartD, End: &utfCueEndD, Value: "순간", ByteStart: 11, ByteEnd: 16},
},
},
},
},
},
})
})
}) })
}) })

View File

@ -11,7 +11,7 @@ func (api *Router) GetOpenSubsonicExtensions(_ *http.Request) (*responses.Subson
extensions := responses.OpenSubsonicExtensions{ extensions := responses.OpenSubsonicExtensions{
{Name: "transcodeOffset", Versions: []int32{1}}, {Name: "transcodeOffset", Versions: []int32{1}},
{Name: "formPost", Versions: []int32{1}}, {Name: "formPost", Versions: []int32{1}},
{Name: "songLyrics", Versions: []int32{1}}, {Name: "songLyrics", Versions: []int32{1, 2}},
{Name: "indexBasedQueue", Versions: []int32{1}}, {Name: "indexBasedQueue", Versions: []int32{1}},
{Name: "transcoding", Versions: []int32{1}}, {Name: "transcoding", Versions: []int32{1}},
} }

View File

@ -58,7 +58,7 @@ var _ = Describe("GetOpenSubsonicExtensions", func() {
HaveLen(5), HaveLen(5),
ContainElement(responses.OpenSubsonicExtension{Name: "transcodeOffset", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "transcodeOffset", Versions: []int32{1}}),
ContainElement(responses.OpenSubsonicExtension{Name: "formPost", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "formPost", Versions: []int32{1}}),
ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1, 2}}),
ContainElement(responses.OpenSubsonicExtension{Name: "indexBasedQueue", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "indexBasedQueue", Versions: []int32{1}}),
ContainElement(responses.OpenSubsonicExtension{Name: "transcoding", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "transcoding", Versions: []int32{1}}),
)) ))
@ -87,7 +87,7 @@ var _ = Describe("GetOpenSubsonicExtensions", func() {
HaveLen(6), HaveLen(6),
ContainElement(responses.OpenSubsonicExtension{Name: "transcodeOffset", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "transcodeOffset", Versions: []int32{1}}),
ContainElement(responses.OpenSubsonicExtension{Name: "formPost", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "formPost", Versions: []int32{1}}),
ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1, 2}}),
ContainElement(responses.OpenSubsonicExtension{Name: "indexBasedQueue", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "indexBasedQueue", Versions: []int32{1}}),
ContainElement(responses.OpenSubsonicExtension{Name: "transcoding", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "transcoding", Versions: []int32{1}}),
ContainElement(responses.OpenSubsonicExtension{Name: "sonicSimilarity", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "sonicSimilarity", Versions: []int32{1}}),

View File

@ -543,11 +543,37 @@ type Line struct {
Value string `xml:",chardata" json:"value"` Value string `xml:",chardata" json:"value"`
} }
// LyricCue is a single timed fragment inside a CueLine.
//
// Start is always serialized; End is a pointer and is omitted when nil.
// ByteStart and ByteEnd locate the fragment inside the parent line's Value.
// NOTE(review): the accompanying tests use inclusive byte offsets into the
// UTF-8 text (e.g. "Oh" is 0..1, a 3-byte Hangul syllable is 0..2) - confirm
// against the songLyrics extension spec. Time units are presumably
// milliseconds - confirm.
type LyricCue struct {
Start int64 `xml:"start,attr" json:"start"`
End *int64 `xml:"end,attr,omitempty" json:"end,omitempty"`
ByteStart int `xml:"byteStart,attr" json:"byteStart"`
ByteEnd int `xml:"byteEnd,attr" json:"byteEnd"`
// Value is emitted as XML character data and as the "value" key in JSON.
Value string `xml:",chardata" json:"value"`
}
// Agent describes one entry of StructuredLyric.Agents.
//
// ID and Role are always serialized; Name is omitted when empty.
// NOTE(review): CueLine.AgentID presumably refers to Agent.ID - confirm
// against the code that populates both.
type Agent struct {
ID string `xml:"id,attr" json:"id"`
Role string `xml:"role,attr" json:"role"`
Name string `xml:"name,attr,omitempty" json:"name,omitempty"`
}
// CueLine is one lyric line together with its optional per-fragment cues.
//
// Index, and Value are always serialized. Start, End, AgentID and Cue are
// omitted when nil/empty. Unlike LyricCue, Value is an XML attribute here
// rather than character data, because the element body holds the cue
// children.
// NOTE(review): in the accompanying tests Index matches the position of the
// corresponding entry in StructuredLyric.Line - confirm this is guaranteed.
type CueLine struct {
Index int32 `xml:"index,attr" json:"index"`
Start *int64 `xml:"start,attr,omitempty" json:"start,omitempty"`
End *int64 `xml:"end,attr,omitempty" json:"end,omitempty"`
Value string `xml:"value,attr" json:"value"`
AgentID string `xml:"agentId,attr,omitempty" json:"agentId,omitempty"`
Cue []LyricCue `xml:"cue,omitempty" json:"cue,omitempty"`
}
type StructuredLyric struct { type StructuredLyric struct {
DisplayArtist string `xml:"displayArtist,attr,omitempty" json:"displayArtist,omitempty"` DisplayArtist string `xml:"displayArtist,attr,omitempty" json:"displayArtist,omitempty"`
DisplayTitle string `xml:"displayTitle,attr,omitempty" json:"displayTitle,omitempty"` DisplayTitle string `xml:"displayTitle,attr,omitempty" json:"displayTitle,omitempty"`
Kind string `xml:"kind,attr,omitempty" json:"kind,omitempty"`
Lang string `xml:"lang,attr" json:"lang"` Lang string `xml:"lang,attr" json:"lang"`
Line []Line `xml:"line" json:"line"` Line []Line `xml:"line" json:"line"`
Agents []Agent `xml:"agent,omitempty" json:"agents,omitempty"`
CueLine []CueLine `xml:"cueLine,omitempty" json:"cueLine,omitempty"`
Offset *int64 `xml:"offset,attr,omitempty" json:"offset,omitempty"` Offset *int64 `xml:"offset,attr,omitempty" json:"offset,omitempty"`
Synced bool `xml:"synced,attr" json:"synced"` Synced bool `xml:"synced,attr" json:"synced"`
} }

2
tests/fixtures/bom-test.ttml vendored Normal file
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml"><body><div xml:lang="eng"><p begin="00:00:00.00">BOM test line</p></div></body></tt>

BIN
tests/fixtures/bom-utf16-test.ttml vendored Normal file

Binary file not shown.

6
tests/fixtures/test-enhanced.lrc vendored Normal file
View File

@ -0,0 +1,6 @@
[ar:Test Artist]
[ti:Enhanced Test]
[lang:eng]
[00:01.00]<00:01.00>Some <00:01.50>lyrics <00:02.00>here
[00:03.00]<00:03.00>More <00:03.50>words
[00:05.00]Plain line without inline markers

25
tests/fixtures/test-metadata.ttml vendored Normal file
View File

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:itunes="http://music.apple.com/lyric-ttml-internal">
<head>
<metadata>
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
<translations>
<translation xml:lang="es">
<text for="L1">Hola</text>
</translation>
</translations>
<transliterations>
<transliteration xml:lang="ja-Latn">
<text for="L2"><span begin="00:02.000" end="00:02.300" xmlns="http://www.w3.org/ns/ttml">ko</span><span begin="00:02.300" end="00:02.600" xmlns="http://www.w3.org/ns/ttml">nni</span></text>
</transliteration>
</transliterations>
</iTunesMetadata>
</metadata>
</head>
<body xml:lang="ja">
<div>
<p begin="00:01.000" end="00:01.500" itunes:key="L1">こんにちは</p>
<p begin="00:02.000" end="00:02.700" itunes:key="L2">こんばんは</p>
</div>
</body>
</tt>

5
tests/fixtures/test.elrc vendored Normal file
View File

@ -0,0 +1,5 @@
[ar:ELRC Artist]
[ti:ELRC Song]
[lang:eng]
[00:01.00]<00:01.00>Lead <00:01.50>words
[00:03.00]Fallback line

7
tests/fixtures/test.srt vendored Normal file
View File

@ -0,0 +1,7 @@
1
00:00:18,800 --> 00:00:22,800
We're from subtitles
2
00:00:22,801 --> 00:00:26,000
Another subtitle line

12
tests/fixtures/test.ttml vendored Normal file
View File

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttp="http://www.w3.org/ns/ttml#parameter" ttp:frameRate="30" ttp:subFrameRate="2" ttp:tickRate="10">
<body>
<div xml:lang="eng">
<p begin="00:00:18.80">We're no strangers to love</p>
<p begin="00:00:22:24">You know the rules and so do I</p>
</div>
<div xml:lang="por">
<p begin="188t">Nao somos estranhos ao amor</p>
</div>
</body>
</tt>

View File

@ -9,6 +9,7 @@ export const PLAYER_SET_VOLUME = 'PLAYER_SET_VOLUME'
export const PLAYER_SET_MODE = 'PLAYER_SET_MODE' export const PLAYER_SET_MODE = 'PLAYER_SET_MODE'
export const TRANSCODING_SET_PROFILE = 'TRANSCODING_SET_PROFILE' export const TRANSCODING_SET_PROFILE = 'TRANSCODING_SET_PROFILE'
export const PLAYER_REFRESH_QUEUE = 'PLAYER_REFRESH_QUEUE' export const PLAYER_REFRESH_QUEUE = 'PLAYER_REFRESH_QUEUE'
export const PLAYER_UPDATE_LYRIC = 'PLAYER_UPDATE_LYRIC'
export const setTrack = (data) => ({ export const setTrack = (data) => ({
type: PLAYER_SET_TRACK, type: PLAYER_SET_TRACK,
@ -114,3 +115,8 @@ export const refreshQueue = (resolvedUrls) => ({
type: PLAYER_REFRESH_QUEUE, type: PLAYER_REFRESH_QUEUE,
data: resolvedUrls, data: resolvedUrls,
}) })
/**
 * Builds a PLAYER_UPDATE_LYRIC action.
 *
 * @param trackId id of the track the lyric belongs to
 * @param lyric   lyric payload to carry in the action
 * @returns action object of shape { type, data: { trackId, lyric } }
 */
export const updateQueueLyric = (trackId, lyric) => {
  const data = { trackId, lyric }
  return { type: PLAYER_UPDATE_LYRIC, data }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,514 @@
import React from 'react'
import {
cleanup,
fireEvent,
render,
screen,
waitFor,
} from '@testing-library/react'
import KaraokeLyricsOverlay from './KaraokeLyricsOverlay'
// Text the line-height readout is expected to show with default settings.
const DEFAULT_LINE_HEIGHT_TEXT = '1.30'
// Text the readout is expected to show after one ArrowRight press on the slider.
const NEXT_LINE_HEIGHT_TEXT = '1.32'
// Minimal stand-in for the audio element passed to the overlay. Tests
// override currentTime via object spread when they need a playback position.
// NOTE(review): currentTime appears to be in seconds while lyric timestamps
// are in milliseconds (1.2 vs. 1000..1800 below) - confirm.
const audioInstance = {
currentTime: 0,
paused: true,
seeking: false,
playbackRate: 1,
}
// Creates a minimal synced lyric fixture: a single line starting at 1000
// holding `value`, tagged with the given `kind` and `lang`.
const buildLyric = (kind, lang, value) => {
  const line = [{ start: 1000, value }]
  return { kind, lang, synced: true, line }
}
const renderOverlay = (props = {}) =>
render(
<KaraokeLyricsOverlay
visible={true}
mainLyric={buildLyric('main', 'ja', 'こんにちは')}
translationLyric={buildLyric('translation', 'en', 'Hello')}
pronunciationLyric={buildLyric('pronunciation', 'ja-Latn', 'konnichiwa')}
showTranslation={false}
showPronunciation={true}
translationEnabled={true}
pronunciationEnabled={true}
onToggleTranslation={() => {}}
onTogglePronunciation={() => {}}
audioInstance={audioInstance}
onClose={() => {}}
{...props}
/>,
)
// Behavioral tests for KaraokeLyricsOverlay: tooltips, inline mode, the
// appearance popup, layer ordering, cue segmentation, active-line styling,
// untimed lyrics, and persistence of settings in localStorage under the
// 'karaoke-lyrics-settings' key.
describe('<KaraokeLyricsOverlay /> behavior', () => {
beforeEach(() => {
// Start every test from empty persisted settings and a fixed viewport.
localStorage.clear()
window.innerWidth = 1200
window.innerHeight = 900
// The stub returns an id without ever invoking the callback, so no
// animation frame actually runs during a test.
vi.spyOn(window, 'requestAnimationFrame').mockImplementation(() => 1)
vi.spyOn(window, 'cancelAnimationFrame').mockImplementation(() => {})
})
afterEach(() => {
vi.restoreAllMocks()
cleanup()
})
it('shows tooltips for translation, pronunciation, and appearance controls', async () => {
// Defaults: translation hidden, pronunciation shown - hence "Show" vs "Hide".
renderOverlay()
fireEvent.mouseOver(screen.getByTestId('lyrics-language-badge-tr'))
expect(await screen.findByText('Show translation')).toBeInTheDocument()
fireEvent.mouseOver(screen.getByTestId('lyrics-language-badge-pr'))
expect(await screen.findByText('Hide pronunciation')).toBeInTheDocument()
fireEvent.mouseOver(screen.getByTestId('lyrics-settings-button'))
expect(await screen.findByText('Appearance')).toBeInTheDocument()
})
it('renders inline mode without the desktop resize handle', () => {
renderOverlay({ inline: true })
expect(screen.getByTestId('karaoke-lyrics-overlay')).toHaveAttribute(
'data-inline',
'true',
)
expect(screen.queryByTestId('lyrics-resize-handle')).not.toBeInTheDocument()
})
it('renders the appearance popup with Main label and default line height for older settings', async () => {
// Stored settings deliberately lack lineHeight/overlayHeight to mimic a
// settings object saved before those keys existed.
localStorage.setItem(
'karaoke-lyrics-settings',
JSON.stringify({
tr: { fontSize: 16, colorKey: 'blue' },
main: { fontSize: 26, colorKey: 'white' },
pr: { fontSize: 15, colorKey: 'green' },
}),
)
renderOverlay()
fireEvent.click(screen.getByTestId('lyrics-settings-button'))
expect(await screen.findByText('Appearance')).toBeInTheDocument()
expect(screen.getByText('Main', { selector: 'div' })).toBeInTheDocument()
expect(screen.queryByText('Default')).not.toBeInTheDocument()
expect(screen.getByTestId('lyrics-reset-appearance')).toBeInTheDocument()
expect(screen.getByTestId('lyrics-line-height-value')).toHaveTextContent(
DEFAULT_LINE_HEIGHT_TEXT,
)
})
it('renders the lyric group in main, pronunciation, translation order with layer badges', () => {
renderOverlay({
showTranslation: true,
showPronunciation: true,
})
const mainLine = screen.getByText('こんにちは')
const pronunciationLine = screen.getByText('konnichiwa')
const translationLine = screen.getByText('Hello')
// compareDocumentPosition returns a bitmask; the FOLLOWING bit is set when
// the argument node comes after the receiver in document order.
expect(
mainLine.compareDocumentPosition(pronunciationLine) &
Node.DOCUMENT_POSITION_FOLLOWING,
).toBeTruthy()
expect(
pronunciationLine.compareDocumentPosition(translationLine) &
Node.DOCUMENT_POSITION_FOLLOWING,
).toBeTruthy()
// Badge text is the layer label immediately followed by the language tag.
expect(screen.getByTestId('lyrics-language-badge-main')).toHaveTextContent(
'Mainja',
)
expect(screen.getByTestId('lyrics-language-badge-pr')).toHaveTextContent(
'PRja-Latn',
)
expect(screen.getByTestId('lyrics-language-badge-tr')).toHaveTextContent(
'TRen',
)
})
it('renders line-timed rows as whole-line spans without synthetic token splits', () => {
// None of the layers carries cueLine data, so each row must be one span.
renderOverlay({
mainLyric: {
kind: 'main',
lang: 'en',
synced: true,
line: [
{ start: 1000, end: 2400, value: 'Batter up, batter up, batter up' },
],
},
translationLyric: {
kind: 'translation',
lang: 'ja',
synced: true,
line: [
{
start: 1000,
end: 2400,
value: 'バッターアップ、バッターアップ、バッターアップ',
},
],
},
pronunciationLyric: {
kind: 'pronunciation',
lang: 'ja-Latn',
synced: true,
line: [
{
start: 1000,
end: 2400,
value: 'Battaa appu, battaa appu, battaa appu',
},
],
},
showTranslation: true,
showPronunciation: true,
})
const mainLine = screen.getByText(
'Batter up, batter up, batter up',
).parentElement
const pronunciationLine = screen.getByText(
'Battaa appu, battaa appu, battaa appu',
).parentElement
const translationLine = screen.getByText(
'バッターアップ、バッターアップ、バッターアップ',
).parentElement
expect(mainLine.querySelectorAll('span')).toHaveLength(1)
expect(pronunciationLine.querySelectorAll('span')).toHaveLength(1)
expect(translationLine.querySelectorAll('span')).toHaveLength(1)
})
it('uses cue byte offsets to segment repeated words in the karaoke line', () => {
// Only the second "love" (bytes 8..11) has a cue; the first one falls into
// the uncued gap between "Oh" and that cue and is expected to surface as
// the ' love ' filler span. byteEnd is inclusive in this fixture.
renderOverlay({
mainLyric: {
kind: 'main',
lang: 'en',
synced: true,
line: [{ start: 0, end: 2400, value: 'Oh love love me tonight' }],
cueLine: [
{
index: 0,
start: 0,
end: 2400,
value: 'Oh love love me tonight',
cue: [
{ start: 0, end: 300, value: 'Oh', byteStart: 0, byteEnd: 1 },
{
start: 900,
end: 1300,
value: 'love',
byteStart: 8,
byteEnd: 11,
},
{
start: 1300,
end: 1600,
value: 'me',
byteStart: 13,
byteEnd: 14,
},
{
start: 1600,
end: 2400,
value: 'tonight',
byteStart: 16,
byteEnd: 22,
},
],
},
],
},
translationLyric: null,
pronunciationLyric: null,
showTranslation: false,
showPronunciation: false,
translationEnabled: false,
pronunciationEnabled: false,
audioInstance: {
...audioInstance,
currentTime: 1.0,
},
})
const mainLine = screen.getByText('Oh').parentElement
const segments = Array.from(mainLine.querySelectorAll('span')).map(
(span) => span.textContent,
)
expect(segments).toEqual([
'Oh',
' love ',
'love',
' ',
'me',
' ',
'tonight',
])
})
it('uses cue byte offsets to preserve explicit space cues in multibyte karaoke lines', () => {
// Each Hangul syllable in this line occupies 3 bytes, so byte offsets
// differ from character indexes; spaces are provided as their own cues.
renderOverlay({
mainLyric: {
kind: 'main',
lang: 'ko',
synced: true,
line: [{ start: 0, end: 900, value: '눈을 뜬 순간' }],
cueLine: [
{
index: 0,
start: 0,
end: 900,
value: '눈을 뜬 순간',
cue: [
{ start: 0, end: 150, value: '눈을', byteStart: 0, byteEnd: 5 },
{ start: 150, end: 250, value: ' ', byteStart: 6, byteEnd: 6 },
{ start: 250, end: 450, value: '뜬', byteStart: 7, byteEnd: 9 },
{ start: 450, end: 550, value: ' ', byteStart: 10, byteEnd: 10 },
{ start: 550, end: 900, value: '순간', byteStart: 11, byteEnd: 16 },
],
},
],
},
translationLyric: null,
pronunciationLyric: null,
showTranslation: false,
showPronunciation: false,
translationEnabled: false,
pronunciationEnabled: false,
audioInstance: {
...audioInstance,
currentTime: 0.3,
},
})
const mainLine = screen.getByText('눈을').parentElement
const segments = Array.from(mainLine.querySelectorAll('span')).map(
(span) => span.textContent,
)
expect(segments).toEqual(['눈을', ' ', '뜬', ' ', '순간'])
})
it('highlights line-timed pronunciation and translation rows with the active main line', () => {
// currentTime 1.2 falls inside the first line (1000..1800), so the rows
// paired with it must be more opaque than those of the second line.
renderOverlay({
mainLyric: {
kind: 'main',
lang: 'en',
synced: true,
line: [
{ start: 1000, end: 1800, value: 'Line one' },
{ start: 2500, end: 3300, value: 'Line two' },
],
},
translationLyric: {
kind: 'translation',
lang: 'ja',
synced: true,
line: [
{ start: 1000, end: 1800, value: '一行目' },
{ start: 2500, end: 3300, value: '二行目' },
],
},
pronunciationLyric: {
kind: 'pronunciation',
lang: 'ja-Latn',
synced: true,
line: [
{ start: 1000, end: 1800, value: 'ichigyoume' },
{ start: 2500, end: 3300, value: 'nigyoume' },
],
},
showTranslation: true,
showPronunciation: true,
audioInstance: {
...audioInstance,
currentTime: 1.2,
},
})
const activePronunciation = screen.getByText('ichigyoume').parentElement
const inactivePronunciation = screen.getByText('nigyoume').parentElement
const activeTranslation = screen.getByText('一行目').parentElement
const inactiveTranslation = screen.getByText('二行目').parentElement
expect(parseFloat(activePronunciation.style.opacity)).toBeGreaterThan(
parseFloat(inactivePronunciation.style.opacity),
)
expect(parseFloat(activeTranslation.style.opacity)).toBeGreaterThan(
parseFloat(inactiveTranslation.style.opacity),
)
})
it('pre-wraps inactive main lines so the active line keeps the same wrap shape', () => {
renderOverlay({
mainLyric: {
kind: 'main',
lang: 'en',
synced: true,
line: [
{ start: 1000, end: 1800, value: 'First line that is getting focus' },
{ start: 2500, end: 3300, value: 'Second line waiting below' },
],
},
translationLyric: null,
pronunciationLyric: null,
showTranslation: false,
showPronunciation: false,
translationEnabled: false,
pronunciationEnabled: false,
audioInstance: {
...audioInstance,
currentTime: 1.2,
},
})
const activeLine = screen.getByText('First line that is getting focus')
.parentElement
const inactiveLine = screen.getByText('Second line waiting below')
.parentElement
// The active line is rendered larger but at full width; the inactive line
// is rendered smaller and capped at 80% width.
expect(parseFloat(activeLine.style.fontSize)).toBeGreaterThan(
parseFloat(inactiveLine.style.fontSize),
)
expect(activeLine.style.maxWidth).toBe('100%')
expect(inactiveLine.style.maxWidth).toBe('80%')
})
it('centers pronunciation text inside the pill container', () => {
renderOverlay({
showTranslation: false,
showPronunciation: true,
})
const pronunciationLine = screen.getByText('konnichiwa').parentElement
const styles = window.getComputedStyle(pronunciationLine)
expect(styles.display).toBe('inline-flex')
expect(styles.justifyContent).toBe('center')
expect(styles.alignItems).toBe('center')
})
it('renders untimed text lyrics in manual reading mode without a pinned active line', () => {
// synced: false and no start times - every line must look the same.
renderOverlay({
mainLyric: {
kind: 'main',
lang: 'en',
synced: false,
line: [{ value: 'First plain line' }, { value: 'Second plain line' }],
},
translationLyric: null,
pronunciationLyric: null,
showTranslation: false,
showPronunciation: false,
translationEnabled: false,
pronunciationEnabled: false,
})
const firstLine = screen.getByText('First plain line').parentElement
const secondLine = screen.getByText('Second plain line').parentElement
expect(firstLine.style.opacity).toBe('1')
expect(secondLine.style.opacity).toBe('1')
expect(firstLine.style.color).toBe(secondLine.style.color)
})
it('persists line height changes, keeps aux line spacing fixed, and stores overlay height', async () => {
renderOverlay({
mainLyric: buildLyric('main', 'en', 'Hello world'),
translationLyric: buildLyric('translation', 'es', 'Hola'),
pronunciationLyric: buildLyric('pronunciation', 'en-Latn', 'heh-loh'),
showTranslation: true,
showPronunciation: true,
translationEnabled: true,
pronunciationEnabled: true,
})
const overlay = screen.getByTestId('karaoke-lyrics-overlay')
const mainLine = screen.getByText('Hello world').parentElement
const pronunciationLine = screen.getByText('heh-loh').parentElement
expect(mainLine).toHaveStyle(`line-height: ${DEFAULT_LINE_HEIGHT_TEXT}`)
expect(pronunciationLine).toHaveStyle('line-height: 1.2')
fireEvent.click(screen.getByTestId('lyrics-settings-button'))
const slider = screen.getByRole('slider', { name: 'Line height' })
slider.focus()
// One keyboard step on the slider is expected to move 1.30 -> 1.32.
fireEvent.keyDown(slider, { key: 'ArrowRight' })
await waitFor(() =>
expect(screen.getByTestId('lyrics-line-height-value')).toHaveTextContent(
NEXT_LINE_HEIGHT_TEXT,
),
)
await waitFor(() =>
expect(mainLine).toHaveStyle(`line-height: ${NEXT_LINE_HEIGHT_TEXT}`),
)
expect(pronunciationLine).toHaveStyle('line-height: 1.2')
// Drag the handle upward by 40px (400 -> 360); the expected 340px is
// presumably the 300px default (see the reset test) plus that delta.
fireEvent.mouseDown(screen.getByTestId('lyrics-resize-handle'), {
clientY: 400,
})
fireEvent.mouseMove(window, { clientY: 360 })
fireEvent.mouseUp(window)
await waitFor(() => expect(overlay).toHaveStyle('height: 340px'))
const stored = JSON.parse(localStorage.getItem('karaoke-lyrics-settings'))
expect(stored.lineHeight).toBeCloseTo(1.32, 2)
expect(stored.overlayHeight).toBe(340)
})
it('resets appearance back to the default spacing and overlay height', async () => {
// Seed non-default values so the reset has something to undo.
localStorage.setItem(
'karaoke-lyrics-settings',
JSON.stringify({
lineHeight: 1.8,
overlayHeight: 420,
tr: { fontSize: 16, colorKey: 'yellow' },
main: { fontSize: 28, colorKey: 'cyan' },
pr: { fontSize: 15, colorKey: 'pink' },
}),
)
renderOverlay({
mainLyric: buildLyric('main', 'en', 'Hello world'),
translationLyric: null,
pronunciationLyric: null,
showPronunciation: false,
translationEnabled: false,
pronunciationEnabled: false,
})
const overlay = screen.getByTestId('karaoke-lyrics-overlay')
const mainLine = screen.getByText('Hello world').parentElement
expect(overlay).toHaveStyle('height: 420px')
expect(mainLine).toHaveStyle('line-height: 1.8')
fireEvent.click(screen.getByTestId('lyrics-settings-button'))
fireEvent.click(screen.getByTestId('lyrics-reset-appearance'))
await waitFor(() =>
expect(screen.getByTestId('lyrics-line-height-value')).toHaveTextContent(
DEFAULT_LINE_HEIGHT_TEXT,
),
)
await waitFor(() => expect(overlay).toHaveStyle('height: 300px'))
await waitFor(() =>
expect(mainLine).toHaveStyle(`line-height: ${DEFAULT_LINE_HEIGHT_TEXT}`),
)
// The reset must also be written back to localStorage.
const stored = JSON.parse(localStorage.getItem('karaoke-lyrics-settings'))
expect(stored.lineHeight).toBeCloseTo(1.3, 2)
expect(stored.overlayHeight).toBe(300)
})
})

View File

@ -0,0 +1,65 @@
import React, { useEffect, useState } from 'react'
import { createPortal } from 'react-dom'
export const MOBILE_KARAOKE_LYRICS_HOST_SELECTOR =
'.react-jinke-music-player-mobile-cover'
export const MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS = 'nd-mobile-lyrics-active'
// Looks up the element matching MOBILE_KARAOKE_LYRICS_HOST_SELECTOR.
// Returns null when there is no `document` global or nothing matches.
const resolveMobileLyricsHost = () =>
  typeof document === 'undefined'
    ? null
    : document.querySelector(MOBILE_KARAOKE_LYRICS_HOST_SELECTOR)
/**
 * Renders `children` through a portal into the mobile cover host element
 * while `active` is truthy, and renders nothing otherwise.
 *
 * While active, document.body is watched with a MutationObserver so the
 * portal attaches as soon as the host element appears and detaches when it
 * goes away. The host carries MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS for as long
 * as the portal is active on it.
 */
const MobileKaraokeLyricsPortal = ({ active, children }) => {
  const [host, setHost] = useState(() => {
    if (!active) {
      return null
    }
    return resolveMobileLyricsHost()
  })

  // Track the host element for as long as the portal is active.
  useEffect(() => {
    const canObserve = active && typeof document !== 'undefined'
    if (!canObserve) {
      setHost(null)
      return undefined
    }

    const syncHost = () => setHost(resolveMobileLyricsHost())
    syncHost()

    const observer = new MutationObserver(syncHost)
    observer.observe(document.body, { childList: true, subtree: true })

    return () => {
      observer.disconnect()
    }
  }, [active])

  // Mirror the active flag onto the host as a CSS class; always clean up.
  useEffect(() => {
    if (host) {
      host.classList.toggle(MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS, active)
      return () => {
        host.classList.remove(MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS)
      }
    }
    return undefined
  }, [active, host])

  return active && host ? createPortal(children, host) : null
}
export default MobileKaraokeLyricsPortal

View File

@ -0,0 +1,55 @@
import React from 'react'
import { cleanup, render, screen, waitFor } from '@testing-library/react'
import MobileKaraokeLyricsPortal, {
MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS,
} from './MobileKaraokeLyricsPortal'
const HOST_CLASS = 'react-jinke-music-player-mobile-cover'
// Tests for MobileKaraokeLyricsPortal: portal placement inside the mobile
// cover host, the active CSS class, and late attachment when the host is
// added to the DOM after the component has mounted.
describe('<MobileKaraokeLyricsPortal />', () => {
afterEach(() => {
cleanup()
// Remove any host element a test appended directly to the body.
document.body.innerHTML = ''
})
it('renders lyrics into the mobile cover host and toggles the active class', () => {
// Host exists before mount, so the portal can attach on first render.
const host = document.createElement('div')
host.className = HOST_CLASS
document.body.appendChild(host)
const { rerender } = render(
<MobileKaraokeLyricsPortal active={true}>
<div data-testid="mobile-inline-lyrics">Lyrics</div>
</MobileKaraokeLyricsPortal>,
)
expect(host).toContainElement(screen.getByTestId('mobile-inline-lyrics'))
expect(host).toHaveClass(MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS)
// Deactivating must unmount the children and drop the class.
rerender(
<MobileKaraokeLyricsPortal active={false}>
<div data-testid="mobile-inline-lyrics">Lyrics</div>
</MobileKaraokeLyricsPortal>,
)
expect(screen.queryByTestId('mobile-inline-lyrics')).not.toBeInTheDocument()
expect(host).not.toHaveClass(MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS)
})
it('attaches when the mobile cover host appears after mount', async () => {
render(
<MobileKaraokeLyricsPortal active={true}>
<div data-testid="mobile-inline-lyrics">Lyrics</div>
</MobileKaraokeLyricsPortal>,
)
// The host is added only after mounting; the component is expected to
// notice it asynchronously, hence the waitFor below.
const host = document.createElement('div')
host.className = HOST_CLASS
document.body.appendChild(host)
await waitFor(() =>
expect(host).toContainElement(screen.getByTestId('mobile-inline-lyrics')),
)
expect(host).toHaveClass(MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS)
})
})

View File

@ -22,6 +22,7 @@ import {
refreshQueue, refreshQueue,
setPlayMode, setPlayMode,
setTranscodingProfile, setTranscodingProfile,
updateQueueLyric,
setVolume, setVolume,
syncQueue, syncQueue,
} from '../actions' } from '../actions'
@ -33,6 +34,30 @@ import { keyMap } from '../hotkeys'
import keyHandlers from './keyHandlers' import keyHandlers from './keyHandlers'
import { calculateGain } from '../utils/calculateReplayGain' import { calculateGain } from '../utils/calculateReplayGain'
import { detectBrowserProfile, decisionService } from '../transcode' import { detectBrowserProfile, decisionService } from '../transcode'
import {
getPreferredLyricLanguage,
hasStructuredLyricContent,
selectLyricLayers,
structuredLyricToLrc,
} from './lyrics'
import {
resolveLyricsOverlayState,
togglePronunciationPreference,
} from './lyricsOverlayState'
import KaraokeLyricsOverlay from './KaraokeLyricsOverlay'
import MobileKaraokeLyricsPortal from './MobileKaraokeLyricsPortal'
// Shared "no lyrics" value for the three lyric layers. The same object
// reference is reused wherever layers are cleared, so it must not be mutated.
const emptyLyricLayers = {
main: null,
translation: null,
pronunciation: null,
}
// Returns a fresh { main, translation, pronunciation } object built from
// `layers`. A missing `layers` argument, a missing key, or any falsy value
// becomes null; unrelated keys are dropped.
const normalizeLyricLayers = (layers) => {
  const { main, translation, pronunciation } = layers ?? {}
  return {
    main: main || null,
    translation: translation || null,
    pronunciation: pronunciation || null,
  }
}
const Player = () => { const Player = () => {
const theme = useCurrentTheme() const theme = useCurrentTheme()
@ -120,6 +145,83 @@ const Player = () => {
const gainInfo = useSelector((state) => state.replayGain) const gainInfo = useSelector((state) => state.replayGain)
const [context, setContext] = useState(null) const [context, setContext] = useState(null)
const [gainNode, setGainNode] = useState(null) const [gainNode, setGainNode] = useState(null)
// Per-track lyric cache keyed by track id; lives for the lifetime of the component.
const lyricCacheRef = useRef(new Map())
// Monotonic counter used to discard responses of superseded lyric requests.
const lyricRequestIdRef = useRef(0)
// Ref to the ReactJkMusicPlayer instance, used to push lyrics into its state.
const playerRef = useRef(null)
// User intent to show the overlay; the effective visibility is derived below.
const [karaokeVisiblePreference, setKaraokeVisiblePreference] =
useState(false)
const [selectedLyricLayers, setSelectedLyricLayers] =
useState(emptyLyricLayers)
const [translationPreference, setTranslationPreference] = useState(false)
// NOTE(review): null presumably means "no explicit user choice yet"; the
// interpretation lives in lyricsOverlayState - confirm there.
const [pronunciationPreference, setPronunciationPreference] = useState(null)
const currentTrackId = playerState.current?.trackId
const currentTrackIsRadio = playerState.current?.isRadio
const selectedStructuredLyric = selectedLyricLayers.main
const hasKaraokeLyric = hasStructuredLyricContent(selectedStructuredLyric)
const hasTranslationLyric = hasStructuredLyricContent(
selectedLyricLayers.translation,
)
const hasPronunciationLyric = hasStructuredLyricContent(
selectedLyricLayers.pronunciation,
)
// Combine the user preferences with what the current track actually offers.
const { karaokeVisible, showTranslation, showPronunciation } =
resolveLyricsOverlayState({
karaokeVisiblePreference,
translationPreference,
pronunciationPreference,
hasKaraokeLyric,
hasTranslationLyric,
hasPronunciationLyric,
})
// Outside the desktop layout the overlay is rendered inline via the mobile portal.
const useInlineMobileLyrics = karaokeVisible && !isDesktop
// Pushes `lyric` into the mounted player's own state for every queue entry
// whose trackId matches, and refreshes the player's current `lyric` from the
// entry whose musicSrc equals the player's musicSrc. Does nothing when
// trackId is falsy, when the player ref is unset, or when the referenced
// object has no setState function. The updater returns null when nothing
// would change.
const applyLyricToRuntimePlayer = useCallback((trackId, lyric) => {
  const player = trackId ? playerRef.current : null
  if (typeof player?.setState !== 'function') {
    return
  }

  player.setState((prevState) => {
    const queue = Array.isArray(prevState.audioLists)
      ? prevState.audioLists
      : []

    let touched = false
    const audioLists = queue.map((entry) => {
      const needsUpdate = entry.trackId === trackId && entry.lyric !== lyric
      if (!needsUpdate) {
        return entry
      }
      touched = true
      return { ...entry, lyric }
    })

    // Only a string lyric on the playing entry replaces the current lyric.
    const playing = audioLists.find(
      (entry) => entry.musicSrc === prevState.musicSrc,
    )
    const playingLyric = playing?.lyric
    const nextLyric =
      typeof playingLyric === 'string' ? playingLyric : prevState.lyric

    const nothingToDo = !touched && nextLyric === prevState.lyric
    return nothingToDo ? null : { audioLists, lyric: nextLyric }
  })
}, [])
useEffect(() => { useEffect(() => {
if ( if (
@ -166,6 +268,88 @@ const Player = () => {
return () => window.removeEventListener('beforeunload', handleBeforeUnload) return () => window.removeEventListener('beforeunload', handleBeforeUnload)
}, [playerState, audioInstance]) }, [playerState, audioInstance])
// On every track change, immediately replace the displayed lyric layers with
// whatever the cache holds for the new track (or with the empty layers), so
// lyrics of the previous track are not shown while a request is in flight.
useEffect(() => {
if (!currentTrackId || currentTrackIsRadio) {
setSelectedLyricLayers(emptyLyricLayers)
return
}
const cached = lyricCacheRef.current.get(currentTrackId)
let layers = emptyLyricLayers
// A string cache entry carries no structured layers, so it maps to empty.
if (cached && typeof cached !== 'string') {
if (cached.layers) {
layers = normalizeLyricLayers(cached.layers)
} else if (cached.structuredLyric) {
// NOTE(review): entries with `structuredLyric` are not written anywhere in
// the visible code - presumably an older cache shape; confirm.
layers = normalizeLyricLayers({
main: cached.structuredLyric,
})
}
}
setSelectedLyricLayers(layers)
}, [currentTrackId, currentTrackIsRadio])
// Loads lyrics for the current track: serves them from the cache when
// present, otherwise requests them via getLyricsBySongId, caches the result,
// and pushes the LRC text to both the Redux queue and the runtime player.
useEffect(() => {
// Bump the request id on every run so that responses belonging to an
// earlier track can be recognised and ignored below.
lyricRequestIdRef.current += 1
const requestId = lyricRequestIdRef.current
if (!currentTrackId || currentTrackIsRadio) {
return
}
const cached = lyricCacheRef.current.get(currentTrackId)
if (cached !== undefined) {
// Cache entries may be a plain LRC string or an object with `lrc` plus
// either `layers` or `structuredLyric`.
const cachedLyric =
typeof cached === 'string' ? cached : cached?.lrc || ''
const cachedLayers =
typeof cached === 'string'
? emptyLyricLayers
: cached?.layers
? normalizeLyricLayers(cached.layers)
: normalizeLyricLayers({ main: cached?.structuredLyric })
setSelectedLyricLayers(cachedLayers)
if (cachedLyric) {
dispatch(updateQueueLyric(currentTrackId, cachedLyric))
applyLyricToRuntimePlayer(currentTrackId, cachedLyric)
}
return
}
subsonic
.getLyricsBySongId(currentTrackId)
.then((resp) => {
// A newer run of this effect has started; drop this stale response.
if (lyricRequestIdRef.current !== requestId) {
return
}
const structuredLyrics =
resp?.json?.['subsonic-response']?.lyricsList?.structuredLyrics || []
const layers = selectLyricLayers(
structuredLyrics,
getPreferredLyricLanguage(),
)
const lyric = layers.main ? structuredLyricToLrc(layers.main) : ''
// A successful response is cached even when it contains no lyrics, so the
// same track is not requested again.
lyricCacheRef.current.set(currentTrackId, {
lrc: lyric,
layers,
})
setSelectedLyricLayers(layers)
if (lyric !== '') {
dispatch(updateQueueLyric(currentTrackId, lyric))
applyLyricToRuntimePlayer(currentTrackId, lyric)
}
})
// Also reached when the handler above throws, not only on request failure.
.catch(() => {
if (lyricRequestIdRef.current !== requestId) {
return
}
setSelectedLyricLayers(emptyLyricLayers)
// Do not cache network/request failures as empty lyrics, so we can retry.
lyricCacheRef.current.delete(currentTrackId)
})
}, [dispatch, currentTrackId, currentTrackIsRadio, applyLyricToRuntimePlayer])
const defaultOptions = useMemo( const defaultOptions = useMemo(
() => ({ () => ({
theme: playerTheme, theme: playerTheme,
@ -177,7 +361,7 @@ const Player = () => {
clearPriorAudioLists: false, clearPriorAudioLists: false,
showDestroy: true, showDestroy: true,
showDownload: false, showDownload: false,
showLyric: true, showLyric: false,
showReload: false, showReload: false,
toggleMode: !isDesktop, toggleMode: !isDesktop,
glassBg: false, glassBg: false,
@ -215,12 +399,26 @@ const Player = () => {
(playerState.clear || playerState.playIndex === 0), (playerState.clear || playerState.playIndex === 0),
clearPriorAudioLists: playerState.clear, clearPriorAudioLists: playerState.clear,
extendsContent: ( extendsContent: (
<PlayerToolbar id={current.trackId} isRadio={current.isRadio} /> <PlayerToolbar
id={current.trackId}
isRadio={current.isRadio}
onToggleLyrics={() =>
setKaraokeVisiblePreference((visible) => !visible)
}
lyricsActive={karaokeVisible}
lyricsDisabled={!hasKaraokeLyric}
/>
), ),
defaultVolume: isMobilePlayer ? 1 : playerState.volume, defaultVolume: isMobilePlayer ? 1 : playerState.volume,
showMediaSession: !current.isRadio, showMediaSession: !current.isRadio,
} }
}, [playerState, defaultOptions, isMobilePlayer]) }, [
playerState,
defaultOptions,
isMobilePlayer,
karaokeVisible,
hasKaraokeLyric,
])
const onAudioListsChange = useCallback( const onAudioListsChange = useCallback(
(_, audioLists, audioInfo) => dispatch(syncQueue(audioInfo, audioLists)), (_, audioLists, audioInfo) => dispatch(syncQueue(audioInfo, audioLists)),
@ -340,10 +538,13 @@ const Player = () => {
) )
const onCoverClick = useCallback((mode, audioLists, audioInfo) => { const onCoverClick = useCallback((mode, audioLists, audioInfo) => {
if (!isDesktop && karaokeVisible) {
return
}
if (mode === 'full' && audioInfo?.song?.albumId) { if (mode === 'full' && audioInfo?.song?.albumId) {
window.location.href = `#/album/${audioInfo.song.albumId}/show` window.location.href = `#/album/${audioInfo.song.albumId}/show`
} }
}, []) }, [isDesktop, karaokeVisible])
const onAudioError = useCallback( const onAudioError = useCallback(
(error, currentPlayId, audioLists, audioInfo) => { (error, currentPlayId, audioLists, audioInfo) => {
@ -392,6 +593,7 @@ const Player = () => {
return ( return (
<ThemeProvider theme={createMuiTheme(theme)}> <ThemeProvider theme={createMuiTheme(theme)}>
<ReactJkMusicPlayer <ReactJkMusicPlayer
ref={playerRef}
{...options} {...options}
className={classes.player} className={classes.player}
onAudioListsChange={onAudioListsChange} onAudioListsChange={onAudioListsChange}
@ -407,6 +609,55 @@ const Player = () => {
onBeforeDestroy={onBeforeDestroy} onBeforeDestroy={onBeforeDestroy}
getAudioInstance={setAudioInstance} getAudioInstance={setAudioInstance}
/> />
{isDesktop && (
<KaraokeLyricsOverlay
visible={karaokeVisible}
mainLyric={selectedLyricLayers.main}
translationLyric={selectedLyricLayers.translation}
pronunciationLyric={selectedLyricLayers.pronunciation}
showTranslation={showTranslation}
showPronunciation={showPronunciation}
translationEnabled={hasTranslationLyric}
pronunciationEnabled={hasPronunciationLyric}
onToggleTranslation={() =>
setTranslationPreference((previous) =>
hasTranslationLyric ? !previous : false,
)
}
onTogglePronunciation={() =>
setPronunciationPreference((previous) =>
togglePronunciationPreference(previous, hasPronunciationLyric),
)
}
audioInstance={audioInstance}
onClose={() => setKaraokeVisiblePreference(false)}
/>
)}
<MobileKaraokeLyricsPortal active={useInlineMobileLyrics}>
<KaraokeLyricsOverlay
visible={useInlineMobileLyrics}
inline={true}
mainLyric={selectedLyricLayers.main}
translationLyric={selectedLyricLayers.translation}
pronunciationLyric={selectedLyricLayers.pronunciation}
showTranslation={showTranslation}
showPronunciation={showPronunciation}
translationEnabled={hasTranslationLyric}
pronunciationEnabled={hasPronunciationLyric}
onToggleTranslation={() =>
setTranslationPreference((previous) =>
hasTranslationLyric ? !previous : false,
)
}
onTogglePronunciation={() =>
setPronunciationPreference((previous) =>
togglePronunciationPreference(previous, hasPronunciationLyric),
)
}
audioInstance={audioInstance}
onClose={() => setKaraokeVisiblePreference(false)}
/>
</MobileKaraokeLyricsPortal>
<GlobalHotKeys handlers={handlers} keyMap={keyMap} allowChanges /> <GlobalHotKeys handlers={handlers} keyMap={keyMap} allowChanges />
</ThemeProvider> </ThemeProvider>
) )

View File

@ -0,0 +1,77 @@
import {
resolveLyricsOverlayState,
togglePronunciationPreference,
} from './lyricsOverlayState'
// Exercises resolveLyricsOverlayState and togglePronunciationPreference with
// the stored preferences held constant while per-track lyric availability changes.
describe('Player lyrics state helpers', () => {
it('keeps the lyrics window preference across track changes in the session', () => {
// Track with lyrics: the stored preference is honoured.
const visibleOnCurrentTrack = resolveLyricsOverlayState({
karaokeVisiblePreference: true,
translationPreference: false,
pronunciationPreference: null,
hasKaraokeLyric: true,
hasTranslationLyric: true,
hasPronunciationLyric: true,
})
expect(visibleOnCurrentTrack.karaokeVisible).toBe(true)
// Track without lyrics: same preference, but the overlay resolves to hidden.
const hiddenForTrackWithoutLyrics = resolveLyricsOverlayState({
karaokeVisiblePreference: true,
translationPreference: false,
pronunciationPreference: null,
hasKaraokeLyric: false,
hasTranslationLyric: false,
hasPronunciationLyric: false,
})
expect(hiddenForTrackWithoutLyrics.karaokeVisible).toBe(false)
// Next track with lyrics: the unchanged preference makes it visible again.
const restoredOnNextLyricsTrack = resolveLyricsOverlayState({
karaokeVisiblePreference: true,
translationPreference: false,
pronunciationPreference: null,
hasKaraokeLyric: true,
hasTranslationLyric: false,
hasPronunciationLyric: false,
})
expect(restoredOnNextLyricsTrack.karaokeVisible).toBe(true)
})
it('restores translation and pronunciation preferences after tracks without those layers', () => {
// With a null pronunciation preference and a pronunciation layer available,
// pronunciation is expected to be shown; translation follows its false preference.
const initialState = resolveLyricsOverlayState({
karaokeVisiblePreference: false,
translationPreference: false,
pronunciationPreference: null,
hasKaraokeLyric: true,
hasTranslationLyric: true,
hasPronunciationLyric: true,
})
expect(initialState.showTranslation).toBe(false)
expect(initialState.showPronunciation).toBe(true)
// Simulate the user turning translation on and toggling pronunciation
// from its unset (null) state, which is expected to yield false.
const translationPreference = true
const pronunciationPreference = togglePronunciationPreference(null, true)
expect(pronunciationPreference).toBe(false)
// Neither auxiliary layer exists on this track, so both resolve to hidden.
const hiddenOnTrackWithoutAuxLayers = resolveLyricsOverlayState({
karaokeVisiblePreference: false,
translationPreference,
pronunciationPreference,
hasKaraokeLyric: true,
hasTranslationLyric: false,
hasPronunciationLyric: false,
})
expect(hiddenOnTrackWithoutAuxLayers.showTranslation).toBe(false)
expect(hiddenOnTrackWithoutAuxLayers.showPronunciation).toBe(false)
// Layers are back: the stored preferences apply again.
const restoredOnTrackWithAuxLayers = resolveLyricsOverlayState({
karaokeVisiblePreference: false,
translationPreference,
pronunciationPreference,
hasKaraokeLyric: true,
hasTranslationLyric: true,
hasPronunciationLyric: true,
})
expect(restoredOnTrackWithAuxLayers.showTranslation).toBe(true)
expect(restoredOnTrackWithAuxLayers.showPronunciation).toBe(false)
})
})

View File

@ -4,7 +4,9 @@ import { useGetOne } from 'react-admin'
import { GlobalHotKeys } from 'react-hotkeys' import { GlobalHotKeys } from 'react-hotkeys'
import IconButton from '@material-ui/core/IconButton' import IconButton from '@material-ui/core/IconButton'
import { useMediaQuery } from '@material-ui/core' import { useMediaQuery } from '@material-ui/core'
import Tooltip from '@material-ui/core/Tooltip'
import { RiSaveLine } from 'react-icons/ri' import { RiSaveLine } from 'react-icons/ri'
import { RiFileMusicLine } from 'react-icons/ri'
import { LoveButton, useToggleLove } from '../common' import { LoveButton, useToggleLove } from '../common'
import { openSaveQueueDialog } from '../actions' import { openSaveQueueDialog } from '../actions'
import { keyMap } from '../hotkeys' import { keyMap } from '../hotkeys'
@ -55,7 +57,13 @@ const useStyles = makeStyles((theme) => ({
}, },
})) }))
const PlayerToolbar = ({ id, isRadio }) => { const PlayerToolbar = ({
id,
isRadio,
onToggleLyrics,
lyricsActive = false,
lyricsDisabled = false,
}) => {
const dispatch = useDispatch() const dispatch = useDispatch()
const { data, loading } = useGetOne('song', id, { enabled: !!id && !isRadio }) const { data, loading } = useGetOne('song', id, { enabled: !!id && !isRadio })
const [toggleLove, toggling] = useToggleLove('song', data) const [toggleLove, toggling] = useToggleLove('song', data)
@ -99,6 +107,25 @@ const PlayerToolbar = ({ id, isRadio }) => {
/> />
) )
const toggleLyricsButton = (
<Tooltip title="Toggle lyrics">
<span>
<IconButton
size={isDesktop ? 'small' : undefined}
onClick={onToggleLyrics}
disabled={!onToggleLyrics || lyricsDisabled}
data-testid="toggle-lyrics-button"
className={buttonClass}
color={lyricsActive ? 'primary' : 'default'}
>
<RiFileMusicLine
className={!isDesktop ? classes.mobileIcon : undefined}
/>
</IconButton>
</span>
</Tooltip>
)
return ( return (
<> <>
<GlobalHotKeys keyMap={keyMap} handlers={handlers} allowChanges /> <GlobalHotKeys keyMap={keyMap} handlers={handlers} allowChanges />
@ -106,11 +133,13 @@ const PlayerToolbar = ({ id, isRadio }) => {
<li className={`${listItemClass} item`}> <li className={`${listItemClass} item`}>
{saveQueueButton} {saveQueueButton}
{loveButton} {loveButton}
{toggleLyricsButton}
</li> </li>
) : ( ) : (
<> <>
<li className={`${listItemClass} item`}>{saveQueueButton}</li> <li className={`${listItemClass} item`}>{saveQueueButton}</li>
<li className={`${listItemClass} item`}>{loveButton}</li> <li className={`${listItemClass} item`}>{loveButton}</li>
<li className={`${listItemClass} item`}>{toggleLyricsButton}</li>
</> </>
)} )}
</> </>

View File

@ -71,6 +71,7 @@ describe('<PlayerToolbar />', () => {
// Verify both buttons are rendered // Verify both buttons are rendered
expect(screen.getByTestId('save-queue-button')).toBeInTheDocument() expect(screen.getByTestId('save-queue-button')).toBeInTheDocument()
expect(screen.getByTestId('love-button')).toBeInTheDocument() expect(screen.getByTestId('love-button')).toBeInTheDocument()
expect(screen.getByTestId('toggle-lyrics-button')).toBeInTheDocument()
// Verify desktop classes are applied // Verify desktop classes are applied
expect(listItems[0].className).toContain('toolbar') expect(listItems[0].className).toContain('toolbar')
@ -102,6 +103,14 @@ describe('<PlayerToolbar />', () => {
type: 'OPEN_SAVE_QUEUE_DIALOG', type: 'OPEN_SAVE_QUEUE_DIALOG',
}) })
}) })
it('triggers lyric toggle callback when lyrics button is clicked', () => {
const onToggleLyrics = vi.fn()
render(<PlayerToolbar id="song-1" onToggleLyrics={onToggleLyrics} />)
fireEvent.click(screen.getByTestId('toggle-lyrics-button'))
expect(onToggleLyrics).toHaveBeenCalledTimes(1)
})
}) })
describe('Mobile layout', () => { describe('Mobile layout', () => {
@ -114,11 +123,12 @@ describe('<PlayerToolbar />', () => {
// Each button should be in its own list item // Each button should be in its own list item
const listItems = screen.getAllByRole('listitem') const listItems = screen.getAllByRole('listitem')
expect(listItems).toHaveLength(2) expect(listItems).toHaveLength(3)
// Verify both buttons are rendered // Verify both buttons are rendered
expect(screen.getByTestId('save-queue-button')).toBeInTheDocument() expect(screen.getByTestId('save-queue-button')).toBeInTheDocument()
expect(screen.getByTestId('love-button')).toBeInTheDocument() expect(screen.getByTestId('love-button')).toBeInTheDocument()
expect(screen.getByTestId('toggle-lyrics-button')).toBeInTheDocument()
// Verify mobile classes are applied // Verify mobile classes are applied
expect(listItems[0].className).toContain('mobileListItem') expect(listItems[0].className).toContain('mobileListItem')
@ -140,6 +150,13 @@ describe('<PlayerToolbar />', () => {
const loveButton = screen.getByTestId('love-button') const loveButton = screen.getByTestId('love-button')
expect(loveButton).toBeDisabled() expect(loveButton).toBeDisabled()
}) })
it('disables lyrics button when lyrics are unavailable', () => {
render(<PlayerToolbar id="song-1" lyricsDisabled={true} />)
const lyricsButton = screen.getByTestId('toggle-lyrics-button')
expect(lyricsButton).toBeDisabled()
})
}) })
describe('Common behavior', () => { describe('Common behavior', () => {

View File

@ -0,0 +1,725 @@
// Lower-cases a language tag and turns every underscore separator into a
// hyphen (e.g. "zh_Hans_CN" -> "zh-hans-cn"). A missing tag becomes ''.
// A global regex is required: String.prototype.replace with a string pattern
// only rewrites the first occurrence.
const normalizeLanguageTag = (language) =>
  (language || '').toLowerCase().replace(/_/g, '-')
// 50 ms of tolerance (about three frames at 60fps); keeps line/token switching
// stable near tight boundaries.
const KARAOKE_SWITCH_EPSILON_MS = 50
// Values a lyric track's `kind` is normalized to (see normalizeLyricKind).
const LYRIC_KIND_MAIN = 'main'
const LYRIC_KIND_TRANSLATION = 'translation'
const LYRIC_KIND_PRONUNCIATION = 'pronunciation'
// Renders a value as text, prefixing a single '0' when the text is exactly
// one character long. Longer (or empty) text is returned untouched.
const padTime = (value) => {
  const text = value.toString()
  if (text.length !== 1) {
    return text
  }
  return `0${text}`
}
// Converts a raw time value to a finite number. Returns null for null,
// undefined, the empty string, and anything Number() cannot turn into a
// finite value.
const toTime = (value) => {
  const missing = value === '' || value == null
  if (missing) {
    return null
  }
  const parsed = Number(value)
  if (!Number.isFinite(parsed)) {
    return null
  }
  return parsed
}
// Converts a raw byte offset to a non-negative integer. Returns null for
// null, undefined, the empty string, non-integers and negative numbers.
const toByteOffset = (value) => {
  if (value === '' || value == null) {
    return null
  }
  const offset = Number(value)
  const usable = Number.isInteger(offset) && offset >= 0
  return usable ? offset : null
}
// Sort comparator for times that may be null/undefined: missing values sort
// after present ones, two missing values compare equal, and two present
// values compare by numeric difference.
const compareNullableTime = (a, b) => {
  const aMissing = a == null
  const bMissing = b == null
  if (aMissing || bMissing) {
    if (aMissing && bMissing) {
      return 0
    }
    return aMissing ? 1 : -1
  }
  return a - b
}
// Returns a new array of token copies ordered by start, then end (missing
// times last in both cases), then original position. The temporary `order`
// field used for the positional tie-break is stripped from the result.
const sortTokensByStart = (tokens) => {
  const indexed = tokens.map((token, order) => ({ ...token, order }))
  indexed.sort((left, right) => {
    const startDelta = compareNullableTime(left.start, right.start)
    if (startDelta !== 0) {
      return startDelta
    }
    const endDelta = compareNullableTime(left.end, right.end)
    if (endDelta !== 0) {
      return endDelta
    }
    return left.order - right.order
  })
  return indexed.map(({ order, ...rest }) => rest)
}
// True when two (already normalized) language tags are equal, or when one is
// a hyphen-delimited prefix of the other ("pt" vs "pt-br"). Empty or missing
// tags never match.
const languageMatch = (candidate, preferred) => {
  if (candidate && preferred) {
    if (candidate === preferred) {
      return true
    }
    const candidateIsRegional = candidate.startsWith(`${preferred}-`)
    return candidateIsRegional || preferred.startsWith(`${candidate}-`)
  }
  return false
}
// Truthy when `lyric` is flagged as synced and at least one of its lines
// carries a usable start time.
// Start times go through toTime so that a null or empty start counts as
// "missing"; a bare Number() would coerce both to 0 and report the line as
// timed. Optional chaining tolerates null entries in the line array.
const hasTimedLines = (lyric) =>
  lyric &&
  lyric.synced &&
  Array.isArray(lyric.line) &&
  lyric.line.some((line) => toTime(line?.start) != null)
// Narrows a list of lyric tracks to the ones hasTimedLines accepts; when none
// qualify, the original list is returned unchanged.
const preferTimedLyrics = (lyrics) => {
  const timedOnly = lyrics.filter((lyric) => hasTimedLines(lyric))
  if (timedOnly.length === 0) {
    return lyrics
  }
  return timedOnly
}
// Builds a clean token { start, end, value } from a raw cue. Returns null
// when the cue is missing or its value is not a non-empty string. byteStart
// and byteEnd are copied over only when they are valid byte offsets.
const normalizeToken = (token) => {
  if (!token) {
    return null
  }
  if (typeof token.value !== 'string' || token.value.length === 0) {
    return null
  }
  const normalized = {
    start: toTime(token.start),
    end: toTime(token.end),
    value: token.value,
  }
  const byteStart = toByteOffset(token.byteStart)
  if (byteStart != null) {
    normalized.byteStart = byteStart
  }
  const byteEnd = toByteOffset(token.byteEnd)
  if (byteEnd != null) {
    normalized.byteEnd = byteEnd
  }
  return normalized
}
// Number of bytes UTF-8 needs for a code point: 1 up to U+007F, 2 up to
// U+07FF, 3 up to U+FFFF, otherwise 4.
const utf8BytesForCodePoint = (codePoint) =>
  codePoint <= 0x7f
    ? 1
    : codePoint <= 0x7ff
      ? 2
      : codePoint <= 0xffff
        ? 3
        : 4
// Maps a UTF-8 byte offset into `text` to the matching JavaScript string
// (UTF-16 code unit) index. Walks the string one code point at a time and
// returns the index of the first code point that begins at or after the
// target byte. Invalid input or a non-positive target yields 0; a target past
// the end yields text.length.
export const utf8ByteOffsetToCodeUnitIndex = (text, targetByteOffset) => {
  if (typeof text !== 'string' || text.length === 0) {
    return 0
  }
  const target = toByteOffset(targetByteOffset)
  if (target == null || target <= 0) {
    return 0
  }

  let bytesSeen = 0
  let unitIndex = 0
  // String iteration yields whole code points (surrogate pairs as one
  // two-unit string), so symbol.length is the code-unit advance.
  for (const symbol of text) {
    if (bytesSeen >= target) {
      return unitIndex
    }
    bytesSeen += utf8BytesForCodePoint(symbol.codePointAt(0))
    unitIndex += symbol.length
  }
  return text.length
}
// Converts a UTF-8 byte range into a string index range plus the covered
// substring. The end index is resolved at byteEnd + 1, i.e. byteEnd is
// treated as the last byte of the range. Returns null for non-string text,
// invalid or reversed offsets, or a range that resolves to no characters.
export const utf8ByteRangeToCodeUnitRange = (text, byteStart, byteEnd) => {
  if (typeof text !== 'string') {
    return null
  }
  const firstByte = toByteOffset(byteStart)
  const lastByte = toByteOffset(byteEnd)
  const offsetsValid =
    firstByte != null && lastByte != null && lastByte >= firstByte
  if (!offsetsValid) {
    return null
  }

  const from = utf8ByteOffsetToCodeUnitIndex(text, firstByte)
  const to = utf8ByteOffsetToCodeUnitIndex(text, lastByte + 1)
  const insideText = from < to && from <= text.length && to <= text.length
  if (!insideText) {
    return null
  }
  return { start: from, end: to, text: text.slice(from, to) }
}
// Indexes structuredLyric.agents by id in a Map of { id, role, name }.
// Entries without a non-empty string id are skipped, the first entry wins for
// duplicate ids, and non-string role/name values become ''.
const buildAgentLookup = (structuredLyric) => {
  const textOrEmpty = (value) => (typeof value === 'string' ? value : '')
  const lookup = new Map()
  const agents = structuredLyric?.agents
  if (!Array.isArray(agents)) {
    return lookup
  }
  agents.forEach((agent) => {
    const id = textOrEmpty(agent?.id)
    if (id === '' || lookup.has(id)) {
      return
    }
    lookup.set(id, {
      id,
      role: textOrEmpty(agent?.role),
      name: textOrEmpty(agent?.name),
    })
  })
  return lookup
}
// Role string exposed to the UI for an agent: '' when the agent has no role
// or its role is 'main', otherwise the role itself.
const deriveUiRole = (agent) => {
  const role = agent?.role
  return role && role !== 'main' ? role : ''
}
// Normalizes one raw cue line: numeric index (falling back to the array
// position), start/end via toTime, string value, agent metadata resolved
// through agentLookup, and its cues normalized and sorted into `tokens`.
// When the agent id is unknown, the cue line's own `role` is used for both
// `role` and `agentRole`.
const normalizeCueLine = (cueLine, fallbackIndex, agentLookup) => {
  const textOrEmpty = (value) => (typeof value === 'string' ? value : '')

  const parsedIndex = Number(cueLine?.index)
  const agentId = textOrEmpty(cueLine?.agentId)
  const agent = agentId ? agentLookup.get(agentId) || null : null
  const fallbackRole = textOrEmpty(cueLine?.role)
  const rawCues = Array.isArray(cueLine?.cue) ? cueLine.cue : []
  const tokens = sortTokensByStart(
    rawCues.map((cue) => normalizeToken(cue)).filter(Boolean),
  )

  return {
    index: Number.isFinite(parsedIndex) ? parsedIndex : fallbackIndex,
    start: toTime(cueLine?.start),
    end: toTime(cueLine?.end),
    value: textOrEmpty(cueLine?.value),
    role: agent ? deriveUiRole(agent) : fallbackRole,
    agentId,
    agentRole: agent?.role || fallbackRole,
    agentName: agent?.name || '',
    tokens,
  }
}
// Maps a raw `kind` (case-insensitive, surrounding whitespace ignored) to one
// of the LYRIC_KIND_* values; anything that is not translation or
// pronunciation is treated as main.
const normalizeLyricKind = (kind) => {
  const cleaned = (kind || '').toLowerCase().trim()
  const auxiliaryKinds = [LYRIC_KIND_TRANSLATION, LYRIC_KIND_PRONUNCIATION]
  return auxiliaryKinds.includes(cleaned) ? cleaned : LYRIC_KIND_MAIN
}
// Picks the lyric track that best fits `preferredLanguage`.
// Lookup order: the full preferred tag, its base language (text before the
// first '-'), English, then the first track in the list.
// Returns null when `lyrics` is not a non-empty array.
const pickLyricByLanguage = (lyrics, preferredLanguage) => {
  if (!Array.isArray(lyrics) || lyrics.length === 0) {
    return null
  }

  const preferred = normalizeLanguageTag(preferredLanguage)
  const preferredBase = preferred.split('-')[0]
  const findByLanguage = (wanted) =>
    lyrics.find((lyric) =>
      languageMatch(normalizeLanguageTag(lyric?.lang), wanted),
    )

  return (
    findByLanguage(preferred) ||
    findByLanguage(preferredBase) ||
    findByLanguage('en') ||
    // languageMatch only accepts equal tags or hyphen-delimited prefixes, so
    // 'en' never matches tracks tagged with the three-letter code 'eng';
    // try that spelling too before giving up on English.
    findByLanguage('eng') ||
    lyrics[0]
  )
}
// Returns the { start, end } window of lines[index]. When the line has no end
// of its own, the next line's start is used instead. A missing line yields
// { start: null, end: null }.
const lineTimeWindow = (lines, index) => {
  const current = lines[index]
  if (!current) {
    return { start: null, end: null }
  }
  const ownEnd = toTime(current.end)
  return {
    start: toTime(current.start),
    end: ownEnd != null ? ownEnd : toTime(lines[index + 1]?.start),
  }
}
// True when at least one cue inside structuredLyric.cueLine carries a usable
// start time.
// Start times go through toTime so that a null or empty start counts as
// "missing"; a bare Number() would coerce both to 0 and report the cue as
// timed.
export const hasCueTiming = (structuredLyric) =>
  Boolean(
    structuredLyric &&
      Array.isArray(structuredLyric.cueLine) &&
      structuredLyric.cueLine.some(
        (cueLine) =>
          Array.isArray(cueLine?.cue) &&
          cueLine.cue.some((cue) => toTime(cue?.start) != null),
      ),
  )
// True when a lyric track has something to show: at least one line whose
// value is a non-blank string, or cue-level timing (see hasCueTiming).
export const hasStructuredLyricContent = (structuredLyric) => {
  if (!structuredLyric) {
    return false
  }
  const lines = structuredLyric.line
  const hasText =
    Array.isArray(lines) &&
    lines.some(
      (line) => typeof line?.value === 'string' && line.value.trim() !== '',
    )
  return hasText || hasCueTiming(structuredLyric)
}
// Resolves the language to prefer for lyrics: the 'locale' entry in
// localStorage when present, otherwise navigator.language, otherwise 'en'.
export const getPreferredLyricLanguage = () => {
  const storage = typeof window !== 'undefined' ? window.localStorage : null
  const storedLocale = storage ? storage.getItem('locale') : null
  if (storedLocale) {
    return storedLocale
  }
  const browserLanguage =
    typeof navigator !== 'undefined' ? navigator.language : null
  return browserLanguage || 'en'
}
// Chooses one track per layer ({ main, translation, pronunciation }) from a
// list of structured lyrics. Tracks without content are ignored, tracks are
// grouped by normalized kind, timed tracks are preferred within a group, and
// the final pick is made by language. When no track is of kind main, every
// usable track is a candidate for the main layer. Layers without candidates
// are null.
export const selectLyricLayers = (structuredLyrics, preferredLanguage) => {
  const emptyLayers = () => ({
    main: null,
    translation: null,
    pronunciation: null,
  })
  if (!Array.isArray(structuredLyrics)) {
    return emptyLayers()
  }
  const usable = structuredLyrics.filter((lyric) =>
    hasStructuredLyricContent(lyric),
  )
  if (usable.length === 0) {
    return emptyLayers()
  }

  const ofKind = (wanted) =>
    usable.filter((lyric) => normalizeLyricKind(lyric?.kind) === wanted)
  const pick = (candidates) =>
    pickLyricByLanguage(preferTimedLyrics(candidates), preferredLanguage)

  const mains = ofKind(LYRIC_KIND_MAIN)
  return {
    main: pick(mains.length ? mains : usable),
    translation: pick(ofKind(LYRIC_KIND_TRANSLATION)),
    pronunciation: pick(ofKind(LYRIC_KIND_PRONUNCIATION)),
  }
}
// Convenience wrapper: the main layer chosen by selectLyricLayers.
export const pickStructuredLyric = (structuredLyrics, preferredLanguage) => {
  const { main } = selectLyricLayers(structuredLyrics, preferredLanguage)
  return main
}
// Serializes a structured lyric track as LRC text, one
// "[mm:ss.xx] value\n" row per line that has a usable, non-negative start.
// Start times are divided by 10 to obtain hundredths of a second.
// Returns '' when the track is missing or has no `line` array.
export const structuredLyricToLrc = (structuredLyric) => {
  if (!structuredLyric || !Array.isArray(structuredLyric.line)) {
    return ''
  }

  const rows = []
  for (const line of structuredLyric.line) {
    // toTime treats null/'' as missing; a bare Number() would coerce them to
    // 0 and stamp untimed lines with [00:00.00].
    const start = toTime(line?.start)
    if (start == null || start < 0) {
      continue
    }

    const totalHundredths = Math.floor(start / 10)
    const hundredths = totalHundredths % 100
    const totalSeconds = Math.floor(totalHundredths / 100)
    const sec = totalSeconds % 60
    // The timestamp has no hour field, so minutes must keep counting past 59
    // instead of wrapping; otherwise lines after the first hour would be
    // stamped with times from the start of the track.
    const min = Math.floor(totalSeconds / 60)

    rows.push(
      `[${padTime(min)}:${padTime(sec)}.${padTime(hundredths)}] ${line.value || ''}\n`,
    )
  }
  return rows.join('')
}
// Picks the main track for the preferred language and serializes it as LRC;
// '' when no track can be picked.
export const structuredLyricsToLrc = (structuredLyrics, preferredLanguage) => {
  const track = pickStructuredLyric(structuredLyrics, preferredLanguage)
  return track ? structuredLyricToLrc(track) : ''
}
// Turns plain lyric lines into karaoke lines without tokens; `index` is the
// line's position in the input array.
const buildBaseKaraokeLines = (baseLines) => {
  const toKaraokeLine = (source, position) => {
    const text = typeof source.value === 'string' ? source.value : ''
    return {
      index: position,
      start: toTime(source.start),
      end: toTime(source.end),
      value: text,
      tokens: [],
    }
  }
  return baseLines.map((line, position) => toKaraokeLine(line, position))
}
// Builds karaoke lines from raw cue lines. Cue lines that share an index are
// merged into a single line whose tokens are the sorted union of the group's
// tokens, each token tagged with its own cue line's role/agent fields.
// Line-level start, end and value come from the first cue line of the group,
// then from baseLines[index], then from the tokens themselves.
export const buildKaraokeLinesFromCueLines = (
  rawCueLines,
  baseLines,
  agentLookup,
) => {
  // Group normalized cue lines by index, preserving first-seen order.
  const groups = new Map()
  rawCueLines.forEach((rawCueLine, position) => {
    const cueLine = normalizeCueLine(rawCueLine, position, agentLookup)
    const { role, agentId, agentName, agentRole } = cueLine
    const entry = {
      ...cueLine,
      tokens: cueLine.tokens.map((token) => ({
        ...token,
        role,
        agentId,
        agentName,
        agentRole,
      })),
    }
    const siblings = groups.get(entry.index)
    if (siblings) {
      siblings.push(entry)
    } else {
      groups.set(entry.index, [entry])
    }
  })

  const karaokeLines = []
  for (const [index, group] of groups) {
    const [first] = group
    const baseLine = baseLines[index] || {}
    const tokens = sortTokensByStart(group.flatMap((entry) => entry.tokens))

    // Token-derived timing, used only when neither the cue line nor the base
    // line provides a value.
    const firstTimedToken = tokens.find((token) => token.start != null)
    const lastEndedToken = [...tokens]
      .reverse()
      .find((token) => token.end != null)
    const tokenStart = firstTimedToken?.start ?? null
    const tokenEnd = lastEndedToken?.end ?? null

    const baseText = typeof baseLine.value === 'string' ? baseLine.value : ''
    const joinedTokens = tokens.map((token) => token.value).join('')

    karaokeLines.push({
      index,
      start: first.start ?? toTime(baseLine.start) ?? tokenStart,
      end: first.end ?? toTime(baseLine.end) ?? tokenEnd,
      value: first.value || baseText || joinedTokens,
      agentId: first.agentId,
      agentName: first.agentName,
      agentRole: first.agentRole,
      tokens,
    })
  }
  return karaokeLines
}
// Produces the karaoke line list for a structured lyric track. Cue lines are
// used when present, plain lines otherwise. Lines with neither text nor
// tokens are dropped; the rest are ordered by start (untimed lines last, ties
// by index), and a line without an end inherits the start of the line that
// follows it.
export const buildKaraokeLines = (structuredLyric) => {
  if (!structuredLyric) {
    return []
  }

  const listOrEmpty = (value) => (Array.isArray(value) ? value : [])
  const agentLookup = buildAgentLookup(structuredLyric)
  const baseLines = listOrEmpty(structuredLyric.line)
  const rawCueLines = listOrEmpty(structuredLyric.cueLine)

  const candidates =
    rawCueLines.length === 0
      ? buildBaseKaraokeLines(baseLines)
      : buildKaraokeLinesFromCueLines(rawCueLines, baseLines, agentLookup)

  const byStartThenIndex = (left, right) => {
    const leftUntimed = left.start == null
    const rightUntimed = right.start == null
    if (leftUntimed !== rightUntimed) {
      return leftUntimed ? 1 : -1
    }
    if (!leftUntimed && left.start !== right.start) {
      return left.start - right.start
    }
    return left.index - right.index
  }

  const ordered = candidates
    .filter((line) => line.value || line.tokens.length > 0)
    .sort(byStartThenIndex)

  ordered.forEach((line, position) => {
    if (line.end != null) {
      return
    }
    const followingStart = ordered[position + 1]?.start
    if (followingStart != null) {
      line.end = followingStart
    }
  })

  return ordered
}
// Resolves the { start, end } time window of the token at `tokenIndex` within
// `line`. `lineEndFallback` stands in for the line's end when the line has
// none. Returns { start: null, end: null } when the token does not exist.
export const resolveKaraokeTokenWindow = (
line,
tokenIndex,
lineEndFallback = null,
) => {
const tokens = Array.isArray(line?.tokens) ? line.tokens : []
const token = tokens[tokenIndex]
if (!token) {
return { start: null, end: null }
}
const prevToken = tokenIndex > 0 ? tokens[tokenIndex - 1] : null
const nextToken =
tokenIndex + 1 < tokens.length ? tokens[tokenIndex + 1] : null
const lineStart = toTime(line?.start)
const lineEnd = toTime(line?.end) ?? toTime(lineEndFallback)
const tokenCount = tokens.length
// A usable line window needs both bounds and a positive duration.
const hasLineWindow =
lineStart != null &&
lineEnd != null &&
Number.isFinite(lineStart) &&
Number.isFinite(lineEnd) &&
lineEnd > lineStart
// Estimates split the line window into tokenCount equal slices.
const estimatedStart =
hasLineWindow && tokenCount > 0
? lineStart + ((lineEnd - lineStart) * tokenIndex) / tokenCount
: null
const estimatedEnd =
hasLineWindow && tokenCount > 0
? lineStart + ((lineEnd - lineStart) * (tokenIndex + 1)) / tokenCount
: null
// Count explicit start/end values and how many distinct values they take.
let explicitStartCount = 0
let explicitEndCount = 0
const uniqueStarts = new Set()
const uniqueEnds = new Set()
for (let i = 0; i < tokenCount; i += 1) {
const explicitStart = toTime(tokens[i]?.start)
if (explicitStart != null) {
explicitStartCount += 1
uniqueStarts.add(explicitStart)
}
const explicitEnd = toTime(tokens[i]?.end)
if (explicitEnd != null) {
explicitEndCount += 1
uniqueEnds.add(explicitEnd)
}
}
// "Collapsed" timing: more than one explicit value, but the number of
// distinct values is at most max(1, tokenCount / 4).
const collapsedStarts =
explicitStartCount > 1 && uniqueStarts.size <= Math.max(1, tokenCount / 4)
const collapsedEnds =
explicitEndCount > 1 && uniqueEnds.size <= Math.max(1, tokenCount / 4)
// With collapsed timing and a usable line window, ignore the explicit values
// and return the evenly split estimate.
const shouldForceEstimated =
hasLineWindow && tokenCount > 1 && (collapsedStarts || collapsedEnds)
if (shouldForceEstimated) {
return {
start: estimatedStart,
end: estimatedEnd,
}
}
// Start fallback order: explicit start, previous token's end (or start),
// estimated start, line start.
const prevEnd = toTime(prevToken?.end) ?? toTime(prevToken?.start)
let start = toTime(token.start)
if (start == null) {
start = prevEnd ?? estimatedStart ?? lineStart
}
// End fallback order: explicit end, next token's explicit start, estimated
// boundary with the next token, estimated end, line end.
let end = toTime(token.end)
if (end == null) {
const nextDirectStart = toTime(nextToken?.start)
const nextEstimatedStart =
hasLineWindow && tokenIndex + 1 < tokenCount
? lineStart + ((lineEnd - lineStart) * (tokenIndex + 1)) / tokenCount
: null
end = nextDirectStart ?? nextEstimatedStart ?? estimatedEnd ?? lineEnd
}
// A lone token whose own window is missing or at most 1 unit long spans the
// whole line window instead.
if (
tokenCount === 1 &&
hasLineWindow &&
(start == null || end == null || end <= start + 1)
) {
start = lineStart
end = lineEnd
}
// Never return an end that precedes the start.
if (start != null && end != null && end < start) {
end = start
}
return { start, end }
}
// Finds the active line and token for the playback position `currentTimeMs`.
// Returns { lineIndex, tokenIndex }; both are -1 when `lines` is empty or not
// an array, and tokenIndex is -1 when no token of the active line has started.
// All comparisons allow KARAOKE_SWITCH_EPSILON_MS of tolerance.
export const getActiveKaraokeState = (lines, currentTimeMs) => {
if (!Array.isArray(lines) || lines.length === 0) {
return { lineIndex: -1, tokenIndex: -1 }
}
// A non-numeric position is treated as 0.
const current = Number.isFinite(Number(currentTimeMs))
? Number(currentTimeMs)
: 0
// Forward pass: advance to the last line that has started (lines without a
// start are stepped over), stopping at the first line that starts later.
let lineIndex = 0
for (let i = 0; i < lines.length; i += 1) {
const lineStart = toTime(lines[i]?.start)
if (lineStart == null || lineStart <= current + KARAOKE_SWITCH_EPSILON_MS) {
lineIndex = i
continue
}
break
}
// Backward pass: from that candidate, walk back to the nearest line that has
// started and has not yet ended (or has no known end). If none qualifies, the
// candidate from the forward pass is kept.
for (let i = lineIndex; i >= 0; i -= 1) {
const lineStart = toTime(lines[i]?.start)
const lineEnd = toTime(lines[i]?.end) ?? toTime(lines[i + 1]?.start)
if (lineStart != null && current + KARAOKE_SWITCH_EPSILON_MS < lineStart) {
continue
}
if (lineEnd == null || current <= lineEnd + KARAOKE_SWITCH_EPSILON_MS) {
lineIndex = i
break
}
}
const activeLine = lines[lineIndex] || null
const tokens = Array.isArray(activeLine?.tokens) ? activeLine.tokens : []
// Token scan: keep the last token that has started; stop early on the first
// started token whose window still contains the current position.
let tokenIndex = -1
for (let i = 0; i < tokens.length; i += 1) {
const { start: tokenStart, end: tokenEnd } = resolveKaraokeTokenWindow(
activeLine,
i,
lines[lineIndex + 1]?.start,
)
if (
tokenStart == null ||
tokenStart <= current + KARAOKE_SWITCH_EPSILON_MS
) {
tokenIndex = i
if (tokenEnd != null && current <= tokenEnd + KARAOKE_SWITCH_EPSILON_MS) {
break
}
continue
}
break
}
return { lineIndex, tokenIndex }
}
// True when any line has a start time, or any of its tokens has a start or
// end time. False for anything that is not an array.
export const hasUsableKaraokeTiming = (lines) => {
  if (!Array.isArray(lines)) {
    return false
  }
  const tokenIsTimed = (token) =>
    toTime(token?.start) != null || toTime(token?.end) != null
  return lines.some((line) => {
    if (toTime(line?.start) != null) {
      return true
    }
    return Array.isArray(line?.tokens) && line.tokens.some(tokenIsTimed)
  })
}
// Finds the line in `layerLines` (e.g. a translation or pronunciation layer)
// that best lines up in time with mainLines[mainIndex]. Returns the layer
// line's array position, or -1 when the inputs are unusable, the main line
// has no start, or no layer line is close enough.
export const findLayerLineIndexForMain = (mainLines, layerLines, mainIndex) => {
if (
!Array.isArray(mainLines) ||
!Array.isArray(layerLines) ||
mainLines.length === 0 ||
layerLines.length === 0 ||
mainIndex < 0 ||
mainIndex >= mainLines.length
) {
return -1
}
const { start: mainStart, end: mainEnd } = lineTimeWindow(
mainLines,
mainIndex,
)
if (mainStart == null) {
return -1
}
// Largest start-time distance accepted for a non-overlapping candidate:
// the main line's duration plus 420, clamped to the range [550, 1400].
const mainWindowEnd = mainEnd ?? mainStart
const mainWindowDuration = Math.max(0, mainWindowEnd - mainStart)
const maxDelta = Math.max(550, Math.min(1400, mainWindowDuration + 420))
// Lowest score wins; on equal scores the earlier layer line is kept.
let bestIdx = -1
let bestScore = Number.POSITIVE_INFINITY
for (let i = 0; i < layerLines.length; i += 1) {
const { start, end } = lineTimeWindow(layerLines, i)
if (start != null && end != null) {
// Candidates whose window overlaps (or touches) the main window are scored
// by start distance plus a penalty of 30 per position of index distance,
// and are not subject to maxDelta.
const overlap = Math.min(end, mainEnd ?? end) - Math.max(start, mainStart)
if (overlap >= 0) {
const score = Math.abs(start - mainStart) + Math.abs(i - mainIndex) * 30
if (score < bestScore) {
bestScore = score
bestIdx = i
}
continue
}
}
if (start != null) {
// Non-overlapping (or open-ended) candidates must start within maxDelta
// and carry a larger index-distance penalty of 45 per position.
if (Math.abs(start - mainStart) > maxDelta) {
continue
}
const score = Math.abs(start - mainStart) + Math.abs(i - mainIndex) * 45
if (score < bestScore) {
bestScore = score
bestIdx = i
}
}
}
return bestIdx
}
// Like findLayerLineIndexForMain, but returns both the matched position and
// the layer line itself ({ index: -1, line: null } when nothing matches).
export const resolveLayerLineForMain = (mainLines, layerLines, mainIndex) => {
  const matchedIndex = findLayerLineIndexForMain(
    mainLines,
    layerLines,
    mainIndex,
  )
  if (matchedIndex < 0) {
    return { index: matchedIndex, line: null }
  }
  return { index: matchedIndex, line: layerLines[matchedIndex] }
}
// Returns the main line unchanged (same object reference).
export const buildHighlightedMainLine = (line) => {
  return line
}
// Returns the auxiliary line unchanged (same object reference), or null when
// it is null/undefined. The reference line is not used.
export const buildHighlightedAuxLine = (_referenceLine, auxiliaryLine) => {
  if (auxiliaryLine == null) {
    return null
  }
  return auxiliaryLine
}

View File

@ -0,0 +1,786 @@
import {
buildHighlightedAuxLine,
buildHighlightedMainLine,
buildKaraokeLines,
buildKaraokeLinesFromCueLines,
findLayerLineIndexForMain,
getActiveKaraokeState,
getPreferredLyricLanguage,
hasUsableKaraokeTiming,
hasStructuredLyricContent,
pickStructuredLyric,
resolveKaraokeTokenWindow,
resolveLayerLineForMain,
selectLyricLayers,
structuredLyricsToLrc,
structuredLyricToLrc,
utf8ByteOffsetToCodeUnitIndex,
utf8ByteRangeToCodeUnitRange,
} from './lyrics'
describe('lyrics helpers', () => {
beforeEach(() => {
localStorage.clear()
})
it('prefers a lyric track that matches the locale', () => {
const selected = pickStructuredLyric(
[
{
lang: 'eng',
synced: true,
line: [{ start: 1000, value: 'English line' }],
},
{
lang: 'pt-BR',
synced: true,
line: [{ start: 1000, value: 'Linha em portugues' }],
},
],
'pt-BR',
)
expect(selected.lang).toBe('pt-BR')
})
it('falls back to english when preferred locale is not available', () => {
const selected = pickStructuredLyric(
[
{
lang: 'eng',
synced: true,
line: [{ start: 1000, value: 'English line' }],
},
{
lang: 'deu',
synced: true,
line: [{ start: 1000, value: 'Deutsche Zeile' }],
},
],
'pt-BR',
)
expect(selected.lang).toBe('eng')
})
it('falls back to first synced track when english is missing', () => {
const selected = pickStructuredLyric(
[
{
lang: 'jpn',
synced: true,
line: [{ start: 1000, value: 'Nihongo' }],
},
{
lang: 'deu',
synced: true,
line: [{ start: 1000, value: 'Deutsch' }],
},
],
'pt-BR',
)
expect(selected.lang).toBe('jpn')
})
it('selects translation and pronunciation layers by kind', () => {
const layers = selectLyricLayers(
[
{
kind: 'main',
lang: 'ja',
synced: true,
line: [{ start: 1000, value: 'こんにちは' }],
},
{
kind: 'translation',
lang: 'es',
synced: true,
line: [{ start: 1000, value: 'Hola' }],
},
{
kind: 'pronunciation',
lang: 'ja-Latn',
synced: true,
line: [{ start: 1000, value: 'konnichiwa' }],
},
],
'es-MX',
)
expect(layers.main.lang).toBe('ja')
expect(layers.translation.lang).toBe('es')
expect(layers.pronunciation.lang).toBe('ja-Latn')
})
it('treats missing kind as main for backward compatibility', () => {
const layers = selectLyricLayers(
[
{
lang: 'eng',
synced: true,
line: [{ start: 1000, value: 'Main' }],
},
],
'eng',
)
expect(layers.main.lang).toBe('eng')
expect(layers.translation).toBeNull()
expect(layers.pronunciation).toBeNull()
})
it('falls back to unsynced lyric content when no timed track exists', () => {
const layers = selectLyricLayers(
[
{
lang: 'eng',
synced: false,
line: [{ value: 'Plain embedded lyric' }],
},
],
'eng',
)
expect(layers.main).toEqual({
lang: 'eng',
synced: false,
line: [{ value: 'Plain embedded lyric' }],
})
})
it('still prefers timed lyrics when both timed and untimed tracks exist', () => {
const layers = selectLyricLayers(
[
{
lang: 'eng',
synced: false,
line: [{ value: 'Plain lyric' }],
},
{
lang: 'eng',
synced: true,
line: [{ start: 1000, value: 'Timed lyric' }],
},
],
'eng',
)
expect(layers.main).toEqual({
lang: 'eng',
synced: true,
line: [{ start: 1000, value: 'Timed lyric' }],
})
})
it('matches layer line by timing for the active main line', () => {
const mainLines = [
{ index: 0, start: 1000, end: 1800, value: 'Line A', tokens: [] },
{ index: 1, start: 2000, end: 2800, value: 'Line B', tokens: [] },
]
const layerLines = [
{ index: 0, start: 900, end: 1750, value: 'A2', tokens: [] },
{ index: 1, start: 2050, end: 2900, value: 'B2', tokens: [] },
]
expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(1)
expect(resolveLayerLineForMain(mainLines, layerLines, 0).line.value).toBe(
'A2',
)
})
it('matches metadata layers by nearest timing even when indexes differ', () => {
const mainLines = [
{ index: 0, start: 1000, end: 1800, value: 'Line A', tokens: [] },
{ index: 1, start: 2000, end: 2800, value: 'Line B', tokens: [] },
{ index: 2, start: 3000, end: 3800, value: 'Line C', tokens: [] },
]
const layerLines = [
{ index: 2, start: 3020, end: 3820, value: 'C2', tokens: [] },
{ index: 0, start: 980, end: 1760, value: 'A2', tokens: [] },
{ index: 1, start: 2010, end: 2810, value: 'B2', tokens: [] },
]
expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(2)
expect(resolveLayerLineForMain(mainLines, layerLines, 2).line.value).toBe(
'C2',
)
})
it('keeps translation lines line-level when they do not have real cue timing', () => {
const mainLine = {
index: 0,
start: 1000,
end: 2200,
value: '불을 질러라',
tokens: [
{ start: 1000, end: 1300, value: '불을 ' },
{ start: 1300, end: 1650, value: '질' },
{ start: 1650, end: 2200, value: '러라' },
],
}
const translationLine = {
index: 0,
start: 1000,
end: 2200,
value: 'Set it on fire',
tokens: [],
}
const highlighted = buildHighlightedAuxLine(mainLine, translationLine, 2600)
expect(highlighted).toBe(translationLine)
expect(highlighted.tokens).toEqual([])
})
it('keeps pronunciation lines line-level when they do not have real cue timing', () => {
const mainLine = {
index: 0,
start: 1000,
end: 2200,
value: 'You もっと強く 素早く 吹き飛ばせ',
tokens: [],
}
const pronunciationLine = {
index: 0,
start: 1000,
end: 2200,
value: 'You motto tsuyoku subayaku fukitobase',
tokens: [],
}
const highlighted = buildHighlightedAuxLine(
mainLine,
pronunciationLine,
2600,
)
expect(highlighted).toBe(pronunciationLine)
expect(highlighted.tokens).toEqual([])
})
// A main line without tokens must be returned as the same object, with no
// tokens added to it.
it('keeps main lines line-level when they do not have real cue timing', () => {
const line = {
index: 0,
start: 1000,
end: 2200,
value: 'Youもっと強く 素早く 吹き飛ばせ',
tokens: [],
}
const highlighted = buildHighlightedMainLine(line, 2600)
expect(highlighted).toBe(line)
expect(highlighted.tokens).toEqual([])
})
// Same expectation as the other line-level cases, but with `end: null` on
// both lines so the helper has no closing timestamp to work from.
it('keeps auxiliary lines line-level when end time is missing and they lack cues', () => {
const mainLine = {
index: 0,
start: 1000,
end: null,
value: 'Hello there',
tokens: [],
}
const translationLine = {
index: 0,
start: 1000,
end: null,
value: 'Bonjour toi',
tokens: [],
}
const highlighted = buildHighlightedAuxLine(mainLine, translationLine, 2400)
expect(highlighted).toBe(translationLine)
expect(highlighted.tokens).toEqual([])
})
// Main-line variant of the missing-end case: `end` is null and there are no
// tokens, so the input object itself is the expected result.
it('keeps main lines line-level when end time is missing and they lack cues', () => {
const line = {
index: 0,
start: 1000,
end: null,
value: 'One more time',
tokens: [],
}
const highlighted = buildHighlightedMainLine(line, 2400)
expect(highlighted).toBe(line)
expect(highlighted.tokens).toEqual([])
})
// The only layer line starts at 60000 ms while main line 1 starts at 2000 ms.
// The lookup is expected to report "no match" (-1 / null line) instead of
// pairing them.
it('returns no layer match when the nearest line is too far in time', () => {
const mainLines = [
{ index: 0, start: 1000, end: 1800, value: 'Line A', tokens: [] },
{ index: 1, start: 2000, end: 2800, value: 'Line B', tokens: [] },
]
const layerLines = [
{ index: 0, start: 60000, end: 60800, value: 'Far line', tokens: [] },
]
expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(-1)
expect(resolveLayerLineForMain(mainLines, layerLines, 1).line).toBeNull()
})
// Pins the LRC text format: `[mm:ss.cc] text` per line, each line terminated
// by a newline.
it('converts a structured lyric track to LRC', () => {
const lrc = structuredLyricToLrc({
lang: 'eng',
synced: true,
line: [
{ start: 18800, value: "We're no strangers to love" },
// 22801 ms is expected to be written with centisecond precision as 22.80.
{ start: 22801, value: 'You know the rules and so do I' },
],
})
expect(lrc).toBe(
"[00:18.80] We're no strangers to love\n[00:22.80] You know the rules and so do I\n",
)
})
// The only candidate track is unsynced (`synced: false`, no start times), so
// the conversion is expected to yield an empty string.
it('returns empty text when no synced lyrics are available', () => {
const lrc = structuredLyricsToLrc(
[{ lang: 'eng', synced: false, line: [{ value: 'Unsynced line' }] }],
'eng',
)
expect(lrc).toBe('')
})
// The value stored under the `locale` key is expected to be returned verbatim.
// NOTE(review): this test does not remove the `locale` entry afterwards —
// confirm that shared setup/teardown resets localStorage between tests.
it('reads preferred language from localStorage first', () => {
localStorage.setItem('locale', 'pt-BR')
expect(getPreferredLyricLanguage()).toBe('pt-BR')
})
// Two cueLine entries share index 0 but reference different agents. The
// expected output is ONE karaoke line, attributed to the 'lead' agent, whose
// token list contains the cues of both agents in start order.
it('builds karaoke lines from agent-based cueLine payload', () => {
const lines = buildKaraokeLines({
lang: 'eng',
synced: true,
line: [{ start: 1000, end: 3000, value: 'Hello world' }],
agents: [
{ id: 'lead', role: 'main', name: 'Lead Vocal' },
// The backing agent has no `name`; its tokens are expected to carry ''.
{ id: 'backing', role: 'bg' },
],
cueLine: [
{
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
agentId: 'lead',
cue: [{ start: 1000, end: 1500, value: 'Hello' }],
},
{
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
agentId: 'backing',
cue: [{ start: 2000, end: 2500, value: 'world' }],
},
],
})
expect(lines).toEqual([
{
agentId: 'lead',
agentName: 'Lead Vocal',
agentRole: 'main',
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
tokens: [
{
start: 1000,
end: 1500,
value: 'Hello',
// Token from the 'main' agent is expected to have an empty `role`.
role: '',
agentId: 'lead',
agentName: 'Lead Vocal',
agentRole: 'main',
},
{
start: 2000,
end: 2500,
value: 'world',
// Token from the 'bg' agent is expected to expose `role: 'bg'`.
role: 'bg',
agentId: 'backing',
agentName: '',
agentRole: 'bg',
},
],
},
])
})
// Same scenario as the agent-based payload test, but calling the lower-level
// builder directly with (cue lines, plain lines, agent lookup Map). The
// expected grouped output is identical.
it('builds grouped karaoke lines directly from cue lines', () => {
const agentLookup = new Map([
['lead', { id: 'lead', role: 'main', name: 'Lead Vocal' }],
['backing', { id: 'backing', role: 'bg', name: '' }],
])
const lines = buildKaraokeLinesFromCueLines(
[
{
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
agentId: 'lead',
cue: [{ start: 1000, end: 1500, value: 'Hello' }],
},
{
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
agentId: 'backing',
cue: [{ start: 2000, end: 2500, value: 'world' }],
},
],
[{ start: 1000, end: 3000, value: 'Hello world' }],
agentLookup,
)
// Both cue lines collapse into a single line owned by the 'lead' agent.
expect(lines).toEqual([
{
agentId: 'lead',
agentName: 'Lead Vocal',
agentRole: 'main',
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
tokens: [
{
start: 1000,
end: 1500,
value: 'Hello',
role: '',
agentId: 'lead',
agentName: 'Lead Vocal',
agentRole: 'main',
},
{
start: 2000,
end: 2500,
value: 'world',
role: 'bg',
agentId: 'backing',
agentName: '',
agentRole: 'bg',
},
],
},
])
})
// `byteStart`/`byteEnd` on each cue must be carried over to the resulting
// tokens unchanged. In this fixture the offsets are inclusive on both ends
// ('Oh' occupies bytes 0..1).
it('preserves cue byte offsets on karaoke tokens', () => {
const lines = buildKaraokeLines({
lang: 'eng',
synced: true,
line: [{ start: 0, end: 2400, value: 'Oh love love me tonight' }],
cueLine: [
{
index: 0,
start: 0,
end: 2400,
value: 'Oh love love me tonight',
cue: [
{ start: 0, end: 300, value: 'Oh', byteStart: 0, byteEnd: 1 },
// Bytes 8..11 address the SECOND 'love' in the line; the first one
// (bytes 3..6) has no cue, so only the offsets disambiguate them.
{ start: 900, end: 1300, value: 'love', byteStart: 8, byteEnd: 11 },
{ start: 1300, end: 1600, value: 'me', byteStart: 13, byteEnd: 14 },
{
start: 1600,
end: 2400,
value: 'tonight',
byteStart: 16,
byteEnd: 22,
},
],
},
],
})
expect(
lines[0].tokens.map((token) => [
token.value,
token.byteStart,
token.byteEnd,
]),
).toEqual([
['Oh', 0, 1],
['love', 8, 11],
['me', 13, 14],
['tonight', 16, 22],
])
})
// Cues whose value is a single space must survive as tokens of their own, so
// the token list covers every byte of the line. Each Hangul syllable in the
// fixture is 3 bytes in UTF-8 and each space is 1 byte.
it('preserves whitespace-only cues for exact byte-range rendering', () => {
const lines = buildKaraokeLines({
lang: 'kor',
synced: true,
line: [{ start: 0, end: 900, value: '눈을 뜬 순간' }],
cueLine: [
{
index: 0,
start: 0,
end: 900,
value: '눈을 뜬 순간',
cue: [
{ start: 0, end: 150, value: '눈을', byteStart: 0, byteEnd: 5 },
{ start: 150, end: 250, value: ' ', byteStart: 6, byteEnd: 6 },
{ start: 250, end: 450, value: '뜬', byteStart: 7, byteEnd: 9 },
{ start: 450, end: 550, value: ' ', byteStart: 10, byteEnd: 10 },
{ start: 550, end: 900, value: '순간', byteStart: 11, byteEnd: 16 },
],
},
],
})
expect(
lines[0].tokens.map((token) => [
token.value,
token.byteStart,
token.byteEnd,
]),
).toEqual([
['눈을', 0, 5],
[' ', 6, 6],
['뜬', 7, 9],
[' ', 10, 10],
['순간', 11, 16],
])
})
// Byte layout of the fixture in UTF-8: 눈(0-2) 을(3-5) ' '(6) 뜬(7-9) ' '(10)
// 순(11-13) 간(14-16). Every character here is one UTF-16 code unit.
it('maps UTF-8 byte offsets to string ranges for multibyte lyrics', () => {
const text = '눈을 뜬 순간'
expect(utf8ByteOffsetToCodeUnitIndex(text, 0)).toBe(0)
expect(utf8ByteOffsetToCodeUnitIndex(text, 3)).toBe(1)
expect(utf8ByteOffsetToCodeUnitIndex(text, 7)).toBe(3)
// The byte range is inclusive (byte 16 is the last byte of '간'), while the
// expected code-unit `end` (7) is exclusive, i.e. usable with String#slice.
expect(utf8ByteRangeToCodeUnitRange(text, 11, 16)).toEqual({
start: 5,
end: 7,
text: '순간',
})
})
// The payload has no `agents` array and the cue line has no `agentId`; only
// the older per-cueLine `role` field is present. The token is expected to
// take that role and leave the agent fields as empty strings.
it('falls back to legacy cueLine role values when agents are absent', () => {
const lines = buildKaraokeLines({
lang: 'eng',
synced: true,
line: [{ start: 1000, end: 3000, value: 'Hello world' }],
cueLine: [
{
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
role: 'bg',
cue: [{ start: 1000, end: 1500, value: 'Hello' }],
},
],
})
expect(lines[0].tokens[0].role).toBe('bg')
expect(lines[0].tokens[0].agentId).toBe('')
expect(lines[0].tokens[0].agentName).toBe('')
})
// The cues are supplied in reverse chronological order; the resulting tokens
// are expected to be ordered by ascending `start`.
it('sorts token timing by start to keep playback stable', () => {
const lines = buildKaraokeLines({
lang: 'eng',
synced: true,
line: [{ start: 1000, end: 3000, value: 'Hello world' }],
cueLine: [
{
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
role: '',
cue: [
{ start: 2000, end: 2500, value: 'world' },
{ start: 1000, end: 1500, value: 'Hello' },
],
},
],
})
expect(lines[0].tokens.map((token) => token.value)).toEqual([
'Hello',
'world',
])
})
// A single cue that spans the whole line (same text, same start/end) must
// stay one token; it is not to be split into several tokens.
it('keeps a single full-line token unchanged instead of expanding it synthetically', () => {
const lines = buildKaraokeLines({
lang: 'ko-Latn',
synced: true,
line: [{ start: 1000, end: 2000, value: 'Da-la-lun, dun' }],
cueLine: [
{
index: 0,
start: 1000,
end: 2000,
value: 'Da-la-lun, dun',
role: '',
cue: [{ start: 1000, end: 2000, value: 'Da-la-lun, dun' }],
},
],
})
expect(lines).toHaveLength(1)
expect(lines[0].tokens).toHaveLength(1)
expect(lines[0].tokens[0].value).toBe('Da-la-lun, dun')
// The token's resolved window must equal the line's own 1000-2000 ms span.
const firstWindow = resolveKaraokeTokenWindow(lines[0], 0)
expect(firstWindow.start).toBeCloseTo(1000)
expect(firstWindow.end).toBeCloseTo(2000)
})
// Playback position 2200 ms falls inside line 0 (1000-3000 ms) and inside its
// second token (2000-2500 ms), hence { lineIndex: 0, tokenIndex: 1 }.
it('detects active line and token for karaoke timing', () => {
const state = getActiveKaraokeState(
[
{
index: 0,
start: 1000,
end: 3000,
value: 'Hello world',
tokens: [
{ start: 1000, end: 1500, value: 'Hello', role: '' },
{ start: 2000, end: 2500, value: 'world', role: '' },
],
},
{
index: 1,
start: 3500,
end: 5000,
value: 'Second line',
tokens: [],
},
],
2200,
)
expect(state).toEqual({ lineIndex: 0, tokenIndex: 1 })
})
// Tokens here have a `start` but no `end`. The expected window of a token ends
// at the next token's start; the last token's window ends at the line's end.
it('resolves token window fallback boundaries from neighboring tokens', () => {
const line = {
start: 1000,
end: 3000,
value: 'Hello world',
tokens: [
{ start: 1200, value: 'Hello', role: '' },
{ start: 1800, value: 'world', role: '' },
],
}
expect(resolveKaraokeTokenWindow(line, 0)).toEqual({
start: 1200,
end: 1800,
})
expect(resolveKaraokeTokenWindow(line, 1)).toEqual({
start: 1800,
end: 3000,
})
})
// None of the three tokens has any timing. The expected windows split the
// line's 1000-2000 ms span into three equal, contiguous slices.
it('infers sequential token windows when token timings are missing', () => {
const line = {
start: 1000,
end: 2000,
value: 'A B C',
tokens: [
{ value: 'A', role: '' },
{ value: 'B', role: '' },
{ value: 'C', role: '' },
],
}
const first = resolveKaraokeTokenWindow(line, 0)
const second = resolveKaraokeTokenWindow(line, 1)
const third = resolveKaraokeTokenWindow(line, 2)
// toBeCloseTo is used because the thirds are not exactly representable.
expect(first.start).toBeCloseTo(1000)
expect(first.end).toBeCloseTo(1333.3333333333333)
expect(second.start).toBeCloseTo(1333.3333333333333)
expect(second.end).toBeCloseTo(1666.6666666666667)
expect(third.start).toBeCloseTo(1666.6666666666667)
expect(third.end).toBeCloseTo(2000)
})
// Every token claims the full line span (1000-2000 ms), so the per-token
// timing carries no information. The expected result is the same even
// three-way split as when timings are missing altogether.
it('falls back to sequential windows when token timings are collapsed', () => {
const line = {
start: 1000,
end: 2000,
value: 'A B C',
tokens: [
{ start: 1000, end: 2000, value: 'A', role: '' },
{ start: 1000, end: 2000, value: 'B', role: '' },
{ start: 1000, end: 2000, value: 'C', role: '' },
],
}
const first = resolveKaraokeTokenWindow(line, 0)
const second = resolveKaraokeTokenWindow(line, 1)
const third = resolveKaraokeTokenWindow(line, 2)
expect(first.start).toBeCloseTo(1000)
expect(first.end).toBeCloseTo(1333.3333333333333)
expect(second.start).toBeCloseTo(1333.3333333333333)
expect(second.end).toBeCloseTo(1666.6666666666667)
expect(third.start).toBeCloseTo(1666.6666666666667)
expect(third.end).toBeCloseTo(2000)
})
// 1108 ms lies in the 10 ms gap between token A (ends 1100) and token B
// (starts 1110). The active token is expected to remain A (index 0) rather
// than jumping ahead or reporting no token.
it('keeps token selection stable near tight token boundaries', () => {
const state = getActiveKaraokeState(
[
{
index: 0,
start: 1000,
end: 2000,
value: 'A B',
tokens: [
{ start: 1000, end: 1100, value: 'A', role: '' },
{ start: 1110, end: 1300, value: 'B', role: '' },
],
},
],
1108,
)
expect(state).toEqual({ lineIndex: 0, tokenIndex: 0 })
})
// Minimal positive case: a single cue line containing one cue with a start
// time is enough for the lyric to count as structured content.
it('reports structured lyric content when token timing exists', () => {
expect(
hasStructuredLyricContent({
cueLine: [{ cue: [{ start: 100, value: 'a' }] }],
}),
).toBe(true)
})
// Negative and positive case side by side: lines with neither `start` nor
// tokens are unusable, while a single line with a `start` is sufficient.
it('detects when built karaoke lines have no usable timing', () => {
expect(
hasUsableKaraokeTiming([
{ index: 0, value: 'First line', tokens: [] },
{ index: 1, value: 'Second line', tokens: [] },
]),
).toBe(false)
expect(
hasUsableKaraokeTiming([
{ index: 0, start: 1000, value: 'Timed line', tokens: [] },
]),
).toBe(true)
})
})

View File

@ -0,0 +1,27 @@
// Combines the user's overlay preferences with what the current track
// actually offers and returns the flags that drive the lyrics overlay.
//
// Each flag is `preference && availability`, so an overlay is never shown for
// content that does not exist. The pronunciation preference is tri-state:
// null/undefined means "no explicit choice yet" and defaults to availability.
export const resolveLyricsOverlayState = (options) => {
  const {
    karaokeVisiblePreference,
    translationPreference,
    pronunciationPreference,
    hasKaraokeLyric,
    hasTranslationLyric,
    hasPronunciationLyric,
  } = options

  // Resolve the tri-state preference before gating it on availability.
  const pronunciationWanted =
    pronunciationPreference == null
      ? hasPronunciationLyric
      : pronunciationPreference

  return {
    karaokeVisible: karaokeVisiblePreference && hasKaraokeLyric,
    showTranslation: translationPreference && hasTranslationLyric,
    showPronunciation: pronunciationWanted && hasPronunciationLyric,
  }
}
// Computes the next pronunciation preference after the user hits the toggle.
//
// Without pronunciation lyrics there is nothing to toggle, so the result is
// always false. Otherwise a null/undefined previous preference is treated as
// its implicit default (the availability flag) and then inverted.
export const togglePronunciationPreference = (
  previousPreference,
  hasPronunciationLyric,
) => {
  if (hasPronunciationLyric) {
    const effectivePreference =
      previousPreference == null ? hasPronunciationLyric : previousPreference
    return !effectivePreference
  }
  return false
}

View File

@ -62,12 +62,30 @@ const useStyle = makeStyles(
// Fix cover display when image is not square // Fix cover display when image is not square
aspectRatio: '1/1', aspectRatio: '1/1',
display: 'flex', display: 'flex',
position: 'relative',
},
'& .react-jinke-music-player-mobile .react-jinke-music-player-mobile-cover.nd-mobile-lyrics-active':
{
width: '100%',
maxWidth: 'none',
height: 'clamp(280px, 42vh, 460px)',
aspectRatio: 'auto',
borderRadius: 12,
border: 'none',
boxShadow: 'none',
background: 'transparent',
cursor: 'default',
}, },
'& .react-jinke-music-player-mobile .react-jinke-music-player-mobile-cover img.cover': '& .react-jinke-music-player-mobile .react-jinke-music-player-mobile-cover img.cover':
{ {
animationDuration: (props) => !props.enableCoverAnimation && '0s', animationDuration: (props) => !props.enableCoverAnimation && '0s',
objectFit: 'contain', // Fix cover display when image is not square objectFit: 'contain', // Fix cover display when image is not square
}, },
'& .react-jinke-music-player-mobile .react-jinke-music-player-mobile-cover.nd-mobile-lyrics-active img.cover':
{
opacity: 0,
pointerEvents: 'none',
},
// Hide old singer display // Hide old singer display
'& .react-jinke-music-player-mobile .react-jinke-music-player-mobile-singer': '& .react-jinke-music-player-mobile .react-jinke-music-player-mobile-singer':
{ {

View File

@ -7,6 +7,7 @@ import {
PLAYER_CURRENT, PLAYER_CURRENT,
PLAYER_PLAY_NEXT, PLAYER_PLAY_NEXT,
PLAYER_PLAY_TRACKS, PLAYER_PLAY_TRACKS,
PLAYER_UPDATE_LYRIC,
PLAYER_SET_TRACK, PLAYER_SET_TRACK,
PLAYER_SET_VOLUME, PLAYER_SET_VOLUME,
PLAYER_SYNC_QUEUE, PLAYER_SYNC_QUEUE,
@ -60,6 +61,7 @@ const mapToAudioLists = (item) => {
let lyricText = '' let lyricText = ''
if (lyrics) { if (lyrics) {
try {
const structured = JSON.parse(lyrics) const structured = JSON.parse(lyrics)
for (const structuredLyric of structured) { for (const structuredLyric of structured) {
if (structuredLyric.synced) { if (structuredLyric.synced) {
@ -76,6 +78,9 @@ const mapToAudioLists = (item) => {
} }
} }
} }
} catch {
lyricText = ''
}
} }
return { return {
@ -208,6 +213,45 @@ const reduceMode = (state, { data: { mode } }) => {
} }
} }
// Stores new lyric text for a track, both on its queue entries and on the
// current track when it is the same one.
//
// state  - player state; reads `queue` (array of items) and `current`.
// action - `{ data: { trackId, lyric } }`.
//
// Returns the SAME state object when nothing needs to change (missing
// trackId, unknown track, or lyric already up to date), so subscribers can
// rely on reference equality. Untouched `queue` / `current` / item references
// are preserved for the same reason.
const reduceUpdateLyric = (state, { data: { trackId, lyric } }) => {
  if (!trackId) {
    return state
  }

  let queueChanged = false
  const queue = state.queue.map((item) => {
    if (item.trackId !== trackId || item.lyric === lyric) {
      return item
    }
    queueChanged = true
    return { ...item, lyric }
  })

  // `current` is tracked independently of the queue: it can hold a stale lyric
  // even when the matching queue entry is already up to date.
  const currentChanged =
    state.current?.trackId === trackId && state.current.lyric !== lyric

  if (!queueChanged && !currentChanged) {
    return state
  }

  return {
    ...state,
    queue: queueChanged ? queue : state.queue,
    current: currentChanged ? { ...state.current, lyric } : state.current,
  }
}
export const playerReducer = (previousState = initialState, payload) => { export const playerReducer = (previousState = initialState, payload) => {
const { type } = payload const { type } = payload
switch (type) { switch (type) {
@ -245,6 +289,8 @@ export const playerReducer = (previousState = initialState, payload) => {
previousState.savedPlayIndex >= 0 ? previousState.savedPlayIndex : 0, previousState.savedPlayIndex >= 0 ? previousState.savedPlayIndex : 0,
} }
} }
case PLAYER_UPDATE_LYRIC:
return reduceUpdateLyric(previousState, payload)
default: default:
return previousState return previousState
} }

View File

@ -1,11 +1,24 @@
import { describe, it, expect } from 'vitest' import { describe, expect, it, vi } from 'vitest'
import { playerReducer } from './playerReducer' import { playerReducer } from './playerReducer'
import { import {
PLAYER_SYNC_QUEUE,
PLAYER_CURRENT, PLAYER_CURRENT,
PLAYER_REFRESH_QUEUE, PLAYER_REFRESH_QUEUE,
PLAYER_SET_TRACK,
PLAYER_SYNC_QUEUE,
PLAYER_UPDATE_LYRIC,
} from '../actions' } from '../actions'
// Replace uuid's `v4` with a fixed value so generated ids are deterministic.
vi.mock('uuid', () => ({
v4: () => 'test-uuid',
}))
// Stub the subsonic module's URL builders with predictable strings.
// NOTE(review): presumably the reducer calls these while mapping tracks to
// queue entries — confirm against playerReducer.
vi.mock('../subsonic', () => ({
default: {
streamUrl: vi.fn((id) => `/rest/stream?id=${id}`),
getCoverArtUrl: vi.fn(() => '/rest/getCoverArt?id=test'),
},
}))
describe('playerReducer', () => { describe('playerReducer', () => {
describe('pending track selection survives SYNC_QUEUE and premature CURRENT', () => { describe('pending track selection survives SYNC_QUEUE and premature CURRENT', () => {
// Simulates the real sequence when clicking a new song while one is playing: // Simulates the real sequence when clicking a new song while one is playing:
@ -54,8 +67,6 @@ describe('playerReducer', () => {
}) })
it('CURRENT for old track preserves pending playIndex', () => { it('CURRENT for old track preserves pending playIndex', () => {
// After SYNC_QUEUE, queue has new UUIDs. The old track's UUID (zzz)
// is at index 2, but playIndex is 0. This is a premature callback.
const stateAfterSync = { const stateAfterSync = {
...stateAfterPlayTracks, ...stateAfterPlayTracks,
queue: [ queue: [
@ -71,7 +82,7 @@ describe('playerReducer', () => {
const result = playerReducer(stateAfterSync, action) const result = playerReducer(stateAfterSync, action)
expect(result.playIndex).toBe(0) expect(result.playIndex).toBe(0)
expect(result.clear).toBe(true) expect(result.clear).toBe(true)
expect(result.savedPlayIndex).toBe(2) // preserved from before expect(result.savedPlayIndex).toBe(2)
}) })
it('CURRENT for correct track consumes pending playIndex', () => { it('CURRENT for correct track consumes pending playIndex', () => {
@ -83,7 +94,6 @@ describe('playerReducer', () => {
{ trackId: 's3', uuid: 'zzz', name: 'Song 3' }, { trackId: 's3', uuid: 'zzz', name: 'Song 3' },
], ],
} }
// Player switched to Song 1 (uuid 'xxx', index 0 == playIndex)
const action = { const action = {
type: PLAYER_CURRENT, type: PLAYER_CURRENT,
data: { uuid: 'xxx', name: 'Song 1', volume: 1 }, data: { uuid: 'xxx', name: 'Song 1', volume: 1 },
@ -224,4 +234,80 @@ describe('playerReducer', () => {
expect(result.playIndex).toBe(0) expect(result.playIndex).toBe(0)
}) })
}) })
// The track's `lyrics` field is a JSON string with one synced and one unsynced
// entry. Only the synced entry is expected to end up in the queue item's
// `lyric`, formatted as LRC.
it('maps embedded synced lyrics to LRC text', () => {
const lyrics = JSON.stringify([
{
lang: 'eng',
synced: true,
line: [{ start: 1000, value: 'Line one' }],
},
{
lang: 'eng',
synced: false,
line: [{ value: 'Unsynced line' }],
},
])
// Passing `undefined` as the state makes the reducer start from its default.
const state = playerReducer(undefined, {
type: PLAYER_SET_TRACK,
data: {
id: 'song-1',
title: 'Test Song',
artist: 'Test Artist',
album: 'Test Album',
duration: 60,
lyrics,
},
})
expect(state.queue).toHaveLength(1)
expect(state.queue[0].lyric).toBe('[00:01.00] Line one\n')
})
// Seeds the queue with one track that has no lyrics, then dispatches
// PLAYER_UPDATE_LYRIC for that track id and checks the queue entry's `lyric`.
it('updates queue lyric by track id', () => {
const initial = playerReducer(undefined, {
type: PLAYER_SET_TRACK,
data: {
id: 'song-1',
title: 'Test Song',
artist: 'Test Artist',
album: 'Test Album',
duration: 60,
},
})
const updated = playerReducer(initial, {
type: PLAYER_UPDATE_LYRIC,
data: {
trackId: 'song-1',
lyric: '[00:01.00] Updated lyric\n',
},
})
expect(updated.queue[0].lyric).toBe('[00:01.00] Updated lyric\n')
})
// An update for a track id that is not in the queue must be a no-op: the
// reducer has to return the exact same state object (checked with toBe).
it('returns same state when lyric update does not match any track', () => {
const initial = playerReducer(undefined, {
type: PLAYER_SET_TRACK,
data: {
id: 'song-1',
title: 'Test Song',
artist: 'Test Artist',
album: 'Test Album',
duration: 60,
},
})
const updated = playerReducer(initial, {
type: PLAYER_UPDATE_LYRIC,
data: {
trackId: 'missing-track',
lyric: '[00:01.00] Updated lyric\n',
},
})
expect(updated).toBe(initial)
})
}) })

View File

@ -1,5 +1,5 @@
import { baseUrl } from '../utils'
import { httpClient } from '../dataProvider' import { httpClient } from '../dataProvider'
import { baseUrl } from '../utils'
const url = (command, id, options) => { const url = (command, id, options) => {
const username = localStorage.getItem('username') const username = localStorage.getItem('username')
@ -120,6 +120,10 @@ const getTopSongs = (artist, count = 50) => {
return httpClient(url('getTopSongs', null, { artist, count })) return httpClient(url('getTopSongs', null, { artist, count }))
} }
// Calls the `getLyricsBySongId` endpoint for the given song id, always asking
// for the enhanced payload (`enhanced: true`). Returns whatever `httpClient`
// returns for that URL.
const getLyricsBySongId = (id) =>
  httpClient(url('getLyricsBySongId', id, { enhanced: true }))
const streamUrl = (id, options) => { const streamUrl = (id, options) => {
return baseUrl( return baseUrl(
url('stream', id, { url('stream', id, {
@ -149,4 +153,5 @@ export default {
getArtistInfo, getArtistInfo,
getTopSongs, getTopSongs,
getSimilarSongs2, getSimilarSongs2,
getLyricsBySongId,
} }

View File

@ -1,7 +1,13 @@
import { vi } from 'vitest' import { vi } from 'vitest'
import config from '../config' import { httpClient } from '../dataProvider'
import subsonic from './index' import subsonic from './index'
// Replace the real HTTP client with a spy that resolves to an empty object, so
// tests can inspect the requested URL without any network access.
vi.mock('../dataProvider', () => ({
httpClient: vi.fn(() => Promise.resolve({})),
}))
// Fixed cover-art size passed to getCoverArtUrl; the assertions below look for
// `size=600` in the generated URL.
const COVER_ART_SIZE = 600
describe('getCoverArtUrl', () => { describe('getCoverArtUrl', () => {
beforeEach(() => { beforeEach(() => {
// Mock window.location // Mock window.location
@ -31,11 +37,7 @@ describe('getCoverArtUrl', () => {
updatedAt: '2023-01-01T00:00:00Z', updatedAt: '2023-01-01T00:00:00Z',
} }
const url = subsonic.getCoverArtUrl( const url = subsonic.getCoverArtUrl(playlistRecord, COVER_ART_SIZE, true)
playlistRecord,
config.uiCoverArtSize,
true,
)
expect(url).toContain('pl-playlist-123') expect(url).toContain('pl-playlist-123')
expect(url).toContain('size=600') expect(url).toContain('size=600')
@ -49,11 +51,7 @@ describe('getCoverArtUrl', () => {
sync: true, sync: true,
} }
const url = subsonic.getCoverArtUrl( const url = subsonic.getCoverArtUrl(playlistRecord, COVER_ART_SIZE, true)
playlistRecord,
config.uiCoverArtSize,
true,
)
expect(url).toContain('pl-playlist-123') expect(url).toContain('pl-playlist-123')
expect(url).toContain('size=600') expect(url).toContain('size=600')
@ -68,11 +66,7 @@ describe('getCoverArtUrl', () => {
updatedAt: '2023-01-01T00:00:00Z', updatedAt: '2023-01-01T00:00:00Z',
} }
const url = subsonic.getCoverArtUrl( const url = subsonic.getCoverArtUrl(albumRecord, COVER_ART_SIZE, true)
albumRecord,
config.uiCoverArtSize,
true,
)
expect(url).toContain('al-album-123') expect(url).toContain('al-album-123')
expect(url).toContain('size=600') expect(url).toContain('size=600')
@ -86,7 +80,7 @@ describe('getCoverArtUrl', () => {
updatedAt: '2023-01-01T00:00:00Z', updatedAt: '2023-01-01T00:00:00Z',
} }
const url = subsonic.getCoverArtUrl(songRecord, config.uiCoverArtSize, true) const url = subsonic.getCoverArtUrl(songRecord, COVER_ART_SIZE, true)
expect(url).toContain('mf-song-123') expect(url).toContain('mf-song-123')
expect(url).toContain('size=600') expect(url).toContain('size=600')
@ -99,11 +93,7 @@ describe('getCoverArtUrl', () => {
updatedAt: '2023-01-01T00:00:00Z', updatedAt: '2023-01-01T00:00:00Z',
} }
const url = subsonic.getCoverArtUrl( const url = subsonic.getCoverArtUrl(artistRecord, COVER_ART_SIZE, true)
artistRecord,
config.uiCoverArtSize,
true,
)
expect(url).toContain('ar-artist-123') expect(url).toContain('ar-artist-123')
expect(url).toContain('size=600') expect(url).toContain('size=600')
@ -194,3 +184,30 @@ describe('getAvatarUrl', () => {
expect(url).toContain('username=john') expect(url).toContain('username=john')
}) })
}) })
// Verifies the URL that getLyricsBySongId hands to the (mocked) httpClient.
describe('getLyricsBySongId', () => {
beforeEach(() => {
// Reset call history so toHaveBeenCalledTimes(1) only counts this test.
vi.clearAllMocks()
// Minimal localStorage stand-in providing the credential keys listed below;
// any other key yields null.
const localStorageMock = {
getItem: vi.fn((key) => {
const values = {
username: 'testuser',
'subsonic-token': 'testtoken',
'subsonic-salt': 'testsalt',
}
return values[key] || null
}),
}
Object.defineProperty(window, 'localStorage', { value: localStorageMock })
})
it('calls the getLyricsBySongId endpoint with enhanced=true', async () => {
await subsonic.getLyricsBySongId('song-1')
expect(httpClient).toHaveBeenCalledTimes(1)
// First positional argument of the first call is the request URL.
const calledUrl = httpClient.mock.calls[0][0]
expect(calledUrl).toContain('/rest/getLyricsBySongId?')
expect(calledUrl).toContain('id=song-1')
expect(calledUrl).toContain('enhanced=true')
})
})