diff --git a/README.md b/README.md index 0ae5bdfaf..645f1580d 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ A share of the revenue helps fund the development of Navidrome at no additional - **Multi-platform**, runs on macOS, Linux and Windows. **Docker** images are also provided - Ready to use binaries for all major platforms, including **Raspberry Pi** - Automatically **monitors your library** for changes, importing new files and reloading new metadata + - Supports lyrics from sidecar **.ttml**, **.elrc**, **.lrc**, **.srt**, **.txt** files and embedded tags (via `lyricspriority`) - **Themeable**, modern and responsive **Web interface** based on [Material UI](https://material-ui.com) - **Compatible** with all Subsonic/Madsonic/Airsonic [clients](https://www.navidrome.org/docs/overview/#apps) - **Transcoding** on the fly. Can be set per user/player. **Opus encoding is supported** diff --git a/conf/configuration.go b/conf/configuration.go index 916efe70b..9bd79d781 100644 --- a/conf/configuration.go +++ b/conf/configuration.go @@ -763,7 +763,7 @@ func setViperDefaults() { viper.SetDefault("artistartpriority", "artist.*, album/artist.*, external") viper.SetDefault("artistimagefolder", "") viper.SetDefault("discartpriority", "disc*.*, cd*.*, cover.*, folder.*, front.*, discsubtitle, embedded") - viper.SetDefault("lyricspriority", ".lrc,.txt,embedded") + viper.SetDefault("lyricspriority", ".ttml,.elrc,.lrc,.srt,.txt,embedded") viper.SetDefault("enablegravatar", false) viper.SetDefault("enablefavourites", true) viper.SetDefault("enablestarrating", true) diff --git a/core/lyrics/lyrics.go b/core/lyrics/lyrics.go index 758053042..cc3d574b3 100644 --- a/core/lyrics/lyrics.go +++ b/core/lyrics/lyrics.go @@ -14,6 +14,12 @@ type Lyrics interface { GetLyrics(ctx context.Context, mf *model.MediaFile) (model.LyricList, error) } +// BatchLyrics can resolve lyrics across multiple candidate media files while +// still honoring the configured source priority globally. 
+type BatchLyrics interface { + GetLyricsForMediaFiles(ctx context.Context, mediaFiles []model.MediaFile) (model.LyricList, error) +} + // PluginLoader discovers and loads lyrics provider plugins. type PluginLoader interface { LoadLyricsProvider(name string) (Lyrics, bool) @@ -32,28 +38,53 @@ func NewLyrics(pluginLoader PluginLoader) Lyrics { // GetLyrics returns lyrics for the given media file, trying sources in the // order specified by conf.Server.LyricsPriority. func (l *lyricsService) GetLyrics(ctx context.Context, mf *model.MediaFile) (model.LyricList, error) { - var lyricsList model.LyricList - var err error + return l.getLyricsForCandidates(ctx, []*model.MediaFile{mf}) +} +// GetLyricsForMediaFiles resolves lyrics across duplicate media files while +// preserving the configured source priority across the full candidate set. +func (l *lyricsService) GetLyricsForMediaFiles(ctx context.Context, mediaFiles []model.MediaFile) (model.LyricList, error) { + candidates := make([]*model.MediaFile, 0, len(mediaFiles)) + for i := range mediaFiles { + candidates = append(candidates, &mediaFiles[i]) + } + return l.getLyricsForCandidates(ctx, candidates) +} + +func (l *lyricsService) getLyricsForCandidates(ctx context.Context, mediaFiles []*model.MediaFile) (model.LyricList, error) { for pattern := range strings.SplitSeq(conf.Server.LyricsPriority, ",") { pattern = strings.TrimSpace(pattern) - switch { - case strings.EqualFold(pattern, "embedded"): - lyricsList, err = fromEmbedded(ctx, mf) - case strings.HasPrefix(pattern, "."): - lyricsList, err = fromExternalFile(ctx, mf, strings.ToLower(pattern)) - default: - lyricsList, err = l.fromPlugin(ctx, mf, pattern) + if pattern == "" { + continue } - if err != nil { - log.Error(ctx, "error getting lyrics", "source", pattern, err) - } + for _, mf := range mediaFiles { + if mf == nil { + continue + } - if len(lyricsList) > 0 { - return lyricsList, nil + lyricsList, err := l.getLyricsFromSource(ctx, mf, pattern) + if err != nil { 
+ log.Error(ctx, "error getting lyrics", "source", pattern, err) + continue + } + + if len(lyricsList) > 0 { + return lyricsList, nil + } } } return nil, nil } + +func (l *lyricsService) getLyricsFromSource(ctx context.Context, mf *model.MediaFile, pattern string) (model.LyricList, error) { + switch { + case strings.EqualFold(pattern, "embedded"): + return fromEmbedded(ctx, mf) + case strings.HasPrefix(pattern, "."): + return fromExternalFile(ctx, mf, strings.ToLower(pattern)) + default: + return l.fromPlugin(ctx, mf, pattern) + } +} diff --git a/core/lyrics/lyrics_test.go b/core/lyrics/lyrics_test.go index 7e837782e..8fbaec6c5 100644 --- a/core/lyrics/lyrics_test.go +++ b/core/lyrics/lyrics_test.go @@ -45,6 +45,71 @@ var _ = Describe("sources", func() { }, } + elrcLyrics := model.LyricList{ + model.Lyrics{ + DisplayArtist: "ELRC Artist", + DisplayTitle: "ELRC Song", + Lang: "eng", + Line: []model.Line{ + { + Start: gg.P(int64(1000)), + End: gg.P(int64(3000)), + Value: "Lead words", + Cue: []model.Cue{ + { + Start: gg.P(int64(1000)), + End: gg.P(int64(1500)), + Value: "Lead ", + ByteStart: 0, + ByteEnd: 4, + }, + { + Start: gg.P(int64(1500)), + End: gg.P(int64(3000)), + Value: "words", + ByteStart: 5, + ByteEnd: 9, + }, + }, + }, + { + Start: gg.P(int64(3000)), + Value: "Fallback line", + }, + }, + Synced: true, + }, + } + + ttmlLyrics := model.LyricList{ + model.Lyrics{ + Kind: "main", + Lang: "eng", + Line: []model.Line{ + { + Start: gg.P(int64(18800)), + Value: "We're no strangers to love", + }, + { + Start: gg.P(int64(22800)), + Value: "You know the rules and so do I", + }, + }, + Synced: true, + }, + model.Lyrics{ + Kind: "main", + Lang: "por", + Line: []model.Line{ + { + Start: gg.P(int64(18800)), + Value: "Nao somos estranhos ao amor", + }, + }, + Synced: true, + }, + } + unsyncedLyrics := model.LyricList{ model.Lyrics{ Lang: "xxx", @@ -60,6 +125,25 @@ var _ = Describe("sources", func() { }, } + srtLyrics := model.LyricList{ + model.Lyrics{ + Lang: "xxx", + 
Line: []model.Line{ + { + Start: gg.P(int64(18800)), + End: gg.P(int64(22800)), + Value: "We're from subtitles", + }, + { + Start: gg.P(int64(22801)), + End: gg.P(int64(26000)), + Value: "Another subtitle line", + }, + }, + Synced: true, + }, + } + BeforeEach(func() { DeferCleanup(configtest.SetupConfig()) @@ -81,7 +165,33 @@ var _ = Describe("sources", func() { }, Entry("embedded > lrc > txt", "embedded,.lrc,.txt", embeddedLyrics), Entry("lrc > embedded > txt", ".lrc,embedded,.txt", syncedLyrics), - Entry("txt > lrc > embedded", ".txt,.lrc,embedded", unsyncedLyrics)) + Entry("elrc > lrc > embedded", ".elrc,.lrc,embedded", elrcLyrics), + Entry("srt > txt > embedded", ".srt,.txt,embedded", srtLyrics), + Entry("txt > lrc > embedded", ".txt,.lrc,embedded", unsyncedLyrics), + Entry("ttml > elrc > lrc > srt > embedded", ".ttml,.elrc,.lrc,.srt,embedded", ttmlLyrics)) + + It("resolves source priority across duplicate media files", func() { + conf.Server.LyricsPriority = ".ttml,embedded" + embeddedJSON, err := json.Marshal(embeddedLyrics) + Expect(err).To(BeNil()) + + svc := lyrics.NewLyrics(nil) + batchSvc, ok := svc.(lyrics.BatchLyrics) + Expect(ok).To(BeTrue()) + + list, err := batchSvc.GetLyricsForMediaFiles(ctx, []model.MediaFile{ + { + Lyrics: string(embeddedJSON), + Path: "tests/fixtures/01 Invisible (RED) Edit Version.mp3", + }, + { + Lyrics: "[]", + Path: "tests/fixtures/test.mp3", + }, + }) + Expect(err).To(BeNil()) + Expect(list).To(Equal(ttmlLyrics)) + }) Context("Errors", func() { var RegularUserContext = XContext diff --git a/core/lyrics/sources.go b/core/lyrics/sources.go index 82a10ca41..7586c944f 100644 --- a/core/lyrics/sources.go +++ b/core/lyrics/sources.go @@ -5,6 +5,7 @@ import ( "errors" "os" "path" + "strings" "github.com/navidrome/navidrome/log" "github.com/navidrome/navidrome/model" @@ -36,18 +37,38 @@ func fromExternalFile(ctx context.Context, mf *model.MediaFile, suffix string) ( return nil, err } - lyrics, err := model.ToLyrics("xxx", 
string(contents)) - if err != nil { - log.Error(ctx, "error parsing lyric external file", "path", externalLyric, err) - return nil, err - } else if lyrics == nil { + var list model.LyricList + switch { + case strings.EqualFold(suffix, ".ttml"): + list, err = parseTTML(contents) + if err != nil { + log.Error(ctx, "error parsing ttml external file", "path", externalLyric, err) + return nil, err + } + case strings.EqualFold(suffix, ".srt"): + list, err = parseSRT(contents) + if err != nil { + log.Error(ctx, "error parsing srt external file", "path", externalLyric, err) + return nil, err + } + default: + lyrics, err := model.ToLyrics("xxx", string(contents)) + if err != nil { + log.Error(ctx, "error parsing lyric external file", "path", externalLyric, err) + return nil, err + } + if lyrics != nil { + list = model.LyricList{*lyrics} + } + } + + if len(list) == 0 { log.Trace(ctx, "empty lyrics from external file", "path", externalLyric) return nil, nil } log.Trace(ctx, "retrieved lyrics from external file", "path", externalLyric) - - return model.LyricList{*lyrics}, nil + return list, nil } // fromPlugin attempts to load lyrics from a plugin with the given name. 
diff --git a/core/lyrics/sources_test.go b/core/lyrics/sources_test.go index b3d502101..1e98323ca 100644 --- a/core/lyrics/sources_test.go +++ b/core/lyrics/sources_test.go @@ -88,6 +88,89 @@ var _ = Describe("sources", func() { })) }) + It("should return Enhanced LRC lyrics with word-level cues from a file", func() { + mf := model.MediaFile{Path: "tests/fixtures/test-enhanced.mp3"} + lyrics, err := fromExternalFile(ctx, &mf, ".lrc") + + Expect(err).To(BeNil()) + Expect(lyrics).To(HaveLen(1)) + Expect(lyrics[0].DisplayArtist).To(Equal("Test Artist")) + Expect(lyrics[0].DisplayTitle).To(Equal("Enhanced Test")) + Expect(lyrics[0].Lang).To(Equal("eng")) + Expect(lyrics[0].Synced).To(BeTrue()) + Expect(lyrics[0].Line).To(HaveLen(3)) + + // Line 1: has inline markers → Cue array populated + Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(1000)))) + Expect(lyrics[0].Line[0].End).To(Equal(gg.P(int64(3000)))) + Expect(lyrics[0].Line[0].Value).To(Equal("Some lyrics here")) + Expect(lyrics[0].Line[0].Cue).To(HaveLen(3)) + Expect(*lyrics[0].Line[0].Cue[0].Start).To(Equal(int64(1000))) + Expect(lyrics[0].Line[0].Cue[0].Value).To(Equal("Some ")) + Expect(lyrics[0].Line[0].Cue[0].End).To(Equal(gg.P(int64(1500)))) + Expect(lyrics[0].Line[0].Cue[0].ByteStart).To(Equal(0)) + Expect(lyrics[0].Line[0].Cue[0].ByteEnd).To(Equal(4)) + Expect(*lyrics[0].Line[0].Cue[1].Start).To(Equal(int64(1500))) + Expect(lyrics[0].Line[0].Cue[1].Value).To(Equal("lyrics ")) + Expect(lyrics[0].Line[0].Cue[1].End).To(Equal(gg.P(int64(2000)))) + Expect(lyrics[0].Line[0].Cue[1].ByteStart).To(Equal(5)) + Expect(lyrics[0].Line[0].Cue[1].ByteEnd).To(Equal(11)) + Expect(*lyrics[0].Line[0].Cue[2].Start).To(Equal(int64(2000))) + Expect(lyrics[0].Line[0].Cue[2].Value).To(Equal("here")) + Expect(lyrics[0].Line[0].Cue[2].End).To(Equal(gg.P(int64(3000)))) + Expect(lyrics[0].Line[0].Cue[2].ByteStart).To(Equal(12)) + Expect(lyrics[0].Line[0].Cue[2].ByteEnd).To(Equal(15)) + + // Line 2: has inline markers + 
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(3000)))) + Expect(lyrics[0].Line[1].End).To(Equal(gg.P(int64(5000)))) + Expect(lyrics[0].Line[1].Value).To(Equal("More words")) + Expect(lyrics[0].Line[1].Cue).To(HaveLen(2)) + Expect(lyrics[0].Line[1].Cue[0].End).To(Equal(gg.P(int64(3500)))) + Expect(lyrics[0].Line[1].Cue[1].End).To(Equal(gg.P(int64(5000)))) + Expect(lyrics[0].Line[1].Cue[0].ByteStart).To(Equal(0)) + Expect(lyrics[0].Line[1].Cue[0].ByteEnd).To(Equal(4)) + Expect(lyrics[0].Line[1].Cue[1].ByteStart).To(Equal(5)) + Expect(lyrics[0].Line[1].Cue[1].ByteEnd).To(Equal(9)) + + // Line 3: plain line, no cues + Expect(lyrics[0].Line[2].Start).To(Equal(gg.P(int64(5000)))) + Expect(lyrics[0].Line[2].Value).To(Equal("Plain line without inline markers")) + Expect(lyrics[0].Line[2].Cue).To(BeNil()) + }) + + It("should return Enhanced LRC lyrics from an ELRC file", func() { + mf := model.MediaFile{Path: "tests/fixtures/test.mp3"} + lyrics, err := fromExternalFile(ctx, &mf, ".elrc") + + Expect(err).To(BeNil()) + Expect(lyrics).To(HaveLen(1)) + Expect(lyrics[0].DisplayArtist).To(Equal("ELRC Artist")) + Expect(lyrics[0].DisplayTitle).To(Equal("ELRC Song")) + Expect(lyrics[0].Lang).To(Equal("eng")) + Expect(lyrics[0].Synced).To(BeTrue()) + Expect(lyrics[0].Line).To(HaveLen(2)) + + Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(1000)))) + Expect(lyrics[0].Line[0].End).To(Equal(gg.P(int64(3000)))) + Expect(lyrics[0].Line[0].Value).To(Equal("Lead words")) + Expect(lyrics[0].Line[0].Cue).To(HaveLen(2)) + Expect(*lyrics[0].Line[0].Cue[0].Start).To(Equal(int64(1000))) + Expect(lyrics[0].Line[0].Cue[0].Value).To(Equal("Lead ")) + Expect(lyrics[0].Line[0].Cue[0].End).To(Equal(gg.P(int64(1500)))) + Expect(lyrics[0].Line[0].Cue[0].ByteStart).To(Equal(0)) + Expect(lyrics[0].Line[0].Cue[0].ByteEnd).To(Equal(4)) + Expect(*lyrics[0].Line[0].Cue[1].Start).To(Equal(int64(1500))) + Expect(lyrics[0].Line[0].Cue[1].Value).To(Equal("words")) + 
Expect(lyrics[0].Line[0].Cue[1].End).To(Equal(gg.P(int64(3000)))) + Expect(lyrics[0].Line[0].Cue[1].ByteStart).To(Equal(5)) + Expect(lyrics[0].Line[0].Cue[1].ByteEnd).To(Equal(9)) + + Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(3000)))) + Expect(lyrics[0].Line[1].Value).To(Equal("Fallback line")) + Expect(lyrics[0].Line[1].Cue).To(BeNil()) + }) + It("should return unsynchronized lyrics from a file", func() { mf := model.MediaFile{Path: "tests/fixtures/test.mp3"} lyrics, err := fromExternalFile(ctx, &mf, ".txt") @@ -109,6 +192,66 @@ var _ = Describe("sources", func() { })) }) + It("should return synchronized lyrics from an SRT file", func() { + mf := model.MediaFile{Path: "tests/fixtures/test.mp3"} + lyrics, err := fromExternalFile(ctx, &mf, ".srt") + + Expect(err).To(BeNil()) + Expect(lyrics).To(Equal(model.LyricList{ + model.Lyrics{ + Lang: "xxx", + Line: []model.Line{ + { + Start: gg.P(int64(18800)), + End: gg.P(int64(22800)), + Value: "We're from subtitles", + }, + { + Start: gg.P(int64(22801)), + End: gg.P(int64(26000)), + Value: "Another subtitle line", + }, + }, + Synced: true, + }, + })) + }) + + It("should return synchronized multilingual lyrics from a TTML file", func() { + mf := model.MediaFile{Path: "tests/fixtures/test.mp3"} + lyrics, err := fromExternalFile(ctx, &mf, ".ttml") + + Expect(err).To(BeNil()) + Expect(lyrics).To(Equal(model.LyricList{ + { + Kind: "main", + Lang: "eng", + Line: []model.Line{ + { + Start: gg.P(int64(18800)), + Value: "We're no strangers to love", + }, + { + Start: gg.P(int64(22800)), + Value: "You know the rules and so do I", + }, + }, + Synced: true, + }, + { + Kind: "main", + Lang: "por", + Line: []model.Line{ + { + Start: gg.P(int64(18800)), + Value: "Nao somos estranhos ao amor", + }, + }, + Synced: true, + }, + })) + }) + It("should handle LRC files with UTF-8 BOM marker (issue #4631)", func() { // The function looks for , so we need to pass // a MediaFile with .mp3 path and look for .lrc suffix @@ -142,5 +285,33 
@@ var _ = Describe("sources", func() { Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801)))) Expect(lyrics[0].Line[1].Value).To(Equal("You know the rules and so do I")) }) + + It("should handle TTML files with UTF-8 BOM marker", func() { + mf := model.MediaFile{Path: "tests/fixtures/bom-test.mp3"} + lyrics, err := fromExternalFile(ctx, &mf, ".ttml") + + Expect(err).To(BeNil()) + Expect(lyrics).To(HaveLen(1)) + Expect(lyrics[0].Kind).To(Equal("main")) + Expect(lyrics[0].Synced).To(BeTrue()) + Expect(lyrics[0].Line).To(HaveLen(1)) + Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(0)))) + Expect(lyrics[0].Line[0].Value).To(Equal("BOM test line")) + }) + + It("should handle UTF-16 BE encoded TTML files", func() { + mf := model.MediaFile{Path: "tests/fixtures/bom-utf16-test.mp3"} + lyrics, err := fromExternalFile(ctx, &mf, ".ttml") + + Expect(err).To(BeNil()) + Expect(lyrics).To(HaveLen(1)) + Expect(lyrics[0].Kind).To(Equal("main")) + Expect(lyrics[0].Synced).To(BeTrue()) + Expect(lyrics[0].Line).To(HaveLen(2)) + Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(18800)))) + Expect(lyrics[0].Line[0].Value).To(Equal("UTF16 line one")) + Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801)))) + Expect(lyrics[0].Line[1].Value).To(Equal("UTF16 line two")) + }) }) }) diff --git a/core/lyrics/srt.go b/core/lyrics/srt.go new file mode 100644 index 000000000..8fd77abb4 --- /dev/null +++ b/core/lyrics/srt.go @@ -0,0 +1,161 @@ +package lyrics + +import ( + "bytes" + "regexp" + "strconv" + "strings" + + "github.com/navidrome/navidrome/model" + "github.com/navidrome/navidrome/utils/str" +) + +var srtTimeRegex = regexp.MustCompile(`^\s*(\d{1,2}):(\d{2}):(\d{2})[,.](\d{1,3})\s*$`) + +func parseSRT(contents []byte) (model.LyricList, error) { + raw := strings.ReplaceAll(string(contents), "\r\n", "\n") + raw = strings.ReplaceAll(raw, "\r", "\n") + + blocks := splitSRTBlocks(raw) + lines := make([]model.Line, 0, len(blocks)) + + for _, block := range blocks { + line, 
ok, err := parseSRTBlock(block) + if err != nil { + return nil, err + } + if ok { + lines = append(lines, line) + } + } + + if len(lines) == 0 { + return nil, nil + } + + lyrics := model.NormalizeLyrics(model.Lyrics{ + Lang: "xxx", + Line: lines, + Synced: true, + }) + return model.LyricList{lyrics}, nil +} + +func splitSRTBlocks(raw string) []string { + raw = strings.TrimSpace(raw) + if raw == "" { + return nil + } + + parts := strings.Split(raw, "\n\n") + blocks := make([]string, 0, len(parts)) + for _, part := range parts { + part = strings.TrimSpace(part) + if part != "" { + blocks = append(blocks, part) + } + } + return blocks +} + +func parseSRTBlock(block string) (model.Line, bool, error) { + scanner := bytes.Split([]byte(block), []byte("\n")) + if len(scanner) == 0 { + return model.Line{}, false, nil + } + + lines := make([]string, 0, len(scanner)) + for _, line := range scanner { + lines = append(lines, strings.TrimSpace(string(line))) + } + + if len(lines) == 0 { + return model.Line{}, false, nil + } + + startIdx := 0 + if digitsOnly(lines[0]) { + startIdx = 1 + } + if startIdx >= len(lines) { + return model.Line{}, false, nil + } + + timing := strings.Split(lines[startIdx], "-->") + if len(timing) != 2 { + return model.Line{}, false, nil + } + + startMs, err := parseSRTTime(timing[0]) + if err != nil { + return model.Line{}, false, err + } + endMs, err := parseSRTTime(timing[1]) + if err != nil { + return model.Line{}, false, err + } + + textLines := make([]string, 0, len(lines)-startIdx-1) + for _, line := range lines[startIdx+1:] { + if line == "" { + continue + } + textLines = append(textLines, line) + } + + value := str.SanitizeText(strings.Join(textLines, "\n")) + if value == "" { + return model.Line{}, false, nil + } + + return model.Line{ + Start: &startMs, + End: &endMs, + Value: value, + }, true, nil +} + +func parseSRTTime(value string) (int64, error) { + match := srtTimeRegex.FindStringSubmatch(strings.TrimSpace(value)) + if match == nil { + 
return 0, strconv.ErrSyntax + } + + hours, err := strconv.ParseInt(match[1], 10, 64) + if err != nil { + return 0, err + } + minutes, err := strconv.ParseInt(match[2], 10, 64) + if err != nil { + return 0, err + } + seconds, err := strconv.ParseInt(match[3], 10, 64) + if err != nil { + return 0, err + } + millis, err := strconv.ParseInt(match[4], 10, 64) + if err != nil { + return 0, err + } + + switch len(match[4]) { + case 1: + millis *= 100 + case 2: + millis *= 10 + } + + return (((hours*60)+minutes)*60+seconds)*1000 + millis, nil +} + +func digitsOnly(value string) bool { + if value == "" { + return false + } + for _, ch := range value { + if ch < '0' || ch > '9' { + return false + } + } + return true +} diff --git a/core/lyrics/ttml.go b/core/lyrics/ttml.go new file mode 100644 index 000000000..576d2ca3d --- /dev/null +++ b/core/lyrics/ttml.go @@ -0,0 +1,1264 @@ +package lyrics + +import ( + "bytes" + "encoding/xml" + "errors" + "io" + "math" + "regexp" + "sort" + "strconv" + "strings" + "unicode" + + "github.com/navidrome/navidrome/log" + "github.com/navidrome/navidrome/model" + "github.com/navidrome/navidrome/utils/str" +) + +const ( + defaultTTMLFrameRate = 30.0 + defaultTTMLSubFrameRate = 1.0 + defaultTTMLTickRate = 1.0 + + ttmlLyricKindMain = "main" + ttmlLyricKindTranslation = "translation" + ttmlLyricKindPronunciation = "pronunciation" + ttmlBackgroundAgentPrefix = "__nd_bg__|" +) + +var offsetTimeRegex = regexp.MustCompile(`^([0-9]+(?:\.[0-9]+)?)(h|m|s|ms|f|t)$`) +var xmlEncodingRegex = regexp.MustCompile(`(?i)<\?xml([^>]*?)encoding\s*=\s*["'][^"']+["']([^>]*)\?>`) + +type ttmlTimeKind int + +const ( + ttmlTimeAbsolute ttmlTimeKind = iota + ttmlTimeOffset + ttmlTimeAmbiguous +) + +type ttmlTimingParams struct { + frameRate float64 + subFrameRate float64 + tickRate float64 +} + +type ttmlTimingContext struct { + lang string + role string + agentID string + begin int64 + hasBegin bool + end int64 + hasEnd bool + invalid bool +} + +type ttmlLineRef 
struct { + order int + line model.Line +} + +type ttmlMetadataEntry struct { + key string + line model.Line + seq int +} + +type ttmlResolvedMetadataLine struct { + order int + seq int + line model.Line +} + +type ttmlDefinedAgent struct { + ID string + Type string + Name string +} + +type ttmlPiece struct { + raw string + cue *model.Cue +} + +type ttmlParser struct { + decoder *xml.Decoder + params ttmlTimingParams + + mainLangOrder []string + mainLinesByLang map[string][]model.Line + + mainLineRefsByKey map[string]ttmlLineRef + mainLineOrder int + + translationLangOrder []string + translationEntriesByLg map[string][]ttmlMetadataEntry + + pronunciationLangOrder []string + pronunciationEntriesByLg map[string][]ttmlMetadataEntry + + definedAgents map[string]ttmlDefinedAgent + + metadataSeq int +} + +func parseTTML(contents []byte) (model.LyricList, error) { + contents = xmlEncodingRegex.ReplaceAll(contents, []byte(``)) + + p := ttmlParser{ + decoder: xml.NewDecoder(bytes.NewReader(contents)), + params: ttmlTimingParams{ + frameRate: defaultTTMLFrameRate, + subFrameRate: defaultTTMLSubFrameRate, + tickRate: defaultTTMLTickRate, + }, + mainLinesByLang: make(map[string][]model.Line), + mainLineRefsByKey: make(map[string]ttmlLineRef), + translationEntriesByLg: make(map[string][]ttmlMetadataEntry), + pronunciationEntriesByLg: make(map[string][]ttmlMetadataEntry), + definedAgents: make(map[string]ttmlDefinedAgent), + } + + root := ttmlTimingContext{lang: "xxx"} + + for { + token, err := p.decoder.Token() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + return nil, err + } + + start, ok := token.(xml.StartElement) + if !ok { + continue + } + + if err := p.parseElement(start, root); err != nil { + return nil, err + } + } + + return p.toLyricList(), nil +} + +func (p *ttmlParser) parseElement(start xml.StartElement, parent ttmlTimingContext) error { + local := strings.ToLower(start.Name.Local) + if local == "tt" { + p.updateTimingParams(start.Attr) + } + + 
switch local { + case "translation": + return p.parseMetadataTrack(start, parent, ttmlLyricKindTranslation) + case "transliteration": + return p.parseMetadataTrack(start, parent, ttmlLyricKindPronunciation) + case "agent": + return p.parseAgentDefinition(start) + } + + ctx := p.childContext(start.Attr, parent) + if local == "p" { + lineText, tokens, err := p.parseParagraph(ctx) + if err != nil { + return err + } + if ctx.invalid || lineText == "" { + return nil + } + + parsedLine := model.Line{Value: lineText} + if ctx.hasBegin { + startMs := ctx.begin + parsedLine.Start = &startMs + } + if ctx.hasEnd { + endMs := ctx.end + parsedLine.End = &endMs + } + if len(tokens) > 0 { + parsedLine.Cue = tokens + } + parsedLine = hydrateLineTimingFromTokens(parsedLine) + + lineKey, _ := attrValue(start.Attr, "key") + p.addMainLine(ctx.lang, lineKey, parsedLine) + return nil + } + + for { + token, err := p.decoder.Token() + if err != nil { + return err + } + + switch t := token.(type) { + case xml.StartElement: + nextParent := ctx + if ctx.invalid { + // Best effort: ignore invalid timing in container elements, and + // continue traversing descendants with parent context. 
+ nextParent = parent + } + if err := p.parseElement(t, nextParent); err != nil { + return err + } + case xml.EndElement: + if strings.EqualFold(t.Name.Local, start.Name.Local) { + return nil + } + } + } +} + +func (p *ttmlParser) parseMetadataTrack(start xml.StartElement, parent ttmlTimingContext, kind string) error { + ctx := p.childContext(start.Attr, parent) + lang := normalizeTTMLLang(ctx.lang) + + for { + token, err := p.decoder.Token() + if err != nil { + return err + } + + switch t := token.(type) { + case xml.StartElement: + if strings.EqualFold(t.Name.Local, "text") { + entry, ok, err := p.parseMetadataText(t, ctx) + if err != nil { + return err + } + if ok { + p.addMetadataEntry(kind, lang, entry) + } + continue + } + + nextParent := ctx + if ctx.invalid { + nextParent = parent + } + if err := p.parseElement(t, nextParent); err != nil { + return err + } + case xml.EndElement: + if strings.EqualFold(t.Name.Local, start.Name.Local) { + return nil + } + } + } +} + +func (p *ttmlParser) parseAgentDefinition(start xml.StartElement) error { + id, ok := attrValue(start.Attr, "id") + id = strings.TrimSpace(id) + if !ok || id == "" { + return p.skipElement(start) + } + + agent := ttmlDefinedAgent{ + ID: id, + Type: strings.ToLower(strings.TrimSpace(attrOrEmpty(start.Attr, "type"))), + } + + for { + token, err := p.decoder.Token() + if err != nil { + return err + } + + switch t := token.(type) { + case xml.StartElement: + if strings.EqualFold(t.Name.Local, "name") { + name, err := p.collectElementText(t) + if err != nil { + return err + } + name = sanitizeTTMLText(name) + if name != "" && agent.Name == "" { + agent.Name = name + } + continue + } + if err := p.skipElement(t); err != nil { + return err + } + case xml.EndElement: + if strings.EqualFold(t.Name.Local, start.Name.Local) { + p.definedAgents[agent.ID] = agent + return nil + } + } + } +} + +func (p *ttmlParser) parseMetadataText(start xml.StartElement, parent ttmlTimingContext) (ttmlMetadataEntry, bool, 
error) { + forKey, hasFor := attrValue(start.Attr, "for") + forKey = strings.TrimSpace(forKey) + + pieces, err := p.parseInlineElement(start, parent) + if err != nil { + return ttmlMetadataEntry{}, false, err + } + if !hasFor || forKey == "" { + return ttmlMetadataEntry{}, false, nil + } + + ctx := p.childContext(start.Attr, parent) + if ctx.invalid { + return ttmlMetadataEntry{}, false, nil + } + + value, tokens := buildTTMLLineFromPieces(pieces) + line := model.Line{Value: value} + if ctx.hasBegin { + startMs := ctx.begin + line.Start = &startMs + } + if ctx.hasEnd { + endMs := ctx.end + line.End = &endMs + } + if len(tokens) > 0 { + line.Cue = tokens + } + line = hydrateLineTimingFromTokens(line) + + if line.Value == "" && len(line.Cue) == 0 { + return ttmlMetadataEntry{}, false, nil + } + + return ttmlMetadataEntry{key: forKey, line: line}, true, nil +} + +func (p *ttmlParser) parseParagraph(parent ttmlTimingContext) (string, []model.Cue, error) { + var pieces []ttmlPiece + + for { + token, err := p.decoder.Token() + if err != nil { + return "", nil, err + } + + switch t := token.(type) { + case xml.StartElement: + inlinePieces, err := p.parseInlineElement(t, parent) + if err != nil { + return "", nil, err + } + pieces = append(pieces, inlinePieces...) 
// parseInlineElement consumes an inline element (one nested inside a
// paragraph or metadata text) up to its matching end tag and returns the text
// pieces it contains.
//
// A <br> element yields a single "\n" piece. A <span> that carries its own
// begin/end/dur attribute, has a valid timing context, contains no nested
// cue, and has non-empty sanitized text is collapsed into one piece with an
// attached cue (start/end from the resolved context, agent from
// resolveCueAgentID). In every other case the accumulated child pieces are
// returned unchanged. Decoder errors are returned as-is.
func (p *ttmlParser) parseInlineElement(start xml.StartElement, parent ttmlTimingContext) ([]ttmlPiece, error) {
	local := strings.ToLower(start.Name.Local)
	if local == "br" {
		// Returns without reading the element's end tag; the stray </br> is
		// dropped by the caller's end-element name check (see the `continue`
		// below for the case where the caller is this function).
		return []ttmlPiece{{raw: "\n"}}, nil
	}

	ctx := p.childContext(start.Attr, parent)
	// Only elements that declare timing themselves may become cues; timing
	// merely inherited from the parent does not count.
	_, hasBegin := attrValue(start.Attr, "begin")
	_, hasEnd := attrValue(start.Attr, "end")
	_, hasDur := attrValue(start.Attr, "dur")
	hasOwnTiming := hasBegin || hasEnd || hasDur

	var pieces []ttmlPiece

	for {
		token, err := p.decoder.Token()
		if err != nil {
			return nil, err
		}

		switch t := token.(type) {
		case xml.StartElement:
			// Nested inline elements inherit this element's timing context.
			inlinePieces, err := p.parseInlineElement(t, ctx)
			if err != nil {
				return nil, err
			}
			pieces = append(pieces, inlinePieces...)
		case xml.EndElement:
			// Ignore end tags that do not close this element.
			if !strings.EqualFold(t.Name.Local, start.Name.Local) {
				continue
			}

			// Innermost timed span wins: if a descendant already produced a
			// cue, this span is treated as a plain container.
			if local == "span" && hasOwnTiming && !ctx.invalid && !ttmlPiecesContainCue(pieces) {
				rawValue := concatTTMLPieceRaw(pieces)
				tokenText := sanitizeTTMLText(rawValue)
				if tokenText != "" {
					parsedToken := model.Cue{
						AgentID: p.resolveCueAgentID(ctx),
					}
					if ctx.hasBegin {
						startMs := ctx.begin
						parsedToken.Start = &startMs
					}
					if ctx.hasEnd {
						endMs := ctx.end
						parsedToken.End = &endMs
					}

					// The raw (unsanitized) text is kept so later stages can
					// compute byte offsets within the full line.
					return []ttmlPiece{{
						raw: rawValue,
						cue: &parsedToken,
					}}, nil
				}
			}

			return pieces, nil
		case xml.CharData:
			pieces = append(pieces, ttmlPiece{raw: string(t)})
		}
	}
}
// finalizeTTMLLogicalLine turns the pieces of one logical line (no embedded
// newlines) into its trimmed text and the cues positioned within that text.
//
// The returned string is the concatenated piece text with leading and
// trailing whitespace removed. Each piece that carries a cue yields one cue
// whose Value is the part of the piece that survives trimming, and whose
// ByteStart/ByteEnd are byte offsets into the trimmed text. ByteEnd is
// inclusive: it is the offset of the cue's last byte. Cues whose text lies
// entirely in the trimmed-away whitespace are dropped. An empty line returns
// ("", nil).
func finalizeTTMLLogicalLine(line []ttmlPiece) (string, []model.Cue) {
	rawLine := concatTTMLPieceRaw(line)
	if rawLine == "" {
		return "", nil
	}

	// Number of whitespace bytes removed from each end by trimming.
	leftTrimBytes := len(rawLine) - len(strings.TrimLeftFunc(rawLine, unicode.IsSpace))
	rightTrimBytes := len(rawLine) - len(strings.TrimRightFunc(rawLine, unicode.IsSpace))
	// Exclusive end of the kept region within rawLine.
	trimmedEnd := len(rawLine) - rightTrimBytes
	if trimmedEnd < leftTrimBytes {
		// All-whitespace line: both trims consume the whole string, so clamp
		// to an empty kept region.
		trimmedEnd = leftTrimBytes
	}

	trimmed := strings.TrimSpace(rawLine)
	cues := make([]model.Cue, 0, len(line))
	// cursor is the byte offset in rawLine where the current piece starts.
	// NOTE(review): it advances by len(piece.raw), while rawLine is built from
	// normalized piece text; this assumes the pieces were already normalized
	// and that normalizing again does not change their length — confirm.
	cursor := 0
	for _, piece := range line {
		pieceEnd := cursor + len(piece.raw)
		if piece.cue != nil {
			// Clip the piece's span to the kept (trimmed) region.
			byteStart := max(cursor, leftTrimBytes)
			byteEnd := min(pieceEnd, trimmedEnd)
			if byteStart < byteEnd {
				cue := *piece.cue
				cue.Value = rawLine[byteStart:byteEnd]
				// Rebase onto the trimmed text; -1 converts the exclusive
				// slice end into an inclusive last-byte offset.
				cue.ByteStart = byteStart - leftTrimBytes
				cue.ByteEnd = byteEnd - leftTrimBytes - 1
				cues = append(cues, cue)
			}
		}
		cursor = pieceEnd
	}

	return trimmed, cues
}
+ return res +} + +func (p *ttmlParser) buildMetadataLyrics(kind string, langOrder []string, entriesByLang map[string][]ttmlMetadataEntry) model.LyricList { + res := make(model.LyricList, 0, len(langOrder)) + + for _, lang := range langOrder { + entries := entriesByLang[lang] + if len(entries) == 0 { + continue + } + + seenKeys := make(map[string]struct{}, len(entries)) + resolved := make([]ttmlResolvedMetadataLine, 0, len(entries)) + for _, entry := range entries { + if _, exists := seenKeys[entry.key]; exists { + continue + } + seenKeys[entry.key] = struct{}{} + + ref, ok := p.mainLineRefsByKey[entry.key] + if !ok { + log.Warn("Skipping TTML metadata line without matching key", "kind", kind, "lang", lang, "key", entry.key) + continue + } + + line := entry.line + if line.Start == nil && ref.line.Start != nil { + startMs := *ref.line.Start + line.Start = &startMs + } + if line.End == nil && ref.line.End != nil { + endMs := *ref.line.End + line.End = &endMs + } + line = hydrateLineTimingFromTokens(line) + + if line.Value == "" && len(line.Cue) == 0 { + continue + } + + resolved = append(resolved, ttmlResolvedMetadataLine{ + order: ref.order, + seq: entry.seq, + line: line, + }) + } + + if len(resolved) == 0 { + continue + } + + sort.SliceStable(resolved, func(i, j int) bool { + if resolved[i].order != resolved[j].order { + return resolved[i].order < resolved[j].order + } + return resolved[i].seq < resolved[j].seq + }) + + lines := make([]model.Line, len(resolved)) + for i := range resolved { + lines[i] = resolved[i].line + } + + res = append(res, p.finalizeLyrics(model.Lyrics{ + Kind: kind, + Lang: lang, + Line: lines, + Synced: linesAreSynced(lines), + })) + } + + return res +} + +func (p *ttmlParser) finalizeLyrics(lyrics model.Lyrics) model.Lyrics { + lyrics.Line, lyrics.Agents = p.resolveAgents(lyrics.Line) + return model.NormalizeLyrics(lyrics) +} + +func (p *ttmlParser) resolveAgents(lines []model.Line) ([]model.Line, []model.Agent) { + if len(lines) == 0 { + 
return lines, nil + } + + usedOrder := make([]string, 0, 4) + usedSet := make(map[string]struct{}, 4) + sawEmptyCue := false + + for i := range lines { + for j := range lines[i].Cue { + agentID := strings.TrimSpace(lines[i].Cue[j].AgentID) + if agentID == "" { + sawEmptyCue = true + continue + } + if _, exists := usedSet[agentID]; !exists { + usedSet[agentID] = struct{}{} + usedOrder = append(usedOrder, agentID) + } + } + } + + if len(usedOrder) == 0 { + return lines, nil + } + + mainID := "" + for _, agentID := range usedOrder { + role := p.baseRoleForAgent(agentID) + if role != "bg" && role != "group" { + mainID = agentID + break + } + } + if mainID == "" && sawEmptyCue { + mainID = "main" + } + if mainID == "" { + for _, agentID := range usedOrder { + if p.baseRoleForAgent(agentID) != "bg" { + mainID = agentID + break + } + } + } + if mainID == "" { + mainID = usedOrder[0] + } + + if _, exists := usedSet[mainID]; !exists { + usedSet[mainID] = struct{}{} + usedOrder = append([]string{mainID}, usedOrder...) 
// contextHasRole reports whether role is one of the whitespace-separated
// tokens in roles.
//
// The comparison is case-insensitive and matches whole tokens only, so
// "x-bg" is not found in "x-bgm". An empty roles string never matches.
func contextHasRole(roles string, role string) bool {
	// Lower-case the needle once instead of once per token.
	want := strings.ToLower(role)
	for _, candidate := range strings.Fields(strings.ToLower(roles)) {
		if candidate == want {
			return true
		}
	}
	return false
}
p.mainLinesByLang[lang]; !ok { + p.mainLangOrder = append(p.mainLangOrder, lang) + } + p.mainLinesByLang[lang] = append(p.mainLinesByLang[lang], line) + + lineKey = strings.TrimSpace(lineKey) + if lineKey != "" { + if _, exists := p.mainLineRefsByKey[lineKey]; !exists { + p.mainLineRefsByKey[lineKey] = ttmlLineRef{ + order: p.mainLineOrder, + line: line, + } + } + } + p.mainLineOrder++ +} + +func (p *ttmlParser) addMetadataEntry(kind string, lang string, entry ttmlMetadataEntry) { + lang = normalizeTTMLLang(lang) + entry.seq = p.metadataSeq + p.metadataSeq++ + + switch kind { + case ttmlLyricKindTranslation: + if _, ok := p.translationEntriesByLg[lang]; !ok { + p.translationLangOrder = append(p.translationLangOrder, lang) + } + p.translationEntriesByLg[lang] = append(p.translationEntriesByLg[lang], entry) + case ttmlLyricKindPronunciation: + if _, ok := p.pronunciationEntriesByLg[lang]; !ok { + p.pronunciationLangOrder = append(p.pronunciationLangOrder, lang) + } + p.pronunciationEntriesByLg[lang] = append(p.pronunciationEntriesByLg[lang], entry) + } +} + +func (p *ttmlParser) childContext(attrs []xml.Attr, parent ttmlTimingContext) ttmlTimingContext { + ctx := parent + + if lang, ok := attrValue(attrs, "lang"); ok { + ctx.lang = normalizeTTMLLang(lang) + } + if agentID, ok := attrValue(attrs, "agent"); ok { + ctx.agentID = strings.TrimSpace(agentID) + } + if role, ok := attrValue(attrs, "role"); ok { + role = strings.TrimSpace(role) + if role != "" { + if ctx.role == "" { + ctx.role = role + } else if !strings.Contains(ctx.role, role) { + ctx.role = ctx.role + " " + role + } + } + } + + beginExpr, hasBegin := attrValue(attrs, "begin") + endExpr, hasEnd := attrValue(attrs, "end") + durExpr, hasDur := attrValue(attrs, "dur") + + if hasBegin { + begin, kind, ok := parseTTMLTimeExpression(beginExpr, p.params) + if !ok { + ctx.invalid = true + return ctx + } + + base := int64(0) + if parent.hasBegin { + base = parent.begin + } + ctx.begin = resolveTTMLTime(begin, kind, 
base, parent) + ctx.hasBegin = true + } else { + ctx.begin = parent.begin + ctx.hasBegin = parent.hasBegin + } + + var calculatedEnd int64 + calculatedHasEnd := false + + if hasEnd { + end, kind, ok := parseTTMLTimeExpression(endExpr, p.params) + if !ok { + ctx.invalid = true + return ctx + } + + base := ctx.begin + if !ctx.hasBegin { + base = parent.begin + } + calculatedEnd = resolveTTMLTime(end, kind, base, parent) + calculatedHasEnd = true + } + + if hasDur { + dur, ok := parseTTMLDurationExpression(durExpr, p.params) + if !ok { + ctx.invalid = true + return ctx + } + if ctx.hasBegin { + durEnd := ctx.begin + dur + if !calculatedHasEnd || durEnd < calculatedEnd { + calculatedEnd = durEnd + calculatedHasEnd = true + } + } + } + + if !calculatedHasEnd && parent.hasEnd { + calculatedEnd = parent.end + calculatedHasEnd = true + } + + ctx.end = calculatedEnd + ctx.hasEnd = calculatedHasEnd + return ctx +} + +func (p *ttmlParser) updateTimingParams(attrs []xml.Attr) { + frameRate := p.params.frameRate + if value, ok := attrValue(attrs, "frameRate"); ok { + if parsed, err := strconv.ParseFloat(value, 64); err == nil && parsed > 0 { + frameRate = parsed + } + } + + if value, ok := attrValue(attrs, "frameRateMultiplier"); ok { + parts := strings.Fields(value) + if len(parts) == 2 { + numerator, errA := strconv.ParseFloat(parts[0], 64) + denominator, errB := strconv.ParseFloat(parts[1], 64) + if errA == nil && errB == nil && denominator > 0 { + frameRate = frameRate * (numerator / denominator) + } + } + } + + subFrameRate := p.params.subFrameRate + if value, ok := attrValue(attrs, "subFrameRate"); ok { + if parsed, err := strconv.ParseFloat(value, 64); err == nil && parsed > 0 { + subFrameRate = parsed + } + } + + tickRate := p.params.tickRate + if value, ok := attrValue(attrs, "tickRate"); ok { + if parsed, err := strconv.ParseFloat(value, 64); err == nil && parsed > 0 { + tickRate = parsed + } + } + + p.params.frameRate = positiveOrDefault(frameRate, 
// resolveTTMLTime turns a parsed time value into the time stored on a timing
// context, according to how the expression was classified:
//
//   - ttmlTimeAbsolute: value is returned unchanged.
//   - ttmlTimeOffset: value is added to base.
//   - ttmlTimeAmbiguous: the value could be either; the parent's begin/end
//     are used to choose between value (absolute) and base+value (offset).
//
// Any other kind is treated as an offset. value and base are expressed in the
// same unit (callers in this file pass milliseconds).
func resolveTTMLTime(value int64, kind ttmlTimeKind, base int64, parent ttmlTimingContext) int64 {
	switch kind {
	case ttmlTimeAbsolute:
		return value
	case ttmlTimeOffset:
		return base + value
	case ttmlTimeAmbiguous:
		absolute := value
		offset := base + value

		// No parent timing context → no reference frame for offsets.
		// Prefer absolute when the two readings differ (i.e., base != 0).
		if !parent.hasBegin && !parent.hasEnd && base != 0 {
			return absolute
		}

		// With a full parent window, pick whichever reading is the only one
		// that falls inside it. If both or neither fit, fall through.
		if parent.hasBegin && parent.hasEnd {
			absoluteInParent := absolute >= parent.begin && absolute <= parent.end
			offsetInParent := offset >= parent.begin && offset <= parent.end
			if absoluteInParent && !offsetInParent {
				return absolute
			}
			if offsetInParent && !absoluteInParent {
				return offset
			}
		}

		if parent.hasBegin {
			// An absolute reading that precedes the parent's start is
			// rejected in favor of an offset that does not.
			if absolute < parent.begin && offset >= parent.begin {
				return offset
			}
			// Both readings are at or after the parent's start: prefer the
			// earlier one, which is the absolute reading.
			if absolute >= parent.begin && offset > absolute {
				return absolute
			}
		}
		// Default for unresolved ambiguity: treat the value as an offset.
		return offset
	default:
		return base + value
	}
}
// parseTTMLClockTime parses a clock-time expression of the form "mm:ss[.fff]"
// or "hh:mm:ss[.fff]" and returns it in milliseconds, rounded to the nearest
// millisecond.
//
// The second return value is false when the expression does not have two or
// three colon-separated parts, when a part is not numeric, when any part is
// negative, when the seconds part is NaN or infinite (strconv.ParseFloat
// accepts "nan" and "inf"), or when the total does not fit in an int64.
func parseTTMLClockTime(value string) (int64, bool) {
	parts := strings.Split(value, ":")
	if len(parts) != 2 && len(parts) != 3 {
		return 0, false
	}

	hours := int64(0)
	minutesIdx := 0
	if len(parts) == 3 {
		h, err := strconv.ParseInt(parts[0], 10, 64)
		if err != nil || h < 0 {
			return 0, false
		}
		hours = h
		minutesIdx = 1
	}

	minutes, err := strconv.ParseInt(parts[minutesIdx], 10, 64)
	if err != nil || minutes < 0 {
		return 0, false
	}

	seconds, err := strconv.ParseFloat(parts[minutesIdx+1], 64)
	if err != nil || math.IsNaN(seconds) || math.IsInf(seconds, 0) || seconds < 0 {
		return 0, false
	}

	// Accumulate in floating point so absurdly large hour/minute values
	// cannot wrap around in int64 arithmetic before the range check.
	totalMs := math.Round((float64(hours)*60*60 + float64(minutes)*60 + seconds) * 1000)
	if totalMs >= math.MaxInt64 {
		return 0, false
	}
	return int64(totalMs), true
}
// attrValue looks up the first attribute whose local name equals key,
// ignoring case and namespace, and returns its value with surrounding
// whitespace removed. The boolean is false when no attribute matches.
func attrValue(attrs []xml.Attr, key string) (string, bool) {
	for i := range attrs {
		if candidate := &attrs[i]; strings.EqualFold(candidate.Name.Local, key) {
			return strings.TrimSpace(candidate.Value), true
		}
	}
	return "", false
}

// attrOrEmpty is attrValue without the presence flag: it returns the trimmed
// attribute value, or "" when the attribute is absent.
func attrOrEmpty(attrs []xml.Attr, key string) string {
	if value, found := attrValue(attrs, key); found {
		return value
	}
	return ""
}
// positiveOrDefault returns v when it is a usable positive rate, and fallback
// otherwise.
//
// Zero, negative values, NaN and +Inf all select the fallback. NaN and +Inf
// matter because the rates are derived from strconv.ParseFloat results and
// from a numerator/denominator product, either of which can yield them; they
// must not reach the divisions that convert frames and ticks to seconds.
func positiveOrDefault(v float64, fallback float64) float64 {
	// NaN fails every ordered comparison, so `v > 0` already rejects it.
	if v > 0 && !math.IsInf(v, 1) {
		return v
	}
	return fallback
}
+

Line one

+

Line two
with break

+
+
+

Linha

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(2)) + + By("parsing the English track") + eng := list[0] + Expect(eng.Lang).To(Equal("eng")) + Expect(eng.Synced).To(BeTrue()) + Expect(eng.Line[0].Start).To(Equal(gg.P(int64(3000)))) + Expect(eng.Line[0].Value).To(Equal("Line one")) + Expect(eng.Line[1].Start).To(Equal(gg.P(int64(4517)))) + Expect(eng.Line[1].Value).To(Equal("Line two\nwith break")) + + By("parsing the Portuguese track") + por := list[1] + Expect(por.Lang).To(Equal("por")) + Expect(por.Line[0].Start).To(Equal(gg.P(int64(4500)))) + Expect(por.Line[0].Value).To(Equal("Linha")) + }) + }) + + Describe("Unsupported cue handling", func() { + It("should skip wallclock cues and keep valid ones", func() { + content := []byte(` + + +
+

Skip me

+

Keep me

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Line).To(HaveLen(1)) + Expect(list[0].Line[0].Start).To(Equal(gg.P(int64(1000)))) + Expect(list[0].Line[0].Value).To(Equal("Keep me")) + }) + }) + + Describe("Begin/End/Dur with inheritance", func() { + It("should correctly accumulate nested timing from body, div, and p elements", func() { + content := []byte(` + + +
+

First line

+

Second line

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Lang).To(Equal("eng")) + Expect(list[0].Line).To(HaveLen(2)) + Expect(list[0].Line[0].Start).To(Equal(gg.P(int64(16000)))) + Expect(list[0].Line[0].Value).To(Equal("First line")) + Expect(list[0].Line[1].Start).To(Equal(gg.P(int64(18000)))) + Expect(list[0].Line[1].Value).To(Equal("Second line")) + }) + }) + + Describe("Non-standard bare second offsets", func() { + It("should parse bare decimal numbers as seconds", func() { + content := []byte(` + + +
+

First line

+

Second line

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Line).To(HaveLen(2)) + Expect(list[0].Line[0].Start).To(Equal(gg.P(int64(10170)))) + Expect(list[0].Line[0].Value).To(Equal("First line")) + Expect(list[0].Line[1].Start).To(Equal(gg.P(int64(13710)))) + Expect(list[0].Line[1].Value).To(Equal("Second line")) + }) + }) + + Describe("Word timing tokens", func() { + It("should extract timed tokens from spans including background role", func() { + content := []byte(` + + +
+

+ Hello + echo +

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Agents).To(Equal([]model.Agent{ + {ID: "main", Role: "main"}, + {ID: "__nd_bg__|main", Role: "bg"}, + })) + Expect(list[0].Line).To(HaveLen(1)) + + line := list[0].Line[0] + Expect(line.Start).To(Equal(gg.P(int64(1000)))) + Expect(line.Value).To(Equal("Hello\necho")) + Expect(line.End).To(Equal(gg.P(int64(3000)))) + Expect(line.Cue).To(HaveLen(3)) + + Expect(line.Cue[0]).To(Equal(model.Cue{Start: gg.P(int64(1000)), End: gg.P(int64(1400)), Value: "He", ByteStart: 0, ByteEnd: 1, AgentID: "main"})) + Expect(line.Cue[1]).To(Equal(model.Cue{Start: gg.P(int64(1400)), End: gg.P(int64(1800)), Value: "llo", ByteStart: 2, ByteEnd: 4, AgentID: "main"})) + Expect(line.Cue[2]).To(Equal(model.Cue{Start: gg.P(int64(2000)), End: gg.P(int64(2500)), Value: "echo", ByteStart: 6, ByteEnd: 9, AgentID: "__nd_bg__|main"})) + }) + + It("should parse named TTML agents into main, voice, and group roles", func() { + content := []byte(` + + + + Chris Martin + Jin + All + + + +
+

You

+

and

+

All

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Agents).To(Equal([]model.Agent{ + {ID: "v1", Role: "main", Name: "Chris Martin"}, + {ID: "v2", Role: "voice", Name: "Jin"}, + {ID: "v1000", Role: "group", Name: "All"}, + })) + Expect(list[0].Line[0].Cue[0].AgentID).To(Equal("v1")) + Expect(list[0].Line[1].Cue[0].AgentID).To(Equal("v2")) + Expect(list[0].Line[2].Cue[0].AgentID).To(Equal("v1000")) + }) + + It("should avoid collisions between derived background agents and explicit TTML agent ids", func() { + content := []byte(` + + + + Lead + Existing Background Id + + + +
+

+ Lead + Echo +

+

+ Named +

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Agents).To(Equal([]model.Agent{ + {ID: "lead", Role: "main", Name: "Lead"}, + {ID: "__nd_bg__|lead", Role: "bg", Name: "Lead"}, + {ID: "lead__bg", Role: "voice", Name: "Existing Background Id"}, + })) + Expect(list[0].Line).To(HaveLen(2)) + Expect(list[0].Line[0].Cue).To(HaveLen(2)) + Expect(list[0].Line[0].Cue[0].AgentID).To(Equal("lead")) + Expect(list[0].Line[0].Cue[1].AgentID).To(Equal("__nd_bg__|lead")) + Expect(list[0].Line[1].Cue).To(HaveLen(1)) + Expect(list[0].Line[1].Cue[0].AgentID).To(Equal("lead__bg")) + }) + + It("should fill missing cue agent ids with the resolved main agent", func() { + content := []byte(` + + + + Guest Vocal + + + +
+

+ Lead + Guest +

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Agents).To(Equal([]model.Agent{ + {ID: "guest", Role: "main", Name: "Guest Vocal"}, + })) + Expect(list[0].Line).To(HaveLen(1)) + Expect(list[0].Line[0].Cue).To(HaveLen(2)) + Expect(list[0].Line[0].Cue[0].AgentID).To(Equal("guest")) + Expect(list[0].Line[0].Cue[1].AgentID).To(Equal("guest")) + }) + }) + + Describe("Ambiguous decimal timing", func() { + It("should prefer absolute timing when values fall inside parent window", func() { + content := []byte(` + + +
+

+ go + go +

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Line).To(HaveLen(1)) + + line := list[0].Line[0] + Expect(line.Start).To(Equal(gg.P(int64(43444)))) + Expect(line.Value).To(Equal("go\ngo")) + Expect(line.End).To(Equal(gg.P(int64(45570)))) + Expect(line.Cue).To(HaveLen(2)) + Expect(line.Cue[0]).To(Equal(model.Cue{Start: gg.P(int64(43444)), End: gg.P(int64(43716)), Value: "go", ByteStart: 0, ByteEnd: 1})) + Expect(line.Cue[1]).To(Equal(model.Cue{Start: gg.P(int64(43716)), End: gg.P(int64(43887)), Value: "go", ByteStart: 3, ByteEnd: 4})) + }) + }) + + Describe("Unsynced fallback", func() { + It("should return unsynced lyrics when no timing is present", func() { + content := []byte(` + + +
+

No timing here

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(1)) + Expect(list[0].Lang).To(Equal("xxx")) + Expect(list[0].Synced).To(BeFalse()) + Expect(list[0].Line).To(HaveLen(1)) + Expect(list[0].Line[0].Start).To(BeNil()) + Expect(list[0].Line[0].Value).To(Equal("No timing here")) + }) + }) + + Describe("Metadata tracks", func() { + It("should produce main, translation, and pronunciation tracks from iTunesMetadata", func() { + content := []byte(` + + + + + + + Hola + Skip me + + + + + konni + + + + + + +
+

こんにちは

+

こんばんは

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + Expect(list).To(HaveLen(3)) + + By("checking the main track") + main := list[0] + Expect(main.Kind).To(Equal("main")) + Expect(main.Lang).To(Equal("ja")) + Expect(main.Line).To(HaveLen(2)) + + By("checking the translation track") + translation := list[1] + Expect(translation.Kind).To(Equal("translation")) + Expect(translation.Lang).To(Equal("es")) + Expect(translation.Line).To(HaveLen(1)) + Expect(translation.Line[0].Start).To(Equal(gg.P(int64(1000)))) + Expect(translation.Line[0].Value).To(Equal("Hola")) + Expect(translation.Line[0].End).To(Equal(gg.P(int64(1500)))) + + By("checking the pronunciation track") + pronunciation := list[2] + Expect(pronunciation.Kind).To(Equal("pronunciation")) + Expect(pronunciation.Lang).To(Equal("ja-latn")) + Expect(pronunciation.Line).To(HaveLen(1)) + Expect(pronunciation.Line[0].Start).To(Equal(gg.P(int64(2000)))) + Expect(pronunciation.Line[0].Value).To(Equal("konni")) + Expect(pronunciation.Line[0].End).To(Equal(gg.P(int64(2600)))) + Expect(pronunciation.Line[0].Cue).To(HaveLen(2)) + Expect(pronunciation.Line[0].Cue[0]).To(Equal(model.Cue{Start: gg.P(int64(2000)), End: gg.P(int64(2300)), Value: "ko", ByteStart: 0, ByteEnd: 1})) + Expect(pronunciation.Line[0].Cue[1]).To(Equal(model.Cue{Start: gg.P(int64(2300)), End: gg.P(int64(2600)), Value: "nni", ByteStart: 2, ByteEnd: 4})) + }) + }) + + Describe("Pronunciation with bare decimal end times", func() { + It("should correctly parse bare decimal times in transliteration spans", func() { + content := []byte(` + + + + + + + I woke up + + + + + + +
+

起きた

+
+ +
`) + + list, err := parseTTML(content) + Expect(err).ToNot(HaveOccurred()) + + var pronunciation *model.Lyrics + for i := range list { + if list[i].Kind == "pronunciation" { + pronunciation = &list[i] + break + } + } + Expect(pronunciation).ToNot(BeNil()) + Expect(pronunciation.Line).To(HaveLen(1)) + + line := pronunciation.Line[0] + Expect(line.Start).To(Equal(gg.P(int64(2747)))) + Expect(line.Value).To(Equal("I woke up")) + Expect(line.Cue).To(HaveLen(3)) + Expect(line.Cue[0]).To(Equal(model.Cue{Start: gg.P(int64(2747)), End: gg.P(int64(3018)), Value: "I", ByteStart: 0, ByteEnd: 0})) + Expect(line.Cue[1]).To(Equal(model.Cue{Start: gg.P(int64(3018)), End: gg.P(int64(3179)), Value: "woke", ByteStart: 2, ByteEnd: 5})) + Expect(line.Cue[2]).To(Equal(model.Cue{Start: gg.P(int64(3179)), End: gg.P(int64(3582)), Value: "up", ByteStart: 7, ByteEnd: 8})) + }) + }) +}) diff --git a/model/lyrics.go b/model/lyrics.go index f75f3b11b..9a57ebaad 100644 --- a/model/lyrics.go +++ b/model/lyrics.go @@ -6,23 +6,43 @@ import ( "slices" "strconv" "strings" + "unicode" "github.com/navidrome/navidrome/log" "github.com/navidrome/navidrome/utils/str" ) +type Cue struct { + Start *int64 `structs:"start,omitempty" json:"start,omitempty"` + End *int64 `structs:"end,omitempty" json:"end,omitempty"` + Value string `structs:"value" json:"value"` + ByteStart int `structs:"byteStart" json:"byteStart"` + ByteEnd int `structs:"byteEnd" json:"byteEnd"` + AgentID string `structs:"agentId,omitempty" json:"agentId,omitempty"` +} + +type Agent struct { + ID string `structs:"id" json:"id"` + Role string `structs:"role" json:"role"` + Name string `structs:"name,omitempty" json:"name,omitempty"` +} + type Line struct { Start *int64 `structs:"start,omitempty" json:"start,omitempty"` + End *int64 `structs:"end,omitempty" json:"end,omitempty"` Value string `structs:"value" json:"value"` + Cue []Cue `structs:"cue,omitempty" json:"cue,omitempty"` } type Lyrics struct { - DisplayArtist string 
`structs:"displayArtist,omitempty" json:"displayArtist,omitempty"` - DisplayTitle string `structs:"displayTitle,omitempty" json:"displayTitle,omitempty"` - Lang string `structs:"lang" json:"lang"` - Line []Line `structs:"line" json:"line"` - Offset *int64 `structs:"offset,omitempty" json:"offset,omitempty"` - Synced bool `structs:"synced" json:"synced"` + DisplayArtist string `structs:"displayArtist,omitempty" json:"displayArtist,omitempty"` + DisplayTitle string `structs:"displayTitle,omitempty" json:"displayTitle,omitempty"` + Kind string `structs:"kind,omitempty" json:"kind,omitempty"` + Lang string `structs:"lang" json:"lang"` + Agents []Agent `structs:"agents,omitempty" json:"agents,omitempty"` + Line []Line `structs:"line" json:"line"` + Offset *int64 `structs:"offset,omitempty" json:"offset,omitempty"` + Synced bool `structs:"synced" json:"synced"` } // support the standard [mm:ss.mm], as well as [hh:*] and [*.mmm] @@ -33,6 +53,10 @@ var ( syncRegex = regexp.MustCompile(`(^|\n)\s*` + timeRegexString) timeRegex = regexp.MustCompile(timeRegexString) lrcIdRegex = regexp.MustCompile(`\[(ar|ti|offset|lang):([^]]+)]`) + + // Enhanced LRC: inline word-level timing markers like <00:12.34> + enhancedLRCTimeString = `<([0-9]{1,2}:)?([0-9]{1,2}):([0-9]{1,2})(.[0-9]{1,3})?>` + enhancedLRCRegex = regexp.MustCompile(enhancedLRCTimeString) ) func (l Lyrics) IsEmpty() bool { @@ -106,9 +130,11 @@ func ToLyrics(language, text string) (*Lyrics, error) { if validLine { for idx := range timestamps { + value, cues := parseEnhancedLine(priorLine) structuredLines = append(structuredLines, Line{ Start: ×tamps[idx], - Value: strings.TrimSpace(priorLine), + Value: value, + Cue: cues, }) } timestamps = nil @@ -154,9 +180,11 @@ func ToLyrics(language, text string) (*Lyrics, error) { if validLine { for idx := range timestamps { + value, cues := parseEnhancedLine(priorLine) structuredLines = append(structuredLines, Line{ Start: ×tamps[idx], - Value: strings.TrimSpace(priorLine), + Value: 
value, + Cue: cues, }) } } @@ -173,13 +201,118 @@ func ToLyrics(language, text string) (*Lyrics, error) { DisplayArtist: artist, DisplayTitle: title, Lang: language, - Line: structuredLines, + Line: NormalizeCueLines(structuredLines), Offset: offset, Synced: synced, } return &lyrics, nil } +// parseEnhancedLine extracts word-level timing cues from Enhanced LRC inline markers +// and computes UTF-8 byte offsets against the final stripped line value. +func parseEnhancedLine(text string) (string, []Cue) { + matches := enhancedLRCRegex.FindAllStringSubmatchIndex(text, -1) + if len(matches) == 0 { + return strings.TrimSpace(text), nil + } + + type segment struct { + start int64 + rawStart int + rawEnd int + } + + segments := make([]segment, 0, len(matches)) + var rawValue strings.Builder + for i, match := range matches { + timeMs, err := parseTime( + // Rewrite <...> as [...] so parseTime can handle it with the same logic + "["+text[match[0]+1:match[1]-1]+"]", + // Adjust match indices to point into our rewritten string (need start/end pairs for each group) + []int{ + 0, match[1] - match[0], + adjustGroup(match, 2), adjustGroup(match, 3), + adjustGroup(match, 4), adjustGroup(match, 5), + adjustGroup(match, 6), adjustGroup(match, 7), + adjustGroup(match, 8), adjustGroup(match, 9), + }, + ) + if err != nil { + continue + } + + // Text runs from after this marker to the start of the next marker (or end of string) + textStart := match[1] + var textEnd int + if i+1 < len(matches) { + textEnd = matches[i+1][0] + } else { + textEnd = len(text) + } + + word := text[textStart:textEnd] + if word == "" { + continue + } + + rawStart := rawValue.Len() + rawValue.WriteString(word) + segments = append(segments, segment{ + start: timeMs, + rawStart: rawStart, + rawEnd: rawValue.Len(), + }) + } + + if len(segments) == 0 { + return strings.TrimSpace(stripEnhancedMarkers(text)), nil + } + + finalRaw := rawValue.String() + leftTrimBytes := len(finalRaw) - len(strings.TrimLeftFunc(finalRaw, 
unicode.IsSpace)) + rightTrimBytes := len(finalRaw) - len(strings.TrimRightFunc(finalRaw, unicode.IsSpace)) + trimmedEnd := len(finalRaw) - rightTrimBytes + if trimmedEnd < leftTrimBytes { + trimmedEnd = leftTrimBytes + } + + cues := make([]Cue, 0, len(segments)) + for _, seg := range segments { + start := seg.start + byteStart := max(seg.rawStart, leftTrimBytes) + byteEnd := min(seg.rawEnd, trimmedEnd) + if byteStart >= byteEnd { + continue + } + + cues = append(cues, Cue{ + Start: &start, + Value: finalRaw[byteStart:byteEnd], + ByteStart: byteStart - leftTrimBytes, + ByteEnd: byteEnd - leftTrimBytes - 1, + }) + } + + return strings.TrimSpace(finalRaw), cues +} + +// adjustGroup remaps a capture group index from the original match to our rewritten "[...]" string. +// The rewrite shifts by -1 (removed '<', added '[') so positions within the brackets stay the same. +func adjustGroup(match []int, groupIdx int) int { + orig := match[groupIdx] + if orig == -1 { + return -1 + } + // Offset is: original position minus the position of '<' in the original, plus 1 for '[' + return orig - match[0] +} + +// stripEnhancedMarkers removes all inline markers from text, +// returning the plain lyric text. 
+func stripEnhancedMarkers(text string) string { + return enhancedLRCRegex.ReplaceAllString(text, "") +} + func parseTime(line string, match []int) (int64, error) { var hours, millis int64 var err error @@ -227,3 +360,115 @@ func parseTime(line string, match []int) (int64, error) { } type LyricList []Lyrics + +func NormalizeLyrics(lyrics Lyrics) Lyrics { + lyrics.Line = NormalizeCueLines(lyrics.Line) + if len(lyrics.Agents) == 0 { + lyrics.Agents = nil + } + return lyrics +} + +func NormalizeCueLines(lines []Line) []Line { + if len(lines) == 0 { + return lines + } + + normalized := make([]Line, len(lines)) + copy(normalized, lines) + + for i := range normalized { + var fallbackEnd *int64 + if normalized[i].End != nil { + v := *normalized[i].End + fallbackEnd = &v + } else if i+1 < len(normalized) && normalized[i+1].Start != nil { + v := *normalized[i+1].Start + fallbackEnd = &v + } + + normalized[i] = normalizeCueLine(normalized[i], fallbackEnd) + } + + return normalized +} + +func NormalizeLineTiming(line Line) Line { + if len(line.Cue) == 0 { + return line + } + + var earliestStart *int64 + var latestEnd *int64 + for i := range line.Cue { + token := line.Cue[i] + if token.Start != nil { + if earliestStart == nil || *token.Start < *earliestStart { + v := *token.Start + earliestStart = &v + } + } + + candidateEnd := token.End + if candidateEnd == nil { + candidateEnd = token.Start + } + if candidateEnd != nil { + if latestEnd == nil || *candidateEnd > *latestEnd { + v := *candidateEnd + latestEnd = &v + } + } + } + + if line.Start == nil && earliestStart != nil { + v := *earliestStart + line.Start = &v + } + if line.End == nil && latestEnd != nil { + v := *latestEnd + line.End = &v + } + return line +} + +func normalizeCueLine(line Line, fallbackEnd *int64) Line { + if len(line.Cue) == 0 { + return line + } + + for i := range line.Cue { + if line.Cue[i].End != nil { + continue + } + + if i+1 < len(line.Cue) && line.Cue[i+1].Start != nil { + v := 
*line.Cue[i+1].Start + line.Cue[i].End = &v + continue + } + + if fallbackEnd != nil { + v := *fallbackEnd + line.Cue[i].End = &v + } + } + + for i := range line.Cue { + if line.Cue[i].End == nil { + line.Cue = clearCueEnds(line.Cue) + return NormalizeLineTiming(line) + } + } + + return NormalizeLineTiming(line) +} + +func clearCueEnds(cues []Cue) []Cue { + normalized := make([]Cue, len(cues)) + copy(normalized, cues) + for i := range normalized { + normalized[i].End = nil + } + return normalized +} diff --git a/model/lyrics_test.go b/model/lyrics_test.go index 382976872..1fa82f258 100644 --- a/model/lyrics_test.go +++ b/model/lyrics_test.go @@ -116,4 +116,85 @@ var _ = Describe("ToLyrics", func() { {Start: &e, Value: "Test"}, })) }) + + It("should parse Enhanced LRC with word-level timing", func() { + lyrics, err := ToLyrics("xxx", "[00:01.00]<00:01.00>Some <00:01.50>lyrics <00:02.00>here\n[00:03.00]<00:03.00>More <00:03.50>words") + Expect(err).ToNot(HaveOccurred()) + Expect(lyrics.Synced).To(BeTrue()) + Expect(lyrics.Line).To(HaveLen(2)) + + t1000, t1500, t2000, t3000, t3500 := int64(1000), int64(1500), int64(2000), int64(3000), int64(3500) + + line0 := lyrics.Line[0] + Expect(line0.Start).To(Equal(&t1000)) + Expect(line0.End).To(Equal(&t3000)) + Expect(line0.Value).To(Equal("Some lyrics here")) + Expect(line0.Cue).To(Equal([]Cue{ + {Start: &t1000, End: &t1500, Value: "Some ", ByteStart: 0, ByteEnd: 4}, + {Start: &t1500, End: &t2000, Value: "lyrics ", ByteStart: 5, ByteEnd: 11}, + {Start: &t2000, End: &t3000, Value: "here", ByteStart: 12, ByteEnd: 15}, + })) + + line1 := lyrics.Line[1] + Expect(line1.Start).To(Equal(&t3000)) + Expect(line1.End).To(Equal(&t3500)) + Expect(line1.Value).To(Equal("More words")) + Expect(line1.Cue).To(Equal([]Cue{ + {Start: &t3000, Value: "More ", ByteStart: 0, ByteEnd: 4}, + {Start: &t3500, Value: "words", ByteStart: 5, ByteEnd: 9}, + })) + + Expect(line1.Cue[1].End).To(BeNil()) + }) + + It("should ignore Enhanced LRC markers and 
return plain lines when no markers present", func() { + a, b := int64(1000), int64(3000) + lyrics, err := ToLyrics("xxx", "[00:01.00]Plain line\n[00:03.00]Another plain line") + Expect(err).ToNot(HaveOccurred()) + Expect(lyrics.Line).To(Equal([]Line{ + {Start: &a, Value: "Plain line"}, + {Start: &b, Value: "Another plain line"}, + })) + }) + + It("should handle mixed Enhanced and plain LRC lines", func() { + lyrics, err := ToLyrics("xxx", "[00:01.00]<00:01.00>Some <00:01.50>lyrics\n[00:03.00]Plain line\n[00:05.00]<00:05.00>More <00:05.50>words") + Expect(err).ToNot(HaveOccurred()) + Expect(lyrics.Line).To(HaveLen(3)) + + t1000, t1500, t5000, t5500 := int64(1000), int64(1500), int64(5000), int64(5500) + t3000 := int64(3000) + + Expect(lyrics.Line[0].Cue).To(Equal([]Cue{ + {Start: &t1000, End: &t1500, Value: "Some ", ByteStart: 0, ByteEnd: 4}, + {Start: &t1500, End: &t3000, Value: "lyrics", ByteStart: 5, ByteEnd: 10}, + })) + Expect(lyrics.Line[0].Value).To(Equal("Some lyrics")) + Expect(lyrics.Line[0].End).To(Equal(&t3000)) + + Expect(lyrics.Line[1].Cue).To(BeNil()) + Expect(lyrics.Line[1].Value).To(Equal("Plain line")) + + Expect(lyrics.Line[2].Cue).To(Equal([]Cue{ + {Start: &t5000, Value: "More ", ByteStart: 0, ByteEnd: 4}, + {Start: &t5500, Value: "words", ByteStart: 5, ByteEnd: 9}, + })) + Expect(lyrics.Line[2].Value).To(Equal("More words")) + }) + + It("should preserve byte offsets for Enhanced LRC cues", func() { + lyrics, err := ToLyrics("xxx", "[00:00.00]<00:00.00>Oh <00:00.90>love<00:01.30> me <00:01.60>tonight") + Expect(err).ToNot(HaveOccurred()) + Expect(lyrics.Line).To(HaveLen(1)) + + t0, t900, t1300, t1600 := int64(0), int64(900), int64(1300), int64(1600) + line := lyrics.Line[0] + Expect(line.Value).To(Equal("Oh love me tonight")) + Expect(line.Cue).To(Equal([]Cue{ + {Start: &t0, Value: "Oh ", ByteStart: 0, ByteEnd: 2}, + {Start: &t900, Value: "love", ByteStart: 3, ByteEnd: 6}, + {Start: &t1300, Value: " me ", ByteStart: 7, ByteEnd: 10}, + {Start: 
&t1600, Value: "tonight", ByteStart: 11, ByteEnd: 17}, + })) + }) }) diff --git a/server/subsonic/helpers.go b/server/subsonic/helpers.go index 74d57ade4..6a14aa4aa 100644 --- a/server/subsonic/helpers.go +++ b/server/subsonic/helpers.go @@ -493,14 +493,79 @@ func mapExplicitStatus(explicitStatus string) string { return "" } -func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics) responses.StructuredLyric { +func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics, enhanced bool) responses.StructuredLyric { lines := make([]responses.Line, len(lyrics.Line)) + var cueLines []responses.CueLine + agentOrderByID := make(map[string]int, len(lyrics.Agents)) + agentRoleByID := make(map[string]string, len(lyrics.Agents)) + responseAgents := make([]responses.Agent, 0, len(lyrics.Agents)) + + for i, agent := range lyrics.Agents { + agentOrderByID[agent.ID] = i + agentRoleByID[agent.ID] = agent.Role + responseAgents = append(responseAgents, responses.Agent{ + ID: agent.ID, + Role: agent.Role, + Name: agent.Name, + }) + } for i, line := range lyrics.Line { lines[i] = responses.Line{ Start: line.Start, Value: line.Value, } + if !enhanced || len(line.Cue) == 0 { + continue + } + + agentOrder := make([]string, 0, 2) + cuesByAgent := make(map[string][]model.Cue) + for _, cue := range line.Cue { + if cue.Start == nil { + continue + } + agentID := strings.TrimSpace(cue.AgentID) + if _, exists := cuesByAgent[agentID]; !exists { + agentOrder = append(agentOrder, agentID) + } + cuesByAgent[agentID] = append(cuesByAgent[agentID], cue) + } + + sort.SliceStable(agentOrder, func(i, j int) bool { + leftRole := agentRoleByID[agentOrder[i]] + rightRole := agentRoleByID[agentOrder[j]] + if leftRole == "main" && rightRole != "main" { + return true + } + if rightRole == "main" && leftRole != "main" { + return false + } + + leftOrder, leftOK := agentOrderByID[agentOrder[i]] + rightOrder, rightOK := agentOrderByID[agentOrder[j]] + if leftOK && rightOK && leftOrder != 
rightOrder { + return leftOrder < rightOrder + } + if leftOK != rightOK { + return leftOK + } + return i < j + }) + + for _, agentID := range agentOrder { + cueLine := responses.CueLine{ + Index: int32(i), + Start: line.Start, + End: line.End, + Value: line.Value, + Cue: buildLyricCues(cuesByAgent[agentID], line.End), + } + if agentID != "" { + cueLine.AgentID = agentID + } + cueLines = append(cueLines, cueLine) + } } structured := responses.StructuredLyric{ @@ -508,10 +573,22 @@ func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics) responses.St DisplayTitle: lyrics.DisplayTitle, Lang: lyrics.Lang, Line: lines, + CueLine: cueLines, Offset: lyrics.Offset, Synced: lyrics.Synced, } + if enhanced { + kind := strings.TrimSpace(lyrics.Kind) + if kind == "" { + kind = "main" + } + structured.Kind = kind + if len(cueLines) > 0 && len(responseAgents) > 0 { + structured.Agents = responseAgents + } + } + if structured.DisplayArtist == "" { structured.DisplayArtist = mf.Artist } @@ -522,11 +599,86 @@ func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics) responses.St return structured } -func buildLyricsList(mf *model.MediaFile, lyricsList model.LyricList) *responses.LyricsList { - lyricList := make(responses.StructuredLyrics, len(lyricsList)) +func buildLyricCues(cues []model.Cue, lineEnd *int64) []responses.LyricCue { + if len(cues) == 0 { + return nil + } - for i, lyrics := range lyricsList { - lyricList[i] = buildStructuredLyric(mf, lyrics) + hasAnyEnd := false + for i := range cues { + if cues[i].End != nil { + hasAnyEnd = true + break + } + } + + normalized := make([]responses.LyricCue, 0, len(cues)) + for i := range cues { + if cues[i].Start == nil { + continue + } + + cue := responses.LyricCue{ + Start: *cues[i].Start, + Value: cues[i].Value, + ByteStart: cues[i].ByteStart, + ByteEnd: cues[i].ByteEnd, + } + if hasAnyEnd { + end := cues[i].End + if end == nil { + if i+1 < len(cues) && cues[i+1].Start != nil { + v := *cues[i+1].Start + end = &v 
+ } else if lineEnd != nil { + v := *lineEnd + end = &v + } + } + if end != nil && i+1 < len(cues) && cues[i+1].Start != nil && *end > *cues[i+1].Start { + v := *cues[i+1].Start + end = &v + } + if end != nil && *end < cue.Start { + v := cue.Start + end = &v + } + cue.End = end + } + normalized = append(normalized, cue) + } + + if hasAnyEnd { + for i := range normalized { + if normalized[i].End == nil { + for j := range normalized { + normalized[j].End = nil + } + break + } + } + } + + return normalized +} + +func buildLyricsList(mf *model.MediaFile, lyricsList model.LyricList, enhanced bool) *responses.LyricsList { + var filtered model.LyricList + if enhanced { + filtered = lyricsList + } else { + // Without enhanced, only return "main" kind entries + for _, l := range lyricsList { + kind := strings.TrimSpace(l.Kind) + if kind == "" || kind == "main" { + filtered = append(filtered, l) + } + } + } + + lyricList := make(responses.StructuredLyrics, len(filtered)) + for i, lyrics := range filtered { + lyricList[i] = buildStructuredLyric(mf, lyrics, enhanced) } res := &responses.LyricsList{ diff --git a/server/subsonic/media_retrieval.go b/server/subsonic/media_retrieval.go index 3faae1650..c3c6d98ea 100644 --- a/server/subsonic/media_retrieval.go +++ b/server/subsonic/media_retrieval.go @@ -10,6 +10,7 @@ import ( "github.com/navidrome/navidrome/conf" "github.com/navidrome/navidrome/consts" + lyricssvc "github.com/navidrome/navidrome/core/lyrics" "github.com/navidrome/navidrome/log" "github.com/navidrome/navidrome/model" "github.com/navidrome/navidrome/resources" @@ -19,6 +20,8 @@ import ( "github.com/navidrome/navidrome/utils/req" ) +const maxLegacyLyricsCandidates = 10 + func (api *Router) GetAvatar(w http.ResponseWriter, r *http.Request) (*responses.Subsonic, error) { if !conf.Server.EnableGravatar { return api.getPlaceHolderAvatar(w, r) @@ -98,7 +101,11 @@ func (api *Router) GetLyrics(r *http.Request) (*responses.Subsonic, error) { response := newResponse() 
lyricsResponse := responses.Lyrics{} response.Lyrics = &lyricsResponse - mediaFiles, err := api.ds.MediaFile(r.Context()).GetAll(filter.SongsByArtistTitleWithLyricsFirst(artist, title)) + opts := filter.SongsByArtistTitleWithLyricsFirst(artist, title) + // Search a bounded duplicate window so source-priority fallback can still + // reach older matches without turning legacy getLyrics into an unbounded scan. + opts.Max = maxLegacyLyricsCandidates + mediaFiles, err := api.ds.MediaFile(r.Context()).GetAll(opts) if err != nil { return nil, err @@ -108,9 +115,22 @@ func (api *Router) GetLyrics(r *http.Request) (*responses.Subsonic, error) { return response, nil } - structuredLyrics, err := api.lyrics.GetLyrics(r.Context(), &mediaFiles[0]) - if err != nil { - return nil, err + var structuredLyrics model.LyricList + if batchLyrics, ok := api.lyrics.(lyricssvc.BatchLyrics); ok { + structuredLyrics, err = batchLyrics.GetLyricsForMediaFiles(r.Context(), mediaFiles) + if err != nil { + return nil, err + } + } else { + for i := range mediaFiles { + structuredLyrics, err = api.lyrics.GetLyrics(r.Context(), &mediaFiles[i]) + if err != nil { + return nil, err + } + if len(structuredLyrics) > 0 { + break + } + } } if len(structuredLyrics) == 0 { @@ -124,7 +144,6 @@ func (api *Router) GetLyrics(r *http.Request) (*responses.Subsonic, error) { for _, line := range structuredLyrics[0].Line { lyricsText.WriteString(line.Value + "\n") } - lyricsResponse.Value = lyricsText.String() return response, nil @@ -146,8 +165,10 @@ func (api *Router) GetLyricsBySongId(r *http.Request) (*responses.Subsonic, erro return nil, err } + enhanced, _ := req.Params(r).Bool("enhanced") + response := newResponse() - response.LyricsList = buildLyricsList(mediaFile, structuredLyrics) + response.LyricsList = buildLyricsList(mediaFile, structuredLyrics, enhanced) return response, nil } diff --git a/server/subsonic/media_retrieval_test.go b/server/subsonic/media_retrieval_test.go index 589a609da..45a475da6 
100644 --- a/server/subsonic/media_retrieval_test.go +++ b/server/subsonic/media_retrieval_test.go @@ -186,6 +186,41 @@ var _ = Describe("MediaRetrievalController", func() { Expect(response.Lyrics.Title).To(Equal("Never Gonna Give You Up")) Expect(response.Lyrics.Value).To(Equal("We're no strangers to love\nYou know the rules and so do I\n")) }) + + It("should prefer higher-priority sidecar lyrics across duplicate candidates", func() { + conf.Server.LyricsPriority = ".ttml,embedded" + r := newGetRequest("artist=Rick+Astley", "title=Never+Gonna+Give+You+Up") + baseTime := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC) + embedded, err := model.ToLyrics("eng", "Newest duplicate embedded lyrics") + Expect(err).ToNot(HaveOccurred()) + embeddedJSON, err := json.Marshal(model.LyricList{*embedded}) + Expect(err).ToNot(HaveOccurred()) + mockRepo.SetData(model.MediaFiles{ + { + ID: "1", + Path: "tests/fixtures/01 Invisible (RED) Edit Version.mp3", + Artist: "Rick Astley", + Title: "Never Gonna Give You Up", + Lyrics: string(embeddedJSON), + UpdatedAt: baseTime.Add(2 * time.Hour), // Newer duplicate with embedded lyrics only + }, + { + ID: "2", + Path: "tests/fixtures/test.mp3", + Artist: "Rick Astley", + Title: "Never Gonna Give You Up", + Lyrics: "[]", + UpdatedAt: baseTime.Add(1 * time.Hour), // Older, but has TTML sidecar + }, + }) + + response, err := router.GetLyrics(r) + Expect(err).ToNot(HaveOccurred()) + Expect(response.Lyrics.Artist).To(Equal("Rick Astley")) + Expect(response.Lyrics.Title).To(Equal("Never Gonna Give You Up")) + Expect(response.Lyrics.Value).To(Equal("We're no strangers to love\nYou know the rules and so do I\n")) + Expect(mockRepo.Options.Max).To(Equal(maxLegacyLyricsCandidates)) + }) }) Describe("GetLyricsBySongId", func() { @@ -202,8 +237,10 @@ var _ = Describe("MediaRetrievalController", func() { Expect(realLyric.DisplayArtist).To(Equal(expectedLyric.DisplayArtist)) Expect(realLyric.DisplayTitle).To(Equal(expectedLyric.DisplayTitle)) + 
Expect(realLyric.Kind).To(Equal(expectedLyric.Kind)) Expect(realLyric.Lang).To(Equal(expectedLyric.Lang)) Expect(realLyric.Synced).To(Equal(expectedLyric.Synced)) + Expect(realLyric.Agents).To(Equal(expectedLyric.Agents)) if expectedLyric.Offset == nil { Expect(realLyric.Offset).To(BeNil()) @@ -222,6 +259,38 @@ var _ = Describe("MediaRetrievalController", func() { Expect(*realLine.Start).To(Equal(*expectedLine.Start)) } } + + Expect(realLyric.CueLine).To(HaveLen(len(expectedLyric.CueLine))) + for j, realCueLine := range realLyric.CueLine { + expectedCueLine := expectedLyric.CueLine[j] + Expect(realCueLine.Index).To(Equal(expectedCueLine.Index)) + Expect(realCueLine.Value).To(Equal(expectedCueLine.Value)) + Expect(realCueLine.AgentID).To(Equal(expectedCueLine.AgentID)) + if expectedCueLine.Start == nil { + Expect(realCueLine.Start).To(BeNil()) + } else { + Expect(*realCueLine.Start).To(Equal(*expectedCueLine.Start)) + } + if expectedCueLine.End == nil { + Expect(realCueLine.End).To(BeNil()) + } else { + Expect(*realCueLine.End).To(Equal(*expectedCueLine.End)) + } + + Expect(realCueLine.Cue).To(HaveLen(len(expectedCueLine.Cue))) + for k, realCue := range realCueLine.Cue { + expectedCue := expectedCueLine.Cue[k] + Expect(realCue.Value).To(Equal(expectedCue.Value)) + Expect(realCue.Start).To(Equal(expectedCue.Start)) + Expect(realCue.ByteStart).To(Equal(expectedCue.ByteStart)) + Expect(realCue.ByteEnd).To(Equal(expectedCue.ByteEnd)) + if expectedCue.End == nil { + Expect(realCue.End).To(BeNil()) + } else { + Expect(*realCue.End).To(Equal(*expectedCue.End)) + } + } + } } } @@ -323,6 +392,427 @@ var _ = Describe("MediaRetrievalController", func() { }, }) }) + + It("should return multilingual TTML sidecar lyrics", func() { + conf.Server.LyricsPriority = ".ttml,embedded" + r := newGetRequest("id=1") + + mockRepo.SetData(model.MediaFiles{ + { + ID: "1", + Path: "tests/fixtures/test.mp3", + Artist: "Rick Astley", + Title: "Never Gonna Give You Up", + Lyrics: "[]", + }, + }) 
+ + response, err := router.GetLyricsBySongId(r) + Expect(err).ToNot(HaveOccurred()) + + porTime := int64(18800) + ttmlTime := int64(22800) + compareResponses(response.LyricsList, responses.LyricsList{ + StructuredLyrics: responses.StructuredLyrics{ + { + DisplayArtist: "Rick Astley", + DisplayTitle: "Never Gonna Give You Up", + Lang: "eng", + Synced: true, + Line: []responses.Line{ + { + Start: ×[0], + Value: "We're no strangers to love", + }, + { + Start: &ttmlTime, + Value: "You know the rules and so do I", + }, + }, + }, + { + DisplayArtist: "Rick Astley", + DisplayTitle: "Never Gonna Give You Up", + Lang: "por", + Synced: true, + Line: []responses.Line{ + { + Start: &porTime, + Value: "Nao somos estranhos ao amor", + }, + }, + }, + }, + }) + }) + + It("should return metadata-linked translation and pronunciation tracks from TTML", func() { + conf.Server.LyricsPriority = ".ttml,embedded" + r := newGetRequest("id=1&enhanced=true") + + mockRepo.SetData(model.MediaFiles{ + { + ID: "1", + Path: "tests/fixtures/test-metadata.mp3", + Artist: "Rick Astley", + Title: "Never Gonna Give You Up", + Lyrics: "[]", + }, + }) + + response, err := router.GetLyricsBySongId(r) + Expect(err).ToNot(HaveOccurred()) + + mainStartA := int64(1000) + mainStartB := int64(2000) + tokenStartA := int64(2000) + tokenEndA := int64(2300) + tokenStartB := int64(2300) + tokenEndB := int64(2600) + compareResponses(response.LyricsList, responses.LyricsList{ + StructuredLyrics: responses.StructuredLyrics{ + { + DisplayArtist: "Rick Astley", + DisplayTitle: "Never Gonna Give You Up", + Kind: "main", + Lang: "ja", + Synced: true, + Line: []responses.Line{ + { + Start: &mainStartA, + Value: "こんにちは", + }, + { + Start: &mainStartB, + Value: "こんばんは", + }, + }, + }, + { + DisplayArtist: "Rick Astley", + DisplayTitle: "Never Gonna Give You Up", + Kind: "translation", + Lang: "es", + Synced: true, + Line: []responses.Line{ + { + Start: &mainStartA, + Value: "Hola", + }, + }, + }, + { + DisplayArtist: "Rick 
Astley", + DisplayTitle: "Never Gonna Give You Up", + Kind: "pronunciation", + Lang: "ja-latn", + Synced: true, + Line: []responses.Line{ + { + Start: &mainStartB, + Value: "konni", + }, + }, + CueLine: []responses.CueLine{ + { + Index: 0, + Start: &mainStartB, + End: &tokenEndB, + Value: "konni", + Cue: []responses.LyricCue{ + { + Start: tokenStartA, + End: &tokenEndA, + ByteStart: 0, + ByteEnd: 1, + Value: "ko", + }, + { + Start: tokenStartB, + End: &tokenEndB, + ByteStart: 2, + ByteEnd: 4, + Value: "nni", + }, + }, + }, + }, + }, + }, + }) + }) + + It("should return cue lines for songLyrics v2 clients with enhanced=true", func() { + r := newGetRequest("id=1&enhanced=true") + + lineStart := int64(1000) + lineEnd := int64(3000) + tokenStartA := int64(1000) + tokenEndA := int64(1400) + tokenStartB := int64(2000) + tokenEndB := int64(2500) + lyricsJson, err := json.Marshal(model.LyricList{ + { + Lang: "eng", + Agents: []model.Agent{{ID: "lead", Role: "main"}, {ID: "__nd_bg__|lead", Role: "bg"}}, + Synced: true, + Line: []model.Line{ + { + Start: &lineStart, + End: &lineEnd, + Value: "Hello echo", + Cue: []model.Cue{ + { + Start: &tokenStartA, + End: &tokenEndA, + Value: "Hello", + ByteStart: 0, + ByteEnd: 4, + AgentID: "lead", + }, + { + Start: &tokenStartB, + End: &tokenEndB, + Value: "echo", + ByteStart: 6, + ByteEnd: 9, + AgentID: "__nd_bg__|lead", + }, + }, + }, + }, + }, + }) + Expect(err).ToNot(HaveOccurred()) + + mockRepo.SetData(model.MediaFiles{ + { + ID: "1", + Artist: "Rick Astley", + Title: "Never Gonna Give You Up", + Lyrics: string(lyricsJson), + }, + }) + + response, err := router.GetLyricsBySongId(r) + Expect(err).ToNot(HaveOccurred()) + compareResponses(response.LyricsList, responses.LyricsList{ + StructuredLyrics: responses.StructuredLyrics{ + { + DisplayArtist: "Rick Astley", + DisplayTitle: "Never Gonna Give You Up", + Kind: "main", + Lang: "eng", + Synced: true, + Agents: []responses.Agent{ + {ID: "lead", Role: "main"}, + {ID: "__nd_bg__|lead", 
Role: "bg"}, + }, + Line: []responses.Line{ + { + Start: &lineStart, + Value: "Hello echo", + }, + }, + CueLine: []responses.CueLine{ + { + Index: 0, + Start: &lineStart, + End: &lineEnd, + Value: "Hello echo", + AgentID: "lead", + Cue: []responses.LyricCue{ + { + Start: tokenStartA, + End: &tokenEndA, + ByteStart: 0, + ByteEnd: 4, + Value: "Hello", + }, + }, + }, + { + Index: 0, + Start: &lineStart, + End: &lineEnd, + Value: "Hello echo", + AgentID: "__nd_bg__|lead", + Cue: []responses.LyricCue{ + { + Start: tokenStartB, + End: &tokenEndB, + ByteStart: 6, + ByteEnd: 9, + Value: "echo", + }, + }, + }, + }, + }, + }, + }) + }) + + It("should keep enhanced line-level lyrics when no cue data is available", func() { + r := newGetRequest("id=1&enhanced=true") + + lineStart := int64(1000) + lineEnd := int64(3000) + lyricsJSON, err := json.Marshal(model.LyricList{ + { + Kind: "main", + Lang: "eng", + Synced: true, + Line: []model.Line{ + { + Start: &lineStart, + End: &lineEnd, + Value: "Line without word timing", + }, + }, + }, + }) + Expect(err).ToNot(HaveOccurred()) + + mockRepo.SetData(model.MediaFiles{ + { + ID: "1", + Artist: "Rick Astley", + Title: "Never Gonna Give You Up", + Lyrics: string(lyricsJSON), + }, + }) + + response, err := router.GetLyricsBySongId(r) + Expect(err).ToNot(HaveOccurred()) + compareResponses(response.LyricsList, responses.LyricsList{ + StructuredLyrics: responses.StructuredLyrics{ + { + DisplayArtist: "Rick Astley", + DisplayTitle: "Never Gonna Give You Up", + Kind: "main", + Lang: "eng", + Synced: true, + Line: []responses.Line{ + { + Start: &lineStart, + Value: "Line without word timing", + }, + }, + }, + }, + }) + }) + + It("should return required cue byte offsets for ambiguous and multibyte cue lines", func() { + r := newGetRequest("id=1&enhanced=true") + + asciiLineStart := int64(0) + asciiLineEnd := int64(2400) + asciiCueStartA := int64(0) + asciiCueEndA := int64(300) + asciiCueStartB := int64(900) + asciiCueEndB := int64(1300) + 
asciiCueStartC := int64(1300) + asciiCueEndC := int64(1600) + asciiCueStartD := int64(1600) + + utfLineStart := int64(2747) + utfLineEnd := int64(6214) + utfCueStartA := int64(2747) + utfCueEndA := int64(3018) + utfCueStartB := int64(3018) + utfCueEndB := int64(3179) + utfCueStartC := int64(3582) + utfCueEndC := int64(4100) + utfCueStartD := int64(4500) + utfCueEndD := int64(6214) + + lyricsJSON, err := json.Marshal(model.LyricList{ + { + Lang: "eng", + Synced: true, + Line: []model.Line{ + { + Start: &asciiLineStart, + End: &asciiLineEnd, + Value: "Oh love love me tonight", + Cue: []model.Cue{ + {Start: &asciiCueStartA, End: &asciiCueEndA, Value: "Oh", ByteStart: 0, ByteEnd: 1}, + {Start: &asciiCueStartB, End: &asciiCueEndB, Value: "love", ByteStart: 8, ByteEnd: 11}, + {Start: &asciiCueStartC, End: &asciiCueEndC, Value: "me", ByteStart: 13, ByteEnd: 14}, + {Start: &asciiCueStartD, Value: "tonight", ByteStart: 16, ByteEnd: 22}, + }, + }, + { + Start: &utfLineStart, + End: &utfLineEnd, + Value: "눈을 뜬 순간", + Cue: []model.Cue{ + {Start: &utfCueStartA, End: &utfCueEndA, Value: "눈", ByteStart: 0, ByteEnd: 2}, + {Start: &utfCueStartB, End: &utfCueEndB, Value: "을", ByteStart: 3, ByteEnd: 5}, + {Start: &utfCueStartC, End: &utfCueEndC, Value: "뜬", ByteStart: 7, ByteEnd: 9}, + {Start: &utfCueStartD, End: &utfCueEndD, Value: "순간", ByteStart: 11, ByteEnd: 16}, + }, + }, + }, + }, + }) + Expect(err).ToNot(HaveOccurred()) + + mockRepo.SetData(model.MediaFiles{ + { + ID: "1", + Artist: "Rick Astley", + Title: "Never Gonna Give You Up", + Lyrics: string(lyricsJSON), + }, + }) + + response, err := router.GetLyricsBySongId(r) + Expect(err).ToNot(HaveOccurred()) + compareResponses(response.LyricsList, responses.LyricsList{ + StructuredLyrics: responses.StructuredLyrics{ + { + DisplayArtist: "Rick Astley", + DisplayTitle: "Never Gonna Give You Up", + Kind: "main", + Lang: "eng", + Synced: true, + Line: []responses.Line{ + {Start: &asciiLineStart, Value: "Oh love love me tonight"}, + 
{Start: &utfLineStart, Value: "눈을 뜬 순간"}, + }, + CueLine: []responses.CueLine{ + { + Index: 0, + Start: &asciiLineStart, + End: &asciiLineEnd, + Value: "Oh love love me tonight", + Cue: []responses.LyricCue{ + {Start: asciiCueStartA, End: &asciiCueEndA, Value: "Oh", ByteStart: 0, ByteEnd: 1}, + {Start: asciiCueStartB, End: &asciiCueEndB, Value: "love", ByteStart: 8, ByteEnd: 11}, + {Start: asciiCueStartC, End: &asciiCueEndC, Value: "me", ByteStart: 13, ByteEnd: 14}, + {Start: asciiCueStartD, End: &asciiLineEnd, Value: "tonight", ByteStart: 16, ByteEnd: 22}, + }, + }, + { + Index: 1, + Start: &utfLineStart, + End: &utfLineEnd, + Value: "눈을 뜬 순간", + Cue: []responses.LyricCue{ + {Start: utfCueStartA, End: &utfCueEndA, Value: "눈", ByteStart: 0, ByteEnd: 2}, + {Start: utfCueStartB, End: &utfCueEndB, Value: "을", ByteStart: 3, ByteEnd: 5}, + {Start: utfCueStartC, End: &utfCueEndC, Value: "뜬", ByteStart: 7, ByteEnd: 9}, + {Start: utfCueStartD, End: &utfCueEndD, Value: "순간", ByteStart: 11, ByteEnd: 16}, + }, + }, + }, + }, + }, + }) + }) }) }) diff --git a/server/subsonic/opensubsonic.go b/server/subsonic/opensubsonic.go index 6c54d36d0..16bd6805f 100644 --- a/server/subsonic/opensubsonic.go +++ b/server/subsonic/opensubsonic.go @@ -11,7 +11,7 @@ func (api *Router) GetOpenSubsonicExtensions(_ *http.Request) (*responses.Subson extensions := responses.OpenSubsonicExtensions{ {Name: "transcodeOffset", Versions: []int32{1}}, {Name: "formPost", Versions: []int32{1}}, - {Name: "songLyrics", Versions: []int32{1}}, + {Name: "songLyrics", Versions: []int32{1, 2}}, {Name: "indexBasedQueue", Versions: []int32{1}}, {Name: "transcoding", Versions: []int32{1}}, } diff --git a/server/subsonic/opensubsonic_test.go b/server/subsonic/opensubsonic_test.go index d98599f8f..5031f1c69 100644 --- a/server/subsonic/opensubsonic_test.go +++ b/server/subsonic/opensubsonic_test.go @@ -58,7 +58,7 @@ var _ = Describe("GetOpenSubsonicExtensions", func() { HaveLen(5), 
ContainElement(responses.OpenSubsonicExtension{Name: "transcodeOffset", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "formPost", Versions: []int32{1}}), - ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1}}), + ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1, 2}}), ContainElement(responses.OpenSubsonicExtension{Name: "indexBasedQueue", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "transcoding", Versions: []int32{1}}), )) @@ -87,7 +87,7 @@ var _ = Describe("GetOpenSubsonicExtensions", func() { HaveLen(6), ContainElement(responses.OpenSubsonicExtension{Name: "transcodeOffset", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "formPost", Versions: []int32{1}}), - ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1}}), + ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1, 2}}), ContainElement(responses.OpenSubsonicExtension{Name: "indexBasedQueue", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "transcoding", Versions: []int32{1}}), ContainElement(responses.OpenSubsonicExtension{Name: "sonicSimilarity", Versions: []int32{1}}), diff --git a/server/subsonic/responses/responses.go b/server/subsonic/responses/responses.go index c2e863b0f..8d3279d87 100644 --- a/server/subsonic/responses/responses.go +++ b/server/subsonic/responses/responses.go @@ -543,13 +543,39 @@ type Line struct { Value string `xml:",chardata" json:"value"` } +type LyricCue struct { + Start int64 `xml:"start,attr" json:"start"` + End *int64 `xml:"end,attr,omitempty" json:"end,omitempty"` + ByteStart int `xml:"byteStart,attr" json:"byteStart"` + ByteEnd int `xml:"byteEnd,attr" json:"byteEnd"` + Value string `xml:",chardata" json:"value"` +} + +type Agent struct { + ID string `xml:"id,attr" json:"id"` + Role string `xml:"role,attr" 
json:"role"` + Name string `xml:"name,attr,omitempty" json:"name,omitempty"` +} + +type CueLine struct { + Index int32 `xml:"index,attr" json:"index"` + Start *int64 `xml:"start,attr,omitempty" json:"start,omitempty"` + End *int64 `xml:"end,attr,omitempty" json:"end,omitempty"` + Value string `xml:"value,attr" json:"value"` + AgentID string `xml:"agentId,attr,omitempty" json:"agentId,omitempty"` + Cue []LyricCue `xml:"cue,omitempty" json:"cue,omitempty"` +} + type StructuredLyric struct { - DisplayArtist string `xml:"displayArtist,attr,omitempty" json:"displayArtist,omitempty"` - DisplayTitle string `xml:"displayTitle,attr,omitempty" json:"displayTitle,omitempty"` - Lang string `xml:"lang,attr" json:"lang"` - Line []Line `xml:"line" json:"line"` - Offset *int64 `xml:"offset,attr,omitempty" json:"offset,omitempty"` - Synced bool `xml:"synced,attr" json:"synced"` + DisplayArtist string `xml:"displayArtist,attr,omitempty" json:"displayArtist,omitempty"` + DisplayTitle string `xml:"displayTitle,attr,omitempty" json:"displayTitle,omitempty"` + Kind string `xml:"kind,attr,omitempty" json:"kind,omitempty"` + Lang string `xml:"lang,attr" json:"lang"` + Line []Line `xml:"line" json:"line"` + Agents []Agent `xml:"agent,omitempty" json:"agents,omitempty"` + CueLine []CueLine `xml:"cueLine,omitempty" json:"cueLine,omitempty"` + Offset *int64 `xml:"offset,attr,omitempty" json:"offset,omitempty"` + Synced bool `xml:"synced,attr" json:"synced"` } type StructuredLyrics []StructuredLyric diff --git a/tests/fixtures/bom-test.ttml b/tests/fixtures/bom-test.ttml new file mode 100644 index 000000000..319ab1f07 --- /dev/null +++ b/tests/fixtures/bom-test.ttml @@ -0,0 +1,2 @@ + +

BOM test line

diff --git a/tests/fixtures/bom-utf16-test.ttml b/tests/fixtures/bom-utf16-test.ttml new file mode 100644 index 000000000..a5621ef5d Binary files /dev/null and b/tests/fixtures/bom-utf16-test.ttml differ diff --git a/tests/fixtures/test-enhanced.lrc b/tests/fixtures/test-enhanced.lrc new file mode 100644 index 000000000..8f7b60f8c --- /dev/null +++ b/tests/fixtures/test-enhanced.lrc @@ -0,0 +1,6 @@ +[ar:Test Artist] +[ti:Enhanced Test] +[lang:eng] +[00:01.00]<00:01.00>Some <00:01.50>lyrics <00:02.00>here +[00:03.00]<00:03.00>More <00:03.50>words +[00:05.00]Plain line without inline markers diff --git a/tests/fixtures/test-metadata.ttml b/tests/fixtures/test-metadata.ttml new file mode 100644 index 000000000..c0243c18f --- /dev/null +++ b/tests/fixtures/test-metadata.ttml @@ -0,0 +1,25 @@ + + + + + + + + Hola + + + + + konni + + + + + + +
+

こんにちは

+

こんばんは

+
+ +
diff --git a/tests/fixtures/test.elrc b/tests/fixtures/test.elrc new file mode 100644 index 000000000..01c3d2cdd --- /dev/null +++ b/tests/fixtures/test.elrc @@ -0,0 +1,5 @@ +[ar:ELRC Artist] +[ti:ELRC Song] +[lang:eng] +[00:01.00]<00:01.00>Lead <00:01.50>words +[00:03.00]Fallback line diff --git a/tests/fixtures/test.srt b/tests/fixtures/test.srt new file mode 100644 index 000000000..3c9c09a39 --- /dev/null +++ b/tests/fixtures/test.srt @@ -0,0 +1,7 @@ +1 +00:00:18,800 --> 00:00:22,800 +We're from subtitles + +2 +00:00:22,801 --> 00:00:26,000 +Another subtitle line diff --git a/tests/fixtures/test.ttml b/tests/fixtures/test.ttml new file mode 100644 index 000000000..a85673a1b --- /dev/null +++ b/tests/fixtures/test.ttml @@ -0,0 +1,12 @@ + + + +
+

We're no strangers to love

+

You know the rules and so do I

+
+
+

Nao somos estranhos ao amor

+
+ +
diff --git a/ui/src/actions/player.js b/ui/src/actions/player.js index 9056abeb6..f55102207 100644 --- a/ui/src/actions/player.js +++ b/ui/src/actions/player.js @@ -9,6 +9,7 @@ export const PLAYER_SET_VOLUME = 'PLAYER_SET_VOLUME' export const PLAYER_SET_MODE = 'PLAYER_SET_MODE' export const TRANSCODING_SET_PROFILE = 'TRANSCODING_SET_PROFILE' export const PLAYER_REFRESH_QUEUE = 'PLAYER_REFRESH_QUEUE' +export const PLAYER_UPDATE_LYRIC = 'PLAYER_UPDATE_LYRIC' export const setTrack = (data) => ({ type: PLAYER_SET_TRACK, @@ -114,3 +115,8 @@ export const refreshQueue = (resolvedUrls) => ({ type: PLAYER_REFRESH_QUEUE, data: resolvedUrls, }) + +export const updateQueueLyric = (trackId, lyric) => ({ + type: PLAYER_UPDATE_LYRIC, + data: { trackId, lyric }, +}) diff --git a/ui/src/audioplayer/KaraokeLyricsOverlay.jsx b/ui/src/audioplayer/KaraokeLyricsOverlay.jsx new file mode 100644 index 000000000..aefb0127e --- /dev/null +++ b/ui/src/audioplayer/KaraokeLyricsOverlay.jsx @@ -0,0 +1,1745 @@ +import IconButton from '@material-ui/core/IconButton' +import Popover from '@material-ui/core/Popover' +import Slider from '@material-ui/core/Slider' +import { makeStyles, useTheme } from '@material-ui/core/styles' +import Tooltip from '@material-ui/core/Tooltip' +import Typography from '@material-ui/core/Typography' +import CloseIcon from '@material-ui/icons/Close' +import RestoreIcon from '@material-ui/icons/Restore' +import TuneIcon from '@material-ui/icons/Tune' +import clsx from 'clsx' +import React, { + memo, + useCallback, + useEffect, + useMemo, + useRef, + useState, +} from 'react' +import { + buildHighlightedAuxLine, + buildHighlightedMainLine, + buildKaraokeLines, + getActiveKaraokeState, + hasUsableKaraokeTiming, + hasStructuredLyricContent, + resolveKaraokeTokenWindow, + resolveLayerLineForMain, + utf8ByteRangeToCodeUnitRange, +} from './lyrics' + +const KARAOKE_RENDER_LEAD_MS = 80 +const KARAOKE_CLOCK_DRIFT_RESET_MS = 140 +const KARAOKE_CLOCK_RESET_THRESHOLD_MS = 320 +const 
KARAOKE_MONOTONIC_JITTER_MS = 60 +const KARAOKE_RENDER_UPDATE_EPSILON_MS = 6 +const KARAOKE_WORD_SETTLE_MS = 96 +const KARAOKE_ANIMATION_MS = 150 +const KARAOKE_DEFAULT_HEIGHT_PX = 300 +const KARAOKE_MIN_HEIGHT_PX = 150 +const KARAOKE_MAX_HEIGHT_RATIO = 0.72 +const KARAOKE_MAX_HEIGHT_PX = 760 +const KARAOKE_CENTER_SPACER_RATIO = 0.5 +const KARAOKE_CENTER_SPACER_MIN_PX = 132 +const KARAOKE_DEFAULT_LINE_HEIGHT = 1.3 +const KARAOKE_MIN_LINE_HEIGHT = 1 +const KARAOKE_MAX_LINE_HEIGHT = 2.2 +const KARAOKE_LINE_HEIGHT_STEP = 0.02 +const KARAOKE_GROUP_SPACING_BASE_PX = 14 +const KARAOKE_AUX_LINE_HEIGHT = 1.2 +const KARAOKE_MAIN_INACTIVE_FONT_FACTOR = 0.8 +const KARAOKE_AUX_INACTIVE_FONT_FACTOR = 0.88 + +const TOKEN_DONE_ALPHA = 1 +const TOKEN_FUTURE_ALPHA = 0.34 +const TOKEN_ACTIVE_ALPHA = 1 +const TOKEN_WIPE_SOFT_SPREAD_PCT = 12 +const TOKEN_WIPE_EDGE_PCT = 8 + +const COLOR_PRESETS = [ + { key: 'white', label: 'White', value: 'rgba(255, 255, 255, 0.92)' }, + { key: 'black', label: 'Black', value: 'rgba(0, 0, 0, 0.87)' }, + { key: 'blue', label: 'Blue', value: 'rgba(120, 160, 220, 0.75)' }, + { key: 'green', label: 'Green', value: 'rgba(100, 200, 130, 0.7)' }, + { key: 'pink', label: 'Pink', value: 'rgba(240, 140, 170, 0.75)' }, + { key: 'purple', label: 'Purple', value: 'rgba(180, 140, 240, 0.75)' }, + { key: 'orange', label: 'Orange', value: 'rgba(240, 180, 100, 0.75)' }, + { key: 'cyan', label: 'Cyan', value: 'rgba(100, 210, 220, 0.75)' }, + { key: 'yellow', label: 'Yellow', value: 'rgba(240, 230, 110, 0.75)' }, +] + +const DEFAULT_LYRICS_SETTINGS = { + lineHeight: KARAOKE_DEFAULT_LINE_HEIGHT, + overlayHeight: KARAOKE_DEFAULT_HEIGHT_PX, + tr: { fontSize: 18, colorKey: 'blue' }, + main: { fontSize: 30, colorKey: 'white' }, + pr: { fontSize: 18, colorKey: 'green' }, +} + +const SETTINGS_STORAGE_KEY = 'karaoke-lyrics-settings' + +const createDefaultLyricsSettings = (isDark = true) => ({ + lineHeight: KARAOKE_DEFAULT_LINE_HEIGHT, + overlayHeight: KARAOKE_DEFAULT_HEIGHT_PX, 
+ tr: { ...DEFAULT_LYRICS_SETTINGS.tr }, + main: { ...DEFAULT_LYRICS_SETTINGS.main, colorKey: isDark ? 'white' : 'black' }, + pr: { ...DEFAULT_LYRICS_SETTINGS.pr }, +}) + +const clampLineHeight = (value) => { + const numeric = Number(value) + if (!Number.isFinite(numeric)) { + return KARAOKE_DEFAULT_LINE_HEIGHT + } + return clamp(numeric, KARAOKE_MIN_LINE_HEIGHT, KARAOKE_MAX_LINE_HEIGHT) +} + +const clampOverlayHeightPreference = (value) => { + const numeric = Number(value) + if (!Number.isFinite(numeric)) { + return KARAOKE_DEFAULT_HEIGHT_PX + } + return clamp(numeric, KARAOKE_MIN_HEIGHT_PX, KARAOKE_MAX_HEIGHT_PX) +} + +const normalizeLyricsSettings = (settings) => ({ + lineHeight: clampLineHeight(settings?.lineHeight), + overlayHeight: clampOverlayHeightPreference(settings?.overlayHeight), + tr: { ...DEFAULT_LYRICS_SETTINGS.tr, ...settings?.tr }, + main: { ...DEFAULT_LYRICS_SETTINGS.main, ...settings?.main }, + pr: { ...DEFAULT_LYRICS_SETTINGS.pr, ...settings?.pr }, +}) + +const loadLyricsSettings = () => { + try { + const raw = localStorage.getItem(SETTINGS_STORAGE_KEY) + if (raw) { + return normalizeLyricsSettings(JSON.parse(raw)) + } + } catch { + /* ignore */ + } + return normalizeLyricsSettings() +} + +const saveLyricsSettings = (settings) => { + try { + localStorage.setItem( + SETTINGS_STORAGE_KEY, + JSON.stringify(normalizeLyricsSettings(settings)), + ) + } catch { + /* ignore */ + } +} + +const getColorValue = (colorKey) => + COLOR_PRESETS.find((c) => c.key === colorKey)?.value || COLOR_PRESETS[0].value + +const hexToRgba = (hex, alpha) => { + const m = (hex || '').match(/#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})/i) + if (m) return `rgba(${parseInt(m[1], 16)}, ${parseInt(m[2], 16)}, ${parseInt(m[3], 16)}, ${alpha})` + const rm = (hex || '').match(/rgba?\((\d+),\s*(\d+),\s*(\d+)/) + if (rm) return `rgba(${rm[1]}, ${rm[2]}, ${rm[3]}, ${alpha})` + return `rgba(48, 48, 48, ${alpha})` +} + +const useStyles = makeStyles((theme) => { + const isDark = 
theme.palette.type === 'dark' + const overlayBg = hexToRgba(theme.palette.background.default, 0.85) + const primaryMain = theme.palette.primary.main + const primaryRgb = (() => { + const m = (primaryMain || '').match(/#([0-9a-f]{2})([0-9a-f]{2})([0-9a-f]{2})/i) + if (m) return [parseInt(m[1], 16), parseInt(m[2], 16), parseInt(m[3], 16)] + const rm = (primaryMain || '').match(/rgba?\((\d+),\s*(\d+),\s*(\d+)/) + if (rm) return [parseInt(rm[1]), parseInt(rm[2]), parseInt(rm[3])] + return [144, 202, 249] + })() + const textPrimary = isDark ? 'rgba(255, 255, 255, 0.92)' : 'rgba(0, 0, 0, 0.87)' + const textSecondary = isDark ? 'rgba(255, 255, 255, 0.55)' : 'rgba(0, 0, 0, 0.54)' + const borderSubtle = isDark ? 'rgba(255, 255, 255, 0.12)' : 'rgba(0, 0, 0, 0.12)' + + return ({ + overlay: { + position: 'fixed', + left: '50%', + bottom: 100, + transform: 'translateX(-50%)', + zIndex: 1400, + width: 'min(1000px, calc(100vw - 32px))', + minHeight: KARAOKE_MIN_HEIGHT_PX, + background: overlayBg, + borderRadius: 12, + border: `1px solid ${borderSubtle}`, + boxShadow: '0 18px 48px rgba(0, 0, 0, 0.42)', + backdropFilter: 'blur(20px)', + color: textPrimary, + display: 'flex', + flexDirection: 'column', + overflow: 'hidden', + '@media (max-width:810px)': { + bottom: 78, + width: 'calc(100vw - 12px)', + borderRadius: 12, + minHeight: 180, + maxHeight: '65vh', + }, + }, + overlayInline: { + position: 'absolute', + inset: 0, + width: '100%', + height: '100%', + minHeight: 0, + maxHeight: '100%', + transform: 'none', + borderRadius: 'inherit', + border: 'none', + boxShadow: 'none', + background: 'transparent', + backdropFilter: 'blur(16px)', + WebkitBackdropFilter: 'blur(16px)', + zIndex: 1, + }, + resizeHandle: { + height: 14, + cursor: 'ns-resize', + flexShrink: 0, + position: 'relative', + '&::after': { + content: '""', + position: 'absolute', + left: '50%', + top: 4, + transform: 'translateX(-50%)', + width: 56, + height: 3, + borderRadius: 999, + background: 
`rgba(${primaryRgb.join(', ')}, 0.22)`, + }, + '@media (max-width:810px)': { + display: 'none', + }, + }, + header: { + display: 'flex', + alignItems: 'center', + justifyContent: 'space-between', + gap: theme.spacing(1), + padding: theme.spacing(0.3, 1.3, 0.4, 1.3), + }, + headerInline: { + padding: theme.spacing(0.25, 0.65, 0.35, 0.65), + gap: theme.spacing(0.65), + }, + headerLeft: { + display: 'flex', + alignItems: 'center', + gap: theme.spacing(1), + minWidth: 0, + }, + languageBadges: { + display: 'flex', + alignItems: 'center', + gap: theme.spacing(0.5), + flexWrap: 'wrap', + minWidth: 0, + }, + languageBadge: { + display: 'inline-flex', + alignItems: 'center', + justifyContent: 'center', + gap: theme.spacing(0.35), + padding: theme.spacing(0.2, 0.7), + borderRadius: 999, + border: `1px solid ${borderSubtle}`, + background: isDark ? 'rgba(15, 23, 42, 0.42)' : 'rgba(0, 0, 0, 0.06)', + color: isDark ? 'rgba(226, 232, 240, 0.8)' : 'rgba(0, 0, 0, 0.6)', + fontSize: 10, + lineHeight: 1, + letterSpacing: '0.04em', + whiteSpace: 'nowrap', + transition: `all ${KARAOKE_ANIMATION_MS}ms ease-in-out`, + userSelect: 'none', + }, + languageBadgeToggle: { + cursor: 'pointer', + '&:hover': { + borderColor: `rgba(${primaryRgb.join(', ')}, 0.35)`, + background: isDark ? 'rgba(15, 23, 42, 0.56)' : 'rgba(0, 0, 0, 0.1)', + }, + }, + languageBadgeActive: { + borderColor: `rgba(${primaryRgb.join(', ')}, 0.46)`, + background: `rgba(${primaryRgb.join(', ')}, 0.18)`, + color: isDark ? 
'rgba(248, 250, 252, 0.94)' : 'rgba(0, 0, 0, 0.87)', + }, + languageBadgeLabel: { + fontWeight: 700, + textTransform: 'uppercase', + opacity: 0.78, + }, + languageBadgeValue: { + opacity: 0.9, + }, + closeButton: { + color: textSecondary, + }, + lineGroup: { + display: 'flex', + flexDirection: 'column', + alignItems: 'center', + gap: theme.spacing(0.35), + }, + inlineTr: { + margin: 0, + display: 'inline-block', + maxWidth: '100%', + textAlign: 'center', + fontWeight: 400, + lineHeight: KARAOKE_AUX_LINE_HEIGHT, + letterSpacing: '0.01em', + transition: `opacity ${KARAOKE_ANIMATION_MS}ms ease-in-out, color ${KARAOKE_ANIMATION_MS}ms ease-in-out, font-size 280ms ease-in-out, max-width 280ms ease-in-out`, + }, + inlinePr: { + margin: 0, + display: 'inline-flex', + alignItems: 'center', + justifyContent: 'center', + flexWrap: 'wrap', + alignSelf: 'center', + width: 'fit-content', + maxWidth: '100%', + boxSizing: 'border-box', + textAlign: 'center', + fontWeight: 400, + lineHeight: 1, + letterSpacing: '0.01em', + transition: `opacity ${KARAOKE_ANIMATION_MS}ms ease-in-out, color ${KARAOKE_ANIMATION_MS}ms ease-in-out, font-size 280ms ease-in-out, max-width 280ms ease-in-out`, + padding: theme.spacing(0.15, 0.9), + borderRadius: 999, + background: isDark ? 
'rgba(255, 255, 255, 0.08)' : 'rgba(0, 0, 0, 0.05)', + border: `1px solid ${borderSubtle}`, + }, + bodyWrapper: { + position: 'relative', + flex: 1, + overflow: 'hidden', + }, + body: { + padding: theme.spacing(0.5, 2, 1.4, 2), + overflowY: 'auto', + overflowX: 'hidden', + height: '100%', + overscrollBehavior: 'contain', + scrollbarWidth: 'none', + msOverflowStyle: 'none', + maskImage: 'linear-gradient(to bottom, transparent 0%, black 8%, black 92%, transparent 100%)', + WebkitMaskImage: 'linear-gradient(to bottom, transparent 0%, black 8%, black 92%, transparent 100%)', + '&::-webkit-scrollbar': { + display: 'none', + width: 0, + height: 0, + }, + '@media (max-width:810px)': { + padding: theme.spacing(0.35, 1.2, 1.2, 1.2), + }, + }, + bodyInline: { + padding: theme.spacing(0.25, 0.8, 0.85, 0.8), + }, + lines: { + display: 'flex', + flexDirection: 'column', + gap: theme.spacing(1.24), + paddingBottom: theme.spacing(1), + }, + line: { + margin: 0, + display: 'inline-block', + maxWidth: '100%', + fontWeight: 600, + lineHeight: 1.24, + letterSpacing: '0.01em', + textAlign: 'center', + color: isDark ? 'rgba(255, 255, 255, 0.62)' : 'rgba(0, 0, 0, 0.52)', + transition: `opacity ${KARAOKE_ANIMATION_MS}ms ease-in-out, color ${KARAOKE_ANIMATION_MS}ms ease-in-out, font-size 280ms ease-in-out, max-width 280ms ease-in-out`, + }, + token: { + display: 'inline-block', + whiteSpace: 'pre-wrap', + transition: `color ${KARAOKE_ANIMATION_MS}ms ease-in-out, text-shadow ${KARAOKE_ANIMATION_MS}ms ease-in-out`, + }, + settingsButton: { + color: textSecondary, + padding: 4, + '&:hover': { + color: textPrimary, + }, + }, + settingsPanel: { + background: isDark ? 
'rgba(12, 14, 20, 0.96)' : 'rgba(255, 255, 255, 0.96)', + border: `1px solid ${borderSubtle}`, + borderRadius: 10, + padding: theme.spacing(1.5, 2), + width: 278, + backdropFilter: 'blur(12px)', + }, + settingsHeader: { + display: 'flex', + alignItems: 'center', + justifyContent: 'space-between', + gap: theme.spacing(1), + marginBottom: theme.spacing(1.25), + }, + settingsSection: { + marginBottom: theme.spacing(1.2), + '&:last-child': { + marginBottom: 0, + }, + }, + settingsTitle: { + fontSize: 11, + fontWeight: 700, + letterSpacing: '0.08em', + textTransform: 'uppercase', + color: isDark ? 'rgba(255, 255, 255, 0.78)' : 'rgba(0, 0, 0, 0.72)', + }, + settingsLabel: { + fontSize: 10, + fontWeight: 600, + letterSpacing: '0.1em', + textTransform: 'uppercase', + color: isDark ? 'rgba(255, 255, 255, 0.55)' : 'rgba(0, 0, 0, 0.5)', + marginBottom: 4, + }, + settingsRow: { + display: 'flex', + alignItems: 'center', + gap: theme.spacing(1), + }, + settingsSlider: { + flex: 1, + color: `rgba(${primaryRgb.join(', ')}, 0.6)`, + '& .MuiSlider-thumb': { + width: 12, + height: 12, + }, + '& .MuiSlider-rail': { + opacity: 0.3, + }, + }, + settingsSliderValue: { + fontSize: 11, + color: isDark ? 'rgba(255, 255, 255, 0.5)' : 'rgba(0, 0, 0, 0.45)', + minWidth: 22, + textAlign: 'right', + }, + settingsControlLabel: { + fontSize: 10, + letterSpacing: '0.06em', + textTransform: 'uppercase', + color: isDark ? 'rgba(255, 255, 255, 0.45)' : 'rgba(0, 0, 0, 0.42)', + minWidth: 72, + whiteSpace: 'nowrap', + }, + resetButton: { + color: textSecondary, + padding: 4, + '&:hover': { + color: textPrimary, + }, + }, + colorDots: { + display: 'flex', + gap: 5, + marginTop: 4, + }, + colorDot: { + width: 16, + height: 16, + borderRadius: '50%', + border: '2px solid transparent', + cursor: 'pointer', + transition: 'border-color 120ms ease, transform 120ms ease', + '&:hover': { + transform: 'scale(1.2)', + }, + }, + colorDotActive: { + borderColor: isDark ? 
'rgba(255, 255, 255, 0.85)' : 'rgba(0, 0, 0, 0.7)', + }, +})}) + +const clamp = (v, min, max) => Math.max(min, Math.min(max, v)) +const lerp = (from, to, t) => from + (to - from) * t +const formatLineHeight = (value) => clampLineHeight(value).toFixed(2) +const getLineGapPx = (lineHeight) => + `${Math.round(clampLineHeight(lineHeight) * KARAOKE_GROUP_SPACING_BASE_PX)}px` + +const normalizeForComparison = (text) => + (text || '').replace(/[\s\p{P}]/gu, '').toLowerCase() + +const shouldShowAuxLine = (mainLine, auxLine) => { + if (!auxLine || !auxLine.value) return false + return ( + normalizeForComparison(auxLine.value) !== + normalizeForComparison(mainLine.value) + ) +} + +const buildLanguageBadges = ({ + mainLyric, + translationLyric, + pronunciationLyric, + showTranslation, + showPronunciation, + translationEnabled, + pronunciationEnabled, +}) => + [ + { + key: 'main', + label: 'Main', + lang: mainLyric?.lang, + active: true, + toggleable: false, + }, + pronunciationEnabled && { + key: 'pr', + label: 'PR', + lang: pronunciationLyric?.lang, + active: showPronunciation, + toggleable: true, + tooltip: showPronunciation ? 'Hide pronunciation' : 'Show pronunciation', + }, + translationEnabled && { + key: 'tr', + label: 'TR', + lang: translationLyric?.lang, + active: showTranslation, + toggleable: true, + tooltip: showTranslation ? 'Hide translation' : 'Show translation', + }, + ].filter((badge) => badge && badge.lang) + +const SettingsSection = ({ label, layer, settings, onChange, classes }) => { + const s = settings[layer] + return ( +
+
{label}
+
+ + onChange({ ...settings, [layer]: { ...s, fontSize: val } }) + } + /> + {s.fontSize} +
+
+ {COLOR_PRESETS.map((preset) => ( +
+ onChange({ ...settings, [layer]: { ...s, colorKey: preset.key } }) + } + /> + ))} +
+
+ ) +} + +const LineHeightSetting = ({ settings, onChange, classes }) => ( +
+
Spacing
+
+
Line height
+ + onChange({ + ...settings, + lineHeight: clampLineHeight(Array.isArray(val) ? val[0] : val), + }) + } + /> + + {formatLineHeight(settings.lineHeight)} + +
+
+) + +const LyricsSettingsPopover = ({ settings, onChange, onReset }) => { + const classes = useStyles() + const [anchorEl, setAnchorEl] = useState(null) + + const handleToggle = useCallback((e) => { + e.stopPropagation() + setAnchorEl((prev) => (prev ? null : e.currentTarget)) + }, []) + + const handleClose = useCallback(() => setAnchorEl(null), []) + + return ( + <> + + + + + + + + +
+ Appearance + + + + + + + +
+ + + + +
+ + ) +} + +const easeInOut = (v) => { + const clamped = clamp(v, 0, 1) + return clamped < 0.5 ? 2 * clamped * clamped : 1 - (-2 * clamped + 2) ** 2 / 2 +} + +const getMaxHeightPx = () => { + if (typeof window === 'undefined') { + return KARAOKE_MAX_HEIGHT_PX + } + return Math.min( + Math.floor(window.innerHeight * KARAOKE_MAX_HEIGHT_RATIO), + KARAOKE_MAX_HEIGHT_PX, + ) +} + +const buildSegmentsFromLine = (line) => { + if (!line || !Array.isArray(line.tokens) || line.tokens.length === 0) { + return [{ text: line?.value || '', token: null, tokenIndex: -1 }] + } + + const text = line.value || '' + const exactSegments = (() => { + if (!text) { + return null + } + + const rangedTokens = line.tokens + .map((token, tokenIndex) => ({ + token, + tokenIndex, + range: utf8ByteRangeToCodeUnitRange( + text, + token?.byteStart, + token?.byteEnd, + ), + })) + .filter((entry) => entry.range != null) + + if ( + rangedTokens.length !== line.tokens.length || + rangedTokens.length === 0 + ) { + return null + } + + rangedTokens.sort( + (a, b) => + a.range.start - b.range.start || + a.range.end - b.range.end || + a.tokenIndex - b.tokenIndex, + ) + + const segments = [] + let cursor = 0 + for (const entry of rangedTokens) { + if (entry.range.start < cursor) { + return null + } + if (entry.range.start > cursor) { + segments.push({ + text: text.slice(cursor, entry.range.start), + token: null, + tokenIndex: -1, + }) + } + segments.push({ + text: entry.range.text, + token: entry.token, + tokenIndex: entry.tokenIndex, + }) + cursor = entry.range.end + } + + if (cursor < text.length) { + segments.push({ + text: text.slice(cursor), + token: null, + tokenIndex: -1, + }) + } + + return segments + })() + if (exactSegments) { + return exactSegments + } + + const matchedSegments = [] + const fallbackSegments = [] + let cursor = 0 + let allMatched = text.length > 0 + let anyMatched = false + + const pushFallbackSeparatorIfNeeded = (nextTokenText) => { + if (fallbackSegments.length === 0) { + return 
+ } + const prevText = fallbackSegments[fallbackSegments.length - 1].text || '' + if (!prevText || !nextTokenText) { + return + } + if (/\s$/.test(prevText) || /^\s/.test(nextTokenText)) { + return + } + if (/[A-Za-z0-9]$/.test(prevText) && /^[A-Za-z0-9]/.test(nextTokenText)) { + fallbackSegments.push({ text: ' ', token: null, tokenIndex: -1 }) + } + } + + for (let tokenIndex = 0; tokenIndex < line.tokens.length; tokenIndex += 1) { + const token = line.tokens[tokenIndex] + const tokenText = token.value || '' + if (!tokenText) { + continue + } + + pushFallbackSeparatorIfNeeded(tokenText) + fallbackSegments.push({ text: tokenText, token, tokenIndex }) + + if (!text) { + allMatched = false + continue + } + + const foundAt = text.indexOf(tokenText, cursor) + const normalizedFoundAt = + foundAt >= 0 + ? foundAt + : text.toLowerCase().indexOf(tokenText.toLowerCase(), cursor) + + if (normalizedFoundAt >= 0) { + anyMatched = true + if (normalizedFoundAt > cursor) { + matchedSegments.push({ + text: text.slice(cursor, normalizedFoundAt), + token: null, + tokenIndex: -1, + }) + } + const matchedTokenText = text.slice( + normalizedFoundAt, + normalizedFoundAt + tokenText.length, + ) + matchedSegments.push({ + text: matchedTokenText || tokenText, + token, + tokenIndex, + }) + cursor = normalizedFoundAt + tokenText.length + } else { + allMatched = false + } + } + + if (allMatched && anyMatched) { + if (cursor < text.length) { + matchedSegments.push({ + text: text.slice(cursor), + token: null, + tokenIndex: -1, + }) + } + return matchedSegments + } + + if (fallbackSegments.length > 0) { + return fallbackSegments + } + + return [{ text, token: null, tokenIndex: -1 }] +} + +const getLineRenderWindow = (line, nextLineStart) => { + let start = Number.isFinite(Number(line?.start)) ? Number(line.start) : null + let end = Number.isFinite(Number(line?.end)) ? Number(line.end) : null + const fallbackEnd = Number.isFinite(Number(nextLineStart)) + ? 
Number(nextLineStart) + : null + + if (end == null) { + end = fallbackEnd + } + + const tokens = Array.isArray(line?.tokens) ? line.tokens : [] + if (tokens.length > 0) { + const firstWindow = resolveKaraokeTokenWindow(line, 0, nextLineStart) + const lastWindow = resolveKaraokeTokenWindow( + line, + tokens.length - 1, + nextLineStart, + ) + + if ( + firstWindow.start != null && + (start == null || firstWindow.start < start) + ) { + start = firstWindow.start + } + if (lastWindow.end != null && (end == null || lastWindow.end > end)) { + end = lastWindow.end + } + } + + return { start, end } +} + +const shouldSkipLineFrame = ( + prevPlaybackMs, + nextPlaybackMs, + line, + nextLineStart, +) => { + if (prevPlaybackMs === nextPlaybackMs) { + return true + } + + const { start, end } = getLineRenderWindow(line, nextLineStart) + + if (start != null) { + const activationStart = start - 220 + if (prevPlaybackMs < activationStart && nextPlaybackMs < activationStart) { + return true + } + } + + if (end != null) { + const settleEnd = end + KARAOKE_WORD_SETTLE_MS + 160 + if (prevPlaybackMs > settleEnd && nextPlaybackMs > settleEnd) { + return true + } + } + + return false +} + +const areLineStylesEqual = (prevStyle, nextStyle) => { + const a = prevStyle || {} + const b = nextStyle || {} + return ( + a.opacity === b.opacity && + a.color === b.color && + a.fontSize === b.fontSize && + a.fontWeight === b.fontWeight && + a.lineHeight === b.lineHeight && + a.maxWidth === b.maxWidth + ) +} + +const parseColorRGB = (rgba) => { + const m = (rgba || '').match(/rgba?\((\d+),\s*(\d+),\s*(\d+)/) + return m ? 
[parseInt(m[1]), parseInt(m[2]), parseInt(m[3])] : [255, 255, 255] +} + +const buildTokenWipeStyle = ({ + fillProgress, + highlightAlpha, + futureAlpha, + rgb, +}) => { + const [r, g, b] = rgb || [255, 255, 255] + const fillPct = clamp(fillProgress, 0, 1) * 100 + const doneColor = `rgba(${r}, ${g}, ${b}, ${clamp(highlightAlpha, TOKEN_DONE_ALPHA, TOKEN_ACTIVE_ALPHA)})` + const futureColor = `rgba(${r}, ${g}, ${b}, ${futureAlpha})` + + if (fillPct <= 0) { + return { color: futureColor, textShadow: 'none' } + } + + const edgeStart = clamp(fillPct - TOKEN_WIPE_EDGE_PCT, 0, 100) + const softEnd = clamp(fillPct + TOKEN_WIPE_SOFT_SPREAD_PCT, 0, 100) + return { + color: 'transparent', + WebkitTextFillColor: 'transparent', + backgroundImage: `linear-gradient(90deg, ${doneColor} 0%, ${doneColor} ${edgeStart}%, ${doneColor} ${fillPct}%, ${futureColor} ${softEnd}%, ${futureColor} 100%)`, + backgroundClip: 'text', + WebkitBackgroundClip: 'text', + textShadow: 'none', + } +} + +const KaraokeLineRow = memo( + ({ + line, + nextLineStart, + renderPlaybackMs, + className, + style, + tokenClassName, + highlightTokens = true, + }) => { + const segments = buildSegmentsFromLine(line) + const tokenRGB = useMemo( + () => (style?.color ? parseColorRGB(style.color) : [255, 255, 255]), + [style?.color], + ) + + return ( + + {segments.map((segment, idx) => { + if (!segment.token) { + return {segment.text} + } + + if (!highlightTokens) { + return {segment.text} + } + + const { start: tokenStart, end: tokenEnd } = + resolveKaraokeTokenWindow(line, segment.tokenIndex, nextLineStart) + + const isDone = tokenEnd != null ? renderPlaybackMs >= tokenEnd : false + const isActive = + !isDone && tokenStart != null && renderPlaybackMs >= tokenStart + + const progress = + isDone || + tokenStart == null || + tokenEnd == null || + tokenEnd <= tokenStart + ? isDone + ? 
1 + : 0 + : clamp( + (renderPlaybackMs - tokenStart) / (tokenEnd - tokenStart), + 0, + 1, + ) + + const justEnded = + tokenEnd != null && + renderPlaybackMs > tokenEnd && + renderPlaybackMs <= tokenEnd + KARAOKE_WORD_SETTLE_MS + + const settleProgress = + justEnded && tokenEnd != null + ? clamp( + (renderPlaybackMs - tokenEnd) / KARAOKE_WORD_SETTLE_MS, + 0, + 1, + ) + : 0 + + let alpha = TOKEN_FUTURE_ALPHA + if (isDone) { + alpha = TOKEN_DONE_ALPHA + } else if (isActive) { + alpha = lerp( + TOKEN_FUTURE_ALPHA, + TOKEN_ACTIVE_ALPHA, + easeInOut(progress), + ) + } + if (justEnded) { + alpha = lerp( + TOKEN_ACTIVE_ALPHA, + TOKEN_DONE_ALPHA, + easeInOut(settleProgress), + ) + } + alpha = clamp(alpha, TOKEN_FUTURE_ALPHA, TOKEN_ACTIVE_ALPHA) + const fillProgress = isDone ? 1 : isActive ? progress : 0 + const isBgRole = segment.token?.role === 'bg' + + return ( + + {segment.text} + + ) + })} + + ) + }, + (prevProps, nextProps) => { + if ( + prevProps.line !== nextProps.line || + prevProps.nextLineStart !== nextProps.nextLineStart || + prevProps.className !== nextProps.className || + prevProps.tokenClassName !== nextProps.tokenClassName || + prevProps.highlightTokens !== nextProps.highlightTokens || + !areLineStylesEqual(prevProps.style, nextProps.style) + ) { + return false + } + + return shouldSkipLineFrame( + prevProps.renderPlaybackMs, + nextProps.renderPlaybackMs, + nextProps.line, + nextProps.nextLineStart, + ) + }, +) + +KaraokeLineRow.displayName = 'KaraokeLineRow' + +const KaraokeLyricsOverlay = ({ + visible, + mainLyric, + translationLyric, + pronunciationLyric, + showTranslation, + showPronunciation, + translationEnabled, + pronunciationEnabled, + onToggleTranslation, + onTogglePronunciation, + audioInstance, + onClose, + inline = false, +}) => { + const classes = useStyles() + const theme = useTheme() + const isDark = theme.palette.type === 'dark' + const [playbackMs, setPlaybackMs] = useState(0) + const [maxHeightPx, setMaxHeightPx] = 
useState(getMaxHeightPx()) + const [bodyViewportHeight, setBodyViewportHeight] = useState(0) + const [isCompact, setIsCompact] = useState( + typeof window !== 'undefined' ? window.innerWidth <= 810 : false, + ) + const [lyricsSettings, setLyricsSettings] = useState(loadLyricsSettings) + + const handleSettingsChange = useCallback((next) => { + const normalized = normalizeLyricsSettings(next) + setLyricsSettings(normalized) + saveLyricsSettings(normalized) + }, []) + + const handleResetAppearance = useCallback(() => { + const defaults = createDefaultLyricsSettings(isDark) + setLyricsSettings(defaults) + saveLyricsSettings(defaults) + }, [isDark]) + + const bodyRef = useRef(null) + const activeLineRef = useRef(null) + + const mainLines = useMemo(() => buildKaraokeLines(mainLyric), [mainLyric]) + const translationLines = useMemo( + () => buildKaraokeLines(translationLyric), + [translationLyric], + ) + const pronunciationLines = useMemo( + () => buildKaraokeLines(pronunciationLyric), + [pronunciationLyric], + ) + const overlayHeight = clamp( + lyricsSettings.overlayHeight, + KARAOKE_MIN_HEIGHT_PX, + maxHeightPx, + ) + + useEffect(() => { + const onResize = () => { + const nextMaxHeight = getMaxHeightPx() + setIsCompact(window.innerWidth <= 810) + setMaxHeightPx(nextMaxHeight) + } + + onResize() + window.addEventListener('resize', onResize) + return () => window.removeEventListener('resize', onResize) + }, []) + + useEffect(() => { + setLyricsSettings((prev) => { + const currentColor = prev.main.colorKey + const shouldSwap = + (isDark && currentColor === 'black') || + (!isDark && currentColor === 'white') + if (!shouldSwap) return prev + const newColorKey = isDark ? 
'white' : 'black' + const updated = { + ...prev, + main: { ...prev.main, colorKey: newColorKey }, + } + saveLyricsSettings(updated) + return updated + }) + }, [isDark]) + + useEffect(() => { + const body = bodyRef.current + if (!body) { + return undefined + } + + const updateViewportHeight = () => { + setBodyViewportHeight(body.clientHeight || 0) + } + + updateViewportHeight() + + if (typeof ResizeObserver !== 'undefined') { + const observer = new ResizeObserver(updateViewportHeight) + observer.observe(body) + return () => observer.disconnect() + } + + window.addEventListener('resize', updateViewportHeight) + return () => window.removeEventListener('resize', updateViewportHeight) + }, [overlayHeight, isCompact, showTranslation, showPronunciation, visible]) + + const onResizeStart = useCallback( + (event) => { + if (isCompact) { + return + } + + event.preventDefault() + const startY = event.clientY + const startHeight = overlayHeight + + const onMove = (moveEvent) => { + const delta = startY - moveEvent.clientY + handleSettingsChange({ + ...lyricsSettings, + overlayHeight: clamp( + startHeight + delta, + KARAOKE_MIN_HEIGHT_PX, + maxHeightPx, + ), + }) + } + + const onUp = () => { + window.removeEventListener('mousemove', onMove) + window.removeEventListener('mouseup', onUp) + } + + window.addEventListener('mousemove', onMove) + window.addEventListener('mouseup', onUp) + }, + [ + handleSettingsChange, + isCompact, + lyricsSettings, + maxHeightPx, + overlayHeight, + ], + ) + + useEffect(() => { + if (!visible || !audioInstance) { + setPlaybackMs(0) + return + } + + let rafId = 0 + let cancelled = false + let anchorAudioMs = 0 + let anchorPerfMs = 0 + let lastRenderMs = 0 + + const readPlaybackMs = () => { + const seconds = Number(audioInstance.currentTime) + if (!Number.isFinite(seconds) || seconds < 0) { + return 0 + } + return seconds * 1000 + } + + const resetAnchor = (perfNow, observedMs) => { + anchorAudioMs = observedMs + anchorPerfMs = perfNow + } + + const 
tick = () => { + if (cancelled) { + return + } + + const observedMs = readPlaybackMs() + const perfNow = performance.now() + const playbackRate = Number(audioInstance.playbackRate) + const canInterpolate = + !audioInstance.paused && + !audioInstance.seeking && + Number.isFinite(playbackRate) && + playbackRate > 0 + + let nowMs = observedMs + + if (!canInterpolate) { + resetAnchor(perfNow, observedMs) + } else if (anchorPerfMs === 0) { + resetAnchor(perfNow, observedMs) + } else { + const predicted = + anchorAudioMs + (perfNow - anchorPerfMs) * playbackRate + const drift = observedMs - predicted + if (Math.abs(drift) > KARAOKE_CLOCK_DRIFT_RESET_MS) { + nowMs = observedMs + resetAnchor(perfNow, observedMs) + } else { + nowMs = predicted + } + } + + const backwardsDrift = lastRenderMs - nowMs + if (canInterpolate && backwardsDrift > 0) { + nowMs = lastRenderMs + } + + if (canInterpolate && backwardsDrift > KARAOKE_CLOCK_RESET_THRESHOLD_MS) { + resetAnchor(perfNow, observedMs) + } else if ( + !canInterpolate && + backwardsDrift > 0 && + backwardsDrift <= KARAOKE_MONOTONIC_JITTER_MS + ) { + nowMs = lastRenderMs + } + + nowMs = Math.max(0, nowMs) + lastRenderMs = nowMs + + setPlaybackMs((prev) => + Math.abs(prev - nowMs) >= KARAOKE_RENDER_UPDATE_EPSILON_MS + ? nowMs + : prev, + ) + rafId = window.requestAnimationFrame(tick) + } + + const initialMs = readPlaybackMs() + resetAnchor(performance.now(), initialMs) + lastRenderMs = initialMs + setPlaybackMs(initialMs) + rafId = window.requestAnimationFrame(tick) + + return () => { + cancelled = true + if (rafId) { + window.cancelAnimationFrame(rafId) + } + } + }, [audioInstance, visible]) + + const renderPlaybackMs = playbackMs + KARAOKE_RENDER_LEAD_MS + const hasTimedMainLines = useMemo( + () => hasUsableKaraokeTiming(mainLines), + [mainLines], + ) + + const { lineIndex } = useMemo( + () => + hasTimedMainLines + ? 
getActiveKaraokeState(mainLines, renderPlaybackMs) + : { lineIndex: -1, tokenIndex: -1 }, + [hasTimedMainLines, mainLines, renderPlaybackMs], + ) + + const activeIndex = hasTimedMainLines && lineIndex >= 0 ? lineIndex : -1 + const lineHeight = lyricsSettings.lineHeight + const lineGap = getLineGapPx(lineHeight) + const languageBadges = buildLanguageBadges({ + mainLyric, + translationLyric, + pronunciationLyric, + showTranslation, + showPronunciation, + translationEnabled, + pronunciationEnabled, + }) + + const trByMainIndex = useMemo(() => { + if (!showTranslation || translationLines.length === 0) return {} + const map = {} + for (let i = 0; i < mainLines.length; i++) { + const { line } = resolveLayerLineForMain(mainLines, translationLines, i) + if (line) map[i] = line + } + return map + }, [mainLines, translationLines, showTranslation]) + + const prByMainIndex = useMemo(() => { + if (!showPronunciation || pronunciationLines.length === 0) return {} + const map = {} + for (let i = 0; i < mainLines.length; i++) { + const { line } = resolveLayerLineForMain(mainLines, pronunciationLines, i) + if (line) map[i] = line + } + return map + }, [mainLines, pronunciationLines, showPronunciation]) + + const hasTranslationLine = showTranslation && translationLines.length > 0 + const hasPronunciationLine = + showPronunciation && pronunciationLines.length > 0 + const measuredViewportHeight = bodyRef.current?.clientHeight || 0 + const estimatedViewportHeight = + measuredViewportHeight > 0 + ? measuredViewportHeight + : bodyViewportHeight > 0 + ? bodyViewportHeight + : isCompact + ? 260 + : Math.max(220, overlayHeight - 170) + const centerSpacerPx = Math.max( + hasTimedMainLines ? KARAOKE_CENTER_SPACER_MIN_PX : 0, + hasTimedMainLines + ? 
Math.floor(estimatedViewportHeight * KARAOKE_CENTER_SPACER_RATIO) + : 0, + ) + + useEffect(() => { + if (!visible || !hasTimedMainLines) { + return + } + + let animFrameId = null + let scrollAnimId = null + + animFrameId = window.requestAnimationFrame(() => { + const body = bodyRef.current + const activeNode = activeLineRef.current + if (!body || !activeNode) { + return + } + + const bodyRect = body.getBoundingClientRect() + const activeRect = activeNode.getBoundingClientRect() + const deltaWithinBody = + activeRect.top - + bodyRect.top - + (body.clientHeight - activeRect.height) / 2 + const maxTop = Math.max(0, body.scrollHeight - body.clientHeight) + const targetTop = clamp(body.scrollTop + deltaWithinBody, 0, maxTop) + const distance = targetTop - body.scrollTop + + if (Math.abs(distance) < 2) { + return + } + + const startTop = body.scrollTop + const duration = 400 + const startTime = performance.now() + + const easeOutCubic = (t) => 1 - Math.pow(1 - t, 3) + + const step = (now) => { + const elapsed = now - startTime + const progress = Math.min(elapsed / duration, 1) + const eased = easeOutCubic(progress) + body.scrollTop = startTop + distance * eased + if (progress < 1) { + scrollAnimId = window.requestAnimationFrame(step) + } + } + + scrollAnimId = window.requestAnimationFrame(step) + }) + + return () => { + if (animFrameId) window.cancelAnimationFrame(animFrameId) + if (scrollAnimId) window.cancelAnimationFrame(scrollAnimId) + } + }, [ + centerSpacerPx, + hasTimedMainLines, + hasPronunciationLine, + hasTranslationLine, + lineIndex, + overlayHeight, + visible, + ]) + + if ( + !visible || + !hasStructuredLyricContent(mainLyric) || + mainLines.length === 0 + ) { + return null + } + + const getMainLineStyle = (idx) => { + const [r, g, b] = parseColorRGB(getColorValue(lyricsSettings.main.colorKey)) + if (!hasTimedMainLines) { + return { + opacity: 1, + color: `rgba(${r}, ${g}, ${b}, 0.98)`, + fontSize: lyricsSettings.main.fontSize, + lineHeight, + } + } + + const 
delta = idx - activeIndex + const isActive = delta === 0 + let opacity = isActive ? 1 : delta < 0 ? 0.6 : 0.72 + const color = isActive + ? `rgba(${r}, ${g}, ${b}, 0.98)` + : delta < 0 + ? `rgba(${r}, ${g}, ${b}, 0.4)` + : `rgba(${r}, ${g}, ${b}, 0.54)` + + if (delta > 1) { + const level = clamp(delta, 1, 6) + opacity = Math.max(0.36, 0.74 - level * 0.08) + } + + if (delta < -1) { + const level = clamp(Math.abs(delta), 1, 6) + opacity = Math.max(0.28, 0.62 - level * 0.08) + } + + const baseFontSize = lyricsSettings.main.fontSize + const fontSize = isActive + ? baseFontSize + : Math.round(baseFontSize * KARAOKE_MAIN_INACTIVE_FONT_FACTOR) + + return { + opacity, + color, + fontSize, + lineHeight, + maxWidth: isActive + ? '100%' + : `${Math.round(KARAOKE_MAIN_INACTIVE_FONT_FACTOR * 100)}%`, + } + } + + const getAuxLineStyle = (idx, layerKey) => { + const [r, g, b] = parseColorRGB( + getColorValue(lyricsSettings[layerKey].colorKey), + ) + const baseFontSize = lyricsSettings[layerKey].fontSize + if (!hasTimedMainLines) { + return { + opacity: 0.94, + fontSize: baseFontSize, + color: `rgba(${r}, ${g}, ${b}, 0.94)`, + lineHeight: KARAOKE_AUX_LINE_HEIGHT, + } + } + + const delta = idx - activeIndex + const isActive = delta === 0 + + let opacity = isActive ? 0.94 : delta < 0 ? 0.5 : 0.62 + const color = isActive + ? `rgba(${r}, ${g}, ${b}, 0.94)` + : delta < 0 + ? `rgba(${r}, ${g}, ${b}, 0.42)` + : `rgba(${r}, ${g}, ${b}, 0.56)` + + if (delta > 1) { + const level = clamp(delta, 1, 6) + opacity = Math.max(0.28, 0.64 - level * 0.08) + } + + if (delta < -1) { + const level = clamp(Math.abs(delta), 1, 6) + opacity = Math.max(0.22, 0.5 - level * 0.08) + } + + const fontSize = isActive + ? baseFontSize + : Math.round(baseFontSize * KARAOKE_AUX_INACTIVE_FONT_FACTOR) + + return { + opacity, + fontSize, + color, + lineHeight: KARAOKE_AUX_LINE_HEIGHT, + maxWidth: isActive + ? 
'100%' + : `${Math.round(KARAOKE_AUX_INACTIVE_FONT_FACTOR * 100)}%`, + } + } + + const overlayStyle = inline + ? undefined + : isCompact + ? undefined + : { + height: overlayHeight, + maxHeight: maxHeightPx, + } + + return ( +
event.stopPropagation() : undefined} + > + {!inline && ( +
+ )} + +
+
+
+ {languageBadges.map((badge) => { + const badgeEl = ( +
{ + if (e.key === 'Enter' || e.key === ' ') { + e.preventDefault() + ;(badge.key === 'tr' + ? onToggleTranslation + : onTogglePronunciation)() + } + } + : undefined + } + > + + {badge.label} + + {badge.lang} +
+ ) + return badge.toggleable ? ( + + {badgeEl} + + ) : badgeEl + })} +
+
+ +
+ + + + +
+
+ +
+
+
+
+ {mainLines.map((line, idx) => { + const trLine = trByMainIndex[idx] + const prLine = prByMainIndex[idx] + const mainNextLineStart = mainLines[idx + 1]?.start ?? null + const highlightedMainLine = buildHighlightedMainLine( + line, + mainNextLineStart, + ) + const highlightedTrLine = buildHighlightedAuxLine( + line, + trLine, + mainNextLineStart, + ) + const highlightedPrLine = buildHighlightedAuxLine( + line, + prLine, + mainNextLineStart, + ) + const showTr = shouldShowAuxLine(line, trLine) + const showPr = shouldShowAuxLine(line, prLine) + const lineStyle = getMainLineStyle(idx) + const trStyle = getAuxLineStyle(idx, 'tr') + const prStyle = getAuxLineStyle(idx, 'pr') + return ( +
{ + if (audioInstance && line.start != null) { + audioInstance.currentTime = line.start / 1000 + } + }} + > + + {showPr && ( + + )} + {showTr && ( + + )} +
+ ) + })} +
+
+
+
+
+ ) +} + +export default KaraokeLyricsOverlay diff --git a/ui/src/audioplayer/KaraokeLyricsOverlay.test.jsx b/ui/src/audioplayer/KaraokeLyricsOverlay.test.jsx new file mode 100644 index 000000000..dba354363 --- /dev/null +++ b/ui/src/audioplayer/KaraokeLyricsOverlay.test.jsx @@ -0,0 +1,514 @@ +import React from 'react' +import { + cleanup, + fireEvent, + render, + screen, + waitFor, +} from '@testing-library/react' +import KaraokeLyricsOverlay from './KaraokeLyricsOverlay' + +const DEFAULT_LINE_HEIGHT_TEXT = '1.30' +const NEXT_LINE_HEIGHT_TEXT = '1.32' + +const audioInstance = { + currentTime: 0, + paused: true, + seeking: false, + playbackRate: 1, +} + +const buildLyric = (kind, lang, value) => ({ + kind, + lang, + synced: true, + line: [{ start: 1000, value }], +}) + +const renderOverlay = (props = {}) => + render( + {}} + onTogglePronunciation={() => {}} + audioInstance={audioInstance} + onClose={() => {}} + {...props} + />, + ) + +describe(' behavior', () => { + beforeEach(() => { + localStorage.clear() + window.innerWidth = 1200 + window.innerHeight = 900 + vi.spyOn(window, 'requestAnimationFrame').mockImplementation(() => 1) + vi.spyOn(window, 'cancelAnimationFrame').mockImplementation(() => {}) + }) + + afterEach(() => { + vi.restoreAllMocks() + cleanup() + }) + + it('shows tooltips for translation, pronunciation, and appearance controls', async () => { + renderOverlay() + + fireEvent.mouseOver(screen.getByTestId('lyrics-language-badge-tr')) + expect(await screen.findByText('Show translation')).toBeInTheDocument() + + fireEvent.mouseOver(screen.getByTestId('lyrics-language-badge-pr')) + expect(await screen.findByText('Hide pronunciation')).toBeInTheDocument() + + fireEvent.mouseOver(screen.getByTestId('lyrics-settings-button')) + expect(await screen.findByText('Appearance')).toBeInTheDocument() + }) + + it('renders inline mode without the desktop resize handle', () => { + renderOverlay({ inline: true }) + + 
expect(screen.getByTestId('karaoke-lyrics-overlay')).toHaveAttribute( + 'data-inline', + 'true', + ) + expect(screen.queryByTestId('lyrics-resize-handle')).not.toBeInTheDocument() + }) + + it('renders the appearance popup with Main label and default line height for older settings', async () => { + localStorage.setItem( + 'karaoke-lyrics-settings', + JSON.stringify({ + tr: { fontSize: 16, colorKey: 'blue' }, + main: { fontSize: 26, colorKey: 'white' }, + pr: { fontSize: 15, colorKey: 'green' }, + }), + ) + + renderOverlay() + + fireEvent.click(screen.getByTestId('lyrics-settings-button')) + + expect(await screen.findByText('Appearance')).toBeInTheDocument() + expect(screen.getByText('Main', { selector: 'div' })).toBeInTheDocument() + expect(screen.queryByText('Default')).not.toBeInTheDocument() + expect(screen.getByTestId('lyrics-reset-appearance')).toBeInTheDocument() + expect(screen.getByTestId('lyrics-line-height-value')).toHaveTextContent( + DEFAULT_LINE_HEIGHT_TEXT, + ) + }) + + it('renders the lyric group in main, pronunciation, translation order with layer badges', () => { + renderOverlay({ + showTranslation: true, + showPronunciation: true, + }) + + const mainLine = screen.getByText('こんにちは') + const pronunciationLine = screen.getByText('konnichiwa') + const translationLine = screen.getByText('Hello') + + expect( + mainLine.compareDocumentPosition(pronunciationLine) & + Node.DOCUMENT_POSITION_FOLLOWING, + ).toBeTruthy() + expect( + pronunciationLine.compareDocumentPosition(translationLine) & + Node.DOCUMENT_POSITION_FOLLOWING, + ).toBeTruthy() + + expect(screen.getByTestId('lyrics-language-badge-main')).toHaveTextContent( + 'Mainja', + ) + expect(screen.getByTestId('lyrics-language-badge-pr')).toHaveTextContent( + 'PRja-Latn', + ) + expect(screen.getByTestId('lyrics-language-badge-tr')).toHaveTextContent( + 'TRen', + ) + }) + + it('renders line-timed rows as whole-line spans without synthetic token splits', () => { + renderOverlay({ + mainLyric: { + kind: 
'main', + lang: 'en', + synced: true, + line: [ + { start: 1000, end: 2400, value: 'Batter up, batter up, batter up' }, + ], + }, + translationLyric: { + kind: 'translation', + lang: 'ja', + synced: true, + line: [ + { + start: 1000, + end: 2400, + value: 'バッターアップ、バッターアップ、バッターアップ', + }, + ], + }, + pronunciationLyric: { + kind: 'pronunciation', + lang: 'ja-Latn', + synced: true, + line: [ + { + start: 1000, + end: 2400, + value: 'Battaa appu, battaa appu, battaa appu', + }, + ], + }, + showTranslation: true, + showPronunciation: true, + }) + + const mainLine = screen.getByText( + 'Batter up, batter up, batter up', + ).parentElement + const pronunciationLine = screen.getByText( + 'Battaa appu, battaa appu, battaa appu', + ).parentElement + const translationLine = screen.getByText( + 'バッターアップ、バッターアップ、バッターアップ', + ).parentElement + + expect(mainLine.querySelectorAll('span')).toHaveLength(1) + expect(pronunciationLine.querySelectorAll('span')).toHaveLength(1) + expect(translationLine.querySelectorAll('span')).toHaveLength(1) + }) + + it('uses cue byte offsets to segment repeated words in the karaoke line', () => { + renderOverlay({ + mainLyric: { + kind: 'main', + lang: 'en', + synced: true, + line: [{ start: 0, end: 2400, value: 'Oh love love me tonight' }], + cueLine: [ + { + index: 0, + start: 0, + end: 2400, + value: 'Oh love love me tonight', + cue: [ + { start: 0, end: 300, value: 'Oh', byteStart: 0, byteEnd: 1 }, + { + start: 900, + end: 1300, + value: 'love', + byteStart: 8, + byteEnd: 11, + }, + { + start: 1300, + end: 1600, + value: 'me', + byteStart: 13, + byteEnd: 14, + }, + { + start: 1600, + end: 2400, + value: 'tonight', + byteStart: 16, + byteEnd: 22, + }, + ], + }, + ], + }, + translationLyric: null, + pronunciationLyric: null, + showTranslation: false, + showPronunciation: false, + translationEnabled: false, + pronunciationEnabled: false, + audioInstance: { + ...audioInstance, + currentTime: 1.0, + }, + }) + + const mainLine = 
screen.getByText('Oh').parentElement + const segments = Array.from(mainLine.querySelectorAll('span')).map( + (span) => span.textContent, + ) + + expect(segments).toEqual([ + 'Oh', + ' love ', + 'love', + ' ', + 'me', + ' ', + 'tonight', + ]) + }) + + it('uses cue byte offsets to preserve explicit space cues in multibyte karaoke lines', () => { + renderOverlay({ + mainLyric: { + kind: 'main', + lang: 'ko', + synced: true, + line: [{ start: 0, end: 900, value: '눈을 뜬 순간' }], + cueLine: [ + { + index: 0, + start: 0, + end: 900, + value: '눈을 뜬 순간', + cue: [ + { start: 0, end: 150, value: '눈을', byteStart: 0, byteEnd: 5 }, + { start: 150, end: 250, value: ' ', byteStart: 6, byteEnd: 6 }, + { start: 250, end: 450, value: '뜬', byteStart: 7, byteEnd: 9 }, + { start: 450, end: 550, value: ' ', byteStart: 10, byteEnd: 10 }, + { start: 550, end: 900, value: '순간', byteStart: 11, byteEnd: 16 }, + ], + }, + ], + }, + translationLyric: null, + pronunciationLyric: null, + showTranslation: false, + showPronunciation: false, + translationEnabled: false, + pronunciationEnabled: false, + audioInstance: { + ...audioInstance, + currentTime: 0.3, + }, + }) + + const mainLine = screen.getByText('눈을').parentElement + const segments = Array.from(mainLine.querySelectorAll('span')).map( + (span) => span.textContent, + ) + + expect(segments).toEqual(['눈을', ' ', '뜬', ' ', '순간']) + }) + + it('highlights line-timed pronunciation and translation rows with the active main line', () => { + renderOverlay({ + mainLyric: { + kind: 'main', + lang: 'en', + synced: true, + line: [ + { start: 1000, end: 1800, value: 'Line one' }, + { start: 2500, end: 3300, value: 'Line two' }, + ], + }, + translationLyric: { + kind: 'translation', + lang: 'ja', + synced: true, + line: [ + { start: 1000, end: 1800, value: '一行目' }, + { start: 2500, end: 3300, value: '二行目' }, + ], + }, + pronunciationLyric: { + kind: 'pronunciation', + lang: 'ja-Latn', + synced: true, + line: [ + { start: 1000, end: 1800, value: 'ichigyoume' 
}, + { start: 2500, end: 3300, value: 'nigyoume' }, + ], + }, + showTranslation: true, + showPronunciation: true, + audioInstance: { + ...audioInstance, + currentTime: 1.2, + }, + }) + + const activePronunciation = screen.getByText('ichigyoume').parentElement + const inactivePronunciation = screen.getByText('nigyoume').parentElement + const activeTranslation = screen.getByText('一行目').parentElement + const inactiveTranslation = screen.getByText('二行目').parentElement + + expect(parseFloat(activePronunciation.style.opacity)).toBeGreaterThan( + parseFloat(inactivePronunciation.style.opacity), + ) + expect(parseFloat(activeTranslation.style.opacity)).toBeGreaterThan( + parseFloat(inactiveTranslation.style.opacity), + ) + }) + + it('pre-wraps inactive main lines so the active line keeps the same wrap shape', () => { + renderOverlay({ + mainLyric: { + kind: 'main', + lang: 'en', + synced: true, + line: [ + { start: 1000, end: 1800, value: 'First line that is getting focus' }, + { start: 2500, end: 3300, value: 'Second line waiting below' }, + ], + }, + translationLyric: null, + pronunciationLyric: null, + showTranslation: false, + showPronunciation: false, + translationEnabled: false, + pronunciationEnabled: false, + audioInstance: { + ...audioInstance, + currentTime: 1.2, + }, + }) + + const activeLine = screen.getByText('First line that is getting focus') + .parentElement + const inactiveLine = screen.getByText('Second line waiting below') + .parentElement + + expect(parseFloat(activeLine.style.fontSize)).toBeGreaterThan( + parseFloat(inactiveLine.style.fontSize), + ) + expect(activeLine.style.maxWidth).toBe('100%') + expect(inactiveLine.style.maxWidth).toBe('80%') + }) + + it('centers pronunciation text inside the pill container', () => { + renderOverlay({ + showTranslation: false, + showPronunciation: true, + }) + + const pronunciationLine = screen.getByText('konnichiwa').parentElement + const styles = window.getComputedStyle(pronunciationLine) + + 
expect(styles.display).toBe('inline-flex') + expect(styles.justifyContent).toBe('center') + expect(styles.alignItems).toBe('center') + }) + + it('renders untimed text lyrics in manual reading mode without a pinned active line', () => { + renderOverlay({ + mainLyric: { + kind: 'main', + lang: 'en', + synced: false, + line: [{ value: 'First plain line' }, { value: 'Second plain line' }], + }, + translationLyric: null, + pronunciationLyric: null, + showTranslation: false, + showPronunciation: false, + translationEnabled: false, + pronunciationEnabled: false, + }) + + const firstLine = screen.getByText('First plain line').parentElement + const secondLine = screen.getByText('Second plain line').parentElement + + expect(firstLine.style.opacity).toBe('1') + expect(secondLine.style.opacity).toBe('1') + expect(firstLine.style.color).toBe(secondLine.style.color) + }) + + it('persists line height changes, keeps aux line spacing fixed, and stores overlay height', async () => { + renderOverlay({ + mainLyric: buildLyric('main', 'en', 'Hello world'), + translationLyric: buildLyric('translation', 'es', 'Hola'), + pronunciationLyric: buildLyric('pronunciation', 'en-Latn', 'heh-loh'), + showTranslation: true, + showPronunciation: true, + translationEnabled: true, + pronunciationEnabled: true, + }) + + const overlay = screen.getByTestId('karaoke-lyrics-overlay') + const mainLine = screen.getByText('Hello world').parentElement + const pronunciationLine = screen.getByText('heh-loh').parentElement + expect(mainLine).toHaveStyle(`line-height: ${DEFAULT_LINE_HEIGHT_TEXT}`) + expect(pronunciationLine).toHaveStyle('line-height: 1.2') + + fireEvent.click(screen.getByTestId('lyrics-settings-button')) + + const slider = screen.getByRole('slider', { name: 'Line height' }) + slider.focus() + fireEvent.keyDown(slider, { key: 'ArrowRight' }) + + await waitFor(() => + expect(screen.getByTestId('lyrics-line-height-value')).toHaveTextContent( + NEXT_LINE_HEIGHT_TEXT, + ), + ) + + await waitFor(() => 
+ expect(mainLine).toHaveStyle(`line-height: ${NEXT_LINE_HEIGHT_TEXT}`), + ) + expect(pronunciationLine).toHaveStyle('line-height: 1.2') + + fireEvent.mouseDown(screen.getByTestId('lyrics-resize-handle'), { + clientY: 400, + }) + fireEvent.mouseMove(window, { clientY: 360 }) + fireEvent.mouseUp(window) + + await waitFor(() => expect(overlay).toHaveStyle('height: 340px')) + + const stored = JSON.parse(localStorage.getItem('karaoke-lyrics-settings')) + expect(stored.lineHeight).toBeCloseTo(1.32, 2) + expect(stored.overlayHeight).toBe(340) + }) + + it('resets appearance back to the default spacing and overlay height', async () => { + localStorage.setItem( + 'karaoke-lyrics-settings', + JSON.stringify({ + lineHeight: 1.8, + overlayHeight: 420, + tr: { fontSize: 16, colorKey: 'yellow' }, + main: { fontSize: 28, colorKey: 'cyan' }, + pr: { fontSize: 15, colorKey: 'pink' }, + }), + ) + + renderOverlay({ + mainLyric: buildLyric('main', 'en', 'Hello world'), + translationLyric: null, + pronunciationLyric: null, + showPronunciation: false, + translationEnabled: false, + pronunciationEnabled: false, + }) + + const overlay = screen.getByTestId('karaoke-lyrics-overlay') + const mainLine = screen.getByText('Hello world').parentElement + expect(overlay).toHaveStyle('height: 420px') + expect(mainLine).toHaveStyle('line-height: 1.8') + + fireEvent.click(screen.getByTestId('lyrics-settings-button')) + fireEvent.click(screen.getByTestId('lyrics-reset-appearance')) + + await waitFor(() => + expect(screen.getByTestId('lyrics-line-height-value')).toHaveTextContent( + DEFAULT_LINE_HEIGHT_TEXT, + ), + ) + await waitFor(() => expect(overlay).toHaveStyle('height: 300px')) + await waitFor(() => + expect(mainLine).toHaveStyle(`line-height: ${DEFAULT_LINE_HEIGHT_TEXT}`), + ) + + const stored = JSON.parse(localStorage.getItem('karaoke-lyrics-settings')) + expect(stored.lineHeight).toBeCloseTo(1.3, 2) + expect(stored.overlayHeight).toBe(300) + }) +}) diff --git 
a/ui/src/audioplayer/MobileKaraokeLyricsPortal.jsx b/ui/src/audioplayer/MobileKaraokeLyricsPortal.jsx new file mode 100644 index 000000000..636107184 --- /dev/null +++ b/ui/src/audioplayer/MobileKaraokeLyricsPortal.jsx @@ -0,0 +1,65 @@ +import React, { useEffect, useState } from 'react' +import { createPortal } from 'react-dom' + +export const MOBILE_KARAOKE_LYRICS_HOST_SELECTOR = + '.react-jinke-music-player-mobile-cover' +export const MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS = 'nd-mobile-lyrics-active' + +const resolveMobileLyricsHost = () => { + if (typeof document === 'undefined') { + return null + } + return document.querySelector(MOBILE_KARAOKE_LYRICS_HOST_SELECTOR) +} + +const MobileKaraokeLyricsPortal = ({ active, children }) => { + const [host, setHost] = useState(() => + active ? resolveMobileLyricsHost() : null, + ) + + useEffect(() => { + if (typeof document === 'undefined') { + setHost(null) + return undefined + } + + if (!active) { + setHost(null) + return undefined + } + + const syncHost = () => { + setHost(resolveMobileLyricsHost()) + } + + syncHost() + + const observer = new MutationObserver(syncHost) + observer.observe(document.body, { + childList: true, + subtree: true, + }) + + return () => observer.disconnect() + }, [active]) + + useEffect(() => { + if (!host) { + return undefined + } + + host.classList.toggle(MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS, active) + + return () => { + host.classList.remove(MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS) + } + }, [active, host]) + + if (!active || !host) { + return null + } + + return createPortal(children, host) +} + +export default MobileKaraokeLyricsPortal diff --git a/ui/src/audioplayer/MobileKaraokeLyricsPortal.test.jsx b/ui/src/audioplayer/MobileKaraokeLyricsPortal.test.jsx new file mode 100644 index 000000000..8b237e184 --- /dev/null +++ b/ui/src/audioplayer/MobileKaraokeLyricsPortal.test.jsx @@ -0,0 +1,55 @@ +import React from 'react' +import { cleanup, render, screen, waitFor } from '@testing-library/react' +import 
MobileKaraokeLyricsPortal, { + MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS, +} from './MobileKaraokeLyricsPortal' + +const HOST_CLASS = 'react-jinke-music-player-mobile-cover' + +describe('', () => { + afterEach(() => { + cleanup() + document.body.innerHTML = '' + }) + + it('renders lyrics into the mobile cover host and toggles the active class', () => { + const host = document.createElement('div') + host.className = HOST_CLASS + document.body.appendChild(host) + + const { rerender } = render( + +
Lyrics
+
, + ) + + expect(host).toContainElement(screen.getByTestId('mobile-inline-lyrics')) + expect(host).toHaveClass(MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS) + + rerender( + +
Lyrics
+
, + ) + + expect(screen.queryByTestId('mobile-inline-lyrics')).not.toBeInTheDocument() + expect(host).not.toHaveClass(MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS) + }) + + it('attaches when the mobile cover host appears after mount', async () => { + render( + +
Lyrics
+
, + ) + + const host = document.createElement('div') + host.className = HOST_CLASS + document.body.appendChild(host) + + await waitFor(() => + expect(host).toContainElement(screen.getByTestId('mobile-inline-lyrics')), + ) + expect(host).toHaveClass(MOBILE_KARAOKE_LYRICS_ACTIVE_CLASS) + }) +}) diff --git a/ui/src/audioplayer/Player.jsx b/ui/src/audioplayer/Player.jsx index 5599b9e1d..3603e263e 100644 --- a/ui/src/audioplayer/Player.jsx +++ b/ui/src/audioplayer/Player.jsx @@ -22,6 +22,7 @@ import { refreshQueue, setPlayMode, setTranscodingProfile, + updateQueueLyric, setVolume, syncQueue, } from '../actions' @@ -33,6 +34,30 @@ import { keyMap } from '../hotkeys' import keyHandlers from './keyHandlers' import { calculateGain } from '../utils/calculateReplayGain' import { detectBrowserProfile, decisionService } from '../transcode' +import { + getPreferredLyricLanguage, + hasStructuredLyricContent, + selectLyricLayers, + structuredLyricToLrc, +} from './lyrics' +import { + resolveLyricsOverlayState, + togglePronunciationPreference, +} from './lyricsOverlayState' +import KaraokeLyricsOverlay from './KaraokeLyricsOverlay' +import MobileKaraokeLyricsPortal from './MobileKaraokeLyricsPortal' + +const emptyLyricLayers = { + main: null, + translation: null, + pronunciation: null, +} + +const normalizeLyricLayers = (layers) => ({ + main: layers?.main || null, + translation: layers?.translation || null, + pronunciation: layers?.pronunciation || null, +}) const Player = () => { const theme = useCurrentTheme() @@ -120,6 +145,83 @@ const Player = () => { const gainInfo = useSelector((state) => state.replayGain) const [context, setContext] = useState(null) const [gainNode, setGainNode] = useState(null) + const lyricCacheRef = useRef(new Map()) + const lyricRequestIdRef = useRef(0) + const playerRef = useRef(null) + const [karaokeVisiblePreference, setKaraokeVisiblePreference] = + useState(false) + const [selectedLyricLayers, setSelectedLyricLayers] = + useState(emptyLyricLayers) + 
const [translationPreference, setTranslationPreference] = useState(false) + const [pronunciationPreference, setPronunciationPreference] = useState(null) + const currentTrackId = playerState.current?.trackId + const currentTrackIsRadio = playerState.current?.isRadio + const selectedStructuredLyric = selectedLyricLayers.main + const hasKaraokeLyric = hasStructuredLyricContent(selectedStructuredLyric) + const hasTranslationLyric = hasStructuredLyricContent( + selectedLyricLayers.translation, + ) + const hasPronunciationLyric = hasStructuredLyricContent( + selectedLyricLayers.pronunciation, + ) + const { karaokeVisible, showTranslation, showPronunciation } = + resolveLyricsOverlayState({ + karaokeVisiblePreference, + translationPreference, + pronunciationPreference, + hasKaraokeLyric, + hasTranslationLyric, + hasPronunciationLyric, + }) + const useInlineMobileLyrics = karaokeVisible && !isDesktop + + const applyLyricToRuntimePlayer = useCallback((trackId, lyric) => { + if (!trackId) { + return + } + + const player = playerRef.current + if (!player || typeof player.setState !== 'function') { + return + } + + player.setState((prevState) => { + const prevLists = Array.isArray(prevState.audioLists) + ? prevState.audioLists + : [] + let changed = false + const audioLists = prevLists.map((item) => { + if (item.trackId !== trackId) { + return item + } + if (item.lyric === lyric) { + return item + } + changed = true + return { + ...item, + lyric, + } + }) + + const currentItem = audioLists.find( + (item) => item.musicSrc === prevState.musicSrc, + ) + const currentLyric = + typeof currentItem?.lyric === 'string' + ? 
currentItem.lyric + : prevState.lyric + + if (!changed && currentLyric === prevState.lyric) { + return null + } + + return { + audioLists, + lyric: currentLyric, + } + }) + }, []) useEffect(() => { if ( @@ -166,6 +268,88 @@ const Player = () => { return () => window.removeEventListener('beforeunload', handleBeforeUnload) }, [playerState, audioInstance]) + useEffect(() => { + if (!currentTrackId || currentTrackIsRadio) { + setSelectedLyricLayers(emptyLyricLayers) + return + } + + const cached = lyricCacheRef.current.get(currentTrackId) + let layers = emptyLyricLayers + if (cached && typeof cached !== 'string') { + if (cached.layers) { + layers = normalizeLyricLayers(cached.layers) + } else if (cached.structuredLyric) { + layers = normalizeLyricLayers({ + main: cached.structuredLyric, + }) + } + } + setSelectedLyricLayers(layers) + }, [currentTrackId, currentTrackIsRadio]) + + useEffect(() => { + lyricRequestIdRef.current += 1 + const requestId = lyricRequestIdRef.current + + if (!currentTrackId || currentTrackIsRadio) { + return + } + + const cached = lyricCacheRef.current.get(currentTrackId) + if (cached !== undefined) { + const cachedLyric = + typeof cached === 'string' ? cached : cached?.lrc || '' + const cachedLayers = + typeof cached === 'string' + ? emptyLyricLayers + : cached?.layers + ? normalizeLyricLayers(cached.layers) + : normalizeLyricLayers({ main: cached?.structuredLyric }) + + setSelectedLyricLayers(cachedLayers) + if (cachedLyric) { + dispatch(updateQueueLyric(currentTrackId, cachedLyric)) + applyLyricToRuntimePlayer(currentTrackId, cachedLyric) + } + return + } + + subsonic + .getLyricsBySongId(currentTrackId) + .then((resp) => { + if (lyricRequestIdRef.current !== requestId) { + return + } + + const structuredLyrics = + resp?.json?.['subsonic-response']?.lyricsList?.structuredLyrics || [] + const layers = selectLyricLayers( + structuredLyrics, + getPreferredLyricLanguage(), + ) + const lyric = layers.main ? 
structuredLyricToLrc(layers.main) : '' + lyricCacheRef.current.set(currentTrackId, { + lrc: lyric, + layers, + }) + setSelectedLyricLayers(layers) + + if (lyric !== '') { + dispatch(updateQueueLyric(currentTrackId, lyric)) + applyLyricToRuntimePlayer(currentTrackId, lyric) + } + }) + .catch(() => { + if (lyricRequestIdRef.current !== requestId) { + return + } + setSelectedLyricLayers(emptyLyricLayers) + // Do not cache network/request failures as empty lyrics, so we can retry. + lyricCacheRef.current.delete(currentTrackId) + }) + }, [dispatch, currentTrackId, currentTrackIsRadio, applyLyricToRuntimePlayer]) + const defaultOptions = useMemo( () => ({ theme: playerTheme, @@ -177,7 +361,7 @@ const Player = () => { clearPriorAudioLists: false, showDestroy: true, showDownload: false, - showLyric: true, + showLyric: false, showReload: false, toggleMode: !isDesktop, glassBg: false, @@ -215,12 +399,26 @@ const Player = () => { (playerState.clear || playerState.playIndex === 0), clearPriorAudioLists: playerState.clear, extendsContent: ( - + + setKaraokeVisiblePreference((visible) => !visible) + } + lyricsActive={karaokeVisible} + lyricsDisabled={!hasKaraokeLyric} + /> ), defaultVolume: isMobilePlayer ? 
1 : playerState.volume, showMediaSession: !current.isRadio, } - }, [playerState, defaultOptions, isMobilePlayer]) + }, [ + playerState, + defaultOptions, + isMobilePlayer, + karaokeVisible, + hasKaraokeLyric, + ]) const onAudioListsChange = useCallback( (_, audioLists, audioInfo) => dispatch(syncQueue(audioInfo, audioLists)), @@ -340,10 +538,13 @@ const Player = () => { ) const onCoverClick = useCallback((mode, audioLists, audioInfo) => { + if (!isDesktop && karaokeVisible) { + return + } if (mode === 'full' && audioInfo?.song?.albumId) { window.location.href = `#/album/${audioInfo.song.albumId}/show` } - }, []) + }, [isDesktop, karaokeVisible]) const onAudioError = useCallback( (error, currentPlayId, audioLists, audioInfo) => { @@ -392,6 +593,7 @@ const Player = () => { return ( { onBeforeDestroy={onBeforeDestroy} getAudioInstance={setAudioInstance} /> + {isDesktop && ( + + setTranslationPreference((previous) => + hasTranslationLyric ? !previous : false, + ) + } + onTogglePronunciation={() => + setPronunciationPreference((previous) => + togglePronunciationPreference(previous, hasPronunciationLyric), + ) + } + audioInstance={audioInstance} + onClose={() => setKaraokeVisiblePreference(false)} + /> + )} + + + setTranslationPreference((previous) => + hasTranslationLyric ? 
!previous : false, + ) + } + onTogglePronunciation={() => + setPronunciationPreference((previous) => + togglePronunciationPreference(previous, hasPronunciationLyric), + ) + } + audioInstance={audioInstance} + onClose={() => setKaraokeVisiblePreference(false)} + /> + ) diff --git a/ui/src/audioplayer/Player.lyricsState.test.jsx b/ui/src/audioplayer/Player.lyricsState.test.jsx new file mode 100644 index 000000000..c47abea76 --- /dev/null +++ b/ui/src/audioplayer/Player.lyricsState.test.jsx @@ -0,0 +1,77 @@ +import { + resolveLyricsOverlayState, + togglePronunciationPreference, +} from './lyricsOverlayState' + +describe('Player lyrics state helpers', () => { + it('keeps the lyrics window preference across track changes in the session', () => { + const visibleOnCurrentTrack = resolveLyricsOverlayState({ + karaokeVisiblePreference: true, + translationPreference: false, + pronunciationPreference: null, + hasKaraokeLyric: true, + hasTranslationLyric: true, + hasPronunciationLyric: true, + }) + expect(visibleOnCurrentTrack.karaokeVisible).toBe(true) + + const hiddenForTrackWithoutLyrics = resolveLyricsOverlayState({ + karaokeVisiblePreference: true, + translationPreference: false, + pronunciationPreference: null, + hasKaraokeLyric: false, + hasTranslationLyric: false, + hasPronunciationLyric: false, + }) + expect(hiddenForTrackWithoutLyrics.karaokeVisible).toBe(false) + + const restoredOnNextLyricsTrack = resolveLyricsOverlayState({ + karaokeVisiblePreference: true, + translationPreference: false, + pronunciationPreference: null, + hasKaraokeLyric: true, + hasTranslationLyric: false, + hasPronunciationLyric: false, + }) + expect(restoredOnNextLyricsTrack.karaokeVisible).toBe(true) + }) + + it('restores translation and pronunciation preferences after tracks without those layers', () => { + const initialState = resolveLyricsOverlayState({ + karaokeVisiblePreference: false, + translationPreference: false, + pronunciationPreference: null, + hasKaraokeLyric: true, + 
hasTranslationLyric: true, + hasPronunciationLyric: true, + }) + expect(initialState.showTranslation).toBe(false) + expect(initialState.showPronunciation).toBe(true) + + const translationPreference = true + const pronunciationPreference = togglePronunciationPreference(null, true) + expect(pronunciationPreference).toBe(false) + + const hiddenOnTrackWithoutAuxLayers = resolveLyricsOverlayState({ + karaokeVisiblePreference: false, + translationPreference, + pronunciationPreference, + hasKaraokeLyric: true, + hasTranslationLyric: false, + hasPronunciationLyric: false, + }) + expect(hiddenOnTrackWithoutAuxLayers.showTranslation).toBe(false) + expect(hiddenOnTrackWithoutAuxLayers.showPronunciation).toBe(false) + + const restoredOnTrackWithAuxLayers = resolveLyricsOverlayState({ + karaokeVisiblePreference: false, + translationPreference, + pronunciationPreference, + hasKaraokeLyric: true, + hasTranslationLyric: true, + hasPronunciationLyric: true, + }) + expect(restoredOnTrackWithAuxLayers.showTranslation).toBe(true) + expect(restoredOnTrackWithAuxLayers.showPronunciation).toBe(false) + }) +}) diff --git a/ui/src/audioplayer/PlayerToolbar.jsx b/ui/src/audioplayer/PlayerToolbar.jsx index 4812141ab..8487b0655 100644 --- a/ui/src/audioplayer/PlayerToolbar.jsx +++ b/ui/src/audioplayer/PlayerToolbar.jsx @@ -4,7 +4,9 @@ import { useGetOne } from 'react-admin' import { GlobalHotKeys } from 'react-hotkeys' import IconButton from '@material-ui/core/IconButton' import { useMediaQuery } from '@material-ui/core' +import Tooltip from '@material-ui/core/Tooltip' import { RiSaveLine } from 'react-icons/ri' +import { RiFileMusicLine } from 'react-icons/ri' import { LoveButton, useToggleLove } from '../common' import { openSaveQueueDialog } from '../actions' import { keyMap } from '../hotkeys' @@ -55,7 +57,13 @@ const useStyles = makeStyles((theme) => ({ }, })) -const PlayerToolbar = ({ id, isRadio }) => { +const PlayerToolbar = ({ + id, + isRadio, + onToggleLyrics, + lyricsActive = 
false, + lyricsDisabled = false, +}) => { const dispatch = useDispatch() const { data, loading } = useGetOne('song', id, { enabled: !!id && !isRadio }) const [toggleLove, toggling] = useToggleLove('song', data) @@ -99,6 +107,25 @@ const PlayerToolbar = ({ id, isRadio }) => { /> ) + const toggleLyricsButton = ( + + + + + + + + ) + return ( <> @@ -106,11 +133,13 @@ const PlayerToolbar = ({ id, isRadio }) => {
  • {saveQueueButton} {loveButton} + {toggleLyricsButton}
  • ) : ( <>
  • {saveQueueButton}
  • {loveButton}
  • +
  • {toggleLyricsButton}
  • )} diff --git a/ui/src/audioplayer/PlayerToolbar.test.jsx b/ui/src/audioplayer/PlayerToolbar.test.jsx index d0368b0f0..3041001eb 100644 --- a/ui/src/audioplayer/PlayerToolbar.test.jsx +++ b/ui/src/audioplayer/PlayerToolbar.test.jsx @@ -71,6 +71,7 @@ describe('', () => { // Verify both buttons are rendered expect(screen.getByTestId('save-queue-button')).toBeInTheDocument() expect(screen.getByTestId('love-button')).toBeInTheDocument() + expect(screen.getByTestId('toggle-lyrics-button')).toBeInTheDocument() // Verify desktop classes are applied expect(listItems[0].className).toContain('toolbar') @@ -102,6 +103,14 @@ describe('', () => { type: 'OPEN_SAVE_QUEUE_DIALOG', }) }) + + it('triggers lyric toggle callback when lyrics button is clicked', () => { + const onToggleLyrics = vi.fn() + render() + + fireEvent.click(screen.getByTestId('toggle-lyrics-button')) + expect(onToggleLyrics).toHaveBeenCalledTimes(1) + }) }) describe('Mobile layout', () => { @@ -114,11 +123,12 @@ describe('', () => { // Each button should be in its own list item const listItems = screen.getAllByRole('listitem') - expect(listItems).toHaveLength(2) + expect(listItems).toHaveLength(3) // Verify both buttons are rendered expect(screen.getByTestId('save-queue-button')).toBeInTheDocument() expect(screen.getByTestId('love-button')).toBeInTheDocument() + expect(screen.getByTestId('toggle-lyrics-button')).toBeInTheDocument() // Verify mobile classes are applied expect(listItems[0].className).toContain('mobileListItem') @@ -140,6 +150,13 @@ describe('', () => { const loveButton = screen.getByTestId('love-button') expect(loveButton).toBeDisabled() }) + + it('disables lyrics button when lyrics are unavailable', () => { + render() + + const lyricsButton = screen.getByTestId('toggle-lyrics-button') + expect(lyricsButton).toBeDisabled() + }) }) describe('Common behavior', () => { diff --git a/ui/src/audioplayer/lyrics.js b/ui/src/audioplayer/lyrics.js new file mode 100644 index 000000000..98c638ab3 --- 
/dev/null +++ b/ui/src/audioplayer/lyrics.js @@ -0,0 +1,725 @@ +const normalizeLanguageTag = (language) => + (language || '').toLowerCase().replace('_', '-') + +// Roughly one 60fps frame; keeps line/token switching stable near tight boundaries. +const KARAOKE_SWITCH_EPSILON_MS = 50 +const LYRIC_KIND_MAIN = 'main' +const LYRIC_KIND_TRANSLATION = 'translation' +const LYRIC_KIND_PRONUNCIATION = 'pronunciation' + +const padTime = (value) => { + const str = value.toString() + return str.length === 1 ? `0${str}` : str +} + +const toTime = (value) => { + if (value == null || value === '') { + return null + } + const numeric = Number(value) + return Number.isFinite(numeric) ? numeric : null +} + +const toByteOffset = (value) => { + if (value == null || value === '') { + return null + } + const numeric = Number(value) + if (!Number.isInteger(numeric) || numeric < 0) { + return null + } + return numeric +} + +const compareNullableTime = (a, b) => { + if (a == null && b == null) { + return 0 + } + if (a == null) { + return 1 + } + if (b == null) { + return -1 + } + return a - b +} + +const sortTokensByStart = (tokens) => + tokens + .map((token, order) => ({ ...token, order })) + .sort((a, b) => { + const byStart = compareNullableTime(a.start, b.start) + if (byStart !== 0) { + return byStart + } + const byEnd = compareNullableTime(a.end, b.end) + if (byEnd !== 0) { + return byEnd + } + return a.order - b.order + }) + .map(({ order, ...token }) => token) + +const languageMatch = (candidate, preferred) => { + if (!candidate || !preferred) { + return false + } + return ( + candidate === preferred || + candidate.startsWith(`${preferred}-`) || + preferred.startsWith(`${candidate}-`) + ) +} + +const hasTimedLines = (lyric) => + lyric && + lyric.synced && + Array.isArray(lyric.line) && + lyric.line.some((line) => Number.isFinite(Number(line.start))) + +const preferTimedLyrics = (lyrics) => { + const timed = lyrics.filter(hasTimedLines) + return timed.length > 0 ? 
timed : lyrics +} + +const normalizeToken = (token) => { + if (!token) { + return null + } + const value = typeof token.value === 'string' ? token.value : '' + if (value.length === 0) { + return null + } + const byteStart = toByteOffset(token.byteStart) + const byteEnd = toByteOffset(token.byteEnd) + return { + start: toTime(token.start), + end: toTime(token.end), + value, + ...(byteStart != null ? { byteStart } : {}), + ...(byteEnd != null ? { byteEnd } : {}), + } +} + +const utf8BytesForCodePoint = (codePoint) => { + if (codePoint <= 0x7f) { + return 1 + } + if (codePoint <= 0x7ff) { + return 2 + } + if (codePoint <= 0xffff) { + return 3 + } + return 4 +} + +export const utf8ByteOffsetToCodeUnitIndex = (text, targetByteOffset) => { + if (typeof text !== 'string' || text.length === 0) { + return 0 + } + + const target = toByteOffset(targetByteOffset) + if (target == null || target <= 0) { + return 0 + } + + let byteOffset = 0 + let index = 0 + while (index < text.length) { + if (byteOffset >= target) { + return index + } + const codePoint = text.codePointAt(index) + byteOffset += utf8BytesForCodePoint(codePoint) + index += codePoint > 0xffff ? 2 : 1 + } + + return text.length +} + +export const utf8ByteRangeToCodeUnitRange = (text, byteStart, byteEnd) => { + if (typeof text !== 'string') { + return null + } + + const start = toByteOffset(byteStart) + const end = toByteOffset(byteEnd) + if (start == null || end == null || end < start) { + return null + } + + const startIndex = utf8ByteOffsetToCodeUnitIndex(text, start) + const endIndex = utf8ByteOffsetToCodeUnitIndex(text, end + 1) + if ( + startIndex >= endIndex || + startIndex > text.length || + endIndex > text.length + ) { + return null + } + + return { + start: startIndex, + end: endIndex, + text: text.slice(startIndex, endIndex), + } +} + +const buildAgentLookup = (structuredLyric) => { + const lookup = new Map() + const agents = Array.isArray(structuredLyric?.agents) + ? 
structuredLyric.agents + : [] + for (const agent of agents) { + const id = typeof agent?.id === 'string' ? agent.id : '' + if (!id || lookup.has(id)) { + continue + } + lookup.set(id, { + id, + role: typeof agent?.role === 'string' ? agent.role : '', + name: typeof agent?.name === 'string' ? agent.name : '', + }) + } + return lookup +} + +const deriveUiRole = (agent) => { + if (!agent?.role || agent.role === 'main') { + return '' + } + return agent.role +} + +const normalizeCueLine = (cueLine, fallbackIndex, agentLookup) => { + const index = Number.isFinite(Number(cueLine?.index)) + ? Number(cueLine.index) + : fallbackIndex + const agentId = typeof cueLine?.agentId === 'string' ? cueLine.agentId : '' + const agent = agentId ? agentLookup.get(agentId) || null : null + const fallbackRole = typeof cueLine?.role === 'string' ? cueLine.role : '' + const tokens = sortTokensByStart( + Array.isArray(cueLine?.cue) + ? cueLine.cue.map(normalizeToken).filter(Boolean) + : [], + ) + + return { + index, + start: toTime(cueLine?.start), + end: toTime(cueLine?.end), + value: typeof cueLine?.value === 'string' ? cueLine.value : '', + role: agent ? 
deriveUiRole(agent) : fallbackRole, + agentId, + agentRole: agent?.role || fallbackRole, + agentName: agent?.name || '', + tokens, + } +} + +const normalizeLyricKind = (kind) => { + const normalized = (kind || '').toLowerCase().trim() + switch (normalized) { + case LYRIC_KIND_TRANSLATION: + return LYRIC_KIND_TRANSLATION + case LYRIC_KIND_PRONUNCIATION: + return LYRIC_KIND_PRONUNCIATION + default: + return LYRIC_KIND_MAIN + } +} + +const pickLyricByLanguage = (lyrics, preferredLanguage) => { + if (!Array.isArray(lyrics) || lyrics.length === 0) { + return null + } + + const preferred = normalizeLanguageTag(preferredLanguage) + const preferredBase = preferred.split('-')[0] + + return ( + lyrics.find((lyric) => + languageMatch(normalizeLanguageTag(lyric.lang), preferred), + ) || + lyrics.find((lyric) => + languageMatch(normalizeLanguageTag(lyric.lang), preferredBase), + ) || + lyrics.find((lyric) => + languageMatch(normalizeLanguageTag(lyric.lang), 'en'), + ) || + lyrics[0] + ) +} + +const lineTimeWindow = (lines, index) => { + const line = lines[index] + if (!line) { + return { start: null, end: null } + } + + const start = toTime(line.start) + const end = toTime(line.end) ?? 
toTime(lines[index + 1]?.start) + return { start, end } +} + +export const hasCueTiming = (structuredLyric) => + Boolean( + structuredLyric && + Array.isArray(structuredLyric.cueLine) && + structuredLyric.cueLine.some( + (cueLine) => + Array.isArray(cueLine?.cue) && + cueLine.cue.some((cue) => Number.isFinite(Number(cue?.start))), + ), + ) + +export const hasStructuredLyricContent = (structuredLyric) => + Boolean( + structuredLyric && + ((Array.isArray(structuredLyric.line) && + structuredLyric.line.some( + (line) => typeof line?.value === 'string' && line.value.trim() !== '', + )) || + hasCueTiming(structuredLyric)), + ) + +export const getPreferredLyricLanguage = () => { + if (typeof window !== 'undefined' && window.localStorage) { + const stored = window.localStorage.getItem('locale') + if (stored) { + return stored + } + } + if (typeof navigator !== 'undefined' && navigator.language) { + return navigator.language + } + return 'en' +} + +export const selectLyricLayers = (structuredLyrics, preferredLanguage) => { + if (!Array.isArray(structuredLyrics)) { + return { + main: null, + translation: null, + pronunciation: null, + } + } + + const available = structuredLyrics.filter(hasStructuredLyricContent) + if (available.length === 0) { + return { + main: null, + translation: null, + pronunciation: null, + } + } + + const grouped = { + [LYRIC_KIND_MAIN]: [], + [LYRIC_KIND_TRANSLATION]: [], + [LYRIC_KIND_PRONUNCIATION]: [], + } + + for (const lyric of available) { + grouped[normalizeLyricKind(lyric?.kind)].push(lyric) + } + + const mainCandidates = grouped[LYRIC_KIND_MAIN].length + ? 
grouped[LYRIC_KIND_MAIN] + : available + + return { + main: pickLyricByLanguage( + preferTimedLyrics(mainCandidates), + preferredLanguage, + ), + translation: pickLyricByLanguage( + preferTimedLyrics(grouped[LYRIC_KIND_TRANSLATION]), + preferredLanguage, + ), + pronunciation: pickLyricByLanguage( + preferTimedLyrics(grouped[LYRIC_KIND_PRONUNCIATION]), + preferredLanguage, + ), + } +} + +export const pickStructuredLyric = (structuredLyrics, preferredLanguage) => + selectLyricLayers(structuredLyrics, preferredLanguage).main + +export const structuredLyricToLrc = (structuredLyric) => { + if (!structuredLyric || !Array.isArray(structuredLyric.line)) { + return '' + } + + let lyricText = '' + for (const line of structuredLyric.line) { + const start = Number(line.start) + if (!Number.isFinite(start) || start < 0) { + continue + } + + let time = Math.floor(start / 10) + const ms = time % 100 + time = Math.floor(time / 100) + const sec = time % 60 + time = Math.floor(time / 60) + const min = time % 60 + + lyricText += `[${padTime(min)}:${padTime(sec)}.${padTime(ms)}] ${line.value || ''}\n` + } + return lyricText +} + +export const structuredLyricsToLrc = (structuredLyrics, preferredLanguage) => { + const selected = pickStructuredLyric(structuredLyrics, preferredLanguage) + if (!selected) { + return '' + } + return structuredLyricToLrc(selected) +} + +const buildBaseKaraokeLines = (baseLines) => + baseLines.map((line, index) => ({ + index, + start: toTime(line.start), + end: toTime(line.end), + value: typeof line.value === 'string' ? 
line.value : '', + tokens: [], + })) + +export const buildKaraokeLinesFromCueLines = ( + rawCueLines, + baseLines, + agentLookup, +) => { + const normalizedCueLines = rawCueLines.map((cueLine, fallbackIndex) => { + const normalized = normalizeCueLine(cueLine, fallbackIndex, agentLookup) + return { + ...normalized, + tokens: normalized.tokens.map((token) => ({ + ...token, + role: normalized.role, + agentId: normalized.agentId, + agentName: normalized.agentName, + agentRole: normalized.agentRole, + })), + } + }) + + const byIndex = new Map() + for (const cueLine of normalizedCueLines) { + if (!byIndex.has(cueLine.index)) { + byIndex.set(cueLine.index, []) + } + byIndex.get(cueLine.index).push(cueLine) + } + + return Array.from(byIndex.entries()).map(([index, group]) => { + const first = group[0] + const baseLine = baseLines[index] || {} + const tokens = sortTokensByStart(group.flatMap((cueLine) => cueLine.tokens)) + const fallbackStart = + tokens.find((token) => token.start != null)?.start ?? null + const fallbackEnd = + [...tokens].reverse().find((token) => token.end != null)?.end ?? null + const value = + first.value || + (typeof baseLine.value === 'string' ? baseLine.value : '') || + tokens.map((token) => token.value).join('') + + return { + index, + start: first.start ?? toTime(baseLine.start) ?? fallbackStart, + end: first.end ?? toTime(baseLine.end) ?? fallbackEnd, + value, + agentId: first.agentId, + agentName: first.agentName, + agentRole: first.agentRole, + tokens, + } + }) +} + +export const buildKaraokeLines = (structuredLyric) => { + if (!structuredLyric) { + return [] + } + + const agentLookup = buildAgentLookup(structuredLyric) + const baseLines = Array.isArray(structuredLyric.line) + ? structuredLyric.line + : [] + const rawCueLines = Array.isArray(structuredLyric.cueLine) + ? structuredLyric.cueLine + : [] + + const lines = + rawCueLines.length > 0 + ? 
buildKaraokeLinesFromCueLines(rawCueLines, baseLines, agentLookup) + : buildBaseKaraokeLines(baseLines) + + const normalized = lines + .filter((line) => line.value || line.tokens.length > 0) + .sort((a, b) => { + if (a.start == null && b.start == null) { + return a.index - b.index + } + if (a.start == null) { + return 1 + } + if (b.start == null) { + return -1 + } + if (a.start !== b.start) { + return a.start - b.start + } + return a.index - b.index + }) + + for (let i = 0; i < normalized.length; i += 1) { + if (normalized[i].end == null) { + const nextStart = normalized[i + 1]?.start + if (nextStart != null) { + normalized[i].end = nextStart + } + } + } + + return normalized +} + +export const resolveKaraokeTokenWindow = ( + line, + tokenIndex, + lineEndFallback = null, +) => { + const tokens = Array.isArray(line?.tokens) ? line.tokens : [] + const token = tokens[tokenIndex] + if (!token) { + return { start: null, end: null } + } + + const prevToken = tokenIndex > 0 ? tokens[tokenIndex - 1] : null + const nextToken = + tokenIndex + 1 < tokens.length ? tokens[tokenIndex + 1] : null + + const lineStart = toTime(line?.start) + const lineEnd = toTime(line?.end) ?? toTime(lineEndFallback) + const tokenCount = tokens.length + const hasLineWindow = + lineStart != null && + lineEnd != null && + Number.isFinite(lineStart) && + Number.isFinite(lineEnd) && + lineEnd > lineStart + const estimatedStart = + hasLineWindow && tokenCount > 0 + ? lineStart + ((lineEnd - lineStart) * tokenIndex) / tokenCount + : null + const estimatedEnd = + hasLineWindow && tokenCount > 0 + ? 
lineStart + ((lineEnd - lineStart) * (tokenIndex + 1)) / tokenCount + : null + + let explicitStartCount = 0 + let explicitEndCount = 0 + const uniqueStarts = new Set() + const uniqueEnds = new Set() + + for (let i = 0; i < tokenCount; i += 1) { + const explicitStart = toTime(tokens[i]?.start) + if (explicitStart != null) { + explicitStartCount += 1 + uniqueStarts.add(explicitStart) + } + + const explicitEnd = toTime(tokens[i]?.end) + if (explicitEnd != null) { + explicitEndCount += 1 + uniqueEnds.add(explicitEnd) + } + } + + const collapsedStarts = + explicitStartCount > 1 && uniqueStarts.size <= Math.max(1, tokenCount / 4) + const collapsedEnds = + explicitEndCount > 1 && uniqueEnds.size <= Math.max(1, tokenCount / 4) + const shouldForceEstimated = + hasLineWindow && tokenCount > 1 && (collapsedStarts || collapsedEnds) + + if (shouldForceEstimated) { + return { + start: estimatedStart, + end: estimatedEnd, + } + } + const prevEnd = toTime(prevToken?.end) ?? toTime(prevToken?.start) + + let start = toTime(token.start) + if (start == null) { + start = prevEnd ?? estimatedStart ?? lineStart + } + + let end = toTime(token.end) + if (end == null) { + const nextDirectStart = toTime(nextToken?.start) + const nextEstimatedStart = + hasLineWindow && tokenIndex + 1 < tokenCount + ? lineStart + ((lineEnd - lineStart) * (tokenIndex + 1)) / tokenCount + : null + end = nextDirectStart ?? nextEstimatedStart ?? estimatedEnd ?? lineEnd + } + + if ( + tokenCount === 1 && + hasLineWindow && + (start == null || end == null || end <= start + 1) + ) { + start = lineStart + end = lineEnd + } + + if (start != null && end != null && end < start) { + end = start + } + + return { start, end } +} + +export const getActiveKaraokeState = (lines, currentTimeMs) => { + if (!Array.isArray(lines) || lines.length === 0) { + return { lineIndex: -1, tokenIndex: -1 } + } + + const current = Number.isFinite(Number(currentTimeMs)) + ? 
Number(currentTimeMs) + : 0 + let lineIndex = 0 + for (let i = 0; i < lines.length; i += 1) { + const lineStart = toTime(lines[i]?.start) + if (lineStart == null || lineStart <= current + KARAOKE_SWITCH_EPSILON_MS) { + lineIndex = i + continue + } + break + } + + for (let i = lineIndex; i >= 0; i -= 1) { + const lineStart = toTime(lines[i]?.start) + const lineEnd = toTime(lines[i]?.end) ?? toTime(lines[i + 1]?.start) + if (lineStart != null && current + KARAOKE_SWITCH_EPSILON_MS < lineStart) { + continue + } + if (lineEnd == null || current <= lineEnd + KARAOKE_SWITCH_EPSILON_MS) { + lineIndex = i + break + } + } + + const activeLine = lines[lineIndex] || null + const tokens = Array.isArray(activeLine?.tokens) ? activeLine.tokens : [] + let tokenIndex = -1 + for (let i = 0; i < tokens.length; i += 1) { + const { start: tokenStart, end: tokenEnd } = resolveKaraokeTokenWindow( + activeLine, + i, + lines[lineIndex + 1]?.start, + ) + if ( + tokenStart == null || + tokenStart <= current + KARAOKE_SWITCH_EPSILON_MS + ) { + tokenIndex = i + if (tokenEnd != null && current <= tokenEnd + KARAOKE_SWITCH_EPSILON_MS) { + break + } + continue + } + break + } + + return { lineIndex, tokenIndex } +} + +export const hasUsableKaraokeTiming = (lines) => + Array.isArray(lines) && + lines.some( + (line) => + toTime(line?.start) != null || + (Array.isArray(line?.tokens) && + line.tokens.some( + (token) => toTime(token?.start) != null || toTime(token?.end) != null, + )), + ) + +export const findLayerLineIndexForMain = (mainLines, layerLines, mainIndex) => { + if ( + !Array.isArray(mainLines) || + !Array.isArray(layerLines) || + mainLines.length === 0 || + layerLines.length === 0 || + mainIndex < 0 || + mainIndex >= mainLines.length + ) { + return -1 + } + + const { start: mainStart, end: mainEnd } = lineTimeWindow( + mainLines, + mainIndex, + ) + + if (mainStart == null) { + return -1 + } + const mainWindowEnd = mainEnd ?? 
mainStart + const mainWindowDuration = Math.max(0, mainWindowEnd - mainStart) + const maxDelta = Math.max(550, Math.min(1400, mainWindowDuration + 420)) + + let bestIdx = -1 + let bestScore = Number.POSITIVE_INFINITY + + for (let i = 0; i < layerLines.length; i += 1) { + const { start, end } = lineTimeWindow(layerLines, i) + + if (start != null && end != null) { + const overlap = Math.min(end, mainEnd ?? end) - Math.max(start, mainStart) + if (overlap >= 0) { + const score = Math.abs(start - mainStart) + Math.abs(i - mainIndex) * 30 + if (score < bestScore) { + bestScore = score + bestIdx = i + } + continue + } + } + + if (start != null) { + if (Math.abs(start - mainStart) > maxDelta) { + continue + } + const score = Math.abs(start - mainStart) + Math.abs(i - mainIndex) * 45 + if (score < bestScore) { + bestScore = score + bestIdx = i + } + } + } + + return bestIdx +} + +export const resolveLayerLineForMain = (mainLines, layerLines, mainIndex) => { + const index = findLayerLineIndexForMain(mainLines, layerLines, mainIndex) + return { + index, + line: index >= 0 ? layerLines[index] : null, + } +} + +export const buildHighlightedMainLine = (line) => line + +export const buildHighlightedAuxLine = (_referenceLine, auxiliaryLine) => + auxiliaryLine ?? 
null diff --git a/ui/src/audioplayer/lyrics.test.js b/ui/src/audioplayer/lyrics.test.js new file mode 100644 index 000000000..1abea57a5 --- /dev/null +++ b/ui/src/audioplayer/lyrics.test.js @@ -0,0 +1,786 @@ +import { + buildHighlightedAuxLine, + buildHighlightedMainLine, + buildKaraokeLines, + buildKaraokeLinesFromCueLines, + findLayerLineIndexForMain, + getActiveKaraokeState, + getPreferredLyricLanguage, + hasUsableKaraokeTiming, + hasStructuredLyricContent, + pickStructuredLyric, + resolveKaraokeTokenWindow, + resolveLayerLineForMain, + selectLyricLayers, + structuredLyricsToLrc, + structuredLyricToLrc, + utf8ByteOffsetToCodeUnitIndex, + utf8ByteRangeToCodeUnitRange, +} from './lyrics' + +describe('lyrics helpers', () => { + beforeEach(() => { + localStorage.clear() + }) + + it('prefers a lyric track that matches the locale', () => { + const selected = pickStructuredLyric( + [ + { + lang: 'eng', + synced: true, + line: [{ start: 1000, value: 'English line' }], + }, + { + lang: 'pt-BR', + synced: true, + line: [{ start: 1000, value: 'Linha em portugues' }], + }, + ], + 'pt-BR', + ) + + expect(selected.lang).toBe('pt-BR') + }) + + it('falls back to english when preferred locale is not available', () => { + const selected = pickStructuredLyric( + [ + { + lang: 'eng', + synced: true, + line: [{ start: 1000, value: 'English line' }], + }, + { + lang: 'deu', + synced: true, + line: [{ start: 1000, value: 'Deutsche Zeile' }], + }, + ], + 'pt-BR', + ) + + expect(selected.lang).toBe('eng') + }) + + it('falls back to first synced track when english is missing', () => { + const selected = pickStructuredLyric( + [ + { + lang: 'jpn', + synced: true, + line: [{ start: 1000, value: 'Nihongo' }], + }, + { + lang: 'deu', + synced: true, + line: [{ start: 1000, value: 'Deutsch' }], + }, + ], + 'pt-BR', + ) + + expect(selected.lang).toBe('jpn') + }) + + it('selects translation and pronunciation layers by kind', () => { + const layers = selectLyricLayers( + [ + { + kind: 'main', + 
lang: 'ja', + synced: true, + line: [{ start: 1000, value: 'こんにちは' }], + }, + { + kind: 'translation', + lang: 'es', + synced: true, + line: [{ start: 1000, value: 'Hola' }], + }, + { + kind: 'pronunciation', + lang: 'ja-Latn', + synced: true, + line: [{ start: 1000, value: 'konnichiwa' }], + }, + ], + 'es-MX', + ) + + expect(layers.main.lang).toBe('ja') + expect(layers.translation.lang).toBe('es') + expect(layers.pronunciation.lang).toBe('ja-Latn') + }) + + it('treats missing kind as main for backward compatibility', () => { + const layers = selectLyricLayers( + [ + { + lang: 'eng', + synced: true, + line: [{ start: 1000, value: 'Main' }], + }, + ], + 'eng', + ) + + expect(layers.main.lang).toBe('eng') + expect(layers.translation).toBeNull() + expect(layers.pronunciation).toBeNull() + }) + + it('falls back to unsynced lyric content when no timed track exists', () => { + const layers = selectLyricLayers( + [ + { + lang: 'eng', + synced: false, + line: [{ value: 'Plain embedded lyric' }], + }, + ], + 'eng', + ) + + expect(layers.main).toEqual({ + lang: 'eng', + synced: false, + line: [{ value: 'Plain embedded lyric' }], + }) + }) + + it('still prefers timed lyrics when both timed and untimed tracks exist', () => { + const layers = selectLyricLayers( + [ + { + lang: 'eng', + synced: false, + line: [{ value: 'Plain lyric' }], + }, + { + lang: 'eng', + synced: true, + line: [{ start: 1000, value: 'Timed lyric' }], + }, + ], + 'eng', + ) + + expect(layers.main).toEqual({ + lang: 'eng', + synced: true, + line: [{ start: 1000, value: 'Timed lyric' }], + }) + }) + + it('matches layer line by timing for the active main line', () => { + const mainLines = [ + { index: 0, start: 1000, end: 1800, value: 'Line A', tokens: [] }, + { index: 1, start: 2000, end: 2800, value: 'Line B', tokens: [] }, + ] + const layerLines = [ + { index: 0, start: 900, end: 1750, value: 'A2', tokens: [] }, + { index: 1, start: 2050, end: 2900, value: 'B2', tokens: [] }, + ] + + 
expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(1) + expect(resolveLayerLineForMain(mainLines, layerLines, 0).line.value).toBe( + 'A2', + ) + }) + + it('matches metadata layers by nearest timing even when indexes differ', () => { + const mainLines = [ + { index: 0, start: 1000, end: 1800, value: 'Line A', tokens: [] }, + { index: 1, start: 2000, end: 2800, value: 'Line B', tokens: [] }, + { index: 2, start: 3000, end: 3800, value: 'Line C', tokens: [] }, + ] + const layerLines = [ + { index: 2, start: 3020, end: 3820, value: 'C2', tokens: [] }, + { index: 0, start: 980, end: 1760, value: 'A2', tokens: [] }, + { index: 1, start: 2010, end: 2810, value: 'B2', tokens: [] }, + ] + + expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(2) + expect(resolveLayerLineForMain(mainLines, layerLines, 2).line.value).toBe( + 'C2', + ) + }) + + it('keeps translation lines line-level when they do not have real cue timing', () => { + const mainLine = { + index: 0, + start: 1000, + end: 2200, + value: '불을 질러라', + tokens: [ + { start: 1000, end: 1300, value: '불을 ' }, + { start: 1300, end: 1650, value: '질' }, + { start: 1650, end: 2200, value: '러라' }, + ], + } + const translationLine = { + index: 0, + start: 1000, + end: 2200, + value: 'Set it on fire', + tokens: [], + } + + const highlighted = buildHighlightedAuxLine(mainLine, translationLine, 2600) + + expect(highlighted).toBe(translationLine) + expect(highlighted.tokens).toEqual([]) + }) + + it('keeps pronunciation lines line-level when they do not have real cue timing', () => { + const mainLine = { + index: 0, + start: 1000, + end: 2200, + value: 'You もっと強く 素早く 吹き飛ばせ', + tokens: [], + } + const pronunciationLine = { + index: 0, + start: 1000, + end: 2200, + value: 'You motto tsuyoku subayaku fukitobase', + tokens: [], + } + + const highlighted = buildHighlightedAuxLine( + mainLine, + pronunciationLine, + 2600, + ) + + expect(highlighted).toBe(pronunciationLine) + expect(highlighted.tokens).toEqual([]) 
+ }) + + it('keeps main lines line-level when they do not have real cue timing', () => { + const line = { + index: 0, + start: 1000, + end: 2200, + value: 'Youもっと強く 素早く 吹き飛ばせ', + tokens: [], + } + + const highlighted = buildHighlightedMainLine(line, 2600) + + expect(highlighted).toBe(line) + expect(highlighted.tokens).toEqual([]) + }) + + it('keeps auxiliary lines line-level when end time is missing and they lack cues', () => { + const mainLine = { + index: 0, + start: 1000, + end: null, + value: 'Hello there', + tokens: [], + } + const translationLine = { + index: 0, + start: 1000, + end: null, + value: 'Bonjour toi', + tokens: [], + } + + const highlighted = buildHighlightedAuxLine(mainLine, translationLine, 2400) + + expect(highlighted).toBe(translationLine) + expect(highlighted.tokens).toEqual([]) + }) + + it('keeps main lines line-level when end time is missing and they lack cues', () => { + const line = { + index: 0, + start: 1000, + end: null, + value: 'One more time', + tokens: [], + } + + const highlighted = buildHighlightedMainLine(line, 2400) + + expect(highlighted).toBe(line) + expect(highlighted.tokens).toEqual([]) + }) + + it('returns no layer match when the nearest line is too far in time', () => { + const mainLines = [ + { index: 0, start: 1000, end: 1800, value: 'Line A', tokens: [] }, + { index: 1, start: 2000, end: 2800, value: 'Line B', tokens: [] }, + ] + const layerLines = [ + { index: 0, start: 60000, end: 60800, value: 'Far line', tokens: [] }, + ] + + expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(-1) + expect(resolveLayerLineForMain(mainLines, layerLines, 1).line).toBeNull() + }) + + it('converts a structured lyric track to LRC', () => { + const lrc = structuredLyricToLrc({ + lang: 'eng', + synced: true, + line: [ + { start: 18800, value: "We're no strangers to love" }, + { start: 22801, value: 'You know the rules and so do I' }, + ], + }) + + expect(lrc).toBe( + "[00:18.80] We're no strangers to love\n[00:22.80] You know 
the rules and so do I\n", + ) + }) + + it('returns empty text when no synced lyrics are available', () => { + const lrc = structuredLyricsToLrc( + [{ lang: 'eng', synced: false, line: [{ value: 'Unsynced line' }] }], + 'eng', + ) + + expect(lrc).toBe('') + }) + + it('reads preferred language from localStorage first', () => { + localStorage.setItem('locale', 'pt-BR') + expect(getPreferredLyricLanguage()).toBe('pt-BR') + }) + + it('builds karaoke lines from agent-based cueLine payload', () => { + const lines = buildKaraokeLines({ + lang: 'eng', + synced: true, + line: [{ start: 1000, end: 3000, value: 'Hello world' }], + agents: [ + { id: 'lead', role: 'main', name: 'Lead Vocal' }, + { id: 'backing', role: 'bg' }, + ], + cueLine: [ + { + index: 0, + start: 1000, + end: 3000, + value: 'Hello world', + agentId: 'lead', + cue: [{ start: 1000, end: 1500, value: 'Hello' }], + }, + { + index: 0, + start: 1000, + end: 3000, + value: 'Hello world', + agentId: 'backing', + cue: [{ start: 2000, end: 2500, value: 'world' }], + }, + ], + }) + + expect(lines).toEqual([ + { + agentId: 'lead', + agentName: 'Lead Vocal', + agentRole: 'main', + index: 0, + start: 1000, + end: 3000, + value: 'Hello world', + tokens: [ + { + start: 1000, + end: 1500, + value: 'Hello', + role: '', + agentId: 'lead', + agentName: 'Lead Vocal', + agentRole: 'main', + }, + { + start: 2000, + end: 2500, + value: 'world', + role: 'bg', + agentId: 'backing', + agentName: '', + agentRole: 'bg', + }, + ], + }, + ]) + }) + + it('builds grouped karaoke lines directly from cue lines', () => { + const agentLookup = new Map([ + ['lead', { id: 'lead', role: 'main', name: 'Lead Vocal' }], + ['backing', { id: 'backing', role: 'bg', name: '' }], + ]) + + const lines = buildKaraokeLinesFromCueLines( + [ + { + index: 0, + start: 1000, + end: 3000, + value: 'Hello world', + agentId: 'lead', + cue: [{ start: 1000, end: 1500, value: 'Hello' }], + }, + { + index: 0, + start: 1000, + end: 3000, + value: 'Hello world', + 
agentId: 'backing', + cue: [{ start: 2000, end: 2500, value: 'world' }], + }, + ], + [{ start: 1000, end: 3000, value: 'Hello world' }], + agentLookup, + ) + + expect(lines).toEqual([ + { + agentId: 'lead', + agentName: 'Lead Vocal', + agentRole: 'main', + index: 0, + start: 1000, + end: 3000, + value: 'Hello world', + tokens: [ + { + start: 1000, + end: 1500, + value: 'Hello', + role: '', + agentId: 'lead', + agentName: 'Lead Vocal', + agentRole: 'main', + }, + { + start: 2000, + end: 2500, + value: 'world', + role: 'bg', + agentId: 'backing', + agentName: '', + agentRole: 'bg', + }, + ], + }, + ]) + }) + + it('preserves cue byte offsets on karaoke tokens', () => { + const lines = buildKaraokeLines({ + lang: 'eng', + synced: true, + line: [{ start: 0, end: 2400, value: 'Oh love love me tonight' }], + cueLine: [ + { + index: 0, + start: 0, + end: 2400, + value: 'Oh love love me tonight', + cue: [ + { start: 0, end: 300, value: 'Oh', byteStart: 0, byteEnd: 1 }, + { start: 900, end: 1300, value: 'love', byteStart: 8, byteEnd: 11 }, + { start: 1300, end: 1600, value: 'me', byteStart: 13, byteEnd: 14 }, + { + start: 1600, + end: 2400, + value: 'tonight', + byteStart: 16, + byteEnd: 22, + }, + ], + }, + ], + }) + + expect( + lines[0].tokens.map((token) => [ + token.value, + token.byteStart, + token.byteEnd, + ]), + ).toEqual([ + ['Oh', 0, 1], + ['love', 8, 11], + ['me', 13, 14], + ['tonight', 16, 22], + ]) + }) + + it('preserves whitespace-only cues for exact byte-range rendering', () => { + const lines = buildKaraokeLines({ + lang: 'kor', + synced: true, + line: [{ start: 0, end: 900, value: '눈을 뜬 순간' }], + cueLine: [ + { + index: 0, + start: 0, + end: 900, + value: '눈을 뜬 순간', + cue: [ + { start: 0, end: 150, value: '눈을', byteStart: 0, byteEnd: 5 }, + { start: 150, end: 250, value: ' ', byteStart: 6, byteEnd: 6 }, + { start: 250, end: 450, value: '뜬', byteStart: 7, byteEnd: 9 }, + { start: 450, end: 550, value: ' ', byteStart: 10, byteEnd: 10 }, + { start: 550, end: 
900, value: '순간', byteStart: 11, byteEnd: 16 }, + ], + }, + ], + }) + + expect( + lines[0].tokens.map((token) => [ + token.value, + token.byteStart, + token.byteEnd, + ]), + ).toEqual([ + ['눈을', 0, 5], + [' ', 6, 6], + ['뜬', 7, 9], + [' ', 10, 10], + ['순간', 11, 16], + ]) + }) + + it('maps UTF-8 byte offsets to string ranges for multibyte lyrics', () => { + const text = '눈을 뜬 순간' + + expect(utf8ByteOffsetToCodeUnitIndex(text, 0)).toBe(0) + expect(utf8ByteOffsetToCodeUnitIndex(text, 3)).toBe(1) + expect(utf8ByteOffsetToCodeUnitIndex(text, 7)).toBe(3) + expect(utf8ByteRangeToCodeUnitRange(text, 11, 16)).toEqual({ + start: 5, + end: 7, + text: '순간', + }) + }) + + it('falls back to legacy cueLine role values when agents are absent', () => { + const lines = buildKaraokeLines({ + lang: 'eng', + synced: true, + line: [{ start: 1000, end: 3000, value: 'Hello world' }], + cueLine: [ + { + index: 0, + start: 1000, + end: 3000, + value: 'Hello world', + role: 'bg', + cue: [{ start: 1000, end: 1500, value: 'Hello' }], + }, + ], + }) + + expect(lines[0].tokens[0].role).toBe('bg') + expect(lines[0].tokens[0].agentId).toBe('') + expect(lines[0].tokens[0].agentName).toBe('') + }) + + it('sorts token timing by start to keep playback stable', () => { + const lines = buildKaraokeLines({ + lang: 'eng', + synced: true, + line: [{ start: 1000, end: 3000, value: 'Hello world' }], + cueLine: [ + { + index: 0, + start: 1000, + end: 3000, + value: 'Hello world', + role: '', + cue: [ + { start: 2000, end: 2500, value: 'world' }, + { start: 1000, end: 1500, value: 'Hello' }, + ], + }, + ], + }) + + expect(lines[0].tokens.map((token) => token.value)).toEqual([ + 'Hello', + 'world', + ]) + }) + + it('keeps a single full-line token unchanged instead of expanding it synthetically', () => { + const lines = buildKaraokeLines({ + lang: 'ko-Latn', + synced: true, + line: [{ start: 1000, end: 2000, value: 'Da-la-lun, dun' }], + cueLine: [ + { + index: 0, + start: 1000, + end: 2000, + value: 'Da-la-lun, 
dun', + role: '', + cue: [{ start: 1000, end: 2000, value: 'Da-la-lun, dun' }], + }, + ], + }) + + expect(lines).toHaveLength(1) + expect(lines[0].tokens).toHaveLength(1) + expect(lines[0].tokens[0].value).toBe('Da-la-lun, dun') + + const firstWindow = resolveKaraokeTokenWindow(lines[0], 0) + + expect(firstWindow.start).toBeCloseTo(1000) + expect(firstWindow.end).toBeCloseTo(2000) + }) + + it('detects active line and token for karaoke timing', () => { + const state = getActiveKaraokeState( + [ + { + index: 0, + start: 1000, + end: 3000, + value: 'Hello world', + tokens: [ + { start: 1000, end: 1500, value: 'Hello', role: '' }, + { start: 2000, end: 2500, value: 'world', role: '' }, + ], + }, + { + index: 1, + start: 3500, + end: 5000, + value: 'Second line', + tokens: [], + }, + ], + 2200, + ) + + expect(state).toEqual({ lineIndex: 0, tokenIndex: 1 }) + }) + + it('resolves token window fallback boundaries from neighboring tokens', () => { + const line = { + start: 1000, + end: 3000, + value: 'Hello world', + tokens: [ + { start: 1200, value: 'Hello', role: '' }, + { start: 1800, value: 'world', role: '' }, + ], + } + + expect(resolveKaraokeTokenWindow(line, 0)).toEqual({ + start: 1200, + end: 1800, + }) + expect(resolveKaraokeTokenWindow(line, 1)).toEqual({ + start: 1800, + end: 3000, + }) + }) + + it('infers sequential token windows when token timings are missing', () => { + const line = { + start: 1000, + end: 2000, + value: 'A B C', + tokens: [ + { value: 'A', role: '' }, + { value: 'B', role: '' }, + { value: 'C', role: '' }, + ], + } + + const first = resolveKaraokeTokenWindow(line, 0) + const second = resolveKaraokeTokenWindow(line, 1) + const third = resolveKaraokeTokenWindow(line, 2) + + expect(first.start).toBeCloseTo(1000) + expect(first.end).toBeCloseTo(1333.3333333333333) + + expect(second.start).toBeCloseTo(1333.3333333333333) + expect(second.end).toBeCloseTo(1666.6666666666667) + + expect(third.start).toBeCloseTo(1666.6666666666667) + 
expect(third.end).toBeCloseTo(2000) + }) + + it('falls back to sequential windows when token timings are collapsed', () => { + const line = { + start: 1000, + end: 2000, + value: 'A B C', + tokens: [ + { start: 1000, end: 2000, value: 'A', role: '' }, + { start: 1000, end: 2000, value: 'B', role: '' }, + { start: 1000, end: 2000, value: 'C', role: '' }, + ], + } + + const first = resolveKaraokeTokenWindow(line, 0) + const second = resolveKaraokeTokenWindow(line, 1) + const third = resolveKaraokeTokenWindow(line, 2) + + expect(first.start).toBeCloseTo(1000) + expect(first.end).toBeCloseTo(1333.3333333333333) + expect(second.start).toBeCloseTo(1333.3333333333333) + expect(second.end).toBeCloseTo(1666.6666666666667) + expect(third.start).toBeCloseTo(1666.6666666666667) + expect(third.end).toBeCloseTo(2000) + }) + + it('keeps token selection stable near tight token boundaries', () => { + const state = getActiveKaraokeState( + [ + { + index: 0, + start: 1000, + end: 2000, + value: 'A B', + tokens: [ + { start: 1000, end: 1100, value: 'A', role: '' }, + { start: 1110, end: 1300, value: 'B', role: '' }, + ], + }, + ], + 1108, + ) + + expect(state).toEqual({ lineIndex: 0, tokenIndex: 0 }) + }) + + it('reports structured lyric content when token timing exists', () => { + expect( + hasStructuredLyricContent({ + cueLine: [{ cue: [{ start: 100, value: 'a' }] }], + }), + ).toBe(true) + }) + + it('detects when built karaoke lines have no usable timing', () => { + expect( + hasUsableKaraokeTiming([ + { index: 0, value: 'First line', tokens: [] }, + { index: 1, value: 'Second line', tokens: [] }, + ]), + ).toBe(false) + + expect( + hasUsableKaraokeTiming([ + { index: 0, start: 1000, value: 'Timed line', tokens: [] }, + ]), + ).toBe(true) + }) +}) diff --git a/ui/src/audioplayer/lyricsOverlayState.js b/ui/src/audioplayer/lyricsOverlayState.js new file mode 100644 index 000000000..e8ff0e0a8 --- /dev/null +++ b/ui/src/audioplayer/lyricsOverlayState.js @@ -0,0 +1,27 @@ +export const 
resolveLyricsOverlayState = ({ + karaokeVisiblePreference, + translationPreference, + pronunciationPreference, + hasKaraokeLyric, + hasTranslationLyric, + hasPronunciationLyric, +}) => ({ + karaokeVisible: karaokeVisiblePreference && hasKaraokeLyric, + showTranslation: translationPreference && hasTranslationLyric, + showPronunciation: + (pronunciationPreference == null + ? hasPronunciationLyric + : pronunciationPreference) && hasPronunciationLyric, +}) + +export const togglePronunciationPreference = ( + previousPreference, + hasPronunciationLyric, +) => { + if (!hasPronunciationLyric) { + return false + } + const currentPreference = + previousPreference == null ? hasPronunciationLyric : previousPreference + return !currentPreference +} diff --git a/ui/src/audioplayer/styles.js b/ui/src/audioplayer/styles.js index 30a14d4db..30ccf7afb 100644 --- a/ui/src/audioplayer/styles.js +++ b/ui/src/audioplayer/styles.js @@ -62,12 +62,30 @@ const useStyle = makeStyles( // Fix cover display when image is not square aspectRatio: '1/1', display: 'flex', + position: 'relative', + }, + '& .react-jinke-music-player-mobile .react-jinke-music-player-mobile-cover.nd-mobile-lyrics-active': + { + width: '100%', + maxWidth: 'none', + height: 'clamp(280px, 42vh, 460px)', + aspectRatio: 'auto', + borderRadius: 12, + border: 'none', + boxShadow: 'none', + background: 'transparent', + cursor: 'default', }, '& .react-jinke-music-player-mobile .react-jinke-music-player-mobile-cover img.cover': { animationDuration: (props) => !props.enableCoverAnimation && '0s', objectFit: 'contain', // Fix cover display when image is not square }, + '& .react-jinke-music-player-mobile .react-jinke-music-player-mobile-cover.nd-mobile-lyrics-active img.cover': + { + opacity: 0, + pointerEvents: 'none', + }, // Hide old singer display '& .react-jinke-music-player-mobile .react-jinke-music-player-mobile-singer': { diff --git a/ui/src/reducers/playerReducer.js b/ui/src/reducers/playerReducer.js index 
d6ab7484b..449dcd294 100644 --- a/ui/src/reducers/playerReducer.js +++ b/ui/src/reducers/playerReducer.js @@ -7,6 +7,7 @@ import { PLAYER_CURRENT, PLAYER_PLAY_NEXT, PLAYER_PLAY_TRACKS, + PLAYER_UPDATE_LYRIC, PLAYER_SET_TRACK, PLAYER_SET_VOLUME, PLAYER_SYNC_QUEUE, @@ -60,21 +61,25 @@ const mapToAudioLists = (item) => { let lyricText = '' if (lyrics) { - const structured = JSON.parse(lyrics) - for (const structuredLyric of structured) { - if (structuredLyric.synced) { - for (const line of structuredLyric.line) { - let time = Math.floor(line.start / 10) - const ms = time % 100 - time = Math.floor(time / 100) - const sec = time % 60 - time = Math.floor(time / 60) - const min = time % 60 + try { + const structured = JSON.parse(lyrics) + for (const structuredLyric of structured) { + if (structuredLyric.synced) { + for (const line of structuredLyric.line) { + let time = Math.floor(line.start / 10) + const ms = time % 100 + time = Math.floor(time / 100) + const sec = time % 60 + time = Math.floor(time / 60) + const min = time % 60 - ms.toString() - lyricText += `[${pad(min)}:${pad(sec)}.${pad(ms)}] ${line.value}\n` + ms.toString() + lyricText += `[${pad(min)}:${pad(sec)}.${pad(ms)}] ${line.value}\n` + } } } + } catch { + lyricText = '' } } @@ -208,6 +213,45 @@ const reduceMode = (state, { data: { mode } }) => { } } +const reduceUpdateLyric = (state, { data: { trackId, lyric } }) => { + if (!trackId) { + return state + } + + let changed = false + const queue = state.queue.map((item) => { + if (item.trackId !== trackId) { + return item + } + if (item.lyric === lyric) { + return item + } + changed = true + return { + ...item, + lyric, + } + }) + + if (!changed) { + return state + } + + const current = + state.current?.trackId === trackId + ? 
{ + ...state.current, + lyric, + } + : state.current + + return { + ...state, + queue, + current, + } +} + export const playerReducer = (previousState = initialState, payload) => { const { type } = payload switch (type) { @@ -245,6 +289,8 @@ export const playerReducer = (previousState = initialState, payload) => { previousState.savedPlayIndex >= 0 ? previousState.savedPlayIndex : 0, } } + case PLAYER_UPDATE_LYRIC: + return reduceUpdateLyric(previousState, payload) default: return previousState } diff --git a/ui/src/reducers/playerReducer.test.js b/ui/src/reducers/playerReducer.test.js index 110ce8c53..43f24ec55 100644 --- a/ui/src/reducers/playerReducer.test.js +++ b/ui/src/reducers/playerReducer.test.js @@ -1,11 +1,24 @@ -import { describe, it, expect } from 'vitest' +import { describe, expect, it, vi } from 'vitest' import { playerReducer } from './playerReducer' import { - PLAYER_SYNC_QUEUE, PLAYER_CURRENT, PLAYER_REFRESH_QUEUE, + PLAYER_SET_TRACK, + PLAYER_SYNC_QUEUE, + PLAYER_UPDATE_LYRIC, } from '../actions' +vi.mock('uuid', () => ({ + v4: () => 'test-uuid', +})) + +vi.mock('../subsonic', () => ({ + default: { + streamUrl: vi.fn((id) => `/rest/stream?id=${id}`), + getCoverArtUrl: vi.fn(() => '/rest/getCoverArt?id=test'), + }, +})) + describe('playerReducer', () => { describe('pending track selection survives SYNC_QUEUE and premature CURRENT', () => { // Simulates the real sequence when clicking a new song while one is playing: @@ -54,8 +67,6 @@ describe('playerReducer', () => { }) it('CURRENT for old track preserves pending playIndex', () => { - // After SYNC_QUEUE, queue has new UUIDs. The old track's UUID (zzz) - // is at index 2, but playIndex is 0. This is a premature callback. 
const stateAfterSync = { ...stateAfterPlayTracks, queue: [ @@ -71,7 +82,7 @@ describe('playerReducer', () => { const result = playerReducer(stateAfterSync, action) expect(result.playIndex).toBe(0) expect(result.clear).toBe(true) - expect(result.savedPlayIndex).toBe(2) // preserved from before + expect(result.savedPlayIndex).toBe(2) }) it('CURRENT for correct track consumes pending playIndex', () => { @@ -83,7 +94,6 @@ describe('playerReducer', () => { { trackId: 's3', uuid: 'zzz', name: 'Song 3' }, ], } - // Player switched to Song 1 (uuid 'xxx', index 0 == playIndex) const action = { type: PLAYER_CURRENT, data: { uuid: 'xxx', name: 'Song 1', volume: 1 }, @@ -224,4 +234,80 @@ describe('playerReducer', () => { expect(result.playIndex).toBe(0) }) }) + + it('maps embedded synced lyrics to LRC text', () => { + const lyrics = JSON.stringify([ + { + lang: 'eng', + synced: true, + line: [{ start: 1000, value: 'Line one' }], + }, + { + lang: 'eng', + synced: false, + line: [{ value: 'Unsynced line' }], + }, + ]) + + const state = playerReducer(undefined, { + type: PLAYER_SET_TRACK, + data: { + id: 'song-1', + title: 'Test Song', + artist: 'Test Artist', + album: 'Test Album', + duration: 60, + lyrics, + }, + }) + + expect(state.queue).toHaveLength(1) + expect(state.queue[0].lyric).toBe('[00:01.00] Line one\n') + }) + + it('updates queue lyric by track id', () => { + const initial = playerReducer(undefined, { + type: PLAYER_SET_TRACK, + data: { + id: 'song-1', + title: 'Test Song', + artist: 'Test Artist', + album: 'Test Album', + duration: 60, + }, + }) + + const updated = playerReducer(initial, { + type: PLAYER_UPDATE_LYRIC, + data: { + trackId: 'song-1', + lyric: '[00:01.00] Updated lyric\n', + }, + }) + + expect(updated.queue[0].lyric).toBe('[00:01.00] Updated lyric\n') + }) + + it('returns same state when lyric update does not match any track', () => { + const initial = playerReducer(undefined, { + type: PLAYER_SET_TRACK, + data: { + id: 'song-1', + title: 'Test Song', 
+ artist: 'Test Artist', + album: 'Test Album', + duration: 60, + }, + }) + + const updated = playerReducer(initial, { + type: PLAYER_UPDATE_LYRIC, + data: { + trackId: 'missing-track', + lyric: '[00:01.00] Updated lyric\n', + }, + }) + + expect(updated).toBe(initial) + }) }) diff --git a/ui/src/subsonic/index.js b/ui/src/subsonic/index.js index 3579619aa..47ebabe99 100644 --- a/ui/src/subsonic/index.js +++ b/ui/src/subsonic/index.js @@ -1,5 +1,5 @@ -import { baseUrl } from '../utils' import { httpClient } from '../dataProvider' +import { baseUrl } from '../utils' const url = (command, id, options) => { const username = localStorage.getItem('username') @@ -120,6 +120,10 @@ const getTopSongs = (artist, count = 50) => { return httpClient(url('getTopSongs', null, { artist, count })) } +const getLyricsBySongId = (id) => { + return httpClient(url('getLyricsBySongId', id, { enhanced: true })) +} + const streamUrl = (id, options) => { return baseUrl( url('stream', id, { @@ -149,4 +153,5 @@ export default { getArtistInfo, getTopSongs, getSimilarSongs2, + getLyricsBySongId, } diff --git a/ui/src/subsonic/index.test.js b/ui/src/subsonic/index.test.js index a750694f4..6bd5e08ee 100644 --- a/ui/src/subsonic/index.test.js +++ b/ui/src/subsonic/index.test.js @@ -1,7 +1,13 @@ import { vi } from 'vitest' -import config from '../config' +import { httpClient } from '../dataProvider' import subsonic from './index' +vi.mock('../dataProvider', () => ({ + httpClient: vi.fn(() => Promise.resolve({})), +})) + +const COVER_ART_SIZE = 600 + describe('getCoverArtUrl', () => { beforeEach(() => { // Mock window.location @@ -31,11 +37,7 @@ describe('getCoverArtUrl', () => { updatedAt: '2023-01-01T00:00:00Z', } - const url = subsonic.getCoverArtUrl( - playlistRecord, - config.uiCoverArtSize, - true, - ) + const url = subsonic.getCoverArtUrl(playlistRecord, COVER_ART_SIZE, true) expect(url).toContain('pl-playlist-123') expect(url).toContain('size=600') @@ -49,11 +51,7 @@ 
describe('getCoverArtUrl', () => { sync: true, } - const url = subsonic.getCoverArtUrl( - playlistRecord, - config.uiCoverArtSize, - true, - ) + const url = subsonic.getCoverArtUrl(playlistRecord, COVER_ART_SIZE, true) expect(url).toContain('pl-playlist-123') expect(url).toContain('size=600') @@ -68,11 +66,7 @@ describe('getCoverArtUrl', () => { updatedAt: '2023-01-01T00:00:00Z', } - const url = subsonic.getCoverArtUrl( - albumRecord, - config.uiCoverArtSize, - true, - ) + const url = subsonic.getCoverArtUrl(albumRecord, COVER_ART_SIZE, true) expect(url).toContain('al-album-123') expect(url).toContain('size=600') @@ -86,7 +80,7 @@ describe('getCoverArtUrl', () => { updatedAt: '2023-01-01T00:00:00Z', } - const url = subsonic.getCoverArtUrl(songRecord, config.uiCoverArtSize, true) + const url = subsonic.getCoverArtUrl(songRecord, COVER_ART_SIZE, true) expect(url).toContain('mf-song-123') expect(url).toContain('size=600') @@ -99,11 +93,7 @@ describe('getCoverArtUrl', () => { updatedAt: '2023-01-01T00:00:00Z', } - const url = subsonic.getCoverArtUrl( - artistRecord, - config.uiCoverArtSize, - true, - ) + const url = subsonic.getCoverArtUrl(artistRecord, COVER_ART_SIZE, true) expect(url).toContain('ar-artist-123') expect(url).toContain('size=600') @@ -194,3 +184,30 @@ describe('getAvatarUrl', () => { expect(url).toContain('username=john') }) }) + +describe('getLyricsBySongId', () => { + beforeEach(() => { + vi.clearAllMocks() + const localStorageMock = { + getItem: vi.fn((key) => { + const values = { + username: 'testuser', + 'subsonic-token': 'testtoken', + 'subsonic-salt': 'testsalt', + } + return values[key] || null + }), + } + Object.defineProperty(window, 'localStorage', { value: localStorageMock }) + }) + + it('calls the getLyricsBySongId endpoint with enhanced=true', async () => { + await subsonic.getLyricsBySongId('song-1') + + expect(httpClient).toHaveBeenCalledTimes(1) + const calledUrl = httpClient.mock.calls[0][0] + 
expect(calledUrl).toContain('/rest/getLyricsBySongId?') + expect(calledUrl).toContain('id=song-1') + expect(calledUrl).toContain('enhanced=true') + }) +})