mirror of
https://github.com/navidrome/navidrome.git
synced 2026-05-03 06:51:16 +00:00
feat: add TTML lyrics support with token-level karaoke and translation/pronunciation layers
Add a full TTML (Timed Text Markup Language) sidecar lyrics parser that extracts word/syllable-level timing from <span> elements, plus translation and pronunciation (transliteration) tracks from Apple Music TTML metadata sections. Backend changes: - TTML parser (core/lyrics/ttml.go) with support for all TTML time formats, nested timing contexts, and bare decimal second offsets - Translation/pronunciation tracks resolved via key-based metadata linking - Line timing hydration from token-level start/end values - 'kind' field added to Lyrics model and StructuredLyric API response (main/translation/pronunciation) - 'tokenLine' array in API response for word-level timing data - UTF-8 BOM and UTF-16 LE encoding support for TTML files - Fix for ambiguous time resolution in pronunciation spans (pre-1-minute) Frontend changes: - KaraokeLyricsOverlay rewritten with scrollable multi-line layout, word-level wipe highlighting with eased alpha transitions, rAF-driven playback clock with drift correction - Inline translation (above) and pronunciation (below) each main line, with smart filtering to hide redundant lines (same normalized text) - TR/PR toggle buttons and layer selection via selectLyricLayers() - Click-to-seek: click any lyric line to jump to that position - Customization popover with font-size sliders and color presets for each line type (TR/Default/PR), persisted to localStorage - Smooth font-size transition between active and inactive lines - Resizable overlay height via drag handle - lyrics.js: resolveKaraokeTokenWindow, buildSyntheticWordTokens, findLayerLineIndexForMain, token sorting, collapsed timing detection API extension (non-breaking, additive): - tokenLine[].token[] provides per-word start/end timing (ms) - tokenLine[].index maps back to the corresponding line[] entry - kind field: 'main', 'translation', 'pronunciation' - Clients ignoring tokenLine/kind continue to work unchanged
This commit is contained in:
parent
ccee33f474
commit
c77e0de976
@ -52,6 +52,7 @@ A share of the revenue helps fund the development of Navidrome at no additional
|
||||
- **Multi-platform**, runs on macOS, Linux and Windows. **Docker** images are also provided
|
||||
- Ready to use binaries for all major platforms, including **Raspberry Pi**
|
||||
- Automatically **monitors your library** for changes, importing new files and reloading new metadata
|
||||
- Supports synchronized lyrics from sidecar **.lrc** and **.ttml** files (via `lyricspriority`)
|
||||
- **Themeable**, modern and responsive **Web interface** based on [Material UI](https://material-ui.com)
|
||||
- **Compatible** with all Subsonic/Madsonic/Airsonic [clients](https://www.navidrome.org/docs/overview/#apps)
|
||||
- **Transcoding** on the fly. Can be set per user/player. **Opus encoding is supported**
|
||||
|
||||
@ -677,7 +677,7 @@ func setViperDefaults() {
|
||||
viper.SetDefault("coverartquality", 75)
|
||||
viper.SetDefault("artistartpriority", "artist.*, album/artist.*, external")
|
||||
viper.SetDefault("discartpriority", "disc*.*, cd*.*, cover.*, folder.*, front.*, discsubtitle, embedded")
|
||||
viper.SetDefault("lyricspriority", ".lrc,.txt,embedded")
|
||||
viper.SetDefault("lyricspriority", ".lrc,.ttml,.txt,embedded")
|
||||
viper.SetDefault("enablegravatar", false)
|
||||
viper.SetDefault("enablefavourites", true)
|
||||
viper.SetDefault("enablestarrating", true)
|
||||
|
||||
@ -44,6 +44,35 @@ var _ = Describe("sources", func() {
|
||||
},
|
||||
}
|
||||
|
||||
ttmlLyrics := model.LyricList{
|
||||
model.Lyrics{
|
||||
Kind: "main",
|
||||
Lang: "eng",
|
||||
Line: []model.Line{
|
||||
{
|
||||
Start: gg.P(int64(18800)),
|
||||
Value: "We're no strangers to love",
|
||||
},
|
||||
{
|
||||
Start: gg.P(int64(22800)),
|
||||
Value: "You know the rules and so do I",
|
||||
},
|
||||
},
|
||||
Synced: true,
|
||||
},
|
||||
model.Lyrics{
|
||||
Kind: "main",
|
||||
Lang: "por",
|
||||
Line: []model.Line{
|
||||
{
|
||||
Start: gg.P(int64(18800)),
|
||||
Value: "Nao somos estranhos ao amor",
|
||||
},
|
||||
},
|
||||
Synced: true,
|
||||
},
|
||||
}
|
||||
|
||||
unsyncedLyrics := model.LyricList{
|
||||
model.Lyrics{
|
||||
Lang: "xxx",
|
||||
@ -80,7 +109,8 @@ var _ = Describe("sources", func() {
|
||||
},
|
||||
Entry("embedded > lrc > txt", "embedded,.lrc,.txt", embeddedLyrics),
|
||||
Entry("lrc > embedded > txt", ".lrc,embedded,.txt", syncedLyrics),
|
||||
Entry("txt > lrc > embedded", ".txt,.lrc,embedded", unsyncedLyrics))
|
||||
Entry("txt > lrc > embedded", ".txt,.lrc,embedded", unsyncedLyrics),
|
||||
Entry("ttml > lrc > embedded", ".ttml,.lrc,embedded", ttmlLyrics))
|
||||
|
||||
Context("Errors", func() {
|
||||
var RegularUserContext = XContext
|
||||
|
||||
@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/navidrome/navidrome/log"
|
||||
"github.com/navidrome/navidrome/model"
|
||||
@ -36,18 +37,31 @@ func fromExternalFile(ctx context.Context, mf *model.MediaFile, suffix string) (
|
||||
return nil, err
|
||||
}
|
||||
|
||||
lyrics, err := model.ToLyrics("xxx", string(contents))
|
||||
if err != nil {
|
||||
log.Error(ctx, "error parsing lyric external file", "path", externalLyric, err)
|
||||
return nil, err
|
||||
} else if lyrics == nil {
|
||||
var list model.LyricList
|
||||
if strings.EqualFold(suffix, ".ttml") {
|
||||
list, err = parseTTML(contents)
|
||||
if err != nil {
|
||||
log.Error(ctx, "error parsing ttml external file", "path", externalLyric, err)
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
lyrics, err := model.ToLyrics("xxx", string(contents))
|
||||
if err != nil {
|
||||
log.Error(ctx, "error parsing lyric external file", "path", externalLyric, err)
|
||||
return nil, err
|
||||
}
|
||||
if lyrics != nil {
|
||||
list = model.LyricList{*lyrics}
|
||||
}
|
||||
}
|
||||
|
||||
if len(list) == 0 {
|
||||
log.Trace(ctx, "empty lyrics from external file", "path", externalLyric)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
log.Trace(ctx, "retrieved lyrics from external file", "path", externalLyric)
|
||||
|
||||
return model.LyricList{*lyrics}, nil
|
||||
return list, nil
|
||||
}
|
||||
|
||||
// fromPlugin attempts to load lyrics from a plugin with the given name.
|
||||
|
||||
@ -109,6 +109,41 @@ var _ = Describe("sources", func() {
|
||||
}))
|
||||
})
|
||||
|
||||
It("should return synchronized multilingual lyrics from a TTML file", func() {
|
||||
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
|
||||
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
|
||||
|
||||
Expect(err).To(BeNil())
|
||||
Expect(lyrics).To(Equal(model.LyricList{
|
||||
{
|
||||
Kind: "main",
|
||||
Lang: "eng",
|
||||
Line: []model.Line{
|
||||
{
|
||||
Start: gg.P(int64(18800)),
|
||||
Value: "We're no strangers to love",
|
||||
},
|
||||
{
|
||||
Start: gg.P(int64(22800)),
|
||||
Value: "You know the rules and so do I",
|
||||
},
|
||||
},
|
||||
Synced: true,
|
||||
},
|
||||
{
|
||||
Kind: "main",
|
||||
Lang: "por",
|
||||
Line: []model.Line{
|
||||
{
|
||||
Start: gg.P(int64(18800)),
|
||||
Value: "Nao somos estranhos ao amor",
|
||||
},
|
||||
},
|
||||
Synced: true,
|
||||
},
|
||||
}))
|
||||
})
|
||||
|
||||
It("should handle LRC files with UTF-8 BOM marker (issue #4631)", func() {
|
||||
// The function looks for <basePath-without-ext><suffix>, so we need to pass
|
||||
// a MediaFile with .mp3 path and look for .lrc suffix
|
||||
@ -142,5 +177,33 @@ var _ = Describe("sources", func() {
|
||||
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801))))
|
||||
Expect(lyrics[0].Line[1].Value).To(Equal("You know the rules and so do I"))
|
||||
})
|
||||
|
||||
It("should handle TTML files with UTF-8 BOM marker", func() {
|
||||
mf := model.MediaFile{Path: "tests/fixtures/bom-test.mp3"}
|
||||
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
|
||||
|
||||
Expect(err).To(BeNil())
|
||||
Expect(lyrics).To(HaveLen(1))
|
||||
Expect(lyrics[0].Kind).To(Equal("main"))
|
||||
Expect(lyrics[0].Synced).To(BeTrue())
|
||||
Expect(lyrics[0].Line).To(HaveLen(1))
|
||||
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(0))))
|
||||
Expect(lyrics[0].Line[0].Value).To(Equal("BOM test line"))
|
||||
})
|
||||
|
||||
It("should handle UTF-16 LE encoded TTML files", func() {
|
||||
mf := model.MediaFile{Path: "tests/fixtures/bom-utf16-test.mp3"}
|
||||
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
|
||||
|
||||
Expect(err).To(BeNil())
|
||||
Expect(lyrics).To(HaveLen(1))
|
||||
Expect(lyrics[0].Kind).To(Equal("main"))
|
||||
Expect(lyrics[0].Synced).To(BeTrue())
|
||||
Expect(lyrics[0].Line).To(HaveLen(2))
|
||||
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(18800))))
|
||||
Expect(lyrics[0].Line[0].Value).To(Equal("UTF16 line one"))
|
||||
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801))))
|
||||
Expect(lyrics[0].Line[1].Value).To(Equal("UTF16 line two"))
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
92
core/lyrics/sources_ttml_test.go
Normal file
92
core/lyrics/sources_ttml_test.go
Normal file
@ -0,0 +1,92 @@
|
||||
package lyrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/navidrome/navidrome/model"
|
||||
)
|
||||
|
||||
func TestFromExternalFileTTML(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mf := model.MediaFile{Path: fixturePath("test.mp3")}
|
||||
|
||||
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
|
||||
if err != nil {
|
||||
t.Fatalf("fromExternalFile returned error: %v", err)
|
||||
}
|
||||
if len(lyrics) != 2 {
|
||||
t.Fatalf("expected 2 lyric tracks, got %d", len(lyrics))
|
||||
}
|
||||
if lyrics[0].Lang != "eng" {
|
||||
t.Fatalf("expected first language 'eng', got %q", lyrics[0].Lang)
|
||||
}
|
||||
if len(lyrics[0].Line) != 2 {
|
||||
t.Fatalf("expected 2 english lines, got %d", len(lyrics[0].Line))
|
||||
}
|
||||
if lyrics[0].Line[0].Start == nil || *lyrics[0].Line[0].Start != 18800 {
|
||||
t.Fatalf("expected first english line start to be 18800, got %v", lyrics[0].Line[0].Start)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFromExternalFileTTMLWithUTF8BOM(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mf := model.MediaFile{Path: fixturePath("bom-test.ttml")}
|
||||
|
||||
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
|
||||
if err != nil {
|
||||
t.Fatalf("fromExternalFile returned error: %v", err)
|
||||
}
|
||||
if len(lyrics) != 1 {
|
||||
t.Fatalf("expected 1 lyric track, got %d", len(lyrics))
|
||||
}
|
||||
if !lyrics[0].Synced {
|
||||
t.Fatal("expected BOM TTML lyrics to be synced")
|
||||
}
|
||||
if len(lyrics[0].Line) != 1 {
|
||||
t.Fatalf("expected 1 lyric line, got %d", len(lyrics[0].Line))
|
||||
}
|
||||
if lyrics[0].Line[0].Start == nil || *lyrics[0].Line[0].Start != 0 {
|
||||
t.Fatalf("expected first line start 0, got %v", lyrics[0].Line[0].Start)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFromExternalFileTTMLUTF16(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mf := model.MediaFile{Path: fixturePath("bom-utf16-test.ttml")}
|
||||
|
||||
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
|
||||
if err != nil {
|
||||
t.Fatalf("fromExternalFile returned error: %v", err)
|
||||
}
|
||||
if len(lyrics) != 1 {
|
||||
t.Fatalf("expected 1 lyric track, got %d", len(lyrics))
|
||||
}
|
||||
if !lyrics[0].Synced {
|
||||
t.Fatal("expected UTF16 TTML lyrics to be synced")
|
||||
}
|
||||
if len(lyrics[0].Line) != 2 {
|
||||
t.Fatalf("expected 2 lyric lines, got %d", len(lyrics[0].Line))
|
||||
}
|
||||
if lyrics[0].Line[0].Start == nil || *lyrics[0].Line[0].Start != 18800 {
|
||||
t.Fatalf("expected first line start 18800, got %v", lyrics[0].Line[0].Start)
|
||||
}
|
||||
if lyrics[0].Line[1].Start == nil || *lyrics[0].Line[1].Start != 22801 {
|
||||
t.Fatalf("expected second line start 22801, got %v", lyrics[0].Line[1].Start)
|
||||
}
|
||||
}
|
||||
|
||||
// fixturePath locates a test fixture by name. It probes tests/fixtures
// relative to the current directory first, then relative to two directories
// up, and returns the first path that os.Stat accepts. When neither exists
// the current-directory form is returned so the caller fails on a
// predictable path.
func fixturePath(name string) string {
	local := filepath.Join("tests", "fixtures", name)
	fromRepoRoot := filepath.Join("..", "..", "tests", "fixtures", name)

	for _, p := range [...]string{local, fromRepoRoot} {
		_, err := os.Stat(p)
		if err == nil {
			return p
		}
	}
	return local
}
|
||||
886
core/lyrics/ttml.go
Normal file
886
core/lyrics/ttml.go
Normal file
@ -0,0 +1,886 @@
|
||||
package lyrics
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"errors"
|
||||
"io"
|
||||
"math"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/navidrome/navidrome/log"
|
||||
"github.com/navidrome/navidrome/model"
|
||||
"github.com/navidrome/navidrome/utils/str"
|
||||
)
|
||||
|
||||
const (
	// Timing parameters used until the root <tt> element supplies its own.
	defaultTTMLFrameRate    = 30.0
	defaultTTMLSubFrameRate = 1.0
	defaultTTMLTickRate     = 1.0

	// Values stored in model.Lyrics.Kind for each kind of track.
	ttmlLyricKindMain          = "main"
	ttmlLyricKindTranslation   = "translation"
	ttmlLyricKindPronunciation = "pronunciation"
)

var (
	// offsetTimeRegex matches a unit-suffixed offset such as "1.5s" or
	// "250ms": group 1 is the number, group 2 the unit.
	offsetTimeRegex = regexp.MustCompile(`^([0-9]+(?:\.[0-9]+)?)(h|m|s|ms|f|t)$`)

	// xmlEncodingRegex matches an XML declaration that carries an encoding
	// pseudo-attribute, capturing the text before and after it so the
	// declaration can be rewritten with a different encoding.
	xmlEncodingRegex = regexp.MustCompile(`(?i)<\?xml([^>]*?)encoding\s*=\s*["'][^"']+["']([^>]*)\?>`)
)

// ttmlTimeKind classifies a parsed time expression, which decides how
// resolveTTMLTime combines it with the enclosing timing context.
type ttmlTimeKind int

const (
	// ttmlTimeAbsolute is used for clock and frame times ("mm:ss", "hh:mm:ss", "hh:mm:ss:ff").
	ttmlTimeAbsolute ttmlTimeKind = iota
	// ttmlTimeOffset is used for unit-suffixed values, added to a base time.
	ttmlTimeOffset
	// ttmlTimeAmbiguous is used for bare decimals, which may be either.
	ttmlTimeAmbiguous
)
|
||||
|
||||
type ttmlTimingParams struct {
|
||||
frameRate float64
|
||||
subFrameRate float64
|
||||
tickRate float64
|
||||
}
|
||||
|
||||
type ttmlTimingContext struct {
|
||||
lang string
|
||||
role string
|
||||
begin int64
|
||||
hasBegin bool
|
||||
end int64
|
||||
hasEnd bool
|
||||
invalid bool
|
||||
}
|
||||
|
||||
type ttmlLineRef struct {
|
||||
order int
|
||||
line model.Line
|
||||
}
|
||||
|
||||
type ttmlMetadataEntry struct {
|
||||
key string
|
||||
line model.Line
|
||||
seq int
|
||||
}
|
||||
|
||||
type ttmlResolvedMetadataLine struct {
|
||||
order int
|
||||
seq int
|
||||
line model.Line
|
||||
}
|
||||
|
||||
type ttmlParser struct {
|
||||
decoder *xml.Decoder
|
||||
params ttmlTimingParams
|
||||
|
||||
mainLangOrder []string
|
||||
mainLinesByLang map[string][]model.Line
|
||||
|
||||
mainLineRefsByKey map[string]ttmlLineRef
|
||||
mainLineOrder int
|
||||
|
||||
translationLangOrder []string
|
||||
translationEntriesByLg map[string][]ttmlMetadataEntry
|
||||
|
||||
pronunciationLangOrder []string
|
||||
pronunciationEntriesByLg map[string][]ttmlMetadataEntry
|
||||
|
||||
metadataSeq int
|
||||
}
|
||||
|
||||
func parseTTML(contents []byte) (model.LyricList, error) {
|
||||
contents = xmlEncodingRegex.ReplaceAll(contents, []byte(`<?xml$1encoding="UTF-8"$2?>`))
|
||||
|
||||
p := ttmlParser{
|
||||
decoder: xml.NewDecoder(bytes.NewReader(contents)),
|
||||
params: ttmlTimingParams{
|
||||
frameRate: defaultTTMLFrameRate,
|
||||
subFrameRate: defaultTTMLSubFrameRate,
|
||||
tickRate: defaultTTMLTickRate,
|
||||
},
|
||||
mainLinesByLang: make(map[string][]model.Line),
|
||||
mainLineRefsByKey: make(map[string]ttmlLineRef),
|
||||
translationEntriesByLg: make(map[string][]ttmlMetadataEntry),
|
||||
pronunciationEntriesByLg: make(map[string][]ttmlMetadataEntry),
|
||||
}
|
||||
|
||||
root := ttmlTimingContext{lang: "xxx"}
|
||||
|
||||
for {
|
||||
token, err := p.decoder.Token()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
start, ok := token.(xml.StartElement)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := p.parseElement(start, root); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return p.toLyricList(), nil
|
||||
}
|
||||
|
||||
func (p *ttmlParser) parseElement(start xml.StartElement, parent ttmlTimingContext) error {
|
||||
local := strings.ToLower(start.Name.Local)
|
||||
if local == "tt" {
|
||||
p.updateTimingParams(start.Attr)
|
||||
}
|
||||
|
||||
switch local {
|
||||
case "translation":
|
||||
return p.parseMetadataTrack(start, parent, ttmlLyricKindTranslation)
|
||||
case "transliteration":
|
||||
return p.parseMetadataTrack(start, parent, ttmlLyricKindPronunciation)
|
||||
}
|
||||
|
||||
ctx := p.childContext(start.Attr, parent)
|
||||
if local == "p" {
|
||||
lineText, tokens, err := p.parseParagraph(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if ctx.invalid || lineText == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
parsedLine := model.Line{Value: lineText}
|
||||
if ctx.hasBegin {
|
||||
startMs := ctx.begin
|
||||
parsedLine.Start = &startMs
|
||||
}
|
||||
if ctx.hasEnd {
|
||||
endMs := ctx.end
|
||||
parsedLine.End = &endMs
|
||||
}
|
||||
if len(tokens) > 0 {
|
||||
parsedLine.Token = tokens
|
||||
}
|
||||
parsedLine = hydrateLineTimingFromTokens(parsedLine)
|
||||
|
||||
lineKey, _ := attrValue(start.Attr, "key")
|
||||
p.addMainLine(ctx.lang, lineKey, parsedLine)
|
||||
return nil
|
||||
}
|
||||
|
||||
for {
|
||||
token, err := p.decoder.Token()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
switch t := token.(type) {
|
||||
case xml.StartElement:
|
||||
nextParent := ctx
|
||||
if ctx.invalid {
|
||||
// Best effort: ignore invalid timing in container elements, and
|
||||
// continue traversing descendants with parent context.
|
||||
nextParent = parent
|
||||
}
|
||||
if err := p.parseElement(t, nextParent); err != nil {
|
||||
return err
|
||||
}
|
||||
case xml.EndElement:
|
||||
if strings.EqualFold(t.Name.Local, start.Name.Local) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *ttmlParser) parseMetadataTrack(start xml.StartElement, parent ttmlTimingContext, kind string) error {
|
||||
ctx := p.childContext(start.Attr, parent)
|
||||
lang := normalizeTTMLLang(ctx.lang)
|
||||
|
||||
for {
|
||||
token, err := p.decoder.Token()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
switch t := token.(type) {
|
||||
case xml.StartElement:
|
||||
if strings.EqualFold(t.Name.Local, "text") {
|
||||
entry, ok, err := p.parseMetadataText(t, ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if ok {
|
||||
p.addMetadataEntry(kind, lang, entry)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
nextParent := ctx
|
||||
if ctx.invalid {
|
||||
nextParent = parent
|
||||
}
|
||||
if err := p.parseElement(t, nextParent); err != nil {
|
||||
return err
|
||||
}
|
||||
case xml.EndElement:
|
||||
if strings.EqualFold(t.Name.Local, start.Name.Local) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *ttmlParser) parseMetadataText(start xml.StartElement, parent ttmlTimingContext) (ttmlMetadataEntry, bool, error) {
|
||||
forKey, hasFor := attrValue(start.Attr, "for")
|
||||
forKey = strings.TrimSpace(forKey)
|
||||
|
||||
value, tokens, err := p.parseInlineElement(start, parent)
|
||||
if err != nil {
|
||||
return ttmlMetadataEntry{}, false, err
|
||||
}
|
||||
if !hasFor || forKey == "" {
|
||||
return ttmlMetadataEntry{}, false, nil
|
||||
}
|
||||
|
||||
ctx := p.childContext(start.Attr, parent)
|
||||
if ctx.invalid {
|
||||
return ttmlMetadataEntry{}, false, nil
|
||||
}
|
||||
|
||||
line := model.Line{Value: sanitizeTTMLText(value)}
|
||||
if ctx.hasBegin {
|
||||
startMs := ctx.begin
|
||||
line.Start = &startMs
|
||||
}
|
||||
if ctx.hasEnd {
|
||||
endMs := ctx.end
|
||||
line.End = &endMs
|
||||
}
|
||||
if len(tokens) > 0 {
|
||||
line.Token = tokens
|
||||
}
|
||||
line = hydrateLineTimingFromTokens(line)
|
||||
|
||||
if line.Value == "" && len(line.Token) == 0 {
|
||||
return ttmlMetadataEntry{}, false, nil
|
||||
}
|
||||
|
||||
return ttmlMetadataEntry{key: forKey, line: line}, true, nil
|
||||
}
|
||||
|
||||
func (p *ttmlParser) parseParagraph(parent ttmlTimingContext) (string, []model.Token, error) {
|
||||
var text strings.Builder
|
||||
var tokens []model.Token
|
||||
|
||||
for {
|
||||
token, err := p.decoder.Token()
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
switch t := token.(type) {
|
||||
case xml.StartElement:
|
||||
value, inlineTokens, err := p.parseInlineElement(t, parent)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
text.WriteString(value)
|
||||
tokens = append(tokens, inlineTokens...)
|
||||
case xml.EndElement:
|
||||
if strings.EqualFold(t.Name.Local, "p") {
|
||||
return sanitizeTTMLText(text.String()), tokens, nil
|
||||
}
|
||||
case xml.CharData:
|
||||
text.WriteString(string(t))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *ttmlParser) parseInlineElement(start xml.StartElement, parent ttmlTimingContext) (string, []model.Token, error) {
|
||||
local := strings.ToLower(start.Name.Local)
|
||||
if local == "br" {
|
||||
return "\n", nil, nil
|
||||
}
|
||||
|
||||
ctx := p.childContext(start.Attr, parent)
|
||||
_, hasBegin := attrValue(start.Attr, "begin")
|
||||
_, hasEnd := attrValue(start.Attr, "end")
|
||||
_, hasDur := attrValue(start.Attr, "dur")
|
||||
hasOwnTiming := hasBegin || hasEnd || hasDur
|
||||
|
||||
var text strings.Builder
|
||||
var tokens []model.Token
|
||||
|
||||
for {
|
||||
token, err := p.decoder.Token()
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
|
||||
switch t := token.(type) {
|
||||
case xml.StartElement:
|
||||
value, inlineTokens, err := p.parseInlineElement(t, ctx)
|
||||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
text.WriteString(value)
|
||||
tokens = append(tokens, inlineTokens...)
|
||||
case xml.EndElement:
|
||||
if !strings.EqualFold(t.Name.Local, start.Name.Local) {
|
||||
continue
|
||||
}
|
||||
|
||||
value := text.String()
|
||||
tokenText := sanitizeTTMLText(value)
|
||||
if local == "span" && hasOwnTiming && !ctx.invalid && tokenText != "" && len(tokens) == 0 {
|
||||
parsedToken := model.Token{
|
||||
Value: tokenText,
|
||||
Role: ctx.role,
|
||||
}
|
||||
if ctx.hasBegin {
|
||||
startMs := ctx.begin
|
||||
parsedToken.Start = &startMs
|
||||
}
|
||||
if ctx.hasEnd {
|
||||
endMs := ctx.end
|
||||
parsedToken.End = &endMs
|
||||
}
|
||||
tokens = append(tokens, parsedToken)
|
||||
}
|
||||
|
||||
return value, tokens, nil
|
||||
case xml.CharData:
|
||||
text.WriteString(string(t))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *ttmlParser) toLyricList() model.LyricList {
|
||||
res := make(model.LyricList, 0, len(p.mainLangOrder)+len(p.translationLangOrder)+len(p.pronunciationLangOrder))
|
||||
for _, lang := range p.mainLangOrder {
|
||||
lines := p.mainLinesByLang[lang]
|
||||
if len(lines) == 0 {
|
||||
continue
|
||||
}
|
||||
res = append(res, model.Lyrics{
|
||||
Kind: ttmlLyricKindMain,
|
||||
Lang: lang,
|
||||
Line: lines,
|
||||
Synced: linesAreSynced(lines),
|
||||
})
|
||||
}
|
||||
|
||||
res = append(res, p.buildMetadataLyrics(ttmlLyricKindTranslation, p.translationLangOrder, p.translationEntriesByLg)...)
|
||||
res = append(res, p.buildMetadataLyrics(ttmlLyricKindPronunciation, p.pronunciationLangOrder, p.pronunciationEntriesByLg)...)
|
||||
return res
|
||||
}
|
||||
|
||||
func (p *ttmlParser) buildMetadataLyrics(kind string, langOrder []string, entriesByLang map[string][]ttmlMetadataEntry) model.LyricList {
|
||||
res := make(model.LyricList, 0, len(langOrder))
|
||||
|
||||
for _, lang := range langOrder {
|
||||
entries := entriesByLang[lang]
|
||||
if len(entries) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
seenKeys := make(map[string]struct{}, len(entries))
|
||||
resolved := make([]ttmlResolvedMetadataLine, 0, len(entries))
|
||||
for _, entry := range entries {
|
||||
if _, exists := seenKeys[entry.key]; exists {
|
||||
continue
|
||||
}
|
||||
seenKeys[entry.key] = struct{}{}
|
||||
|
||||
ref, ok := p.mainLineRefsByKey[entry.key]
|
||||
if !ok {
|
||||
log.Warn("Skipping TTML metadata line without matching key", "kind", kind, "lang", lang, "key", entry.key)
|
||||
continue
|
||||
}
|
||||
|
||||
line := entry.line
|
||||
if line.Start == nil && ref.line.Start != nil {
|
||||
startMs := *ref.line.Start
|
||||
line.Start = &startMs
|
||||
}
|
||||
if line.End == nil && ref.line.End != nil {
|
||||
endMs := *ref.line.End
|
||||
line.End = &endMs
|
||||
}
|
||||
line = hydrateLineTimingFromTokens(line)
|
||||
|
||||
if line.Value == "" && len(line.Token) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
resolved = append(resolved, ttmlResolvedMetadataLine{
|
||||
order: ref.order,
|
||||
seq: entry.seq,
|
||||
line: line,
|
||||
})
|
||||
}
|
||||
|
||||
if len(resolved) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
sort.SliceStable(resolved, func(i, j int) bool {
|
||||
if resolved[i].order != resolved[j].order {
|
||||
return resolved[i].order < resolved[j].order
|
||||
}
|
||||
return resolved[i].seq < resolved[j].seq
|
||||
})
|
||||
|
||||
lines := make([]model.Line, len(resolved))
|
||||
for i := range resolved {
|
||||
lines[i] = resolved[i].line
|
||||
}
|
||||
|
||||
res = append(res, model.Lyrics{
|
||||
Kind: kind,
|
||||
Lang: lang,
|
||||
Line: lines,
|
||||
Synced: linesAreSynced(lines),
|
||||
})
|
||||
}
|
||||
|
||||
return res
|
||||
}
|
||||
|
||||
func (p *ttmlParser) addMainLine(lang string, lineKey string, line model.Line) {
|
||||
lang = normalizeTTMLLang(lang)
|
||||
if _, ok := p.mainLinesByLang[lang]; !ok {
|
||||
p.mainLangOrder = append(p.mainLangOrder, lang)
|
||||
}
|
||||
p.mainLinesByLang[lang] = append(p.mainLinesByLang[lang], line)
|
||||
|
||||
lineKey = strings.TrimSpace(lineKey)
|
||||
if lineKey != "" {
|
||||
if _, exists := p.mainLineRefsByKey[lineKey]; !exists {
|
||||
p.mainLineRefsByKey[lineKey] = ttmlLineRef{
|
||||
order: p.mainLineOrder,
|
||||
line: line,
|
||||
}
|
||||
}
|
||||
}
|
||||
p.mainLineOrder++
|
||||
}
|
||||
|
||||
func (p *ttmlParser) addMetadataEntry(kind string, lang string, entry ttmlMetadataEntry) {
|
||||
lang = normalizeTTMLLang(lang)
|
||||
entry.seq = p.metadataSeq
|
||||
p.metadataSeq++
|
||||
|
||||
switch kind {
|
||||
case ttmlLyricKindTranslation:
|
||||
if _, ok := p.translationEntriesByLg[lang]; !ok {
|
||||
p.translationLangOrder = append(p.translationLangOrder, lang)
|
||||
}
|
||||
p.translationEntriesByLg[lang] = append(p.translationEntriesByLg[lang], entry)
|
||||
case ttmlLyricKindPronunciation:
|
||||
if _, ok := p.pronunciationEntriesByLg[lang]; !ok {
|
||||
p.pronunciationLangOrder = append(p.pronunciationLangOrder, lang)
|
||||
}
|
||||
p.pronunciationEntriesByLg[lang] = append(p.pronunciationEntriesByLg[lang], entry)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *ttmlParser) childContext(attrs []xml.Attr, parent ttmlTimingContext) ttmlTimingContext {
|
||||
ctx := parent
|
||||
|
||||
if lang, ok := attrValue(attrs, "lang"); ok {
|
||||
ctx.lang = normalizeTTMLLang(lang)
|
||||
}
|
||||
if role, ok := attrValue(attrs, "role"); ok {
|
||||
role = strings.TrimSpace(role)
|
||||
if role != "" {
|
||||
if ctx.role == "" {
|
||||
ctx.role = role
|
||||
} else if !strings.Contains(ctx.role, role) {
|
||||
ctx.role = ctx.role + " " + role
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
beginExpr, hasBegin := attrValue(attrs, "begin")
|
||||
endExpr, hasEnd := attrValue(attrs, "end")
|
||||
durExpr, hasDur := attrValue(attrs, "dur")
|
||||
|
||||
if hasBegin {
|
||||
begin, kind, ok := parseTTMLTimeExpression(beginExpr, p.params)
|
||||
if !ok {
|
||||
ctx.invalid = true
|
||||
return ctx
|
||||
}
|
||||
|
||||
base := int64(0)
|
||||
if parent.hasBegin {
|
||||
base = parent.begin
|
||||
}
|
||||
ctx.begin = resolveTTMLTime(begin, kind, base, parent)
|
||||
ctx.hasBegin = true
|
||||
} else {
|
||||
ctx.begin = parent.begin
|
||||
ctx.hasBegin = parent.hasBegin
|
||||
}
|
||||
|
||||
var calculatedEnd int64
|
||||
calculatedHasEnd := false
|
||||
|
||||
if hasEnd {
|
||||
end, kind, ok := parseTTMLTimeExpression(endExpr, p.params)
|
||||
if !ok {
|
||||
ctx.invalid = true
|
||||
return ctx
|
||||
}
|
||||
|
||||
base := ctx.begin
|
||||
if !ctx.hasBegin {
|
||||
base = parent.begin
|
||||
}
|
||||
calculatedEnd = resolveTTMLTime(end, kind, base, parent)
|
||||
calculatedHasEnd = true
|
||||
}
|
||||
|
||||
if hasDur {
|
||||
dur, ok := parseTTMLDurationExpression(durExpr, p.params)
|
||||
if !ok {
|
||||
ctx.invalid = true
|
||||
return ctx
|
||||
}
|
||||
if ctx.hasBegin {
|
||||
durEnd := ctx.begin + dur
|
||||
if !calculatedHasEnd || durEnd < calculatedEnd {
|
||||
calculatedEnd = durEnd
|
||||
calculatedHasEnd = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !calculatedHasEnd && parent.hasEnd {
|
||||
calculatedEnd = parent.end
|
||||
calculatedHasEnd = true
|
||||
}
|
||||
|
||||
ctx.end = calculatedEnd
|
||||
ctx.hasEnd = calculatedHasEnd
|
||||
return ctx
|
||||
}
|
||||
|
||||
func (p *ttmlParser) updateTimingParams(attrs []xml.Attr) {
|
||||
frameRate := p.params.frameRate
|
||||
if value, ok := attrValue(attrs, "frameRate"); ok {
|
||||
if parsed, err := strconv.ParseFloat(value, 64); err == nil && parsed > 0 {
|
||||
frameRate = parsed
|
||||
}
|
||||
}
|
||||
|
||||
if value, ok := attrValue(attrs, "frameRateMultiplier"); ok {
|
||||
parts := strings.Fields(value)
|
||||
if len(parts) == 2 {
|
||||
numerator, errA := strconv.ParseFloat(parts[0], 64)
|
||||
denominator, errB := strconv.ParseFloat(parts[1], 64)
|
||||
if errA == nil && errB == nil && denominator > 0 {
|
||||
frameRate = frameRate * (numerator / denominator)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
subFrameRate := p.params.subFrameRate
|
||||
if value, ok := attrValue(attrs, "subFrameRate"); ok {
|
||||
if parsed, err := strconv.ParseFloat(value, 64); err == nil && parsed > 0 {
|
||||
subFrameRate = parsed
|
||||
}
|
||||
}
|
||||
|
||||
tickRate := p.params.tickRate
|
||||
if value, ok := attrValue(attrs, "tickRate"); ok {
|
||||
if parsed, err := strconv.ParseFloat(value, 64); err == nil && parsed > 0 {
|
||||
tickRate = parsed
|
||||
}
|
||||
}
|
||||
|
||||
p.params.frameRate = max(frameRate, defaultTTMLFrameRate)
|
||||
p.params.subFrameRate = max(subFrameRate, defaultTTMLSubFrameRate)
|
||||
p.params.tickRate = max(tickRate, defaultTTMLTickRate)
|
||||
}
|
||||
|
||||
func parseTTMLDurationExpression(expr string, params ttmlTimingParams) (int64, bool) {
|
||||
value, _, ok := parseTTMLTimeExpression(expr, params)
|
||||
return value, ok
|
||||
}
|
||||
|
||||
func resolveTTMLTime(value int64, kind ttmlTimeKind, base int64, parent ttmlTimingContext) int64 {
|
||||
switch kind {
|
||||
case ttmlTimeAbsolute:
|
||||
return value
|
||||
case ttmlTimeOffset:
|
||||
return base + value
|
||||
case ttmlTimeAmbiguous:
|
||||
absolute := value
|
||||
offset := base + value
|
||||
|
||||
// No parent timing context → no reference frame for offsets.
|
||||
// Prefer absolute when offset differs (i.e., base > 0).
|
||||
if !parent.hasBegin && !parent.hasEnd && base != 0 {
|
||||
return absolute
|
||||
}
|
||||
|
||||
if parent.hasBegin && parent.hasEnd {
|
||||
absoluteInParent := absolute >= parent.begin && absolute <= parent.end
|
||||
offsetInParent := offset >= parent.begin && offset <= parent.end
|
||||
if absoluteInParent && !offsetInParent {
|
||||
return absolute
|
||||
}
|
||||
if offsetInParent && !absoluteInParent {
|
||||
return offset
|
||||
}
|
||||
}
|
||||
|
||||
if parent.hasBegin {
|
||||
if absolute < parent.begin && offset >= parent.begin {
|
||||
return offset
|
||||
}
|
||||
if absolute >= parent.begin && offset > absolute {
|
||||
return absolute
|
||||
}
|
||||
}
|
||||
return offset
|
||||
default:
|
||||
return base + value
|
||||
}
|
||||
}
|
||||
|
||||
func parseTTMLTimeExpression(expr string, params ttmlTimingParams) (int64, ttmlTimeKind, bool) {
|
||||
expr = strings.TrimSpace(expr)
|
||||
if expr == "" {
|
||||
return 0, ttmlTimeOffset, false
|
||||
}
|
||||
|
||||
lower := strings.ToLower(expr)
|
||||
if strings.Contains(lower, "wallclock(") ||
|
||||
strings.Contains(lower, ".begin") ||
|
||||
strings.Contains(lower, ".end") {
|
||||
log.Warn("Unsupported TTML time expression", "value", expr)
|
||||
return 0, ttmlTimeOffset, false
|
||||
}
|
||||
|
||||
// Best-effort support for non-standard TTML seen in the wild where a
|
||||
// bare decimal value is used (implicitly seconds), e.g. "0.170".
|
||||
if value, err := strconv.ParseFloat(lower, 64); err == nil && value >= 0 {
|
||||
return int64(math.Round(value * 1000)), ttmlTimeAmbiguous, true
|
||||
}
|
||||
|
||||
if matches := offsetTimeRegex.FindStringSubmatch(lower); len(matches) == 3 {
|
||||
value, err := strconv.ParseFloat(matches[1], 64)
|
||||
if err != nil {
|
||||
return 0, ttmlTimeOffset, false
|
||||
}
|
||||
|
||||
unit := matches[2]
|
||||
seconds := 0.0
|
||||
switch unit {
|
||||
case "h":
|
||||
seconds = value * 60 * 60
|
||||
case "m":
|
||||
seconds = value * 60
|
||||
case "s":
|
||||
seconds = value
|
||||
case "ms":
|
||||
seconds = value / 1000
|
||||
case "f":
|
||||
seconds = value / params.frameRate
|
||||
case "t":
|
||||
seconds = value / params.tickRate
|
||||
default:
|
||||
return 0, ttmlTimeOffset, false
|
||||
}
|
||||
|
||||
return int64(math.Round(seconds * 1000)), ttmlTimeOffset, true
|
||||
}
|
||||
|
||||
colonCount := strings.Count(expr, ":")
|
||||
switch colonCount {
|
||||
case 1, 2:
|
||||
clockMs, ok := parseTTMLClockTime(expr)
|
||||
if !ok {
|
||||
return 0, ttmlTimeAbsolute, false
|
||||
}
|
||||
return clockMs, ttmlTimeAbsolute, true
|
||||
case 3:
|
||||
framesMs, ok := parseTTMLFrameTime(expr, params)
|
||||
if !ok {
|
||||
return 0, ttmlTimeAbsolute, false
|
||||
}
|
||||
return framesMs, ttmlTimeAbsolute, true
|
||||
default:
|
||||
log.Warn("Unsupported TTML time expression", "value", expr)
|
||||
return 0, ttmlTimeOffset, false
|
||||
}
|
||||
}
|
||||
|
||||
// parseTTMLClockTime parses a clock-time of the form "mm:ss(.fraction)" or
// "hh:mm:ss(.fraction)" and returns it in milliseconds, rounded to the nearest
// millisecond.
//
// It returns false when the value does not have two or three colon-separated
// parts, when a part is not numeric, when any component is negative, or when
// the seconds part is NaN/Inf (strconv.ParseFloat accepts the literals "nan"
// and "inf") or the total does not fit in an int64.
func parseTTMLClockTime(value string) (int64, bool) {
	parts := strings.Split(value, ":")
	if len(parts) != 2 && len(parts) != 3 {
		return 0, false
	}

	var hours int64
	if len(parts) == 3 {
		h, err := strconv.ParseInt(parts[0], 10, 64)
		if err != nil || h < 0 {
			return 0, false
		}
		hours = h
		parts = parts[1:]
	}

	minutes, err := strconv.ParseInt(parts[0], 10, 64)
	if err != nil || minutes < 0 {
		return 0, false
	}

	seconds, err := strconv.ParseFloat(parts[1], 64)
	if err != nil || math.IsNaN(seconds) || seconds < 0 {
		return 0, false
	}

	// Accumulate in floating point so that absurdly large hour/minute values
	// cannot wrap around in int64 arithmetic before the range check.
	totalMs := math.Round((float64(hours)*60*60 + float64(minutes)*60 + seconds) * 1000)
	if totalMs >= math.MaxInt64 { // also true for +Inf
		return 0, false
	}
	return int64(totalMs), true
}
|
||||
|
||||
func parseTTMLFrameTime(value string, params ttmlTimingParams) (int64, bool) {
|
||||
parts := strings.Split(value, ":")
|
||||
if len(parts) != 4 {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
hours, err := strconv.ParseInt(parts[0], 10, 64)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
minutes, err := strconv.ParseInt(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
seconds, err := strconv.ParseInt(parts[2], 10, 64)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
frameParts := strings.SplitN(parts[3], ".", 2)
|
||||
frames, err := strconv.ParseFloat(frameParts[0], 64)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
subFrames := 0.0
|
||||
if len(frameParts) == 2 {
|
||||
subFrames, err = strconv.ParseFloat(frameParts[1], 64)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
}
|
||||
|
||||
totalSeconds := float64(hours*60*60 + minutes*60 + seconds)
|
||||
totalSeconds += frames / params.frameRate
|
||||
totalSeconds += subFrames / (params.subFrameRate * params.frameRate)
|
||||
|
||||
return int64(math.Round(totalSeconds * 1000)), true
|
||||
}
|
||||
|
||||
// attrValue looks up the first attribute whose local name equals key, ignoring
// case and the attribute's namespace, and returns its value with surrounding
// whitespace removed. The boolean result reports whether such an attribute
// was present.
func attrValue(attrs []xml.Attr, key string) (string, bool) {
	for i := 0; i < len(attrs); i++ {
		if !strings.EqualFold(attrs[i].Name.Local, key) {
			continue
		}
		return strings.TrimSpace(attrs[i].Value), true
	}
	return "", false
}
|
||||
|
||||
// normalizeTTMLLang returns lang trimmed and lower-cased. A blank language is
// replaced by the placeholder "xxx".
func normalizeTTMLLang(lang string) string {
	if trimmed := strings.TrimSpace(lang); trimmed != "" {
		return strings.ToLower(trimmed)
	}
	return "xxx"
}
|
||||
|
||||
func sanitizeTTMLText(raw string) string {
|
||||
raw = str.SanitizeText(raw)
|
||||
raw = strings.ReplaceAll(raw, "\r\n", "\n")
|
||||
raw = strings.ReplaceAll(raw, "\r", "\n")
|
||||
|
||||
lines := strings.Split(raw, "\n")
|
||||
for i := range lines {
|
||||
lines[i] = strings.TrimSpace(lines[i])
|
||||
}
|
||||
return strings.TrimSpace(strings.Join(lines, "\n"))
|
||||
}
|
||||
|
||||
func linesAreSynced(lines []model.Line) bool {
|
||||
for i := range lines {
|
||||
if lines[i].Start != nil {
|
||||
return true
|
||||
}
|
||||
for j := range lines[i].Token {
|
||||
if lines[i].Token[j].Start != nil {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func hydrateLineTimingFromTokens(line model.Line) model.Line {
|
||||
if len(line.Token) == 0 {
|
||||
return line
|
||||
}
|
||||
|
||||
var earliestStart *int64
|
||||
var latestEnd *int64
|
||||
for i := range line.Token {
|
||||
token := line.Token[i]
|
||||
if token.Start != nil {
|
||||
if earliestStart == nil || *token.Start < *earliestStart {
|
||||
v := *token.Start
|
||||
earliestStart = &v
|
||||
}
|
||||
}
|
||||
|
||||
candidateEnd := token.End
|
||||
if candidateEnd == nil {
|
||||
candidateEnd = token.Start
|
||||
}
|
||||
if candidateEnd != nil {
|
||||
if latestEnd == nil || *candidateEnd > *latestEnd {
|
||||
v := *candidateEnd
|
||||
latestEnd = &v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if line.Start == nil && earliestStart != nil {
|
||||
v := *earliestStart
|
||||
line.Start = &v
|
||||
}
|
||||
if line.End == nil && latestEnd != nil {
|
||||
v := *latestEnd
|
||||
line.End = &v
|
||||
}
|
||||
return line
|
||||
}
|
||||
|
||||
// max returns v when it is a usable positive rate and fallback otherwise.
//
// Despite its name this is not a maximum: it substitutes fallback whenever v
// is zero, negative, NaN or infinite. Non-finite values are rejected because
// callers divide by the result, and a NaN/Inf rate would silently turn every
// derived timestamp into garbage.
//
// NOTE(review): on Go 1.21+ this declaration shadows the builtin max inside
// the package; the name is kept so existing callers continue to compile.
func max(v float64, fallback float64) float64 {
	if math.IsNaN(v) || math.IsInf(v, 0) || v <= 0 {
		return fallback
	}
	return v
}
|
||||
398
core/lyrics/ttml_test.go
Normal file
398
core/lyrics/ttml_test.go
Normal file
@ -0,0 +1,398 @@
|
||||
package lyrics
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/navidrome/navidrome/model"
|
||||
)
|
||||
|
||||
func TestParseTTML_MultiLanguageAndTiming(t *testing.T) {
|
||||
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttp="http://www.w3.org/ns/ttml#parameter" ttp:frameRate="30" ttp:subFrameRate="2" ttp:tickRate="10">
|
||||
<body>
|
||||
<div xml:lang="eng" begin="1s">
|
||||
<p begin="2s">Line one</p>
|
||||
<p begin="00:00:04:15.1"><span>Line two</span><br/>with break</p>
|
||||
</div>
|
||||
<div xml:lang="por">
|
||||
<p begin="45t">Linha</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`)
|
||||
|
||||
list, err := parseTTML(content)
|
||||
if err != nil {
|
||||
t.Fatalf("parseTTML returned error: %v", err)
|
||||
}
|
||||
if len(list) != 2 {
|
||||
t.Fatalf("expected 2 lyric tracks, got %d", len(list))
|
||||
}
|
||||
|
||||
eng := list[0]
|
||||
if eng.Lang != "eng" {
|
||||
t.Fatalf("expected first track language 'eng', got %q", eng.Lang)
|
||||
}
|
||||
if !eng.Synced {
|
||||
t.Fatal("expected first track to be synced")
|
||||
}
|
||||
assertTimedLine(t, eng.Line[0], 3000, "Line one")
|
||||
assertTimedLine(t, eng.Line[1], 4517, "Line two\nwith break")
|
||||
|
||||
por := list[1]
|
||||
if por.Lang != "por" {
|
||||
t.Fatalf("expected second track language 'por', got %q", por.Lang)
|
||||
}
|
||||
assertTimedLine(t, por.Line[0], 4500, "Linha")
|
||||
}
|
||||
|
||||
func TestParseTTML_UnsupportedCueSkipped(t *testing.T) {
|
||||
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml">
|
||||
<body xml:lang="eng">
|
||||
<div>
|
||||
<p begin="wallclock(2026-01-01T00:00:00Z)">Skip me</p>
|
||||
<p begin="1s">Keep me</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`)
|
||||
|
||||
list, err := parseTTML(content)
|
||||
if err != nil {
|
||||
t.Fatalf("parseTTML returned error: %v", err)
|
||||
}
|
||||
if len(list) != 1 {
|
||||
t.Fatalf("expected 1 lyric track, got %d", len(list))
|
||||
}
|
||||
if len(list[0].Line) != 1 {
|
||||
t.Fatalf("expected 1 line in lyric track, got %d", len(list[0].Line))
|
||||
}
|
||||
assertTimedLine(t, list[0].Line[0], 1000, "Keep me")
|
||||
}
|
||||
|
||||
func TestParseTTML_BeginEndDurWithInheritance(t *testing.T) {
|
||||
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml">
|
||||
<body xml:lang="eng" begin="10s">
|
||||
<div begin="5s" dur="8s">
|
||||
<p begin="1s" dur="2s">First line</p>
|
||||
<p begin="3s" end="5s">Second line</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`)
|
||||
|
||||
list, err := parseTTML(content)
|
||||
if err != nil {
|
||||
t.Fatalf("parseTTML returned error: %v", err)
|
||||
}
|
||||
if len(list) != 1 {
|
||||
t.Fatalf("expected 1 lyric track, got %d", len(list))
|
||||
}
|
||||
if list[0].Lang != "eng" {
|
||||
t.Fatalf("expected language 'eng', got %q", list[0].Lang)
|
||||
}
|
||||
if len(list[0].Line) != 2 {
|
||||
t.Fatalf("expected 2 lines, got %d", len(list[0].Line))
|
||||
}
|
||||
assertTimedLine(t, list[0].Line[0], 16000, "First line")
|
||||
assertTimedLine(t, list[0].Line[1], 18000, "Second line")
|
||||
}
|
||||
|
||||
func TestParseTTML_NonStandardBareSecondOffsets(t *testing.T) {
|
||||
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml">
|
||||
<body xml:lang="eng" begin="10">
|
||||
<div>
|
||||
<p begin="0.170">First line</p>
|
||||
<p begin="3.710">Second line</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`)
|
||||
|
||||
list, err := parseTTML(content)
|
||||
if err != nil {
|
||||
t.Fatalf("parseTTML returned error: %v", err)
|
||||
}
|
||||
if len(list) != 1 {
|
||||
t.Fatalf("expected 1 lyric track, got %d", len(list))
|
||||
}
|
||||
if len(list[0].Line) != 2 {
|
||||
t.Fatalf("expected 2 lines, got %d", len(list[0].Line))
|
||||
}
|
||||
assertTimedLine(t, list[0].Line[0], 10170, "First line")
|
||||
assertTimedLine(t, list[0].Line[1], 13710, "Second line")
|
||||
}
|
||||
|
||||
func TestParseTTML_WordTimingTokens(t *testing.T) {
|
||||
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
|
||||
<body xml:lang="eng">
|
||||
<div>
|
||||
<p begin="00:01.000" end="00:03.000">
|
||||
<span begin="00:01.000" end="00:01.400">He</span><span begin="00:01.400" end="00:01.800">llo</span>
|
||||
<span ttm:role="x-bg"><span begin="00:02.000" end="00:02.500">echo</span></span>
|
||||
</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`)
|
||||
|
||||
list, err := parseTTML(content)
|
||||
if err != nil {
|
||||
t.Fatalf("parseTTML returned error: %v", err)
|
||||
}
|
||||
if len(list) != 1 {
|
||||
t.Fatalf("expected 1 lyric track, got %d", len(list))
|
||||
}
|
||||
if len(list[0].Line) != 1 {
|
||||
t.Fatalf("expected 1 line, got %d", len(list[0].Line))
|
||||
}
|
||||
|
||||
line := list[0].Line[0]
|
||||
assertTimedLine(t, line, 1000, "Hello\necho")
|
||||
if line.End == nil || *line.End != 3000 {
|
||||
t.Fatalf("expected line end 3000, got %v", line.End)
|
||||
}
|
||||
if len(line.Token) != 3 {
|
||||
t.Fatalf("expected 3 timed tokens, got %d", len(line.Token))
|
||||
}
|
||||
|
||||
assertToken(t, line.Token[0], 1000, 1400, "He", "")
|
||||
assertToken(t, line.Token[1], 1400, 1800, "llo", "")
|
||||
assertToken(t, line.Token[2], 2000, 2500, "echo", "x-bg")
|
||||
}
|
||||
|
||||
func TestParseTTML_AmbiguousDecimalTimingPrefersAbsoluteWhenInsideParentWindow(t *testing.T) {
|
||||
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml">
|
||||
<body xml:lang="eng">
|
||||
<div begin="37.870" end="45.570">
|
||||
<p begin="43.444" end="45.570">
|
||||
<span begin="43.444" end="43.716">go</span>
|
||||
<span begin="43.716" end="43.887">go</span>
|
||||
</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`)
|
||||
|
||||
list, err := parseTTML(content)
|
||||
if err != nil {
|
||||
t.Fatalf("parseTTML returned error: %v", err)
|
||||
}
|
||||
if len(list) != 1 || len(list[0].Line) != 1 {
|
||||
t.Fatalf("expected one parsed lyric line, got %#v", list)
|
||||
}
|
||||
|
||||
line := list[0].Line[0]
|
||||
assertTimedLine(t, line, 43444, "go\ngo")
|
||||
if line.End == nil || *line.End != 45570 {
|
||||
t.Fatalf("expected line end 45570, got %v", line.End)
|
||||
}
|
||||
if len(line.Token) != 2 {
|
||||
t.Fatalf("expected 2 timed tokens, got %d", len(line.Token))
|
||||
}
|
||||
assertToken(t, line.Token[0], 43444, 43716, "go", "")
|
||||
assertToken(t, line.Token[1], 43716, 43887, "go", "")
|
||||
}
|
||||
|
||||
func TestParseTTML_UnsyncedFallback(t *testing.T) {
|
||||
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml">
|
||||
<body>
|
||||
<div>
|
||||
<p>No timing here</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`)
|
||||
|
||||
list, err := parseTTML(content)
|
||||
if err != nil {
|
||||
t.Fatalf("parseTTML returned error: %v", err)
|
||||
}
|
||||
if len(list) != 1 {
|
||||
t.Fatalf("expected 1 lyric track, got %d", len(list))
|
||||
}
|
||||
if list[0].Lang != "xxx" {
|
||||
t.Fatalf("expected default language 'xxx', got %q", list[0].Lang)
|
||||
}
|
||||
if list[0].Synced {
|
||||
t.Fatal("expected lyric track to be unsynced")
|
||||
}
|
||||
if len(list[0].Line) != 1 {
|
||||
t.Fatalf("expected 1 line, got %d", len(list[0].Line))
|
||||
}
|
||||
if list[0].Line[0].Start != nil {
|
||||
t.Fatalf("expected line start to be nil, got %v", *list[0].Line[0].Start)
|
||||
}
|
||||
if list[0].Line[0].Value != "No timing here" {
|
||||
t.Fatalf("expected line value %q, got %q", "No timing here", list[0].Line[0].Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTTML_MetadataTracksByKey(t *testing.T) {
|
||||
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:itunes="http://music.apple.com/lyric-ttml-internal">
|
||||
<head>
|
||||
<metadata>
|
||||
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
|
||||
<translations>
|
||||
<translation xml:lang="es">
|
||||
<text for="L1">Hola</text>
|
||||
<text for="MISSING">Skip me</text>
|
||||
</translation>
|
||||
</translations>
|
||||
<transliterations>
|
||||
<transliteration xml:lang="ja-Latn">
|
||||
<text for="L2"><span begin="00:02.000" end="00:02.300" xmlns="http://www.w3.org/ns/ttml">ko</span><span begin="00:02.300" end="00:02.600" xmlns="http://www.w3.org/ns/ttml">nni</span></text>
|
||||
</transliteration>
|
||||
</transliterations>
|
||||
</iTunesMetadata>
|
||||
</metadata>
|
||||
</head>
|
||||
<body xml:lang="ja">
|
||||
<div>
|
||||
<p begin="00:01.000" end="00:01.500" itunes:key="L1">こんにちは</p>
|
||||
<p begin="00:02.000" end="00:02.700" itunes:key="L2">こんばんは</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`)
|
||||
|
||||
list, err := parseTTML(content)
|
||||
if err != nil {
|
||||
t.Fatalf("parseTTML returned error: %v", err)
|
||||
}
|
||||
if len(list) != 3 {
|
||||
t.Fatalf("expected 3 lyric tracks, got %d", len(list))
|
||||
}
|
||||
|
||||
main := list[0]
|
||||
if main.Kind != "main" {
|
||||
t.Fatalf("expected main track kind %q, got %q", "main", main.Kind)
|
||||
}
|
||||
if main.Lang != "ja" {
|
||||
t.Fatalf("expected main track language %q, got %q", "ja", main.Lang)
|
||||
}
|
||||
if len(main.Line) != 2 {
|
||||
t.Fatalf("expected 2 lines in main track, got %d", len(main.Line))
|
||||
}
|
||||
|
||||
translation := list[1]
|
||||
if translation.Kind != "translation" {
|
||||
t.Fatalf("expected translation kind %q, got %q", "translation", translation.Kind)
|
||||
}
|
||||
if translation.Lang != "es" {
|
||||
t.Fatalf("expected translation language %q, got %q", "es", translation.Lang)
|
||||
}
|
||||
if len(translation.Line) != 1 {
|
||||
t.Fatalf("expected 1 translation line, got %d", len(translation.Line))
|
||||
}
|
||||
assertTimedLine(t, translation.Line[0], 1000, "Hola")
|
||||
if translation.Line[0].End == nil || *translation.Line[0].End != 1500 {
|
||||
t.Fatalf("expected translation line end %d, got %v", 1500, translation.Line[0].End)
|
||||
}
|
||||
|
||||
pronunciation := list[2]
|
||||
if pronunciation.Kind != "pronunciation" {
|
||||
t.Fatalf("expected pronunciation kind %q, got %q", "pronunciation", pronunciation.Kind)
|
||||
}
|
||||
if pronunciation.Lang != "ja-latn" {
|
||||
t.Fatalf("expected pronunciation language %q, got %q", "ja-latn", pronunciation.Lang)
|
||||
}
|
||||
if len(pronunciation.Line) != 1 {
|
||||
t.Fatalf("expected 1 pronunciation line, got %d", len(pronunciation.Line))
|
||||
}
|
||||
assertTimedLine(t, pronunciation.Line[0], 2000, "konni")
|
||||
if pronunciation.Line[0].End == nil || *pronunciation.Line[0].End != 2600 {
|
||||
t.Fatalf("expected pronunciation line end %d, got %v", 2600, pronunciation.Line[0].End)
|
||||
}
|
||||
if len(pronunciation.Line[0].Token) != 2 {
|
||||
t.Fatalf("expected 2 pronunciation tokens, got %d", len(pronunciation.Line[0].Token))
|
||||
}
|
||||
assertToken(t, pronunciation.Line[0].Token[0], 2000, 2300, "ko", "")
|
||||
assertToken(t, pronunciation.Line[0].Token[1], 2300, 2600, "nni", "")
|
||||
}
|
||||
|
||||
func TestParseTTML_PronunciationBareDecimalEndTimes(t *testing.T) {
|
||||
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:itunes="http://music.apple.com/lyric-ttml-internal">
|
||||
<head>
|
||||
<metadata>
|
||||
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
|
||||
<transliterations>
|
||||
<transliteration xml:lang="ja-Latn">
|
||||
<text for="L1"><span begin="2.747" end="3.018" xmlns="http://www.w3.org/ns/ttml">I</span> <span begin="3.018" end="3.179" xmlns="http://www.w3.org/ns/ttml">woke</span> <span begin="3.179" end="3.582" xmlns="http://www.w3.org/ns/ttml">up</span></text>
|
||||
</transliteration>
|
||||
</transliterations>
|
||||
</iTunesMetadata>
|
||||
</metadata>
|
||||
</head>
|
||||
<body xml:lang="ja">
|
||||
<div>
|
||||
<p begin="00:02.747" end="00:04.000" itunes:key="L1">起きた</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>`)
|
||||
|
||||
list, err := parseTTML(content)
|
||||
if err != nil {
|
||||
t.Fatalf("parseTTML returned error: %v", err)
|
||||
}
|
||||
|
||||
var pronunciation *model.Lyrics
|
||||
for i := range list {
|
||||
if list[i].Kind == "pronunciation" {
|
||||
pronunciation = &list[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
if pronunciation == nil {
|
||||
t.Fatal("expected a pronunciation track")
|
||||
}
|
||||
if len(pronunciation.Line) != 1 {
|
||||
t.Fatalf("expected 1 pronunciation line, got %d", len(pronunciation.Line))
|
||||
}
|
||||
|
||||
line := pronunciation.Line[0]
|
||||
assertTimedLine(t, line, 2747, "I woke up")
|
||||
if len(line.Token) != 3 {
|
||||
t.Fatalf("expected 3 tokens, got %d", len(line.Token))
|
||||
}
|
||||
assertToken(t, line.Token[0], 2747, 3018, "I", "")
|
||||
assertToken(t, line.Token[1], 3018, 3179, "woke", "")
|
||||
assertToken(t, line.Token[2], 3179, 3582, "up", "")
|
||||
}
|
||||
|
||||
func assertTimedLine(t *testing.T, line model.Line, expectedStart int64, expectedValue string) {
|
||||
t.Helper()
|
||||
|
||||
if line.Start == nil {
|
||||
t.Fatal("expected line start to be set, got nil")
|
||||
}
|
||||
if *line.Start != expectedStart {
|
||||
t.Fatalf("expected line start %d, got %d", expectedStart, *line.Start)
|
||||
}
|
||||
if line.Value != expectedValue {
|
||||
t.Fatalf("expected line value %q, got %q", expectedValue, line.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func assertToken(t *testing.T, token model.Token, expectedStart int64, expectedEnd int64, expectedValue string, expectedRole string) {
|
||||
t.Helper()
|
||||
|
||||
if token.Start == nil {
|
||||
t.Fatal("expected token start to be set, got nil")
|
||||
}
|
||||
if *token.Start != expectedStart {
|
||||
t.Fatalf("expected token start %d, got %d", expectedStart, *token.Start)
|
||||
}
|
||||
if token.End == nil {
|
||||
t.Fatal("expected token end to be set, got nil")
|
||||
}
|
||||
if *token.End != expectedEnd {
|
||||
t.Fatalf("expected token end %d, got %d", expectedEnd, *token.End)
|
||||
}
|
||||
if token.Value != expectedValue {
|
||||
t.Fatalf("expected token value %q, got %q", expectedValue, token.Value)
|
||||
}
|
||||
if token.Role != expectedRole {
|
||||
t.Fatalf("expected token role %q, got %q", expectedRole, token.Role)
|
||||
}
|
||||
}
|
||||
@ -11,14 +11,24 @@ import (
|
||||
"github.com/navidrome/navidrome/utils/str"
|
||||
)
|
||||
|
||||
type Line struct {
|
||||
// Token is a timed fragment (word or syllable) of a lyric line.
type Token struct {
	// Start and End are the token's timing in milliseconds; either may be
	// nil when the source provides no value, and is then omitted from output.
	Start *int64 `structs:"start,omitempty" json:"start,omitempty"`
	End   *int64 `structs:"end,omitempty" json:"end,omitempty"`
	// Value is the token's text.
	Value string `structs:"value" json:"value"`
	// Role is an optional role label for the token (e.g. "x-bg").
	Role string `structs:"role,omitempty" json:"role,omitempty"`
}
|
||||
|
||||
type Line struct {
|
||||
Start *int64 `structs:"start,omitempty" json:"start,omitempty"`
|
||||
End *int64 `structs:"end,omitempty" json:"end,omitempty"`
|
||||
Value string `structs:"value" json:"value"`
|
||||
Token []Token `structs:"token,omitempty" json:"token,omitempty"`
|
||||
}
|
||||
|
||||
type Lyrics struct {
|
||||
DisplayArtist string `structs:"displayArtist,omitempty" json:"displayArtist,omitempty"`
|
||||
DisplayTitle string `structs:"displayTitle,omitempty" json:"displayTitle,omitempty"`
|
||||
Kind string `structs:"kind,omitempty" json:"kind,omitempty"`
|
||||
Lang string `structs:"lang" json:"lang"`
|
||||
Line []Line `structs:"line" json:"line"`
|
||||
Offset *int64 `structs:"offset,omitempty" json:"offset,omitempty"`
|
||||
|
||||
@ -478,19 +478,47 @@ func mapExplicitStatus(explicitStatus string) string {
|
||||
|
||||
func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics) responses.StructuredLyric {
|
||||
lines := make([]responses.Line, len(lyrics.Line))
|
||||
tokenLines := make([]responses.TokenLine, 0, len(lyrics.Line))
|
||||
|
||||
for i, line := range lyrics.Line {
|
||||
lines[i] = responses.Line{
|
||||
Start: line.Start,
|
||||
Value: line.Value,
|
||||
}
|
||||
if len(line.Token) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
tokens := make([]responses.LyricToken, len(line.Token))
|
||||
for j, token := range line.Token {
|
||||
tokens[j] = responses.LyricToken{
|
||||
Start: token.Start,
|
||||
End: token.End,
|
||||
Value: token.Value,
|
||||
Role: token.Role,
|
||||
}
|
||||
}
|
||||
tokenLines = append(tokenLines, responses.TokenLine{
|
||||
Index: int32(i),
|
||||
Start: line.Start,
|
||||
End: line.End,
|
||||
Value: line.Value,
|
||||
Token: tokens,
|
||||
})
|
||||
}
|
||||
|
||||
kind := strings.TrimSpace(lyrics.Kind)
|
||||
if kind == "" {
|
||||
kind = "main"
|
||||
}
|
||||
|
||||
structured := responses.StructuredLyric{
|
||||
DisplayArtist: lyrics.DisplayArtist,
|
||||
DisplayTitle: lyrics.DisplayTitle,
|
||||
Kind: kind,
|
||||
Lang: lyrics.Lang,
|
||||
Line: lines,
|
||||
TokenLine: tokenLines,
|
||||
Offset: lyrics.Offset,
|
||||
Synced: lyrics.Synced,
|
||||
}
|
||||
|
||||
@ -98,7 +98,9 @@ func (api *Router) GetLyrics(r *http.Request) (*responses.Subsonic, error) {
|
||||
response := newResponse()
|
||||
lyricsResponse := responses.Lyrics{}
|
||||
response.Lyrics = &lyricsResponse
|
||||
mediaFiles, err := api.ds.MediaFile(r.Context()).GetAll(filter.SongsByArtistTitleWithLyricsFirst(artist, title))
|
||||
opts := filter.SongsByArtistTitleWithLyricsFirst(artist, title)
|
||||
opts.Max = 0
|
||||
mediaFiles, err := api.ds.MediaFile(r.Context()).GetAll(opts)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -108,25 +110,26 @@ func (api *Router) GetLyrics(r *http.Request) (*responses.Subsonic, error) {
|
||||
return response, nil
|
||||
}
|
||||
|
||||
structuredLyrics, err := api.lyrics.GetLyrics(r.Context(), &mediaFiles[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
for i := range mediaFiles {
|
||||
structuredLyrics, err := api.lyrics.GetLyrics(r.Context(), &mediaFiles[i])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(structuredLyrics) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
lyricsResponse.Artist = artist
|
||||
lyricsResponse.Title = title
|
||||
|
||||
var lyricsText strings.Builder
|
||||
for _, line := range structuredLyrics[0].Line {
|
||||
lyricsText.WriteString(line.Value + "\n")
|
||||
}
|
||||
lyricsResponse.Value = lyricsText.String()
|
||||
break
|
||||
}
|
||||
|
||||
if len(structuredLyrics) == 0 {
|
||||
return response, nil
|
||||
}
|
||||
|
||||
lyricsResponse.Artist = artist
|
||||
lyricsResponse.Title = title
|
||||
|
||||
var lyricsText strings.Builder
|
||||
for _, line := range structuredLyrics[0].Line {
|
||||
lyricsText.WriteString(line.Value + "\n")
|
||||
}
|
||||
|
||||
lyricsResponse.Value = lyricsText.String()
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
|
||||
@ -186,6 +186,36 @@ var _ = Describe("MediaRetrievalController", func() {
|
||||
Expect(response.Lyrics.Title).To(Equal("Never Gonna Give You Up"))
|
||||
Expect(response.Lyrics.Value).To(Equal("We're no strangers to love\nYou know the rules and so do I\n"))
|
||||
})
|
||||
|
||||
It("should continue searching candidates for sidecar lyrics", func() {
|
||||
conf.Server.LyricsPriority = ".ttml,embedded"
|
||||
r := newGetRequest("artist=Rick+Astley", "title=Never+Gonna+Give+You+Up")
|
||||
baseTime := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
mockRepo.SetData(model.MediaFiles{
|
||||
{
|
||||
ID: "1",
|
||||
Path: "tests/fixtures/01 Invisible (RED) Edit Version.mp3",
|
||||
Artist: "Rick Astley",
|
||||
Title: "Never Gonna Give You Up",
|
||||
Lyrics: "[]",
|
||||
UpdatedAt: baseTime.Add(2 * time.Hour), // Newer, but no TTML sidecar
|
||||
},
|
||||
{
|
||||
ID: "2",
|
||||
Path: "tests/fixtures/test.mp3",
|
||||
Artist: "Rick Astley",
|
||||
Title: "Never Gonna Give You Up",
|
||||
Lyrics: "[]",
|
||||
UpdatedAt: baseTime.Add(1 * time.Hour), // Older, but has TTML sidecar
|
||||
},
|
||||
})
|
||||
|
||||
response, err := router.GetLyrics(r)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
Expect(response.Lyrics.Artist).To(Equal("Rick Astley"))
|
||||
Expect(response.Lyrics.Title).To(Equal("Never Gonna Give You Up"))
|
||||
Expect(response.Lyrics.Value).To(Equal("We're no strangers to love\nYou know the rules and so do I\n"))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("GetLyricsBySongId", func() {
|
||||
@ -202,6 +232,11 @@ var _ = Describe("MediaRetrievalController", func() {
|
||||
|
||||
Expect(realLyric.DisplayArtist).To(Equal(expectedLyric.DisplayArtist))
|
||||
Expect(realLyric.DisplayTitle).To(Equal(expectedLyric.DisplayTitle))
|
||||
expectedKind := expectedLyric.Kind
|
||||
if expectedKind == "" {
|
||||
expectedKind = "main"
|
||||
}
|
||||
Expect(realLyric.Kind).To(Equal(expectedKind))
|
||||
Expect(realLyric.Lang).To(Equal(expectedLyric.Lang))
|
||||
Expect(realLyric.Synced).To(Equal(expectedLyric.Synced))
|
||||
|
||||
@ -222,6 +257,40 @@ var _ = Describe("MediaRetrievalController", func() {
|
||||
Expect(*realLine.Start).To(Equal(*expectedLine.Start))
|
||||
}
|
||||
}
|
||||
|
||||
Expect(realLyric.TokenLine).To(HaveLen(len(expectedLyric.TokenLine)))
|
||||
for j, realTokenLine := range realLyric.TokenLine {
|
||||
expectedTokenLine := expectedLyric.TokenLine[j]
|
||||
Expect(realTokenLine.Index).To(Equal(expectedTokenLine.Index))
|
||||
Expect(realTokenLine.Value).To(Equal(expectedTokenLine.Value))
|
||||
if expectedTokenLine.Start == nil {
|
||||
Expect(realTokenLine.Start).To(BeNil())
|
||||
} else {
|
||||
Expect(*realTokenLine.Start).To(Equal(*expectedTokenLine.Start))
|
||||
}
|
||||
if expectedTokenLine.End == nil {
|
||||
Expect(realTokenLine.End).To(BeNil())
|
||||
} else {
|
||||
Expect(*realTokenLine.End).To(Equal(*expectedTokenLine.End))
|
||||
}
|
||||
|
||||
Expect(realTokenLine.Token).To(HaveLen(len(expectedTokenLine.Token)))
|
||||
for k, realToken := range realTokenLine.Token {
|
||||
expectedToken := expectedTokenLine.Token[k]
|
||||
Expect(realToken.Value).To(Equal(expectedToken.Value))
|
||||
Expect(realToken.Role).To(Equal(expectedToken.Role))
|
||||
if expectedToken.Start == nil {
|
||||
Expect(realToken.Start).To(BeNil())
|
||||
} else {
|
||||
Expect(*realToken.Start).To(Equal(*expectedToken.Start))
|
||||
}
|
||||
if expectedToken.End == nil {
|
||||
Expect(realToken.End).To(BeNil())
|
||||
} else {
|
||||
Expect(*realToken.End).To(Equal(*expectedToken.End))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -323,6 +392,238 @@ var _ = Describe("MediaRetrievalController", func() {
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
It("should return multilingual TTML sidecar lyrics", func() {
|
||||
conf.Server.LyricsPriority = ".ttml,embedded"
|
||||
r := newGetRequest("id=1")
|
||||
|
||||
mockRepo.SetData(model.MediaFiles{
|
||||
{
|
||||
ID: "1",
|
||||
Path: "tests/fixtures/test.mp3",
|
||||
Artist: "Rick Astley",
|
||||
Title: "Never Gonna Give You Up",
|
||||
Lyrics: "[]",
|
||||
},
|
||||
})
|
||||
|
||||
response, err := router.GetLyricsBySongId(r)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
porTime := int64(18800)
|
||||
ttmlTime := int64(22800)
|
||||
compareResponses(response.LyricsList, responses.LyricsList{
|
||||
StructuredLyrics: responses.StructuredLyrics{
|
||||
{
|
||||
DisplayArtist: "Rick Astley",
|
||||
DisplayTitle: "Never Gonna Give You Up",
|
||||
Lang: "eng",
|
||||
Synced: true,
|
||||
Line: []responses.Line{
|
||||
{
|
||||
Start: ×[0],
|
||||
Value: "We're no strangers to love",
|
||||
},
|
||||
{
|
||||
Start: &ttmlTime,
|
||||
Value: "You know the rules and so do I",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
DisplayArtist: "Rick Astley",
|
||||
DisplayTitle: "Never Gonna Give You Up",
|
||||
Lang: "por",
|
||||
Synced: true,
|
||||
Line: []responses.Line{
|
||||
{
|
||||
Start: &porTime,
|
||||
Value: "Nao somos estranhos ao amor",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
It("should return metadata-linked translation and pronunciation tracks from TTML", func() {
|
||||
conf.Server.LyricsPriority = ".ttml,embedded"
|
||||
r := newGetRequest("id=1")
|
||||
|
||||
mockRepo.SetData(model.MediaFiles{
|
||||
{
|
||||
ID: "1",
|
||||
Path: "tests/fixtures/test-metadata.mp3",
|
||||
Artist: "Rick Astley",
|
||||
Title: "Never Gonna Give You Up",
|
||||
Lyrics: "[]",
|
||||
},
|
||||
})
|
||||
|
||||
response, err := router.GetLyricsBySongId(r)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
mainStartA := int64(1000)
|
||||
mainStartB := int64(2000)
|
||||
tokenStartA := int64(2000)
|
||||
tokenEndA := int64(2300)
|
||||
tokenStartB := int64(2300)
|
||||
tokenEndB := int64(2600)
|
||||
compareResponses(response.LyricsList, responses.LyricsList{
|
||||
StructuredLyrics: responses.StructuredLyrics{
|
||||
{
|
||||
DisplayArtist: "Rick Astley",
|
||||
DisplayTitle: "Never Gonna Give You Up",
|
||||
Kind: "main",
|
||||
Lang: "ja",
|
||||
Synced: true,
|
||||
Line: []responses.Line{
|
||||
{
|
||||
Start: &mainStartA,
|
||||
Value: "こんにちは",
|
||||
},
|
||||
{
|
||||
Start: &mainStartB,
|
||||
Value: "こんばんは",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
DisplayArtist: "Rick Astley",
|
||||
DisplayTitle: "Never Gonna Give You Up",
|
||||
Kind: "translation",
|
||||
Lang: "es",
|
||||
Synced: true,
|
||||
Line: []responses.Line{
|
||||
{
|
||||
Start: &mainStartA,
|
||||
Value: "Hola",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
DisplayArtist: "Rick Astley",
|
||||
DisplayTitle: "Never Gonna Give You Up",
|
||||
Kind: "pronunciation",
|
||||
Lang: "ja-latn",
|
||||
Synced: true,
|
||||
Line: []responses.Line{
|
||||
{
|
||||
Start: &mainStartB,
|
||||
Value: "konni",
|
||||
},
|
||||
},
|
||||
TokenLine: []responses.TokenLine{
|
||||
{
|
||||
Index: 0,
|
||||
Start: &mainStartB,
|
||||
End: &tokenEndB,
|
||||
Value: "konni",
|
||||
Token: []responses.LyricToken{
|
||||
{
|
||||
Start: &tokenStartA,
|
||||
End: &tokenEndA,
|
||||
Value: "ko",
|
||||
},
|
||||
{
|
||||
Start: &tokenStartB,
|
||||
End: &tokenEndB,
|
||||
Value: "nni",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
It("should return tokenized lines for songLyrics v2 clients", func() {
|
||||
r := newGetRequest("id=1")
|
||||
|
||||
lineStart := int64(1000)
|
||||
lineEnd := int64(3000)
|
||||
tokenStartA := int64(1000)
|
||||
tokenEndA := int64(1400)
|
||||
tokenStartB := int64(2000)
|
||||
tokenEndB := int64(2500)
|
||||
lyricsJson, err := json.Marshal(model.LyricList{
|
||||
{
|
||||
Lang: "eng",
|
||||
Synced: true,
|
||||
Line: []model.Line{
|
||||
{
|
||||
Start: &lineStart,
|
||||
End: &lineEnd,
|
||||
Value: "Hello echo",
|
||||
Token: []model.Token{
|
||||
{
|
||||
Start: &tokenStartA,
|
||||
End: &tokenEndA,
|
||||
Value: "Hello",
|
||||
},
|
||||
{
|
||||
Start: &tokenStartB,
|
||||
End: &tokenEndB,
|
||||
Value: "echo",
|
||||
Role: "x-bg",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
|
||||
mockRepo.SetData(model.MediaFiles{
|
||||
{
|
||||
ID: "1",
|
||||
Artist: "Rick Astley",
|
||||
Title: "Never Gonna Give You Up",
|
||||
Lyrics: string(lyricsJson),
|
||||
},
|
||||
})
|
||||
|
||||
response, err := router.GetLyricsBySongId(r)
|
||||
Expect(err).ToNot(HaveOccurred())
|
||||
compareResponses(response.LyricsList, responses.LyricsList{
|
||||
StructuredLyrics: responses.StructuredLyrics{
|
||||
{
|
||||
DisplayArtist: "Rick Astley",
|
||||
DisplayTitle: "Never Gonna Give You Up",
|
||||
Lang: "eng",
|
||||
Synced: true,
|
||||
Line: []responses.Line{
|
||||
{
|
||||
Start: &lineStart,
|
||||
Value: "Hello echo",
|
||||
},
|
||||
},
|
||||
TokenLine: []responses.TokenLine{
|
||||
{
|
||||
Index: 0,
|
||||
Start: &lineStart,
|
||||
End: &lineEnd,
|
||||
Value: "Hello echo",
|
||||
Token: []responses.LyricToken{
|
||||
{
|
||||
Start: &tokenStartA,
|
||||
End: &tokenEndA,
|
||||
Value: "Hello",
|
||||
},
|
||||
{
|
||||
Start: &tokenStartB,
|
||||
End: &tokenEndB,
|
||||
Value: "echo",
|
||||
Role: "x-bg",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@ func (api *Router) GetOpenSubsonicExtensions(_ *http.Request) (*responses.Subson
|
||||
response.OpenSubsonicExtensions = &responses.OpenSubsonicExtensions{
|
||||
{Name: "transcodeOffset", Versions: []int32{1}},
|
||||
{Name: "formPost", Versions: []int32{1}},
|
||||
{Name: "songLyrics", Versions: []int32{1}},
|
||||
{Name: "songLyrics", Versions: []int32{1, 2}},
|
||||
{Name: "indexBasedQueue", Versions: []int32{1}},
|
||||
{Name: "transcoding", Versions: []int32{1}},
|
||||
}
|
||||
|
||||
@ -38,7 +38,7 @@ var _ = Describe("GetOpenSubsonicExtensions", func() {
|
||||
HaveLen(5),
|
||||
ContainElement(responses.OpenSubsonicExtension{Name: "transcodeOffset", Versions: []int32{1}}),
|
||||
ContainElement(responses.OpenSubsonicExtension{Name: "formPost", Versions: []int32{1}}),
|
||||
ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1}}),
|
||||
ContainElement(responses.OpenSubsonicExtension{Name: "songLyrics", Versions: []int32{1, 2}}),
|
||||
ContainElement(responses.OpenSubsonicExtension{Name: "indexBasedQueue", Versions: []int32{1}}),
|
||||
ContainElement(responses.OpenSubsonicExtension{Name: "transcoding", Versions: []int32{1}}),
|
||||
))
|
||||
|
||||
@ -537,13 +537,30 @@ type Line struct {
|
||||
Value string `xml:",chardata" json:"value"`
|
||||
}
|
||||
|
||||
// LyricToken is one timed fragment of a lyric line as exposed in the
// tokenLine section of a structured lyric. Start and End are optional and are
// left out of the serialized output when nil; Value is always emitted. Role is
// an optional free-form label copied from the source lyric (e.g. "x-bg").
type LyricToken struct {
	Start *int64 `xml:"start,attr,omitempty" json:"start,omitempty"`
	End   *int64 `xml:"end,attr,omitempty" json:"end,omitempty"`
	Value string `xml:"value,attr" json:"value"`
	Role  string `xml:"role,attr,omitempty" json:"role,omitempty"`
}

// TokenLine groups the tokens that belong to one lyric line. Index identifies
// the corresponding entry of the structured lyric's line list. Start, End,
// Value and Token are all optional and omitted from the output when unset.
type TokenLine struct {
	Index int32        `xml:"index,attr" json:"index"`
	Start *int64       `xml:"start,attr,omitempty" json:"start,omitempty"`
	End   *int64       `xml:"end,attr,omitempty" json:"end,omitempty"`
	Value string       `xml:"value,attr,omitempty" json:"value,omitempty"`
	Token []LyricToken `xml:"token,omitempty" json:"token,omitempty"`
}
|
||||
|
||||
type StructuredLyric struct {
|
||||
DisplayArtist string `xml:"displayArtist,attr,omitempty" json:"displayArtist,omitempty"`
|
||||
DisplayTitle string `xml:"displayTitle,attr,omitempty" json:"displayTitle,omitempty"`
|
||||
Lang string `xml:"lang,attr" json:"lang"`
|
||||
Line []Line `xml:"line" json:"line"`
|
||||
Offset *int64 `xml:"offset,attr,omitempty" json:"offset,omitempty"`
|
||||
Synced bool `xml:"synced,attr" json:"synced"`
|
||||
DisplayArtist string `xml:"displayArtist,attr,omitempty" json:"displayArtist,omitempty"`
|
||||
DisplayTitle string `xml:"displayTitle,attr,omitempty" json:"displayTitle,omitempty"`
|
||||
Kind string `xml:"kind,attr,omitempty" json:"kind,omitempty"`
|
||||
Lang string `xml:"lang,attr" json:"lang"`
|
||||
Line []Line `xml:"line" json:"line"`
|
||||
TokenLine []TokenLine `xml:"tokenLine,omitempty" json:"tokenLine,omitempty"`
|
||||
Offset *int64 `xml:"offset,attr,omitempty" json:"offset,omitempty"`
|
||||
Synced bool `xml:"synced,attr" json:"synced"`
|
||||
}
|
||||
|
||||
type StructuredLyrics []StructuredLyric
|
||||
|
||||
2
tests/fixtures/bom-test.ttml
vendored
Normal file
2
tests/fixtures/bom-test.ttml
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml"><body><div xml:lang="eng"><p begin="00:00:00.00">BOM test line</p></div></body></tt>
|
||||
BIN
tests/fixtures/bom-utf16-test.ttml
vendored
Normal file
BIN
tests/fixtures/bom-utf16-test.ttml
vendored
Normal file
Binary file not shown.
25
tests/fixtures/test-metadata.ttml
vendored
Normal file
25
tests/fixtures/test-metadata.ttml
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:itunes="http://music.apple.com/lyric-ttml-internal">
|
||||
<head>
|
||||
<metadata>
|
||||
<iTunesMetadata xmlns="http://music.apple.com/lyric-ttml-internal">
|
||||
<translations>
|
||||
<translation xml:lang="es">
|
||||
<text for="L1">Hola</text>
|
||||
</translation>
|
||||
</translations>
|
||||
<transliterations>
|
||||
<transliteration xml:lang="ja-Latn">
|
||||
<text for="L2"><span begin="00:02.000" end="00:02.300" xmlns="http://www.w3.org/ns/ttml">ko</span><span begin="00:02.300" end="00:02.600" xmlns="http://www.w3.org/ns/ttml">nni</span></text>
|
||||
</transliteration>
|
||||
</transliterations>
|
||||
</iTunesMetadata>
|
||||
</metadata>
|
||||
</head>
|
||||
<body xml:lang="ja">
|
||||
<div>
|
||||
<p begin="00:01.000" end="00:01.500" itunes:key="L1">こんにちは</p>
|
||||
<p begin="00:02.000" end="00:02.700" itunes:key="L2">こんばんは</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>
|
||||
12
tests/fixtures/test.ttml
vendored
Normal file
12
tests/fixtures/test.ttml
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttp="http://www.w3.org/ns/ttml#parameter" ttp:frameRate="30" ttp:subFrameRate="2" ttp:tickRate="10">
|
||||
<body>
|
||||
<div xml:lang="eng">
|
||||
<p begin="00:00:18.80">We're no strangers to love</p>
|
||||
<p begin="00:00:22:24">You know the rules and so do I</p>
|
||||
</div>
|
||||
<div xml:lang="por">
|
||||
<p begin="188t">Nao somos estranhos ao amor</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>
|
||||
@ -9,6 +9,7 @@ export const PLAYER_SET_VOLUME = 'PLAYER_SET_VOLUME'
|
||||
export const PLAYER_SET_MODE = 'PLAYER_SET_MODE'
|
||||
export const TRANSCODING_SET_PROFILE = 'TRANSCODING_SET_PROFILE'
|
||||
export const PLAYER_REFRESH_QUEUE = 'PLAYER_REFRESH_QUEUE'
|
||||
export const PLAYER_UPDATE_LYRIC = 'PLAYER_UPDATE_LYRIC'
|
||||
|
||||
export const setTrack = (data) => ({
|
||||
type: PLAYER_SET_TRACK,
|
||||
@ -114,3 +115,8 @@ export const refreshQueue = (resolvedUrls) => ({
|
||||
type: PLAYER_REFRESH_QUEUE,
|
||||
data: resolvedUrls,
|
||||
})
|
||||
|
||||
// Action creator: carries a lyric string for one queued track, identified by
// its trackId, under the PLAYER_UPDATE_LYRIC action type.
export const updateQueueLyric = (trackId, lyric) => {
  const data = { trackId, lyric }
  return { type: PLAYER_UPDATE_LYRIC, data }
}
|
||||
|
||||
1228
ui/src/audioplayer/KaraokeLyricsOverlay.jsx
Normal file
1228
ui/src/audioplayer/KaraokeLyricsOverlay.jsx
Normal file
File diff suppressed because it is too large
Load Diff
@ -22,6 +22,7 @@ import {
|
||||
refreshQueue,
|
||||
setPlayMode,
|
||||
setTranscodingProfile,
|
||||
updateQueueLyric,
|
||||
setVolume,
|
||||
syncQueue,
|
||||
} from '../actions'
|
||||
@ -33,6 +34,25 @@ import { keyMap } from '../hotkeys'
|
||||
import keyHandlers from './keyHandlers'
|
||||
import { calculateGain } from '../utils/calculateReplayGain'
|
||||
import { detectBrowserProfile, decisionService } from '../transcode'
|
||||
import {
|
||||
getPreferredLyricLanguage,
|
||||
hasStructuredLyricContent,
|
||||
selectLyricLayers,
|
||||
structuredLyricToLrc,
|
||||
} from './lyrics'
|
||||
import KaraokeLyricsOverlay from './KaraokeLyricsOverlay'
|
||||
|
||||
const emptyLyricLayers = {
|
||||
main: null,
|
||||
translation: null,
|
||||
pronunciation: null,
|
||||
}
|
||||
|
||||
// Returns a layers object that always has the three keys main, translation
// and pronunciation; any layer that is missing or falsy becomes null.
const normalizeLyricLayers = (layers) => {
  const { main, translation, pronunciation } = layers || {}
  return {
    main: main || null,
    translation: translation || null,
    pronunciation: pronunciation || null,
  }
}
|
||||
|
||||
const Player = () => {
|
||||
const theme = useCurrentTheme()
|
||||
@ -120,6 +140,72 @@ const Player = () => {
|
||||
const gainInfo = useSelector((state) => state.replayGain)
|
||||
const [context, setContext] = useState(null)
|
||||
const [gainNode, setGainNode] = useState(null)
|
||||
const lyricCacheRef = useRef(new Map())
|
||||
const lyricRequestIdRef = useRef(0)
|
||||
const playerRef = useRef(null)
|
||||
const [karaokeVisible, setKaraokeVisible] = useState(false)
|
||||
const [selectedLyricLayers, setSelectedLyricLayers] =
|
||||
useState(emptyLyricLayers)
|
||||
const [showTranslation, setShowTranslation] = useState(false)
|
||||
const [showPronunciation, setShowPronunciation] = useState(false)
|
||||
const currentTrackId = playerState.current?.trackId
|
||||
const currentTrackIsRadio = playerState.current?.isRadio
|
||||
const selectedStructuredLyric = selectedLyricLayers.main
|
||||
const hasKaraokeLyric = hasStructuredLyricContent(selectedStructuredLyric)
|
||||
const hasTranslationLyric = hasStructuredLyricContent(
|
||||
selectedLyricLayers.translation,
|
||||
)
|
||||
const hasPronunciationLyric = hasStructuredLyricContent(
|
||||
selectedLyricLayers.pronunciation,
|
||||
)
|
||||
|
||||
// Pushes a lyric string into the mounted player component's own state.
// Every audioLists entry whose trackId matches gets the new lyric, and the
// top-level `lyric` state is re-derived from the entry whose musicSrc equals
// the state's musicSrc. Does nothing when there is no trackId, no player ref,
// or the ref has no setState; returns null from the updater (no state change)
// when nothing would actually differ.
const applyLyricToRuntimePlayer = useCallback((trackId, lyric) => {
  const player = playerRef.current
  if (!trackId || !player || typeof player.setState !== 'function') {
    return
  }

  player.setState((prevState) => {
    const previousLists = Array.isArray(prevState.audioLists)
      ? prevState.audioLists
      : []

    let touched = false
    const audioLists = previousLists.map((entry) => {
      if (entry.trackId === trackId && entry.lyric !== lyric) {
        touched = true
        return { ...entry, lyric }
      }
      return entry
    })

    // Keep the top-level lyric in step with the entry currently loaded.
    const playing = audioLists.find(
      (entry) => entry.musicSrc === prevState.musicSrc,
    )
    const lyricForPlaying =
      typeof playing?.lyric === 'string' ? playing.lyric : prevState.lyric

    if (touched || lyricForPlaying !== prevState.lyric) {
      return { audioLists, lyric: lyricForPlaying }
    }
    return null
  })
}, [])
|
||||
|
||||
useEffect(() => {
|
||||
if (
|
||||
@ -166,6 +252,107 @@ const Player = () => {
|
||||
return () => window.removeEventListener('beforeunload', handleBeforeUnload)
|
||||
}, [playerState, audioInstance])
|
||||
|
||||
useEffect(() => {
|
||||
if (!currentTrackId || currentTrackIsRadio) {
|
||||
setSelectedLyricLayers(emptyLyricLayers)
|
||||
setShowTranslation(false)
|
||||
setShowPronunciation(false)
|
||||
setKaraokeVisible(false)
|
||||
return
|
||||
}
|
||||
|
||||
const cached = lyricCacheRef.current.get(currentTrackId)
|
||||
let layers = emptyLyricLayers
|
||||
if (cached && typeof cached !== 'string') {
|
||||
if (cached.layers) {
|
||||
layers = normalizeLyricLayers(cached.layers)
|
||||
} else if (cached.structuredLyric) {
|
||||
layers = normalizeLyricLayers({
|
||||
main: cached.structuredLyric,
|
||||
})
|
||||
}
|
||||
}
|
||||
setSelectedLyricLayers(layers)
|
||||
setShowTranslation(false)
|
||||
setShowPronunciation(hasStructuredLyricContent(layers.pronunciation))
|
||||
}, [currentTrackId, currentTrackIsRadio])
|
||||
|
||||
useEffect(() => {
|
||||
lyricRequestIdRef.current += 1
|
||||
const requestId = lyricRequestIdRef.current
|
||||
|
||||
if (!currentTrackId || currentTrackIsRadio) {
|
||||
return
|
||||
}
|
||||
|
||||
const cached = lyricCacheRef.current.get(currentTrackId)
|
||||
if (cached !== undefined) {
|
||||
const cachedLyric =
|
||||
typeof cached === 'string' ? cached : cached?.lrc || ''
|
||||
const cachedLayers =
|
||||
typeof cached === 'string'
|
||||
? emptyLyricLayers
|
||||
: cached?.layers
|
||||
? normalizeLyricLayers(cached.layers)
|
||||
: normalizeLyricLayers({ main: cached?.structuredLyric })
|
||||
|
||||
setSelectedLyricLayers(cachedLayers)
|
||||
setShowTranslation(false)
|
||||
setShowPronunciation(
|
||||
hasStructuredLyricContent(cachedLayers.pronunciation),
|
||||
)
|
||||
if (cachedLyric) {
|
||||
dispatch(updateQueueLyric(currentTrackId, cachedLyric))
|
||||
applyLyricToRuntimePlayer(currentTrackId, cachedLyric)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
subsonic
|
||||
.getLyricsBySongId(currentTrackId)
|
||||
.then((resp) => {
|
||||
if (lyricRequestIdRef.current !== requestId) {
|
||||
return
|
||||
}
|
||||
|
||||
const structuredLyrics =
|
||||
resp?.json?.['subsonic-response']?.lyricsList?.structuredLyrics || []
|
||||
const layers = selectLyricLayers(
|
||||
structuredLyrics,
|
||||
getPreferredLyricLanguage(),
|
||||
)
|
||||
const lyric = layers.main ? structuredLyricToLrc(layers.main) : ''
|
||||
lyricCacheRef.current.set(currentTrackId, {
|
||||
lrc: lyric,
|
||||
layers,
|
||||
})
|
||||
setSelectedLyricLayers(layers)
|
||||
setShowTranslation(false)
|
||||
setShowPronunciation(hasStructuredLyricContent(layers.pronunciation))
|
||||
|
||||
if (lyric !== '') {
|
||||
dispatch(updateQueueLyric(currentTrackId, lyric))
|
||||
applyLyricToRuntimePlayer(currentTrackId, lyric)
|
||||
}
|
||||
})
|
||||
.catch(() => {
|
||||
if (lyricRequestIdRef.current !== requestId) {
|
||||
return
|
||||
}
|
||||
setSelectedLyricLayers(emptyLyricLayers)
|
||||
setShowTranslation(false)
|
||||
setShowPronunciation(false)
|
||||
// Do not cache network/request failures as empty lyrics, so we can retry.
|
||||
lyricCacheRef.current.delete(currentTrackId)
|
||||
})
|
||||
}, [dispatch, currentTrackId, currentTrackIsRadio, applyLyricToRuntimePlayer])
|
||||
|
||||
useEffect(() => {
|
||||
if (!hasKaraokeLyric && karaokeVisible) {
|
||||
setKaraokeVisible(false)
|
||||
}
|
||||
}, [hasKaraokeLyric, karaokeVisible])
|
||||
|
||||
const defaultOptions = useMemo(
|
||||
() => ({
|
||||
theme: playerTheme,
|
||||
@ -177,7 +364,7 @@ const Player = () => {
|
||||
clearPriorAudioLists: false,
|
||||
showDestroy: true,
|
||||
showDownload: false,
|
||||
showLyric: true,
|
||||
showLyric: false,
|
||||
showReload: false,
|
||||
toggleMode: !isDesktop,
|
||||
glassBg: false,
|
||||
@ -214,12 +401,24 @@ const Player = () => {
|
||||
(playerState.clear || playerState.playIndex === 0),
|
||||
clearPriorAudioLists: playerState.clear,
|
||||
extendsContent: (
|
||||
<PlayerToolbar id={current.trackId} isRadio={current.isRadio} />
|
||||
<PlayerToolbar
|
||||
id={current.trackId}
|
||||
isRadio={current.isRadio}
|
||||
onToggleLyrics={() => setKaraokeVisible((visible) => !visible)}
|
||||
lyricsActive={karaokeVisible}
|
||||
lyricsDisabled={!hasKaraokeLyric}
|
||||
/>
|
||||
),
|
||||
defaultVolume: isMobilePlayer ? 1 : playerState.volume,
|
||||
showMediaSession: !current.isRadio,
|
||||
}
|
||||
}, [playerState, defaultOptions, isMobilePlayer])
|
||||
}, [
|
||||
playerState,
|
||||
defaultOptions,
|
||||
isMobilePlayer,
|
||||
karaokeVisible,
|
||||
hasKaraokeLyric,
|
||||
])
|
||||
|
||||
const onAudioListsChange = useCallback(
|
||||
(_, audioLists, audioInfo) => dispatch(syncQueue(audioInfo, audioLists)),
|
||||
@ -391,6 +590,7 @@ const Player = () => {
|
||||
return (
|
||||
<ThemeProvider theme={createMuiTheme(theme)}>
|
||||
<ReactJkMusicPlayer
|
||||
ref={playerRef}
|
||||
{...options}
|
||||
className={classes.player}
|
||||
onAudioListsChange={onAudioListsChange}
|
||||
@ -406,6 +606,28 @@ const Player = () => {
|
||||
onBeforeDestroy={onBeforeDestroy}
|
||||
getAudioInstance={setAudioInstance}
|
||||
/>
|
||||
<KaraokeLyricsOverlay
|
||||
visible={karaokeVisible}
|
||||
mainLyric={selectedLyricLayers.main}
|
||||
translationLyric={selectedLyricLayers.translation}
|
||||
pronunciationLyric={selectedLyricLayers.pronunciation}
|
||||
showTranslation={showTranslation}
|
||||
showPronunciation={showPronunciation}
|
||||
translationEnabled={hasTranslationLyric}
|
||||
pronunciationEnabled={hasPronunciationLyric}
|
||||
onToggleTranslation={() =>
|
||||
setShowTranslation((previous) =>
|
||||
hasTranslationLyric ? !previous : false,
|
||||
)
|
||||
}
|
||||
onTogglePronunciation={() =>
|
||||
setShowPronunciation((previous) =>
|
||||
hasPronunciationLyric ? !previous : false,
|
||||
)
|
||||
}
|
||||
audioInstance={audioInstance}
|
||||
onClose={() => setKaraokeVisible(false)}
|
||||
/>
|
||||
<GlobalHotKeys handlers={handlers} keyMap={keyMap} allowChanges />
|
||||
</ThemeProvider>
|
||||
)
|
||||
|
||||
@ -4,7 +4,9 @@ import { useGetOne } from 'react-admin'
|
||||
import { GlobalHotKeys } from 'react-hotkeys'
|
||||
import IconButton from '@material-ui/core/IconButton'
|
||||
import { useMediaQuery } from '@material-ui/core'
|
||||
import Tooltip from '@material-ui/core/Tooltip'
|
||||
import { RiSaveLine } from 'react-icons/ri'
|
||||
import { RiFileMusicLine } from 'react-icons/ri'
|
||||
import { LoveButton, useToggleLove } from '../common'
|
||||
import { openSaveQueueDialog } from '../actions'
|
||||
import { keyMap } from '../hotkeys'
|
||||
@ -55,7 +57,13 @@ const useStyles = makeStyles((theme) => ({
|
||||
},
|
||||
}))
|
||||
|
||||
const PlayerToolbar = ({ id, isRadio }) => {
|
||||
const PlayerToolbar = ({
|
||||
id,
|
||||
isRadio,
|
||||
onToggleLyrics,
|
||||
lyricsActive = false,
|
||||
lyricsDisabled = false,
|
||||
}) => {
|
||||
const dispatch = useDispatch()
|
||||
const { data, loading } = useGetOne('song', id, { enabled: !!id && !isRadio })
|
||||
const [toggleLove, toggling] = useToggleLove('song', data)
|
||||
@ -99,6 +107,25 @@ const PlayerToolbar = ({ id, isRadio }) => {
|
||||
/>
|
||||
)
|
||||
|
||||
const toggleLyricsButton = (
|
||||
<Tooltip title="Toggle synchronized lyrics">
|
||||
<span>
|
||||
<IconButton
|
||||
size={isDesktop ? 'small' : undefined}
|
||||
onClick={onToggleLyrics}
|
||||
disabled={!onToggleLyrics || lyricsDisabled}
|
||||
data-testid="toggle-lyrics-button"
|
||||
className={buttonClass}
|
||||
color={lyricsActive ? 'primary' : 'default'}
|
||||
>
|
||||
<RiFileMusicLine
|
||||
className={!isDesktop ? classes.mobileIcon : undefined}
|
||||
/>
|
||||
</IconButton>
|
||||
</span>
|
||||
</Tooltip>
|
||||
)
|
||||
|
||||
return (
|
||||
<>
|
||||
<GlobalHotKeys keyMap={keyMap} handlers={handlers} allowChanges />
|
||||
@ -106,11 +133,13 @@ const PlayerToolbar = ({ id, isRadio }) => {
|
||||
<li className={`${listItemClass} item`}>
|
||||
{saveQueueButton}
|
||||
{loveButton}
|
||||
{toggleLyricsButton}
|
||||
</li>
|
||||
) : (
|
||||
<>
|
||||
<li className={`${listItemClass} item`}>{saveQueueButton}</li>
|
||||
<li className={`${listItemClass} item`}>{loveButton}</li>
|
||||
<li className={`${listItemClass} item`}>{toggleLyricsButton}</li>
|
||||
</>
|
||||
)}
|
||||
</>
|
||||
|
||||
@ -71,6 +71,7 @@ describe('<PlayerToolbar />', () => {
|
||||
// Verify all three buttons are rendered
|
||||
expect(screen.getByTestId('save-queue-button')).toBeInTheDocument()
|
||||
expect(screen.getByTestId('love-button')).toBeInTheDocument()
|
||||
expect(screen.getByTestId('toggle-lyrics-button')).toBeInTheDocument()
|
||||
|
||||
// Verify desktop classes are applied
|
||||
expect(listItems[0].className).toContain('toolbar')
|
||||
@ -102,6 +103,14 @@ describe('<PlayerToolbar />', () => {
|
||||
type: 'OPEN_SAVE_QUEUE_DIALOG',
|
||||
})
|
||||
})
|
||||
|
||||
it('triggers lyric toggle callback when lyrics button is clicked', () => {
|
||||
const onToggleLyrics = vi.fn()
|
||||
render(<PlayerToolbar id="song-1" onToggleLyrics={onToggleLyrics} />)
|
||||
|
||||
fireEvent.click(screen.getByTestId('toggle-lyrics-button'))
|
||||
expect(onToggleLyrics).toHaveBeenCalledTimes(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe('Mobile layout', () => {
|
||||
@ -114,11 +123,12 @@ describe('<PlayerToolbar />', () => {
|
||||
|
||||
// Each button should be in its own list item
|
||||
const listItems = screen.getAllByRole('listitem')
|
||||
expect(listItems).toHaveLength(2)
|
||||
expect(listItems).toHaveLength(3)
|
||||
|
||||
// Verify all three buttons are rendered
|
||||
expect(screen.getByTestId('save-queue-button')).toBeInTheDocument()
|
||||
expect(screen.getByTestId('love-button')).toBeInTheDocument()
|
||||
expect(screen.getByTestId('toggle-lyrics-button')).toBeInTheDocument()
|
||||
|
||||
// Verify mobile classes are applied
|
||||
expect(listItems[0].className).toContain('mobileListItem')
|
||||
@ -140,6 +150,13 @@ describe('<PlayerToolbar />', () => {
|
||||
const loveButton = screen.getByTestId('love-button')
|
||||
expect(loveButton).toBeDisabled()
|
||||
})
|
||||
|
||||
it('disables lyrics button when lyrics are unavailable', () => {
|
||||
render(<PlayerToolbar id="song-1" lyricsDisabled={true} />)
|
||||
|
||||
const lyricsButton = screen.getByTestId('toggle-lyrics-button')
|
||||
expect(lyricsButton).toBeDisabled()
|
||||
})
|
||||
})
|
||||
|
||||
describe('Common behavior', () => {
|
||||
|
||||
617
ui/src/audioplayer/lyrics.js
Normal file
617
ui/src/audioplayer/lyrics.js
Normal file
@ -0,0 +1,617 @@
|
||||
// Canonicalises a language tag for comparison: lower-cases it and turns every
// underscore separator into a hyphen (e.g. "zh_Hant_TW" -> "zh-hant-tw").
// A string pattern passed to String.prototype.replace only replaces the first
// match, hence the global regex. Missing or non-string input yields ''.
const normalizeLanguageTag = (language) =>
  typeof language === 'string' ? language.toLowerCase().replace(/_/g, '-') : ''
|
||||
|
||||
const KARAOKE_SWITCH_EPSILON_MS = 18
|
||||
const LYRIC_KIND_MAIN = 'main'
|
||||
const LYRIC_KIND_TRANSLATION = 'translation'
|
||||
const LYRIC_KIND_PRONUNCIATION = 'pronunciation'
|
||||
|
||||
// Renders a time component as text, prefixing '0' when the rendered value is
// exactly one character long; anything else is returned unchanged.
const padTime = (value) => {
  const text = value.toString()
  if (text.length !== 1) {
    return text
  }
  return '0' + text
}
|
||||
|
||||
// Converts a raw time value to a finite number, or null when there is none.
// null, undefined and blank strings are treated as "no time": Number(null)
// and Number('') both evaluate to 0, which would otherwise turn a missing
// timestamp into 0 and defeat every `toTime(x) ?? fallback` chain.
// Non-numeric and non-finite values also yield null.
const toTime = (value) => {
  if (value == null) {
    return null
  }
  if (typeof value === 'string' && value.trim() === '') {
    return null
  }
  const numeric = Number(value)
  return Number.isFinite(numeric) ? numeric : null
}
|
||||
|
||||
// Sort comparator for times that may be null/undefined. Missing values order
// after present ones, two missing values compare equal, and two present
// values compare by numeric difference.
const compareNullableTime = (a, b) => {
  const aMissing = a == null
  const bMissing = b == null
  if (aMissing && bMissing) {
    return 0
  }
  if (aMissing || bMissing) {
    return aMissing ? 1 : -1
  }
  return a - b
}
|
||||
|
||||
// Returns a new array of tokens ordered by start, then by end, then by their
// position in the input. The input array is not mutated; the temporary
// `order` field used for the tie-break is stripped from the result.
const sortTokensByStart = (tokens) => {
  const decorated = tokens.map((token, order) => ({ ...token, order }))

  decorated.sort((left, right) => {
    for (const key of ['start', 'end']) {
      const difference = compareNullableTime(left[key], right[key])
      if (difference !== 0) {
        return difference
      }
    }
    return left.order - right.order
  })

  return decorated.map(({ order, ...token }) => token)
}
|
||||
|
||||
// True when two (already normalised) language tags are equal, or when one is
// the other followed by a "-subtag" suffix, in either direction. Empty or
// missing tags never match.
const languageMatch = (candidate, preferred) => {
  if (!candidate || !preferred) {
    return false
  }
  if (candidate === preferred) {
    return true
  }
  const candidateIsNarrower = candidate.startsWith(`${preferred}-`)
  return candidateIsNarrower || preferred.startsWith(`${candidate}-`)
}
|
||||
|
||||
// True when the lyric is flagged as synced and at least one of its lines has
// a usable numeric start time. Always returns a strict boolean.
// A null/absent start is not a timestamp (Number(null) would read as 0), and
// null entries in the line array are skipped instead of throwing.
const hasTimedLines = (lyric) =>
  Boolean(
    lyric &&
      lyric.synced &&
      Array.isArray(lyric.line) &&
      lyric.line.some(
        (line) => line?.start != null && Number.isFinite(Number(line.start)),
      ),
  )
|
||||
|
||||
// Converts a raw token into { start, end, value, role }. Returns null when
// the token is missing or its value is not a string with non-whitespace
// content. start/end go through toTime; a non-string role becomes ''.
const normalizeToken = (token) => {
  const text = typeof token?.value === 'string' ? token.value : ''
  if (!token || text.trim() === '') {
    return null
  }

  const { start, end, role } = token
  return {
    start: toTime(start),
    end: toTime(end),
    value: text,
    role: typeof role === 'string' ? role : '',
  }
}
|
||||
|
||||
// Converts a raw tokenLine entry into { index, start, end, value, tokens }.
// index falls back to fallbackIndex when the entry carries no finite index;
// tokens are normalised, unusable ones dropped, and the rest sorted by start.
const normalizeTokenLine = (tokenLine, fallbackIndex) => {
  const declaredIndex = Number(tokenLine?.index)
  const rawTokens = Array.isArray(tokenLine?.token) ? tokenLine.token : []

  const usable = []
  for (const raw of rawTokens) {
    const token = normalizeToken(raw)
    if (token) {
      usable.push(token)
    }
  }

  return {
    index: Number.isFinite(declaredIndex) ? declaredIndex : fallbackIndex,
    start: toTime(tokenLine?.start),
    end: toTime(tokenLine?.end),
    value: typeof tokenLine?.value === 'string' ? tokenLine.value : '',
    tokens: sortTokensByStart(usable),
  }
}
|
||||
|
||||
// Maps a raw `kind` value onto one of the three known lyric kinds. The input
// is lower-cased and trimmed; anything other than the translation or
// pronunciation kind (including a missing value) is treated as main.
const normalizeLyricKind = (kind) => {
  const key = (kind || '').toLowerCase().trim()
  if (key === LYRIC_KIND_TRANSLATION || key === LYRIC_KIND_PRONUNCIATION) {
    return key
  }
  return LYRIC_KIND_MAIN
}
|
||||
|
||||
// Chooses one lyric from a list by language. Tries, in order: the preferred
// tag, the preferred tag's first subtag, then 'en'; if none match, the first
// lyric is returned. Returns null for a missing or empty list.
const pickLyricByLanguage = (lyrics, preferredLanguage) => {
  if (!Array.isArray(lyrics) || lyrics.length === 0) {
    return null
  }

  const wanted = normalizeLanguageTag(preferredLanguage)
  const [wantedBase] = wanted.split('-')

  for (const target of [wanted, wantedBase, 'en']) {
    const hit = lyrics.find((lyric) =>
      languageMatch(normalizeLanguageTag(lyric.lang), target),
    )
    if (hit) {
      return hit
    }
  }
  return lyrics[0]
}
|
||||
|
||||
// Returns the { start, end } window of lines[index]. When the line has no end
// of its own, the following line's start is used instead. A missing line
// yields { start: null, end: null }.
const lineTimeWindow = (lines, index) => {
  const current = lines[index]
  if (current) {
    const ownEnd = toTime(current.end)
    return {
      start: toTime(current.start),
      end: ownEnd ?? toTime(lines[index + 1]?.start),
    }
  }
  return { start: null, end: null }
}
|
||||
|
||||
// Splits a line that carries a single whole-line token into evenly timed
// per-word tokens. Returns null (meaning: leave the tokens alone) when
//   - the line has no text or fewer than two whitespace-separated chunks,
//   - the token text is empty, or neither equals the line text nor reaches
//     80% of its length (both compared lower-cased with whitespace removed),
//   - no positive-length time window can be derived from token or line.
// Otherwise the window is divided equally between the chunks and each chunk
// keeps its trailing whitespace and inherits the token's role.
const buildSyntheticWordTokens = (line, token) => {
  const lineText = typeof line?.value === 'string' ? line.value : ''
  if (lineText.trim() === '') {
    return null
  }

  const words = lineText.match(/\S+\s*/g) || []
  if (words.length < 2) {
    return null
  }

  const squash = (text) => text.replace(/\s+/g, ' ').trim().toLowerCase()
  const lineKey = squash(lineText)
  const tokenKey = squash(token?.value || '')
  if (!tokenKey || !lineKey) {
    return null
  }

  const lineChars = lineKey.replace(/\s+/g, '')
  const tokenChars = tokenKey.replace(/\s+/g, '')
  const coversLine =
    tokenChars === lineChars ||
    tokenChars.length >= Math.floor(lineChars.length * 0.8)
  if (!coversLine) {
    return null
  }

  // The token's own timing wins; the line's timing is the fallback.
  const windowStart = toTime(token?.start) ?? toTime(line?.start)
  const windowEnd = toTime(token?.end) ?? toTime(line?.end)
  const usableWindow =
    windowStart != null &&
    windowEnd != null &&
    Number.isFinite(windowStart) &&
    Number.isFinite(windowEnd) &&
    windowEnd > windowStart
  if (!usableWindow) {
    return null
  }

  const span = windowEnd - windowStart
  const role = typeof token?.role === 'string' ? token.role : ''
  return words.map((word, position) => ({
    start: windowStart + (span * position) / words.length,
    end: windowStart + (span * (position + 1)) / words.length,
    value: word,
    role,
  }))
}
|
||||
|
||||
// True when at least one token in any tokenLine of the structured lyric has
// a usable numeric start time. A null/absent start does not count:
// Number(null) is 0 and would otherwise be mistaken for a real timestamp.
export const hasTokenTiming = (structuredLyric) => {
  const tokenLines = structuredLyric?.tokenLine
  if (!Array.isArray(tokenLines)) {
    return false
  }

  const isTimed = (token) =>
    token?.start != null && Number.isFinite(Number(token.start))

  return tokenLines.some(
    (tokenLine) =>
      Array.isArray(tokenLine?.token) && tokenLine.token.some(isTimed),
  )
}
|
||||
|
||||
// True when the structured lyric has something to show: either a line whose
// value is a non-blank string, or token-level timing data.
export const hasStructuredLyricContent = (structuredLyric) => {
  if (!structuredLyric) {
    return false
  }

  const lines = structuredLyric.line
  const hasText =
    Array.isArray(lines) &&
    lines.some(
      (line) => typeof line?.value === 'string' && line.value.trim() !== '',
    )

  return hasText || hasTokenTiming(structuredLyric)
}
|
||||
|
||||
// Returns the language tag to prefer when choosing lyrics: the 'locale' entry
// in localStorage if set, otherwise navigator.language, otherwise 'en'.
export const getPreferredLyricLanguage = () => {
  try {
    if (typeof window !== 'undefined' && window.localStorage) {
      const stored = window.localStorage.getItem('locale')
      if (stored) {
        return stored
      }
    }
  } catch (error) {
    // Touching localStorage can throw (e.g. a SecurityError when storage is
    // blocked by browser policy). Lyric selection must not fail because of
    // that, so fall through to the browser language.
  }

  if (typeof navigator !== 'undefined' && navigator.language) {
    return navigator.language
  }
  return 'en'
}
|
||||
|
||||
// Picks the main, translation and pronunciation lyric to display.
// Only lyrics that pass hasTimedLines are considered. They are bucketed by
// normalised kind and one lyric per bucket is chosen by language. If no lyric
// is classified as main, the main layer is chosen from all timed lyrics.
// With no usable input every layer is null.
export const selectLyricLayers = (structuredLyrics, preferredLanguage) => {
  const timed = Array.isArray(structuredLyrics)
    ? structuredLyrics.filter(hasTimedLines)
    : []
  if (timed.length === 0) {
    return { main: null, translation: null, pronunciation: null }
  }

  const byKind = {
    [LYRIC_KIND_MAIN]: [],
    [LYRIC_KIND_TRANSLATION]: [],
    [LYRIC_KIND_PRONUNCIATION]: [],
  }
  timed.forEach((lyric) => {
    byKind[normalizeLyricKind(lyric?.kind)].push(lyric)
  })

  const pick = (candidates) => pickLyricByLanguage(candidates, preferredLanguage)
  const mainPool =
    byKind[LYRIC_KIND_MAIN].length > 0 ? byKind[LYRIC_KIND_MAIN] : timed

  return {
    main: pick(mainPool),
    translation: pick(byKind[LYRIC_KIND_TRANSLATION]),
    pronunciation: pick(byKind[LYRIC_KIND_PRONUNCIATION]),
  }
}
|
||||
|
||||
// Convenience wrapper: returns only the main layer chosen by
// selectLyricLayers (null when there is none).
export const pickStructuredLyric = (structuredLyrics, preferredLanguage) => {
  const { main } = selectLyricLayers(structuredLyrics, preferredLanguage)
  return main
}
|
||||
|
||||
// Serialises a structured lyric into LRC text, one "[mm:ss.xx] text" row per
// timed line, each terminated by '\n'. Start times are read as milliseconds
// and truncated to hundredths of a second.
// Returns '' when there is no lyric or no line array. Lines without a finite,
// non-negative start are skipped; a null start is skipped as well, because
// Number(null) is 0 and would stamp an untimed line at [00:00.00].
// Minutes are not wrapped at 60: LRC timestamps have no hours field, so a
// line at 61 minutes is written as [61:..] rather than restarting at [01:..].
export const structuredLyricToLrc = (structuredLyric) => {
  if (!structuredLyric || !Array.isArray(structuredLyric.line)) {
    return ''
  }

  let lyricText = ''
  for (const line of structuredLyric.line) {
    if (line?.start == null) {
      continue
    }
    const start = Number(line.start)
    if (!Number.isFinite(start) || start < 0) {
      continue
    }

    const totalHundredths = Math.floor(start / 10)
    const hundredths = totalHundredths % 100
    const totalSeconds = Math.floor(totalHundredths / 100)
    const sec = totalSeconds % 60
    const min = Math.floor(totalSeconds / 60)

    lyricText += `[${padTime(min)}:${padTime(sec)}.${padTime(hundredths)}] ${line.value || ''}\n`
  }
  return lyricText
}
|
||||
|
||||
// Picks the main lyric for the preferred language out of a list and returns
// it as LRC text; '' when no lyric qualifies.
export const structuredLyricsToLrc = (structuredLyrics, preferredLanguage) => {
  const chosen = pickStructuredLyric(structuredLyrics, preferredLanguage)
  return chosen ? structuredLyricToLrc(chosen) : ''
}
|
||||
|
||||
// Builds the display model for the karaoke view: an array of
// { index, start, end, value, tokens } sorted by start time.
//
// When the lyric has tokenLine entries they drive the result; each one is
// normalised and completed from the plain line at the same index (timing and
// text), and finally from its own tokens. Without tokenLine entries the plain
// lines are used with empty token lists. Entries with neither text nor tokens
// are dropped. A line holding a single token may be split into synthetic
// per-word tokens, and a line without an end borrows the next line's start.
export const buildKaraokeLines = (structuredLyric) => {
  if (!structuredLyric) {
    return []
  }

  const plainLines = Array.isArray(structuredLyric.line)
    ? structuredLyric.line
    : []
  const tokenLines = Array.isArray(structuredLyric.tokenLine)
    ? structuredLyric.tokenLine
    : []

  const fromTokenLine = (tokenLine, fallbackIndex) => {
    const parsed = normalizeTokenLine(tokenLine, fallbackIndex)
    const plain = plainLines[parsed.index] || {}
    const { tokens } = parsed

    const firstStarted = tokens.find((token) => token.start != null)
    let lastEnded
    for (let i = tokens.length - 1; i >= 0; i -= 1) {
      if (tokens[i].end != null) {
        lastEnded = tokens[i]
        break
      }
    }

    const plainText = typeof plain.value === 'string' ? plain.value : ''
    return {
      index: parsed.index,
      start: parsed.start ?? toTime(plain.start) ?? firstStarted?.start ?? null,
      end: parsed.end ?? toTime(plain.end) ?? lastEnded?.end ?? null,
      value:
        parsed.value || plainText || tokens.map((token) => token.value).join(''),
      tokens,
    }
  }

  const fromPlainLine = (line, index) => ({
    index,
    start: toTime(line.start),
    end: toTime(line.end),
    value: typeof line.value === 'string' ? line.value : '',
    tokens: [],
  })

  // Timed lines first (by start, then index); untimed lines last (by index).
  const byStartThenIndex = (a, b) => {
    const aUntimed = a.start == null
    const bUntimed = b.start == null
    if (aUntimed !== bUntimed) {
      return aUntimed ? 1 : -1
    }
    if (!aUntimed && a.start !== b.start) {
      return a.start - b.start
    }
    return a.index - b.index
  }

  const candidates =
    tokenLines.length > 0
      ? tokenLines.map((tokenLine, position) =>
          fromTokenLine(tokenLine, position),
        )
      : plainLines.map((line, position) => fromPlainLine(line, position))

  const ordered = candidates
    .filter((line) => line.value || line.tokens.length > 0)
    .sort(byStartThenIndex)
    .map((line) => {
      if (line.tokens.length !== 1) {
        return { ...line }
      }
      const words = buildSyntheticWordTokens(line, line.tokens[0])
      return words ? { ...line, tokens: words } : { ...line }
    })

  // Close open-ended lines at the start of the line that follows them.
  ordered.forEach((line, position) => {
    if (line.end != null) {
      return
    }
    const upcomingStart = ordered[position + 1]?.start
    if (upcomingStart != null) {
      line.end = upcomingStart
    }
  })

  return ordered
}
|
||||
|
||||
/**
 * Resolves the playback window of one token inside a karaoke line.
 *
 * Resolution order:
 *  1. When the line has a usable window and the explicit token timings are
 *     "collapsed" (most explicit values are duplicates of each other), every
 *     token gets an evenly distributed slice of the line window instead.
 *  2. Otherwise the token's own start/end are used; a missing start falls back
 *     to the previous token's end (or start), then the even-split estimate,
 *     then the line start; a missing end falls back to the next token's start,
 *     then the even-split estimate, then the line end.
 *  3. A lone token whose window is missing or at most 1 time unit long is
 *     stretched over the whole line window.
 *  4. The end is never allowed to precede the start.
 *
 * NOTE(review): `toTime` is defined elsewhere in this file; this code assumes
 * it yields a number, or null/undefined for unusable input — confirm.
 *
 * @param {object} line - Karaoke line with `start`, `end` and `tokens`.
 * @param {number} tokenIndex - Position of the token inside `line.tokens`.
 * @param {*} [lineEndFallback=null] - End time used when the line has none.
 * @returns {{start: (number|null), end: (number|null)}} Resolved window; both
 *   values are null when no token exists at `tokenIndex`.
 */
export const resolveKaraokeTokenWindow = (
  line,
  tokenIndex,
  lineEndFallback = null,
) => {
  const tokens = Array.isArray(line?.tokens) ? line.tokens : []
  const token = tokens[tokenIndex]
  if (!token) {
    return { start: null, end: null }
  }

  // A token exists, so tokenCount is at least 1 and safe to divide by.
  const tokenCount = tokens.length
  const lineStart = toTime(line?.start)
  const lineEnd = toTime(line?.end) ?? toTime(lineEndFallback)
  // Number.isFinite already rejects null/undefined, no separate null check.
  const hasLineWindow =
    Number.isFinite(lineStart) &&
    Number.isFinite(lineEnd) &&
    lineEnd > lineStart

  // Boundary `slot` of an even split of the line window into tokenCount parts.
  const estimateBoundary = (slot) =>
    hasLineWindow
      ? lineStart + ((lineEnd - lineStart) * slot) / tokenCount
      : null
  const estimatedStart = estimateBoundary(tokenIndex)
  const estimatedEnd = estimateBoundary(tokenIndex + 1)

  const explicitStarts = []
  const explicitEnds = []
  for (const entry of tokens) {
    const explicitStart = toTime(entry?.start)
    if (explicitStart != null) {
      explicitStarts.push(explicitStart)
    }
    const explicitEnd = toTime(entry?.end)
    if (explicitEnd != null) {
      explicitEnds.push(explicitEnd)
    }
  }

  // Timings are "collapsed" when the explicit values are mostly duplicates.
  // The ratio is taken against the number of explicit values rather than the
  // total token count, so a line with only a few (but distinct) explicit
  // timings keeps them instead of being flattened to estimates.
  const isCollapsed = (values) =>
    values.length > 1 &&
    new Set(values).size <= Math.max(1, values.length / 4)

  if (
    hasLineWindow &&
    tokenCount > 1 &&
    (isCollapsed(explicitStarts) || isCollapsed(explicitEnds))
  ) {
    return { start: estimatedStart, end: estimatedEnd }
  }

  let start = toTime(token.start)
  if (start == null) {
    const prevToken = tokenIndex > 0 ? tokens[tokenIndex - 1] : null
    const prevEnd = toTime(prevToken?.end) ?? toTime(prevToken?.start)
    start = prevEnd ?? estimatedStart ?? lineStart
  }

  let end = toTime(token.end)
  if (end == null) {
    const nextToken =
      tokenIndex + 1 < tokenCount ? tokens[tokenIndex + 1] : null
    // The estimated start of the next token is the same boundary as this
    // token's estimated end, so a single estimate covers both fallbacks.
    end = toTime(nextToken?.start) ?? estimatedEnd ?? lineEnd
  }

  // A lone token with a missing or degenerate window spans the whole line.
  if (
    tokenCount === 1 &&
    hasLineWindow &&
    (start == null || end == null || end <= start + 1)
  ) {
    start = lineStart
    end = lineEnd
  }

  if (start != null && end != null && end < start) {
    end = start
  }

  return { start, end }
}
|
||||
|
||||
/**
 * Finds which line and which token of that line are active at a given time.
 *
 * Line comparisons are loosened by KARAOKE_SWITCH_EPSILON_MS (defined
 * elsewhere in this file). A non-numeric `currentTimeMs` is treated as 0.
 *
 * @param {Array<object>} lines - Karaoke lines, each with `start`, `end` and
 *   `tokens`.
 * @param {*} currentTimeMs - Current playback position.
 * @returns {{lineIndex: number, tokenIndex: number}} `lineIndex` is -1 when
 *   `lines` is empty or not an array; `tokenIndex` is -1 when no token of the
 *   active line has started.
 */
export const getActiveKaraokeState = (lines, currentTimeMs) => {
  if (!Array.isArray(lines) || lines.length === 0) {
    return { lineIndex: -1, tokenIndex: -1 }
  }

  const parsedTime = Number(currentTimeMs)
  const now = Number.isFinite(parsedTime) ? parsedTime : 0
  const lenientNow = now + KARAOKE_SWITCH_EPSILON_MS

  // Pass 1: advance to the last line before the first one that clearly
  // starts in the future. Lines without a start never stop the scan.
  let lineIndex = 0
  for (let i = 0; i < lines.length; i += 1) {
    const startsAt = toTime(lines[i]?.start)
    if (startsAt != null && !(startsAt <= lenientNow)) {
      break
    }
    lineIndex = i
  }

  // Pass 2: walk backwards from that candidate to the nearest line that has
  // started and has not ended yet. A missing end borrows the next line's
  // start. If nothing qualifies, the candidate from pass 1 is kept.
  for (let i = lineIndex; i >= 0; i -= 1) {
    const startsAt = toTime(lines[i]?.start)
    const endsAt = toTime(lines[i]?.end) ?? toTime(lines[i + 1]?.start)
    const hasStarted = startsAt == null || !(lenientNow < startsAt)
    const stillRunning =
      endsAt == null || now <= endsAt + KARAOKE_SWITCH_EPSILON_MS
    if (hasStarted && stillRunning) {
      lineIndex = i
      break
    }
  }

  const activeLine = lines[lineIndex] || null
  const activeTokens = Array.isArray(activeLine?.tokens)
    ? activeLine.tokens
    : []

  // Pick the last token that has started; stop early once a started token is
  // found whose window still contains the current time.
  let tokenIndex = -1
  for (let i = 0; i < activeTokens.length; i += 1) {
    const span = resolveKaraokeTokenWindow(
      activeLine,
      i,
      lines[lineIndex + 1]?.start,
    )
    if (span.start != null && !(span.start <= lenientNow)) {
      break
    }
    tokenIndex = i
    if (span.end != null && now <= span.end + KARAOKE_SWITCH_EPSILON_MS) {
      break
    }
  }

  return { lineIndex, tokenIndex }
}
|
||||
|
||||
/**
 * Finds the line of a secondary layer that best matches a main line by time.
 *
 * Layer lines whose window overlaps (or touches) the main window are always
 * candidates. Non-overlapping lines are candidates only when their start lies
 * within `maxDelta` of the main start. The lowest score wins; the score is
 * the start distance plus a penalty per position of index distance (30 for
 * overlapping lines, 45 otherwise). Ties keep the earliest candidate.
 *
 * @param {Array<object>} mainLines - Lines of the main lyric track.
 * @param {Array<object>} layerLines - Lines of the layer being matched.
 * @param {number} mainIndex - Index into `mainLines`.
 * @returns {number} Index into `layerLines`, or -1 when the inputs are
 *   unusable, the main line has no start, or no candidate qualifies.
 */
export const findLayerLineIndexForMain = (mainLines, layerLines, mainIndex) => {
  if (!Array.isArray(mainLines) || !Array.isArray(layerLines)) {
    return -1
  }
  if (mainLines.length === 0 || layerLines.length === 0) {
    return -1
  }
  if (mainIndex < 0 || mainIndex >= mainLines.length) {
    return -1
  }

  const mainWindow = lineTimeWindow(mainLines, mainIndex)
  const mainStart = mainWindow.start
  const mainEnd = mainWindow.end
  if (mainStart == null) {
    return -1
  }

  // Tolerance for non-overlapping lines: main duration plus 420, clamped to
  // the range [550, 1400].
  const mainDuration = Math.max(0, (mainEnd ?? mainStart) - mainStart)
  const maxDelta = Math.max(550, Math.min(1400, mainDuration + 420))

  let bestIdx = -1
  let bestScore = Number.POSITIVE_INFINITY

  for (let i = 0; i < layerLines.length; i += 1) {
    const { start, end } = lineTimeWindow(layerLines, i)
    if (start == null) {
      continue
    }

    const startDistance = Math.abs(start - mainStart)
    const indexDistance = Math.abs(i - mainIndex)
    const overlapsMain =
      end != null &&
      Math.min(end, mainEnd ?? end) - Math.max(start, mainStart) >= 0

    let score
    if (overlapsMain) {
      score = startDistance + indexDistance * 30
    } else if (startDistance > maxDelta) {
      continue
    } else {
      score = startDistance + indexDistance * 45
    }

    if (score < bestScore) {
      bestScore = score
      bestIdx = i
    }
  }

  return bestIdx
}
|
||||
|
||||
/**
 * Looks up the layer line matching a main line and returns it with its index.
 *
 * @param {Array<object>} mainLines - Lines of the main lyric track.
 * @param {Array<object>} layerLines - Lines of the layer being matched.
 * @param {number} mainIndex - Index into `mainLines`.
 * @returns {{index: number, line: (object|null)}} The matched index and line;
 *   `line` is null when the index is negative.
 */
export const resolveLayerLineForMain = (mainLines, layerLines, mainIndex) => {
  const matchedIndex = findLayerLineIndexForMain(
    mainLines,
    layerLines,
    mainIndex,
  )
  let matchedLine = null
  if (matchedIndex >= 0) {
    matchedLine = layerLines[matchedIndex]
  }
  return { index: matchedIndex, line: matchedLine }
}
|
||||
416
ui/src/audioplayer/lyrics.test.js
Normal file
416
ui/src/audioplayer/lyrics.test.js
Normal file
@ -0,0 +1,416 @@
|
||||
import {
|
||||
buildKaraokeLines,
|
||||
findLayerLineIndexForMain,
|
||||
getPreferredLyricLanguage,
|
||||
getActiveKaraokeState,
|
||||
hasStructuredLyricContent,
|
||||
pickStructuredLyric,
|
||||
resolveKaraokeTokenWindow,
|
||||
resolveLayerLineForMain,
|
||||
selectLyricLayers,
|
||||
structuredLyricToLrc,
|
||||
structuredLyricsToLrc,
|
||||
} from './lyrics'
|
||||
|
||||
describe('lyrics helpers', () => {
  // Synced lyric track with a single line starting at 1000; `kind` is only
  // set on the object when provided.
  const track = (lang, text, kind) => ({
    ...(kind ? { kind } : {}),
    lang,
    synced: true,
    line: [{ start: 1000, value: text }],
  })

  // Normalized karaoke line without token data.
  const bareLine = (index, start, end, value) => ({
    index,
    start,
    end,
    value,
    tokens: [],
  })

  // Fully specified normalized token.
  const word = (value, start, end, role = '') => ({ start, end, value, role })

  // Asserts that the three tokens of a 1000-2000 line get equal thirds.
  const expectEvenThirds = (line) => {
    const boundaries = [1000, 1333.3333333333333, 1666.6666666666667, 2000]
    for (let i = 0; i < 3; i += 1) {
      const { start, end } = resolveKaraokeTokenWindow(line, i)
      expect(start).toBeCloseTo(boundaries[i])
      expect(end).toBeCloseTo(boundaries[i + 1])
    }
  }

  beforeEach(() => {
    localStorage.clear()
  })

  it('prefers a lyric track that matches the locale', () => {
    const tracks = [
      track('eng', 'English line'),
      track('pt-BR', 'Linha em portugues'),
    ]

    expect(pickStructuredLyric(tracks, 'pt-BR').lang).toBe('pt-BR')
  })

  it('falls back to english when preferred locale is not available', () => {
    const tracks = [
      track('eng', 'English line'),
      track('deu', 'Deutsche Zeile'),
    ]

    expect(pickStructuredLyric(tracks, 'pt-BR').lang).toBe('eng')
  })

  it('falls back to first synced track when english is missing', () => {
    const tracks = [track('jpn', 'Nihongo'), track('deu', 'Deutsch')]

    expect(pickStructuredLyric(tracks, 'pt-BR').lang).toBe('jpn')
  })

  it('selects translation and pronunciation layers by kind', () => {
    const { main, translation, pronunciation } = selectLyricLayers(
      [
        track('ja', 'こんにちは', 'main'),
        track('es', 'Hola', 'translation'),
        track('ja-Latn', 'konnichiwa', 'pronunciation'),
      ],
      'es-MX',
    )

    expect(main.lang).toBe('ja')
    expect(translation.lang).toBe('es')
    expect(pronunciation.lang).toBe('ja-Latn')
  })

  it('treats missing kind as main for backward compatibility', () => {
    const layers = selectLyricLayers([track('eng', 'Main')], 'eng')

    expect(layers.main.lang).toBe('eng')
    expect(layers.translation).toBeNull()
    expect(layers.pronunciation).toBeNull()
  })

  it('matches layer line by timing for the active main line', () => {
    const mainLines = [
      bareLine(0, 1000, 1800, 'Line A'),
      bareLine(1, 2000, 2800, 'Line B'),
    ]
    const layerLines = [
      bareLine(0, 900, 1750, 'A2'),
      bareLine(1, 2050, 2900, 'B2'),
    ]

    expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(1)
    expect(resolveLayerLineForMain(mainLines, layerLines, 0).line.value).toBe(
      'A2',
    )
  })

  it('matches metadata layers by nearest timing even when indexes differ', () => {
    const mainLines = [
      bareLine(0, 1000, 1800, 'Line A'),
      bareLine(1, 2000, 2800, 'Line B'),
      bareLine(2, 3000, 3800, 'Line C'),
    ]
    const layerLines = [
      bareLine(2, 3020, 3820, 'C2'),
      bareLine(0, 980, 1760, 'A2'),
      bareLine(1, 2010, 2810, 'B2'),
    ]

    expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(2)
    expect(resolveLayerLineForMain(mainLines, layerLines, 2).line.value).toBe(
      'C2',
    )
  })

  it('returns no layer match when the nearest line is too far in time', () => {
    const mainLines = [
      bareLine(0, 1000, 1800, 'Line A'),
      bareLine(1, 2000, 2800, 'Line B'),
    ]
    const layerLines = [bareLine(0, 60000, 60800, 'Far line')]

    expect(findLayerLineIndexForMain(mainLines, layerLines, 1)).toBe(-1)
    expect(resolveLayerLineForMain(mainLines, layerLines, 1).line).toBeNull()
  })

  it('converts a structured lyric track to LRC', () => {
    const structured = {
      lang: 'eng',
      synced: true,
      line: [
        { start: 18800, value: "We're no strangers to love" },
        { start: 22801, value: 'You know the rules and so do I' },
      ],
    }

    expect(structuredLyricToLrc(structured)).toBe(
      "[00:18.80] We're no strangers to love\n[00:22.80] You know the rules and so do I\n",
    )
  })

  it('returns empty text when no synced lyrics are available', () => {
    const unsyncedOnly = [
      { lang: 'eng', synced: false, line: [{ value: 'Unsynced line' }] },
    ]

    expect(structuredLyricsToLrc(unsyncedOnly, 'eng')).toBe('')
  })

  it('reads preferred language from localStorage first', () => {
    localStorage.setItem('locale', 'pt-BR')
    expect(getPreferredLyricLanguage()).toBe('pt-BR')
  })

  it('builds karaoke lines from tokenLine payload', () => {
    // The first raw token deliberately omits `role` to check normalization.
    const payload = {
      lang: 'eng',
      synced: true,
      line: [{ start: 1000, end: 3000, value: 'Hello world' }],
      tokenLine: [
        {
          index: 0,
          start: 1000,
          end: 3000,
          value: 'Hello world',
          token: [
            { start: 1000, end: 1500, value: 'Hello' },
            { start: 2000, end: 2500, value: 'world', role: 'x-bg' },
          ],
        },
      ],
    }

    expect(buildKaraokeLines(payload)).toEqual([
      {
        index: 0,
        start: 1000,
        end: 3000,
        value: 'Hello world',
        tokens: [
          word('Hello', 1000, 1500),
          word('world', 2000, 2500, 'x-bg'),
        ],
      },
    ])
  })

  it('sorts token timing by start to keep playback stable', () => {
    const payload = {
      lang: 'eng',
      synced: true,
      line: [{ start: 1000, end: 3000, value: 'Hello world' }],
      tokenLine: [
        {
          index: 0,
          start: 1000,
          end: 3000,
          value: 'Hello world',
          token: [word('world', 2000, 2500), word('Hello', 1000, 1500)],
        },
      ],
    }

    const [firstLine] = buildKaraokeLines(payload)

    expect(firstLine.tokens.map((token) => token.value)).toEqual([
      'Hello',
      'world',
    ])
  })

  it('splits a single full-line token into synthetic word tokens', () => {
    const text = 'Da-la-lun, dun'
    const lines = buildKaraokeLines({
      lang: 'ko-Latn',
      synced: true,
      line: [{ start: 1000, end: 2000, value: text }],
      tokenLine: [
        {
          index: 0,
          start: 1000,
          end: 2000,
          value: text,
          token: [{ start: 1000, end: 2000, value: text }],
        },
      ],
    })

    expect(lines).toHaveLength(1)
    expect(lines[0].tokens).toHaveLength(2)
    expect(lines[0].tokens[0].value).toBe('Da-la-lun, ')
    expect(lines[0].tokens[1].value).toBe('dun')

    const firstWindow = resolveKaraokeTokenWindow(lines[0], 0)
    const secondWindow = resolveKaraokeTokenWindow(lines[0], 1)

    expect(firstWindow.start).toBeCloseTo(1000)
    expect(firstWindow.end).toBeCloseTo(1500)
    expect(secondWindow.start).toBeCloseTo(1500)
    expect(secondWindow.end).toBeCloseTo(2000)
  })

  it('detects active line and token for karaoke timing', () => {
    const lines = [
      {
        index: 0,
        start: 1000,
        end: 3000,
        value: 'Hello world',
        tokens: [word('Hello', 1000, 1500), word('world', 2000, 2500)],
      },
      bareLine(1, 3500, 5000, 'Second line'),
    ]

    expect(getActiveKaraokeState(lines, 2200)).toEqual({
      lineIndex: 0,
      tokenIndex: 1,
    })
  })

  it('resolves token window fallback boundaries from neighboring tokens', () => {
    // Tokens carry starts only; ends must come from neighbours or the line.
    const line = {
      start: 1000,
      end: 3000,
      value: 'Hello world',
      tokens: [
        { start: 1200, value: 'Hello', role: '' },
        { start: 1800, value: 'world', role: '' },
      ],
    }

    expect(resolveKaraokeTokenWindow(line, 0)).toEqual({
      start: 1200,
      end: 1800,
    })
    expect(resolveKaraokeTokenWindow(line, 1)).toEqual({
      start: 1800,
      end: 3000,
    })
  })

  it('infers sequential token windows when token timings are missing', () => {
    expectEvenThirds({
      start: 1000,
      end: 2000,
      value: 'A B C',
      tokens: [
        { value: 'A', role: '' },
        { value: 'B', role: '' },
        { value: 'C', role: '' },
      ],
    })
  })

  it('falls back to sequential windows when token timings are collapsed', () => {
    expectEvenThirds({
      start: 1000,
      end: 2000,
      value: 'A B C',
      tokens: ['A', 'B', 'C'].map((value) => word(value, 1000, 2000)),
    })
  })

  it('keeps token selection stable near tight token boundaries', () => {
    const lines = [
      {
        index: 0,
        start: 1000,
        end: 2000,
        value: 'A B',
        tokens: [word('A', 1000, 1100), word('B', 1110, 1300)],
      },
    ]

    expect(getActiveKaraokeState(lines, 1108)).toEqual({
      lineIndex: 0,
      tokenIndex: 0,
    })
  })

  it('reports structured lyric content when token timing exists', () => {
    const structured = {
      tokenLine: [{ token: [{ start: 100, value: 'a' }] }],
    }

    expect(hasStructuredLyricContent(structured)).toBe(true)
  })
})
|
||||
@ -7,6 +7,7 @@ import {
|
||||
PLAYER_CURRENT,
|
||||
PLAYER_PLAY_NEXT,
|
||||
PLAYER_PLAY_TRACKS,
|
||||
PLAYER_UPDATE_LYRIC,
|
||||
PLAYER_SET_TRACK,
|
||||
PLAYER_SET_VOLUME,
|
||||
PLAYER_SYNC_QUEUE,
|
||||
@ -60,21 +61,25 @@ const mapToAudioLists = (item) => {
|
||||
let lyricText = ''
|
||||
|
||||
if (lyrics) {
|
||||
const structured = JSON.parse(lyrics)
|
||||
for (const structuredLyric of structured) {
|
||||
if (structuredLyric.synced) {
|
||||
for (const line of structuredLyric.line) {
|
||||
let time = Math.floor(line.start / 10)
|
||||
const ms = time % 100
|
||||
time = Math.floor(time / 100)
|
||||
const sec = time % 60
|
||||
time = Math.floor(time / 60)
|
||||
const min = time % 60
|
||||
try {
|
||||
const structured = JSON.parse(lyrics)
|
||||
for (const structuredLyric of structured) {
|
||||
if (structuredLyric.synced) {
|
||||
for (const line of structuredLyric.line) {
|
||||
let time = Math.floor(line.start / 10)
|
||||
const ms = time % 100
|
||||
time = Math.floor(time / 100)
|
||||
const sec = time % 60
|
||||
time = Math.floor(time / 60)
|
||||
const min = time % 60
|
||||
|
||||
ms.toString()
|
||||
lyricText += `[${pad(min)}:${pad(sec)}.${pad(ms)}] ${line.value}\n`
|
||||
ms.toString()
|
||||
lyricText += `[${pad(min)}:${pad(sec)}.${pad(ms)}] ${line.value}\n`
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
lyricText = ''
|
||||
}
|
||||
}
|
||||
|
||||
@ -206,6 +211,45 @@ const reduceMode = (state, { data: { mode } }) => {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Replaces the lyric text of every queued item belonging to a track.
 *
 * Returns the very same state object when `trackId` is falsy or when no
 * queued item with that track id carries a different lyric, so callers can
 * rely on reference equality to detect "nothing changed". When the queue does
 * change, `state.current` also receives the new lyric if it is the same track.
 *
 * @param {object} state - Player state with `queue` and optional `current`.
 * @param {{data: {trackId: *, lyric: *}}} action - Update payload.
 * @returns {object} The original state or a shallow copy with the update.
 */
const reduceUpdateLyric = (state, { data: { trackId, lyric } }) => {
  if (!trackId) {
    return state
  }

  const isStale = (item) => item.trackId === trackId && item.lyric !== lyric
  if (!state.queue.some(isStale)) {
    return state
  }

  const withLyric = (item) => ({ ...item, lyric })
  const currentMatches = state.current?.trackId === trackId

  return {
    ...state,
    queue: state.queue.map((item) => (isStale(item) ? withLyric(item) : item)),
    current: currentMatches ? withLyric(state.current) : state.current,
  }
}
|
||||
|
||||
export const playerReducer = (previousState = initialState, payload) => {
|
||||
const { type } = payload
|
||||
switch (type) {
|
||||
@ -243,6 +287,8 @@ export const playerReducer = (previousState = initialState, payload) => {
|
||||
previousState.savedPlayIndex >= 0 ? previousState.savedPlayIndex : 0,
|
||||
}
|
||||
}
|
||||
case PLAYER_UPDATE_LYRIC:
|
||||
return reduceUpdateLyric(previousState, payload)
|
||||
default:
|
||||
return previousState
|
||||
}
|
||||
|
||||
@ -1,11 +1,24 @@
|
||||
import { describe, it, expect } from 'vitest'
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
import { playerReducer } from './playerReducer'
|
||||
import {
|
||||
PLAYER_SYNC_QUEUE,
|
||||
PLAYER_CURRENT,
|
||||
PLAYER_REFRESH_QUEUE,
|
||||
PLAYER_SET_TRACK,
|
||||
PLAYER_SYNC_QUEUE,
|
||||
PLAYER_UPDATE_LYRIC,
|
||||
} from '../actions'
|
||||
|
||||
vi.mock('uuid', () => ({
|
||||
v4: () => 'test-uuid',
|
||||
}))
|
||||
|
||||
vi.mock('../subsonic', () => ({
|
||||
default: {
|
||||
streamUrl: vi.fn((id) => `/rest/stream?id=${id}`),
|
||||
getCoverArtUrl: vi.fn(() => '/rest/getCoverArt?id=test'),
|
||||
},
|
||||
}))
|
||||
|
||||
describe('playerReducer', () => {
|
||||
describe('pending track selection survives SYNC_QUEUE and premature CURRENT', () => {
|
||||
// Simulates the real sequence when clicking a new song while one is playing:
|
||||
@ -54,8 +67,6 @@ describe('playerReducer', () => {
|
||||
})
|
||||
|
||||
it('CURRENT for old track preserves pending playIndex', () => {
|
||||
// After SYNC_QUEUE, queue has new UUIDs. The old track's UUID (zzz)
|
||||
// is at index 2, but playIndex is 0. This is a premature callback.
|
||||
const stateAfterSync = {
|
||||
...stateAfterPlayTracks,
|
||||
queue: [
|
||||
@ -71,7 +82,7 @@ describe('playerReducer', () => {
|
||||
const result = playerReducer(stateAfterSync, action)
|
||||
expect(result.playIndex).toBe(0)
|
||||
expect(result.clear).toBe(true)
|
||||
expect(result.savedPlayIndex).toBe(2) // preserved from before
|
||||
expect(result.savedPlayIndex).toBe(2)
|
||||
})
|
||||
|
||||
it('CURRENT for correct track consumes pending playIndex', () => {
|
||||
@ -83,7 +94,6 @@ describe('playerReducer', () => {
|
||||
{ trackId: 's3', uuid: 'zzz', name: 'Song 3' },
|
||||
],
|
||||
}
|
||||
// Player switched to Song 1 (uuid 'xxx', index 0 == playIndex)
|
||||
const action = {
|
||||
type: PLAYER_CURRENT,
|
||||
data: { uuid: 'xxx', name: 'Song 1', volume: 1 },
|
||||
@ -142,4 +152,80 @@ describe('playerReducer', () => {
|
||||
expect(result.playIndex).toBe(0)
|
||||
})
|
||||
})
|
||||
|
||||
it('maps embedded synced lyrics to LRC text', () => {
  // One synced and one unsynced track; only the synced one should be mapped.
  const embeddedLyrics = [
    {
      lang: 'eng',
      synced: true,
      line: [{ start: 1000, value: 'Line one' }],
    },
    {
      lang: 'eng',
      synced: false,
      line: [{ value: 'Unsynced line' }],
    },
  ]

  const { queue } = playerReducer(undefined, {
    type: PLAYER_SET_TRACK,
    data: {
      id: 'song-1',
      title: 'Test Song',
      artist: 'Test Artist',
      album: 'Test Album',
      duration: 60,
      lyrics: JSON.stringify(embeddedLyrics),
    },
  })

  expect(queue).toHaveLength(1)
  expect(queue[0].lyric).toBe('[00:01.00] Line one\n')
})

it('updates queue lyric by track id', () => {
  const setTrack = {
    type: PLAYER_SET_TRACK,
    data: {
      id: 'song-1',
      title: 'Test Song',
      artist: 'Test Artist',
      album: 'Test Album',
      duration: 60,
    },
  }
  const updateLyric = {
    type: PLAYER_UPDATE_LYRIC,
    data: {
      trackId: 'song-1',
      lyric: '[00:01.00] Updated lyric\n',
    },
  }

  const before = playerReducer(undefined, setTrack)
  const after = playerReducer(before, updateLyric)

  expect(after.queue[0].lyric).toBe('[00:01.00] Updated lyric\n')
})

it('returns same state when lyric update does not match any track', () => {
  const setTrack = {
    type: PLAYER_SET_TRACK,
    data: {
      id: 'song-1',
      title: 'Test Song',
      artist: 'Test Artist',
      album: 'Test Album',
      duration: 60,
    },
  }
  const updateUnknownTrack = {
    type: PLAYER_UPDATE_LYRIC,
    data: {
      trackId: 'missing-track',
      lyric: '[00:01.00] Updated lyric\n',
    },
  }

  const before = playerReducer(undefined, setTrack)
  const after = playerReducer(before, updateUnknownTrack)

  // Reference equality: the reducer must hand back the untouched state.
  expect(after).toBe(before)
})
|
||||
})
|
||||
|
||||
@ -120,6 +120,10 @@ const getTopSongs = (artist, count = 50) => {
|
||||
return httpClient(url('getTopSongs', null, { artist, count }))
|
||||
}
|
||||
|
||||
/**
 * Requests the `getLyricsBySongId` endpoint for a song.
 *
 * @param {string} id - Song id passed to the endpoint URL builder.
 * @returns {*} Whatever `httpClient` returns for the built URL.
 */
const getLyricsBySongId = (id) => httpClient(url('getLyricsBySongId', id))
|
||||
|
||||
const streamUrl = (id, options) => {
|
||||
return baseUrl(
|
||||
url('stream', id, {
|
||||
@ -149,4 +153,5 @@ export default {
|
||||
getArtistInfo,
|
||||
getTopSongs,
|
||||
getSimilarSongs2,
|
||||
getLyricsBySongId,
|
||||
}
|
||||
|
||||
@ -1,7 +1,12 @@
|
||||
import { vi } from 'vitest'
|
||||
import { COVER_ART_SIZE } from '../consts'
|
||||
import { httpClient } from '../dataProvider'
|
||||
import subsonic from './index'
|
||||
|
||||
vi.mock('../dataProvider', () => ({
|
||||
httpClient: vi.fn(() => Promise.resolve({})),
|
||||
}))
|
||||
|
||||
describe('getCoverArtUrl', () => {
|
||||
beforeEach(() => {
|
||||
// Mock window.location
|
||||
@ -178,3 +183,29 @@ describe('getAvatarUrl', () => {
|
||||
expect(url).toContain('username=john')
|
||||
})
|
||||
})
|
||||
|
||||
describe('getLyricsBySongId', () => {
  // Credentials served by the stubbed localStorage below.
  const storedCredentials = {
    username: 'testuser',
    'subsonic-token': 'testtoken',
    'subsonic-salt': 'testsalt',
  }

  beforeEach(() => {
    vi.clearAllMocks()
    // Replace localStorage with a read-only stub; unknown keys yield null.
    Object.defineProperty(window, 'localStorage', {
      value: {
        getItem: vi.fn((key) => storedCredentials[key] || null),
      },
    })
  })

  it('calls the getLyricsBySongId endpoint', async () => {
    await subsonic.getLyricsBySongId('song-1')

    expect(httpClient).toHaveBeenCalledTimes(1)
    const [[requestedUrl]] = httpClient.mock.calls
    expect(requestedUrl).toContain('/rest/getLyricsBySongId?')
    expect(requestedUrl).toContain('id=song-1')
  })
})
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user