mirror of
https://github.com/navidrome/navidrome.git
synced 2026-05-03 06:51:16 +00:00
feat(lyrics): support agent-based lyric layers
This commit is contained in:
parent
ff40c030d9
commit
d6a684e60e
@ -677,7 +677,7 @@ func setViperDefaults() {
|
|||||||
viper.SetDefault("coverartquality", 75)
|
viper.SetDefault("coverartquality", 75)
|
||||||
viper.SetDefault("artistartpriority", "artist.*, album/artist.*, external")
|
viper.SetDefault("artistartpriority", "artist.*, album/artist.*, external")
|
||||||
viper.SetDefault("discartpriority", "disc*.*, cd*.*, cover.*, folder.*, front.*, discsubtitle, embedded")
|
viper.SetDefault("discartpriority", "disc*.*, cd*.*, cover.*, folder.*, front.*, discsubtitle, embedded")
|
||||||
viper.SetDefault("lyricspriority", ".lrc,.ttml,.txt,embedded")
|
viper.SetDefault("lyricspriority", ".ttml,.elrc,.lrc,.srt,.txt,embedded")
|
||||||
viper.SetDefault("enablegravatar", false)
|
viper.SetDefault("enablegravatar", false)
|
||||||
viper.SetDefault("enablefavourites", true)
|
viper.SetDefault("enablefavourites", true)
|
||||||
viper.SetDefault("enablestarrating", true)
|
viper.SetDefault("enablestarrating", true)
|
||||||
|
|||||||
@ -44,6 +44,36 @@ var _ = Describe("sources", func() {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
elrcLyrics := model.LyricList{
|
||||||
|
model.Lyrics{
|
||||||
|
DisplayArtist: "ELRC Artist",
|
||||||
|
DisplayTitle: "ELRC Song",
|
||||||
|
Lang: "eng",
|
||||||
|
Line: []model.Line{
|
||||||
|
{
|
||||||
|
Start: gg.P(int64(1000)),
|
||||||
|
End: gg.P(int64(1500)),
|
||||||
|
Value: "Lead words",
|
||||||
|
Cue: []model.Cue{
|
||||||
|
{
|
||||||
|
Start: gg.P(int64(1000)),
|
||||||
|
Value: "Lead ",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Start: gg.P(int64(1500)),
|
||||||
|
Value: "words",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Start: gg.P(int64(3000)),
|
||||||
|
Value: "Fallback line",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Synced: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
ttmlLyrics := model.LyricList{
|
ttmlLyrics := model.LyricList{
|
||||||
model.Lyrics{
|
model.Lyrics{
|
||||||
Kind: "main",
|
Kind: "main",
|
||||||
@ -88,6 +118,25 @@ var _ = Describe("sources", func() {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
srtLyrics := model.LyricList{
|
||||||
|
model.Lyrics{
|
||||||
|
Lang: "xxx",
|
||||||
|
Line: []model.Line{
|
||||||
|
{
|
||||||
|
Start: gg.P(int64(18800)),
|
||||||
|
End: gg.P(int64(22800)),
|
||||||
|
Value: "We're from subtitles",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Start: gg.P(int64(22801)),
|
||||||
|
End: gg.P(int64(26000)),
|
||||||
|
Value: "Another subtitle line",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Synced: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
BeforeEach(func() {
|
BeforeEach(func() {
|
||||||
DeferCleanup(configtest.SetupConfig())
|
DeferCleanup(configtest.SetupConfig())
|
||||||
|
|
||||||
@ -109,8 +158,10 @@ var _ = Describe("sources", func() {
|
|||||||
},
|
},
|
||||||
Entry("embedded > lrc > txt", "embedded,.lrc,.txt", embeddedLyrics),
|
Entry("embedded > lrc > txt", "embedded,.lrc,.txt", embeddedLyrics),
|
||||||
Entry("lrc > embedded > txt", ".lrc,embedded,.txt", syncedLyrics),
|
Entry("lrc > embedded > txt", ".lrc,embedded,.txt", syncedLyrics),
|
||||||
|
Entry("elrc > lrc > embedded", ".elrc,.lrc,embedded", elrcLyrics),
|
||||||
|
Entry("srt > txt > embedded", ".srt,.txt,embedded", srtLyrics),
|
||||||
Entry("txt > lrc > embedded", ".txt,.lrc,embedded", unsyncedLyrics),
|
Entry("txt > lrc > embedded", ".txt,.lrc,embedded", unsyncedLyrics),
|
||||||
Entry("ttml > lrc > embedded", ".ttml,.lrc,embedded", ttmlLyrics))
|
Entry("ttml > elrc > lrc > srt > embedded", ".ttml,.elrc,.lrc,.srt,embedded", ttmlLyrics))
|
||||||
|
|
||||||
Context("Errors", func() {
|
Context("Errors", func() {
|
||||||
var RegularUserContext = XContext
|
var RegularUserContext = XContext
|
||||||
|
|||||||
@ -38,13 +38,20 @@ func fromExternalFile(ctx context.Context, mf *model.MediaFile, suffix string) (
|
|||||||
}
|
}
|
||||||
|
|
||||||
var list model.LyricList
|
var list model.LyricList
|
||||||
if strings.EqualFold(suffix, ".ttml") {
|
switch {
|
||||||
|
case strings.EqualFold(suffix, ".ttml"):
|
||||||
list, err = parseTTML(contents)
|
list, err = parseTTML(contents)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "error parsing ttml external file", "path", externalLyric, err)
|
log.Error(ctx, "error parsing ttml external file", "path", externalLyric, err)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
} else {
|
case strings.EqualFold(suffix, ".srt"):
|
||||||
|
list, err = parseSRT(contents)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(ctx, "error parsing srt external file", "path", externalLyric, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
default:
|
||||||
lyrics, err := model.ToLyrics("xxx", string(contents))
|
lyrics, err := model.ToLyrics("xxx", string(contents))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(ctx, "error parsing lyric external file", "path", externalLyric, err)
|
log.Error(ctx, "error parsing lyric external file", "path", externalLyric, err)
|
||||||
|
|||||||
@ -106,10 +106,10 @@ var _ = Describe("sources", func() {
|
|||||||
Expect(lyrics[0].Line[0].Cue).To(HaveLen(3))
|
Expect(lyrics[0].Line[0].Cue).To(HaveLen(3))
|
||||||
Expect(*lyrics[0].Line[0].Cue[0].Start).To(Equal(int64(1000)))
|
Expect(*lyrics[0].Line[0].Cue[0].Start).To(Equal(int64(1000)))
|
||||||
Expect(lyrics[0].Line[0].Cue[0].Value).To(Equal("Some "))
|
Expect(lyrics[0].Line[0].Cue[0].Value).To(Equal("Some "))
|
||||||
Expect(*lyrics[0].Line[0].Cue[0].End).To(Equal(int64(1500)))
|
Expect(lyrics[0].Line[0].Cue[0].End).To(BeNil())
|
||||||
Expect(*lyrics[0].Line[0].Cue[1].Start).To(Equal(int64(1500)))
|
Expect(*lyrics[0].Line[0].Cue[1].Start).To(Equal(int64(1500)))
|
||||||
Expect(lyrics[0].Line[0].Cue[1].Value).To(Equal("lyrics "))
|
Expect(lyrics[0].Line[0].Cue[1].Value).To(Equal("lyrics "))
|
||||||
Expect(*lyrics[0].Line[0].Cue[1].End).To(Equal(int64(2000)))
|
Expect(lyrics[0].Line[0].Cue[1].End).To(BeNil())
|
||||||
Expect(*lyrics[0].Line[0].Cue[2].Start).To(Equal(int64(2000)))
|
Expect(*lyrics[0].Line[0].Cue[2].Start).To(Equal(int64(2000)))
|
||||||
Expect(lyrics[0].Line[0].Cue[2].Value).To(Equal("here"))
|
Expect(lyrics[0].Line[0].Cue[2].Value).To(Equal("here"))
|
||||||
Expect(lyrics[0].Line[0].Cue[2].End).To(BeNil())
|
Expect(lyrics[0].Line[0].Cue[2].End).To(BeNil())
|
||||||
@ -125,6 +125,33 @@ var _ = Describe("sources", func() {
|
|||||||
Expect(lyrics[0].Line[2].Cue).To(BeNil())
|
Expect(lyrics[0].Line[2].Cue).To(BeNil())
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("should return Enhanced LRC lyrics from an ELRC file", func() {
|
||||||
|
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
|
||||||
|
lyrics, err := fromExternalFile(ctx, &mf, ".elrc")
|
||||||
|
|
||||||
|
Expect(err).To(BeNil())
|
||||||
|
Expect(lyrics).To(HaveLen(1))
|
||||||
|
Expect(lyrics[0].DisplayArtist).To(Equal("ELRC Artist"))
|
||||||
|
Expect(lyrics[0].DisplayTitle).To(Equal("ELRC Song"))
|
||||||
|
Expect(lyrics[0].Lang).To(Equal("eng"))
|
||||||
|
Expect(lyrics[0].Synced).To(BeTrue())
|
||||||
|
Expect(lyrics[0].Line).To(HaveLen(2))
|
||||||
|
|
||||||
|
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(1000))))
|
||||||
|
Expect(lyrics[0].Line[0].Value).To(Equal("Lead words"))
|
||||||
|
Expect(lyrics[0].Line[0].Cue).To(HaveLen(2))
|
||||||
|
Expect(*lyrics[0].Line[0].Cue[0].Start).To(Equal(int64(1000)))
|
||||||
|
Expect(lyrics[0].Line[0].Cue[0].Value).To(Equal("Lead "))
|
||||||
|
Expect(lyrics[0].Line[0].Cue[0].End).To(BeNil())
|
||||||
|
Expect(*lyrics[0].Line[0].Cue[1].Start).To(Equal(int64(1500)))
|
||||||
|
Expect(lyrics[0].Line[0].Cue[1].Value).To(Equal("words"))
|
||||||
|
Expect(lyrics[0].Line[0].Cue[1].End).To(BeNil())
|
||||||
|
|
||||||
|
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(3000))))
|
||||||
|
Expect(lyrics[0].Line[1].Value).To(Equal("Fallback line"))
|
||||||
|
Expect(lyrics[0].Line[1].Cue).To(BeNil())
|
||||||
|
})
|
||||||
|
|
||||||
It("should return unsynchronized lyrics from a file", func() {
|
It("should return unsynchronized lyrics from a file", func() {
|
||||||
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
|
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
|
||||||
lyrics, err := fromExternalFile(ctx, &mf, ".txt")
|
lyrics, err := fromExternalFile(ctx, &mf, ".txt")
|
||||||
@ -146,6 +173,31 @@ var _ = Describe("sources", func() {
|
|||||||
}))
|
}))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("should return synchronized lyrics from an SRT file", func() {
|
||||||
|
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
|
||||||
|
lyrics, err := fromExternalFile(ctx, &mf, ".srt")
|
||||||
|
|
||||||
|
Expect(err).To(BeNil())
|
||||||
|
Expect(lyrics).To(Equal(model.LyricList{
|
||||||
|
model.Lyrics{
|
||||||
|
Lang: "xxx",
|
||||||
|
Line: []model.Line{
|
||||||
|
{
|
||||||
|
Start: gg.P(int64(18800)),
|
||||||
|
End: gg.P(int64(22800)),
|
||||||
|
Value: "We're from subtitles",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Start: gg.P(int64(22801)),
|
||||||
|
End: gg.P(int64(26000)),
|
||||||
|
Value: "Another subtitle line",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Synced: true,
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
})
|
||||||
|
|
||||||
It("should return synchronized multilingual lyrics from a TTML file", func() {
|
It("should return synchronized multilingual lyrics from a TTML file", func() {
|
||||||
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
|
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
|
||||||
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
|
lyrics, err := fromExternalFile(ctx, &mf, ".ttml")
|
||||||
|
|||||||
161
core/lyrics/srt.go
Normal file
161
core/lyrics/srt.go
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
package lyrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/navidrome/navidrome/model"
|
||||||
|
"github.com/navidrome/navidrome/utils/str"
|
||||||
|
)
|
||||||
|
|
||||||
|
// srtTimeRegex matches a single SRT timestamp with optional surrounding
// whitespace: 1-2 digit hours, 2 digit minutes, 2 digit seconds, a "," or "."
// separator, and a 1-3 digit fractional part. Capture groups 1-4 hold those
// four numeric fields in that order.
var srtTimeRegex = regexp.MustCompile(`^\s*(\d{1,2}):(\d{2}):(\d{2})[,.](\d{1,3})\s*$`)
|
||||||
|
|
||||||
|
func parseSRT(contents []byte) (model.LyricList, error) {
|
||||||
|
raw := strings.ReplaceAll(string(contents), "\r\n", "\n")
|
||||||
|
raw = strings.ReplaceAll(raw, "\r", "\n")
|
||||||
|
|
||||||
|
blocks := splitSRTBlocks(raw)
|
||||||
|
lines := make([]model.Line, 0, len(blocks))
|
||||||
|
|
||||||
|
for _, block := range blocks {
|
||||||
|
line, ok, err := parseSRTBlock(block)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if ok {
|
||||||
|
lines = append(lines, line)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(lines) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
lyrics := model.NormalizeLyrics(model.Lyrics{
|
||||||
|
Lang: "xxx",
|
||||||
|
Line: lines,
|
||||||
|
Synced: true,
|
||||||
|
})
|
||||||
|
return model.LyricList{lyrics}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitSRTBlocks breaks SRT text into cue blocks.
//
// A block is a run of consecutive non-blank lines; any line that is empty or
// contains only whitespace acts as a separator. Treating whitespace-only lines
// as separators matters because many editors leave stray spaces or tabs on the
// "blank" line between cues; splitting on the literal "\n\n" would merge such
// cues and leak the next cue's index and timing into the previous cue's text.
//
// Each returned block has surrounding whitespace trimmed, while the lines
// inside it are kept as they were. Input that is empty or all whitespace
// yields nil.
func splitSRTBlocks(raw string) []string {
	if strings.TrimSpace(raw) == "" {
		return nil
	}

	var (
		blocks  []string
		pending []string
	)
	// flush emits the accumulated lines as one block and resets the buffer.
	flush := func() {
		if len(pending) == 0 {
			return
		}
		if block := strings.TrimSpace(strings.Join(pending, "\n")); block != "" {
			blocks = append(blocks, block)
		}
		pending = pending[:0]
	}

	for _, line := range strings.Split(raw, "\n") {
		if strings.TrimSpace(line) == "" {
			flush()
			continue
		}
		pending = append(pending, line)
	}
	flush()

	return blocks
}
|
||||||
|
|
||||||
|
func parseSRTBlock(block string) (model.Line, bool, error) {
|
||||||
|
scanner := bytes.Split([]byte(block), []byte("\n"))
|
||||||
|
if len(scanner) == 0 {
|
||||||
|
return model.Line{}, false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
lines := make([]string, 0, len(scanner))
|
||||||
|
for _, line := range scanner {
|
||||||
|
lines = append(lines, strings.TrimSpace(string(line)))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(lines) == 0 {
|
||||||
|
return model.Line{}, false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
startIdx := 0
|
||||||
|
if digitsOnly(lines[0]) {
|
||||||
|
startIdx = 1
|
||||||
|
}
|
||||||
|
if startIdx >= len(lines) {
|
||||||
|
return model.Line{}, false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
timing := strings.Split(lines[startIdx], "-->")
|
||||||
|
if len(timing) != 2 {
|
||||||
|
return model.Line{}, false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
startMs, err := parseSRTTime(timing[0])
|
||||||
|
if err != nil {
|
||||||
|
return model.Line{}, false, err
|
||||||
|
}
|
||||||
|
endMs, err := parseSRTTime(timing[1])
|
||||||
|
if err != nil {
|
||||||
|
return model.Line{}, false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
textLines := make([]string, 0, len(lines)-startIdx-1)
|
||||||
|
for _, line := range lines[startIdx+1:] {
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
textLines = append(textLines, line)
|
||||||
|
}
|
||||||
|
|
||||||
|
value := str.SanitizeText(strings.Join(textLines, "\n"))
|
||||||
|
if value == "" {
|
||||||
|
return model.Line{}, false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return model.Line{
|
||||||
|
Start: &startMs,
|
||||||
|
End: &endMs,
|
||||||
|
Value: value,
|
||||||
|
}, true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseSRTTime(value string) (int64, error) {
|
||||||
|
match := srtTimeRegex.FindStringSubmatch(strings.TrimSpace(value))
|
||||||
|
if match == nil {
|
||||||
|
return 0, strconv.ErrSyntax
|
||||||
|
}
|
||||||
|
|
||||||
|
hours, err := strconv.ParseInt(match[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
minutes, err := strconv.ParseInt(match[2], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
seconds, err := strconv.ParseInt(match[3], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
millis, err := strconv.ParseInt(match[4], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch len(match[4]) {
|
||||||
|
case 1:
|
||||||
|
millis *= 100
|
||||||
|
case 2:
|
||||||
|
millis *= 10
|
||||||
|
}
|
||||||
|
|
||||||
|
return (((hours*60)+minutes)*60+seconds)*1000 + millis, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// digitsOnly reports whether value is non-empty and made up solely of the
// ASCII digits '0' through '9'.
func digitsOnly(value string) bool {
	// Byte-wise scan: every byte of a non-ASCII rune is >= 0x80 and therefore
	// outside the '0'..'9' range, so it is rejected just like any other
	// non-digit.
	for i := 0; i < len(value); i++ {
		if c := value[i]; c < '0' || c > '9' {
			return false
		}
	}
	return len(value) > 0
}
|
||||||
@ -46,6 +46,7 @@ type ttmlTimingParams struct {
|
|||||||
type ttmlTimingContext struct {
|
type ttmlTimingContext struct {
|
||||||
lang string
|
lang string
|
||||||
role string
|
role string
|
||||||
|
agentID string
|
||||||
begin int64
|
begin int64
|
||||||
hasBegin bool
|
hasBegin bool
|
||||||
end int64
|
end int64
|
||||||
@ -70,6 +71,12 @@ type ttmlResolvedMetadataLine struct {
|
|||||||
line model.Line
|
line model.Line
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ttmlDefinedAgent is an agent declared by an <agent> element in the TTML
// document, as collected by parseAgentDefinition.
type ttmlDefinedAgent struct {
	// ID is the trimmed "id" attribute; it is the key used in the parser's
	// definedAgents map.
	ID string
	// Type is the trimmed, lower-cased "type" attribute (empty when absent).
	Type string
	// Name is the first non-empty sanitized text of a <name> child element.
	Name string
}
|
||||||
|
|
||||||
type ttmlParser struct {
|
type ttmlParser struct {
|
||||||
decoder *xml.Decoder
|
decoder *xml.Decoder
|
||||||
params ttmlTimingParams
|
params ttmlTimingParams
|
||||||
@ -86,6 +93,8 @@ type ttmlParser struct {
|
|||||||
pronunciationLangOrder []string
|
pronunciationLangOrder []string
|
||||||
pronunciationEntriesByLg map[string][]ttmlMetadataEntry
|
pronunciationEntriesByLg map[string][]ttmlMetadataEntry
|
||||||
|
|
||||||
|
definedAgents map[string]ttmlDefinedAgent
|
||||||
|
|
||||||
metadataSeq int
|
metadataSeq int
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,6 +112,7 @@ func parseTTML(contents []byte) (model.LyricList, error) {
|
|||||||
mainLineRefsByKey: make(map[string]ttmlLineRef),
|
mainLineRefsByKey: make(map[string]ttmlLineRef),
|
||||||
translationEntriesByLg: make(map[string][]ttmlMetadataEntry),
|
translationEntriesByLg: make(map[string][]ttmlMetadataEntry),
|
||||||
pronunciationEntriesByLg: make(map[string][]ttmlMetadataEntry),
|
pronunciationEntriesByLg: make(map[string][]ttmlMetadataEntry),
|
||||||
|
definedAgents: make(map[string]ttmlDefinedAgent),
|
||||||
}
|
}
|
||||||
|
|
||||||
root := ttmlTimingContext{lang: "xxx"}
|
root := ttmlTimingContext{lang: "xxx"}
|
||||||
@ -140,6 +150,8 @@ func (p *ttmlParser) parseElement(start xml.StartElement, parent ttmlTimingConte
|
|||||||
return p.parseMetadataTrack(start, parent, ttmlLyricKindTranslation)
|
return p.parseMetadataTrack(start, parent, ttmlLyricKindTranslation)
|
||||||
case "transliteration":
|
case "transliteration":
|
||||||
return p.parseMetadataTrack(start, parent, ttmlLyricKindPronunciation)
|
return p.parseMetadataTrack(start, parent, ttmlLyricKindPronunciation)
|
||||||
|
case "agent":
|
||||||
|
return p.parseAgentDefinition(start)
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx := p.childContext(start.Attr, parent)
|
ctx := p.childContext(start.Attr, parent)
|
||||||
@ -234,6 +246,49 @@ func (p *ttmlParser) parseMetadataTrack(start xml.StartElement, parent ttmlTimin
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *ttmlParser) parseAgentDefinition(start xml.StartElement) error {
|
||||||
|
id, ok := attrValue(start.Attr, "id")
|
||||||
|
id = strings.TrimSpace(id)
|
||||||
|
if !ok || id == "" {
|
||||||
|
return p.skipElement(start)
|
||||||
|
}
|
||||||
|
|
||||||
|
agent := ttmlDefinedAgent{
|
||||||
|
ID: id,
|
||||||
|
Type: strings.ToLower(strings.TrimSpace(attrOrEmpty(start.Attr, "type"))),
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
token, err := p.decoder.Token()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch t := token.(type) {
|
||||||
|
case xml.StartElement:
|
||||||
|
if strings.EqualFold(t.Name.Local, "name") {
|
||||||
|
name, err := p.collectElementText(t)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
name = sanitizeTTMLText(name)
|
||||||
|
if name != "" && agent.Name == "" {
|
||||||
|
agent.Name = name
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := p.skipElement(t); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
case xml.EndElement:
|
||||||
|
if strings.EqualFold(t.Name.Local, start.Name.Local) {
|
||||||
|
p.definedAgents[agent.ID] = agent
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (p *ttmlParser) parseMetadataText(start xml.StartElement, parent ttmlTimingContext) (ttmlMetadataEntry, bool, error) {
|
func (p *ttmlParser) parseMetadataText(start xml.StartElement, parent ttmlTimingContext) (ttmlMetadataEntry, bool, error) {
|
||||||
forKey, hasFor := attrValue(start.Attr, "for")
|
forKey, hasFor := attrValue(start.Attr, "for")
|
||||||
forKey = strings.TrimSpace(forKey)
|
forKey = strings.TrimSpace(forKey)
|
||||||
@ -338,8 +393,8 @@ func (p *ttmlParser) parseInlineElement(start xml.StartElement, parent ttmlTimin
|
|||||||
tokenText := sanitizeTTMLText(value)
|
tokenText := sanitizeTTMLText(value)
|
||||||
if local == "span" && hasOwnTiming && !ctx.invalid && tokenText != "" && len(tokens) == 0 {
|
if local == "span" && hasOwnTiming && !ctx.invalid && tokenText != "" && len(tokens) == 0 {
|
||||||
parsedToken := model.Cue{
|
parsedToken := model.Cue{
|
||||||
Value: tokenText,
|
Value: tokenText,
|
||||||
Role: ctx.role,
|
AgentID: p.resolveCueAgentID(ctx),
|
||||||
}
|
}
|
||||||
if ctx.hasBegin {
|
if ctx.hasBegin {
|
||||||
startMs := ctx.begin
|
startMs := ctx.begin
|
||||||
@ -366,12 +421,12 @@ func (p *ttmlParser) toLyricList() model.LyricList {
|
|||||||
if len(lines) == 0 {
|
if len(lines) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
res = append(res, model.Lyrics{
|
res = append(res, p.finalizeLyrics(model.Lyrics{
|
||||||
Kind: ttmlLyricKindMain,
|
Kind: ttmlLyricKindMain,
|
||||||
Lang: lang,
|
Lang: lang,
|
||||||
Line: lines,
|
Line: lines,
|
||||||
Synced: linesAreSynced(lines),
|
Synced: linesAreSynced(lines),
|
||||||
})
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
res = append(res, p.buildMetadataLyrics(ttmlLyricKindTranslation, p.translationLangOrder, p.translationEntriesByLg)...)
|
res = append(res, p.buildMetadataLyrics(ttmlLyricKindTranslation, p.translationLangOrder, p.translationEntriesByLg)...)
|
||||||
@ -440,17 +495,168 @@ func (p *ttmlParser) buildMetadataLyrics(kind string, langOrder []string, entrie
|
|||||||
lines[i] = resolved[i].line
|
lines[i] = resolved[i].line
|
||||||
}
|
}
|
||||||
|
|
||||||
res = append(res, model.Lyrics{
|
res = append(res, p.finalizeLyrics(model.Lyrics{
|
||||||
Kind: kind,
|
Kind: kind,
|
||||||
Lang: lang,
|
Lang: lang,
|
||||||
Line: lines,
|
Line: lines,
|
||||||
Synced: linesAreSynced(lines),
|
Synced: linesAreSynced(lines),
|
||||||
})
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *ttmlParser) finalizeLyrics(lyrics model.Lyrics) model.Lyrics {
|
||||||
|
lyrics.Line = model.NormalizeCueLines(lyrics.Line)
|
||||||
|
lyrics.Line, lyrics.Agents = p.resolveAgents(lyrics.Line)
|
||||||
|
return model.NormalizeLyrics(lyrics)
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveAgents derives the agent list for a set of lines from the AgentID
// values on their cues and fills in cues that have no agent.
//
// It returns the lines (after model.NormalizeCueLines) and the agents in
// first-use order. When no cue carries an agent ID, the agent list is nil and
// cue agent IDs are left untouched.
//
// Exactly one agent is reported with role "main". It is chosen as follows:
//  1. the first used agent whose base role is neither "bg" nor "group";
//  2. otherwise, if some cue had no agent ID, a synthetic agent "main";
//  3. otherwise the first used agent whose base role is not "bg";
//  4. otherwise the first used agent.
//
// Every cue with a blank agent ID is then assigned to the main agent.
func (p *ttmlParser) resolveAgents(lines []model.Line) ([]model.Line, []model.Agent) {
	if len(lines) == 0 {
		return lines, nil
	}

	normalized := model.NormalizeCueLines(lines)
	// usedOrder keeps first-use order; usedSet de-duplicates.
	usedOrder := make([]string, 0, 4)
	usedSet := make(map[string]struct{}, 4)
	sawEmptyCue := false

	// Pass 1: collect the distinct agent IDs referenced by cues.
	for i := range normalized {
		for j := range normalized[i].Cue {
			agentID := strings.TrimSpace(normalized[i].Cue[j].AgentID)
			if agentID == "" {
				sawEmptyCue = true
				continue
			}
			if _, exists := usedSet[agentID]; !exists {
				usedSet[agentID] = struct{}{}
				usedOrder = append(usedOrder, agentID)
			}
		}
	}

	// No cue names an agent: nothing to resolve.
	if len(usedOrder) == 0 {
		return normalized, nil
	}

	// Prefer the first plain voice agent as the main agent.
	mainID := ""
	for _, agentID := range usedOrder {
		role := p.baseRoleForAgent(agentID)
		if role != "bg" && role != "group" {
			mainID = agentID
			break
		}
	}
	// Cues without an agent exist, so they get a synthetic "main" agent.
	if mainID == "" && sawEmptyCue {
		mainID = "main"
	}
	// Otherwise fall back to the first non-background agent (i.e. a group).
	if mainID == "" {
		for _, agentID := range usedOrder {
			if p.baseRoleForAgent(agentID) != "bg" {
				mainID = agentID
				break
			}
		}
	}
	// Last resort: only background agents are in use.
	if mainID == "" {
		mainID = usedOrder[0]
	}

	// A synthetic main agent is not in the used list yet; put it first.
	if _, exists := usedSet[mainID]; !exists {
		usedSet[mainID] = struct{}{}
		usedOrder = append([]string{mainID}, usedOrder...)
	}

	// Pass 2: attribute agent-less cues to the main agent.
	for i := range normalized {
		for j := range normalized[i].Cue {
			if strings.TrimSpace(normalized[i].Cue[j].AgentID) == "" {
				normalized[i].Cue[j].AgentID = mainID
			}
		}
	}

	agents := make([]model.Agent, 0, len(usedOrder))
	for _, agentID := range usedOrder {
		role := p.baseRoleForAgent(agentID)
		// The chosen main agent overrides whatever base role it had.
		if agentID == mainID {
			role = "main"
		}
		agent := model.Agent{
			ID:   agentID,
			Role: role,
			Name: p.agentNameForID(agentID),
		}
		agents = append(agents, agent)
	}

	return normalized, agents
}
|
||||||
|
|
||||||
|
func (p *ttmlParser) resolveCueAgentID(ctx ttmlTimingContext) string {
|
||||||
|
agentID := strings.TrimSpace(ctx.agentID)
|
||||||
|
if contextHasRole(ctx.role, "x-bg") {
|
||||||
|
if agentID == "" {
|
||||||
|
agentID = "main"
|
||||||
|
}
|
||||||
|
return backgroundAgentID(agentID)
|
||||||
|
}
|
||||||
|
return agentID
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *ttmlParser) baseRoleForAgent(agentID string) string {
|
||||||
|
if isBackgroundAgentID(agentID) {
|
||||||
|
return "bg"
|
||||||
|
}
|
||||||
|
|
||||||
|
if agent, ok := p.definedAgents[agentID]; ok {
|
||||||
|
switch agent.Type {
|
||||||
|
case "group":
|
||||||
|
return "group"
|
||||||
|
default:
|
||||||
|
return "voice"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "voice"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *ttmlParser) agentNameForID(agentID string) string {
|
||||||
|
if isBackgroundAgentID(agentID) {
|
||||||
|
baseID := strings.TrimSuffix(agentID, "__bg")
|
||||||
|
if baseID == "main" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if agent, ok := p.definedAgents[baseID]; ok {
|
||||||
|
return agent.Name
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
if agent, ok := p.definedAgents[agentID]; ok {
|
||||||
|
return agent.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// backgroundAgentID returns the ID of the background variant of agentID,
// formed by appending the "__bg" suffix.
func backgroundAgentID(agentID string) string {
	const bgSuffix = "__bg"
	return agentID + bgSuffix
}
|
||||||
|
|
||||||
|
// isBackgroundAgentID reports whether agentID names a background agent, i.e.
// whether it ends with the "__bg" suffix.
func isBackgroundAgentID(agentID string) bool {
	const bgSuffix = "__bg"
	return strings.HasSuffix(agentID, bgSuffix)
}
|
||||||
|
|
||||||
|
// contextHasRole reports whether role appears among the whitespace-separated
// entries of roles. Both sides are lower-cased before comparison.
func contextHasRole(roles string, role string) bool {
	wanted := strings.ToLower(role)
	entries := strings.Fields(strings.ToLower(roles))
	for i := range entries {
		if entries[i] == wanted {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
func (p *ttmlParser) addMainLine(lang string, lineKey string, line model.Line) {
|
func (p *ttmlParser) addMainLine(lang string, lineKey string, line model.Line) {
|
||||||
lang = normalizeTTMLLang(lang)
|
lang = normalizeTTMLLang(lang)
|
||||||
if _, ok := p.mainLinesByLang[lang]; !ok {
|
if _, ok := p.mainLinesByLang[lang]; !ok {
|
||||||
@ -495,6 +701,9 @@ func (p *ttmlParser) childContext(attrs []xml.Attr, parent ttmlTimingContext) tt
|
|||||||
if lang, ok := attrValue(attrs, "lang"); ok {
|
if lang, ok := attrValue(attrs, "lang"); ok {
|
||||||
ctx.lang = normalizeTTMLLang(lang)
|
ctx.lang = normalizeTTMLLang(lang)
|
||||||
}
|
}
|
||||||
|
if agentID, ok := attrValue(attrs, "agent"); ok {
|
||||||
|
ctx.agentID = strings.TrimSpace(agentID)
|
||||||
|
}
|
||||||
if role, ok := attrValue(attrs, "role"); ok {
|
if role, ok := attrValue(attrs, "role"); ok {
|
||||||
role = strings.TrimSpace(role)
|
role = strings.TrimSpace(role)
|
||||||
if role != "" {
|
if role != "" {
|
||||||
@ -805,6 +1014,55 @@ func attrValue(attrs []xml.Attr, key string) (string, bool) {
|
|||||||
return "", false
|
return "", false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func attrOrEmpty(attrs []xml.Attr, key string) string {
|
||||||
|
value, _ := attrValue(attrs, key)
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *ttmlParser) collectElementText(start xml.StartElement) (string, error) {
|
||||||
|
var text strings.Builder
|
||||||
|
|
||||||
|
for {
|
||||||
|
token, err := p.decoder.Token()
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch t := token.(type) {
|
||||||
|
case xml.StartElement:
|
||||||
|
value, err := p.collectElementText(t)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
text.WriteString(value)
|
||||||
|
case xml.EndElement:
|
||||||
|
if strings.EqualFold(t.Name.Local, start.Name.Local) {
|
||||||
|
return text.String(), nil
|
||||||
|
}
|
||||||
|
case xml.CharData:
|
||||||
|
text.WriteString(string(t))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *ttmlParser) skipElement(_ xml.StartElement) error {
|
||||||
|
depth := 1
|
||||||
|
for depth > 0 {
|
||||||
|
token, err := p.decoder.Token()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch token.(type) {
|
||||||
|
case xml.StartElement:
|
||||||
|
depth++
|
||||||
|
case xml.EndElement:
|
||||||
|
depth--
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func normalizeTTMLLang(lang string) string {
|
func normalizeTTMLLang(lang string) string {
|
||||||
lang = strings.ToLower(strings.TrimSpace(lang))
|
lang = strings.ToLower(strings.TrimSpace(lang))
|
||||||
if lang == "" {
|
if lang == "" {
|
||||||
@ -840,42 +1098,7 @@ func linesAreSynced(lines []model.Line) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func hydrateLineTimingFromTokens(line model.Line) model.Line {
|
func hydrateLineTimingFromTokens(line model.Line) model.Line {
|
||||||
if len(line.Cue) == 0 {
|
return model.NormalizeLineTiming(line)
|
||||||
return line
|
|
||||||
}
|
|
||||||
|
|
||||||
var earliestStart *int64
|
|
||||||
var latestEnd *int64
|
|
||||||
for i := range line.Cue {
|
|
||||||
token := line.Cue[i]
|
|
||||||
if token.Start != nil {
|
|
||||||
if earliestStart == nil || *token.Start < *earliestStart {
|
|
||||||
v := *token.Start
|
|
||||||
earliestStart = &v
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
candidateEnd := token.End
|
|
||||||
if candidateEnd == nil {
|
|
||||||
candidateEnd = token.Start
|
|
||||||
}
|
|
||||||
if candidateEnd != nil {
|
|
||||||
if latestEnd == nil || *candidateEnd > *latestEnd {
|
|
||||||
v := *candidateEnd
|
|
||||||
latestEnd = &v
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if line.Start == nil && earliestStart != nil {
|
|
||||||
v := *earliestStart
|
|
||||||
line.Start = &v
|
|
||||||
}
|
|
||||||
if line.End == nil && latestEnd != nil {
|
|
||||||
v := *latestEnd
|
|
||||||
line.End = &v
|
|
||||||
}
|
|
||||||
return line
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func max(v float64, fallback float64) float64 {
|
func max(v float64, fallback float64) float64 {
|
||||||
|
|||||||
@ -129,6 +129,10 @@ var _ = Describe("parseTTML", func() {
|
|||||||
list, err := parseTTML(content)
|
list, err := parseTTML(content)
|
||||||
Expect(err).ToNot(HaveOccurred())
|
Expect(err).ToNot(HaveOccurred())
|
||||||
Expect(list).To(HaveLen(1))
|
Expect(list).To(HaveLen(1))
|
||||||
|
Expect(list[0].Agents).To(Equal([]model.Agent{
|
||||||
|
{ID: "main", Role: "main"},
|
||||||
|
{ID: "main__bg", Role: "bg"},
|
||||||
|
}))
|
||||||
Expect(list[0].Line).To(HaveLen(1))
|
Expect(list[0].Line).To(HaveLen(1))
|
||||||
|
|
||||||
line := list[0].Line[0]
|
line := list[0].Line[0]
|
||||||
@ -137,9 +141,41 @@ var _ = Describe("parseTTML", func() {
|
|||||||
Expect(line.End).To(Equal(gg.P(int64(3000))))
|
Expect(line.End).To(Equal(gg.P(int64(3000))))
|
||||||
Expect(line.Cue).To(HaveLen(3))
|
Expect(line.Cue).To(HaveLen(3))
|
||||||
|
|
||||||
Expect(line.Cue[0]).To(Equal(model.Cue{Start: gg.P(int64(1000)), End: gg.P(int64(1400)), Value: "He"}))
|
Expect(line.Cue[0]).To(Equal(model.Cue{Start: gg.P(int64(1000)), End: gg.P(int64(1400)), Value: "He", AgentID: "main"}))
|
||||||
Expect(line.Cue[1]).To(Equal(model.Cue{Start: gg.P(int64(1400)), End: gg.P(int64(1800)), Value: "llo"}))
|
Expect(line.Cue[1]).To(Equal(model.Cue{Start: gg.P(int64(1400)), End: gg.P(int64(1800)), Value: "llo", AgentID: "main"}))
|
||||||
Expect(line.Cue[2]).To(Equal(model.Cue{Start: gg.P(int64(2000)), End: gg.P(int64(2500)), Value: "echo", Role: "x-bg"}))
|
Expect(line.Cue[2]).To(Equal(model.Cue{Start: gg.P(int64(2000)), End: gg.P(int64(2500)), Value: "echo", AgentID: "main__bg"}))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("should parse named TTML agents into main, voice, and group roles", func() {
|
||||||
|
content := []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<tt xmlns="http://www.w3.org/ns/ttml" xmlns:ttm="http://www.w3.org/ns/ttml#metadata">
|
||||||
|
<head>
|
||||||
|
<metadata>
|
||||||
|
<ttm:agent xml:id="v1" type="person"><ttm:name>Chris Martin</ttm:name></ttm:agent>
|
||||||
|
<ttm:agent xml:id="v2" type="person"><ttm:name>Jin</ttm:name></ttm:agent>
|
||||||
|
<ttm:agent xml:id="v1000" type="group"><ttm:name>All</ttm:name></ttm:agent>
|
||||||
|
</metadata>
|
||||||
|
</head>
|
||||||
|
<body xml:lang="eng">
|
||||||
|
<div>
|
||||||
|
<p begin="1s" end="2s" ttm:agent="v1"><span begin="1s" end="1.5s">You</span></p>
|
||||||
|
<p begin="2s" end="3s" ttm:agent="v2"><span begin="2s" end="2.5s">and</span></p>
|
||||||
|
<p begin="3s" end="4s" ttm:agent="v1000"><span begin="3s" end="3.5s">All</span></p>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</tt>`)
|
||||||
|
|
||||||
|
list, err := parseTTML(content)
|
||||||
|
Expect(err).ToNot(HaveOccurred())
|
||||||
|
Expect(list).To(HaveLen(1))
|
||||||
|
Expect(list[0].Agents).To(Equal([]model.Agent{
|
||||||
|
{ID: "v1", Role: "main", Name: "Chris Martin"},
|
||||||
|
{ID: "v2", Role: "voice", Name: "Jin"},
|
||||||
|
{ID: "v1000", Role: "group", Name: "All"},
|
||||||
|
}))
|
||||||
|
Expect(list[0].Line[0].Cue[0].AgentID).To(Equal("v1"))
|
||||||
|
Expect(list[0].Line[1].Cue[0].AgentID).To(Equal("v2"))
|
||||||
|
Expect(list[0].Line[2].Cue[0].AgentID).To(Equal("v1000"))
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
160
model/lyrics.go
160
model/lyrics.go
@ -12,10 +12,16 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type Cue struct {
|
type Cue struct {
|
||||||
Start *int64 `structs:"start,omitempty" json:"start,omitempty"`
|
Start *int64 `structs:"start,omitempty" json:"start,omitempty"`
|
||||||
End *int64 `structs:"end,omitempty" json:"end,omitempty"`
|
End *int64 `structs:"end,omitempty" json:"end,omitempty"`
|
||||||
Value string `structs:"value" json:"value"`
|
Value string `structs:"value" json:"value"`
|
||||||
Role string `structs:"role,omitempty" json:"role,omitempty"`
|
AgentID string `structs:"agentId,omitempty" json:"agentId,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type Agent struct {
|
||||||
|
ID string `structs:"id" json:"id"`
|
||||||
|
Role string `structs:"role" json:"role"`
|
||||||
|
Name string `structs:"name,omitempty" json:"name,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Line struct {
|
type Line struct {
|
||||||
@ -26,13 +32,14 @@ type Line struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Lyrics struct {
|
type Lyrics struct {
|
||||||
DisplayArtist string `structs:"displayArtist,omitempty" json:"displayArtist,omitempty"`
|
DisplayArtist string `structs:"displayArtist,omitempty" json:"displayArtist,omitempty"`
|
||||||
DisplayTitle string `structs:"displayTitle,omitempty" json:"displayTitle,omitempty"`
|
DisplayTitle string `structs:"displayTitle,omitempty" json:"displayTitle,omitempty"`
|
||||||
Kind string `structs:"kind,omitempty" json:"kind,omitempty"`
|
Kind string `structs:"kind,omitempty" json:"kind,omitempty"`
|
||||||
Lang string `structs:"lang" json:"lang"`
|
Lang string `structs:"lang" json:"lang"`
|
||||||
Line []Line `structs:"line" json:"line"`
|
Agents []Agent `structs:"agents,omitempty" json:"agents,omitempty"`
|
||||||
Offset *int64 `structs:"offset,omitempty" json:"offset,omitempty"`
|
Line []Line `structs:"line" json:"line"`
|
||||||
Synced bool `structs:"synced" json:"synced"`
|
Offset *int64 `structs:"offset,omitempty" json:"offset,omitempty"`
|
||||||
|
Synced bool `structs:"synced" json:"synced"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// support the standard [mm:ss.mm], as well as [hh:*] and [*.mmm]
|
// support the standard [mm:ss.mm], as well as [hh:*] and [*.mmm]
|
||||||
@ -199,7 +206,7 @@ func ToLyrics(language, text string) (*Lyrics, error) {
|
|||||||
DisplayArtist: artist,
|
DisplayArtist: artist,
|
||||||
DisplayTitle: title,
|
DisplayTitle: title,
|
||||||
Lang: language,
|
Lang: language,
|
||||||
Line: structuredLines,
|
Line: NormalizeCueLines(structuredLines),
|
||||||
Offset: offset,
|
Offset: offset,
|
||||||
Synced: synced,
|
Synced: synced,
|
||||||
}
|
}
|
||||||
@ -265,11 +272,6 @@ func parseEnhancedCues(text string) []Cue {
|
|||||||
Start: &start,
|
Start: &start,
|
||||||
Value: seg.text,
|
Value: seg.text,
|
||||||
}
|
}
|
||||||
// Derive End from the next cue's Start
|
|
||||||
if i+1 < len(segments) {
|
|
||||||
end := segments[i+1].start
|
|
||||||
cues[i].End = &end
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return cues
|
return cues
|
||||||
}
|
}
|
||||||
@ -338,3 +340,127 @@ func parseTime(line string, match []int) (int64, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type LyricList []Lyrics
|
type LyricList []Lyrics
|
||||||
|
|
||||||
|
func NormalizeLyrics(lyrics Lyrics) Lyrics {
|
||||||
|
lyrics.Line = NormalizeCueLines(lyrics.Line)
|
||||||
|
if len(lyrics.Agents) == 0 {
|
||||||
|
lyrics.Agents = nil
|
||||||
|
}
|
||||||
|
return lyrics
|
||||||
|
}
|
||||||
|
|
||||||
|
func NormalizeCueLines(lines []Line) []Line {
|
||||||
|
if len(lines) == 0 {
|
||||||
|
return lines
|
||||||
|
}
|
||||||
|
|
||||||
|
normalized := make([]Line, len(lines))
|
||||||
|
copy(normalized, lines)
|
||||||
|
|
||||||
|
for i := range normalized {
|
||||||
|
var fallbackEnd *int64
|
||||||
|
if normalized[i].End != nil {
|
||||||
|
v := *normalized[i].End
|
||||||
|
fallbackEnd = &v
|
||||||
|
} else if i+1 < len(normalized) && normalized[i+1].Start != nil {
|
||||||
|
v := *normalized[i+1].Start
|
||||||
|
fallbackEnd = &v
|
||||||
|
}
|
||||||
|
|
||||||
|
normalized[i] = normalizeCueLine(normalized[i], fallbackEnd)
|
||||||
|
}
|
||||||
|
|
||||||
|
return normalized
|
||||||
|
}
|
||||||
|
|
||||||
|
func NormalizeLineTiming(line Line) Line {
|
||||||
|
if len(line.Cue) == 0 {
|
||||||
|
return line
|
||||||
|
}
|
||||||
|
|
||||||
|
var earliestStart *int64
|
||||||
|
var latestEnd *int64
|
||||||
|
for i := range line.Cue {
|
||||||
|
token := line.Cue[i]
|
||||||
|
if token.Start != nil {
|
||||||
|
if earliestStart == nil || *token.Start < *earliestStart {
|
||||||
|
v := *token.Start
|
||||||
|
earliestStart = &v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
candidateEnd := token.End
|
||||||
|
if candidateEnd == nil {
|
||||||
|
candidateEnd = token.Start
|
||||||
|
}
|
||||||
|
if candidateEnd != nil {
|
||||||
|
if latestEnd == nil || *candidateEnd > *latestEnd {
|
||||||
|
v := *candidateEnd
|
||||||
|
latestEnd = &v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if line.Start == nil && earliestStart != nil {
|
||||||
|
v := *earliestStart
|
||||||
|
line.Start = &v
|
||||||
|
}
|
||||||
|
if line.End == nil && latestEnd != nil {
|
||||||
|
v := *latestEnd
|
||||||
|
line.End = &v
|
||||||
|
}
|
||||||
|
return line
|
||||||
|
}
|
||||||
|
|
||||||
|
func normalizeCueLine(line Line, fallbackEnd *int64) Line {
|
||||||
|
if len(line.Cue) == 0 {
|
||||||
|
return line
|
||||||
|
}
|
||||||
|
|
||||||
|
hasAnyEnd := false
|
||||||
|
for i := range line.Cue {
|
||||||
|
if line.Cue[i].End != nil {
|
||||||
|
hasAnyEnd = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !hasAnyEnd {
|
||||||
|
line.Cue = clearCueEnds(line.Cue)
|
||||||
|
return NormalizeLineTiming(line)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range line.Cue {
|
||||||
|
if line.Cue[i].End != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if i+1 < len(line.Cue) && line.Cue[i+1].Start != nil {
|
||||||
|
v := *line.Cue[i+1].Start
|
||||||
|
line.Cue[i].End = &v
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if fallbackEnd != nil {
|
||||||
|
v := *fallbackEnd
|
||||||
|
line.Cue[i].End = &v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range line.Cue {
|
||||||
|
if line.Cue[i].End == nil {
|
||||||
|
line.Cue = clearCueEnds(line.Cue)
|
||||||
|
return NormalizeLineTiming(line)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NormalizeLineTiming(line)
|
||||||
|
}
|
||||||
|
|
||||||
|
func clearCueEnds(cues []Cue) []Cue {
|
||||||
|
normalized := make([]Cue, len(cues))
|
||||||
|
copy(normalized, cues)
|
||||||
|
for i := range normalized {
|
||||||
|
normalized[i].End = nil
|
||||||
|
}
|
||||||
|
return normalized
|
||||||
|
}
|
||||||
|
|||||||
@ -129,8 +129,8 @@ var _ = Describe("ToLyrics", func() {
|
|||||||
Expect(line0.Start).To(Equal(&t1000))
|
Expect(line0.Start).To(Equal(&t1000))
|
||||||
Expect(line0.Value).To(Equal("Some lyrics here"))
|
Expect(line0.Value).To(Equal("Some lyrics here"))
|
||||||
Expect(line0.Cue).To(Equal([]Cue{
|
Expect(line0.Cue).To(Equal([]Cue{
|
||||||
{Start: &t1000, End: &t1500, Value: "Some "},
|
{Start: &t1000, Value: "Some "},
|
||||||
{Start: &t1500, End: &t2000, Value: "lyrics "},
|
{Start: &t1500, Value: "lyrics "},
|
||||||
{Start: &t2000, Value: "here"},
|
{Start: &t2000, Value: "here"},
|
||||||
}))
|
}))
|
||||||
|
|
||||||
@ -138,7 +138,7 @@ var _ = Describe("ToLyrics", func() {
|
|||||||
Expect(line1.Start).To(Equal(&t3000))
|
Expect(line1.Start).To(Equal(&t3000))
|
||||||
Expect(line1.Value).To(Equal("More words"))
|
Expect(line1.Value).To(Equal("More words"))
|
||||||
Expect(line1.Cue).To(Equal([]Cue{
|
Expect(line1.Cue).To(Equal([]Cue{
|
||||||
{Start: &t3000, End: &t3500, Value: "More "},
|
{Start: &t3000, Value: "More "},
|
||||||
{Start: &t3500, Value: "words"},
|
{Start: &t3500, Value: "words"},
|
||||||
}))
|
}))
|
||||||
})
|
})
|
||||||
@ -161,7 +161,7 @@ var _ = Describe("ToLyrics", func() {
|
|||||||
t1000, t1500, t5000, t5500 := int64(1000), int64(1500), int64(5000), int64(5500)
|
t1000, t1500, t5000, t5500 := int64(1000), int64(1500), int64(5000), int64(5500)
|
||||||
|
|
||||||
Expect(lyrics.Line[0].Cue).To(Equal([]Cue{
|
Expect(lyrics.Line[0].Cue).To(Equal([]Cue{
|
||||||
{Start: &t1000, End: &t1500, Value: "Some "},
|
{Start: &t1000, Value: "Some "},
|
||||||
{Start: &t1500, Value: "lyrics"},
|
{Start: &t1500, Value: "lyrics"},
|
||||||
}))
|
}))
|
||||||
Expect(lyrics.Line[0].Value).To(Equal("Some lyrics"))
|
Expect(lyrics.Line[0].Value).To(Equal("Some lyrics"))
|
||||||
@ -170,7 +170,7 @@ var _ = Describe("ToLyrics", func() {
|
|||||||
Expect(lyrics.Line[1].Value).To(Equal("Plain line"))
|
Expect(lyrics.Line[1].Value).To(Equal("Plain line"))
|
||||||
|
|
||||||
Expect(lyrics.Line[2].Cue).To(Equal([]Cue{
|
Expect(lyrics.Line[2].Cue).To(Equal([]Cue{
|
||||||
{Start: &t5000, End: &t5500, Value: "More "},
|
{Start: &t5000, Value: "More "},
|
||||||
{Start: &t5500, Value: "words"},
|
{Start: &t5500, Value: "words"},
|
||||||
}))
|
}))
|
||||||
Expect(lyrics.Line[2].Value).To(Equal("More words"))
|
Expect(lyrics.Line[2].Value).To(Equal("More words"))
|
||||||
|
|||||||
@ -476,14 +476,22 @@ func mapExplicitStatus(explicitStatus string) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// sanitizeRole strips the TTML x- prefix from role values for the API.
|
|
||||||
func sanitizeRole(role string) string {
|
|
||||||
return strings.TrimPrefix(role, "x-")
|
|
||||||
}
|
|
||||||
|
|
||||||
func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics, enhanced bool) responses.StructuredLyric {
|
func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics, enhanced bool) responses.StructuredLyric {
|
||||||
lines := make([]responses.Line, len(lyrics.Line))
|
lines := make([]responses.Line, len(lyrics.Line))
|
||||||
var cueLines []responses.CueLine
|
var cueLines []responses.CueLine
|
||||||
|
agentOrderByID := make(map[string]int, len(lyrics.Agents))
|
||||||
|
agentRoleByID := make(map[string]string, len(lyrics.Agents))
|
||||||
|
responseAgents := make([]responses.Agent, 0, len(lyrics.Agents))
|
||||||
|
|
||||||
|
for i, agent := range lyrics.Agents {
|
||||||
|
agentOrderByID[agent.ID] = i
|
||||||
|
agentRoleByID[agent.ID] = agent.Role
|
||||||
|
responseAgents = append(responseAgents, responses.Agent{
|
||||||
|
ID: agent.ID,
|
||||||
|
Role: agent.Role,
|
||||||
|
Name: agent.Name,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
for i, line := range lyrics.Line {
|
for i, line := range lyrics.Line {
|
||||||
lines[i] = responses.Line{
|
lines[i] = responses.Line{
|
||||||
@ -494,41 +502,50 @@ func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics, enhanced boo
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Group cues by role, preserving order of first appearance
|
agentOrder := make([]string, 0, 2)
|
||||||
roleOrder := make([]string, 0, 2)
|
cuesByAgent := make(map[string][]model.Cue)
|
||||||
cuesByRole := make(map[string][]responses.LyricCue)
|
|
||||||
for _, cue := range line.Cue {
|
for _, cue := range line.Cue {
|
||||||
if cue.Start == nil {
|
if cue.Start == nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
role := sanitizeRole(cue.Role)
|
agentID := strings.TrimSpace(cue.AgentID)
|
||||||
if _, exists := cuesByRole[role]; !exists {
|
if _, exists := cuesByAgent[agentID]; !exists {
|
||||||
roleOrder = append(roleOrder, role)
|
agentOrder = append(agentOrder, agentID)
|
||||||
}
|
}
|
||||||
cuesByRole[role] = append(cuesByRole[role], responses.LyricCue{
|
cuesByAgent[agentID] = append(cuesByAgent[agentID], cue)
|
||||||
Start: *cue.Start,
|
|
||||||
End: cue.End,
|
|
||||||
Value: cue.Value,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure main vocals (empty role) always comes first
|
sort.SliceStable(agentOrder, func(i, j int) bool {
|
||||||
sort.SliceStable(roleOrder, func(i, j int) bool {
|
leftRole := agentRoleByID[agentOrder[i]]
|
||||||
return roleOrder[i] == "" && roleOrder[j] != ""
|
rightRole := agentRoleByID[agentOrder[j]]
|
||||||
|
if leftRole == "main" && rightRole != "main" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if rightRole == "main" && leftRole != "main" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
leftOrder, leftOK := agentOrderByID[agentOrder[i]]
|
||||||
|
rightOrder, rightOK := agentOrderByID[agentOrder[j]]
|
||||||
|
if leftOK && rightOK && leftOrder != rightOrder {
|
||||||
|
return leftOrder < rightOrder
|
||||||
|
}
|
||||||
|
if leftOK != rightOK {
|
||||||
|
return leftOK
|
||||||
|
}
|
||||||
|
return i < j
|
||||||
})
|
})
|
||||||
|
|
||||||
// Create a separate CueLine for each role group
|
for _, agentID := range agentOrder {
|
||||||
for _, role := range roleOrder {
|
|
||||||
cues := cuesByRole[role]
|
|
||||||
cueLine := responses.CueLine{
|
cueLine := responses.CueLine{
|
||||||
Index: int32(i),
|
Index: int32(i),
|
||||||
Start: line.Start,
|
Start: line.Start,
|
||||||
End: line.End,
|
End: line.End,
|
||||||
Value: line.Value,
|
Value: line.Value,
|
||||||
Cue: cues,
|
Cue: buildLyricCues(cuesByAgent[agentID], line.End),
|
||||||
}
|
}
|
||||||
if role != "" {
|
if agentID != "" {
|
||||||
cueLine.Role = role
|
cueLine.AgentID = agentID
|
||||||
}
|
}
|
||||||
cueLines = append(cueLines, cueLine)
|
cueLines = append(cueLines, cueLine)
|
||||||
}
|
}
|
||||||
@ -550,6 +567,9 @@ func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics, enhanced boo
|
|||||||
kind = "main"
|
kind = "main"
|
||||||
}
|
}
|
||||||
structured.Kind = kind
|
structured.Kind = kind
|
||||||
|
if len(cueLines) > 0 && len(responseAgents) > 0 {
|
||||||
|
structured.Agents = responseAgents
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if structured.DisplayArtist == "" {
|
if structured.DisplayArtist == "" {
|
||||||
@ -562,6 +582,67 @@ func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics, enhanced boo
|
|||||||
return structured
|
return structured
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func buildLyricCues(cues []model.Cue, lineEnd *int64) []responses.LyricCue {
|
||||||
|
if len(cues) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
hasAnyEnd := false
|
||||||
|
for i := range cues {
|
||||||
|
if cues[i].End != nil {
|
||||||
|
hasAnyEnd = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
normalized := make([]responses.LyricCue, 0, len(cues))
|
||||||
|
for i := range cues {
|
||||||
|
if cues[i].Start == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
cue := responses.LyricCue{
|
||||||
|
Start: *cues[i].Start,
|
||||||
|
Value: cues[i].Value,
|
||||||
|
}
|
||||||
|
if hasAnyEnd {
|
||||||
|
end := cues[i].End
|
||||||
|
if end == nil {
|
||||||
|
if i+1 < len(cues) && cues[i+1].Start != nil {
|
||||||
|
v := *cues[i+1].Start
|
||||||
|
end = &v
|
||||||
|
} else if lineEnd != nil {
|
||||||
|
v := *lineEnd
|
||||||
|
end = &v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if end != nil && i+1 < len(cues) && cues[i+1].Start != nil && *end > *cues[i+1].Start {
|
||||||
|
v := *cues[i+1].Start
|
||||||
|
end = &v
|
||||||
|
}
|
||||||
|
if end != nil && *end < cue.Start {
|
||||||
|
v := cue.Start
|
||||||
|
end = &v
|
||||||
|
}
|
||||||
|
cue.End = end
|
||||||
|
}
|
||||||
|
normalized = append(normalized, cue)
|
||||||
|
}
|
||||||
|
|
||||||
|
if hasAnyEnd {
|
||||||
|
for i := range normalized {
|
||||||
|
if normalized[i].End == nil {
|
||||||
|
for j := range normalized {
|
||||||
|
normalized[j].End = nil
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return normalized
|
||||||
|
}
|
||||||
|
|
||||||
func buildLyricsList(mf *model.MediaFile, lyricsList model.LyricList, enhanced bool) *responses.LyricsList {
|
func buildLyricsList(mf *model.MediaFile, lyricsList model.LyricList, enhanced bool) *responses.LyricsList {
|
||||||
var filtered model.LyricList
|
var filtered model.LyricList
|
||||||
if enhanced {
|
if enhanced {
|
||||||
|
|||||||
@ -235,6 +235,7 @@ var _ = Describe("MediaRetrievalController", func() {
|
|||||||
Expect(realLyric.Kind).To(Equal(expectedLyric.Kind))
|
Expect(realLyric.Kind).To(Equal(expectedLyric.Kind))
|
||||||
Expect(realLyric.Lang).To(Equal(expectedLyric.Lang))
|
Expect(realLyric.Lang).To(Equal(expectedLyric.Lang))
|
||||||
Expect(realLyric.Synced).To(Equal(expectedLyric.Synced))
|
Expect(realLyric.Synced).To(Equal(expectedLyric.Synced))
|
||||||
|
Expect(realLyric.Agents).To(Equal(expectedLyric.Agents))
|
||||||
|
|
||||||
if expectedLyric.Offset == nil {
|
if expectedLyric.Offset == nil {
|
||||||
Expect(realLyric.Offset).To(BeNil())
|
Expect(realLyric.Offset).To(BeNil())
|
||||||
@ -259,7 +260,7 @@ var _ = Describe("MediaRetrievalController", func() {
|
|||||||
expectedCueLine := expectedLyric.CueLine[j]
|
expectedCueLine := expectedLyric.CueLine[j]
|
||||||
Expect(realCueLine.Index).To(Equal(expectedCueLine.Index))
|
Expect(realCueLine.Index).To(Equal(expectedCueLine.Index))
|
||||||
Expect(realCueLine.Value).To(Equal(expectedCueLine.Value))
|
Expect(realCueLine.Value).To(Equal(expectedCueLine.Value))
|
||||||
Expect(realCueLine.Role).To(Equal(expectedCueLine.Role))
|
Expect(realCueLine.AgentID).To(Equal(expectedCueLine.AgentID))
|
||||||
if expectedCueLine.Start == nil {
|
if expectedCueLine.Start == nil {
|
||||||
Expect(realCueLine.Start).To(BeNil())
|
Expect(realCueLine.Start).To(BeNil())
|
||||||
} else {
|
} else {
|
||||||
@ -542,6 +543,7 @@ var _ = Describe("MediaRetrievalController", func() {
|
|||||||
lyricsJson, err := json.Marshal(model.LyricList{
|
lyricsJson, err := json.Marshal(model.LyricList{
|
||||||
{
|
{
|
||||||
Lang: "eng",
|
Lang: "eng",
|
||||||
|
Agents: []model.Agent{{ID: "lead", Role: "main"}, {ID: "lead__bg", Role: "bg"}},
|
||||||
Synced: true,
|
Synced: true,
|
||||||
Line: []model.Line{
|
Line: []model.Line{
|
||||||
{
|
{
|
||||||
@ -550,15 +552,16 @@ var _ = Describe("MediaRetrievalController", func() {
|
|||||||
Value: "Hello echo",
|
Value: "Hello echo",
|
||||||
Cue: []model.Cue{
|
Cue: []model.Cue{
|
||||||
{
|
{
|
||||||
Start: &tokenStartA,
|
Start: &tokenStartA,
|
||||||
End: &tokenEndA,
|
End: &tokenEndA,
|
||||||
Value: "Hello",
|
Value: "Hello",
|
||||||
|
AgentID: "lead",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Start: &tokenStartB,
|
Start: &tokenStartB,
|
||||||
End: &tokenEndB,
|
End: &tokenEndB,
|
||||||
Value: "echo",
|
Value: "echo",
|
||||||
Role: "x-bg",
|
AgentID: "lead__bg",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@ -586,6 +589,10 @@ var _ = Describe("MediaRetrievalController", func() {
|
|||||||
Kind: "main",
|
Kind: "main",
|
||||||
Lang: "eng",
|
Lang: "eng",
|
||||||
Synced: true,
|
Synced: true,
|
||||||
|
Agents: []responses.Agent{
|
||||||
|
{ID: "lead", Role: "main"},
|
||||||
|
{ID: "lead__bg", Role: "bg"},
|
||||||
|
},
|
||||||
Line: []responses.Line{
|
Line: []responses.Line{
|
||||||
{
|
{
|
||||||
Start: &lineStart,
|
Start: &lineStart,
|
||||||
@ -594,10 +601,11 @@ var _ = Describe("MediaRetrievalController", func() {
|
|||||||
},
|
},
|
||||||
CueLine: []responses.CueLine{
|
CueLine: []responses.CueLine{
|
||||||
{
|
{
|
||||||
Index: 0,
|
Index: 0,
|
||||||
Start: &lineStart,
|
Start: &lineStart,
|
||||||
End: &lineEnd,
|
End: &lineEnd,
|
||||||
Value: "Hello echo",
|
Value: "Hello echo",
|
||||||
|
AgentID: "lead",
|
||||||
Cue: []responses.LyricCue{
|
Cue: []responses.LyricCue{
|
||||||
{
|
{
|
||||||
Start: tokenStartA,
|
Start: tokenStartA,
|
||||||
@ -607,11 +615,11 @@ var _ = Describe("MediaRetrievalController", func() {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Index: 0,
|
Index: 0,
|
||||||
Start: &lineStart,
|
Start: &lineStart,
|
||||||
End: &lineEnd,
|
End: &lineEnd,
|
||||||
Value: "Hello echo",
|
Value: "Hello echo",
|
||||||
Role: "bg",
|
AgentID: "lead__bg",
|
||||||
Cue: []responses.LyricCue{
|
Cue: []responses.LyricCue{
|
||||||
{
|
{
|
||||||
Start: tokenStartB,
|
Start: tokenStartB,
|
||||||
|
|||||||
@ -543,13 +543,19 @@ type LyricCue struct {
|
|||||||
Value string `xml:",chardata" json:"value"`
|
Value string `xml:",chardata" json:"value"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Agent is one entry of a structured lyric's agent list. It serializes to XML
// attributes and to JSON; Name is omitted from both when empty.
type Agent struct {
	// ID is the identifier of the agent.
	ID string `xml:"id,attr" json:"id"`
	// Role is the role assigned to the agent.
	Role string `xml:"role,attr" json:"role"`
	// Name is the optional display name of the agent.
	Name string `xml:"name,attr,omitempty" json:"name,omitempty"`
}
|
||||||
|
|
||||||
type CueLine struct {
|
type CueLine struct {
|
||||||
Index int32 `xml:"index,attr" json:"index"`
|
Index int32 `xml:"index,attr" json:"index"`
|
||||||
Start *int64 `xml:"start,attr,omitempty" json:"start,omitempty"`
|
Start *int64 `xml:"start,attr,omitempty" json:"start,omitempty"`
|
||||||
End *int64 `xml:"end,attr,omitempty" json:"end,omitempty"`
|
End *int64 `xml:"end,attr,omitempty" json:"end,omitempty"`
|
||||||
Value string `xml:"value,attr,omitempty" json:"value,omitempty"`
|
Value string `xml:"value,attr,omitempty" json:"value,omitempty"`
|
||||||
Role string `xml:"role,attr,omitempty" json:"role,omitempty"`
|
AgentID string `xml:"agentId,attr,omitempty" json:"agentId,omitempty"`
|
||||||
Cue []LyricCue `xml:"cue,omitempty" json:"cue,omitempty"`
|
Cue []LyricCue `xml:"cue,omitempty" json:"cue,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type StructuredLyric struct {
|
type StructuredLyric struct {
|
||||||
@ -558,6 +564,7 @@ type StructuredLyric struct {
|
|||||||
Kind string `xml:"kind,attr,omitempty" json:"kind,omitempty"`
|
Kind string `xml:"kind,attr,omitempty" json:"kind,omitempty"`
|
||||||
Lang string `xml:"lang,attr" json:"lang"`
|
Lang string `xml:"lang,attr" json:"lang"`
|
||||||
Line []Line `xml:"line" json:"line"`
|
Line []Line `xml:"line" json:"line"`
|
||||||
|
Agents []Agent `xml:"agent,omitempty" json:"agents,omitempty"`
|
||||||
CueLine []CueLine `xml:"cueLine,omitempty" json:"cueLine,omitempty"`
|
CueLine []CueLine `xml:"cueLine,omitempty" json:"cueLine,omitempty"`
|
||||||
Offset *int64 `xml:"offset,attr,omitempty" json:"offset,omitempty"`
|
Offset *int64 `xml:"offset,attr,omitempty" json:"offset,omitempty"`
|
||||||
Synced bool `xml:"synced,attr" json:"synced"`
|
Synced bool `xml:"synced,attr" json:"synced"`
|
||||||
|
|||||||
5
tests/fixtures/test.elrc
vendored
Normal file
5
tests/fixtures/test.elrc
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
[ar:ELRC Artist]
|
||||||
|
[ti:ELRC Song]
|
||||||
|
[lang:eng]
|
||||||
|
[00:01.00]<00:01.00>Lead <00:01.50>words
|
||||||
|
[00:03.00]Fallback line
|
||||||
7
tests/fixtures/test.srt
vendored
Normal file
7
tests/fixtures/test.srt
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
1
|
||||||
|
00:00:18,800 --> 00:00:22,800
|
||||||
|
We're from subtitles
|
||||||
|
|
||||||
|
2
|
||||||
|
00:00:22,801 --> 00:00:26,000
|
||||||
|
Another subtitle line
|
||||||
@ -108,7 +108,7 @@ const PlayerToolbar = ({
|
|||||||
)
|
)
|
||||||
|
|
||||||
const toggleLyricsButton = (
|
const toggleLyricsButton = (
|
||||||
<Tooltip title="Toggle synchronized lyrics">
|
<Tooltip title="Toggle lyrics">
|
||||||
<span>
|
<span>
|
||||||
<IconButton
|
<IconButton
|
||||||
size={isDesktop ? 'small' : undefined}
|
size={isDesktop ? 'small' : undefined}
|
||||||
|
|||||||
@ -62,6 +62,11 @@ const hasTimedLines = (lyric) =>
|
|||||||
Array.isArray(lyric.line) &&
|
Array.isArray(lyric.line) &&
|
||||||
lyric.line.some((line) => Number.isFinite(Number(line.start)))
|
lyric.line.some((line) => Number.isFinite(Number(line.start)))
|
||||||
|
|
||||||
|
// Narrows a list of lyrics to the entries that have timed lines. When none of
// them is timed, the original list is returned unchanged.
const preferTimedLyrics = (lyrics) => {
  const timedOnly = lyrics.filter((lyric) => hasTimedLines(lyric))
  if (timedOnly.length === 0) {
    return lyrics
  }
  return timedOnly
}
|
||||||
|
|
||||||
const normalizeToken = (token) => {
|
const normalizeToken = (token) => {
|
||||||
if (!token) {
|
if (!token) {
|
||||||
return null
|
return null
|
||||||
@ -77,10 +82,38 @@ const normalizeToken = (token) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const normalizeCueLine = (cueLine, fallbackIndex) => {
|
// Builds a Map from agent id to { id, role, name } out of
// structuredLyric.agents. Entries without a non-empty string id are skipped,
// the first entry wins when an id repeats, and non-string role/name values
// become ''. A missing or non-array agents field yields an empty Map.
const buildAgentLookup = (structuredLyric) => {
  const byId = new Map()
  const declared = structuredLyric?.agents
  if (!Array.isArray(declared)) {
    return byId
  }

  declared.forEach((entry) => {
    const id = typeof entry?.id === 'string' ? entry.id : ''
    if (id === '' || byId.has(id)) {
      return
    }
    const role = typeof entry?.role === 'string' ? entry.role : ''
    const name = typeof entry?.name === 'string' ? entry.name : ''
    byId.set(id, { id, role, name })
  })

  return byId
}
|
||||||
|
|
||||||
|
// Maps an agent to the role string used by the UI: '' when the agent has no
// role or its role is 'main', otherwise the agent's role as-is.
const deriveUiRole = (agent) => {
  const role = agent?.role
  return role && role !== 'main' ? role : ''
}
|
||||||
|
|
||||||
|
const normalizeCueLine = (cueLine, fallbackIndex, agentLookup) => {
|
||||||
const index = Number.isFinite(Number(cueLine?.index))
|
const index = Number.isFinite(Number(cueLine?.index))
|
||||||
? Number(cueLine.index)
|
? Number(cueLine.index)
|
||||||
: fallbackIndex
|
: fallbackIndex
|
||||||
|
const agentId = typeof cueLine?.agentId === 'string' ? cueLine.agentId : ''
|
||||||
|
const agent = agentId ? agentLookup.get(agentId) || null : null
|
||||||
|
const fallbackRole =
|
||||||
|
typeof cueLine?.role === 'string' ? cueLine.role : ''
|
||||||
const tokens = sortTokensByStart(
|
const tokens = sortTokensByStart(
|
||||||
Array.isArray(cueLine?.cue)
|
Array.isArray(cueLine?.cue)
|
||||||
? cueLine.cue.map(normalizeToken).filter(Boolean)
|
? cueLine.cue.map(normalizeToken).filter(Boolean)
|
||||||
@ -92,7 +125,10 @@ const normalizeCueLine = (cueLine, fallbackIndex) => {
|
|||||||
start: toTime(cueLine?.start),
|
start: toTime(cueLine?.start),
|
||||||
end: toTime(cueLine?.end),
|
end: toTime(cueLine?.end),
|
||||||
value: typeof cueLine?.value === 'string' ? cueLine.value : '',
|
value: typeof cueLine?.value === 'string' ? cueLine.value : '',
|
||||||
role: typeof cueLine?.role === 'string' ? cueLine.role : '',
|
role: agent ? deriveUiRole(agent) : fallbackRole,
|
||||||
|
agentId,
|
||||||
|
agentRole: agent?.role || fallbackRole,
|
||||||
|
agentName: agent?.name || '',
|
||||||
tokens,
|
tokens,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -194,6 +230,9 @@ const buildSyntheticWordTokens = (line, token) => {
|
|||||||
end: baseStart + (duration * (idx + 1)) / chunks.length,
|
end: baseStart + (duration * (idx + 1)) / chunks.length,
|
||||||
value: chunk,
|
value: chunk,
|
||||||
role: typeof token?.role === 'string' ? token.role : '',
|
role: typeof token?.role === 'string' ? token.role : '',
|
||||||
|
agentId: typeof token?.agentId === 'string' ? token.agentId : '',
|
||||||
|
agentName: typeof token?.agentName === 'string' ? token.agentName : '',
|
||||||
|
agentRole: typeof token?.agentRole === 'string' ? token.agentRole : '',
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -240,8 +279,8 @@ export const selectLyricLayers = (structuredLyrics, preferredLanguage) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const synced = structuredLyrics.filter(hasTimedLines)
|
const available = structuredLyrics.filter(hasStructuredLyricContent)
|
||||||
if (synced.length === 0) {
|
if (available.length === 0) {
|
||||||
return {
|
return {
|
||||||
main: null,
|
main: null,
|
||||||
translation: null,
|
translation: null,
|
||||||
@ -255,22 +294,25 @@ export const selectLyricLayers = (structuredLyrics, preferredLanguage) => {
|
|||||||
[LYRIC_KIND_PRONUNCIATION]: [],
|
[LYRIC_KIND_PRONUNCIATION]: [],
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const lyric of synced) {
|
for (const lyric of available) {
|
||||||
grouped[normalizeLyricKind(lyric?.kind)].push(lyric)
|
grouped[normalizeLyricKind(lyric?.kind)].push(lyric)
|
||||||
}
|
}
|
||||||
|
|
||||||
const mainCandidates = grouped[LYRIC_KIND_MAIN].length
|
const mainCandidates = grouped[LYRIC_KIND_MAIN].length
|
||||||
? grouped[LYRIC_KIND_MAIN]
|
? grouped[LYRIC_KIND_MAIN]
|
||||||
: synced
|
: available
|
||||||
|
|
||||||
return {
|
return {
|
||||||
main: pickLyricByLanguage(mainCandidates, preferredLanguage),
|
main: pickLyricByLanguage(
|
||||||
|
preferTimedLyrics(mainCandidates),
|
||||||
|
preferredLanguage,
|
||||||
|
),
|
||||||
translation: pickLyricByLanguage(
|
translation: pickLyricByLanguage(
|
||||||
grouped[LYRIC_KIND_TRANSLATION],
|
preferTimedLyrics(grouped[LYRIC_KIND_TRANSLATION]),
|
||||||
preferredLanguage,
|
preferredLanguage,
|
||||||
),
|
),
|
||||||
pronunciation: pickLyricByLanguage(
|
pronunciation: pickLyricByLanguage(
|
||||||
grouped[LYRIC_KIND_PRONUNCIATION],
|
preferTimedLyrics(grouped[LYRIC_KIND_PRONUNCIATION]),
|
||||||
preferredLanguage,
|
preferredLanguage,
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
@ -316,6 +358,7 @@ export const buildKaraokeLines = (structuredLyric) => {
|
|||||||
return []
|
return []
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const agentLookup = buildAgentLookup(structuredLyric)
|
||||||
const baseLines = Array.isArray(structuredLyric.line)
|
const baseLines = Array.isArray(structuredLyric.line)
|
||||||
? structuredLyric.line
|
? structuredLyric.line
|
||||||
: []
|
: []
|
||||||
@ -328,12 +371,19 @@ export const buildKaraokeLines = (structuredLyric) => {
|
|||||||
? (() => {
|
? (() => {
|
||||||
const normalizedCueLines = rawCueLines.map(
|
const normalizedCueLines = rawCueLines.map(
|
||||||
(cueLine, fallbackIndex) => {
|
(cueLine, fallbackIndex) => {
|
||||||
const normalized = normalizeCueLine(cueLine, fallbackIndex)
|
const normalized = normalizeCueLine(
|
||||||
|
cueLine,
|
||||||
|
fallbackIndex,
|
||||||
|
agentLookup,
|
||||||
|
)
|
||||||
return {
|
return {
|
||||||
...normalized,
|
...normalized,
|
||||||
tokens: normalized.tokens.map((token) => ({
|
tokens: normalized.tokens.map((token) => ({
|
||||||
...token,
|
...token,
|
||||||
role: normalized.role,
|
role: normalized.role,
|
||||||
|
agentId: normalized.agentId,
|
||||||
|
agentName: normalized.agentName,
|
||||||
|
agentRole: normalized.agentRole,
|
||||||
})),
|
})),
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -366,6 +416,9 @@ export const buildKaraokeLines = (structuredLyric) => {
|
|||||||
start: first.start ?? toTime(baseLine.start) ?? fallbackStart,
|
start: first.start ?? toTime(baseLine.start) ?? fallbackStart,
|
||||||
end: first.end ?? toTime(baseLine.end) ?? fallbackEnd,
|
end: first.end ?? toTime(baseLine.end) ?? fallbackEnd,
|
||||||
value,
|
value,
|
||||||
|
agentId: first.agentId,
|
||||||
|
agentName: first.agentName,
|
||||||
|
agentRole: first.agentRole,
|
||||||
tokens,
|
tokens,
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
@ -124,6 +124,49 @@ describe('lyrics helpers', () => {
|
|||||||
expect(layers.pronunciation).toBeNull()
|
expect(layers.pronunciation).toBeNull()
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('falls back to unsynced lyric content when no timed track exists', () => {
|
||||||
|
const layers = selectLyricLayers(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
lang: 'eng',
|
||||||
|
synced: false,
|
||||||
|
line: [{ value: 'Plain embedded lyric' }],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
'eng',
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(layers.main).toEqual({
|
||||||
|
lang: 'eng',
|
||||||
|
synced: false,
|
||||||
|
line: [{ value: 'Plain embedded lyric' }],
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
it('still prefers timed lyrics when both timed and untimed tracks exist', () => {
|
||||||
|
const layers = selectLyricLayers(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
lang: 'eng',
|
||||||
|
synced: false,
|
||||||
|
line: [{ value: 'Plain lyric' }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
lang: 'eng',
|
||||||
|
synced: true,
|
||||||
|
line: [{ start: 1000, value: 'Timed lyric' }],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
'eng',
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(layers.main).toEqual({
|
||||||
|
lang: 'eng',
|
||||||
|
synced: true,
|
||||||
|
line: [{ start: 1000, value: 'Timed lyric' }],
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
it('matches layer line by timing for the active main line', () => {
|
it('matches layer line by timing for the active main line', () => {
|
||||||
const mainLines = [
|
const mainLines = [
|
||||||
{ index: 0, start: 1000, end: 1800, value: 'Line A', tokens: [] },
|
{ index: 0, start: 1000, end: 1800, value: 'Line A', tokens: [] },
|
||||||
@ -200,43 +243,88 @@ describe('lyrics helpers', () => {
|
|||||||
expect(getPreferredLyricLanguage()).toBe('pt-BR')
|
expect(getPreferredLyricLanguage()).toBe('pt-BR')
|
||||||
})
|
})
|
||||||
|
|
||||||
it('builds karaoke lines from cueLine payload', () => {
|
it('builds karaoke lines from agent-based cueLine payload', () => {
|
||||||
|
const lines = buildKaraokeLines({
|
||||||
|
lang: 'eng',
|
||||||
|
synced: true,
|
||||||
|
line: [{ start: 1000, end: 3000, value: 'Hello world' }],
|
||||||
|
agents: [
|
||||||
|
{ id: 'lead', role: 'main', name: 'Lead Vocal' },
|
||||||
|
{ id: 'backing', role: 'bg' },
|
||||||
|
],
|
||||||
|
cueLine: [
|
||||||
|
{
|
||||||
|
index: 0,
|
||||||
|
start: 1000,
|
||||||
|
end: 3000,
|
||||||
|
value: 'Hello world',
|
||||||
|
agentId: 'lead',
|
||||||
|
cue: [{ start: 1000, end: 1500, value: 'Hello' }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
index: 0,
|
||||||
|
start: 1000,
|
||||||
|
end: 3000,
|
||||||
|
value: 'Hello world',
|
||||||
|
agentId: 'backing',
|
||||||
|
cue: [{ start: 2000, end: 2500, value: 'world' }],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
})
|
||||||
|
|
||||||
|
expect(lines).toEqual([
|
||||||
|
{
|
||||||
|
agentId: 'lead',
|
||||||
|
agentName: 'Lead Vocal',
|
||||||
|
agentRole: 'main',
|
||||||
|
index: 0,
|
||||||
|
start: 1000,
|
||||||
|
end: 3000,
|
||||||
|
value: 'Hello world',
|
||||||
|
tokens: [
|
||||||
|
{
|
||||||
|
start: 1000,
|
||||||
|
end: 1500,
|
||||||
|
value: 'Hello',
|
||||||
|
role: '',
|
||||||
|
agentId: 'lead',
|
||||||
|
agentName: 'Lead Vocal',
|
||||||
|
agentRole: 'main',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
start: 2000,
|
||||||
|
end: 2500,
|
||||||
|
value: 'world',
|
||||||
|
role: 'bg',
|
||||||
|
agentId: 'backing',
|
||||||
|
agentName: '',
|
||||||
|
agentRole: 'bg',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
])
|
||||||
|
})
|
||||||
|
|
||||||
|
it('falls back to legacy cueLine role values when agents are absent', () => {
|
||||||
const lines = buildKaraokeLines({
|
const lines = buildKaraokeLines({
|
||||||
lang: 'eng',
|
lang: 'eng',
|
||||||
synced: true,
|
synced: true,
|
||||||
line: [{ start: 1000, end: 3000, value: 'Hello world' }],
|
line: [{ start: 1000, end: 3000, value: 'Hello world' }],
|
||||||
cueLine: [
|
cueLine: [
|
||||||
{
|
|
||||||
index: 0,
|
|
||||||
start: 1000,
|
|
||||||
end: 3000,
|
|
||||||
value: 'Hello world',
|
|
||||||
role: '',
|
|
||||||
cue: [{ start: 1000, end: 1500, value: 'Hello' }],
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
index: 0,
|
index: 0,
|
||||||
start: 1000,
|
start: 1000,
|
||||||
end: 3000,
|
end: 3000,
|
||||||
value: 'Hello world',
|
value: 'Hello world',
|
||||||
role: 'bg',
|
role: 'bg',
|
||||||
cue: [{ start: 2000, end: 2500, value: 'world' }],
|
cue: [{ start: 1000, end: 1500, value: 'Hello' }],
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
})
|
})
|
||||||
|
|
||||||
expect(lines).toEqual([
|
expect(lines[0].tokens[0].role).toBe('bg')
|
||||||
{
|
expect(lines[0].tokens[0].agentId).toBe('')
|
||||||
index: 0,
|
expect(lines[0].tokens[0].agentName).toBe('')
|
||||||
start: 1000,
|
|
||||||
end: 3000,
|
|
||||||
value: 'Hello world',
|
|
||||||
tokens: [
|
|
||||||
{ start: 1000, end: 1500, value: 'Hello', role: '' },
|
|
||||||
{ start: 2000, end: 2500, value: 'world', role: 'bg' },
|
|
||||||
],
|
|
||||||
},
|
|
||||||
])
|
|
||||||
})
|
})
|
||||||
|
|
||||||
it('sorts token timing by start to keep playback stable', () => {
|
it('sorts token timing by start to keep playback stable', () => {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user