fix: align songLyrics v2 with spec, add Enhanced LRC parser and bg role UI styling

- Fix LyricCue.Value XML tag: chardata instead of attribute
- Fix Kind field leaking to non-enhanced (v1) responses
- Guard against nil cue.Start values
- Add Enhanced LRC parser for word-level inline timing markers
- Add role-based UI styling: bg tokens render italic at 72% opacity
- Add integration test for Enhanced LRC file reading
- Add unit tests for Enhanced LRC parser
This commit is contained in:
ranokay 2026-03-05 22:20:32 +02:00
parent 1f9fb113f7
commit ff40c030d9
No known key found for this signature in database
8 changed files with 244 additions and 39 deletions

View File

@ -88,6 +88,43 @@ var _ = Describe("sources", func() {
}))
})
// Integration-style spec: reads lyrics for the test-enhanced fixture through
// fromExternalFile using the ".lrc" suffix and verifies both the header
// metadata and the per-word cues.
// NOTE(review): presumably fromExternalFile swaps the media file extension for
// the given suffix and loads tests/fixtures/test-enhanced.lrc — confirm.
It("should return Enhanced LRC lyrics with word-level cues from a file", func() {
mf := model.MediaFile{Path: "tests/fixtures/test-enhanced.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".lrc")
Expect(err).To(BeNil())
// Exactly one lyrics entry is expected, carrying artist, title and language metadata.
Expect(lyrics).To(HaveLen(1))
Expect(lyrics[0].DisplayArtist).To(Equal("Test Artist"))
Expect(lyrics[0].DisplayTitle).To(Equal("Enhanced Test"))
Expect(lyrics[0].Lang).To(Equal("eng"))
Expect(lyrics[0].Synced).To(BeTrue())
Expect(lyrics[0].Line).To(HaveLen(3))
// Line 1: has inline markers → Cue array populated
// The line Value is expected without markers; each cue Value keeps its trailing space.
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(1000))))
Expect(lyrics[0].Line[0].Value).To(Equal("Some lyrics here"))
Expect(lyrics[0].Line[0].Cue).To(HaveLen(3))
// Each cue's End is expected to equal the next cue's Start.
Expect(*lyrics[0].Line[0].Cue[0].Start).To(Equal(int64(1000)))
Expect(lyrics[0].Line[0].Cue[0].Value).To(Equal("Some "))
Expect(*lyrics[0].Line[0].Cue[0].End).To(Equal(int64(1500)))
Expect(*lyrics[0].Line[0].Cue[1].Start).To(Equal(int64(1500)))
Expect(lyrics[0].Line[0].Cue[1].Value).To(Equal("lyrics "))
Expect(*lyrics[0].Line[0].Cue[1].End).To(Equal(int64(2000)))
// The last cue of the line has no following marker, so no End is expected.
Expect(*lyrics[0].Line[0].Cue[2].Start).To(Equal(int64(2000)))
Expect(lyrics[0].Line[0].Cue[2].Value).To(Equal("here"))
Expect(lyrics[0].Line[0].Cue[2].End).To(BeNil())
// Line 2: has inline markers
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(3000))))
Expect(lyrics[0].Line[1].Value).To(Equal("More words"))
Expect(lyrics[0].Line[1].Cue).To(HaveLen(2))
// Line 3: plain line, no cues
Expect(lyrics[0].Line[2].Start).To(Equal(gg.P(int64(5000))))
Expect(lyrics[0].Line[2].Value).To(Equal("Plain line without inline markers"))
Expect(lyrics[0].Line[2].Cue).To(BeNil())
})
It("should return unsynchronized lyrics from a file", func() {
mf := model.MediaFile{Path: "tests/fixtures/test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".txt")

View File

@ -43,6 +43,10 @@ var (
syncRegex = regexp.MustCompile(`(^|\n)\s*` + timeRegexString)
timeRegex = regexp.MustCompile(timeRegexString)
lrcIdRegex = regexp.MustCompile(`\[(ar|ti|offset|lang):([^]]+)]`)
// Enhanced LRC: inline word-level timing markers like <00:12.34>
enhancedLRCTimeString = `<([0-9]{1,2}:)?([0-9]{1,2}):([0-9]{1,2})(.[0-9]{1,3})?>`
enhancedLRCRegex = regexp.MustCompile(enhancedLRCTimeString)
)
func (l Lyrics) IsEmpty() bool {
@ -116,9 +120,15 @@ func ToLyrics(language, text string) (*Lyrics, error) {
if validLine {
for idx := range timestamps {
cues := parseEnhancedCues(priorLine)
value := priorLine
if cues != nil {
value = stripEnhancedMarkers(value)
}
structuredLines = append(structuredLines, Line{
Start: &timestamps[idx],
Value: strings.TrimSpace(priorLine),
Value: strings.TrimSpace(value),
Cue: cues,
})
}
timestamps = nil
@ -164,9 +174,15 @@ func ToLyrics(language, text string) (*Lyrics, error) {
if validLine {
for idx := range timestamps {
cues := parseEnhancedCues(priorLine)
value := priorLine
if cues != nil {
value = stripEnhancedMarkers(value)
}
structuredLines = append(structuredLines, Line{
Start: &timestamps[idx],
Value: strings.TrimSpace(priorLine),
Value: strings.TrimSpace(value),
Cue: cues,
})
}
}
@ -190,6 +206,91 @@ func ToLyrics(language, text string) (*Lyrics, error) {
return &lyrics, nil
}
// parseEnhancedCues extracts word-level timing cues from Enhanced LRC inline markers.
// Format: <mm:ss.xx>word <mm:ss.xx>word ... [<mm:ss.xx>]
//
// Each marker starts a cue whose Value is the text up to the next marker (or
// the end of the string). A cue's End is the time of the next valid marker,
// including a marker that has no text after it: Enhanced LRC lines commonly
// finish with such a marker to record when the last word ends, and it must
// not be thrown away.
//
// Markers whose time cannot be parsed are skipped. Text that appears before
// the first marker is not part of any cue.
//
// Returns nil if no inline markers are found or no marker yields a cue.
func parseEnhancedCues(text string) []Cue {
	matches := enhancedLRCRegex.FindAllStringSubmatchIndex(text, -1)
	if len(matches) == 0 {
		return nil
	}

	cues := make([]Cue, 0, len(matches))
	// open is true while the most recently appended cue is still waiting for
	// the marker that supplies its End.
	open := false

	for i, match := range matches {
		timeMs, err := parseTime(
			// Rewrite <...> as [...] so parseTime can handle it with the same logic
			"["+text[match[0]+1:match[1]-1]+"]",
			// Submatch offsets made relative to the rewritten string
			// (one start/end pair for the whole match and for each group)
			[]int{
				0, match[1] - match[0],
				adjustGroup(match, 2), adjustGroup(match, 3),
				adjustGroup(match, 4), adjustGroup(match, 5),
				adjustGroup(match, 6), adjustGroup(match, 7),
				adjustGroup(match, 8), adjustGroup(match, 9),
			},
		)
		if err != nil {
			continue
		}

		// Any valid marker closes the preceding cue, whether or not it
		// introduces a word of its own.
		if open {
			end := timeMs
			cues[len(cues)-1].End = &end
			open = false
		}

		// Text runs from after this marker to the start of the next marker (or end of string)
		textEnd := len(text)
		if i+1 < len(matches) {
			textEnd = matches[i+1][0]
		}
		word := text[match[1]:textEnd]
		if word == "" {
			// Pure end marker: it only closed the previous cue.
			continue
		}

		start := timeMs
		cues = append(cues, Cue{Start: &start, Value: word})
		open = true
	}

	if len(cues) == 0 {
		return nil
	}
	return cues
}
// adjustGroup translates a submatch offset taken from the original text into
// the matching offset inside the rewritten "[...]" marker string.
//
// The rewritten string starts where the marker's '<' was, with '[' taking its
// place, so an offset only needs to be made relative to the start of the whole
// match (match[0]). A group that did not participate in the match is reported
// as -1 and is passed through unchanged.
func adjustGroup(match []int, groupIdx int) int {
	if pos := match[groupIdx]; pos != -1 {
		return pos - match[0]
	}
	return -1
}
// stripEnhancedMarkers returns text with every inline <mm:ss.xx> timing
// marker deleted, leaving only the plain lyric text. Surrounding whitespace
// is left untouched.
func stripEnhancedMarkers(text string) string {
	// The replacement is empty, so a literal replace is sufficient.
	return enhancedLRCRegex.ReplaceAllLiteralString(text, "")
}
func parseTime(line string, match []int) (int64, error) {
var hours, millis int64
var err error

View File

@ -116,4 +116,63 @@ var _ = Describe("ToLyrics", func() {
{Start: &e, Value: "Test"},
}))
})
// Two timed lines, each carrying inline <mm:ss.xx> markers. The spec checks
// that the line Value comes back without markers and that every word becomes
// a cue whose End equals the next cue's Start.
It("should parse Enhanced LRC with word-level timing", func() {
lyrics, err := ToLyrics("xxx", "[00:01.00]<00:01.00>Some <00:01.50>lyrics <00:02.00>here\n[00:03.00]<00:03.00>More <00:03.50>words")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Synced).To(BeTrue())
Expect(lyrics.Line).To(HaveLen(2))
// Expected timestamps in milliseconds; addressable so they can be used as pointer fields.
t1000, t1500, t2000, t3000, t3500 := int64(1000), int64(1500), int64(2000), int64(3000), int64(3500)
line0 := lyrics.Line[0]
Expect(line0.Start).To(Equal(&t1000))
Expect(line0.Value).To(Equal("Some lyrics here"))
// Cue values keep the whitespace that follows each word; the last cue has no End.
Expect(line0.Cue).To(Equal([]Cue{
{Start: &t1000, End: &t1500, Value: "Some "},
{Start: &t1500, End: &t2000, Value: "lyrics "},
{Start: &t2000, Value: "here"},
}))
line1 := lyrics.Line[1]
Expect(line1.Start).To(Equal(&t3000))
Expect(line1.Value).To(Equal("More words"))
Expect(line1.Cue).To(Equal([]Cue{
{Start: &t3000, End: &t3500, Value: "More "},
{Start: &t3500, Value: "words"},
}))
})
// Regression guard for plain LRC: input without any inline markers must yield
// lines whose Cue field stays at its zero value (the expected Line literals
// below do not set Cue).
It("should ignore Enhanced LRC markers and return plain lines when no markers present", func() {
a, b := int64(1000), int64(3000)
lyrics, err := ToLyrics("xxx", "[00:01.00]Plain line\n[00:03.00]Another plain line")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Line).To(Equal([]Line{
{Start: &a, Value: "Plain line"},
{Start: &b, Value: "Another plain line"},
}))
})
// Mixes enhanced and plain lines in one document: lines 1 and 3 carry inline
// markers and must produce cues, while the plain line in between must have a
// nil Cue and its text returned unchanged.
It("should handle mixed Enhanced and plain LRC lines", func() {
lyrics, err := ToLyrics("xxx", "[00:01.00]<00:01.00>Some <00:01.50>lyrics\n[00:03.00]Plain line\n[00:05.00]<00:05.00>More <00:05.50>words")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Line).To(HaveLen(3))
// Expected cue timestamps in milliseconds.
t1000, t1500, t5000, t5500 := int64(1000), int64(1500), int64(5000), int64(5500)
Expect(lyrics.Line[0].Cue).To(Equal([]Cue{
{Start: &t1000, End: &t1500, Value: "Some "},
{Start: &t1500, Value: "lyrics"},
}))
Expect(lyrics.Line[0].Value).To(Equal("Some lyrics"))
// Plain line: no cues expected.
Expect(lyrics.Line[1].Cue).To(BeNil())
Expect(lyrics.Line[1].Value).To(Equal("Plain line"))
Expect(lyrics.Line[2].Cue).To(Equal([]Cue{
{Start: &t5000, End: &t5500, Value: "More "},
{Start: &t5500, Value: "words"},
}))
Expect(lyrics.Line[2].Value).To(Equal("More words"))
})
})

View File

@ -498,16 +498,15 @@ func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics, enhanced boo
roleOrder := make([]string, 0, 2)
cuesByRole := make(map[string][]responses.LyricCue)
for _, cue := range line.Cue {
if cue.Start == nil {
continue
}
role := sanitizeRole(cue.Role)
if _, exists := cuesByRole[role]; !exists {
roleOrder = append(roleOrder, role)
}
var start int64
if cue.Start != nil {
start = *cue.Start
}
cuesByRole[role] = append(cuesByRole[role], responses.LyricCue{
Start: start,
Start: *cue.Start,
End: cue.End,
Value: cue.Value,
})
@ -535,15 +534,9 @@ func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics, enhanced boo
}
}
kind := strings.TrimSpace(lyrics.Kind)
if kind == "" {
kind = "main"
}
structured := responses.StructuredLyric{
DisplayArtist: lyrics.DisplayArtist,
DisplayTitle: lyrics.DisplayTitle,
Kind: kind,
Lang: lyrics.Lang,
Line: lines,
CueLine: cueLines,
@ -551,6 +544,14 @@ func buildStructuredLyric(mf *model.MediaFile, lyrics model.Lyrics, enhanced boo
Synced: lyrics.Synced,
}
if enhanced {
kind := strings.TrimSpace(lyrics.Kind)
if kind == "" {
kind = "main"
}
structured.Kind = kind
}
if structured.DisplayArtist == "" {
structured.DisplayArtist = mf.Artist
}

View File

@ -232,11 +232,7 @@ var _ = Describe("MediaRetrievalController", func() {
Expect(realLyric.DisplayArtist).To(Equal(expectedLyric.DisplayArtist))
Expect(realLyric.DisplayTitle).To(Equal(expectedLyric.DisplayTitle))
expectedKind := expectedLyric.Kind
if expectedKind == "" {
expectedKind = "main"
}
Expect(realLyric.Kind).To(Equal(expectedKind))
Expect(realLyric.Kind).To(Equal(expectedLyric.Kind))
Expect(realLyric.Lang).To(Equal(expectedLyric.Lang))
Expect(realLyric.Synced).To(Equal(expectedLyric.Synced))
@ -587,6 +583,7 @@ var _ = Describe("MediaRetrievalController", func() {
{
DisplayArtist: "Rick Astley",
DisplayTitle: "Never Gonna Give You Up",
Kind: "main",
Lang: "eng",
Synced: true,
Line: []responses.Line{

View File

@ -540,7 +540,7 @@ type Line struct {
type LyricCue struct {
Start int64 `xml:"start,attr" json:"start"`
End *int64 `xml:"end,attr,omitempty" json:"end,omitempty"`
Value string `xml:"value,attr" json:"value"`
Value string `xml:",chardata" json:"value"`
}
type CueLine struct {

6
tests/fixtures/test-enhanced.lrc vendored Normal file
View File

@ -0,0 +1,6 @@
[ar:Test Artist]
[ti:Enhanced Test]
[lang:eng]
[00:01.00]<00:01.00>Some <00:01.50>lyrics <00:02.00>here
[00:03.00]<00:03.00>More <00:03.50>words
[00:05.00]Plain line without inline markers

View File

@ -1,3 +1,12 @@
import Button from '@material-ui/core/Button'
import IconButton from '@material-ui/core/IconButton'
import Popover from '@material-ui/core/Popover'
import Slider from '@material-ui/core/Slider'
import { makeStyles } from '@material-ui/core/styles'
import Typography from '@material-ui/core/Typography'
import CloseIcon from '@material-ui/icons/Close'
import TuneIcon from '@material-ui/icons/Tune'
import clsx from 'clsx'
import React, {
memo,
useCallback,
@ -6,21 +15,12 @@ import React, {
useRef,
useState,
} from 'react'
import clsx from 'clsx'
import Button from '@material-ui/core/Button'
import IconButton from '@material-ui/core/IconButton'
import Popover from '@material-ui/core/Popover'
import Slider from '@material-ui/core/Slider'
import Typography from '@material-ui/core/Typography'
import CloseIcon from '@material-ui/icons/Close'
import TuneIcon from '@material-ui/icons/Tune'
import { makeStyles } from '@material-ui/core/styles'
import {
buildKaraokeLines,
getActiveKaraokeState,
hasStructuredLyricContent,
resolveLayerLineForMain,
resolveKaraokeTokenWindow,
resolveLayerLineForMain,
} from './lyrics'
const KARAOKE_RENDER_LEAD_MS = 24
@ -421,9 +421,7 @@ const LyricsSettingsPopover = ({ settings, onChange }) => {
const easeInOut = (v) => {
const clamped = clamp(v, 0, 1)
return clamped < 0.5
? 2 * clamped * clamped
: 1 - Math.pow(-2 * clamped + 2, 2) / 2
return clamped < 0.5 ? 2 * clamped * clamped : 1 - (-2 * clamped + 2) ** 2 / 2
}
const getMaxHeightPx = () => {
@ -716,17 +714,23 @@ const KaraokeLineRow = memo(
}
alpha = clamp(alpha, TOKEN_FUTURE_ALPHA, TOKEN_ACTIVE_ALPHA)
const fillProgress = isDone ? 1 : isActive ? progress : 0
const isBgRole = segment.token?.role === 'bg'
return (
<span
key={`token-${idx}-${tokenStart ?? 'na'}`}
className={tokenClassName}
style={buildTokenWipeStyle({
fillProgress,
highlightAlpha: alpha,
futureAlpha: TOKEN_FUTURE_ALPHA,
rgb: tokenRGB,
})}
style={{
...buildTokenWipeStyle({
fillProgress,
highlightAlpha: isBgRole ? alpha * 0.72 : alpha,
futureAlpha: isBgRole
? TOKEN_FUTURE_ALPHA * 0.72
: TOKEN_FUTURE_ALPHA,
rgb: tokenRGB,
}),
...(isBgRole ? { fontStyle: 'italic' } : undefined),
}}
>
{segment.text}
</span>
@ -1066,7 +1070,7 @@ const KaraokeLyricsOverlay = ({
const isActive = delta === 0
let opacity = isActive ? 1 : delta < 0 ? 0.6 : 0.72
const [r, g, b] = parseColorRGB(getColorValue(lyricsSettings.main.colorKey))
let color = isActive
const color = isActive
? `rgba(${r}, ${g}, ${b}, 0.98)`
: delta < 0
? `rgba(${r}, ${g}, ${b}, 0.4)`