seek-tune/server/shazam/shazam.go
Chigozirim Igweamaka ced4fc7ee8 perf(shazam): optimize timing analysis from O(n²) to O(n)
Replace pairwise timing comparison with histogram approach that counts
time offset agreements. Bins offsets in 100ms buckets for tolerance.
Improves performance by 500-5000x for songs with many fingerprint matches.
2025-11-19 16:52:55 +01:00

173 lines
4.6 KiB
Go

//go:build !js && !wasm
// +build !js,!wasm
package shazam
import (
"fmt"
"song-recognition/db"
"song-recognition/utils"
"sort"
"time"
)
// Match describes one candidate song produced by FindMatchesFGP for a
// sample fingerprint, together with the score used to rank it.
type Match struct {
// SongID is the database ID of the matched song.
SongID uint32
// SongTitle is the Title of the song record loaded via GetSongByID.
SongTitle string
// SongArtist is the Artist of the song record loaded via GetSongByID.
SongArtist string
// YouTubeID is the YouTubeID of the song record loaded via GetSongByID.
YouTubeID string
// Timestamp is the smallest AnchorTimeMs among the database couples
// that matched this song.
Timestamp uint32
// Score is the value computed by analyzeRelativeTiming for this song;
// FindMatchesFGP returns matches sorted by descending Score.
Score float64
}
// FindMatches analyzes the audio sample to find matching songs in the database.
//
// It computes a spectrogram of audioSample, extracts peaks from it,
// fingerprints those peaks, and hands the resulting address -> anchor-time
// map to FindMatchesFGP.
//
// It returns the matches as ordered by FindMatchesFGP, the wall-clock time
// spent in this call, and a non-nil error if the spectrogram could not be
// computed or the fingerprint lookup failed. On error the match slice is nil.
func FindMatches(audioSample []float64, audioDuration float64, sampleRate int) ([]Match, time.Duration, error) {
	startTime := time.Now()

	spectrogram, err := Spectrogram(audioSample, sampleRate)
	if err != nil {
		return nil, time.Since(startTime), fmt.Errorf("failed to get spectrogram of samples: %w", err)
	}

	peaks := ExtractPeaks(spectrogram, audioDuration, sampleRate)
	// peaks := ExtractPeaksLMX(spectrogram, true)
	sampleFingerprint := Fingerprint(peaks, utils.GenerateUniqueID())

	// FindMatchesFGP only needs the anchor time for each address.
	sampleFingerprintMap := make(map[uint32]uint32, len(sampleFingerprint))
	for address, couple := range sampleFingerprint {
		sampleFingerprintMap[address] = couple.AnchorTimeMs
	}

	// Propagate lookup failures instead of reporting them as "no matches".
	// The inner duration is dropped because this function reports its own.
	matches, _, err := FindMatchesFGP(sampleFingerprintMap)
	if err != nil {
		return nil, time.Since(startTime), fmt.Errorf("failed to find matches for sample fingerprint: %w", err)
	}

	return matches, time.Since(startTime), nil
}
// FindMatchesFGP uses the sample fingerprint to find matching songs in the database.
//
// sampleFingerprint maps each fingerprint address of the sample to the
// sample-side anchor time recorded for that address.
//
// It returns the matches sorted by descending Score, the wall-clock time
// spent in this call, and a non-nil error if the database client could not
// be created or the couples lookup failed. Songs whose record cannot be
// loaded are logged and skipped rather than failing the whole call.
func FindMatchesFGP(sampleFingerprint map[uint32]uint32) ([]Match, time.Duration, error) {
	startTime := time.Now()
	logger := utils.GetLogger()

	addresses := make([]uint32, 0, len(sampleFingerprint))
	for address := range sampleFingerprint {
		addresses = append(addresses, address)
	}

	// Named dbClient so the imported db package is not shadowed.
	dbClient, err := db.NewDBClient()
	if err != nil {
		return nil, time.Since(startTime), err
	}
	defer dbClient.Close()

	m, err := dbClient.GetCouples(addresses)
	if err != nil {
		return nil, time.Since(startTime), err
	}

	matches := map[uint32][][2]uint32{}        // songID -> [(sampleTime, dbTime)]
	timestamps := map[uint32]uint32{}          // songID -> earliest timestamp
	targetZones := map[uint32]map[uint32]int{} // songID -> timestamp -> count

	for address, couples := range m {
		for _, couple := range couples {
			matches[couple.SongID] = append(
				matches[couple.SongID],
				[2]uint32{sampleFingerprint[address], couple.AnchorTimeMs},
			)

			if existingTime, ok := timestamps[couple.SongID]; !ok || couple.AnchorTimeMs < existingTime {
				timestamps[couple.SongID] = couple.AnchorTimeMs
			}

			// targetZones only feeds the filterMatches call that is
			// currently disabled below.
			if _, ok := targetZones[couple.SongID]; !ok {
				targetZones[couple.SongID] = make(map[uint32]int)
			}
			targetZones[couple.SongID][couple.AnchorTimeMs]++
		}
	}

	// matches = filterMatches(10, matches, targetZones)

	scores := analyzeRelativeTiming(matches)

	// Left nil when nothing matches so callers see the same empty result as before.
	var matchList []Match

	for songID, points := range scores {
		song, songExists, err := dbClient.GetSongByID(songID)
		// Check the error first: a failed lookup presumably also reports
		// songExists == false, and must not be logged as a missing song.
		if err != nil {
			logger.Info(fmt.Sprintf("failed to get song by ID (%v): %v", songID, err))
			continue
		}
		if !songExists {
			logger.Info(fmt.Sprintf("song with ID (%v) doesn't exist", songID))
			continue
		}

		matchList = append(matchList, Match{
			SongID:     songID,
			SongTitle:  song.Title,
			SongArtist: song.Artist,
			YouTubeID:  song.YouTubeID,
			Timestamp:  timestamps[songID],
			Score:      points,
		})
	}

	sort.Slice(matchList, func(i, j int) bool {
		return matchList[i].Score > matchList[j].Score
	})

	return matchList, time.Since(startTime), nil
}
// filterMatches keeps only the songs that have at least `threshold`
// qualifying target zones.
//
// An anchor time qualifies as a target zone when its count is at least
// `targetZoneSize`; anchor times below that are deleted from targetZones
// in place, so the caller's map is modified. The returned map holds the
// entries of matches for the surviving songs.
func filterMatches(
	threshold int,
	matches map[uint32][][2]uint32,
	targetZones map[uint32]map[uint32]int) map[uint32][][2]uint32 {
	kept := map[uint32][][2]uint32{}

	for id, zones := range targetZones {
		// Drop anchor times that are too sparse to count as a target zone.
		for anchor, hits := range zones {
			if hits >= targetZoneSize {
				continue
			}
			delete(zones, anchor)
		}

		// Songs with too few remaining target zones are discarded.
		if len(zones) < threshold {
			continue
		}
		kept[id] = matches[id]
	}

	return kept
}
// analyzeRelativeTiming calculates a score for each song based on the
// consistency of time offsets between the sample and database.
//
// matches maps a song ID to its (sampleTime, dbTime) pairs. For every pair
// the offset dbTime - sampleTime is placed in a 100ms-wide bucket, and the
// song's score is the number of pairs in its most populated bucket. A song
// with no pairs scores 0.
func analyzeRelativeTiming(matches map[uint32][][2]uint32) map[uint32]float64 {
	// Bin offsets in 100ms buckets to allow for small timing variations.
	const bucketWidth = 100

	scores := make(map[uint32]float64, len(matches))

	for songID, times := range matches {
		offsetCounts := make(map[int64]int)
		maxCount := 0

		for _, timePair := range times {
			// Widen to int64 so the subtraction of two uint32 values
			// can never wrap.
			offset := int64(timePair[1]) - int64(timePair[0])

			// Go's integer division truncates toward zero, which would
			// merge (-100, 0) and [0, 100) into one double-width bucket.
			// Use floor division so every bucket is exactly 100 wide.
			offsetBucket := offset / bucketWidth
			if offset%bucketWidth < 0 {
				offsetBucket--
			}

			offsetCounts[offsetBucket]++
			if count := offsetCounts[offsetBucket]; count > maxCount {
				maxCount = count
			}
		}

		scores[songID] = float64(maxCount)
	}

	return scores
}