mirror of
https://github.com/cgzirim/seek-tune.git
synced 2025-12-19 09:54:22 +00:00
commit
3788ada052
6 changed files with 130 additions and 59 deletions
|
|
@ -94,8 +94,8 @@ function App() {
|
||||||
audio: {
|
audio: {
|
||||||
autoGainControl: false,
|
autoGainControl: false,
|
||||||
channelCount: 1,
|
channelCount: 1,
|
||||||
echoCancellation: true,
|
echoCancellation: false,
|
||||||
noiseSuppression: true,
|
noiseSuppression: false,
|
||||||
sampleSize: 16,
|
sampleSize: 16,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,12 @@ func find(filePath string) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(matches) == 0 {
|
||||||
|
fmt.Println("\nNo match found.")
|
||||||
|
fmt.Printf("\nSearch took: %s\n", searchDuration)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
msg := "Matches:"
|
msg := "Matches:"
|
||||||
topMatches := matches
|
topMatches := matches
|
||||||
if len(matches) >= 20 {
|
if len(matches) >= 20 {
|
||||||
|
|
@ -236,5 +242,5 @@ func erase(songsDir string) {
|
||||||
logger.ErrorContext(ctx, msg, slog.Any("error", err))
|
logger.ErrorContext(ctx, msg, slog.Any("error", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("Erase successful")
|
fmt.Println("Erase complete")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
112
shazam/shazam.go
112
shazam/shazam.go
|
|
@ -3,6 +3,7 @@ package shazam
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"song-recognition/models"
|
||||||
"song-recognition/utils"
|
"song-recognition/utils"
|
||||||
"sort"
|
"sort"
|
||||||
"time"
|
"time"
|
||||||
|
|
@ -17,7 +18,6 @@ type Match struct {
|
||||||
Score float64
|
Score float64
|
||||||
}
|
}
|
||||||
|
|
||||||
// FindMatches processes the audio samples and finds matches in the database
|
|
||||||
func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int) ([]Match, time.Duration, error) {
|
func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int) ([]Match, time.Duration, error) {
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
logger := utils.GetLogger()
|
logger := utils.GetLogger()
|
||||||
|
|
@ -30,9 +30,11 @@ func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int)
|
||||||
peaks := ExtractPeaks(spectrogram, audioDuration)
|
peaks := ExtractPeaks(spectrogram, audioDuration)
|
||||||
fingerprints := Fingerprint(peaks, utils.GenerateUniqueID())
|
fingerprints := Fingerprint(peaks, utils.GenerateUniqueID())
|
||||||
|
|
||||||
|
var sampleCouples []models.Couple
|
||||||
addresses := make([]uint32, 0, len(fingerprints))
|
addresses := make([]uint32, 0, len(fingerprints))
|
||||||
for address := range fingerprints {
|
for address := range fingerprints {
|
||||||
addresses = append(addresses, address)
|
addresses = append(addresses, address)
|
||||||
|
sampleCouples = append(sampleCouples, fingerprints[address])
|
||||||
}
|
}
|
||||||
|
|
||||||
db, err := utils.NewDbClient()
|
db, err := utils.NewDbClient()
|
||||||
|
|
@ -41,61 +43,103 @@ func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int)
|
||||||
}
|
}
|
||||||
defer db.Close()
|
defer db.Close()
|
||||||
|
|
||||||
m, err := db.GetCouples(addresses)
|
couplesMap, err := db.GetCouples(addresses)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, time.Since(startTime), err
|
return nil, time.Since(startTime), err
|
||||||
}
|
}
|
||||||
|
|
||||||
matches := map[uint32][][2]uint32{} // songID -> [(sampleTime, dbTime)]
|
// Count occurrences of each couple to derive potential target zones
|
||||||
timestamps := map[uint32]uint32{}
|
coupleCounts := make(map[uint32]map[uint32]int)
|
||||||
|
for _, couples := range couplesMap {
|
||||||
for address, couples := range m {
|
|
||||||
for _, couple := range couples {
|
for _, couple := range couples {
|
||||||
matches[couple.SongID] = append(matches[couple.SongID], [2]uint32{fingerprints[address].AnchorTimeMs, couple.AnchorTimeMs})
|
key := (couple.SongID << 32) | uint32(couple.AnchorTimeMs)
|
||||||
timestamps[couple.SongID] = couple.AnchorTimeMs
|
if _, exists := coupleCounts[couple.SongID]; !exists {
|
||||||
|
coupleCounts[couple.SongID] = make(map[uint32]int)
|
||||||
|
}
|
||||||
|
coupleCounts[couple.SongID][key]++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
scores := analyzeRelativeTiming(matches)
|
// Filter target zones with targets (couples) meeting or exceeding the threshold
|
||||||
|
threshold := 4
|
||||||
|
filteredCouples := make(map[uint32][]models.Couple)
|
||||||
|
for songID, counts := range coupleCounts {
|
||||||
|
for key, count := range counts {
|
||||||
|
if count >= threshold {
|
||||||
|
filteredCouples[songID] = append(filteredCouples[songID], models.Couple{
|
||||||
|
AnchorTimeMs: key & 0xFFFFFFFF,
|
||||||
|
SongID: songID,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var matchList []Match
|
// Score matches by calculating mean absolute difference
|
||||||
for songID, points := range scores {
|
var matches []Match
|
||||||
|
for songID, songCouples := range filteredCouples {
|
||||||
song, songExists, err := db.GetSongByID(songID)
|
song, songExists, err := db.GetSongByID(songID)
|
||||||
if !songExists {
|
|
||||||
logger.Info(fmt.Sprintf("song with ID (%v) doesn't exist", songID))
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Info(fmt.Sprintf("failed to get song by ID (%v): %v", songID, err))
|
logger.Info(fmt.Sprintf("failed to get song by ID (%v): %v", songID, err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if !songExists {
|
||||||
match := Match{songID, song.Title, song.Artist, song.YouTubeID, timestamps[songID], points}
|
logger.Info(fmt.Sprintf("song with ID (%v) doesn't exist", songID))
|
||||||
matchList = append(matchList, match)
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
sort.Slice(matchList, func(i, j int) bool {
|
m_a_d := meanAbsoluteDifference(songCouples, sampleCouples)
|
||||||
return matchList[i].Score > matchList[j].Score
|
|
||||||
|
tstamp := songCouples[len(songCouples)-1].AnchorTimeMs
|
||||||
|
match := Match{songID, song.Title, song.Artist, song.YouTubeID, tstamp, m_a_d}
|
||||||
|
matches = append(matches, match)
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Slice(matches, func(i, j int) bool {
|
||||||
|
return matches[i].Score > matches[j].Score
|
||||||
})
|
})
|
||||||
|
|
||||||
return matchList, time.Since(startTime), nil
|
// TODO: handle the case when there's no match for cmdHandlers
|
||||||
|
|
||||||
|
return matches, time.Since(startTime), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// AnalyzeRelativeTiming checks for consistent relative timing and returns a score
|
func meanAbsoluteDifference(A, B []models.Couple) float64 {
|
||||||
func analyzeRelativeTiming(matches map[uint32][][2]uint32) map[uint32]float64 {
|
minLen := len(A)
|
||||||
scores := make(map[uint32]float64)
|
if len(B) < minLen {
|
||||||
for songID, times := range matches {
|
minLen = len(B)
|
||||||
count := 0
|
}
|
||||||
for i := 0; i < len(times); i++ {
|
|
||||||
for j := i + 1; j < len(times); j++ {
|
var sumDiff float64
|
||||||
sampleDiff := math.Abs(float64(times[i][0] - times[j][0]))
|
for i := 0; i < minLen; i++ {
|
||||||
dbDiff := math.Abs(float64(times[i][1] - times[j][1]))
|
diff := math.Abs(float64(A[i].AnchorTimeMs - B[i].AnchorTimeMs))
|
||||||
if math.Abs(sampleDiff-dbDiff) < 100 { // Allow some tolerance
|
sumDiff += diff
|
||||||
count++
|
}
|
||||||
|
|
||||||
|
meanAbsDiff := sumDiff / float64(minLen)
|
||||||
|
return meanAbsDiff
|
||||||
|
}
|
||||||
|
|
||||||
|
// dynamicTimeWarping calculates the Dynamic Time Warping distance between A and B
|
||||||
|
func dynamicTimeWarping(A, B []models.Couple) float64 {
|
||||||
|
lenA := len(A)
|
||||||
|
lenB := len(B)
|
||||||
|
|
||||||
|
// Create a 2D array to store DTW distances
|
||||||
|
dtw := make([][]float64, lenA+1)
|
||||||
|
for i := range dtw {
|
||||||
|
dtw[i] = make([]float64, lenB+1)
|
||||||
|
for j := range dtw[i] {
|
||||||
|
dtw[i][j] = math.Inf(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
dtw[0][0] = 0
|
||||||
|
|
||||||
|
for i := 1; i <= lenA; i++ {
|
||||||
|
for j := 1; j <= lenB; j++ {
|
||||||
|
cost := math.Abs(float64(A[i-1].AnchorTimeMs - B[j-1].AnchorTimeMs))
|
||||||
|
dtw[i][j] = cost + math.Min(math.Min(dtw[i-1][j], dtw[i][j-1]), dtw[i-1][j-1])
|
||||||
}
|
}
|
||||||
scores[songID] = float64(count)
|
|
||||||
}
|
}
|
||||||
return scores
|
|
||||||
|
return dtw[lenA][lenB]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -91,7 +91,7 @@ type Peak struct {
|
||||||
Freq complex128
|
Freq complex128
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExtractPeaks extracts peaks from a spectrogram based on a specified algorithm
|
// ExtractPeaks analyzes a spectrogram and extracts significant peaks in the frequency domain over time.
|
||||||
func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
|
func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
|
||||||
if len(spectrogram) < 1 {
|
if len(spectrogram) < 1 {
|
||||||
return []Peak{}
|
return []Peak{}
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ import (
|
||||||
"song-recognition/utils"
|
"song-recognition/utils"
|
||||||
"song-recognition/wav"
|
"song-recognition/wav"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
socketio "github.com/googollee/go-socket.io"
|
socketio "github.com/googollee/go-socket.io"
|
||||||
"github.com/mdobak/go-xerrors"
|
"github.com/mdobak/go-xerrors"
|
||||||
|
|
@ -207,37 +208,36 @@ func handleNewRecording(socket socketio.Conn, recordData string) {
|
||||||
sampleRate := recData.SampleRate
|
sampleRate := recData.SampleRate
|
||||||
bitsPerSample := recData.SampleSize
|
bitsPerSample := recData.SampleSize
|
||||||
|
|
||||||
|
fmt.Printf("Channels: %v, sampleRate: %v, bitsPerSample: %v\n", channels, sampleRate, bitsPerSample)
|
||||||
|
|
||||||
samples, err := wav.WavBytesToSamples(decodedAudioData)
|
samples, err := wav.WavBytesToSamples(decodedAudioData)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err := xerrors.New(err)
|
err := xerrors.New(err)
|
||||||
logger.ErrorContext(ctx, "failed to convert decodedData to samples.", slog.Any("error", err))
|
logger.ErrorContext(ctx, "failed to convert decodedData to samples.", slog.Any("error", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
/** this operation alters the audio, adds some level of bass to it.
|
|
||||||
if sampleRate != 44100 {
|
|
||||||
samples, err = shazam.Downsample(samples, sampleRate, 44100)
|
|
||||||
if err != nil {
|
|
||||||
err := xerrors.New(err)
|
|
||||||
logger.ErrorContext(ctx, "failed to downsample.", slog.Any("error", err))
|
|
||||||
}
|
|
||||||
sampleRate = 44100
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save recording
|
// Save recording
|
||||||
recordingInBytes, err := utils.FloatsToBytes(samples, bitsPerSample)
|
now := time.Now()
|
||||||
if err != nil {
|
fileName := fmt.Sprintf("%04d_%02d_%02d_%02d_%02d_%02d.wav",
|
||||||
err := xerrors.New(err)
|
now.Second(), now.Minute(), now.Hour(),
|
||||||
logger.ErrorContext(ctx, "failed to convert bytes.", slog.Any("error", err))
|
now.Day(), now.Month(), now.Year(),
|
||||||
}
|
)
|
||||||
decodedAudioData = recordingInBytes
|
|
||||||
*/
|
|
||||||
|
|
||||||
err = wav.WriteWavFile("blob.wav", decodedAudioData, sampleRate, channels, bitsPerSample)
|
err = wav.WriteWavFile(fileName, decodedAudioData, sampleRate, channels, bitsPerSample)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err := xerrors.New(err)
|
err := xerrors.New(err)
|
||||||
logger.ErrorContext(ctx, "failed to write wav file.", slog.Any("error", err))
|
logger.ErrorContext(ctx, "failed to write wav file.", slog.Any("error", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
wav.FFmpegConvertWAV(fileName, fileName, 44100, true)
|
||||||
|
wavInfo, _ := wav.ReadWavInfo("mono_" + fileName)
|
||||||
|
samples, _ = wav.WavBytesToSamples(wavInfo.Data)
|
||||||
|
// spotify.DeleteFile(fileName)
|
||||||
|
spotify.DeleteFile("mono_" + fileName)
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
matches, _, err := shazam.FindMatches(samples, recData.Duration, sampleRate)
|
matches, _, err := shazam.FindMatches(samples, recData.Duration, sampleRate)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err := xerrors.New(err)
|
err := xerrors.New(err)
|
||||||
|
|
|
||||||
21
wav/wav.go
21
wav/wav.go
|
|
@ -7,6 +7,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
)
|
)
|
||||||
|
|
||||||
// WavHeader defines the structure of a WAV header
|
// WavHeader defines the structure of a WAV header
|
||||||
|
|
@ -148,3 +149,23 @@ func WavBytesToSamples(input []byte) ([]float64, error) {
|
||||||
|
|
||||||
return output, nil
|
return output, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FFmpegConvertWAV converts a WAV file using ffmpeg.
|
||||||
|
// It can change the sample rate and optionally convert to mono.
|
||||||
|
func FFmpegConvertWAV(inputFile, outputFile string, targetSampleRate int, toMono bool) error {
|
||||||
|
cmdArgs := []string{
|
||||||
|
"-i", inputFile,
|
||||||
|
"-ar", fmt.Sprintf("%d", targetSampleRate),
|
||||||
|
"-y",
|
||||||
|
}
|
||||||
|
|
||||||
|
if toMono {
|
||||||
|
outputFile = "mono_" + outputFile
|
||||||
|
cmdArgs = append(cmdArgs, "-ac", "1", "-c:a", "pcm_s16le")
|
||||||
|
}
|
||||||
|
|
||||||
|
cmdArgs = append(cmdArgs, outputFile)
|
||||||
|
|
||||||
|
cmd := exec.Command("ffmpeg", cmdArgs...)
|
||||||
|
return cmd.Run()
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue