From e5222c95051e1b940c4a420a2f33f6feff1ef28b Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Sun, 30 Jun 2024 21:25:20 +0100 Subject: [PATCH 1/6] update print statement --- cmdHandlers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmdHandlers.go b/cmdHandlers.go index 4572706..0021edf 100644 --- a/cmdHandlers.go +++ b/cmdHandlers.go @@ -236,5 +236,5 @@ func erase(songsDir string) { logger.ErrorContext(ctx, msg, slog.Any("error", err)) } - fmt.Println("Erase successful") + fmt.Println("Erase complete") } From b3b46cf21bbdaaf6b6c86f5fb5bd2459fa863fd3 Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Tue, 9 Jul 2024 23:08:21 +0100 Subject: [PATCH 2/6] Reimplement FindMatches --- shazam/shazam.go | 118 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 81 insertions(+), 37 deletions(-) diff --git a/shazam/shazam.go b/shazam/shazam.go index 77623cc..c8f84b2 100644 --- a/shazam/shazam.go +++ b/shazam/shazam.go @@ -3,6 +3,7 @@ package shazam import ( "fmt" "math" + "song-recognition/models" "song-recognition/utils" "sort" "time" @@ -17,7 +18,6 @@ type Match struct { Score float64 } -// FindMatches processes the audio samples and finds matches in the database func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int) ([]Match, time.Duration, error) { startTime := time.Now() logger := utils.GetLogger() @@ -30,9 +30,11 @@ func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int) peaks := ExtractPeaks(spectrogram, audioDuration) fingerprints := Fingerprint(peaks, utils.GenerateUniqueID()) + var sampleCouples []models.Couple addresses := make([]uint32, 0, len(fingerprints)) for address := range fingerprints { addresses = append(addresses, address) + sampleCouples = append(sampleCouples, fingerprints[address]) } db, err := utils.NewDbClient() @@ -41,61 +43,103 @@ func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int) } defer db.Close() - m, err := 
db.GetCouples(addresses) + couplesMap, err := db.GetCouples(addresses) if err != nil { return nil, time.Since(startTime), err } - matches := map[uint32][][2]uint32{} // songID -> [(sampleTime, dbTime)] - timestamps := map[uint32]uint32{} - - for address, couples := range m { + // Count occurrences of each couple to derive potential target zones + coupleCounts := make(map[uint32]map[uint32]int) + for _, couples := range couplesMap { for _, couple := range couples { - matches[couple.SongID] = append(matches[couple.SongID], [2]uint32{fingerprints[address].AnchorTimeMs, couple.AnchorTimeMs}) - timestamps[couple.SongID] = couple.AnchorTimeMs + key := (couple.SongID << 32) | uint32(couple.AnchorTimeMs) + if _, exists := coupleCounts[couple.SongID]; !exists { + coupleCounts[couple.SongID] = make(map[uint32]int) + } + coupleCounts[couple.SongID][key]++ } } - scores := analyzeRelativeTiming(matches) - - var matchList []Match - for songID, points := range scores { - song, songExists, err := db.GetSongByID(songID) - if !songExists { - logger.Info(fmt.Sprintf("song with ID (%v) doesn't exist", songID)) - continue + // Filter target zones with targets (couples) meeting or exceeding the threshold + threshold := 4 + filteredCouples := make(map[uint32][]models.Couple) + for songID, counts := range coupleCounts { + for key, count := range counts { + if count >= threshold { + filteredCouples[songID] = append(filteredCouples[songID], models.Couple{ + AnchorTimeMs: key & 0xFFFFFFFF, + SongID: songID, + }) + } } + } + + // Score matches by calculating mean absolute difference + var matches []Match + for songID, songCouples := range filteredCouples { + song, songExists, err := db.GetSongByID(songID) if err != nil { logger.Info(fmt.Sprintf("failed to get song by ID (%v): %v", songID, err)) continue } + if !songExists { + logger.Info(fmt.Sprintf("song with ID (%v) doesn't exist", songID)) + continue + } - match := Match{songID, song.Title, song.Artist, song.YouTubeID, timestamps[songID], 
points} - matchList = append(matchList, match) + m_a_d := meanAbsoluteDifference(songCouples, sampleCouples) + + tstamp := songCouples[len(songCouples)-1].AnchorTimeMs + match := Match{songID, song.Title, song.Artist, song.YouTubeID, tstamp, m_a_d} + matches = append(matches, match) } - sort.Slice(matchList, func(i, j int) bool { - return matchList[i].Score > matchList[j].Score + sort.Slice(matches, func(i, j int) bool { + return matches[i].Score > matches[j].Score }) - return matchList, time.Since(startTime), nil + // TODO: handle the case when there's no match for cmdHandlers + + return matches, time.Since(startTime), nil } -// AnalyzeRelativeTiming checks for consistent relative timing and returns a score -func analyzeRelativeTiming(matches map[uint32][][2]uint32) map[uint32]float64 { - scores := make(map[uint32]float64) - for songID, times := range matches { - count := 0 - for i := 0; i < len(times); i++ { - for j := i + 1; j < len(times); j++ { - sampleDiff := math.Abs(float64(times[i][0] - times[j][0])) - dbDiff := math.Abs(float64(times[i][1] - times[j][1])) - if math.Abs(sampleDiff-dbDiff) < 100 { // Allow some tolerance - count++ - } - } - } - scores[songID] = float64(count) +func meanAbsoluteDifference(A, B []models.Couple) float64 { + minLen := len(A) + if len(B) < minLen { + minLen = len(B) } - return scores + + var sumDiff float64 + for i := 0; i < minLen; i++ { + diff := math.Abs(float64(A[i].AnchorTimeMs - B[i].AnchorTimeMs)) + sumDiff += diff + } + + meanAbsDiff := sumDiff / float64(minLen) + return meanAbsDiff +} + +// Function to calculate Dynamic Time Warping distance +func dynamicTimeWarping(A, B []models.Couple) float64 { + lenA := len(A) + lenB := len(B) + + // Create a 2D array to store DTW distances + dtw := make([][]float64, lenA+1) + for i := range dtw { + dtw[i] = make([]float64, lenB+1) + for j := range dtw[i] { + dtw[i][j] = math.Inf(1) + } + } + dtw[0][0] = 0 + + for i := 1; i <= lenA; i++ { + for j := 1; j <= lenB; j++ { + cost := 
math.Abs(float64(A[i-1].AnchorTimeMs - B[j-1].AnchorTimeMs)) + dtw[i][j] = cost + math.Min(math.Min(dtw[i-1][j], dtw[i][j-1]), dtw[i-1][j-1]) + } + } + + return dtw[lenA][lenB] } From 783c81ae0f0f65fcc8b55df2c05207ff158f3b71 Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Tue, 9 Jul 2024 23:10:50 +0100 Subject: [PATCH 3/6] Handle case where no match was found --- cmdHandlers.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cmdHandlers.go b/cmdHandlers.go index 0021edf..f125aef 100644 --- a/cmdHandlers.go +++ b/cmdHandlers.go @@ -49,6 +49,12 @@ func find(filePath string) { return } + if len(matches) == 0 { + fmt.Println("\nNo match found.") + fmt.Printf("\nSearch took: %s\n", searchDuration) + return + } + msg := "Matches:" topMatches := matches if len(matches) >= 20 { From e45909977588a9708bdf3ab8962f34becad17896 Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Tue, 9 Jul 2024 23:11:42 +0100 Subject: [PATCH 4/6] Update comment --- shazam/spectrogram.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shazam/spectrogram.go b/shazam/spectrogram.go index 6d123d5..6f2ebc8 100644 --- a/shazam/spectrogram.go +++ b/shazam/spectrogram.go @@ -91,7 +91,7 @@ type Peak struct { Freq complex128 } -// ExtractPeaks extracts peaks from a spectrogram based on a specified algorithm +// ExtractPeaks analyzes a spectrogram and extracts significant peaks in the frequency domain over time. 
func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak { if len(spectrogram) < 1 { return []Peak{} From cf88253c00bbd1bb1be130b2f64432f11feab03c Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Tue, 9 Jul 2024 23:16:54 +0100 Subject: [PATCH 5/6] FFmpeg command to downsample and convert to mono --- socketHandlers.go | 36 ++++++++++++++++++------------------ wav/wav.go | 21 +++++++++++++++++++++ 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/socketHandlers.go b/socketHandlers.go index ff3668c..d0bc8f7 100644 --- a/socketHandlers.go +++ b/socketHandlers.go @@ -11,6 +11,7 @@ import ( "song-recognition/utils" "song-recognition/wav" "strings" + "time" socketio "github.com/googollee/go-socket.io" "github.com/mdobak/go-xerrors" @@ -207,37 +208,36 @@ func handleNewRecording(socket socketio.Conn, recordData string) { sampleRate := recData.SampleRate bitsPerSample := recData.SampleSize + fmt.Printf("Channels: %v, sampleRate: %v, bitsPerSample: %v\n", channels, sampleRate, bitsPerSample) + samples, err := wav.WavBytesToSamples(decodedAudioData) if err != nil { err := xerrors.New(err) logger.ErrorContext(ctx, "failed to convert decodedData to samples.", slog.Any("error", err)) } - /** this operation alters the audio, adds some level of bass to it. 
- if sampleRate != 44100 { - samples, err = shazam.Downsample(samples, sampleRate, 44100) - if err != nil { - err := xerrors.New(err) - logger.ErrorContext(ctx, "failed to downsample.", slog.Any("error", err)) - } - sampleRate = 44100 - } - // Save recording - recordingInBytes, err := utils.FloatsToBytes(samples, bitsPerSample) - if err != nil { - err := xerrors.New(err) - logger.ErrorContext(ctx, "failed to convert bytes.", slog.Any("error", err)) - } - decodedAudioData = recordingInBytes - */ + now := time.Now() + fileName := fmt.Sprintf("%04d_%02d_%02d_%02d_%02d_%02d.wav", + now.Second(), now.Minute(), now.Hour(), + now.Day(), now.Month(), now.Year(), + ) - err = wav.WriteWavFile("blob.wav", decodedAudioData, sampleRate, channels, bitsPerSample) + err = wav.WriteWavFile(fileName, decodedAudioData, sampleRate, channels, bitsPerSample) if err != nil { err := xerrors.New(err) logger.ErrorContext(ctx, "failed to write wav file.", slog.Any("error", err)) } + /* + wav.FFmpegConvertWAV(fileName, fileName, 44100, true) + wavInfo, _ := wav.ReadWavInfo("mono_" + fileName) + samples, _ = wav.WavBytesToSamples(wavInfo.Data) + // spotify.DeleteFile(fileName) + spotify.DeleteFile("mono_" + fileName) + + */ + matches, _, err := shazam.FindMatches(samples, recData.Duration, sampleRate) if err != nil { err := xerrors.New(err) diff --git a/wav/wav.go b/wav/wav.go index b55453b..8b42007 100644 --- a/wav/wav.go +++ b/wav/wav.go @@ -7,6 +7,7 @@ import ( "fmt" "io/ioutil" "os" + "os/exec" ) // WavHeader defines the structure of a WAV header @@ -148,3 +149,23 @@ func WavBytesToSamples(input []byte) ([]float64, error) { return output, nil } + +// FFmpegConvertWAV converts a WAV file using ffmpeg. +// It can change the sample rate and optionally convert to mono. 
+func FFmpegConvertWAV(inputFile, outputFile string, targetSampleRate int, toMono bool) error { + cmdArgs := []string{ + "-i", inputFile, + "-ar", fmt.Sprintf("%d", targetSampleRate), + "-y", + } + + if toMono { + outputFile = "mono_" + outputFile + cmdArgs = append(cmdArgs, "-ac", "1", "-c:a", "pcm_s16le") + } + + cmdArgs = append(cmdArgs, outputFile) + + cmd := exec.Command("ffmpeg", cmdArgs...) + return cmd.Run() +} From f85b3f00d48e08261f8f5acbfb7206e9eae742ea Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Tue, 9 Jul 2024 23:19:59 +0100 Subject: [PATCH 6/6] disable audio default processing to get better sound quality --- client/src/App.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/src/App.js b/client/src/App.js index 5c3a89f..a054590 100644 --- a/client/src/App.js +++ b/client/src/App.js @@ -94,8 +94,8 @@ function App() { audio: { autoGainControl: false, channelCount: 1, - echoCancellation: true, - noiseSuppression: true, + echoCancellation: false, + noiseSuppression: false, sampleSize: 16, }, };