From 894094ed962b92e8fe954e4baecfbe276afeb5ac Mon Sep 17 00:00:00 2001
From: Chigozirim Igweamaka
Date: Wed, 15 May 2024 05:03:32 +0100
Subject: [PATCH] Write to find matches

---
 shazam/shazam.go | 472 +++++++++++------------------------------------
 1 file changed, 104 insertions(+), 368 deletions(-)

diff --git a/shazam/shazam.go b/shazam/shazam.go
index e5a2ad6..de9d5fd 100644
--- a/shazam/shazam.go
+++ b/shazam/shazam.go
@@ -1,46 +1,38 @@
 package shazam
 
 import (
-	"crypto/sha256"
-	"encoding/binary"
 	"fmt"
 	"math"
-	"math/cmplx"
-	"math/rand"
+	"song-recognition/models"
 	"song-recognition/utils"
 	"sort"
-	"time"
-
-	"github.com/mjibson/go-dsp/fft"
-	"go.mongodb.org/mongo-driver/bson/primitive"
 )
 
-// Constants
-const (
-	chunkSize = 4096 // 4KB
-	// hopSize = 128
-	fuzzFactor   = 2
-	bitDepth     = 2
-	channels     = 1
-	samplingRate = 44100
-)
-
-type ChunkTag struct {
+type Match struct {
+	SongID     uint32
 	SongTitle  string
 	SongArtist string
 	YouTubeID  string
-	TimeStamp  string
+	Timestamp  uint32
+	Score      float64
 }
 
-type Match struct {
-	songKey       string
-	ChunkTag      primitive.M
-	WeightedScore float64
-}
+func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int) ([]Match, error) {
+	logger := utils.GetLogger()
 
-func FindMatches(sampleAudio []byte) ([]Match, error) {
-	sampleChunks := Chunkify(sampleAudio)
-	chunkFingerprints, _ := FingerprintChunks(sampleChunks, nil)
+	spectrogram, err := Spectrogram(audioSamples, sampleRate)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get spectrogram of samples: %v", err)
+	}
+
+	peaks := ExtractPeaks(spectrogram, audioDuration)
+	fingerprints := Fingerprint(peaks, utils.GenerateUniqueID())
+	fmt.Println("peaks len: ", len(peaks))
+
+	addresses := make([]uint32, 0, len(fingerprints))
+	for address := range fingerprints {
+		addresses = append(addresses, address)
+	}
 
 	db, err := utils.NewDbClient()
 	if err != nil {
@@ -48,376 +40,120 @@ func FindMatches(sampleAudio []byte) ([]Match, error) {
 	}
 	defer db.Close()
 
-	var chunkTags = make(map[string]primitive.M)
-	var songsTimestamps = make(map[string][]string)
-	for _, chunkfgp := range chunkFingerprints {
-		listOfChunkTags, err := db.GetChunkTags(chunkfgp)
-		if err != nil {
-			return nil, err
-		}
+	m, err := db.GetCouples(addresses)
+	if err != nil {
+		return nil, err
+	}
 
-		for _, chunkTag := range listOfChunkTags {
-			timeStamp := fmt.Sprint(chunkTag["timestamp"])
-			songKey := fmt.Sprintf("%s by %s", chunkTag["songtitle"], chunkTag["songartist"])
+	matches := map[uint32]map[uint32]models.Couple{}
+	timestamps := map[uint32]uint32{}
 
-			if songsTimestamps[songKey] == nil {
-				songsTimestamps[songKey] = []string{timeStamp}
-				chunkTags[songKey] = chunkTag
-			} else {
-				songsTimestamps[songKey] = append(songsTimestamps[songKey], timeStamp)
+	for address, couples := range m {
+		for _, couple := range couples {
+
+			if _, ok := matches[couple.SongID]; !ok {
+				matches[couple.SongID] = map[uint32]models.Couple{}
+				timestamps[couple.SongID] = couple.AnchorTimeMs
 			}
+
+			matches[couple.SongID][address] = couple
 		}
 	}
 
-	var matches []Match
-	for songKey, timestamps := range songsTimestamps {
-		timestampsInSeconds, err := timestampsInSeconds(timestamps)
+	scores := map[uint32]float64{}
+	for songID, couples := range matches {
+		song, songExists, err := db.GetSongByID(songID)
+		if err != nil || !songExists {
+			// skip songs that cannot be retrieved from the database
+			logger.Info(fmt.Sprintf("failed to get song by ID (%v): %v", songID, err))
+			continue
+		}
+		fmt.Printf("Song: %v, Scores:\n", song.Title)
+
+		scores[songID] = matchScore(fingerprints, couples)
+		fmt.Println("------------------------------------")
+	}
+
+	var matchList []Match
+	for songID, points := range scores {
+		song, songExists, err := db.GetSongByID(songID)
+		if !songExists {
+			logger.Info(fmt.Sprintf("song with ID (%v) doesn't exist", songID))
+			continue
+		}
 		if err != nil {
-			return nil, err
-		}
-
-		maxPeak, differenceSum, err := getMaxPeak(timestampsInSeconds)
-		if err != nil {
-			if err.Error() == "insufficient timestamps" || err.Error() == "no peak was identified" {
-				continue
-			} else {
-				return nil, err
-			}
-		}
-
-		weightedScore := float64(differenceSum) / float64(len(maxPeak))
-		matches = append(matches, Match{songKey, chunkTags[songKey], weightedScore})
-
-		fmt.Printf("%s MaxPeak: %v, DifferenceSum: %d\n", songKey, maxPeak, differenceSum)
-		fmt.Println("=====================================================\n")
-	}
-
-	sort.Slice(matches, func(i, j int) bool {
-		return matches[i].WeightedScore < matches[j].WeightedScore
-	})
-
-	display := make(map[string]float64)
-	for _, match := range matches {
-		key := match.songKey
-		display[key] = match.WeightedScore
-	}
-
-	fmt.Println("New Matches: ", display)
-	fmt.Println("Matches: ", matches)
-	return matches, nil
-}
-
-func sortMatchesByTimeDifference(matches map[string][]int, chunkTags map[string]primitive.M) []primitive.M {
-	type songDifferences struct {
-		songKey     string
-		differences []int
-		sum         int
-	}
-
-	var kvPairs []songDifferences
-	for songKey, differences := range matches {
-		sum := 0
-		for _, difference := range differences {
-			sum += difference
-		}
-		kvPairs = append(kvPairs, songDifferences{songKey, differences, sum})
-	}
-
-	sort.Slice(kvPairs, func(i, j int) bool {
-		return kvPairs[i].sum > kvPairs[j].sum
-	})
-
-	var sortedChunkTags []primitive.M
-	for _, pair := range kvPairs {
-		sortedChunkTags = append(sortedChunkTags, chunkTags[pair.songKey])
-	}
-
-	return sortedChunkTags
-}
-
-func timestampsInSeconds(timestamps []string) ([]int, error) {
-	layout := "15:04:05"
-
-	timestampsInSeconds := make([]int, len(timestamps))
-	for i, ts := range timestamps {
-		parsedTime, err := time.Parse(layout, ts)
-		if err != nil {
-			return nil, fmt.Errorf("error parsing timestamp %q: %w", ts, err)
-		}
-		hours := parsedTime.Hour()
-		minutes := parsedTime.Minute()
-		seconds := parsedTime.Second()
-		timestampsInSeconds[i] = (hours * 3600) + (minutes * 60) + seconds
-	}
-
-	return timestampsInSeconds, nil
-}
-
-// getMaxPeak identifies clusters of timestamps (peaks) within a sequence where the differences between adjacent timestamps
-// are below a certain threshold. It returns the largest peak, the sum of differences within that peak, and an error if any.
-func getMaxPeak(timestamps []int) ([]int, int, error) {
-	if len(timestamps) < 2 {
-		return nil, 0, fmt.Errorf("insufficient timestamps")
-	}
-
-	var peaks [][]int
-	maxDifference := 15
-
-	var cluster []int
-
-	// Iterate over timestamps to identify peaks
-	for i := 0; i < len(timestamps)-1; i++ {
-		minuend, subtrahend := timestamps[i], timestamps[i+1]
-
-		// Ensure timestamps are in ascending order
-		if minuend > subtrahend {
-			if len(cluster) > 0 {
-				peaks = append(peaks, cluster)
-				cluster = nil
-			}
+			logger.Info(fmt.Sprintf("failed to get song by ID (%v): %v", songID, err))
 			continue
 		}
 
-		difference := int(math.Abs(float64(minuend - subtrahend)))
-
-		// Check if the difference is within the maximum allowed difference
-		if len(cluster) == 0 && difference <= maxDifference {
-			cluster = append(cluster, minuend, subtrahend)
-		} else if difference <= maxDifference {
-			cluster = append(cluster, subtrahend)
-		} else if difference > maxDifference {
-			if len(cluster) > 0 {
-				peaks = append(peaks, cluster)
-				cluster = nil
-			}
-		}
+		fmt.Printf("Song: %v, Score: %v\n", song.Title, points)
+		fmt.Println("====================================")
+		match := Match{songID, song.Title, song.Artist, song.YouTubeID, timestamps[songID], points}
+		matchList = append(matchList, match)
 	}
 
-	if len(peaks) < 1 {
-		return nil, 0, fmt.Errorf("no peak was identified")
-	}
+	sort.Slice(matchList, func(i, j int) bool {
+		return matchList[i].Score > matchList[j].Score
+	})
 
-	// Identify the largest peak(s)
-	largestPeak := [][]int{peaks[0]}
-	for _, peak := range peaks[1:] {
-		if len(peak) == len(largestPeak[0]) {
-			largestPeak = append(largestPeak, peak)
-		} else if len(peak) > len(largestPeak[0]) {
-			largestPeak = nil
-			largestPeak = append(largestPeak, peak)
-		}
-	}
-
-	// In the case where there are multiple largest peaks,
-	// identify and return the largest peak with the smallest sum of differences
-	if len(largestPeak) > 1 {
-		fmt.Println("Largest Peak > 1: ", largestPeak)
-
-		// Deduplicate largest peaks to get accurate result.
-		// How? Consider two peaks: A: [53, 53, 53] and B: [14, 15].
-		// Peak A has only one unique value (53) repeated three times, while peak B has two unique values (14 and 15).
-		// In this case, peak B would be prioritized over peak A
-		var largestPeakDeduplicated [][]int
-		for _, peak := range largestPeak {
-			largestPeakDeduplicated = append(largestPeakDeduplicated, deduplicate(peak))
-		}
-		fmt.Println("Largest Peak deduplicated: ", largestPeakDeduplicated)
-
-		minDifferenceSum := math.Inf(1)
-		var peakWithMinDifferenceSum []int
-		for idx, peak := range largestPeakDeduplicated {
-			if len(peak) <= 1 {
-				continue
-			}
-
-			differenceSum := 0.0
-			for i := len(peak) - 1; i >= 1; i-- {
-				differenceSum += math.Abs(float64(peak[i] - peak[i-1]))
-			}
-			if differenceSum < minDifferenceSum {
-				minDifferenceSum = differenceSum
-				peakWithMinDifferenceSum = largestPeak[idx]
-			}
-		}
-
-		// In the case where no peak with the min difference sum was identified,
-		// probably because they are all duplicates, return the first from the largestspeaks
-		if len(peakWithMinDifferenceSum) == 0 {
-			peakWithMinDifferenceSum = largestPeak[0]
-			minDifferenceSum = 0
-		}
-
-		return peakWithMinDifferenceSum, int(minDifferenceSum), nil
-	}
-
-	// Otherwise, return the largest peak
-	maxPeak := largestPeak[0]
-	differenceSum := 0
-	for i := len(maxPeak) - 1; i >= 1; i-- {
-		differenceSum += maxPeak[i] - maxPeak[i-1]
-	}
-
-	return maxPeak, differenceSum, nil
+	fmt.Println("MatchList len: ", len(matchList))
+	return matchList, nil
 }
 
-// Chunkify divides the input audio signal into chunks and calculates the Short-Time Fourier Transform (STFT) for each chunk.
-// The function returns a 2D slice containing the STFT coefficients for each chunk.
-func Chunkify(audio []byte) [][]complex128 {
-	numWindows := len(audio) / (chunkSize - hopSize)
-	chunks := make([][]complex128, numWindows)
-
-	// Apply Hamming window function
-	window := make([]float64, chunkSize)
-	for i := range window {
-		window[i] = 0.54 - 0.46*math.Cos(2*math.Pi*float64(i)/float64(chunkSize-1))
-	}
-
-	// Perform STFT
-	for i := 0; i < numWindows; i++ {
-		// Extract current chunk
-		start := i * hopSize
-		end := start + chunkSize
-		if end > len(audio) {
-			end = len(audio)
+// matchScore computes a match score between the sample fingerprints and the fingerprint couples stored for a candidate song
+func matchScore(sample, match map[uint32]models.Couple) float64 {
+	// Will hold a list of points (time in the sample sound file, time in the matched database sound file)
+	points := [2][]float64{}
+	matches := 0.0
+	for k, sampleValue := range sample {
+		if matchValue, ok := match[k]; ok {
+			points[0] = append(points[0], float64(sampleValue.AnchorTimeMs))
+			points[1] = append(points[1], float64(matchValue.AnchorTimeMs))
+			matches++
 		}
-
-		chunk := make([]complex128, chunkSize)
-		for j := start; j < end; j++ {
-			chunk[j-start] = complex(float64(audio[j])*window[j-start], 0)
-		}
-
-		// Compute FFT
-		// chunks[i] = Fft(chunk)
-		chunks[i] = fft.FFT(chunk)
 	}
-
-	return chunks
+	corr := correlation(points[0], points[1])
+	fmt.Printf("Score (%v * %v * %v): %v\n", corr, corr, matches, corr*corr*matches)
+	return corr * corr * matches
 }
 
-// FingerprintChunks processes a collection of audio data represented as chunks of complex numbers and
-// generates fingerprints for each chunk based on the magnitude of frequency components within specific frequency ranges.
-func FingerprintChunks(chunks [][]complex128, chunkTag *ChunkTag) ([]int64, map[int64]ChunkTag) {
-	var fingerprintList []int64
-	fingerprintMap := make(map[int64]ChunkTag)
+// correlation computes the Pearson correlation coefficient between two series of points;
+// the length of x determines how many points are used
+func correlation(x []float64, y []float64) float64 {
+	n := len(x)
+	meanX, meanY := Avg(x[:n]), Avg(y[:n])
 
-	var chunksPerSecond int
-	var chunkCount int
-	var chunkTime time.Time
+	sXY := 0.0
+	sX := 0.0
+	sY := 0.0
 
-	if chunkTag != nil {
-		// bytesPerSecond = (samplingRate * bitDepth * channels) / 8
-		chunksPerSecond = (chunkSize - hopSize) / samplingRate
-		chunksPerSecond = len(chunks)
+	for i, xp := range x {
+		dx := xp - meanX
+		dy := y[i] - meanY
 
-		fmt.Println("CHUNKS PER SECOND: ", chunksPerSecond)
-		chunksPerSecond = 3
-		fmt.Println("CHUNKS PER SECOND: ", chunksPerSecond)
-		// if chunkSize == 4096 {
-		// 	chunksPerSecond = 10
-		// }
-		chunkCount = 0
-		chunkTime = time.Date(1, 1, 1, 0, 0, 0, 0, time.UTC)
+		sX += dx * dx
+		sY += dy * dy
+
+		sXY += dx * dy
 	}
 
-	for _, chunk := range chunks {
-		if chunkTag != nil {
-			chunkCount++
-			if chunkCount == chunksPerSecond {
-				chunkCount = 0
-				chunkTime = chunkTime.Add(1 * time.Second)
-				// fmt.Println(chunkTime.Format("15:04:05"))
-			}
-		}
-
-		chunkMags := map[string]int{
-			"20-60": 0, "60-250": 0, "250-500": 0,
-			"500-2000": 0, "2000-4000": 0, "4000-8000": 0, "8000-20000": 0,
-		}
-
-		for _, frequency := range chunk {
-			magnitude := int(cmplx.Abs(frequency))
-			ranges := []struct{ min, max int }{{20, 60}, {60, 250}, {250, 500}, {500, 2000}, {2000, 4000}, {4000, 8000}, {8000, 20001}}
-
-			for _, r := range ranges {
-				if magnitude >= r.min && magnitude < r.max &&
-					chunkMags[fmt.Sprintf("%d-%d", r.min, r.max)] < magnitude {
-					chunkMags[fmt.Sprintf("%d-%d", r.min, r.max)] = magnitude
-				}
-			}
-		}
-
-		// fingerprint := fmt.Sprintf("%d-%d-%d-%d-%d-%d-%d",
-		// 	chunkMags["20-60"],
-		// 	chunkMags["60-250"],
-		// 	chunkMags["250-500"],
-		// 	chunkMags["500-2000"],
-		// 	chunkMags["2000-4000"],
-		// 	chunkMags["4000-8000"],
-		// 	chunkMags["8000-20000"])
-
-		// fmt.Println(fingerprint)
-
-		points := [4]int64{
-			int64(chunkMags["60-250"]),
-			int64(chunkMags["250-500"]),
-			int64(chunkMags["500-2000"]),
-			int64(chunkMags["2000-4000"])}
-		// key := hash1(points[:])
-		// fmt.Printf("%s: %v\n", fingerprint, key)
-
-		// points := [6]int64{
-		// 	int64(chunkMags["20-60"]),
-		// 	int64(chunkMags["60-250"]),
-		// 	int64(chunkMags["250-500"]),
-		// 	int64(chunkMags["500-2000"]),
-		// 	int64(chunkMags["2000-4000"]),
-		// 	int64(chunkMags["4000-8000"])}
-		key := hash(points[:])
-
-		if chunkTag != nil {
-			newSampleTag := *chunkTag
-			newSampleTag.TimeStamp = chunkTime.Format("15:04:05")
-			fingerprintMap[key] = newSampleTag
-		} else {
-			fingerprintList = append(fingerprintList, key)
-		}
+	if sX == 0 || sY == 0 {
+		return 0
 	}
 
-	return fingerprintList, fingerprintMap
+	return sXY / (math.Sqrt(sX) * math.Sqrt(sY))
 }
 
-func hash(values []int64) int64 {
-	weight := 100
-	var result int64
-	for _, value := range values {
-		result += (value - (value % fuzzFactor)) * int64(weight)
-		weight = weight * weight
+// Avg computes the average of the given array
+func Avg(arr []float64) float64 {
+	if len(arr) == 0 {
+		return 0
 	}
-	return result
-}
-
-func hash1(values []int64) int64 {
-	p1, p2, p3, p4 := values[0], values[1], values[2], values[3]
-	return (p4-(p4%fuzzFactor))*100000000 +
-		(p3-(p3%fuzzFactor))*100000 +
-		(p2-(p2%fuzzFactor))*100 +
-		(p1 - (p1 % fuzzFactor))
-}
-
-func hash2(values []int64) int64 {
-	for i := range values {
-		values[i] += rand.Int63n(fuzzFactor) - fuzzFactor/2
+	sum := 0.0
+	for _, v := range arr {
+		sum += v
 	}
-	var buf []byte
-	for _, v := range values {
-		b := make([]byte, 8)
-		binary.LittleEndian.PutUint64(b, uint64(v))
-		buf = append(buf, b...)
-	}
-
-	hash := sha256.Sum256(buf)
-
-	return int64(binary.BigEndian.Uint64(hash[:8]))
+	return sum / float64(len(arr))
 }