Merge pull request #5 from cgzirim/development

Development
This commit is contained in:
Chigozirim Igweamaka 2024-07-09 23:21:08 +01:00 committed by GitHub
commit 3788ada052
6 changed files with 130 additions and 59 deletions

View file

@ -94,8 +94,8 @@ function App() {
audio: {
autoGainControl: false,
channelCount: 1,
echoCancellation: true,
noiseSuppression: true,
echoCancellation: false,
noiseSuppression: false,
sampleSize: 16,
},
};

View file

@ -49,6 +49,12 @@ func find(filePath string) {
return
}
if len(matches) == 0 {
fmt.Println("\nNo match found.")
fmt.Printf("\nSearch took: %s\n", searchDuration)
return
}
msg := "Matches:"
topMatches := matches
if len(matches) >= 20 {
@ -236,5 +242,5 @@ func erase(songsDir string) {
logger.ErrorContext(ctx, msg, slog.Any("error", err))
}
fmt.Println("Erase successful")
fmt.Println("Erase complete")
}

View file

@ -3,6 +3,7 @@ package shazam
import (
"fmt"
"math"
"song-recognition/models"
"song-recognition/utils"
"sort"
"time"
@ -17,7 +18,6 @@ type Match struct {
Score float64
}
// FindMatches processes the audio samples and finds matches in the database
func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int) ([]Match, time.Duration, error) {
startTime := time.Now()
logger := utils.GetLogger()
@ -30,9 +30,11 @@ func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int)
peaks := ExtractPeaks(spectrogram, audioDuration)
fingerprints := Fingerprint(peaks, utils.GenerateUniqueID())
var sampleCouples []models.Couple
addresses := make([]uint32, 0, len(fingerprints))
for address := range fingerprints {
addresses = append(addresses, address)
sampleCouples = append(sampleCouples, fingerprints[address])
}
db, err := utils.NewDbClient()
@ -41,61 +43,103 @@ func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int)
}
defer db.Close()
m, err := db.GetCouples(addresses)
couplesMap, err := db.GetCouples(addresses)
if err != nil {
return nil, time.Since(startTime), err
}
matches := map[uint32][][2]uint32{} // songID -> [(sampleTime, dbTime)]
timestamps := map[uint32]uint32{}
for address, couples := range m {
// Count occurrences of each couple to derive potential target zones
coupleCounts := make(map[uint32]map[uint32]int)
for _, couples := range couplesMap {
for _, couple := range couples {
matches[couple.SongID] = append(matches[couple.SongID], [2]uint32{fingerprints[address].AnchorTimeMs, couple.AnchorTimeMs})
timestamps[couple.SongID] = couple.AnchorTimeMs
key := (couple.SongID << 32) | uint32(couple.AnchorTimeMs)
if _, exists := coupleCounts[couple.SongID]; !exists {
coupleCounts[couple.SongID] = make(map[uint32]int)
}
coupleCounts[couple.SongID][key]++
}
}
scores := analyzeRelativeTiming(matches)
var matchList []Match
for songID, points := range scores {
song, songExists, err := db.GetSongByID(songID)
if !songExists {
logger.Info(fmt.Sprintf("song with ID (%v) doesn't exist", songID))
continue
// Filter target zones with targets (couples) meeting or exceeding the threshold
threshold := 4
filteredCouples := make(map[uint32][]models.Couple)
for songID, counts := range coupleCounts {
for key, count := range counts {
if count >= threshold {
filteredCouples[songID] = append(filteredCouples[songID], models.Couple{
AnchorTimeMs: key & 0xFFFFFFFF,
SongID: songID,
})
}
}
}
// Score matches by calculating mean absolute difference
var matches []Match
for songID, songCouples := range filteredCouples {
song, songExists, err := db.GetSongByID(songID)
if err != nil {
logger.Info(fmt.Sprintf("failed to get song by ID (%v): %v", songID, err))
continue
}
if !songExists {
logger.Info(fmt.Sprintf("song with ID (%v) doesn't exist", songID))
continue
}
match := Match{songID, song.Title, song.Artist, song.YouTubeID, timestamps[songID], points}
matchList = append(matchList, match)
m_a_d := meanAbsoluteDifference(songCouples, sampleCouples)
tstamp := songCouples[len(songCouples)-1].AnchorTimeMs
match := Match{songID, song.Title, song.Artist, song.YouTubeID, tstamp, m_a_d}
matches = append(matches, match)
}
sort.Slice(matchList, func(i, j int) bool {
return matchList[i].Score > matchList[j].Score
sort.Slice(matches, func(i, j int) bool {
return matches[i].Score > matches[j].Score
})
return matchList, time.Since(startTime), nil
// TODO: handle case when there's no match for cmdHandlers
return matches, time.Since(startTime), nil
}
// AnalyzeRelativeTiming checks for consistent relative timing and returns a score
func analyzeRelativeTiming(matches map[uint32][][2]uint32) map[uint32]float64 {
scores := make(map[uint32]float64)
for songID, times := range matches {
count := 0
for i := 0; i < len(times); i++ {
for j := i + 1; j < len(times); j++ {
sampleDiff := math.Abs(float64(times[i][0] - times[j][0]))
dbDiff := math.Abs(float64(times[i][1] - times[j][1]))
if math.Abs(sampleDiff-dbDiff) < 100 { // Allow some tolerance
count++
}
}
}
scores[songID] = float64(count)
// meanAbsoluteDifference returns the mean of |A[i].AnchorTimeMs - B[i].AnchorTimeMs|
// over the first min(len(A), len(B)) index-aligned elements of A and B.
//
// Elements beyond the shorter slice are ignored. When either slice is empty
// there is nothing to compare and the function returns 0 instead of the NaN
// that a 0/0 division would produce.
func meanAbsoluteDifference(A, B []models.Couple) float64 {
	minLen := len(A)
	if len(B) < minLen {
		minLen = len(B)
	}
	if minLen == 0 {
		return 0
	}

	var sumDiff float64
	for i := 0; i < minLen; i++ {
		// Convert each anchor time to float64 before subtracting: the anchor
		// times are unsigned, so subtracting them directly wraps around to a
		// huge value whenever A[i] is earlier than B[i], and math.Abs cannot
		// undo that.
		sumDiff += math.Abs(float64(A[i].AnchorTimeMs) - float64(B[i].AnchorTimeMs))
	}

	return sumDiff / float64(minLen)
}
// dynamicTimeWarping returns the Dynamic Time Warping (DTW) distance between
// the AnchorTimeMs sequences of A and B.
//
// The cost of pairing two elements is the absolute difference of their anchor
// times; the result is the minimum cumulative cost over all monotonic
// alignments of the two sequences. It returns 0 when both slices are empty and
// +Inf when exactly one of them is empty (no alignment exists).
func dynamicTimeWarping(A, B []models.Couple) float64 {
	lenA := len(A)
	lenB := len(B)

	// dtw[i][j] holds the minimum cumulative cost of aligning A[:i] with B[:j].
	// Every cell starts at +Inf so that unreachable alignments never win the
	// minimum below; only the empty-vs-empty alignment costs 0.
	dtw := make([][]float64, lenA+1)
	for i := range dtw {
		dtw[i] = make([]float64, lenB+1)
		for j := range dtw[i] {
			dtw[i][j] = math.Inf(1)
		}
	}
	dtw[0][0] = 0

	for i := 1; i <= lenA; i++ {
		for j := 1; j <= lenB; j++ {
			// Convert to float64 before subtracting: the anchor times are
			// unsigned, so a direct subtraction wraps around whenever the
			// first operand is smaller, and math.Abs cannot recover from that.
			cost := math.Abs(float64(A[i-1].AnchorTimeMs) - float64(B[j-1].AnchorTimeMs))
			// Extend the cheapest of: advance in A only, advance in B only,
			// or advance in both.
			dtw[i][j] = cost + math.Min(math.Min(dtw[i-1][j], dtw[i][j-1]), dtw[i-1][j-1])
		}
	}

	return dtw[lenA][lenB]
}

View file

@ -91,7 +91,7 @@ type Peak struct {
Freq complex128
}
// ExtractPeaks extracts peaks from a spectrogram based on a specified algorithm
// ExtractPeaks analyzes a spectrogram and extracts significant peaks in the frequency domain over time.
func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
if len(spectrogram) < 1 {
return []Peak{}

View file

@ -11,6 +11,7 @@ import (
"song-recognition/utils"
"song-recognition/wav"
"strings"
"time"
socketio "github.com/googollee/go-socket.io"
"github.com/mdobak/go-xerrors"
@ -207,37 +208,36 @@ func handleNewRecording(socket socketio.Conn, recordData string) {
sampleRate := recData.SampleRate
bitsPerSample := recData.SampleSize
fmt.Printf("Channels: %v, sampleRate: %v, bitsPerSample: %v\n", channels, sampleRate, bitsPerSample)
samples, err := wav.WavBytesToSamples(decodedAudioData)
if err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "failed to convert decodedData to samples.", slog.Any("error", err))
}
/** this operation alters the audio, adds some level of bass to it.
if sampleRate != 44100 {
samples, err = shazam.Downsample(samples, sampleRate, 44100)
if err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "failed to downsample.", slog.Any("error", err))
}
sampleRate = 44100
}
// Save recording
recordingInBytes, err := utils.FloatsToBytes(samples, bitsPerSample)
if err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "failed to convert bytes.", slog.Any("error", err))
}
decodedAudioData = recordingInBytes
*/
now := time.Now()
fileName := fmt.Sprintf("%04d_%02d_%02d_%02d_%02d_%02d.wav",
now.Second(), now.Minute(), now.Hour(),
now.Day(), now.Month(), now.Year(),
)
err = wav.WriteWavFile("blob.wav", decodedAudioData, sampleRate, channels, bitsPerSample)
err = wav.WriteWavFile(fileName, decodedAudioData, sampleRate, channels, bitsPerSample)
if err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "failed to write wav file.", slog.Any("error", err))
}
/*
wav.FFmpegConvertWAV(fileName, fileName, 44100, true)
wavInfo, _ := wav.ReadWavInfo("mono_" + fileName)
samples, _ = wav.WavBytesToSamples(wavInfo.Data)
// spotify.DeleteFile(fileName)
spotify.DeleteFile("mono_" + fileName)
*/
matches, _, err := shazam.FindMatches(samples, recData.Duration, sampleRate)
if err != nil {
err := xerrors.New(err)

View file

@ -7,6 +7,7 @@ import (
"fmt"
"io/ioutil"
"os"
"os/exec"
)
// WavHeader defines the structure of a WAV header
@ -148,3 +149,23 @@ func WavBytesToSamples(input []byte) ([]float64, error) {
return output, nil
}
// FFmpegConvertWAV converts a WAV file by running the external "ffmpeg"
// executable, which must be resolvable via PATH.
//
// inputFile is read and resampled to targetSampleRate (Hz). Any existing
// output file is overwritten ("-y"). When toMono is true the audio is also
// down-mixed to one channel and encoded as 16-bit little-endian PCM, and the
// result is written to "mono_" + outputFile instead of outputFile. The prefix
// is prepended to the whole string, so pass a bare file name (no directory
// component) when toMono is true.
//
// On failure the returned error wraps the underlying exec error and includes
// whatever ffmpeg printed, so the cause is visible rather than just an exit
// status.
func FFmpegConvertWAV(inputFile, outputFile string, targetSampleRate int, toMono bool) error {
	cmdArgs := []string{
		// Keep ffmpeg quiet unless something goes wrong, so the captured
		// output below contains only the actual error text.
		"-hide_banner",
		"-loglevel", "error",
		"-i", inputFile,
		"-ar", fmt.Sprintf("%d", targetSampleRate),
		"-y",
	}

	if toMono {
		outputFile = "mono_" + outputFile
		cmdArgs = append(cmdArgs, "-ac", "1", "-c:a", "pcm_s16le")
	}

	cmdArgs = append(cmdArgs, outputFile)

	out, err := exec.Command("ffmpeg", cmdArgs...).CombinedOutput()
	if err != nil {
		return fmt.Errorf("ffmpeg convert %q to %q: %w: %s", inputFile, outputFile, err, out)
	}
	return nil
}