mirror of
https://github.com/cgzirim/seek-tune.git
synced 2025-12-17 17:04:22 +00:00
commit
c38e035a71
11 changed files with 181 additions and 181 deletions
|
|
@ -153,10 +153,10 @@ function App() {
|
|||
chunks.push(e.data);
|
||||
};
|
||||
|
||||
// Stop recording after 15 seconds
|
||||
// Stop recording after 20 seconds
|
||||
setTimeout(function () {
|
||||
mediaRecorder.stop();
|
||||
}, 15000);
|
||||
}, 20000);
|
||||
|
||||
mediaRecorder.addEventListener("stop", () => {
|
||||
const blob = new Blob(chunks, { type: "audio/wav" });
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ func find(filePath string) {
|
|||
}
|
||||
|
||||
func download(spotifyURL string) {
|
||||
err := spotify.CreateFolder(SONGS_DIR)
|
||||
err := utils.CreateFolder(SONGS_DIR)
|
||||
if err != nil {
|
||||
err := xerrors.New(err)
|
||||
logger := utils.GetLogger()
|
||||
|
|
@ -112,15 +112,6 @@ func serve(protocol, port string) {
|
|||
return true
|
||||
}
|
||||
|
||||
err := spotify.CreateFolder(SONGS_DIR)
|
||||
if err != nil {
|
||||
err := xerrors.New(err)
|
||||
logger := utils.GetLogger()
|
||||
ctx := context.Background()
|
||||
logMsg := fmt.Sprintf("failed to create directory %v", SONGS_DIR)
|
||||
logger.ErrorContext(ctx, logMsg, slog.Any("error", err))
|
||||
}
|
||||
|
||||
server := socketio.NewServer(&engineio.Options{
|
||||
Transports: []transport.Transport{
|
||||
&polling.Transport{
|
||||
|
|
|
|||
22
main.go
22
main.go
|
|
@ -1,12 +1,34 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"song-recognition/utils"
|
||||
|
||||
"github.com/mdobak/go-xerrors"
|
||||
)
|
||||
|
||||
func main() {
|
||||
err := utils.CreateFolder("tmp")
|
||||
if err != nil {
|
||||
logger := utils.GetLogger()
|
||||
err := xerrors.New(err)
|
||||
ctx := context.Background()
|
||||
logger.ErrorContext(ctx, "Failed create tmp dir.", slog.Any("error", err))
|
||||
}
|
||||
|
||||
err = utils.CreateFolder(SONGS_DIR)
|
||||
if err != nil {
|
||||
err := xerrors.New(err)
|
||||
logger := utils.GetLogger()
|
||||
ctx := context.Background()
|
||||
logMsg := fmt.Sprintf("failed to create directory %v", SONGS_DIR)
|
||||
logger.ErrorContext(ctx, logMsg, slog.Any("error", err))
|
||||
}
|
||||
|
||||
if len(os.Args) < 2 {
|
||||
fmt.Println("Expected 'find', 'download', 'erase', or 'serve' subcommands")
|
||||
os.Exit(1)
|
||||
|
|
|
|||
|
|
@ -4,3 +4,11 @@ type Couple struct {
|
|||
AnchorTimeMs uint32
|
||||
SongID uint32
|
||||
}
|
||||
|
||||
// RecordData is the JSON payload that describes one audio recording sent by a
// client.
//
// Audio carries the recording as a base64 string. Channels, SampleRate and
// SampleSize describe the PCM stream and are used when the decoded bytes are
// written out as a WAV file (SampleSize is the number of bits per sample).
// Duration is the recording length as reported by the client — presumably in
// seconds; confirm against the sender.
type RecordData struct {
	Audio      string  `json:"audio"`
	Duration   float64 `json:"duration"`
	Channels   int     `json:"channels"`
	SampleRate int     `json:"sampleRate"`
	SampleSize int     `json:"sampleSize"`
}
|
||||
|
|
|
|||
112
shazam/shazam.go
112
shazam/shazam.go
|
|
@ -3,7 +3,6 @@ package shazam
|
|||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"song-recognition/models"
|
||||
"song-recognition/utils"
|
||||
"sort"
|
||||
"time"
|
||||
|
|
@ -18,6 +17,7 @@ type Match struct {
|
|||
Score float64
|
||||
}
|
||||
|
||||
// FindMatches processes the audio samples and finds matches in the database
|
||||
func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int) ([]Match, time.Duration, error) {
|
||||
startTime := time.Now()
|
||||
logger := utils.GetLogger()
|
||||
|
|
@ -30,11 +30,9 @@ func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int)
|
|||
peaks := ExtractPeaks(spectrogram, audioDuration)
|
||||
fingerprints := Fingerprint(peaks, utils.GenerateUniqueID())
|
||||
|
||||
var sampleCouples []models.Couple
|
||||
addresses := make([]uint32, 0, len(fingerprints))
|
||||
for address := range fingerprints {
|
||||
addresses = append(addresses, address)
|
||||
sampleCouples = append(sampleCouples, fingerprints[address])
|
||||
}
|
||||
|
||||
db, err := utils.NewDbClient()
|
||||
|
|
@ -43,103 +41,65 @@ func FindMatches(audioSamples []float64, audioDuration float64, sampleRate int)
|
|||
}
|
||||
defer db.Close()
|
||||
|
||||
couplesMap, err := db.GetCouples(addresses)
|
||||
m, err := db.GetCouples(addresses)
|
||||
if err != nil {
|
||||
return nil, time.Since(startTime), err
|
||||
}
|
||||
|
||||
// Count occurrences of each couple to derive potential target zones
|
||||
coupleCounts := make(map[uint32]map[uint32]int)
|
||||
for _, couples := range couplesMap {
|
||||
matches := map[uint32][][2]uint32{} // songID -> [(sampleTime, dbTime)]
|
||||
timestamps := map[uint32][]uint32{}
|
||||
|
||||
for address, couples := range m {
|
||||
for _, couple := range couples {
|
||||
key := (couple.SongID << 32) | uint32(couple.AnchorTimeMs)
|
||||
if _, exists := coupleCounts[couple.SongID]; !exists {
|
||||
coupleCounts[couple.SongID] = make(map[uint32]int)
|
||||
}
|
||||
coupleCounts[couple.SongID][key]++
|
||||
matches[couple.SongID] = append(matches[couple.SongID], [2]uint32{fingerprints[address].AnchorTimeMs, couple.AnchorTimeMs})
|
||||
timestamps[couple.SongID] = append(timestamps[couple.SongID], couple.AnchorTimeMs)
|
||||
}
|
||||
}
|
||||
|
||||
// Filter target zones with targets (couples) meeting or exceeding the threshold
|
||||
threshold := 4
|
||||
filteredCouples := make(map[uint32][]models.Couple)
|
||||
for songID, counts := range coupleCounts {
|
||||
for key, count := range counts {
|
||||
if count >= threshold {
|
||||
filteredCouples[songID] = append(filteredCouples[songID], models.Couple{
|
||||
AnchorTimeMs: key & 0xFFFFFFFF,
|
||||
SongID: songID,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
scores := analyzeRelativeTiming(matches)
|
||||
|
||||
// Score matches by calculating mean absolute difference
|
||||
var matches []Match
|
||||
for songID, songCouples := range filteredCouples {
|
||||
var matchList []Match
|
||||
for songID, points := range scores {
|
||||
song, songExists, err := db.GetSongByID(songID)
|
||||
if err != nil {
|
||||
logger.Info(fmt.Sprintf("failed to get song by ID (%v): %v", songID, err))
|
||||
continue
|
||||
}
|
||||
if !songExists {
|
||||
logger.Info(fmt.Sprintf("song with ID (%v) doesn't exist", songID))
|
||||
continue
|
||||
}
|
||||
|
||||
m_a_d := meanAbsoluteDifference(songCouples, sampleCouples)
|
||||
|
||||
tstamp := songCouples[len(songCouples)-1].AnchorTimeMs
|
||||
match := Match{songID, song.Title, song.Artist, song.YouTubeID, tstamp, m_a_d}
|
||||
matches = append(matches, match)
|
||||
if err != nil {
|
||||
logger.Info(fmt.Sprintf("failed to get song by ID (%v): %v", songID, err))
|
||||
continue
|
||||
}
|
||||
|
||||
sort.Slice(matches, func(i, j int) bool {
|
||||
return matches[i].Score > matches[j].Score
|
||||
sort.Slice(timestamps[songID], func(i, j int) bool {
|
||||
return timestamps[songID][i] < timestamps[songID][j]
|
||||
})
|
||||
|
||||
// TODO: handle case when there's no match for cmdHandlers
|
||||
|
||||
return matches, time.Since(startTime), nil
|
||||
match := Match{songID, song.Title, song.Artist, song.YouTubeID, timestamps[songID][0], points}
|
||||
matchList = append(matchList, match)
|
||||
}
|
||||
|
||||
func meanAbsoluteDifference(A, B []models.Couple) float64 {
|
||||
minLen := len(A)
|
||||
if len(B) < minLen {
|
||||
minLen = len(B)
|
||||
sort.Slice(matchList, func(i, j int) bool {
|
||||
return matchList[i].Score > matchList[j].Score
|
||||
})
|
||||
|
||||
return matchList, time.Since(startTime), nil
|
||||
}
|
||||
|
||||
var sumDiff float64
|
||||
for i := 0; i < minLen; i++ {
|
||||
diff := math.Abs(float64(A[i].AnchorTimeMs - B[i].AnchorTimeMs))
|
||||
sumDiff += diff
|
||||
}
|
||||
|
||||
meanAbsDiff := sumDiff / float64(minLen)
|
||||
return meanAbsDiff
|
||||
}
|
||||
|
||||
// Function to calculate Dynamic Time Warping distance
|
||||
func dynamicTimeWarping(A, B []models.Couple) float64 {
|
||||
lenA := len(A)
|
||||
lenB := len(B)
|
||||
|
||||
// Create a 2D array to store DTW distances
|
||||
dtw := make([][]float64, lenA+1)
|
||||
for i := range dtw {
|
||||
dtw[i] = make([]float64, lenB+1)
|
||||
for j := range dtw[i] {
|
||||
dtw[i][j] = math.Inf(1)
|
||||
// analyzeRelativeTiming checks for consistent relative timing and returns a score per song ID
|
||||
func analyzeRelativeTiming(matches map[uint32][][2]uint32) map[uint32]float64 {
|
||||
scores := make(map[uint32]float64)
|
||||
for songID, times := range matches {
|
||||
count := 0
|
||||
for i := 0; i < len(times); i++ {
|
||||
for j := i + 1; j < len(times); j++ {
|
||||
sampleDiff := math.Abs(float64(times[i][0] - times[j][0]))
|
||||
dbDiff := math.Abs(float64(times[i][1] - times[j][1]))
|
||||
if math.Abs(sampleDiff-dbDiff) < 100 { // Allow some tolerance
|
||||
count++
|
||||
}
|
||||
}
|
||||
dtw[0][0] = 0
|
||||
|
||||
for i := 1; i <= lenA; i++ {
|
||||
for j := 1; j <= lenB; j++ {
|
||||
cost := math.Abs(float64(A[i-1].AnchorTimeMs - B[j-1].AnchorTimeMs))
|
||||
dtw[i][j] = cost + math.Min(math.Min(dtw[i-1][j], dtw[i][j-1]), dtw[i-1][j-1])
|
||||
}
|
||||
scores[songID] = float64(count)
|
||||
}
|
||||
|
||||
return dtw[lenA][lenB]
|
||||
return scores
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,16 +2,14 @@ package main
|
|||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"song-recognition/models"
|
||||
"song-recognition/shazam"
|
||||
"song-recognition/spotify"
|
||||
"song-recognition/utils"
|
||||
"song-recognition/wav"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
socketio "github.com/googollee/go-socket.io"
|
||||
"github.com/mdobak/go-xerrors"
|
||||
|
|
@ -30,14 +28,6 @@ func downloadStatus(statusType, message string) string {
|
|||
return string(jsonData)
|
||||
}
|
||||
|
||||
// RecordData is the shape of the JSON message a client sends with a new
// recording; handleNewRecording unmarshals the incoming string into it.
//
// Audio is the base64-encoded recording. Channels, SampleRate and SampleSize
// (bits per sample) describe the PCM stream. Duration is the client-reported
// length of the recording — presumably seconds; confirm against the sender.
type RecordData struct {
	Audio      string  `json:"audio"`
	Duration   float64 `json:"duration"`
	Channels   int     `json:"channels"`
	SampleRate int     `json:"sampleRate"`
	SampleSize int     `json:"sampleSize"`
}
|
||||
|
||||
func handleTotalSongs(socket socketio.Conn) {
|
||||
logger := utils.GetLogger()
|
||||
ctx := context.Background()
|
||||
|
|
@ -188,57 +178,21 @@ func handleNewRecording(socket socketio.Conn, recordData string) {
|
|||
logger := utils.GetLogger()
|
||||
ctx := context.Background()
|
||||
|
||||
var recData RecordData
|
||||
var recData models.RecordData
|
||||
if err := json.Unmarshal([]byte(recordData), &recData); err != nil {
|
||||
err := xerrors.New(err)
|
||||
logger.ErrorContext(ctx, "Failed to unmarshal record data.", slog.Any("error", err))
|
||||
return
|
||||
}
|
||||
|
||||
// Decode base64 data
|
||||
decodedAudioData, err := base64.StdEncoding.DecodeString(recData.Audio)
|
||||
samples, err := utils.ProcessRecording(&recData, true)
|
||||
if err != nil {
|
||||
err := xerrors.New(err)
|
||||
logger.ErrorContext(ctx, "failed to decode base64 data.", slog.Any("error", err))
|
||||
logger.ErrorContext(ctx, "Failed to process recording.", slog.Any("error", err))
|
||||
return
|
||||
}
|
||||
|
||||
// Save the decoded data to a file
|
||||
channels := recData.Channels
|
||||
sampleRate := recData.SampleRate
|
||||
bitsPerSample := recData.SampleSize
|
||||
|
||||
fmt.Printf("Channels: %v, sampleRate: %v, bitsPerSample: %v\n", channels, sampleRate, bitsPerSample)
|
||||
|
||||
samples, err := wav.WavBytesToSamples(decodedAudioData)
|
||||
if err != nil {
|
||||
err := xerrors.New(err)
|
||||
logger.ErrorContext(ctx, "failed to convert decodedData to samples.", slog.Any("error", err))
|
||||
}
|
||||
|
||||
// Save recording
|
||||
now := time.Now()
|
||||
fileName := fmt.Sprintf("%04d_%02d_%02d_%02d_%02d_%02d.wav",
|
||||
now.Second(), now.Minute(), now.Hour(),
|
||||
now.Day(), now.Month(), now.Year(),
|
||||
)
|
||||
|
||||
err = wav.WriteWavFile(fileName, decodedAudioData, sampleRate, channels, bitsPerSample)
|
||||
if err != nil {
|
||||
err := xerrors.New(err)
|
||||
logger.ErrorContext(ctx, "failed to write wav file.", slog.Any("error", err))
|
||||
}
|
||||
|
||||
/*
|
||||
wav.FFmpegConvertWAV(fileName, fileName, 44100, true)
|
||||
wavInfo, _ := wav.ReadWavInfo("mono_" + fileName)
|
||||
samples, _ = wav.WavBytesToSamples(wavInfo.Data)
|
||||
// spotify.DeleteFile(fileName)
|
||||
spotify.DeleteFile("mono_" + fileName)
|
||||
|
||||
*/
|
||||
|
||||
matches, _, err := shazam.FindMatches(samples, recData.Duration, sampleRate)
|
||||
matches, _, err := shazam.FindMatches(samples, recData.Duration, recData.SampleRate)
|
||||
if err != nil {
|
||||
err := xerrors.New(err)
|
||||
logger.ErrorContext(ctx, "failed to get matches.", slog.Any("error", err))
|
||||
|
|
|
|||
|
|
@ -146,10 +146,10 @@ func dlTrack(tracks []Track, path string) (int, error) {
|
|||
return
|
||||
}
|
||||
|
||||
DeleteFile(filepath.Join(path, fileName+".m4a"))
|
||||
utils.DeleteFile(filepath.Join(path, fileName+".m4a"))
|
||||
|
||||
if DELETE_SONG_FILE {
|
||||
DeleteFile(filepath.Join(path, fileName+".wav"))
|
||||
utils.DeleteFile(filepath.Join(path, fileName+".wav"))
|
||||
}
|
||||
|
||||
fmt.Printf("'%s' by '%s' was downloaded\n", track.Title, track.Artist)
|
||||
|
|
|
|||
|
|
@ -39,22 +39,6 @@ func GetFileSize(file string) (int64, error) {
|
|||
return size, nil
|
||||
}
|
||||
|
||||
// DeleteFile removes filePath (a file, or a directory with everything in it)
// if it exists.
//
// It is best-effort: a path that cannot be stat'ed is left alone without any
// report, and a removal failure is only printed to standard output.
func DeleteFile(filePath string) {
	// Nothing to do when the path is missing or cannot be inspected.
	if _, err := os.Stat(filePath); err != nil {
		return
	}

	if err := os.RemoveAll(filePath); err != nil {
		fmt.Println("Error deleting file:", err)
	}
}
|
||||
|
||||
// CreateFolder creates folderPath together with any missing parent
// directories, using permission bits 0755. It returns nil when the directory
// already exists and the error from os.MkdirAll otherwise.
func CreateFolder(folderPath string) error {
	return os.MkdirAll(folderPath, 0755)
}
|
||||
|
||||
func SongKeyExists(key string) (bool, error) {
|
||||
db, err := utils.NewDbClient()
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -1,10 +1,37 @@
|
|||
package utils
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"song-recognition/models"
|
||||
"song-recognition/wav"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/mdobak/go-xerrors"
|
||||
)
|
||||
|
||||
// DeleteFile removes filePath (a file, or a directory with everything in it)
// if it exists.
//
// A path that cannot be stat'ed — typically because it does not exist — is
// treated as already deleted and yields nil. Otherwise the result of
// os.RemoveAll is returned.
func DeleteFile(filePath string) error {
	// Any stat failure means there is nothing we can or need to remove.
	if _, err := os.Stat(filePath); err != nil {
		return nil
	}
	return os.RemoveAll(filePath)
}
|
||||
|
||||
// CreateFolder makes sure the directory folderPath exists, creating it and
// any missing parents with permission bits 0755. An already existing
// directory is not an error; any failure reported by os.MkdirAll is returned
// unchanged.
func CreateFolder(folderPath string) error {
	return os.MkdirAll(folderPath, 0755)
}
|
||||
|
||||
func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
|
||||
var byteData []byte
|
||||
|
||||
|
|
@ -45,3 +72,52 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
|
|||
|
||||
return byteData, nil
|
||||
}
|
||||
|
||||
func ProcessRecording(recData *models.RecordData, saveRecording bool) ([]float64, error) {
|
||||
decodedAudioData, err := base64.StdEncoding.DecodeString(recData.Audio)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
fileName := fmt.Sprintf("%04d_%02d_%02d_%02d_%02d_%02d.wav",
|
||||
now.Second(), now.Minute(), now.Hour(),
|
||||
now.Day(), now.Month(), now.Year(),
|
||||
)
|
||||
filePath := "tmp/" + fileName
|
||||
|
||||
err = wav.WriteWavFile(filePath, decodedAudioData, recData.SampleRate, recData.Channels, recData.SampleSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
reformatedWavFile, err := wav.ReformatWAV(filePath, 1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
wavInfo, _ := wav.ReadWavInfo(reformatedWavFile)
|
||||
samples, _ := wav.WavBytesToSamples(wavInfo.Data)
|
||||
|
||||
if saveRecording {
|
||||
logger := GetLogger()
|
||||
ctx := context.Background()
|
||||
|
||||
err := CreateFolder("recordings")
|
||||
if err != nil {
|
||||
err := xerrors.New(err)
|
||||
logger.ErrorContext(ctx, "Failed create folder.", slog.Any("error", err))
|
||||
}
|
||||
|
||||
newFilePath := strings.Replace(reformatedWavFile, "tmp/", "recordings/", 1)
|
||||
err = os.Rename(reformatedWavFile, newFilePath)
|
||||
if err != nil {
|
||||
logger.ErrorContext(ctx, "Failed to move file.", slog.Any("error", err))
|
||||
}
|
||||
}
|
||||
|
||||
DeleteFile(fileName)
|
||||
DeleteFile(reformatedWavFile)
|
||||
|
||||
return samples, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, errr
|
|||
return "", fmt.Errorf("input file does not exist: %v", err)
|
||||
}
|
||||
|
||||
if channels != 1 || channels != 2 {
|
||||
if channels < 1 || channels > 2 {
|
||||
channels = 1
|
||||
}
|
||||
|
||||
|
|
@ -39,3 +39,29 @@ func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, errr
|
|||
|
||||
return outputFile, nil
|
||||
}
|
||||
|
||||
// ReformatWAV re-encodes the audio file at inputFilePath with ffmpeg as
// 16-bit little-endian PCM at 44100 Hz with the requested number of channels.
//
// channels must be 1 or 2; any other value falls back to 1. The output is
// written next to the input: the input's extension is stripped and "rfm.wav"
// is appended (so "tmp/a.wav" becomes "tmp/arfm.wav"), overwriting any
// existing file of that name.
//
// It returns the output path, or an empty path and an error that wraps the
// ffmpeg failure and includes ffmpeg's combined output.
func ReformatWAV(inputFilePath string, channels int) (reformatedFilePath string, errr error) {
	if channels < 1 || channels > 2 {
		channels = 1
	}

	fileExt := filepath.Ext(inputFilePath)
	outputFile := strings.TrimSuffix(inputFilePath, fileExt) + "rfm.wav"

	cmd := exec.Command(
		"ffmpeg",
		"-y", // overwrite the output file without prompting
		"-i", inputFilePath,
		"-c", "pcm_s16le",
		"-ar", "44100",
		"-ac", fmt.Sprint(channels),
		outputFile,
	)

	output, err := cmd.CombinedOutput()
	if err != nil {
		// %w keeps the exec error reachable through errors.Is / errors.As.
		return "", fmt.Errorf("failed to convert to WAV: %w, output %v", err, string(output))
	}

	return outputFile, nil
}
|
||||
|
|
|
|||
21
wav/wav.go
21
wav/wav.go
|
|
@ -7,7 +7,6 @@ import (
|
|||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
)
|
||||
|
||||
// WavHeader defines the structure of a WAV header
|
||||
|
|
@ -149,23 +148,3 @@ func WavBytesToSamples(input []byte) ([]float64, error) {
|
|||
|
||||
return output, nil
|
||||
}
|
||||
|
||||
// FFmpegConvertWAV converts a WAV file using ffmpeg.
//
// The output is resampled to targetSampleRate and any existing output file is
// overwritten. When toMono is true the audio is additionally downmixed to one
// channel and encoded as 16-bit little-endian PCM, and the result is written
// to "mono_" + outputFile instead of outputFile.
//
// NOTE(review): the "mono_" prefix is put in front of the whole outputFile
// string, so an outputFile with a directory component ends up in a different
// directory — confirm callers only pass bare file names.
//
// On failure the returned error wraps the exec error and carries ffmpeg's
// combined output.
func FFmpegConvertWAV(inputFile, outputFile string, targetSampleRate int, toMono bool) error {
	args := []string{
		"-i", inputFile,
		"-ar", fmt.Sprintf("%d", targetSampleRate),
		"-y",
	}

	if toMono {
		outputFile = "mono_" + outputFile
		args = append(args, "-ac", "1", "-c:a", "pcm_s16le")
	}

	// The output path must be the last argument.
	args = append(args, outputFile)

	output, err := exec.Command("ffmpeg", args...).CombinedOutput()
	if err != nil {
		return fmt.Errorf("ffmpeg failed converting %q: %w, output %v", inputFile, err, string(output))
	}
	return nil
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue