diff --git a/.dockerignore b/.dockerignore index be44689..cd491fd 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,28 +1,88 @@ -# Binaries for programs and plugins -*.exe -*.ogg -*.m4a -*.zip -*.exe~ -*.dll -*.so -*.dylib +# Git +.git +.gitignore +.gitattributes -# Test binary, built with `go test -c` -*.test +# Documentation +*.md +!README.md +LICENSE -# Output of the go coverage tool, specifically when used with LiteIDE -*.out - -# Dependency directories (remove the comment below to include it) -# vendor/ - -# Go workspace file -go.work -**/songs +# IDE .vscode +.idea +*.swp +*.swo +*~ -package-lock.json +# OS +.DS_Store +Thumbs.db -*sqlite3 -.env \ No newline at end of file +# Node +client/node_modules +client/.env.local +client/.env.development.local +client/.env.test.local +client/.env.production.local +client/build +client/coverage +client/npm-debug.log* +client/yarn-debug.log* +client/yarn-error.log* + +# Go +server/seek-tune +server/*.exe +server/*.test +server/*.out +server/vendor/ + +# Application data (don't copy into image) +server/songs/** +server/recordings/** +server/snippets/** +server/tmp/** +server/db/*.sqlite3 +server/db/*.db + +# Audio files +*.mp3 +*.wav +*.m4a +*.ogg +*.flac +*.aac + +# Archives +*.zip +*.tar +*.gz +*.rar + +# Environment +.env +.env.* +!.env.example + +# CI/CD +.github +.gitlab-ci.yml +.travis.yml + +# Docker +docker-compose*.yml +!docker-compose.yml +Dockerfile* +!Dockerfile + +# WASM (already built separately if needed) +wasm/fingerprint.wasm +wasm/go.sum + +# Scripts +scripts/ +appspec.yml + +# Logs +*.log \ No newline at end of file diff --git a/.env.example b/.env.example deleted file mode 100644 index e30fdc5..0000000 --- a/.env.example +++ /dev/null @@ -1,9 +0,0 @@ -DB_TYPE=mongo -DB_USER=user -DB_PASS=password -DB_NAME=seek-tune -DB_HOST=192.168.0.1 -DB_PORT=27017 -REACT_APP_BACKEND_URL=http://localhost:5000 -SPOTIFY_CLIENT_ID=yourclientid -SPOTIFY_CLIENT_SECRET=yoursecret \ No newline at end of file diff --git a/.gitignore b/.gitignore index be17357..52c8f30 100644 --- a/.gitignore +++ b/.gitignore @@ -28,4 +28,6 @@ go.work package-lock.json *sqlite3 -.env \ No newline at end of file +.env + +token.json \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 44c8845..a64b0fa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,33 +1,63 @@ -# build react +# Build React frontend FROM node:20-alpine AS build_react_stage -RUN mkdir -p /home/react -WORKDIR /home/react +WORKDIR /app/client -COPY client/package.json ./ -RUN npm install +COPY client/package*.json ./ +RUN npm ci --only=production && npm cache clean --force COPY client/ ./ ARG REACT_APP_BACKEND_URL ENV REACT_APP_BACKEND_URL=${REACT_APP_BACKEND_URL} RUN npm run build -# build go -FROM golang:1.21.6 +# Build Go backend +FROM golang:1.24-alpine AS build_go_stage -WORKDIR /home/seek-tune +RUN apk add --no-cache git ca-certificates tzdata gcc musl-dev + +WORKDIR /app/server COPY server/go.mod server/go.sum ./ -RUN go mod download +RUN go mod download && go mod verify COPY server/ ./ -ENV ENV=production +RUN go build -ldflags="-w -s" -o seek-tune + +# Final runtime image +FROM alpine:latest + +# Install runtime dependencies +RUN apk add --no-cache \ + ca-certificates \ + tzdata \ + ffmpeg \ + python3 \ + py3-pip \ + && pip3 install --no-cache-dir yt-dlp --break-system-packages + +WORKDIR /app + +COPY --from=build_go_stage /app/server/seek-tune . RUN mkdir -p static -COPY --from=build_react_stage /home/react/build static +COPY --from=build_react_stage /app/client/build ./static -RUN go build -o seek-tune +RUN mkdir -p db songs recordings snippets tmp && \ + chmod -R 755 db songs recordings snippets tmp + +ENV ENV=production EXPOSE 5000 -CMD [ "/home/seek-tune/seek-tune", "serve" ] \ No newline at end of file +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD wget --no-verbose --tries=1 --spider http://localhost:5000/ || exit 1 + +# Run as non-root user for security +RUN addgroup -g 1001 -S appuser && \ + adduser -u 1001 -S appuser -G appuser && \ + chown -R appuser:appuser /app + +USER appuser + +CMD ["./seek-tune", "serve", "http", "5000"] \ No newline at end of file diff --git a/README.md b/README.md index 3c66eb4..d3cabf4 100644 --- a/README.md +++ b/README.md @@ -8,21 +8,17 @@

-

Demo in Video

+

Demo in Video | How it was made (YouTube)

## Description 🎼 SeekTune is an implementation of Shazam's song recognition algorithm based on insights from these [resources](#resources--card_file_box). It integrates Spotify and YouTube APIs to find and download songs. -[//]: # (## Current Limitations -While the algorithm works excellently in matching a song with its exact file, it doesn't always find the right match from a recording. However, this project is still a work in progress. I'm hopeful about making it work, but I could definitely use some help :slightly_smiling_face:. -Additionally, it currently only supports song files in WAV format. -) - ## Installation :desktop_computer: ### Prerequisites - Golang: [Install Golang](https://golang.org/dl/) - FFmpeg: [Install FFmpeg](https://ffmpeg.org/download.html) -- NPM: To run the client (frontend). +- NPM: [Install Node](https://nodejs.org/en/download) +- YT-DLP: [Install YT-DLP](https://github.com/yt-dlp/yt-dlp/wiki/Installation) ### Steps 📦 Clone the repository: @@ -42,27 +38,17 @@ Prerequisites: [Docker](https://docs.docker.com/get-docker/) and [Docker Compose docker-compose down ``` -#### 🎧 Spotify API +#### 🎧 Spotify API Setup -To access Spotify metadata, the project now uses the official [Spotify Web API](https://developer.spotify.com/documentation/web-api/). This requires creating a developer application and retrieving a client ID and client secret. +1. Get credentials: Follow the [official getting started guide](https://developer.spotify.com/documentation/web-api/tutorials/getting-started) to create a Spotify app and obtain your **Client ID** and **Client Secret**. -Follow the [official getting started guide](https://developer.spotify.com/documentation/web-api/tutorials/getting-started#request-an-access-token) to: +2. Configure: Create a `.env` file in the `server` directory: + ```bash + SPOTIFY_CLIENT_ID=your-client-id + SPOTIFY_CLIENT_SECRET=your-client-secret + ``` -1. Create a Spotify developer app. -2. Copy your **Client ID** and **Client Secret**. - -##### Setting up Credentials -Instead of using a credentials.json file, the application now reads these values from environment variables. - -Create a .env file in the server directory with the following content: - -``` -SPOTIFY_CLIENT_ID=your-client-id -SPOTIFY_CLIENT_SECRET=your-client-secret -``` - -Make sure this .env file is loaded into your environment before running the server. -The application will automatically read this file to fetch and cache access tokens. If the token is expired or missing, a new one will be requested. +The app will automatically fetch and cache access tokens as needed. #### 💻 Set Up Natively Install dependencies for the backend @@ -109,7 +95,12 @@ go run *.go find ``` #### ▸ Delete fingerprints and songs 🗑️ ``` +# Delete only database (default) go run *.go erase +go run *.go erase db + +# Delete both database and song files +go run *.go erase all ``` ## Example :film_projector: diff --git a/client/.env.example b/client/.env.example new file mode 100644 index 0000000..4c9fd18 --- /dev/null +++ b/client/.env.example @@ -0,0 +1 @@ +REACT_APP_BACKEND_URL=http://localhost:5000 \ No newline at end of file diff --git a/client/public/fingerprint.wasm b/client/public/fingerprint.wasm new file mode 100755 index 0000000..e3bae6e Binary files /dev/null and b/client/public/fingerprint.wasm differ diff --git a/client/public/main.wasm b/client/public/main.wasm deleted file mode 100755 index 34ccaac..0000000 Binary files a/client/public/main.wasm and /dev/null differ diff --git a/client/src/App.js b/client/src/App.js index cfa0e1f..52caa02 100644 --- a/client/src/App.js +++ b/client/src/App.js @@ -15,7 +15,8 @@ import { fetchFile } from '@ffmpeg/util'; import AnimatedNumber from "./components/AnimatedNumber"; -const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000"; +const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5500"; +const recordStereo = process.env.REACT_APP_RECORD_STEREO === "true" || false; // https://seek-tune-rq4gn.ondigitalocean.app/ var socket = io(server); @@ -91,7 +92,7 @@ function App() { try { const go = new window.Go(); const result = await WebAssembly.instantiateStreaming( - fetch("/main.wasm"), + fetch("/fingerprint.wasm"), go.importObject ); go.run(result.instance); @@ -175,15 +176,15 @@ function App() { cleanUp(); const inputFile = 'input.wav'; - const outputFile = 'output_mono.wav'; + const outputFile = 'output_formatted.wav'; - // Convert audio to mono with a sample rate of 44100 Hz await ffmpeg.writeFile(inputFile, await fetchFile(blob)) const exitCode = await ffmpeg.exec([ '-i', inputFile, '-c', 'pcm_s16le', '-ar', '44100', - '-ac', '1', + '-ac', recordStereo ? '2' : '1', + '-acodec', 'pcm_s16le', '-f', 'wav', outputFile ]); @@ -191,11 +192,11 @@ function App() { throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`); } - const monoData = await ffmpeg.readFile(outputFile); - const monoBlob = new Blob([monoData.buffer], { type: 'audio/wav' }); + const audioData = await ffmpeg.readFile(outputFile); + const audioBlob = new Blob([audioData.buffer], { type: 'audio/wav' }); const reader = new FileReader(); - reader.readAsArrayBuffer(monoBlob); + reader.readAsArrayBuffer(audioBlob); reader.onload = async (event) => { const arrayBuffer = event.target.result; const audioContext = new AudioContext(); @@ -205,7 +206,11 @@ function App() { const audioData = audioBufferDecoded.getChannelData(0); const audioArray = Array.from(audioData); - const result = genFingerprint(audioArray, audioBufferDecoded.sampleRate); + const result = genFingerprint( + audioArray, + audioBufferDecoded.sampleRate, + audioBufferDecoded.numberOfChannels + ); if (result.error !== 0) { toast["error"](() =>
An error occured
) console.log("An error occured: ", result) @@ -288,7 +293,7 @@ function App() { return (
-

!Shazam

+

SeekTune

 Songs diff --git a/docker-compose.yml b/docker-compose.yml index 9601c8f..5dc779a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,15 +1,21 @@ -version: '3.1' +version: '3.8' volumes: seek-tune-db: + driver: local seek-tune-songs: + driver: local + seek-tune-recordings: + driver: local services: seek-tune: - image: 'seek-tune' + image: seek-tune:latest + container_name: seek-tune-app restart: unless-stopped + ports: - - 8080:5000 + - "${HOST_PORT:-8080}:5000" environment: DB_TYPE: ${DB_TYPE:-sqlite} @@ -18,14 +24,67 @@ services: DB_NAME: ${DB_NAME:-seek_tune_db} DB_HOST: ${DB_HOST:-localhost} DB_PORT: ${DB_PORT:-27017} - + + ENV: production REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080} + + SPOTIFY_CLIENT_ID: ${SPOTIFY_CLIENT_ID:-} + SPOTIFY_CLIENT_SECRET: ${SPOTIFY_CLIENT_SECRET:-} build: context: . + dockerfile: Dockerfile args: REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080} + cache_from: + - seek-tune:latest volumes: - - seek-tune-db:/home/seek-tune/db - - seek-tune-songs:/home/seek-tune/songs \ No newline at end of file + - seek-tune-db:/app/db + - seek-tune-songs:/app/songs + - seek-tune-recordings:/app/recordings + # Optional: Mount local songs directory for development + # - ./server/songs:/app/songs + + # Resource limits (adjust based on your needs) + deploy: + resources: + limits: + cpus: '2.0' + memory: 2G + reservations: + cpus: '0.5' + memory: 512M + + healthcheck: + test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5000/"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + + # Optional: MongoDB service (if using MongoDB instead of SQLite) + # mongodb: + # image: mongo:7 + # container_name: seek-tune-mongo + # restart: unless-stopped + # environment: + # MONGO_INITDB_ROOT_USERNAME: ${DB_USER:-root} + # MONGO_INITDB_ROOT_PASSWORD: ${DB_PASSWORD:-password} + # MONGO_INITDB_DATABASE: ${DB_NAME:-seek_tune_db} + # ports: + # - "27017:27017" + # volumes: + # - seek-tune-db:/data/db + # healthcheck: + # test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet + # interval: 10s + # timeout: 10s + # retries: 5 + # start_period: 40s diff --git a/server/.env.example b/server/.env.example new file mode 100644 index 0000000..8bbe19f --- /dev/null +++ b/server/.env.example @@ -0,0 +1,14 @@ +DB_TYPE=mongo # or sqlite +DB_USER=user +DB_PASS=password +DB_NAME=seek-tune +DB_HOST=192.168.0.1 +DB_PORT=27017 + +# Set to true to enable stereo fingerprinting (uses more storage but may improve accuracy) +FINGERPRINT_STEREO=false + +SPOTIFY_CLIENT_ID=yourclientid +SPOTIFY_CLIENT_SECRET=yoursecret + + diff --git a/server/cmdHandlers.go b/server/cmdHandlers.go index b04ede5..82986fd 100644 --- a/server/cmdHandlers.go +++ b/server/cmdHandlers.go @@ -10,6 +10,7 @@ import ( "net/http" "os" "path/filepath" + "runtime" "song-recognition/db" "song-recognition/shazam" "song-recognition/spotify" @@ -34,19 +35,24 @@ const ( var yellow = color.New(color.FgYellow) func find(filePath string) { - wavInfo, err := wav.ReadWavInfo(filePath) + wavFilePath, err := wav.ConvertToWAV(filePath) if err != nil { - yellow.Println("Error reading wave info:", err) + yellow.Println("Error converting to WAV:", err) return } - samples, err := wav.WavBytesToSamples(wavInfo.Data) + fingerprint, err := shazam.FingerprintAudio(wavFilePath, utils.GenerateUniqueID()) if err != nil { - yellow.Println("Error converting to samples:", err) + yellow.Println("Error generating fingerprint for sample: ", err) return } - matches, searchDuration, err := shazam.FindMatches(samples, wavInfo.Duration, wavInfo.SampleRate) + sampleFingerprint := make(map[uint32]uint32) + for address, couple := range fingerprint { + sampleFingerprint[address] = couple.AnchorTimeMs + } + + matches, searchDuration, err := shazam.FindMatchesFGP(sampleFingerprint) if err != nil { yellow.Println("Error finding matches:", err) return @@ -193,7 +199,7 @@ func serveHTTP(socketServer *socketio.Server, serveHTTPS bool, port string) { } } -func erase(songsDir string) { +func erase(songsDir string, dbOnly bool, all bool) { logger := utils.GetLogger() ctx := context.Background() @@ -216,26 +222,31 @@ func erase(songsDir string) { logger.ErrorContext(ctx, msg, slog.Any("error", err)) } - // delete song files - err = filepath.Walk(songsDir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } + fmt.Println("Database cleared") - if !info.IsDir() { - ext := filepath.Ext(path) - if ext == ".wav" || ext == ".m4a" { - err := os.Remove(path) - if err != nil { - return err + // delete song files only if -all flag is set + if all { + err = filepath.Walk(songsDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if !info.IsDir() { + ext := filepath.Ext(path) + if ext == ".wav" || ext == ".m4a" { + err := os.Remove(path) + if err != nil { + return err + } } } + return nil + }) + if err != nil { + msg := fmt.Sprintf("Error walking through directory %s: %v\n", songsDir, err) + logger.ErrorContext(ctx, msg, slog.Any("error", err)) } - return nil - }) - if err != nil { - msg := fmt.Sprintf("Error walking through directory %s: %v\n", songsDir, err) - logger.ErrorContext(ctx, msg, slog.Any("error", err)) + fmt.Println("Songs folder cleared") } fmt.Println("Erase complete") @@ -249,6 +260,7 @@ func save(path string, force bool) { } if fileInfo.IsDir() { + var filePaths []string err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error { if err != nil { fmt.Printf("Error walking the path %v: %v\n", filePath, err) @@ -256,16 +268,16 @@ func save(path string, force bool) { } // Process only files, skip directories if !info.IsDir() { - err := saveSong(filePath, force) - if err != nil { - fmt.Printf("Error saving song (%v): %v\n", filePath, err) - } + filePaths = append(filePaths, filePath) } return nil }) if err != nil { fmt.Printf("Error walking the directory %v: %v\n", path, err) + return } + + processFilesConCurrently(filePaths, force) } else { err := saveSong(path, force) if err != nil { @@ -274,6 +286,50 @@ func save(path string, force bool) { } } +func processFilesConCurrently(filePaths []string, force bool) { + maxWorkers := runtime.NumCPU() / 2 + numFiles := len(filePaths) + + if numFiles == 0 { + return + } + + if numFiles < maxWorkers { + maxWorkers = numFiles + } + + jobs := make(chan string, numFiles) + results := make(chan error, numFiles) + + for w := 0; w < maxWorkers; w++ { + go func(workerID int) { + for filePath := range jobs { + err := saveSong(filePath, force) + results <- err + } + }(w + 1) + } + + for _, filePath := range filePaths { + jobs <- filePath + } + close(jobs) + + successCount := 0 + errorCount := 0 + for i := 0; i < numFiles; i++ { + err := <-results + if err != nil { + fmt.Printf("Error: %v\n", err) + errorCount++ + } else { + successCount++ + } + } + + fmt.Printf("\n ->> Processed %d files: %d successful, %d failed\n", numFiles, successCount, errorCount) +} + func saveSong(filePath string, force bool) error { metadata, err := wav.GetMetadata(filePath) if err != nil { diff --git a/server/main.go b/server/main.go index 5fba59c..ab95842 100644 --- a/server/main.go +++ b/server/main.go @@ -8,8 +8,8 @@ import ( "os" "song-recognition/utils" - "github.com/mdobak/go-xerrors" "github.com/joho/godotenv" + "github.com/mdobak/go-xerrors" ) func main() { @@ -32,10 +32,16 @@ func main() { if len(os.Args) < 2 { fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands") + fmt.Println("\nUsage examples:") + fmt.Println(" find ") + fmt.Println(" download ") + fmt.Println(" erase [db | all] (default: db)") + fmt.Println(" save [-f|--force] ") + fmt.Println(" serve [-proto ] [-p ]") os.Exit(1) } _ = godotenv.Load() - + switch os.Args[1] { case "find": if len(os.Args) < 3 { @@ -58,7 +64,28 @@ func main() { serveCmd.Parse(os.Args[2:]) serve(*protocol, *port) case "erase": - erase(SONGS_DIR) + // Default is to clear only database (db mode) + dbOnly := true + all := false + + if len(os.Args) > 2 { + subCmd := os.Args[2] + switch subCmd { + case "db": + dbOnly = true + all = false + case "all": + dbOnly = false + all = true + default: + fmt.Println("Usage: main.go erase [db | all]") + fmt.Println(" db : only clear the database (default)") + fmt.Println(" all : clear database and songs folder") + os.Exit(1) + } + } + + erase(SONGS_DIR, dbOnly, all) case "save": indexCmd := flag.NewFlagSet("save", flag.ExitOnError) force := indexCmd.Bool("force", false, "save song with or without YouTube ID") @@ -72,6 +99,12 @@ func main() { save(filePath, *force) default: fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands") + fmt.Println("\nUsage examples:") + fmt.Println(" find ") + fmt.Println(" download ") + fmt.Println(" erase [db | all] (default: db)") + fmt.Println(" save [-f|--force] ") + fmt.Println(" serve [-proto ] [-p ]") os.Exit(1) } } diff --git a/server/shazam/fingerprint.go b/server/shazam/fingerprint.go index a29398b..3768358 100644 --- a/server/shazam/fingerprint.go +++ b/server/shazam/fingerprint.go @@ -1,7 +1,10 @@ package shazam import ( + "fmt" "song-recognition/models" + "song-recognition/utils" + "song-recognition/wav" ) const ( @@ -23,7 +26,10 @@ func Fingerprint(peaks []Peak, songID uint32) map[uint32]models.Couple { address := createAddress(anchor, target) anchorTimeMs := uint32(anchor.Time * 1000) - fingerprints[address] = models.Couple{anchorTimeMs, songID} + fingerprints[address] = models.Couple{ + AnchorTimeMs: anchorTimeMs, + SongID: songID, + } } } @@ -35,12 +41,52 @@ func Fingerprint(peaks []Peak, songID uint32) map[uint32]models.Couple { // the anchor and target points, and other bits represent the time difference (delta time) // between them. This function combines these components into a single address (a hash). func createAddress(anchor, target Peak) uint32 { - anchorFreq := int(real(anchor.Freq)) - targetFreq := int(real(target.Freq)) - deltaMs := uint32((target.Time - anchor.Time) * 1000) + anchorFreqBin := uint32(anchor.Freq / 10) // Scale down to fit in 9 bits + targetFreqBin := uint32(target.Freq / 10) - // Combine the frequency of the anchor, target, and delta time into a 32-bit address - address := uint32(anchorFreq<<23) | uint32(targetFreq<<14) | deltaMs + deltaMsRaw := uint32((target.Time - anchor.Time) * 1000) + + // Mask to fit within bit constraints + anchorFreqBits := anchorFreqBin & ((1 << maxFreqBits) - 1) // 9 bits + targetFreqBits := targetFreqBin & ((1 << maxFreqBits) - 1) // 9 bits + deltaBits := deltaMsRaw & ((1 << maxDeltaBits) - 1) // 14 bits (max ~16 seconds) + + // Combine into 32-bit address + address := (anchorFreqBits << 23) | (targetFreqBits << 14) | deltaBits return address } + +func FingerprintAudio(songFilePath string, songID uint32) (map[uint32]models.Couple, error) { + wavFilePath, err := wav.ConvertToWAV(songFilePath) + if err != nil { + return nil, fmt.Errorf("error converting input file to WAV: %v", err) + } + + wavInfo, err := wav.ReadWavInfo(wavFilePath) + if err != nil { + return nil, fmt.Errorf("error reading WAV info: %v", err) + } + + fingerprint := make(map[uint32]models.Couple) + + spectro, err := Spectrogram(wavInfo.LeftChannelSamples, wavInfo.SampleRate) + if err != nil { + return nil, fmt.Errorf("error creating spectrogram: %v", err) + } + + peaks := ExtractPeaks(spectro, wavInfo.Duration, wavInfo.SampleRate) + utils.ExtendMap(fingerprint, Fingerprint(peaks, songID)) + + if wavInfo.Channels == 2 { + spectro, err = Spectrogram(wavInfo.RightChannelSamples, wavInfo.SampleRate) + if err != nil { + return nil, fmt.Errorf("error creating spectrogram for right channel: %v", err) + } + + peaks = ExtractPeaks(spectro, wavInfo.Duration, wavInfo.SampleRate) + utils.ExtendMap(fingerprint, Fingerprint(peaks, songID)) + } + + return fingerprint, nil +} diff --git a/server/shazam/shazam.go b/server/shazam/shazam.go index 0683019..fde0f0c 100644 --- a/server/shazam/shazam.go +++ b/server/shazam/shazam.go @@ -5,7 +5,6 @@ package shazam import ( "fmt" - "math" "song-recognition/db" "song-recognition/utils" "sort" @@ -30,7 +29,8 @@ func FindMatches(audioSample []float64, audioDuration float64, sampleRate int) ( return nil, time.Since(startTime), fmt.Errorf("failed to get spectrogram of samples: %v", err) } - peaks := ExtractPeaks(spectrogram, audioDuration) + peaks := ExtractPeaks(spectrogram, audioDuration, sampleRate) + // peaks := ExtractPeaksLMX(spectrogram, true) sampleFingerprint := Fingerprint(peaks, utils.GenerateUniqueID()) sampleFingerprintMap := make(map[uint32]uint32) @@ -38,7 +38,7 @@ func FindMatches(audioSample []float64, audioDuration float64, sampleRate int) ( sampleFingerprintMap[address] = couple.AnchorTimeMs } - matches, _, err := FindMatchesFGP(sampleFingerprintMap) + matches, _, _ := FindMatchesFGP(sampleFingerprintMap) return matches, time.Since(startTime), nil } @@ -142,21 +142,32 @@ func filterMatches( } // analyzeRelativeTiming calculates a score for each song based on the -// relative timing between the song and the sample's anchor times. +// consistency of time offsets between the sample and database. func analyzeRelativeTiming(matches map[uint32][][2]uint32) map[uint32]float64 { scores := make(map[uint32]float64) + for songID, times := range matches { - count := 0 - for i := 0; i < len(times); i++ { - for j := i + 1; j < len(times); j++ { - sampleDiff := math.Abs(float64(times[i][0] - times[j][0])) - dbDiff := math.Abs(float64(times[i][1] - times[j][1])) - if math.Abs(sampleDiff-dbDiff) < 100 { // Allow some tolerance - count++ - } + offsetCounts := make(map[int32]int) + + for _, timePair := range times { + sampleTime := int32(timePair[0]) + dbTime := int32(timePair[1]) + offset := dbTime - sampleTime + + // Bin offsets in 100ms buckets to allow for small timing variations + offsetBucket := offset / 100 + offsetCounts[offsetBucket]++ + } + + maxCount := 0 + for _, count := range offsetCounts { + if count > maxCount { + maxCount = count } } - scores[songID] = float64(count) + + scores[songID] = float64(maxCount) } + return scores } diff --git a/server/shazam/spectrogram.go b/server/shazam/spectrogram.go index 2488019..bb7c9d5 100644 --- a/server/shazam/spectrogram.go +++ b/server/shazam/spectrogram.go @@ -8,13 +8,14 @@ import ( ) const ( - dspRatio = 4 - freqBinSize = 1024 - maxFreq = 5000.0 // 5kHz - hopSize = freqBinSize / 32 + dspRatio = 4 + windowSize = 1024 + maxFreq = 5000.0 // 5kHz + hopSize = windowSize / 2 // 50% overlap for better time-frequency resolution + windowType = "hanning" // choices: "hanning" or "hamming" ) -func Spectrogram(sample []float64, sampleRate int) ([][]complex128, error) { +func Spectrogram(sample []float64, sampleRate int) ([][]float64, error) { filteredSample := LowPassFilter(maxFreq, float64(sampleRate), sample) downsampledSample, err := Downsample(filteredSample, sampleRate, sampleRate/dspRatio) @@ -22,31 +23,42 @@ func Spectrogram(sample []float64, sampleRate int) ([][]complex128, error) { return nil, fmt.Errorf("couldn't downsample audio sample: %v", err) } - numOfWindows := len(downsampledSample) / (freqBinSize - hopSize) - spectrogram := make([][]complex128, numOfWindows) - - window := make([]float64, freqBinSize) + window := make([]float64, windowSize) for i := range window { - window[i] = 0.54 - 0.46*math.Cos(2*math.Pi*float64(i)/(float64(freqBinSize)-1)) + theta := 2 * math.Pi * float64(i) / float64(windowSize-1) + switch windowType { + case "hamming": + window[i] = 0.54 - 0.46*math.Cos(theta) + default: // Hanning window + window[i] = 0.5 - 0.5*math.Cos(theta) + } } + // Initialize spectrogram slice + spectrogram := make([][]float64, 0) + // Perform STFT - for i := 0; i < numOfWindows; i++ { - start := i * hopSize - end := start + freqBinSize - if end > len(downsampledSample) { - end = len(downsampledSample) - } + for start := 0; start+windowSize <= len(downsampledSample); start += hopSize { + end := start + windowSize - bin := make([]float64, freqBinSize) - copy(bin, downsampledSample[start:end]) + frame := make([]float64, windowSize) + copy(frame, downsampledSample[start:end]) - // Apply Hamming window + // Apply window for j := range window { - bin[j] *= window[j] + frame[j] *= window[j] } - spectrogram[i] = FFT(bin) + // Perform FFT + fftResult := FFT(frame) + + // Convert complex spectrum to magnitude spectrum + magnitude := make([]float64, len(fftResult)/2) + for j := range magnitude { + magnitude[j] = cmplx.Abs(fftResult[j]) + } + + spectrogram = append(spectrogram, magnitude) } return spectrogram, nil @@ -107,43 +119,47 @@ func Downsample(input []float64, originalSampleRate, targetSampleRate int) ([]fl return resampled, nil } +// Peak represents a significant point in the spectrogram. type Peak struct { - Time float64 - Freq complex128 + Freq float64 // Frequency in Hz + Time float64 // Time in seconds } // ExtractPeaks analyzes a spectrogram and extracts significant peaks in the frequency domain over time. -func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak { +func ExtractPeaks(spectrogram [][]float64, audioDuration float64, sampleRate int) []Peak { if len(spectrogram) < 1 { return []Peak{} } type maxies struct { maxMag float64 - maxFreq complex128 freqIdx int } - bands := []struct{ min, max int }{{0, 10}, {10, 20}, {20, 40}, {40, 80}, {80, 160}, {160, 512}} + bands := []struct{ min, max int }{ + {0, 10}, {10, 20}, {20, 40}, {40, 80}, {80, 160}, {160, 512}, + } var peaks []Peak - binDuration := audioDuration / float64(len(spectrogram)) + frameDuration := audioDuration / float64(len(spectrogram)) - for binIdx, bin := range spectrogram { + // Calculate frequency resolution (Hz per bin) + effectiveSampleRate := float64(sampleRate) / float64(dspRatio) + freqResolution := effectiveSampleRate / float64(windowSize) + + for frameIdx, frame := range spectrogram { var maxMags []float64 - var maxFreqs []complex128 - var freqIndices []float64 + var freqIndices []int binBandMaxies := []maxies{} for _, band := range bands { var maxx maxies var maxMag float64 - for idx, freq := range bin[band.min:band.max] { - magnitude := cmplx.Abs(freq) - if magnitude > maxMag { - maxMag = magnitude + for idx, mag := range frame[band.min:band.max] { + if mag > maxMag { + maxMag = mag freqIdx := band.min + idx - maxx = maxies{magnitude, freq, freqIdx} + maxx = maxies{mag, freqIdx} } } binBandMaxies = append(binBandMaxies, maxx) @@ -151,8 +167,7 @@ func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak { for _, value := range binBandMaxies { maxMags = append(maxMags, value.maxMag) - maxFreqs = append(maxFreqs, value.maxFreq) - freqIndices = append(freqIndices, float64(value.freqIdx)) + freqIndices = append(freqIndices, value.freqIdx) } // Calculate the average magnitude @@ -160,17 +175,15 @@ func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak { for _, max := range maxMags { maxMagsSum += max } - avg := maxMagsSum / float64(len(maxFreqs)) // * coefficient + avg := maxMagsSum / float64(len(maxMags)) // Add peaks that exceed the average magnitude for i, value := range maxMags { if value > avg { - peakTimeInBin := freqIndices[i] * binDuration / float64(len(bin)) + peakTime := float64(frameIdx) * frameDuration + peakFreq := float64(freqIndices[i]) * freqResolution - // Calculate the absolute time of the peak - peakTime := float64(binIdx)*binDuration + peakTimeInBin - - peaks = append(peaks, Peak{Time: peakTime, Freq: maxFreqs[i]}) + peaks = append(peaks, Peak{Time: peakTime, Freq: peakFreq}) } } } diff --git a/server/spotify/downloader.go b/server/spotify/downloader.go index b9a95fb..7a8eeb2 100644 --- a/server/spotify/downloader.go +++ b/server/spotify/downloader.go @@ -2,9 +2,7 @@ package spotify import ( "context" - "errors" "fmt" - "io" "log/slog" "os" "os/exec" @@ -13,17 +11,15 @@ import ( "song-recognition/db" "song-recognition/shazam" "song-recognition/utils" - "song-recognition/wav" "strings" "sync" "time" "github.com/fatih/color" - "github.com/kkdai/youtube/v2" "github.com/mdobak/go-xerrors" ) -const DELETE_SONG_FILE = false +const DELETE_SONG_FILE = false // Set true to delete the song file after fingerprinting var yellow = color.New(color.FgYellow) @@ -135,9 +131,9 @@ func dlTrack(tracks []Track, path string) (int, error) { trackCopy.Title, trackCopy.Artist = correctFilename(trackCopy.Title, trackCopy.Artist) fileName := fmt.Sprintf("%s - %s", trackCopy.Title, trackCopy.Artist) - filePath := filepath.Join(path, fileName+".m4a") + filePath := filepath.Join(path, fileName) - err = downloadYTaudio(ytID, path, filePath) + filePath, err = downloadYTaudio(ytID, filePath) if err != nil { logMessage := fmt.Sprintf("'%s' by '%s' could not be downloaded", trackCopy.Title, trackCopy.Artist) logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err))) @@ -151,12 +147,10 @@ func dlTrack(tracks []Track, path string) (int, error) { return } - utils.DeleteFile(filepath.Join(path, fileName+".m4a")) - wavFilePath := filepath.Join(path, fileName+".wav") if err := addTags(wavFilePath, *trackCopy); err != nil { - logMessage := fmt.Sprintf("Error adding tags: %s", filePath+".wav") + logMessage := fmt.Sprintf("Error adding tags: %s", wavFilePath) logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err))) return @@ -186,65 +180,6 @@ func dlTrack(tracks []Track, path string) (int, error) { } -/* github.com/kkdai/youtube */ -func downloadYTaudio(id, path, filePath string) error { - logger := utils.GetLogger() - dir, err := os.Stat(path) - if err != nil { - logger.Error("Error accessing path", slog.Any("error", err)) - return err - } - - if !dir.IsDir() { - err := errors.New("the path is not valid (not a dir)") - logger.Error("Invalid directory path", slog.Any("error", err)) - return err - } - - client := youtube.Client{} - video, err := client.GetVideo(id) - if err != nil { - logger.Error("Error getting YouTube video", slog.Any("error", err)) - return err - } - - /* - itag code: 140, container: m4a, content: audio, bitrate: 128k - change the FindByItag parameter to 139 if you want smaller files (but with a bitrate of 48k) - https://gist.github.com/sidneys/7095afe4da4ae58694d128b1034e01e2 - */ - formats := video.Formats.Itag(140) - - /* in some cases, when attempting to download the audio - using the library github.com/kkdai/youtube, - the download fails (and shows the file size as 0 bytes) - until the second or third attempt. */ - var fileSize int64 - file, err := os.Create(filePath) - if err != nil { - logger.Error("Error creating file", slog.Any("error", err)) - return err - } - - for fileSize == 0 { - stream, _, err := client.GetStream(video, &formats[0]) - if err != nil { - logger.Error("Error getting stream", slog.Any("error", err)) - return err - } - - if _, err = io.Copy(file, stream); err != nil { - logger.Error("Error copying stream to file", slog.Any("error", err)) - return err - } - - fileSize, _ = GetFileSize(filePath) - } - defer file.Close() - - return nil -} - func addTags(file string, track Track) error { logger := utils.GetLogger() // Create a temporary file name by appending "2" before the extension @@ -255,7 +190,7 @@ func addTags(file string, track Track) error { tempFile = baseName + "2" + ".wav" // Temporary filename ('/path/to/title - artist2.wav') } - // Execute FFmpeg command to add metadata tags + // FFmpeg command to add metadata tags cmd := exec.Command( "ffmpeg", "-i", file, // Input file path @@ -291,40 +226,20 @@ func ProcessAndSaveSong(songFilePath, songTitle, songArtist, ytID string) error } defer dbclient.Close() - wavFilePath, err := wav.ConvertToWAV(songFilePath, 1) - if err != nil { - logger.Error("Failed to convert to WAV", slog.Any("error", err)) - return err - } - - wavInfo, err := wav.ReadWavInfo(wavFilePath) - if err != nil { - logger.Error("Failed to read WAV info", slog.Any("error", err)) - return err - } - - samples, err := wav.WavBytesToSamples(wavInfo.Data) - if err != nil { - logger.Error("Error converting WAV bytes to samples", slog.Any("error", err)) - return fmt.Errorf("error converting wav bytes to float64: %v", err) - } - - spectro, err := shazam.Spectrogram(samples, wavInfo.SampleRate) - if err != nil { - logger.Error("Error creating spectrogram", slog.Any("error", err)) - return fmt.Errorf("error creating spectrogram: %v", err) - } - songID, err := dbclient.RegisterSong(songTitle, songArtist, ytID) if err != nil { logger.Error("Failed to register song", slog.Any("error", err)) - return err + return fmt.Errorf("error registering song '%s' by '%s': %v", songTitle, songArtist, err) } - peaks := shazam.ExtractPeaks(spectro, wavInfo.Duration) - fingerprints := shazam.Fingerprint(peaks, songID) + fingerprint, err := shazam.FingerprintAudio(songFilePath, songID) + if err != nil { + dbclient.DeleteSongByID(songID) + logger.Error("Failed to create fingerprint", slog.String("wavFilePath", songFilePath)) + return fmt.Errorf("error generating fingerprint for %s by %s", songTitle, songArtist) + } - err = dbclient.StoreFingerprints(fingerprints) + err = dbclient.StoreFingerprints(fingerprint) if err != nil { dbclient.DeleteSongByID(songID) logger.Error("Failed to store fingerprints", slog.Any("error", err)) diff --git a/server/spotify/youtube.go b/server/spotify/youtube.go index 51d5d04..e18ced1 100644 --- a/server/spotify/youtube.go +++ b/server/spotify/youtube.go @@ -4,6 +4,11 @@ import ( "context" "fmt" "log" + "log/slog" + "os" + "os/exec" + "path/filepath" + "song-recognition/utils" "errors" "io" @@ -215,3 +220,37 @@ func ytSearch(searchTerm string, limit int) (results []*SearchResult, err error) return results, nil } + +// downloadYTaudio downloads audio from a YouTube video using yt-dlp command line tool. +func downloadYTaudio(videoURL, outputFilePath string) (string, error) { + logger := utils.GetLogger() + + dir := filepath.Dir(outputFilePath) + if stat, err := os.Stat(dir); err != nil || !stat.IsDir() { + logger.Error("Invalid directory for output file", slog.Any("error", err)) + return "", errors.New("output directory does not exist or is not a directory") + } + + _, err := exec.LookPath("yt-dlp") + if err != nil { + logger.Error("yt-dlp not found in PATH", slog.Any("error", err)) + return "", errors.New("yt-dlp is not installed or not in PATH") + } + + audioFmt := "wav" + cmd := exec.Command( + "yt-dlp", + "-f", "bestaudio", + "--extract-audio", + "--audio-format", audioFmt, + "-o", outputFilePath, + videoURL, + ) + + output, err := cmd.CombinedOutput() + if err != nil { + logger.Error("yt-dlp command failed", slog.String("output", string(output)), slog.Any("error", err)) + return "", err + } + return outputFilePath + "." + audioFmt, nil +} diff --git a/server/utils/utils.go b/server/utils/utils.go index b2b2fce..309e44f 100644 --- a/server/utils/utils.go +++ b/server/utils/utils.go @@ -27,3 +27,9 @@ func GetEnv(key string, fallback ...string) string { } return "" } + +func ExtendMap[K comparable, V any](dest, src map[K]V) { + for k, v := range src { + dest[k] = v + } +} diff --git a/server/wasm/wasm_main.go b/server/wasm/wasm_main.go deleted file mode 100644 index 2afb489..0000000 --- a/server/wasm/wasm_main.go +++ /dev/null @@ -1,64 +0,0 @@ -//go:build js && wasm -// +build js,wasm - -package main - -import ( - "song-recognition/shazam" - "song-recognition/utils" - "syscall/js" -) - -func generateFingerprint(this js.Value, args []js.Value) interface{} { - if len(args) < 2 { - return js.ValueOf(map[string]interface{}{ - "error": 1, - "data": "Expected audio array and sample rate", - }) - } - - if args[0].Type() != js.TypeObject || args[1].Type() != js.TypeNumber { - return js.ValueOf(map[string]interface{}{ - "error": 2, - "data": "Invalid argument types; Expected audio array and samplerate (type: int)", - }) - } - - inputArray := args[0] - sampleRate := args[1].Int() - - audioData := make([]float64, inputArray.Length()) - for i := 0; i < inputArray.Length(); i++ { - audioData[i] = inputArray.Index(i).Float() - } - - spectrogram, err := shazam.Spectrogram(audioData, sampleRate) - if err != nil { - return js.ValueOf(map[string]interface{}{ - "error": 3, - "data": "Error generating spectrogram: " + err.Error(), - }) - } - - peaks := shazam.ExtractPeaks(spectrogram, float64(len(audioData)/sampleRate)) - fingerprint := shazam.Fingerprint(peaks, utils.GenerateUniqueID()) - - fingerprintArray := []interface{}{} - for address, couple := range fingerprint { - entry := map[string]interface{}{ - "address": address, - "anchorTime": couple.AnchorTimeMs, - } - fingerprintArray = append(fingerprintArray, entry) - } - - return js.ValueOf(map[string]interface{}{ - "error": 0, - "data": fingerprintArray, - }) -} - -func main() { - js.Global().Set("generateFingerprint", js.FuncOf(generateFingerprint)) - select {} -} diff --git a/server/wav/convert.go b/server/wav/convert.go index 0d6dc20..4262d20 100644 --- a/server/wav/convert.go +++ b/server/wav/convert.go @@ -6,21 +6,33 @@ import ( "os/exec" "path/filepath" "song-recognition/utils" + "strconv" "strings" ) // ConvertToWAV converts an input audio file to WAV format with specified channels. -func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, err error) { +func ConvertToWAV(inputFilePath string) (wavFilePath string, err error) { _, err = os.Stat(inputFilePath) if err != nil { return "", fmt.Errorf("input file does not exist: %v", err) } - if channels < 1 || channels > 2 { - channels = 1 + to_stereoStr := utils.GetEnv("FINGERPRINT_STEREO", "false") + to_stereo, err := strconv.ParseBool(to_stereoStr) + if err != nil { + return "", fmt.Errorf("failed to convert env variable (%s) to bool: %v", "FINGERPRINT_STEREO", err) + } + + channels := 1 + if to_stereo { + channels = 2 } fileExt := filepath.Ext(inputFilePath) + if fileExt != ".wav" { + defer os.Remove(inputFilePath) + } + outputFile := strings.TrimSuffix(inputFilePath, fileExt) + ".wav" // Output file may already exists. If it does FFmpeg will fail as diff --git a/server/wav/wav.go b/server/wav/wav.go index 1b05186..9fb2e63 100644 --- a/server/wav/wav.go +++ b/server/wav/wav.go @@ -94,49 +94,83 @@ func WriteWavFile(filename string, data []byte, sampleRate int, channels int, bi return err } -// WavInfo defines a struct containing information extracted from the WAV header type WavInfo struct { - Channels int - SampleRate int - Data []byte - Duration float64 + Channels int + SampleRate int + Duration float64 + Data []byte + LeftChannelSamples []float64 + RightChannelSamples []float64 } +// ReadWavInfo reads a 16-bit PCM WAV file and returns its metadata and audio samples. +// Supports mono and stereo files. Note that it only supports 16-bit PCM format. func ReadWavInfo(filename string) (*WavInfo, error) { data, err := ioutil.ReadFile(filename) if err != nil { return nil, err } - if len(data) < 44 { return nil, errors.New("invalid WAV file size (too small)") } - // Read header chunks + // Parse PCM header to extract metadata + // https://en.wikipedia.org/wiki/WAV#WAV_file_header var header WavHeader - err = binary.Read(bytes.NewReader(data[:44]), binary.LittleEndian, &header) - if err != nil { + if err := binary.Read(bytes.NewReader(data[:44]), binary.LittleEndian, &header); err != nil { return nil, err } - - if string(header.ChunkID[:]) != "RIFF" || string(header.Format[:]) != "WAVE" || header.AudioFormat != 1 { + if string(header.ChunkID[:]) != "RIFF" || + string(header.Format[:]) != "WAVE" || + header.AudioFormat != 1 { return nil, errors.New("invalid WAV header format") } - // Extract information info := &WavInfo{ Channels: int(header.NumChannels), SampleRate: int(header.SampleRate), Data: data[44:], } - // Calculate audio duration (assuming data contains PCM data) - if header.BitsPerSample == 16 { - info.Duration = float64(len(info.Data)) / float64(int(header.NumChannels)*2*int(header.SampleRate)) - } else { - return nil, errors.New("unsupported bits per sample format") + if header.BitsPerSample != 16 { + return nil, errors.New("unsupported bits‑per‑sample (expect 16‑bit PCM)") } + sampleCount := len(info.Data) / 2 + int16Buf := make([]int16, sampleCount) + if err := binary.Read(bytes.NewReader(info.Data), binary.LittleEndian, int16Buf); err != nil { + return nil, err + } + + const scale = 1.0 / 32768.0 // 16‑bit normalisation factor + + switch header.NumChannels { + case 1: + left := make([]float64, sampleCount) + for i, s := range int16Buf { + left[i] = float64(s) * scale + } + info.LeftChannelSamples = left + + case 2: + frameCount := sampleCount / 2 + left := make([]float64, frameCount) + right := make([]float64, frameCount) + for i := 0; i < frameCount; i++ { + left[i] = float64(int16Buf[2*i]) * scale + right[i] = float64(int16Buf[2*i+1]) * scale + } + info.LeftChannelSamples = left + info.RightChannelSamples = right + + default: + return nil, errors.New("unsupported channel count (only mono/stereo)") + } + + // Compute audio duration in seconds + info.Duration = float64(sampleCount) / + (float64(header.NumChannels) * float64(header.SampleRate)) + return info, nil } diff --git a/wasm/build.sh b/wasm/build.sh new file mode 100755 index 0000000..ebb7d1c --- /dev/null +++ b/wasm/build.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Build script for WASM fingerprint generator + +echo "Building WASM module..." + +export GOOS=js +export GOARCH=wasm + +go build -o fingerprint.wasm wasm_main.go + +if [ $? -eq 0 ]; then + echo "✓ WASM build successful: fingerprint.wasm" + + cp fingerprint.wasm ../client/public/ + echo "✓ Copied fingerprint.wasm to client/public/" + +else + echo "x WASM build failed" + cd ../wasm + exit 1 +fi diff --git a/wasm/go.mod b/wasm/go.mod new file mode 100644 index 0000000..5cc2fe0 --- /dev/null +++ b/wasm/go.mod @@ -0,0 +1,25 @@ +module wasm-fingerprint + +go 1.23.0 + +toolchain go1.24.3 + +require song-recognition v0.0.0-00010101000000-000000000000 + +require ( + github.com/golang/snappy v0.0.4 // indirect + github.com/klauspost/compress v1.17.6 // indirect + github.com/mattn/go-sqlite3 v1.14.22 // indirect + github.com/mdobak/go-xerrors v0.3.1 // indirect + github.com/montanaflynn/stats v0.7.1 // indirect + github.com/xdg-go/pbkdf2 v1.0.0 // indirect + github.com/xdg-go/scram v1.1.2 // indirect + github.com/xdg-go/stringprep v1.0.4 // indirect + github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect + go.mongodb.org/mongo-driver v1.14.0 // indirect + golang.org/x/crypto v0.33.0 // indirect + golang.org/x/sync v0.11.0 // indirect + golang.org/x/text v0.22.0 // indirect +) + +replace song-recognition => ../server diff --git a/wasm/go.sum b/wasm/go.sum new file mode 100644 index 0000000..5e7ccba --- /dev/null +++ b/wasm/go.sum @@ -0,0 +1,59 @@ +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI= +github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= +github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= +github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/mdobak/go-xerrors v0.3.1 h1:XfqaLMNN5T4qsHSlLHGJ35f6YlDTVeINSYYeeuK4VpQ= +github.com/mdobak/go-xerrors v0.3.1/go.mod h1:nIR+HMAJuj/uNqyp5+MTN6PJ7ymuIJq3UVs9QCgAHbY= +github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE= +github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= +github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= +github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= +github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a h1:fZHgsYlfvtyqToslyjUt3VOPF4J7aK/3MPcK7xp3PDk= +github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a/go.mod h1:ul22v+Nro/R083muKhosV54bj5niojjWZvU8xrevuH4= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.mongodb.org/mongo-driver v1.14.0 h1:P98w8egYRjYe3XDjxhYJagTokP/H6HzlsnojRgZRd80= +go.mongodb.org/mongo-driver v1.14.0/go.mod h1:Vzb0Mk/pa7e6cWw85R4F/endUC3u0U9jGcNU603k65c= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus= +golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= +golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/wasm/wasm_main.go b/wasm/wasm_main.go new file mode 100644 index 0000000..840e0c5 --- /dev/null +++ b/wasm/wasm_main.go @@ -0,0 +1,111 @@ +//go:build js && wasm +// +build js,wasm + +package main + +import ( + "song-recognition/models" + "song-recognition/shazam" + "song-recognition/utils" + "syscall/js" +) + +// generateFingerprint takes audio data from the frontend and generates fingerprints +// Arguments: [audioArray, sampleRate, channels] +// Returns: { error: number, data: fingerprintArray or error message } +func generateFingerprint(this js.Value, args []js.Value) interface{} { + if len(args) < 3 { + return js.ValueOf(map[string]interface{}{ + "error": 1, + "data": "Expected audio array, sample rate, and number of channels", + }) + } + + if args[0].Type() != js.TypeObject || args[1].Type() != js.TypeNumber { + return js.ValueOf(map[string]interface{}{ + "error": 2, + "data": "Invalid argument types; Expected audio array and samplerate (type: int)", + }) + } + + channels := args[2].Int() + if args[2].Type() != js.TypeNumber || (channels != 1 && channels != 2) { + return js.ValueOf(map[string]interface{}{ + "error": 2, + "data": "Invalid number of channels; expected 1 or 2", + }) + } + + inputArray := args[0] + sampleRate := args[1].Int() + + audioData := make([]float64, inputArray.Length()) + for i := 0; i < inputArray.Length(); i++ { + audioData[i] = inputArray.Index(i).Float() + } + + fingerprint := make(map[uint32]models.Couple) + var leftChannel, rightChannel []float64 + + if channels == 1 { + leftChannel = audioData + spectrogram, err := shazam.Spectrogram(audioData, sampleRate) + if err != nil { + return js.ValueOf(map[string]interface{}{ + "error": 3, + "data": "Error generating spectrogram: " + err.Error(), + }) + } + peaks := shazam.ExtractPeaks(spectrogram, float64(len(audioData))/float64(sampleRate), sampleRate) + fingerprint = shazam.Fingerprint(peaks, utils.GenerateUniqueID()) + } else { + for i := 0; i < len(audioData); i += 2 { + leftChannel = append(leftChannel, audioData[i]) + rightChannel = append(rightChannel, audioData[i+1]) + } + + // LEFT + spectrogram, err := shazam.Spectrogram(leftChannel, sampleRate) + if err != nil { + return js.ValueOf(map[string]interface{}{ + "error": 3, + "data": "Error generating spectrogram: " + err.Error(), + }) + } + peaks := shazam.ExtractPeaks(spectrogram, float64(len(leftChannel))/float64(sampleRate), sampleRate) + utils.ExtendMap(fingerprint, shazam.Fingerprint(peaks, utils.GenerateUniqueID())) + + // RIGHT + spectrogram, err = shazam.Spectrogram(rightChannel, sampleRate) + if err != nil { + return js.ValueOf(map[string]interface{}{ + "error": 3, + "data": "Error generating spectrogram: " + err.Error(), + }) + } + peaks = shazam.ExtractPeaks(spectrogram, float64(len(rightChannel))/float64(sampleRate), sampleRate) + utils.ExtendMap(fingerprint, shazam.Fingerprint(peaks, utils.GenerateUniqueID())) + } + + fingerprintArray := []interface{}{} + for address, couple := range fingerprint { + entry := map[string]interface{}{ + "address": address, + "anchorTime": couple.AnchorTimeMs, + } + fingerprintArray = append(fingerprintArray, entry) + } + + return js.ValueOf(map[string]interface{}{ + "error": 0, + "data": fingerprintArray, + }) +} + +func main() { + js.Global().Set("generateFingerprint", js.FuncOf(generateFingerprint)) + + js.Global().Call("dispatchEvent", js.Global().Get("Event").New("wasmReady")) + + select {} +}