diff --git a/.dockerignore b/.dockerignore
index be44689..cd491fd 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,28 +1,88 @@
-# Binaries for programs and plugins
-*.exe
-*.ogg
-*.m4a
-*.zip
-*.exe~
-*.dll
-*.so
-*.dylib
+# Git
+.git
+.gitignore
+.gitattributes
-# Test binary, built with `go test -c`
-*.test
+# Documentation
+*.md
+!README.md
+LICENSE
-# Output of the go coverage tool, specifically when used with LiteIDE
-*.out
-
-# Dependency directories (remove the comment below to include it)
-# vendor/
-
-# Go workspace file
-go.work
-**/songs
+# IDE
.vscode
+.idea
+*.swp
+*.swo
+*~
-package-lock.json
+# OS
+.DS_Store
+Thumbs.db
-*sqlite3
-.env
\ No newline at end of file
+# Node
+client/node_modules
+client/.env.local
+client/.env.development.local
+client/.env.test.local
+client/.env.production.local
+client/build
+client/coverage
+client/npm-debug.log*
+client/yarn-debug.log*
+client/yarn-error.log*
+
+# Go
+server/seek-tune
+server/*.exe
+server/*.test
+server/*.out
+server/vendor/
+
+# Application data (don't copy into image)
+server/songs/**
+server/recordings/**
+server/snippets/**
+server/tmp/**
+server/db/*.sqlite3
+server/db/*.db
+
+# Audio files
+*.mp3
+*.wav
+*.m4a
+*.ogg
+*.flac
+*.aac
+
+# Archives
+*.zip
+*.tar
+*.gz
+*.rar
+
+# Environment
+.env
+.env.*
+!.env.example
+
+# CI/CD
+.github
+.gitlab-ci.yml
+.travis.yml
+
+# Docker
+docker-compose*.yml
+!docker-compose.yml
+Dockerfile*
+!Dockerfile
+
+# WASM (already built separately if needed)
+wasm/fingerprint.wasm
+wasm/go.sum
+
+# Scripts
+scripts/
+appspec.yml
+
+# Logs
+*.log
\ No newline at end of file
diff --git a/.env.example b/.env.example
deleted file mode 100644
index e30fdc5..0000000
--- a/.env.example
+++ /dev/null
@@ -1,9 +0,0 @@
-DB_TYPE=mongo
-DB_USER=user
-DB_PASS=password
-DB_NAME=seek-tune
-DB_HOST=192.168.0.1
-DB_PORT=27017
-REACT_APP_BACKEND_URL=http://localhost:5000
-SPOTIFY_CLIENT_ID=yourclientid
-SPOTIFY_CLIENT_SECRET=yoursecret
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index be17357..52c8f30 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,4 +28,6 @@ go.work
package-lock.json
*sqlite3
-.env
\ No newline at end of file
+.env
+
+token.json
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 44c8845..a64b0fa 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,33 +1,63 @@
-# build react
+# Build React frontend
FROM node:20-alpine AS build_react_stage
-RUN mkdir -p /home/react
-WORKDIR /home/react
+WORKDIR /app/client
-COPY client/package.json ./
-RUN npm install
+COPY client/package*.json ./
+RUN if [ -f package-lock.json ]; then npm ci; else npm install; fi && npm cache clean --force
COPY client/ ./
ARG REACT_APP_BACKEND_URL
ENV REACT_APP_BACKEND_URL=${REACT_APP_BACKEND_URL}
RUN npm run build
-# build go
-FROM golang:1.21.6
+# Build Go backend
+FROM golang:1.24-alpine AS build_go_stage
-WORKDIR /home/seek-tune
+RUN apk add --no-cache git ca-certificates tzdata gcc musl-dev
+
+WORKDIR /app/server
COPY server/go.mod server/go.sum ./
-RUN go mod download
+RUN go mod download && go mod verify
COPY server/ ./
-ENV ENV=production
+RUN go build -ldflags="-w -s" -o seek-tune
+
+# Final runtime image
+FROM alpine:latest
+
+# Install runtime dependencies
+RUN apk add --no-cache \
+ ca-certificates \
+ tzdata \
+ ffmpeg \
+ python3 \
+ py3-pip \
+ && pip3 install --no-cache-dir yt-dlp --break-system-packages
+
+WORKDIR /app
+
+COPY --from=build_go_stage /app/server/seek-tune .
RUN mkdir -p static
-COPY --from=build_react_stage /home/react/build static
+COPY --from=build_react_stage /app/client/build ./static
-RUN go build -o seek-tune
+RUN mkdir -p db songs recordings snippets tmp && \
+ chmod -R 755 db songs recordings snippets tmp
+
+ENV ENV=production
EXPOSE 5000
-CMD [ "/home/seek-tune/seek-tune", "serve" ]
\ No newline at end of file
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+ CMD wget --no-verbose --tries=1 --spider http://localhost:5000/ || exit 1
+
+# Run as non-root user for security
+RUN addgroup -g 1001 -S appuser && \
+ adduser -u 1001 -S appuser -G appuser && \
+ chown -R appuser:appuser /app
+
+USER appuser
+
+CMD ["./seek-tune", "serve", "-proto", "http", "-p", "5000"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 3c66eb4..d3cabf4 100644
--- a/README.md
+++ b/README.md
@@ -8,21 +8,17 @@
-Demo in Video
+Demo in Video | How it was made (YouTube)
## Description 🎼
SeekTune is an implementation of Shazam's song recognition algorithm based on insights from these [resources](#resources--card_file_box). It integrates Spotify and YouTube APIs to find and download songs.
-[//]: # (## Current Limitations
-While the algorithm works excellently in matching a song with its exact file, it doesn't always find the right match from a recording. However, this project is still a work in progress. I'm hopeful about making it work, but I could definitely use some help :slightly_smiling_face:.
-Additionally, it currently only supports song files in WAV format.
-)
-
## Installation :desktop_computer:
### Prerequisites
- Golang: [Install Golang](https://golang.org/dl/)
- FFmpeg: [Install FFmpeg](https://ffmpeg.org/download.html)
-- NPM: To run the client (frontend).
+- NPM: [Install Node](https://nodejs.org/en/download)
+- YT-DLP: [Install YT-DLP](https://github.com/yt-dlp/yt-dlp/wiki/Installation)
### Steps
📦 Clone the repository:
@@ -42,27 +38,17 @@ Prerequisites: [Docker](https://docs.docker.com/get-docker/) and [Docker Compose
docker-compose down
```
-#### 🎧 Spotify API
+#### 🎧 Spotify API Setup
-To access Spotify metadata, the project now uses the official [Spotify Web API](https://developer.spotify.com/documentation/web-api/). This requires creating a developer application and retrieving a client ID and client secret.
+1. Get credentials: Follow the [official getting started guide](https://developer.spotify.com/documentation/web-api/tutorials/getting-started) to create a Spotify app and obtain your **Client ID** and **Client Secret**.
-Follow the [official getting started guide](https://developer.spotify.com/documentation/web-api/tutorials/getting-started#request-an-access-token) to:
+2. Configure: Create a `.env` file in the `server` directory:
+ ```bash
+ SPOTIFY_CLIENT_ID=your-client-id
+ SPOTIFY_CLIENT_SECRET=your-client-secret
+ ```
-1. Create a Spotify developer app.
-2. Copy your **Client ID** and **Client Secret**.
-
-##### Setting up Credentials
-Instead of using a credentials.json file, the application now reads these values from environment variables.
-
-Create a .env file in the server directory with the following content:
-
-```
-SPOTIFY_CLIENT_ID=your-client-id
-SPOTIFY_CLIENT_SECRET=your-client-secret
-```
-
-Make sure this .env file is loaded into your environment before running the server.
-The application will automatically read this file to fetch and cache access tokens. If the token is expired or missing, a new one will be requested.
+The app will automatically fetch and cache access tokens as needed.
#### 💻 Set Up Natively
Install dependencies for the backend
@@ -109,7 +95,12 @@ go run *.go find
```
#### ▸ Delete fingerprints and songs 🗑️
```
+# Delete only database (default)
go run *.go erase
+go run *.go erase db
+
+# Delete both database and song files
+go run *.go erase all
```
## Example :film_projector:
diff --git a/client/.env.example b/client/.env.example
new file mode 100644
index 0000000..4c9fd18
--- /dev/null
+++ b/client/.env.example
@@ -0,0 +1 @@
+REACT_APP_BACKEND_URL=http://localhost:5000
\ No newline at end of file
diff --git a/client/public/fingerprint.wasm b/client/public/fingerprint.wasm
new file mode 100755
index 0000000..e3bae6e
Binary files /dev/null and b/client/public/fingerprint.wasm differ
diff --git a/client/public/main.wasm b/client/public/main.wasm
deleted file mode 100755
index 34ccaac..0000000
Binary files a/client/public/main.wasm and /dev/null differ
diff --git a/client/src/App.js b/client/src/App.js
index cfa0e1f..52caa02 100644
--- a/client/src/App.js
+++ b/client/src/App.js
@@ -15,7 +15,8 @@ import { fetchFile } from '@ffmpeg/util';
import AnimatedNumber from "./components/AnimatedNumber";
-const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000";
+const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000";
+const recordStereo = process.env.REACT_APP_RECORD_STEREO === "true" || false;
// https://seek-tune-rq4gn.ondigitalocean.app/
var socket = io(server);
@@ -91,7 +92,7 @@ function App() {
try {
const go = new window.Go();
const result = await WebAssembly.instantiateStreaming(
- fetch("/main.wasm"),
+ fetch("/fingerprint.wasm"),
go.importObject
);
go.run(result.instance);
@@ -175,15 +176,15 @@ function App() {
cleanUp();
const inputFile = 'input.wav';
- const outputFile = 'output_mono.wav';
+ const outputFile = 'output_formatted.wav';
- // Convert audio to mono with a sample rate of 44100 Hz
await ffmpeg.writeFile(inputFile, await fetchFile(blob))
const exitCode = await ffmpeg.exec([
'-i', inputFile,
'-c', 'pcm_s16le',
'-ar', '44100',
- '-ac', '1',
+ '-ac', recordStereo ? '2' : '1',
+ '-acodec', 'pcm_s16le',
'-f', 'wav',
outputFile
]);
@@ -191,11 +192,11 @@ function App() {
throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`);
}
- const monoData = await ffmpeg.readFile(outputFile);
- const monoBlob = new Blob([monoData.buffer], { type: 'audio/wav' });
+ const audioData = await ffmpeg.readFile(outputFile);
+ const audioBlob = new Blob([audioData.buffer], { type: 'audio/wav' });
const reader = new FileReader();
- reader.readAsArrayBuffer(monoBlob);
+ reader.readAsArrayBuffer(audioBlob);
reader.onload = async (event) => {
const arrayBuffer = event.target.result;
const audioContext = new AudioContext();
@@ -205,7 +206,11 @@ function App() {
const audioData = audioBufferDecoded.getChannelData(0);
const audioArray = Array.from(audioData);
- const result = genFingerprint(audioArray, audioBufferDecoded.sampleRate);
+ const result = genFingerprint(
+ audioArray,
+ audioBufferDecoded.sampleRate,
+ audioBufferDecoded.numberOfChannels
+ );
if (result.error !== 0) {
toast["error"](() => An error occured
)
console.log("An error occured: ", result)
@@ -288,7 +293,7 @@ function App() {
return (
-
!Shazam
+
SeekTune
Songs
diff --git a/docker-compose.yml b/docker-compose.yml
index 9601c8f..5dc779a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,15 +1,21 @@
-version: '3.1'
+version: '3.8'
volumes:
seek-tune-db:
+ driver: local
seek-tune-songs:
+ driver: local
+ seek-tune-recordings:
+ driver: local
services:
seek-tune:
- image: 'seek-tune'
+ image: seek-tune:latest
+ container_name: seek-tune-app
restart: unless-stopped
+
ports:
- - 8080:5000
+ - "${HOST_PORT:-8080}:5000"
environment:
DB_TYPE: ${DB_TYPE:-sqlite}
@@ -18,14 +24,67 @@ services:
DB_NAME: ${DB_NAME:-seek_tune_db}
DB_HOST: ${DB_HOST:-localhost}
DB_PORT: ${DB_PORT:-27017}
-
+
+ ENV: production
REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080}
+
+ SPOTIFY_CLIENT_ID: ${SPOTIFY_CLIENT_ID:-}
+ SPOTIFY_CLIENT_SECRET: ${SPOTIFY_CLIENT_SECRET:-}
build:
context: .
+ dockerfile: Dockerfile
args:
REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080}
+ cache_from:
+ - seek-tune:latest
volumes:
- - seek-tune-db:/home/seek-tune/db
- - seek-tune-songs:/home/seek-tune/songs
\ No newline at end of file
+ - seek-tune-db:/app/db
+ - seek-tune-songs:/app/songs
+ - seek-tune-recordings:/app/recordings
+ # Optional: Mount local songs directory for development
+ # - ./server/songs:/app/songs
+
+ # Resource limits (adjust based on your needs)
+ deploy:
+ resources:
+ limits:
+ cpus: '2.0'
+ memory: 2G
+ reservations:
+ cpus: '0.5'
+ memory: 512M
+
+ healthcheck:
+ test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5000/"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+ start_period: 10s
+
+ logging:
+ driver: "json-file"
+ options:
+ max-size: "10m"
+ max-file: "3"
+
+ # Optional: MongoDB service (if using MongoDB instead of SQLite)
+ # mongodb:
+ # image: mongo:7
+ # container_name: seek-tune-mongo
+ # restart: unless-stopped
+ # environment:
+ # MONGO_INITDB_ROOT_USERNAME: ${DB_USER:-root}
+  #     MONGO_INITDB_ROOT_PASSWORD: ${DB_PASS:-password}
+ # MONGO_INITDB_DATABASE: ${DB_NAME:-seek_tune_db}
+ # ports:
+ # - "27017:27017"
+ # volumes:
+ # - seek-tune-db:/data/db
+ # healthcheck:
+ # test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
+ # interval: 10s
+ # timeout: 10s
+ # retries: 5
+ # start_period: 40s
diff --git a/server/.env.example b/server/.env.example
new file mode 100644
index 0000000..8bbe19f
--- /dev/null
+++ b/server/.env.example
@@ -0,0 +1,14 @@
+DB_TYPE=mongo # or sqlite
+DB_USER=user
+DB_PASS=password
+DB_NAME=seek-tune
+DB_HOST=192.168.0.1
+DB_PORT=27017
+
+# Set to true to enable stereo fingerprinting (uses more storage but may improve accuracy)
+FINGERPRINT_STEREO=false
+
+SPOTIFY_CLIENT_ID=yourclientid
+SPOTIFY_CLIENT_SECRET=yoursecret
+
+
diff --git a/server/cmdHandlers.go b/server/cmdHandlers.go
index b04ede5..82986fd 100644
--- a/server/cmdHandlers.go
+++ b/server/cmdHandlers.go
@@ -10,6 +10,7 @@ import (
"net/http"
"os"
"path/filepath"
+ "runtime"
"song-recognition/db"
"song-recognition/shazam"
"song-recognition/spotify"
@@ -34,19 +35,24 @@ const (
var yellow = color.New(color.FgYellow)
func find(filePath string) {
- wavInfo, err := wav.ReadWavInfo(filePath)
+ wavFilePath, err := wav.ConvertToWAV(filePath)
if err != nil {
- yellow.Println("Error reading wave info:", err)
+ yellow.Println("Error converting to WAV:", err)
return
}
- samples, err := wav.WavBytesToSamples(wavInfo.Data)
+ fingerprint, err := shazam.FingerprintAudio(wavFilePath, utils.GenerateUniqueID())
if err != nil {
- yellow.Println("Error converting to samples:", err)
+ yellow.Println("Error generating fingerprint for sample: ", err)
return
}
- matches, searchDuration, err := shazam.FindMatches(samples, wavInfo.Duration, wavInfo.SampleRate)
+ sampleFingerprint := make(map[uint32]uint32)
+ for address, couple := range fingerprint {
+ sampleFingerprint[address] = couple.AnchorTimeMs
+ }
+
+ matches, searchDuration, err := shazam.FindMatchesFGP(sampleFingerprint)
if err != nil {
yellow.Println("Error finding matches:", err)
return
@@ -193,7 +199,7 @@ func serveHTTP(socketServer *socketio.Server, serveHTTPS bool, port string) {
}
}
-func erase(songsDir string) {
+func erase(songsDir string, dbOnly bool, all bool) {
logger := utils.GetLogger()
ctx := context.Background()
@@ -216,26 +222,31 @@ func erase(songsDir string) {
logger.ErrorContext(ctx, msg, slog.Any("error", err))
}
- // delete song files
- err = filepath.Walk(songsDir, func(path string, info os.FileInfo, err error) error {
- if err != nil {
- return err
- }
+ fmt.Println("Database cleared")
- if !info.IsDir() {
- ext := filepath.Ext(path)
- if ext == ".wav" || ext == ".m4a" {
- err := os.Remove(path)
- if err != nil {
- return err
+ // delete song files only if -all flag is set
+ if all {
+ err = filepath.Walk(songsDir, func(path string, info os.FileInfo, err error) error {
+ if err != nil {
+ return err
+ }
+
+ if !info.IsDir() {
+ ext := filepath.Ext(path)
+ if ext == ".wav" || ext == ".m4a" {
+ err := os.Remove(path)
+ if err != nil {
+ return err
+ }
}
}
+ return nil
+ })
+ if err != nil {
+ msg := fmt.Sprintf("Error walking through directory %s: %v\n", songsDir, err)
+ logger.ErrorContext(ctx, msg, slog.Any("error", err))
}
- return nil
- })
- if err != nil {
- msg := fmt.Sprintf("Error walking through directory %s: %v\n", songsDir, err)
- logger.ErrorContext(ctx, msg, slog.Any("error", err))
+ fmt.Println("Songs folder cleared")
}
fmt.Println("Erase complete")
@@ -249,6 +260,7 @@ func save(path string, force bool) {
}
if fileInfo.IsDir() {
+ var filePaths []string
err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error {
if err != nil {
fmt.Printf("Error walking the path %v: %v\n", filePath, err)
@@ -256,16 +268,16 @@ func save(path string, force bool) {
}
// Process only files, skip directories
if !info.IsDir() {
- err := saveSong(filePath, force)
- if err != nil {
- fmt.Printf("Error saving song (%v): %v\n", filePath, err)
- }
+ filePaths = append(filePaths, filePath)
}
return nil
})
if err != nil {
fmt.Printf("Error walking the directory %v: %v\n", path, err)
+ return
}
+
+ processFilesConCurrently(filePaths, force)
} else {
err := saveSong(path, force)
if err != nil {
@@ -274,6 +286,50 @@ func save(path string, force bool) {
}
}
+// processFilesConCurrently runs saveSong on every path in filePaths using a
+// bounded pool of goroutines, then prints a success/failure summary.
+func processFilesConCurrently(filePaths []string, force bool) {
+	numFiles := len(filePaths)
+	if numFiles == 0 {
+		return
+	}
+
+	// Half the logical CPUs, rounded up: NumCPU()/2 is 0 on a single-core host,
+	// which would start no workers and block forever on the results channel.
+	maxWorkers := (runtime.NumCPU() + 1) / 2
+	if numFiles < maxWorkers {
+		maxWorkers = numFiles
+	}
+
+	jobs := make(chan string, numFiles)
+	results := make(chan error, numFiles)
+
+	for w := 0; w < maxWorkers; w++ {
+		go func() {
+			for filePath := range jobs {
+				results <- saveSong(filePath, force)
+			}
+		}()
+	}
+
+	for _, filePath := range filePaths {
+		jobs <- filePath
+	}
+	close(jobs)
+
+	successCount, errorCount := 0, 0
+	for i := 0; i < numFiles; i++ {
+		if err := <-results; err != nil {
+			fmt.Printf("Error: %v\n", err)
+			errorCount++
+		} else {
+			successCount++
+		}
+	}
+
+	fmt.Printf("\n ->> Processed %d files: %d successful, %d failed\n", numFiles, successCount, errorCount)
+}
+
func saveSong(filePath string, force bool) error {
metadata, err := wav.GetMetadata(filePath)
if err != nil {
diff --git a/server/main.go b/server/main.go
index 5fba59c..ab95842 100644
--- a/server/main.go
+++ b/server/main.go
@@ -8,8 +8,8 @@ import (
"os"
"song-recognition/utils"
- "github.com/mdobak/go-xerrors"
"github.com/joho/godotenv"
+ "github.com/mdobak/go-xerrors"
)
func main() {
@@ -32,10 +32,16 @@ func main() {
if len(os.Args) < 2 {
fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands")
+ fmt.Println("\nUsage examples:")
+ fmt.Println(" find ")
+ fmt.Println(" download ")
+ fmt.Println(" erase [db | all] (default: db)")
+ fmt.Println(" save [-f|--force] ")
+ fmt.Println(" serve [-proto ] [-p ]")
os.Exit(1)
}
_ = godotenv.Load()
-
+
switch os.Args[1] {
case "find":
if len(os.Args) < 3 {
@@ -58,7 +64,28 @@ func main() {
serveCmd.Parse(os.Args[2:])
serve(*protocol, *port)
case "erase":
- erase(SONGS_DIR)
+ // Default is to clear only database (db mode)
+ dbOnly := true
+ all := false
+
+ if len(os.Args) > 2 {
+ subCmd := os.Args[2]
+ switch subCmd {
+ case "db":
+ dbOnly = true
+ all = false
+ case "all":
+ dbOnly = false
+ all = true
+ default:
+ fmt.Println("Usage: main.go erase [db | all]")
+ fmt.Println(" db : only clear the database (default)")
+ fmt.Println(" all : clear database and songs folder")
+ os.Exit(1)
+ }
+ }
+
+ erase(SONGS_DIR, dbOnly, all)
case "save":
indexCmd := flag.NewFlagSet("save", flag.ExitOnError)
force := indexCmd.Bool("force", false, "save song with or without YouTube ID")
@@ -72,6 +99,12 @@ func main() {
save(filePath, *force)
default:
fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands")
+ fmt.Println("\nUsage examples:")
+ fmt.Println(" find ")
+ fmt.Println(" download ")
+ fmt.Println(" erase [db | all] (default: db)")
+ fmt.Println(" save [-f|--force] ")
+ fmt.Println(" serve [-proto ] [-p ]")
os.Exit(1)
}
}
diff --git a/server/shazam/fingerprint.go b/server/shazam/fingerprint.go
index a29398b..3768358 100644
--- a/server/shazam/fingerprint.go
+++ b/server/shazam/fingerprint.go
@@ -1,7 +1,10 @@
package shazam
import (
+ "fmt"
"song-recognition/models"
+ "song-recognition/utils"
+ "song-recognition/wav"
)
const (
@@ -23,7 +26,10 @@ func Fingerprint(peaks []Peak, songID uint32) map[uint32]models.Couple {
address := createAddress(anchor, target)
anchorTimeMs := uint32(anchor.Time * 1000)
- fingerprints[address] = models.Couple{anchorTimeMs, songID}
+ fingerprints[address] = models.Couple{
+ AnchorTimeMs: anchorTimeMs,
+ SongID: songID,
+ }
}
}
@@ -35,12 +41,52 @@ func Fingerprint(peaks []Peak, songID uint32) map[uint32]models.Couple {
// the anchor and target points, and other bits represent the time difference (delta time)
// between them. This function combines these components into a single address (a hash).
func createAddress(anchor, target Peak) uint32 {
- anchorFreq := int(real(anchor.Freq))
- targetFreq := int(real(target.Freq))
- deltaMs := uint32((target.Time - anchor.Time) * 1000)
+ anchorFreqBin := uint32(anchor.Freq / 10) // Scale down to fit in 9 bits
+ targetFreqBin := uint32(target.Freq / 10)
- // Combine the frequency of the anchor, target, and delta time into a 32-bit address
- address := uint32(anchorFreq<<23) | uint32(targetFreq<<14) | deltaMs
+ deltaMsRaw := uint32((target.Time - anchor.Time) * 1000)
+
+ // Mask to fit within bit constraints
+ anchorFreqBits := anchorFreqBin & ((1 << maxFreqBits) - 1) // 9 bits
+ targetFreqBits := targetFreqBin & ((1 << maxFreqBits) - 1) // 9 bits
+ deltaBits := deltaMsRaw & ((1 << maxDeltaBits) - 1) // 14 bits (max ~16 seconds)
+
+ // Combine into 32-bit address
+ address := (anchorFreqBits << 23) | (targetFreqBits << 14) | deltaBits
return address
}
+
+// FingerprintAudio converts the file at songFilePath to WAV and fingerprints
+// its left channel; for 2-channel audio the right channel's fingerprints are
+// merged into the same map via utils.ExtendMap. Every entry carries songID.
+func FingerprintAudio(songFilePath string, songID uint32) (map[uint32]models.Couple, error) {
+	wavFilePath, err := wav.ConvertToWAV(songFilePath)
+	if err != nil {
+		return nil, fmt.Errorf("error converting input file to WAV: %w", err)
+	}
+
+	wavInfo, err := wav.ReadWavInfo(wavFilePath)
+	if err != nil {
+		return nil, fmt.Errorf("error reading WAV info: %w", err)
+	}
+
+	spectro, err := Spectrogram(wavInfo.LeftChannelSamples, wavInfo.SampleRate)
+	if err != nil {
+		return nil, fmt.Errorf("error creating spectrogram: %w", err)
+	}
+
+	fingerprint := make(map[uint32]models.Couple)
+	peaks := ExtractPeaks(spectro, wavInfo.Duration, wavInfo.SampleRate)
+	utils.ExtendMap(fingerprint, Fingerprint(peaks, songID))
+
+	if wavInfo.Channels == 2 {
+		spectro, err = Spectrogram(wavInfo.RightChannelSamples, wavInfo.SampleRate)
+		if err != nil {
+			return nil, fmt.Errorf("error creating spectrogram for right channel: %w", err)
+		}
+		peaks = ExtractPeaks(spectro, wavInfo.Duration, wavInfo.SampleRate)
+		utils.ExtendMap(fingerprint, Fingerprint(peaks, songID))
+	}
+	return fingerprint, nil
+}
diff --git a/server/shazam/shazam.go b/server/shazam/shazam.go
index 0683019..fde0f0c 100644
--- a/server/shazam/shazam.go
+++ b/server/shazam/shazam.go
@@ -5,7 +5,6 @@ package shazam
import (
"fmt"
- "math"
"song-recognition/db"
"song-recognition/utils"
"sort"
@@ -30,7 +29,8 @@ func FindMatches(audioSample []float64, audioDuration float64, sampleRate int) (
return nil, time.Since(startTime), fmt.Errorf("failed to get spectrogram of samples: %v", err)
}
- peaks := ExtractPeaks(spectrogram, audioDuration)
+ peaks := ExtractPeaks(spectrogram, audioDuration, sampleRate)
+ // peaks := ExtractPeaksLMX(spectrogram, true)
sampleFingerprint := Fingerprint(peaks, utils.GenerateUniqueID())
sampleFingerprintMap := make(map[uint32]uint32)
@@ -38,7 +38,7 @@ func FindMatches(audioSample []float64, audioDuration float64, sampleRate int) (
sampleFingerprintMap[address] = couple.AnchorTimeMs
}
- matches, _, err := FindMatchesFGP(sampleFingerprintMap)
+ matches, _, _ := FindMatchesFGP(sampleFingerprintMap)
return matches, time.Since(startTime), nil
}
@@ -142,21 +142,32 @@ func filterMatches(
}
// analyzeRelativeTiming calculates a score for each song based on the
-// relative timing between the song and the sample's anchor times.
+// consistency of time offsets between the sample and database.
func analyzeRelativeTiming(matches map[uint32][][2]uint32) map[uint32]float64 {
scores := make(map[uint32]float64)
+
for songID, times := range matches {
- count := 0
- for i := 0; i < len(times); i++ {
- for j := i + 1; j < len(times); j++ {
- sampleDiff := math.Abs(float64(times[i][0] - times[j][0]))
- dbDiff := math.Abs(float64(times[i][1] - times[j][1]))
- if math.Abs(sampleDiff-dbDiff) < 100 { // Allow some tolerance
- count++
- }
+ offsetCounts := make(map[int32]int)
+
+ for _, timePair := range times {
+ sampleTime := int32(timePair[0])
+ dbTime := int32(timePair[1])
+ offset := dbTime - sampleTime
+
+ // Bin offsets in 100ms buckets to allow for small timing variations
+ offsetBucket := offset / 100
+ offsetCounts[offsetBucket]++
+ }
+
+ maxCount := 0
+ for _, count := range offsetCounts {
+ if count > maxCount {
+ maxCount = count
}
}
- scores[songID] = float64(count)
+
+ scores[songID] = float64(maxCount)
}
+
return scores
}
diff --git a/server/shazam/spectrogram.go b/server/shazam/spectrogram.go
index 2488019..bb7c9d5 100644
--- a/server/shazam/spectrogram.go
+++ b/server/shazam/spectrogram.go
@@ -8,13 +8,14 @@ import (
)
const (
- dspRatio = 4
- freqBinSize = 1024
- maxFreq = 5000.0 // 5kHz
- hopSize = freqBinSize / 32
+ dspRatio = 4
+ windowSize = 1024
+ maxFreq = 5000.0 // 5kHz
+ hopSize = windowSize / 2 // 50% overlap for better time-frequency resolution
+ windowType = "hanning" // choices: "hanning" or "hamming"
)
-func Spectrogram(sample []float64, sampleRate int) ([][]complex128, error) {
+func Spectrogram(sample []float64, sampleRate int) ([][]float64, error) {
filteredSample := LowPassFilter(maxFreq, float64(sampleRate), sample)
downsampledSample, err := Downsample(filteredSample, sampleRate, sampleRate/dspRatio)
@@ -22,31 +23,42 @@ func Spectrogram(sample []float64, sampleRate int) ([][]complex128, error) {
return nil, fmt.Errorf("couldn't downsample audio sample: %v", err)
}
- numOfWindows := len(downsampledSample) / (freqBinSize - hopSize)
- spectrogram := make([][]complex128, numOfWindows)
-
- window := make([]float64, freqBinSize)
+ window := make([]float64, windowSize)
for i := range window {
- window[i] = 0.54 - 0.46*math.Cos(2*math.Pi*float64(i)/(float64(freqBinSize)-1))
+ theta := 2 * math.Pi * float64(i) / float64(windowSize-1)
+ switch windowType {
+ case "hamming":
+ window[i] = 0.54 - 0.46*math.Cos(theta)
+ default: // Hanning window
+ window[i] = 0.5 - 0.5*math.Cos(theta)
+ }
}
+ // Initialize spectrogram slice
+ spectrogram := make([][]float64, 0)
+
// Perform STFT
- for i := 0; i < numOfWindows; i++ {
- start := i * hopSize
- end := start + freqBinSize
- if end > len(downsampledSample) {
- end = len(downsampledSample)
- }
+ for start := 0; start+windowSize <= len(downsampledSample); start += hopSize {
+ end := start + windowSize
- bin := make([]float64, freqBinSize)
- copy(bin, downsampledSample[start:end])
+ frame := make([]float64, windowSize)
+ copy(frame, downsampledSample[start:end])
- // Apply Hamming window
+ // Apply window
for j := range window {
- bin[j] *= window[j]
+ frame[j] *= window[j]
}
- spectrogram[i] = FFT(bin)
+ // Perform FFT
+ fftResult := FFT(frame)
+
+ // Convert complex spectrum to magnitude spectrum
+ magnitude := make([]float64, len(fftResult)/2)
+ for j := range magnitude {
+ magnitude[j] = cmplx.Abs(fftResult[j])
+ }
+
+ spectrogram = append(spectrogram, magnitude)
}
return spectrogram, nil
@@ -107,43 +119,47 @@ func Downsample(input []float64, originalSampleRate, targetSampleRate int) ([]fl
return resampled, nil
}
+// Peak represents a significant point in the spectrogram.
type Peak struct {
- Time float64
- Freq complex128
+ Freq float64 // Frequency in Hz
+ Time float64 // Time in seconds
}
// ExtractPeaks analyzes a spectrogram and extracts significant peaks in the frequency domain over time.
-func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
+func ExtractPeaks(spectrogram [][]float64, audioDuration float64, sampleRate int) []Peak {
if len(spectrogram) < 1 {
return []Peak{}
}
type maxies struct {
maxMag float64
- maxFreq complex128
freqIdx int
}
- bands := []struct{ min, max int }{{0, 10}, {10, 20}, {20, 40}, {40, 80}, {80, 160}, {160, 512}}
+ bands := []struct{ min, max int }{
+ {0, 10}, {10, 20}, {20, 40}, {40, 80}, {80, 160}, {160, 512},
+ }
var peaks []Peak
- binDuration := audioDuration / float64(len(spectrogram))
+ frameDuration := audioDuration / float64(len(spectrogram))
- for binIdx, bin := range spectrogram {
+ // Calculate frequency resolution (Hz per bin)
+ effectiveSampleRate := float64(sampleRate) / float64(dspRatio)
+ freqResolution := effectiveSampleRate / float64(windowSize)
+
+ for frameIdx, frame := range spectrogram {
var maxMags []float64
- var maxFreqs []complex128
- var freqIndices []float64
+ var freqIndices []int
binBandMaxies := []maxies{}
for _, band := range bands {
var maxx maxies
var maxMag float64
- for idx, freq := range bin[band.min:band.max] {
- magnitude := cmplx.Abs(freq)
- if magnitude > maxMag {
- maxMag = magnitude
+ for idx, mag := range frame[band.min:band.max] {
+ if mag > maxMag {
+ maxMag = mag
freqIdx := band.min + idx
- maxx = maxies{magnitude, freq, freqIdx}
+ maxx = maxies{mag, freqIdx}
}
}
binBandMaxies = append(binBandMaxies, maxx)
@@ -151,8 +167,7 @@ func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
for _, value := range binBandMaxies {
maxMags = append(maxMags, value.maxMag)
- maxFreqs = append(maxFreqs, value.maxFreq)
- freqIndices = append(freqIndices, float64(value.freqIdx))
+ freqIndices = append(freqIndices, value.freqIdx)
}
// Calculate the average magnitude
@@ -160,17 +175,15 @@ func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
for _, max := range maxMags {
maxMagsSum += max
}
- avg := maxMagsSum / float64(len(maxFreqs)) // * coefficient
+ avg := maxMagsSum / float64(len(maxMags))
// Add peaks that exceed the average magnitude
for i, value := range maxMags {
if value > avg {
- peakTimeInBin := freqIndices[i] * binDuration / float64(len(bin))
+ peakTime := float64(frameIdx) * frameDuration
+ peakFreq := float64(freqIndices[i]) * freqResolution
- // Calculate the absolute time of the peak
- peakTime := float64(binIdx)*binDuration + peakTimeInBin
-
- peaks = append(peaks, Peak{Time: peakTime, Freq: maxFreqs[i]})
+ peaks = append(peaks, Peak{Time: peakTime, Freq: peakFreq})
}
}
}
diff --git a/server/spotify/downloader.go b/server/spotify/downloader.go
index b9a95fb..7a8eeb2 100644
--- a/server/spotify/downloader.go
+++ b/server/spotify/downloader.go
@@ -2,9 +2,7 @@ package spotify
import (
"context"
- "errors"
"fmt"
- "io"
"log/slog"
"os"
"os/exec"
@@ -13,17 +11,15 @@ import (
"song-recognition/db"
"song-recognition/shazam"
"song-recognition/utils"
- "song-recognition/wav"
"strings"
"sync"
"time"
"github.com/fatih/color"
- "github.com/kkdai/youtube/v2"
"github.com/mdobak/go-xerrors"
)
-const DELETE_SONG_FILE = false
+const DELETE_SONG_FILE = false // Set true to delete the song file after fingerprinting
var yellow = color.New(color.FgYellow)
@@ -135,9 +131,9 @@ func dlTrack(tracks []Track, path string) (int, error) {
trackCopy.Title, trackCopy.Artist = correctFilename(trackCopy.Title, trackCopy.Artist)
fileName := fmt.Sprintf("%s - %s", trackCopy.Title, trackCopy.Artist)
- filePath := filepath.Join(path, fileName+".m4a")
+ filePath := filepath.Join(path, fileName)
- err = downloadYTaudio(ytID, path, filePath)
+ filePath, err = downloadYTaudio(ytID, filePath)
if err != nil {
logMessage := fmt.Sprintf("'%s' by '%s' could not be downloaded", trackCopy.Title, trackCopy.Artist)
logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err)))
@@ -151,12 +147,10 @@ func dlTrack(tracks []Track, path string) (int, error) {
return
}
- utils.DeleteFile(filepath.Join(path, fileName+".m4a"))
-
wavFilePath := filepath.Join(path, fileName+".wav")
if err := addTags(wavFilePath, *trackCopy); err != nil {
- logMessage := fmt.Sprintf("Error adding tags: %s", filePath+".wav")
+ logMessage := fmt.Sprintf("Error adding tags: %s", wavFilePath)
logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err)))
return
@@ -186,65 +180,6 @@ func dlTrack(tracks []Track, path string) (int, error) {
}
-/* github.com/kkdai/youtube */
-func downloadYTaudio(id, path, filePath string) error {
- logger := utils.GetLogger()
- dir, err := os.Stat(path)
- if err != nil {
- logger.Error("Error accessing path", slog.Any("error", err))
- return err
- }
-
- if !dir.IsDir() {
- err := errors.New("the path is not valid (not a dir)")
- logger.Error("Invalid directory path", slog.Any("error", err))
- return err
- }
-
- client := youtube.Client{}
- video, err := client.GetVideo(id)
- if err != nil {
- logger.Error("Error getting YouTube video", slog.Any("error", err))
- return err
- }
-
- /*
- itag code: 140, container: m4a, content: audio, bitrate: 128k
- change the FindByItag parameter to 139 if you want smaller files (but with a bitrate of 48k)
- https://gist.github.com/sidneys/7095afe4da4ae58694d128b1034e01e2
- */
- formats := video.Formats.Itag(140)
-
- /* in some cases, when attempting to download the audio
- using the library github.com/kkdai/youtube,
- the download fails (and shows the file size as 0 bytes)
- until the second or third attempt. */
- var fileSize int64
- file, err := os.Create(filePath)
- if err != nil {
- logger.Error("Error creating file", slog.Any("error", err))
- return err
- }
-
- for fileSize == 0 {
- stream, _, err := client.GetStream(video, &formats[0])
- if err != nil {
- logger.Error("Error getting stream", slog.Any("error", err))
- return err
- }
-
- if _, err = io.Copy(file, stream); err != nil {
- logger.Error("Error copying stream to file", slog.Any("error", err))
- return err
- }
-
- fileSize, _ = GetFileSize(filePath)
- }
- defer file.Close()
-
- return nil
-}
-
func addTags(file string, track Track) error {
logger := utils.GetLogger()
// Create a temporary file name by appending "2" before the extension
@@ -255,7 +190,7 @@ func addTags(file string, track Track) error {
tempFile = baseName + "2" + ".wav" // Temporary filename ('/path/to/title - artist2.wav')
}
- // Execute FFmpeg command to add metadata tags
+ // FFmpeg command to add metadata tags
cmd := exec.Command(
"ffmpeg",
"-i", file, // Input file path
@@ -291,40 +226,20 @@ func ProcessAndSaveSong(songFilePath, songTitle, songArtist, ytID string) error
}
defer dbclient.Close()
- wavFilePath, err := wav.ConvertToWAV(songFilePath, 1)
- if err != nil {
- logger.Error("Failed to convert to WAV", slog.Any("error", err))
- return err
- }
-
- wavInfo, err := wav.ReadWavInfo(wavFilePath)
- if err != nil {
- logger.Error("Failed to read WAV info", slog.Any("error", err))
- return err
- }
-
- samples, err := wav.WavBytesToSamples(wavInfo.Data)
- if err != nil {
- logger.Error("Error converting WAV bytes to samples", slog.Any("error", err))
- return fmt.Errorf("error converting wav bytes to float64: %v", err)
- }
-
- spectro, err := shazam.Spectrogram(samples, wavInfo.SampleRate)
- if err != nil {
- logger.Error("Error creating spectrogram", slog.Any("error", err))
- return fmt.Errorf("error creating spectrogram: %v", err)
- }
-
songID, err := dbclient.RegisterSong(songTitle, songArtist, ytID)
if err != nil {
logger.Error("Failed to register song", slog.Any("error", err))
- return err
+ return fmt.Errorf("error registering song '%s' by '%s': %v", songTitle, songArtist, err)
}
- peaks := shazam.ExtractPeaks(spectro, wavInfo.Duration)
- fingerprints := shazam.Fingerprint(peaks, songID)
+ fingerprint, err := shazam.FingerprintAudio(songFilePath, songID)
+ if err != nil {
+ dbclient.DeleteSongByID(songID)
+ logger.Error("Failed to create fingerprint", slog.String("wavFilePath", songFilePath))
+ return fmt.Errorf("error generating fingerprint for %s by %s", songTitle, songArtist)
+ }
- err = dbclient.StoreFingerprints(fingerprints)
+ err = dbclient.StoreFingerprints(fingerprint)
if err != nil {
dbclient.DeleteSongByID(songID)
logger.Error("Failed to store fingerprints", slog.Any("error", err))
diff --git a/server/spotify/youtube.go b/server/spotify/youtube.go
index 51d5d04..e18ced1 100644
--- a/server/spotify/youtube.go
+++ b/server/spotify/youtube.go
@@ -4,6 +4,11 @@ import (
"context"
"fmt"
"log"
+ "log/slog"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "song-recognition/utils"
"errors"
"io"
@@ -215,3 +220,49 @@ func ytSearch(searchTerm string, limit int) (results []*SearchResult, err error)
return results, nil
}
+
+// downloadYTaudio downloads the audio of a YouTube video with the yt-dlp
+// command line tool, asking it to extract the audio as WAV.
+//
+// videoURL is passed to yt-dlp as its only positional argument.
+// outputFilePath is passed to yt-dlp's -o flag; its parent directory must
+// already exist.
+//
+// It returns outputFilePath with ".wav" appended, or an empty string and an
+// error when the directory is invalid, yt-dlp is not in PATH, or the command
+// fails.
+func downloadYTaudio(videoURL, outputFilePath string) (string, error) {
+	logger := utils.GetLogger()
+
+	dir := filepath.Dir(outputFilePath)
+	if stat, err := os.Stat(dir); err != nil || !stat.IsDir() {
+		logger.Error("Invalid directory for output file", slog.Any("error", err))
+		return "", errors.New("output directory does not exist or is not a directory")
+	}
+
+	_, err := exec.LookPath("yt-dlp")
+	if err != nil {
+		logger.Error("yt-dlp not found in PATH", slog.Any("error", err))
+		return "", errors.New("yt-dlp is not installed or not in PATH")
+	}
+
+	audioFmt := "wav"
+	cmd := exec.Command(
+		"yt-dlp",
+		"-f", "bestaudio",
+		"--extract-audio",
+		"--audio-format", audioFmt,
+		"-o", outputFilePath,
+		// "--" ends option parsing: YouTube IDs may begin with '-' and would
+		// otherwise be read as a command line flag.
+		"--",
+		videoURL,
+	)
+
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		logger.Error("yt-dlp command failed", slog.String("output", string(output)), slog.Any("error", err))
+		return "", err
+	}
+	return outputFilePath + "." + audioFmt, nil
+}
diff --git a/server/utils/utils.go b/server/utils/utils.go
index b2b2fce..309e44f 100644
--- a/server/utils/utils.go
+++ b/server/utils/utils.go
@@ -27,3 +27,11 @@ func GetEnv(key string, fallback ...string) string {
}
return ""
}
+
+// ExtendMap copies every key/value pair of src into dest. A key that is
+// already present in dest has its value replaced by the one from src.
+func ExtendMap[K comparable, V any](dest, src map[K]V) {
+	for key, value := range src {
+		dest[key] = value
+	}
+}
diff --git a/server/wasm/wasm_main.go b/server/wasm/wasm_main.go
deleted file mode 100644
index 2afb489..0000000
--- a/server/wasm/wasm_main.go
+++ /dev/null
@@ -1,64 +0,0 @@
-//go:build js && wasm
-// +build js,wasm
-
-package main
-
-import (
- "song-recognition/shazam"
- "song-recognition/utils"
- "syscall/js"
-)
-
-func generateFingerprint(this js.Value, args []js.Value) interface{} {
- if len(args) < 2 {
- return js.ValueOf(map[string]interface{}{
- "error": 1,
- "data": "Expected audio array and sample rate",
- })
- }
-
- if args[0].Type() != js.TypeObject || args[1].Type() != js.TypeNumber {
- return js.ValueOf(map[string]interface{}{
- "error": 2,
- "data": "Invalid argument types; Expected audio array and samplerate (type: int)",
- })
- }
-
- inputArray := args[0]
- sampleRate := args[1].Int()
-
- audioData := make([]float64, inputArray.Length())
- for i := 0; i < inputArray.Length(); i++ {
- audioData[i] = inputArray.Index(i).Float()
- }
-
- spectrogram, err := shazam.Spectrogram(audioData, sampleRate)
- if err != nil {
- return js.ValueOf(map[string]interface{}{
- "error": 3,
- "data": "Error generating spectrogram: " + err.Error(),
- })
- }
-
- peaks := shazam.ExtractPeaks(spectrogram, float64(len(audioData)/sampleRate))
- fingerprint := shazam.Fingerprint(peaks, utils.GenerateUniqueID())
-
- fingerprintArray := []interface{}{}
- for address, couple := range fingerprint {
- entry := map[string]interface{}{
- "address": address,
- "anchorTime": couple.AnchorTimeMs,
- }
- fingerprintArray = append(fingerprintArray, entry)
- }
-
- return js.ValueOf(map[string]interface{}{
- "error": 0,
- "data": fingerprintArray,
- })
-}
-
-func main() {
- js.Global().Set("generateFingerprint", js.FuncOf(generateFingerprint))
- select {}
-}
diff --git a/server/wav/convert.go b/server/wav/convert.go
index 0d6dc20..4262d20 100644
--- a/server/wav/convert.go
+++ b/server/wav/convert.go
@@ -6,21 +6,33 @@ import (
"os/exec"
"path/filepath"
"song-recognition/utils"
+ "strconv"
"strings"
)
// ConvertToWAV converts an input audio file to WAV format with specified channels.
-func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, err error) {
+func ConvertToWAV(inputFilePath string) (wavFilePath string, err error) {
_, err = os.Stat(inputFilePath)
if err != nil {
return "", fmt.Errorf("input file does not exist: %v", err)
}
- if channels < 1 || channels > 2 {
- channels = 1
+ to_stereoStr := utils.GetEnv("FINGERPRINT_STEREO", "false")
+ to_stereo, err := strconv.ParseBool(to_stereoStr)
+ if err != nil {
+ return "", fmt.Errorf("failed to convert env variable (%s) to bool: %v", "FINGERPRINT_STEREO", err)
+ }
+
+ channels := 1
+ if to_stereo {
+ channels = 2
}
fileExt := filepath.Ext(inputFilePath)
+ if fileExt != ".wav" {
+ defer os.Remove(inputFilePath)
+ }
+
outputFile := strings.TrimSuffix(inputFilePath, fileExt) + ".wav"
// Output file may already exists. If it does FFmpeg will fail as
diff --git a/server/wav/wav.go b/server/wav/wav.go
index 1b05186..9fb2e63 100644
--- a/server/wav/wav.go
+++ b/server/wav/wav.go
@@ -94,49 +94,83 @@ func WriteWavFile(filename string, data []byte, sampleRate int, channels int, bi
return err
}
-// WavInfo defines a struct containing information extracted from the WAV header
type WavInfo struct {
- Channels int
- SampleRate int
- Data []byte
- Duration float64
+ Channels int // channel count taken from the header (NumChannels)
+ SampleRate int // sample rate taken from the header
+ Duration float64 // length in seconds, as computed by ReadWavInfo
+ Data []byte // file bytes that follow the first 44 (header) bytes
+ LeftChannelSamples []float64 // int16 samples scaled by 1/32768; the only channel for mono input
+ RightChannelSamples []float64 // int16 samples scaled by 1/32768; filled only for 2-channel input
}
+// ReadWavInfo reads filename fully into memory, checks its first 44 bytes as a RIFF/WAVE header
+// (AudioFormat 1, 16 bits per sample, 1 or 2 channels) and returns the metadata plus per-channel samples.
func ReadWavInfo(filename string) (*WavInfo, error) {
data, err := ioutil.ReadFile(filename)
if err != nil {
return nil, err
}
-
if len(data) < 44 {
return nil, errors.New("invalid WAV file size (too small)")
}
- // Read header chunks
+ // Parse the header to extract metadata; everything after byte 44 is treated as sample data
+ // https://en.wikipedia.org/wiki/WAV#WAV_file_header
var header WavHeader
- err = binary.Read(bytes.NewReader(data[:44]), binary.LittleEndian, &header)
- if err != nil {
+ if err := binary.Read(bytes.NewReader(data[:44]), binary.LittleEndian, &header); err != nil {
return nil, err
}
-
- if string(header.ChunkID[:]) != "RIFF" || string(header.Format[:]) != "WAVE" || header.AudioFormat != 1 {
+ if string(header.ChunkID[:]) != "RIFF" ||
+ string(header.Format[:]) != "WAVE" ||
+ header.AudioFormat != 1 {
return nil, errors.New("invalid WAV header format")
}
- // Extract information
info := &WavInfo{
Channels: int(header.NumChannels),
SampleRate: int(header.SampleRate),
Data: data[44:],
}
- // Calculate audio duration (assuming data contains PCM data)
- if header.BitsPerSample == 16 {
- info.Duration = float64(len(info.Data)) / float64(int(header.NumChannels)*2*int(header.SampleRate))
- } else {
- return nil, errors.New("unsupported bits per sample format")
+ if header.BitsPerSample != 16 {
+ return nil, errors.New("unsupported bits‑per‑sample (expect 16‑bit PCM)")
}
+ sampleCount := len(info.Data) / 2 // two bytes per sample, counted across all channels
+ int16Buf := make([]int16, sampleCount)
+ if err := binary.Read(bytes.NewReader(info.Data), binary.LittleEndian, int16Buf); err != nil {
+ return nil, err
+ }
+
+ const scale = 1.0 / 32768.0 // 16-bit normalisation factor applied to every int16 sample
+
+ switch header.NumChannels {
+ case 1:
+ left := make([]float64, sampleCount)
+ for i, s := range int16Buf {
+ left[i] = float64(s) * scale
+ }
+ info.LeftChannelSamples = left
+
+ case 2:
+ frameCount := sampleCount / 2 // one frame = one left sample followed by one right sample
+ left := make([]float64, frameCount)
+ right := make([]float64, frameCount)
+ for i := 0; i < frameCount; i++ {
+ left[i] = float64(int16Buf[2*i]) * scale
+ right[i] = float64(int16Buf[2*i+1]) * scale
+ }
+ info.LeftChannelSamples = left
+ info.RightChannelSamples = right
+
+ default:
+ return nil, errors.New("unsupported channel count (only mono/stereo)")
+ }
+
+ // Duration in seconds: samples per channel divided by the header's sample rate
+ info.Duration = float64(sampleCount) /
+ (float64(header.NumChannels) * float64(header.SampleRate))
+
return info, nil
}
diff --git a/wasm/build.sh b/wasm/build.sh
new file mode 100755
index 0000000..ebb7d1c
--- /dev/null
+++ b/wasm/build.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Build script for the WASM fingerprint generator.
+#
+# Compiles wasm_main.go for the js/wasm target into fingerprint.wasm and
+# copies the result to ../client/public/. Exits with status 1 if the build
+# or the copy fails.
+
+# Run from the script's own directory so the relative paths below resolve
+# regardless of where the script is invoked from.
+cd "$(dirname "$0")" || exit 1
+
+echo "Building WASM module..."
+
+export GOOS=js
+export GOARCH=wasm
+
+if ! go build -o fingerprint.wasm wasm_main.go; then
+    echo "x WASM build failed"
+    exit 1
+fi
+echo "✓ WASM build successful: fingerprint.wasm"
+
+if ! cp fingerprint.wasm ../client/public/; then
+    echo "x Failed to copy fingerprint.wasm to client/public/"
+    exit 1
+fi
+echo "✓ Copied fingerprint.wasm to client/public/"
diff --git a/wasm/go.mod b/wasm/go.mod
new file mode 100644
index 0000000..5cc2fe0
--- /dev/null
+++ b/wasm/go.mod
@@ -0,0 +1,25 @@
+module wasm-fingerprint
+
+go 1.23.0
+
+toolchain go1.24.3
+
+require song-recognition v0.0.0-00010101000000-000000000000
+
+require (
+ github.com/golang/snappy v0.0.4 // indirect
+ github.com/klauspost/compress v1.17.6 // indirect
+ github.com/mattn/go-sqlite3 v1.14.22 // indirect
+ github.com/mdobak/go-xerrors v0.3.1 // indirect
+ github.com/montanaflynn/stats v0.7.1 // indirect
+ github.com/xdg-go/pbkdf2 v1.0.0 // indirect
+ github.com/xdg-go/scram v1.1.2 // indirect
+ github.com/xdg-go/stringprep v1.0.4 // indirect
+ github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect
+ go.mongodb.org/mongo-driver v1.14.0 // indirect
+ golang.org/x/crypto v0.33.0 // indirect
+ golang.org/x/sync v0.11.0 // indirect
+ golang.org/x/text v0.22.0 // indirect
+)
+
+replace song-recognition => ../server
diff --git a/wasm/go.sum b/wasm/go.sum
new file mode 100644
index 0000000..5e7ccba
--- /dev/null
+++ b/wasm/go.sum
@@ -0,0 +1,59 @@
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
+github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM=
+github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI=
+github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
+github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
+github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
+github.com/mdobak/go-xerrors v0.3.1 h1:XfqaLMNN5T4qsHSlLHGJ35f6YlDTVeINSYYeeuK4VpQ=
+github.com/mdobak/go-xerrors v0.3.1/go.mod h1:nIR+HMAJuj/uNqyp5+MTN6PJ7ymuIJq3UVs9QCgAHbY=
+github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
+github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
+github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
+github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
+github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
+github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
+github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
+github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
+github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a h1:fZHgsYlfvtyqToslyjUt3VOPF4J7aK/3MPcK7xp3PDk=
+github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a/go.mod h1:ul22v+Nro/R083muKhosV54bj5niojjWZvU8xrevuH4=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+go.mongodb.org/mongo-driver v1.14.0 h1:P98w8egYRjYe3XDjxhYJagTokP/H6HzlsnojRgZRd80=
+go.mongodb.org/mongo-driver v1.14.0/go.mod h1:Vzb0Mk/pa7e6cWw85R4F/endUC3u0U9jGcNU603k65c=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
+golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
+golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
+golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
+golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/wasm/wasm_main.go b/wasm/wasm_main.go
new file mode 100644
index 0000000..840e0c5
--- /dev/null
+++ b/wasm/wasm_main.go
@@ -0,0 +1,122 @@
+//go:build js && wasm
+// +build js,wasm
+
+package main
+
+import (
+	"song-recognition/models"
+	"song-recognition/shazam"
+	"song-recognition/utils"
+	"syscall/js"
+)
+
+// errorResult builds the { error, data } object handed back to JavaScript
+// when fingerprinting cannot proceed.
+func errorResult(code int, message string) js.Value {
+	return js.ValueOf(map[string]any{
+		"error": code,
+		"data":  message,
+	})
+}
+
+// fingerprintChannel runs the spectrogram -> peaks -> fingerprint pipeline on
+// the samples of a single audio channel.
+func fingerprintChannel(samples []float64, sampleRate int) (map[uint32]models.Couple, error) {
+	spectrogram, err := shazam.Spectrogram(samples, sampleRate)
+	if err != nil {
+		return nil, err
+	}
+	duration := float64(len(samples)) / float64(sampleRate)
+	peaks := shazam.ExtractPeaks(spectrogram, duration, sampleRate)
+	return shazam.Fingerprint(peaks, utils.GenerateUniqueID()), nil
+}
+
+// generateFingerprint takes audio data from the frontend and generates fingerprints.
+//
+// Arguments: [audioArray, sampleRate, channels]
+//   - audioArray: array-like object of numeric samples; with two channels the
+//     samples are read as interleaved left/right pairs and a trailing unpaired
+//     sample is ignored.
+//   - sampleRate: number, truncated to an int.
+//   - channels: number, must be 1 or 2.
+//
+// Returns: { error: number, data: fingerprintArray or error message }
+// error is 0 on success, 1 when arguments are missing, 2 when an argument is
+// invalid and 3 when a spectrogram could not be generated. On success data is
+// an array of { address, anchorTime } entries in no particular order.
+func generateFingerprint(this js.Value, args []js.Value) any {
+	if len(args) < 3 {
+		return errorResult(1, "Expected audio array, sample rate, and number of channels")
+	}
+
+	if args[0].Type() != js.TypeObject || args[1].Type() != js.TypeNumber {
+		return errorResult(2, "Invalid argument types; Expected audio array and samplerate (type: int)")
+	}
+
+	// Value.Int panics on anything that is not a JavaScript number, so the
+	// type has to be checked before the value is read.
+	channels := 0
+	if args[2].Type() == js.TypeNumber {
+		channels = args[2].Int()
+	}
+	if channels != 1 && channels != 2 {
+		return errorResult(2, "Invalid number of channels; expected 1 or 2")
+	}
+
+	inputArray := args[0]
+	sampleRate := args[1].Int()
+
+	audioData := make([]float64, inputArray.Length())
+	for i := range audioData {
+		audioData[i] = inputArray.Index(i).Float()
+	}
+
+	// One slice of samples per channel; mono input is used as is.
+	channelSamples := [][]float64{audioData}
+	if channels == 2 {
+		// De-interleave whole left/right frames. Counting frames rather than
+		// samples keeps an odd-length input from indexing past the end.
+		frames := len(audioData) / 2
+		left := make([]float64, frames)
+		right := make([]float64, frames)
+		for i := 0; i < frames; i++ {
+			left[i] = audioData[2*i]
+			right[i] = audioData[2*i+1]
+		}
+		channelSamples = [][]float64{left, right}
+	}
+
+	// Fingerprints of all channels are merged into a single map.
+	fingerprint := make(map[uint32]models.Couple)
+	for _, samples := range channelSamples {
+		channelFingerprint, err := fingerprintChannel(samples, sampleRate)
+		if err != nil {
+			return errorResult(3, "Error generating spectrogram: "+err.Error())
+		}
+		utils.ExtendMap(fingerprint, channelFingerprint)
+	}
+
+	fingerprintArray := make([]any, 0, len(fingerprint))
+	for address, couple := range fingerprint {
+		fingerprintArray = append(fingerprintArray, map[string]any{
+			"address":    address,
+			"anchorTime": couple.AnchorTimeMs,
+		})
+	}
+
+	return js.ValueOf(map[string]any{
+		"error": 0,
+		"data":  fingerprintArray,
+	})
+}
+
+// main registers generateFingerprint as a global JavaScript function,
+// dispatches a "wasmReady" event on the global object, and then blocks forever
+// to keep the Go program running.
+func main() {
+	js.Global().Set("generateFingerprint", js.FuncOf(generateFingerprint))
+
+	js.Global().Call("dispatchEvent", js.Global().Get("Event").New("wasmReady"))
+
+	select {}
+}