Merge pull request #44 from cgzirim/development

Fix Critical Audio Fingerprinting Algorithm Bugs
This commit is contained in:
Chigozirim Igweamaka 2025-11-19 12:43:33 -08:00 committed by GitHub
commit e825099e17
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 831 additions and 360 deletions

View file

@ -1,28 +1,88 @@
# Binaries for programs and plugins
*.exe
*.ogg
*.m4a
*.zip
*.exe~
*.dll
*.so
*.dylib
# Git
.git
.gitignore
.gitattributes
# Test binary, built with `go test -c`
*.test
# Documentation
*.md
!README.md
LICENSE
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Dependency directories (remove the comment below to include it)
# vendor/
# Go workspace file
go.work
**/songs
# IDE
.vscode
.idea
*.swp
*.swo
*~
package-lock.json
# OS
.DS_Store
Thumbs.db
*sqlite3
.env
# Node
client/node_modules
client/.env.local
client/.env.development.local
client/.env.test.local
client/.env.production.local
client/build
client/coverage
client/npm-debug.log*
client/yarn-debug.log*
client/yarn-error.log*
# Go
server/seek-tune
server/*.exe
server/*.test
server/*.out
server/vendor/
# Application data (don't copy into image)
server/songs/**
server/recordings/**
server/snippets/**
server/tmp/**
server/db/*.sqlite3
server/db/*.db
# Audio files
*.mp3
*.wav
*.m4a
*.ogg
*.flac
*.aac
# Archives
*.zip
*.tar
*.gz
*.rar
# Environment
.env
.env.*
!.env.example
# CI/CD
.github
.gitlab-ci.yml
.travis.yml
# Docker
docker-compose*.yml
!docker-compose.yml
Dockerfile*
!Dockerfile
# WASM (already built separately if needed)
wasm/fingerprint.wasm
wasm/go.sum
# Scripts
scripts/
appspec.yml
# Logs
*.log

View file

@ -1,9 +0,0 @@
DB_TYPE=mongo
DB_USER=user
DB_PASS=password
DB_NAME=seek-tune
DB_HOST=192.168.0.1
DB_PORT=27017
REACT_APP_BACKEND_URL=http://localhost:5000
SPOTIFY_CLIENT_ID=yourclientid
SPOTIFY_CLIENT_SECRET=yoursecret

4
.gitignore vendored
View file

@ -28,4 +28,6 @@ go.work
package-lock.json
*sqlite3
.env
.env
token.json

View file

@ -1,33 +1,63 @@
# build react
# Build React frontend
FROM node:20-alpine AS build_react_stage
RUN mkdir -p /home/react
WORKDIR /home/react
WORKDIR /app/client
COPY client/package.json ./
RUN npm install
COPY client/package*.json ./
RUN npm ci --only=production && npm cache clean --force
COPY client/ ./
ARG REACT_APP_BACKEND_URL
ENV REACT_APP_BACKEND_URL=${REACT_APP_BACKEND_URL}
RUN npm run build
# build go
FROM golang:1.21.6
# Build Go backend
FROM golang:1.24-alpine AS build_go_stage
WORKDIR /home/seek-tune
RUN apk add --no-cache git ca-certificates tzdata gcc musl-dev
WORKDIR /app/server
COPY server/go.mod server/go.sum ./
RUN go mod download
RUN go mod download && go mod verify
COPY server/ ./
ENV ENV=production
RUN go build -ldflags="-w -s" -o seek-tune
# Final runtime image
FROM alpine:latest
# Install runtime dependencies
RUN apk add --no-cache \
ca-certificates \
tzdata \
ffmpeg \
python3 \
py3-pip \
&& pip3 install --no-cache-dir yt-dlp --break-system-packages
WORKDIR /app
COPY --from=build_go_stage /app/server/seek-tune .
RUN mkdir -p static
COPY --from=build_react_stage /home/react/build static
COPY --from=build_react_stage /app/client/build ./static
RUN go build -o seek-tune
RUN mkdir -p db songs recordings snippets tmp && \
chmod -R 755 db songs recordings snippets tmp
ENV ENV=production
EXPOSE 5000
CMD [ "/home/seek-tune/seek-tune", "serve" ]
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD wget --no-verbose --tries=1 --spider http://localhost:5000/ || exit 1
# Run as non-root user for security
RUN addgroup -g 1001 -S appuser && \
adduser -u 1001 -S appuser -G appuser && \
chown -R appuser:appuser /app
USER appuser
CMD ["./seek-tune", "serve", "http", "5000"]

View file

@ -8,21 +8,17 @@
</a>
</p>
<p align="center"><a href="https://drive.google.com/file/d/1I2esH2U4DtXHsNgYbUi4OL-ukV5i_1PI/view" target="_blank">Demo in Video</a></p>
<p align="center"><a href="https://drive.google.com/file/d/1I2esH2U4DtXHsNgYbUi4OL-ukV5i_1PI/view" target="_blank">Demo in Video</a> | <a href="https://www.youtube.com/watch?v=a0CVCcb0RJM" target="_blank">How it was made (YouTube)</a></p>
## Description 🎼
SeekTune is an implementation of Shazam's song recognition algorithm based on insights from these [resources](#resources--card_file_box). It integrates Spotify and YouTube APIs to find and download songs.
[//]: # (## Current Limitations
While the algorithm works excellently in matching a song with its exact file, it doesn't always find the right match from a recording. However, this project is still a work in progress. I'm hopeful about making it work, but I could definitely use some help :slightly_smiling_face:.
Additionally, it currently only supports song files in WAV format.
)
## Installation :desktop_computer:
### Prerequisites
- Golang: [Install Golang](https://golang.org/dl/)
- FFmpeg: [Install FFmpeg](https://ffmpeg.org/download.html)
- NPM: To run the client (frontend).
- NPM: [Install Node](https://nodejs.org/en/download)
- YT-DLP: [Install YT-DLP](https://github.com/yt-dlp/yt-dlp/wiki/Installation)
### Steps
📦 Clone the repository:
@ -42,27 +38,17 @@ Prerequisites: [Docker](https://docs.docker.com/get-docker/) and [Docker Compose
docker-compose down
```
#### 🎧 Spotify API
#### 🎧 Spotify API Setup
To access Spotify metadata, the project now uses the official [Spotify Web API](https://developer.spotify.com/documentation/web-api/). This requires creating a developer application and retrieving a client ID and client secret.
1. Get credentials: Follow the [official getting started guide](https://developer.spotify.com/documentation/web-api/tutorials/getting-started) to create a Spotify app and obtain your **Client ID** and **Client Secret**.
Follow the [official getting started guide](https://developer.spotify.com/documentation/web-api/tutorials/getting-started#request-an-access-token) to:
2. Configure: Create a `.env` file in the `server` directory:
```bash
SPOTIFY_CLIENT_ID=your-client-id
SPOTIFY_CLIENT_SECRET=your-client-secret
```
1. Create a Spotify developer app.
2. Copy your **Client ID** and **Client Secret**.
##### Setting up Credentials
Instead of using a credentials.json file, the application now reads these values from environment variables.
Create a .env file in the server directory with the following content:
```
SPOTIFY_CLIENT_ID=your-client-id
SPOTIFY_CLIENT_SECRET=your-client-secret
```
Make sure this .env file is loaded into your environment before running the server.
The application will automatically read this file to fetch and cache access tokens. If the token is expired or missing, a new one will be requested.
The app will automatically fetch and cache access tokens as needed.
#### 💻 Set Up Natively
Install dependencies for the backend
@ -109,7 +95,12 @@ go run *.go find <path-to-wav-file>
```
#### ▸ Delete fingerprints and songs 🗑️
```
# Delete only database (default)
go run *.go erase
go run *.go erase db
# Delete both database and song files
go run *.go erase all
```
## Example :film_projector:

1
client/.env.example Normal file
View file

@ -0,0 +1 @@
REACT_APP_BACKEND_URL=http://localhost:5000

BIN
client/public/fingerprint.wasm Executable file

Binary file not shown.

Binary file not shown.

View file

@ -15,7 +15,8 @@ import { fetchFile } from '@ffmpeg/util';
import AnimatedNumber from "./components/AnimatedNumber";
const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000";
const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5500";
const recordStereo = process.env.REACT_APP_RECORD_STEREO === "true" || false;
// https://seek-tune-rq4gn.ondigitalocean.app/
var socket = io(server);
@ -91,7 +92,7 @@ function App() {
try {
const go = new window.Go();
const result = await WebAssembly.instantiateStreaming(
fetch("/main.wasm"),
fetch("/fingerprint.wasm"),
go.importObject
);
go.run(result.instance);
@ -175,15 +176,15 @@ function App() {
cleanUp();
const inputFile = 'input.wav';
const outputFile = 'output_mono.wav';
const outputFile = 'output_formatted.wav';
// Convert audio to 16-bit PCM WAV at 44100 Hz (stereo when recordStereo is set, mono otherwise)
await ffmpeg.writeFile(inputFile, await fetchFile(blob))
const exitCode = await ffmpeg.exec([
'-i', inputFile,
'-c', 'pcm_s16le',
'-ar', '44100',
'-ac', '1',
'-ac', recordStereo ? '2' : '1',
'-acodec', 'pcm_s16le',
'-f', 'wav',
outputFile
]);
@ -191,11 +192,11 @@ function App() {
throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`);
}
const monoData = await ffmpeg.readFile(outputFile);
const monoBlob = new Blob([monoData.buffer], { type: 'audio/wav' });
const audioData = await ffmpeg.readFile(outputFile);
const audioBlob = new Blob([audioData.buffer], { type: 'audio/wav' });
const reader = new FileReader();
reader.readAsArrayBuffer(monoBlob);
reader.readAsArrayBuffer(audioBlob);
reader.onload = async (event) => {
const arrayBuffer = event.target.result;
const audioContext = new AudioContext();
@ -205,7 +206,11 @@ function App() {
const audioData = audioBufferDecoded.getChannelData(0);
const audioArray = Array.from(audioData);
const result = genFingerprint(audioArray, audioBufferDecoded.sampleRate);
const result = genFingerprint(
audioArray,
audioBufferDecoded.sampleRate,
audioBufferDecoded.numberOfChannels
);
if (result.error !== 0) {
toast["error"](() => <div>An error occured</div>)
console.log("An error occured: ", result)
@ -288,7 +293,7 @@ function App() {
return (
<div className="App">
<div className="TopHeader">
<h2 style={{ color: "#374151" }}>!Shazam</h2>
<h2 style={{ color: "#374151" }}>SeekTune</h2>
<h4 style={{ display: "flex", justifyContent: "flex-end" }}>
<AnimatedNumber includeComma={true} animateToNumber={totalSongs} />
&nbsp;Songs

View file

@ -1,15 +1,21 @@
version: '3.1'
version: '3.8'
volumes:
seek-tune-db:
driver: local
seek-tune-songs:
driver: local
seek-tune-recordings:
driver: local
services:
seek-tune:
image: 'seek-tune'
image: seek-tune:latest
container_name: seek-tune-app
restart: unless-stopped
ports:
- 8080:5000
- "${HOST_PORT:-8080}:5000"
environment:
DB_TYPE: ${DB_TYPE:-sqlite}
@ -18,14 +24,67 @@ services:
DB_NAME: ${DB_NAME:-seek_tune_db}
DB_HOST: ${DB_HOST:-localhost}
DB_PORT: ${DB_PORT:-27017}
ENV: production
REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080}
SPOTIFY_CLIENT_ID: ${SPOTIFY_CLIENT_ID:-}
SPOTIFY_CLIENT_SECRET: ${SPOTIFY_CLIENT_SECRET:-}
build:
context: .
dockerfile: Dockerfile
args:
REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080}
cache_from:
- seek-tune:latest
volumes:
- seek-tune-db:/home/seek-tune/db
- seek-tune-songs:/home/seek-tune/songs
- seek-tune-db:/app/db
- seek-tune-songs:/app/songs
- seek-tune-recordings:/app/recordings
# Optional: Mount local songs directory for development
# - ./server/songs:/app/songs
# Resource limits (adjust based on your needs)
deploy:
resources:
limits:
cpus: '2.0'
memory: 2G
reservations:
cpus: '0.5'
memory: 512M
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5000/"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
logging:
driver: "json-file"
options:
max-size: "10m"
max-file: "3"
# Optional: MongoDB service (if using MongoDB instead of SQLite)
# mongodb:
# image: mongo:7
# container_name: seek-tune-mongo
# restart: unless-stopped
# environment:
# MONGO_INITDB_ROOT_USERNAME: ${DB_USER:-root}
# MONGO_INITDB_ROOT_PASSWORD: ${DB_PASSWORD:-password}
# MONGO_INITDB_DATABASE: ${DB_NAME:-seek_tune_db}
# ports:
# - "27017:27017"
# volumes:
# - seek-tune-db:/data/db
# healthcheck:
# test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
# interval: 10s
# timeout: 10s
# retries: 5
# start_period: 40s

14
server/.env.example Normal file
View file

@ -0,0 +1,14 @@
DB_TYPE=mongo # or sqlite
DB_USER=user
DB_PASS=password
DB_NAME=seek-tune
DB_HOST=192.168.0.1
DB_PORT=27017
# Set to true to enable stereo fingerprinting (uses more storage but may improve accuracy)
FINGERPRINT_STEREO=false
SPOTIFY_CLIENT_ID=yourclientid
SPOTIFY_CLIENT_SECRET=yoursecret

View file

@ -10,6 +10,7 @@ import (
"net/http"
"os"
"path/filepath"
"runtime"
"song-recognition/db"
"song-recognition/shazam"
"song-recognition/spotify"
@ -34,19 +35,24 @@ const (
var yellow = color.New(color.FgYellow)
func find(filePath string) {
wavInfo, err := wav.ReadWavInfo(filePath)
wavFilePath, err := wav.ConvertToWAV(filePath)
if err != nil {
yellow.Println("Error reading wave info:", err)
yellow.Println("Error converting to WAV:", err)
return
}
samples, err := wav.WavBytesToSamples(wavInfo.Data)
fingerprint, err := shazam.FingerprintAudio(wavFilePath, utils.GenerateUniqueID())
if err != nil {
yellow.Println("Error converting to samples:", err)
yellow.Println("Error generating fingerprint for sample: ", err)
return
}
matches, searchDuration, err := shazam.FindMatches(samples, wavInfo.Duration, wavInfo.SampleRate)
sampleFingerprint := make(map[uint32]uint32)
for address, couple := range fingerprint {
sampleFingerprint[address] = couple.AnchorTimeMs
}
matches, searchDuration, err := shazam.FindMatchesFGP(sampleFingerprint)
if err != nil {
yellow.Println("Error finding matches:", err)
return
@ -193,7 +199,7 @@ func serveHTTP(socketServer *socketio.Server, serveHTTPS bool, port string) {
}
}
func erase(songsDir string) {
func erase(songsDir string, dbOnly bool, all bool) {
logger := utils.GetLogger()
ctx := context.Background()
@ -216,26 +222,31 @@ func erase(songsDir string) {
logger.ErrorContext(ctx, msg, slog.Any("error", err))
}
// delete song files
err = filepath.Walk(songsDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
fmt.Println("Database cleared")
if !info.IsDir() {
ext := filepath.Ext(path)
if ext == ".wav" || ext == ".m4a" {
err := os.Remove(path)
if err != nil {
return err
// delete song files only when the 'all' subcommand was given
if all {
err = filepath.Walk(songsDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.IsDir() {
ext := filepath.Ext(path)
if ext == ".wav" || ext == ".m4a" {
err := os.Remove(path)
if err != nil {
return err
}
}
}
return nil
})
if err != nil {
msg := fmt.Sprintf("Error walking through directory %s: %v\n", songsDir, err)
logger.ErrorContext(ctx, msg, slog.Any("error", err))
}
return nil
})
if err != nil {
msg := fmt.Sprintf("Error walking through directory %s: %v\n", songsDir, err)
logger.ErrorContext(ctx, msg, slog.Any("error", err))
fmt.Println("Songs folder cleared")
}
fmt.Println("Erase complete")
@ -249,6 +260,7 @@ func save(path string, force bool) {
}
if fileInfo.IsDir() {
var filePaths []string
err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error {
if err != nil {
fmt.Printf("Error walking the path %v: %v\n", filePath, err)
@ -256,16 +268,16 @@ func save(path string, force bool) {
}
// Process only files, skip directories
if !info.IsDir() {
err := saveSong(filePath, force)
if err != nil {
fmt.Printf("Error saving song (%v): %v\n", filePath, err)
}
filePaths = append(filePaths, filePath)
}
return nil
})
if err != nil {
fmt.Printf("Error walking the directory %v: %v\n", path, err)
return
}
processFilesConCurrently(filePaths, force)
} else {
err := saveSong(path, force)
if err != nil {
@ -274,6 +286,50 @@ func save(path string, force bool) {
}
}
// processFilesConCurrently saves every file in filePaths by calling saveSong
// from a bounded pool of worker goroutines, then prints how many files were
// processed, how many succeeded and how many failed.
//
// The pool size is half the number of logical CPUs, never less than one and
// never more than the number of files. force is passed to saveSong unchanged.
// The function returns only after a result has been received for every file.
func processFilesConCurrently(filePaths []string, force bool) {
	numFiles := len(filePaths)
	if numFiles == 0 {
		return
	}

	// NumCPU()/2 is 0 on a single-CPU machine. Without the lower bound no
	// worker would start and the result loop below would block forever.
	maxWorkers := runtime.NumCPU() / 2
	if maxWorkers < 1 {
		maxWorkers = 1
	}
	if numFiles < maxWorkers {
		maxWorkers = numFiles
	}

	// Both channels hold numFiles entries, so neither the producer loop nor
	// the workers can block on a send.
	jobs := make(chan string, numFiles)
	results := make(chan error, numFiles)

	for w := 0; w < maxWorkers; w++ {
		go func() {
			// Workers exit once jobs is closed and drained.
			for filePath := range jobs {
				results <- saveSong(filePath, force)
			}
		}()
	}

	for _, filePath := range filePaths {
		jobs <- filePath
	}
	close(jobs)

	successCount := 0
	errorCount := 0

	// Exactly one result is produced per file, so numFiles receives drain
	// the channel and double as the wait for all workers to finish.
	for i := 0; i < numFiles; i++ {
		if err := <-results; err != nil {
			fmt.Printf("Error: %v\n", err)
			errorCount++
		} else {
			successCount++
		}
	}

	fmt.Printf("\n ->> Processed %d files: %d successful, %d failed\n", numFiles, successCount, errorCount)
}
func saveSong(filePath string, force bool) error {
metadata, err := wav.GetMetadata(filePath)
if err != nil {

View file

@ -8,8 +8,8 @@ import (
"os"
"song-recognition/utils"
"github.com/mdobak/go-xerrors"
"github.com/joho/godotenv"
"github.com/mdobak/go-xerrors"
)
func main() {
@ -32,10 +32,16 @@ func main() {
if len(os.Args) < 2 {
fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands")
fmt.Println("\nUsage examples:")
fmt.Println(" find <path_to_wav_file>")
fmt.Println(" download <spotify_url>")
fmt.Println(" erase [db | all] (default: db)")
fmt.Println(" save [-f|--force] <path_to_file_or_dir>")
fmt.Println(" serve [-proto <http|https>] [-p <port>]")
os.Exit(1)
}
_ = godotenv.Load()
switch os.Args[1] {
case "find":
if len(os.Args) < 3 {
@ -58,7 +64,28 @@ func main() {
serveCmd.Parse(os.Args[2:])
serve(*protocol, *port)
case "erase":
erase(SONGS_DIR)
// Default is to clear only database (db mode)
dbOnly := true
all := false
if len(os.Args) > 2 {
subCmd := os.Args[2]
switch subCmd {
case "db":
dbOnly = true
all = false
case "all":
dbOnly = false
all = true
default:
fmt.Println("Usage: main.go erase [db | all]")
fmt.Println(" db : only clear the database (default)")
fmt.Println(" all : clear database and songs folder")
os.Exit(1)
}
}
erase(SONGS_DIR, dbOnly, all)
case "save":
indexCmd := flag.NewFlagSet("save", flag.ExitOnError)
force := indexCmd.Bool("force", false, "save song with or without YouTube ID")
@ -72,6 +99,12 @@ func main() {
save(filePath, *force)
default:
fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands")
fmt.Println("\nUsage examples:")
fmt.Println(" find <path_to_wav_file>")
fmt.Println(" download <spotify_url>")
fmt.Println(" erase [db | all] (default: db)")
fmt.Println(" save [-f|--force] <path_to_file_or_dir>")
fmt.Println(" serve [-proto <http|https>] [-p <port>]")
os.Exit(1)
}
}

View file

@ -1,7 +1,10 @@
package shazam
import (
"fmt"
"song-recognition/models"
"song-recognition/utils"
"song-recognition/wav"
)
const (
@ -23,7 +26,10 @@ func Fingerprint(peaks []Peak, songID uint32) map[uint32]models.Couple {
address := createAddress(anchor, target)
anchorTimeMs := uint32(anchor.Time * 1000)
fingerprints[address] = models.Couple{anchorTimeMs, songID}
fingerprints[address] = models.Couple{
AnchorTimeMs: anchorTimeMs,
SongID: songID,
}
}
}
@ -35,12 +41,52 @@ func Fingerprint(peaks []Peak, songID uint32) map[uint32]models.Couple {
// the anchor and target points, and other bits represent the time difference (delta time)
// between them. This function combines these components into a single address (a hash).
func createAddress(anchor, target Peak) uint32 {
anchorFreq := int(real(anchor.Freq))
targetFreq := int(real(target.Freq))
deltaMs := uint32((target.Time - anchor.Time) * 1000)
anchorFreqBin := uint32(anchor.Freq / 10) // Scale down to fit in 9 bits
targetFreqBin := uint32(target.Freq / 10)
// Combine the frequency of the anchor, target, and delta time into a 32-bit address
address := uint32(anchorFreq<<23) | uint32(targetFreq<<14) | deltaMs
deltaMsRaw := uint32((target.Time - anchor.Time) * 1000)
// Mask to fit within bit constraints
anchorFreqBits := anchorFreqBin & ((1 << maxFreqBits) - 1) // 9 bits
targetFreqBits := targetFreqBin & ((1 << maxFreqBits) - 1) // 9 bits
deltaBits := deltaMsRaw & ((1 << maxDeltaBits) - 1) // 14 bits (max ~16 seconds)
// Combine into 32-bit address
address := (anchorFreqBits << 23) | (targetFreqBits << 14) | deltaBits
return address
}
// FingerprintAudio converts the file at songFilePath to WAV, reads it back,
// and returns a fingerprint map in which every entry is produced by
// Fingerprint with the given songID.
//
// The left channel is always processed. When the WAV info reports exactly two
// channels, the right channel is processed the same way and its entries are
// added to the same map with utils.ExtendMap.
//
// A non-nil error wraps the underlying conversion, read, or spectrogram
// failure (usable with errors.Is / errors.As); the returned map is nil then.
func FingerprintAudio(songFilePath string, songID uint32) (map[uint32]models.Couple, error) {
	wavFilePath, err := wav.ConvertToWAV(songFilePath)
	if err != nil {
		return nil, fmt.Errorf("error converting input file to WAV: %w", err)
	}

	wavInfo, err := wav.ReadWavInfo(wavFilePath)
	if err != nil {
		return nil, fmt.Errorf("error reading WAV info: %w", err)
	}

	fingerprint := make(map[uint32]models.Couple)

	// Left channel.
	spectro, err := Spectrogram(wavInfo.LeftChannelSamples, wavInfo.SampleRate)
	if err != nil {
		return nil, fmt.Errorf("error creating spectrogram: %w", err)
	}
	peaks := ExtractPeaks(spectro, wavInfo.Duration, wavInfo.SampleRate)
	utils.ExtendMap(fingerprint, Fingerprint(peaks, songID))

	// Right channel, only for two-channel input.
	if wavInfo.Channels == 2 {
		spectro, err = Spectrogram(wavInfo.RightChannelSamples, wavInfo.SampleRate)
		if err != nil {
			return nil, fmt.Errorf("error creating spectrogram for right channel: %w", err)
		}
		peaks = ExtractPeaks(spectro, wavInfo.Duration, wavInfo.SampleRate)
		utils.ExtendMap(fingerprint, Fingerprint(peaks, songID))
	}

	return fingerprint, nil
}

View file

@ -5,7 +5,6 @@ package shazam
import (
"fmt"
"math"
"song-recognition/db"
"song-recognition/utils"
"sort"
@ -30,7 +29,8 @@ func FindMatches(audioSample []float64, audioDuration float64, sampleRate int) (
return nil, time.Since(startTime), fmt.Errorf("failed to get spectrogram of samples: %v", err)
}
peaks := ExtractPeaks(spectrogram, audioDuration)
peaks := ExtractPeaks(spectrogram, audioDuration, sampleRate)
// peaks := ExtractPeaksLMX(spectrogram, true)
sampleFingerprint := Fingerprint(peaks, utils.GenerateUniqueID())
sampleFingerprintMap := make(map[uint32]uint32)
@ -38,7 +38,7 @@ func FindMatches(audioSample []float64, audioDuration float64, sampleRate int) (
sampleFingerprintMap[address] = couple.AnchorTimeMs
}
matches, _, err := FindMatchesFGP(sampleFingerprintMap)
matches, _, _ := FindMatchesFGP(sampleFingerprintMap)
return matches, time.Since(startTime), nil
}
@ -142,21 +142,32 @@ func filterMatches(
}
// analyzeRelativeTiming calculates a score for each song based on the
// relative timing between the song and the sample's anchor times.
// consistency of time offsets between the sample and database.
func analyzeRelativeTiming(matches map[uint32][][2]uint32) map[uint32]float64 {
scores := make(map[uint32]float64)
for songID, times := range matches {
count := 0
for i := 0; i < len(times); i++ {
for j := i + 1; j < len(times); j++ {
sampleDiff := math.Abs(float64(times[i][0] - times[j][0]))
dbDiff := math.Abs(float64(times[i][1] - times[j][1]))
if math.Abs(sampleDiff-dbDiff) < 100 { // Allow some tolerance
count++
}
offsetCounts := make(map[int32]int)
for _, timePair := range times {
sampleTime := int32(timePair[0])
dbTime := int32(timePair[1])
offset := dbTime - sampleTime
// Bin offsets in 100ms buckets to allow for small timing variations
offsetBucket := offset / 100
offsetCounts[offsetBucket]++
}
maxCount := 0
for _, count := range offsetCounts {
if count > maxCount {
maxCount = count
}
}
scores[songID] = float64(count)
scores[songID] = float64(maxCount)
}
return scores
}

View file

@ -8,13 +8,14 @@ import (
)
const (
dspRatio = 4
freqBinSize = 1024
maxFreq = 5000.0 // 5kHz
hopSize = freqBinSize / 32
dspRatio = 4
windowSize = 1024
maxFreq = 5000.0 // 5kHz
hopSize = windowSize / 2 // 50% overlap for better time-frequency resolution
windowType = "hanning" // choices: "hanning" or "hamming"
)
func Spectrogram(sample []float64, sampleRate int) ([][]complex128, error) {
func Spectrogram(sample []float64, sampleRate int) ([][]float64, error) {
filteredSample := LowPassFilter(maxFreq, float64(sampleRate), sample)
downsampledSample, err := Downsample(filteredSample, sampleRate, sampleRate/dspRatio)
@ -22,31 +23,42 @@ func Spectrogram(sample []float64, sampleRate int) ([][]complex128, error) {
return nil, fmt.Errorf("couldn't downsample audio sample: %v", err)
}
numOfWindows := len(downsampledSample) / (freqBinSize - hopSize)
spectrogram := make([][]complex128, numOfWindows)
window := make([]float64, freqBinSize)
window := make([]float64, windowSize)
for i := range window {
window[i] = 0.54 - 0.46*math.Cos(2*math.Pi*float64(i)/(float64(freqBinSize)-1))
theta := 2 * math.Pi * float64(i) / float64(windowSize-1)
switch windowType {
case "hamming":
window[i] = 0.54 - 0.46*math.Cos(theta)
default: // Hanning window
window[i] = 0.5 - 0.5*math.Cos(theta)
}
}
// Initialize spectrogram slice
spectrogram := make([][]float64, 0)
// Perform STFT
for i := 0; i < numOfWindows; i++ {
start := i * hopSize
end := start + freqBinSize
if end > len(downsampledSample) {
end = len(downsampledSample)
}
for start := 0; start+windowSize <= len(downsampledSample); start += hopSize {
end := start + windowSize
bin := make([]float64, freqBinSize)
copy(bin, downsampledSample[start:end])
frame := make([]float64, windowSize)
copy(frame, downsampledSample[start:end])
// Apply Hamming window
// Apply window
for j := range window {
bin[j] *= window[j]
frame[j] *= window[j]
}
spectrogram[i] = FFT(bin)
// Perform FFT
fftResult := FFT(frame)
// Convert complex spectrum to magnitude spectrum
magnitude := make([]float64, len(fftResult)/2)
for j := range magnitude {
magnitude[j] = cmplx.Abs(fftResult[j])
}
spectrogram = append(spectrogram, magnitude)
}
return spectrogram, nil
@ -107,43 +119,47 @@ func Downsample(input []float64, originalSampleRate, targetSampleRate int) ([]fl
return resampled, nil
}
// Peak represents a significant point in the spectrogram.
type Peak struct {
Time float64
Freq complex128
Freq float64 // Frequency in Hz
Time float64 // Time in seconds
}
// ExtractPeaks analyzes a spectrogram and extracts significant peaks in the frequency domain over time.
func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
func ExtractPeaks(spectrogram [][]float64, audioDuration float64, sampleRate int) []Peak {
if len(spectrogram) < 1 {
return []Peak{}
}
type maxies struct {
maxMag float64
maxFreq complex128
freqIdx int
}
bands := []struct{ min, max int }{{0, 10}, {10, 20}, {20, 40}, {40, 80}, {80, 160}, {160, 512}}
bands := []struct{ min, max int }{
{0, 10}, {10, 20}, {20, 40}, {40, 80}, {80, 160}, {160, 512},
}
var peaks []Peak
binDuration := audioDuration / float64(len(spectrogram))
frameDuration := audioDuration / float64(len(spectrogram))
for binIdx, bin := range spectrogram {
// Calculate frequency resolution (Hz per bin)
effectiveSampleRate := float64(sampleRate) / float64(dspRatio)
freqResolution := effectiveSampleRate / float64(windowSize)
for frameIdx, frame := range spectrogram {
var maxMags []float64
var maxFreqs []complex128
var freqIndices []float64
var freqIndices []int
binBandMaxies := []maxies{}
for _, band := range bands {
var maxx maxies
var maxMag float64
for idx, freq := range bin[band.min:band.max] {
magnitude := cmplx.Abs(freq)
if magnitude > maxMag {
maxMag = magnitude
for idx, mag := range frame[band.min:band.max] {
if mag > maxMag {
maxMag = mag
freqIdx := band.min + idx
maxx = maxies{magnitude, freq, freqIdx}
maxx = maxies{mag, freqIdx}
}
}
binBandMaxies = append(binBandMaxies, maxx)
@ -151,8 +167,7 @@ func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
for _, value := range binBandMaxies {
maxMags = append(maxMags, value.maxMag)
maxFreqs = append(maxFreqs, value.maxFreq)
freqIndices = append(freqIndices, float64(value.freqIdx))
freqIndices = append(freqIndices, value.freqIdx)
}
// Calculate the average magnitude
@ -160,17 +175,15 @@ func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
for _, max := range maxMags {
maxMagsSum += max
}
avg := maxMagsSum / float64(len(maxFreqs)) // * coefficient
avg := maxMagsSum / float64(len(maxMags))
// Add peaks that exceed the average magnitude
for i, value := range maxMags {
if value > avg {
peakTimeInBin := freqIndices[i] * binDuration / float64(len(bin))
peakTime := float64(frameIdx) * frameDuration
peakFreq := float64(freqIndices[i]) * freqResolution
// Calculate the absolute time of the peak
peakTime := float64(binIdx)*binDuration + peakTimeInBin
peaks = append(peaks, Peak{Time: peakTime, Freq: maxFreqs[i]})
peaks = append(peaks, Peak{Time: peakTime, Freq: peakFreq})
}
}
}

View file

@ -2,9 +2,7 @@ package spotify
import (
"context"
"errors"
"fmt"
"io"
"log/slog"
"os"
"os/exec"
@ -13,17 +11,15 @@ import (
"song-recognition/db"
"song-recognition/shazam"
"song-recognition/utils"
"song-recognition/wav"
"strings"
"sync"
"time"
"github.com/fatih/color"
"github.com/kkdai/youtube/v2"
"github.com/mdobak/go-xerrors"
)
const DELETE_SONG_FILE = false
const DELETE_SONG_FILE = false // Set true to delete the song file after fingerprinting
var yellow = color.New(color.FgYellow)
@ -135,9 +131,9 @@ func dlTrack(tracks []Track, path string) (int, error) {
trackCopy.Title, trackCopy.Artist = correctFilename(trackCopy.Title, trackCopy.Artist)
fileName := fmt.Sprintf("%s - %s", trackCopy.Title, trackCopy.Artist)
filePath := filepath.Join(path, fileName+".m4a")
filePath := filepath.Join(path, fileName)
err = downloadYTaudio(ytID, path, filePath)
filePath, err = downloadYTaudio(ytID, filePath)
if err != nil {
logMessage := fmt.Sprintf("'%s' by '%s' could not be downloaded", trackCopy.Title, trackCopy.Artist)
logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err)))
@ -151,12 +147,10 @@ func dlTrack(tracks []Track, path string) (int, error) {
return
}
utils.DeleteFile(filepath.Join(path, fileName+".m4a"))
wavFilePath := filepath.Join(path, fileName+".wav")
if err := addTags(wavFilePath, *trackCopy); err != nil {
logMessage := fmt.Sprintf("Error adding tags: %s", filePath+".wav")
logMessage := fmt.Sprintf("Error adding tags: %s", wavFilePath)
logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err)))
return
@ -186,65 +180,6 @@ func dlTrack(tracks []Track, path string) (int, error) {
}
/* github.com/kkdai/youtube */
// downloadYTaudio downloads the itag-140 audio stream of the YouTube video
// identified by id and writes it to filePath.
//
// path must name an existing directory; it is only validated here, the output
// location is taken from filePath. The download is retried while an attempt
// copies zero bytes, up to a fixed number of attempts.
//
// It returns an error when path is missing or not a directory, the video
// cannot be fetched, the video has no itag-140 format, the output file cannot
// be created, a stream cannot be opened or copied, or every attempt produced
// an empty file.
func downloadYTaudio(id, path, filePath string) error {
	logger := utils.GetLogger()

	dir, err := os.Stat(path)
	if err != nil {
		logger.Error("Error accessing path", slog.Any("error", err))
		return err
	}
	if !dir.IsDir() {
		err := errors.New("the path is not valid (not a dir)")
		logger.Error("Invalid directory path", slog.Any("error", err))
		return err
	}

	client := youtube.Client{}
	video, err := client.GetVideo(id)
	if err != nil {
		logger.Error("Error getting YouTube video", slog.Any("error", err))
		return err
	}

	/*
		itag code: 140, container: m4a, content: audio, bitrate: 128k
		change the Itag argument to 139 if you want smaller files (but with a bitrate of 48k)
		https://gist.github.com/sidneys/7095afe4da4ae58694d128b1034e01e2
	*/
	formats := video.Formats.Itag(140)
	if len(formats) == 0 {
		// Indexing formats[0] below would panic on an empty list.
		err := fmt.Errorf("no itag 140 audio format available for video %q", id)
		logger.Error("Error selecting audio format", slog.Any("error", err))
		return err
	}

	file, err := os.Create(filePath)
	if err != nil {
		logger.Error("Error creating file", slog.Any("error", err))
		return err
	}
	// Registered before the loop so the early returns below also close the file.
	defer file.Close()

	/* in some cases, when attempting to download the audio
	using the library github.com/kkdai/youtube,
	the download fails (and shows the file size as 0 bytes)
	until the second or third attempt. */
	const maxAttempts = 5
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		stream, _, err := client.GetStream(video, &formats[0])
		if err != nil {
			logger.Error("Error getting stream", slog.Any("error", err))
			return err
		}

		// The byte count from io.Copy is the file size, since earlier
		// attempts that reach this point wrote nothing.
		written, err := io.Copy(file, stream)
		stream.Close()
		if err != nil {
			logger.Error("Error copying stream to file", slog.Any("error", err))
			return err
		}
		if written > 0 {
			return nil
		}
	}

	err = fmt.Errorf("download of video %q produced an empty file after %d attempts", id, maxAttempts)
	logger.Error("Error downloading audio", slog.Any("error", err))
	return err
}
func addTags(file string, track Track) error {
logger := utils.GetLogger()
// Create a temporary file name by appending "2" before the extension
@ -255,7 +190,7 @@ func addTags(file string, track Track) error {
tempFile = baseName + "2" + ".wav" // Temporary filename ('/path/to/title - artist2.wav')
}
// Execute FFmpeg command to add metadata tags
// FFmpeg command to add metadata tags
cmd := exec.Command(
"ffmpeg",
"-i", file, // Input file path
@ -291,40 +226,20 @@ func ProcessAndSaveSong(songFilePath, songTitle, songArtist, ytID string) error
}
defer dbclient.Close()
wavFilePath, err := wav.ConvertToWAV(songFilePath, 1)
if err != nil {
logger.Error("Failed to convert to WAV", slog.Any("error", err))
return err
}
wavInfo, err := wav.ReadWavInfo(wavFilePath)
if err != nil {
logger.Error("Failed to read WAV info", slog.Any("error", err))
return err
}
samples, err := wav.WavBytesToSamples(wavInfo.Data)
if err != nil {
logger.Error("Error converting WAV bytes to samples", slog.Any("error", err))
return fmt.Errorf("error converting wav bytes to float64: %v", err)
}
spectro, err := shazam.Spectrogram(samples, wavInfo.SampleRate)
if err != nil {
logger.Error("Error creating spectrogram", slog.Any("error", err))
return fmt.Errorf("error creating spectrogram: %v", err)
}
songID, err := dbclient.RegisterSong(songTitle, songArtist, ytID)
if err != nil {
logger.Error("Failed to register song", slog.Any("error", err))
return err
return fmt.Errorf("error registering song '%s' by '%s': %v", songTitle, songArtist, err)
}
peaks := shazam.ExtractPeaks(spectro, wavInfo.Duration)
fingerprints := shazam.Fingerprint(peaks, songID)
fingerprint, err := shazam.FingerprintAudio(songFilePath, songID)
if err != nil {
dbclient.DeleteSongByID(songID)
logger.Error("Failed to create fingerprint", slog.String("wavFilePath", songFilePath))
return fmt.Errorf("error generating fingerprint for %s by %s", songTitle, songArtist)
}
err = dbclient.StoreFingerprints(fingerprints)
err = dbclient.StoreFingerprints(fingerprint)
if err != nil {
dbclient.DeleteSongByID(songID)
logger.Error("Failed to store fingerprints", slog.Any("error", err))

View file

@ -4,6 +4,11 @@ import (
"context"
"fmt"
"log"
"log/slog"
"os"
"os/exec"
"path/filepath"
"song-recognition/utils"
"errors"
"io"
@ -215,3 +220,37 @@ func ytSearch(searchTerm string, limit int) (results []*SearchResult, err error)
return results, nil
}
// downloadYTaudio shells out to the yt-dlp command line tool to fetch the
// best available audio stream of videoURL and have it converted to WAV.
//
// The directory part of outputFilePath must already exist and yt-dlp must be
// discoverable on PATH; both are checked before the command is started. On
// success the returned path is outputFilePath with ".wav" appended
// (presumably where yt-dlp leaves the converted file - verify against the
// yt-dlp output template rules). On failure the returned path is empty.
func downloadYTaudio(videoURL, outputFilePath string) (string, error) {
	const audioFmt = "wav"

	logger := utils.GetLogger()

	outputDir := filepath.Dir(outputFilePath)
	info, statErr := os.Stat(outputDir)
	if statErr != nil || !info.IsDir() {
		logger.Error("Invalid directory for output file", slog.Any("error", statErr))
		return "", errors.New("output directory does not exist or is not a directory")
	}

	if _, lookErr := exec.LookPath("yt-dlp"); lookErr != nil {
		logger.Error("yt-dlp not found in PATH", slog.Any("error", lookErr))
		return "", errors.New("yt-dlp is not installed or not in PATH")
	}

	ytArgs := []string{
		"-f", "bestaudio",
		"--extract-audio",
		"--audio-format", audioFmt,
		"-o", outputFilePath,
		videoURL,
	}

	// Capture stdout and stderr together so a failure can be logged in full.
	combined, runErr := exec.Command("yt-dlp", ytArgs...).CombinedOutput()
	if runErr != nil {
		logger.Error("yt-dlp command failed", slog.String("output", string(combined)), slog.Any("error", runErr))
		return "", runErr
	}

	return outputFilePath + "." + audioFmt, nil
}

View file

@ -27,3 +27,9 @@ func GetEnv(key string, fallback ...string) string {
}
return ""
}
// ExtendMap copies every key/value pair of src into dest. A key that is
// already present in dest has its value overwritten by the one from src.
// dest is modified in place; it must be non-nil whenever src is non-empty,
// because writing to a nil map panics.
func ExtendMap[K comparable, V any](dest, src map[K]V) {
	for key, value := range src {
		dest[key] = value
	}
}

View file

@ -1,64 +0,0 @@
//go:build js && wasm
// +build js,wasm
package main
import (
"song-recognition/shazam"
"song-recognition/utils"
"syscall/js"
)
func generateFingerprint(this js.Value, args []js.Value) interface{} {
if len(args) < 2 {
return js.ValueOf(map[string]interface{}{
"error": 1,
"data": "Expected audio array and sample rate",
})
}
if args[0].Type() != js.TypeObject || args[1].Type() != js.TypeNumber {
return js.ValueOf(map[string]interface{}{
"error": 2,
"data": "Invalid argument types; Expected audio array and samplerate (type: int)",
})
}
inputArray := args[0]
sampleRate := args[1].Int()
audioData := make([]float64, inputArray.Length())
for i := 0; i < inputArray.Length(); i++ {
audioData[i] = inputArray.Index(i).Float()
}
spectrogram, err := shazam.Spectrogram(audioData, sampleRate)
if err != nil {
return js.ValueOf(map[string]interface{}{
"error": 3,
"data": "Error generating spectrogram: " + err.Error(),
})
}
peaks := shazam.ExtractPeaks(spectrogram, float64(len(audioData)/sampleRate))
fingerprint := shazam.Fingerprint(peaks, utils.GenerateUniqueID())
fingerprintArray := []interface{}{}
for address, couple := range fingerprint {
entry := map[string]interface{}{
"address": address,
"anchorTime": couple.AnchorTimeMs,
}
fingerprintArray = append(fingerprintArray, entry)
}
return js.ValueOf(map[string]interface{}{
"error": 0,
"data": fingerprintArray,
})
}
func main() {
js.Global().Set("generateFingerprint", js.FuncOf(generateFingerprint))
select {}
}

View file

@ -6,21 +6,33 @@ import (
"os/exec"
"path/filepath"
"song-recognition/utils"
"strconv"
"strings"
)
// ConvertToWAV converts an input audio file to WAV format with specified channels.
func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, err error) {
func ConvertToWAV(inputFilePath string) (wavFilePath string, err error) {
_, err = os.Stat(inputFilePath)
if err != nil {
return "", fmt.Errorf("input file does not exist: %v", err)
}
if channels < 1 || channels > 2 {
channels = 1
to_stereoStr := utils.GetEnv("FINGERPRINT_STEREO", "false")
to_stereo, err := strconv.ParseBool(to_stereoStr)
if err != nil {
return "", fmt.Errorf("failed to convert env variable (%s) to bool: %v", "FINGERPRINT_STEREO", err)
}
channels := 1
if to_stereo {
channels = 2
}
fileExt := filepath.Ext(inputFilePath)
if fileExt != ".wav" {
defer os.Remove(inputFilePath)
}
outputFile := strings.TrimSuffix(inputFilePath, fileExt) + ".wav"
// Output file may already exist. If it does FFmpeg will fail as

View file

@ -94,49 +94,83 @@ func WriteWavFile(filename string, data []byte, sampleRate int, channels int, bi
return err
}
// WavInfo defines a struct containing information extracted from the WAV header
// together with the audio payload, as populated by ReadWavInfo:
//
//   - Channels and SampleRate are copied from the header.
//   - Duration is the length of the audio in seconds.
//   - Data holds the raw bytes that follow the first 44 bytes of the file.
//   - LeftChannelSamples holds the 16-bit samples scaled by 1/32768; for a
//     mono file it carries the only channel.
//   - RightChannelSamples is filled only for two-channel files.
type WavInfo struct {
Channels int
SampleRate int
Data []byte
Duration float64
Channels int
SampleRate int
Duration float64
Data []byte
LeftChannelSamples []float64
RightChannelSamples []float64
}
// ReadWavInfo reads a 16-bit PCM WAV file and returns its metadata and audio samples.
// Supports mono and stereo files. Note that it only supports 16-bit PCM format.
//
// The whole file is read into memory. The first 44 bytes are decoded as the
// header and every byte after them is treated as sample data.
//
// An error is returned when the file cannot be read, is shorter than 44
// bytes, does not carry RIFF/WAVE identifiers with audio format 1, is not
// 16 bits per sample, or has a channel count other than 1 or 2.
//
// NOTE(review): the fixed 44-byte offset presumably assumes a canonical
// header with no extra chunks before the sample data - confirm that all
// inputs are written that way.
func ReadWavInfo(filename string) (*WavInfo, error) {
data, err := ioutil.ReadFile(filename)
if err != nil {
return nil, err
}
// Anything shorter than 44 bytes cannot hold the header decoded below.
if len(data) < 44 {
return nil, errors.New("invalid WAV file size (too small)")
}
// Read header chunks
// Parse PCM header to extract metadata
// https://en.wikipedia.org/wiki/WAV#WAV_file_header
var header WavHeader
err = binary.Read(bytes.NewReader(data[:44]), binary.LittleEndian, &header)
if err != nil {
if err := binary.Read(bytes.NewReader(data[:44]), binary.LittleEndian, &header); err != nil {
return nil, err
}
// Reject anything that is not RIFF/WAVE with audio format 1.
if string(header.ChunkID[:]) != "RIFF" || string(header.Format[:]) != "WAVE" || header.AudioFormat != 1 {
if string(header.ChunkID[:]) != "RIFF" ||
string(header.Format[:]) != "WAVE" ||
header.AudioFormat != 1 {
return nil, errors.New("invalid WAV header format")
}
// Extract information
info := &WavInfo{
Channels: int(header.NumChannels),
SampleRate: int(header.SampleRate),
Data: data[44:],
}
// Calculate audio duration (assuming data contains PCM data)
if header.BitsPerSample == 16 {
info.Duration = float64(len(info.Data)) / float64(int(header.NumChannels)*2*int(header.SampleRate))
} else {
return nil, errors.New("unsupported bits per sample format")
if header.BitsPerSample != 16 {
return nil, errors.New("unsupported bitspersample (expect 16bit PCM)")
}
// Two bytes per 16-bit sample; a trailing odd byte, if any, is not decoded.
sampleCount := len(info.Data) / 2
int16Buf := make([]int16, sampleCount)
if err := binary.Read(bytes.NewReader(info.Data), binary.LittleEndian, int16Buf); err != nil {
return nil, err
}
const scale = 1.0 / 32768.0 // 16bit normalisation factor
switch header.NumChannels {
case 1:
// Mono: every sample goes to the left channel; the right stays unset.
left := make([]float64, sampleCount)
for i, s := range int16Buf {
left[i] = float64(s) * scale
}
info.LeftChannelSamples = left
case 2:
// Stereo: even buffer indices feed the left channel, odd indices the right.
frameCount := sampleCount / 2
left := make([]float64, frameCount)
right := make([]float64, frameCount)
for i := 0; i < frameCount; i++ {
left[i] = float64(int16Buf[2*i]) * scale
right[i] = float64(int16Buf[2*i+1]) * scale
}
info.LeftChannelSamples = left
info.RightChannelSamples = right
default:
return nil, errors.New("unsupported channel count (only mono/stereo)")
}
// Compute audio duration in seconds
// sampleCount spans all channels, hence the division by channels * rate.
info.Duration = float64(sampleCount) /
(float64(header.NumChannels) * float64(header.SampleRate))
return info, nil
}

22
wasm/build.sh Executable file
View file

@ -0,0 +1,22 @@
#!/bin/bash
# Build script for WASM fingerprint generator
#
# Compiles wasm_main.go for the js/wasm target and copies the resulting
# fingerprint.wasm into ../client/public/. Exits with status 1 when either
# the build or the copy fails.

# Work from the script's own directory so the relative paths below resolve
# regardless of where the script is invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1

echo "Building WASM module..."

export GOOS=js
export GOARCH=wasm

if ! go build -o fingerprint.wasm wasm_main.go; then
    echo "x WASM build failed"
    exit 1
fi
echo "✓ WASM build successful: fingerprint.wasm"

# Only announce the copy once it has actually succeeded.
if ! cp fingerprint.wasm ../client/public/; then
    echo "x Failed to copy fingerprint.wasm to client/public/"
    exit 1
fi
echo "✓ Copied fingerprint.wasm to client/public/"

25
wasm/go.mod Normal file
View file

@ -0,0 +1,25 @@
module wasm-fingerprint
go 1.23.0
toolchain go1.24.3
require song-recognition v0.0.0-00010101000000-000000000000
require (
github.com/golang/snappy v0.0.4 // indirect
github.com/klauspost/compress v1.17.6 // indirect
github.com/mattn/go-sqlite3 v1.14.22 // indirect
github.com/mdobak/go-xerrors v0.3.1 // indirect
github.com/montanaflynn/stats v0.7.1 // indirect
github.com/xdg-go/pbkdf2 v1.0.0 // indirect
github.com/xdg-go/scram v1.1.2 // indirect
github.com/xdg-go/stringprep v1.0.4 // indirect
github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect
go.mongodb.org/mongo-driver v1.14.0 // indirect
golang.org/x/crypto v0.33.0 // indirect
golang.org/x/sync v0.11.0 // indirect
golang.org/x/text v0.22.0 // indirect
)
replace song-recognition => ../server

59
wasm/go.sum Normal file
View file

@ -0,0 +1,59 @@
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI=
github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mdobak/go-xerrors v0.3.1 h1:XfqaLMNN5T4qsHSlLHGJ35f6YlDTVeINSYYeeuK4VpQ=
github.com/mdobak/go-xerrors v0.3.1/go.mod h1:nIR+HMAJuj/uNqyp5+MTN6PJ7ymuIJq3UVs9QCgAHbY=
github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a h1:fZHgsYlfvtyqToslyjUt3VOPF4J7aK/3MPcK7xp3PDk=
github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a/go.mod h1:ul22v+Nro/R083muKhosV54bj5niojjWZvU8xrevuH4=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.mongodb.org/mongo-driver v1.14.0 h1:P98w8egYRjYe3XDjxhYJagTokP/H6HzlsnojRgZRd80=
go.mongodb.org/mongo-driver v1.14.0/go.mod h1:Vzb0Mk/pa7e6cWw85R4F/endUC3u0U9jGcNU603k65c=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

111
wasm/wasm_main.go Normal file
View file

@ -0,0 +1,111 @@
//go:build js && wasm
// +build js,wasm
package main
import (
"song-recognition/models"
"song-recognition/shazam"
"song-recognition/utils"
"syscall/js"
)
// generateFingerprint takes audio data from the frontend and generates fingerprints
// Arguments: [audioArray, sampleRate, channels]
// Returns: { error: number, data: fingerprintArray or error message }
func generateFingerprint(this js.Value, args []js.Value) interface{} {
if len(args) < 3 {
return js.ValueOf(map[string]interface{}{
"error": 1,
"data": "Expected audio array, sample rate, and number of channels",
})
}
if args[0].Type() != js.TypeObject || args[1].Type() != js.TypeNumber {
return js.ValueOf(map[string]interface{}{
"error": 2,
"data": "Invalid argument types; Expected audio array and samplerate (type: int)",
})
}
channels := args[2].Int()
if args[2].Type() != js.TypeNumber || (channels != 1 && channels != 2) {
return js.ValueOf(map[string]interface{}{
"error": 2,
"data": "Invalid number of channels; expected 1 or 2",
})
}
inputArray := args[0]
sampleRate := args[1].Int()
audioData := make([]float64, inputArray.Length())
for i := 0; i < inputArray.Length(); i++ {
audioData[i] = inputArray.Index(i).Float()
}
fingerprint := make(map[uint32]models.Couple)
var leftChannel, rightChannel []float64
if channels == 1 {
leftChannel = audioData
spectrogram, err := shazam.Spectrogram(audioData, sampleRate)
if err != nil {
return js.ValueOf(map[string]interface{}{
"error": 3,
"data": "Error generating spectrogram: " + err.Error(),
})
}
peaks := shazam.ExtractPeaks(spectrogram, float64(len(audioData))/float64(sampleRate), sampleRate)
fingerprint = shazam.Fingerprint(peaks, utils.GenerateUniqueID())
} else {
for i := 0; i < len(audioData); i += 2 {
leftChannel = append(leftChannel, audioData[i])
rightChannel = append(rightChannel, audioData[i+1])
}
// LEFT
spectrogram, err := shazam.Spectrogram(leftChannel, sampleRate)
if err != nil {
return js.ValueOf(map[string]interface{}{
"error": 3,
"data": "Error generating spectrogram: " + err.Error(),
})
}
peaks := shazam.ExtractPeaks(spectrogram, float64(len(leftChannel))/float64(sampleRate), sampleRate)
utils.ExtendMap(fingerprint, shazam.Fingerprint(peaks, utils.GenerateUniqueID()))
// RIGHT
spectrogram, err = shazam.Spectrogram(rightChannel, sampleRate)
if err != nil {
return js.ValueOf(map[string]interface{}{
"error": 3,
"data": "Error generating spectrogram: " + err.Error(),
})
}
peaks = shazam.ExtractPeaks(spectrogram, float64(len(rightChannel))/float64(sampleRate), sampleRate)
utils.ExtendMap(fingerprint, shazam.Fingerprint(peaks, utils.GenerateUniqueID()))
}
fingerprintArray := []interface{}{}
for address, couple := range fingerprint {
entry := map[string]interface{}{
"address": address,
"anchorTime": couple.AnchorTimeMs,
}
fingerprintArray = append(fingerprintArray, entry)
}
return js.ValueOf(map[string]interface{}{
"error": 0,
"data": fingerprintArray,
})
}
func main() {
js.Global().Set("generateFingerprint", js.FuncOf(generateFingerprint))
js.Global().Call("dispatchEvent", js.Global().Get("Event").New("wasmReady"))
select {}
}