mirror of
https://github.com/cgzirim/seek-tune.git
synced 2025-12-19 01:44:19 +00:00
Merge pull request #44 from cgzirim/development
Fix Critical Audio Fingerprinting Algorithm Bugs
This commit is contained in:
commit
e825099e17
26 changed files with 831 additions and 360 deletions
104
.dockerignore
104
.dockerignore
|
|
@ -1,28 +1,88 @@
|
||||||
# Binaries for programs and plugins
|
# Git
|
||||||
*.exe
|
.git
|
||||||
*.ogg
|
.gitignore
|
||||||
*.m4a
|
.gitattributes
|
||||||
*.zip
|
|
||||||
*.exe~
|
|
||||||
*.dll
|
|
||||||
*.so
|
|
||||||
*.dylib
|
|
||||||
|
|
||||||
# Test binary, built with `go test -c`
|
# Documentation
|
||||||
*.test
|
*.md
|
||||||
|
!README.md
|
||||||
|
LICENSE
|
||||||
|
|
||||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
# IDE
|
||||||
*.out
|
|
||||||
|
|
||||||
# Dependency directories (remove the comment below to include it)
|
|
||||||
# vendor/
|
|
||||||
|
|
||||||
# Go workspace file
|
|
||||||
go.work
|
|
||||||
**/songs
|
|
||||||
.vscode
|
.vscode
|
||||||
|
.idea
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
|
||||||
package-lock.json
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
*sqlite3
|
# Node
|
||||||
|
client/node_modules
|
||||||
|
client/.env.local
|
||||||
|
client/.env.development.local
|
||||||
|
client/.env.test.local
|
||||||
|
client/.env.production.local
|
||||||
|
client/build
|
||||||
|
client/coverage
|
||||||
|
client/npm-debug.log*
|
||||||
|
client/yarn-debug.log*
|
||||||
|
client/yarn-error.log*
|
||||||
|
|
||||||
|
# Go
|
||||||
|
server/seek-tune
|
||||||
|
server/*.exe
|
||||||
|
server/*.test
|
||||||
|
server/*.out
|
||||||
|
server/vendor/
|
||||||
|
|
||||||
|
# Application data (don't copy into image)
|
||||||
|
server/songs/**
|
||||||
|
server/recordings/**
|
||||||
|
server/snippets/**
|
||||||
|
server/tmp/**
|
||||||
|
server/db/*.sqlite3
|
||||||
|
server/db/*.db
|
||||||
|
|
||||||
|
# Audio files
|
||||||
|
*.mp3
|
||||||
|
*.wav
|
||||||
|
*.m4a
|
||||||
|
*.ogg
|
||||||
|
*.flac
|
||||||
|
*.aac
|
||||||
|
|
||||||
|
# Archives
|
||||||
|
*.zip
|
||||||
|
*.tar
|
||||||
|
*.gz
|
||||||
|
*.rar
|
||||||
|
|
||||||
|
# Environment
|
||||||
.env
|
.env
|
||||||
|
.env.*
|
||||||
|
!.env.example
|
||||||
|
|
||||||
|
# CI/CD
|
||||||
|
.github
|
||||||
|
.gitlab-ci.yml
|
||||||
|
.travis.yml
|
||||||
|
|
||||||
|
# Docker
|
||||||
|
docker-compose*.yml
|
||||||
|
!docker-compose.yml
|
||||||
|
Dockerfile*
|
||||||
|
!Dockerfile
|
||||||
|
|
||||||
|
# WASM (already built separately if needed)
|
||||||
|
wasm/fingerprint.wasm
|
||||||
|
wasm/go.sum
|
||||||
|
|
||||||
|
# Scripts
|
||||||
|
scripts/
|
||||||
|
appspec.yml
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
*.log
|
||||||
|
|
@ -1,9 +0,0 @@
|
||||||
DB_TYPE=mongo
|
|
||||||
DB_USER=user
|
|
||||||
DB_PASS=password
|
|
||||||
DB_NAME=seek-tune
|
|
||||||
DB_HOST=192.168.0.1
|
|
||||||
DB_PORT=27017
|
|
||||||
REACT_APP_BACKEND_URL=http://localhost:5000
|
|
||||||
SPOTIFY_CLIENT_ID=yourclientid
|
|
||||||
SPOTIFY_CLIENT_SECRET=yoursecret
|
|
||||||
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -29,3 +29,5 @@ go.work
|
||||||
package-lock.json
|
package-lock.json
|
||||||
*sqlite3
|
*sqlite3
|
||||||
.env
|
.env
|
||||||
|
|
||||||
|
token.json
|
||||||
56
Dockerfile
56
Dockerfile
|
|
@ -1,33 +1,63 @@
|
||||||
# build react
|
# Build React frontend
|
||||||
FROM node:20-alpine AS build_react_stage
|
FROM node:20-alpine AS build_react_stage
|
||||||
|
|
||||||
RUN mkdir -p /home/react
|
WORKDIR /app/client
|
||||||
WORKDIR /home/react
|
|
||||||
|
|
||||||
COPY client/package.json ./
|
COPY client/package*.json ./
|
||||||
RUN npm install
|
RUN npm ci --only=production && npm cache clean --force
|
||||||
|
|
||||||
COPY client/ ./
|
COPY client/ ./
|
||||||
ARG REACT_APP_BACKEND_URL
|
ARG REACT_APP_BACKEND_URL
|
||||||
ENV REACT_APP_BACKEND_URL=${REACT_APP_BACKEND_URL}
|
ENV REACT_APP_BACKEND_URL=${REACT_APP_BACKEND_URL}
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
# build go
|
# Build Go backend
|
||||||
FROM golang:1.21.6
|
FROM golang:1.24-alpine AS build_go_stage
|
||||||
|
|
||||||
WORKDIR /home/seek-tune
|
RUN apk add --no-cache git ca-certificates tzdata gcc musl-dev
|
||||||
|
|
||||||
|
WORKDIR /app/server
|
||||||
|
|
||||||
COPY server/go.mod server/go.sum ./
|
COPY server/go.mod server/go.sum ./
|
||||||
RUN go mod download
|
RUN go mod download && go mod verify
|
||||||
|
|
||||||
COPY server/ ./
|
COPY server/ ./
|
||||||
ENV ENV=production
|
RUN go build -ldflags="-w -s" -o seek-tune
|
||||||
|
|
||||||
|
# Final runtime image
|
||||||
|
FROM alpine:latest
|
||||||
|
|
||||||
|
# Install runtime dependencies
|
||||||
|
RUN apk add --no-cache \
|
||||||
|
ca-certificates \
|
||||||
|
tzdata \
|
||||||
|
ffmpeg \
|
||||||
|
python3 \
|
||||||
|
py3-pip \
|
||||||
|
&& pip3 install --no-cache-dir yt-dlp --break-system-packages
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
COPY --from=build_go_stage /app/server/seek-tune .
|
||||||
|
|
||||||
RUN mkdir -p static
|
RUN mkdir -p static
|
||||||
COPY --from=build_react_stage /home/react/build static
|
COPY --from=build_react_stage /app/client/build ./static
|
||||||
|
|
||||||
RUN go build -o seek-tune
|
RUN mkdir -p db songs recordings snippets tmp && \
|
||||||
|
chmod -R 755 db songs recordings snippets tmp
|
||||||
|
|
||||||
|
ENV ENV=production
|
||||||
|
|
||||||
EXPOSE 5000
|
EXPOSE 5000
|
||||||
|
|
||||||
CMD [ "/home/seek-tune/seek-tune", "serve" ]
|
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
||||||
|
CMD wget --no-verbose --tries=1 --spider http://localhost:5000/ || exit 1
|
||||||
|
|
||||||
|
# Run as non-root user for security
|
||||||
|
RUN addgroup -g 1001 -S appuser && \
|
||||||
|
adduser -u 1001 -S appuser -G appuser && \
|
||||||
|
chown -R appuser:appuser /app
|
||||||
|
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
CMD ["./seek-tune", "serve", "http", "5000"]
|
||||||
41
README.md
41
README.md
|
|
@ -8,21 +8,17 @@
|
||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p align="center"><a href="https://drive.google.com/file/d/1I2esH2U4DtXHsNgYbUi4OL-ukV5i_1PI/view" target="_blank">Demo in Video</a></p>
|
<p align="center"><a href="https://drive.google.com/file/d/1I2esH2U4DtXHsNgYbUi4OL-ukV5i_1PI/view" target="_blank">Demo in Video</a> | <a href="https://www.youtube.com/watch?v=a0CVCcb0RJM" target="_blank">How it was made (YouTube)</a></p>
|
||||||
|
|
||||||
## Description 🎼
|
## Description 🎼
|
||||||
SeekTune is an implementation of Shazam's song recognition algorithm based on insights from these [resources](#resources--card_file_box). It integrates Spotify and YouTube APIs to find and download songs.
|
SeekTune is an implementation of Shazam's song recognition algorithm based on insights from these [resources](#resources--card_file_box). It integrates Spotify and YouTube APIs to find and download songs.
|
||||||
|
|
||||||
[//]: # (## Current Limitations
|
|
||||||
While the algorithm works excellently in matching a song with its exact file, it doesn't always find the right match from a recording. However, this project is still a work in progress. I'm hopeful about making it work, but I could definitely use some help :slightly_smiling_face:.
|
|
||||||
Additionally, it currently only supports song files in WAV format.
|
|
||||||
)
|
|
||||||
|
|
||||||
## Installation :desktop_computer:
|
## Installation :desktop_computer:
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
- Golang: [Install Golang](https://golang.org/dl/)
|
- Golang: [Install Golang](https://golang.org/dl/)
|
||||||
- FFmpeg: [Install FFmpeg](https://ffmpeg.org/download.html)
|
- FFmpeg: [Install FFmpeg](https://ffmpeg.org/download.html)
|
||||||
- NPM: To run the client (frontend).
|
- NPM: [Install Node](https://nodejs.org/en/download)
|
||||||
|
- YT-DLP: [Install YT-DLP](https://github.com/yt-dlp/yt-dlp/wiki/Installation)
|
||||||
|
|
||||||
### Steps
|
### Steps
|
||||||
📦 Clone the repository:
|
📦 Clone the repository:
|
||||||
|
|
@ -42,27 +38,17 @@ Prerequisites: [Docker](https://docs.docker.com/get-docker/) and [Docker Compose
|
||||||
docker-compose down
|
docker-compose down
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 🎧 Spotify API
|
#### 🎧 Spotify API Setup
|
||||||
|
|
||||||
To access Spotify metadata, the project now uses the official [Spotify Web API](https://developer.spotify.com/documentation/web-api/). This requires creating a developer application and retrieving a client ID and client secret.
|
1. Get credentials: Follow the [official getting started guide](https://developer.spotify.com/documentation/web-api/tutorials/getting-started) to create a Spotify app and obtain your **Client ID** and **Client Secret**.
|
||||||
|
|
||||||
Follow the [official getting started guide](https://developer.spotify.com/documentation/web-api/tutorials/getting-started#request-an-access-token) to:
|
2. Configure: Create a `.env` file in the `server` directory:
|
||||||
|
```bash
|
||||||
|
SPOTIFY_CLIENT_ID=your-client-id
|
||||||
|
SPOTIFY_CLIENT_SECRET=your-client-secret
|
||||||
|
```
|
||||||
|
|
||||||
1. Create a Spotify developer app.
|
The app will automatically fetch and cache access tokens as needed.
|
||||||
2. Copy your **Client ID** and **Client Secret**.
|
|
||||||
|
|
||||||
##### Setting up Credentials
|
|
||||||
Instead of using a credentials.json file, the application now reads these values from environment variables.
|
|
||||||
|
|
||||||
Create a .env file in the server directory with the following content:
|
|
||||||
|
|
||||||
```
|
|
||||||
SPOTIFY_CLIENT_ID=your-client-id
|
|
||||||
SPOTIFY_CLIENT_SECRET=your-client-secret
|
|
||||||
```
|
|
||||||
|
|
||||||
Make sure this .env file is loaded into your environment before running the server.
|
|
||||||
The application will automatically read this file to fetch and cache access tokens. If the token is expired or missing, a new one will be requested.
|
|
||||||
|
|
||||||
#### 💻 Set Up Natively
|
#### 💻 Set Up Natively
|
||||||
Install dependencies for the backend
|
Install dependencies for the backend
|
||||||
|
|
@ -109,7 +95,12 @@ go run *.go find <path-to-wav-file>
|
||||||
```
|
```
|
||||||
#### ▸ Delete fingerprints and songs 🗑️
|
#### ▸ Delete fingerprints and songs 🗑️
|
||||||
```
|
```
|
||||||
|
# Delete only database (default)
|
||||||
go run *.go erase
|
go run *.go erase
|
||||||
|
go run *.go erase db
|
||||||
|
|
||||||
|
# Delete both database and song files
|
||||||
|
go run *.go erase all
|
||||||
```
|
```
|
||||||
|
|
||||||
## Example :film_projector:
|
## Example :film_projector:
|
||||||
|
|
|
||||||
1
client/.env.example
Normal file
1
client/.env.example
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
REACT_APP_BACKEND_URL=http://localhost:5000
|
||||||
BIN
client/public/fingerprint.wasm
Executable file
BIN
client/public/fingerprint.wasm
Executable file
Binary file not shown.
Binary file not shown.
|
|
@ -15,7 +15,8 @@ import { fetchFile } from '@ffmpeg/util';
|
||||||
|
|
||||||
import AnimatedNumber from "./components/AnimatedNumber";
|
import AnimatedNumber from "./components/AnimatedNumber";
|
||||||
|
|
||||||
const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000";
|
const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5500";
|
||||||
|
const recordStereo = process.env.REACT_APP_RECORD_STEREO === "true" || false;
|
||||||
// https://seek-tune-rq4gn.ondigitalocean.app/
|
// https://seek-tune-rq4gn.ondigitalocean.app/
|
||||||
|
|
||||||
var socket = io(server);
|
var socket = io(server);
|
||||||
|
|
@ -91,7 +92,7 @@ function App() {
|
||||||
try {
|
try {
|
||||||
const go = new window.Go();
|
const go = new window.Go();
|
||||||
const result = await WebAssembly.instantiateStreaming(
|
const result = await WebAssembly.instantiateStreaming(
|
||||||
fetch("/main.wasm"),
|
fetch("/fingerprint.wasm"),
|
||||||
go.importObject
|
go.importObject
|
||||||
);
|
);
|
||||||
go.run(result.instance);
|
go.run(result.instance);
|
||||||
|
|
@ -175,15 +176,15 @@ function App() {
|
||||||
cleanUp();
|
cleanUp();
|
||||||
|
|
||||||
const inputFile = 'input.wav';
|
const inputFile = 'input.wav';
|
||||||
const outputFile = 'output_mono.wav';
|
const outputFile = 'output_formatted.wav';
|
||||||
|
|
||||||
// Convert audio to mono with a sample rate of 44100 Hz
|
|
||||||
await ffmpeg.writeFile(inputFile, await fetchFile(blob))
|
await ffmpeg.writeFile(inputFile, await fetchFile(blob))
|
||||||
const exitCode = await ffmpeg.exec([
|
const exitCode = await ffmpeg.exec([
|
||||||
'-i', inputFile,
|
'-i', inputFile,
|
||||||
'-c', 'pcm_s16le',
|
'-c', 'pcm_s16le',
|
||||||
'-ar', '44100',
|
'-ar', '44100',
|
||||||
'-ac', '1',
|
'-ac', recordStereo ? '2' : '1',
|
||||||
|
'-acodec', 'pcm_s16le',
|
||||||
'-f', 'wav',
|
'-f', 'wav',
|
||||||
outputFile
|
outputFile
|
||||||
]);
|
]);
|
||||||
|
|
@ -191,11 +192,11 @@ function App() {
|
||||||
throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`);
|
throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const monoData = await ffmpeg.readFile(outputFile);
|
const audioData = await ffmpeg.readFile(outputFile);
|
||||||
const monoBlob = new Blob([monoData.buffer], { type: 'audio/wav' });
|
const audioBlob = new Blob([audioData.buffer], { type: 'audio/wav' });
|
||||||
|
|
||||||
const reader = new FileReader();
|
const reader = new FileReader();
|
||||||
reader.readAsArrayBuffer(monoBlob);
|
reader.readAsArrayBuffer(audioBlob);
|
||||||
reader.onload = async (event) => {
|
reader.onload = async (event) => {
|
||||||
const arrayBuffer = event.target.result;
|
const arrayBuffer = event.target.result;
|
||||||
const audioContext = new AudioContext();
|
const audioContext = new AudioContext();
|
||||||
|
|
@ -205,7 +206,11 @@ function App() {
|
||||||
const audioData = audioBufferDecoded.getChannelData(0);
|
const audioData = audioBufferDecoded.getChannelData(0);
|
||||||
const audioArray = Array.from(audioData);
|
const audioArray = Array.from(audioData);
|
||||||
|
|
||||||
const result = genFingerprint(audioArray, audioBufferDecoded.sampleRate);
|
const result = genFingerprint(
|
||||||
|
audioArray,
|
||||||
|
audioBufferDecoded.sampleRate,
|
||||||
|
audioBufferDecoded.numberOfChannels
|
||||||
|
);
|
||||||
if (result.error !== 0) {
|
if (result.error !== 0) {
|
||||||
toast["error"](() => <div>An error occured</div>)
|
toast["error"](() => <div>An error occured</div>)
|
||||||
console.log("An error occured: ", result)
|
console.log("An error occured: ", result)
|
||||||
|
|
@ -288,7 +293,7 @@ function App() {
|
||||||
return (
|
return (
|
||||||
<div className="App">
|
<div className="App">
|
||||||
<div className="TopHeader">
|
<div className="TopHeader">
|
||||||
<h2 style={{ color: "#374151" }}>!Shazam</h2>
|
<h2 style={{ color: "#374151" }}>SeekTune</h2>
|
||||||
<h4 style={{ display: "flex", justifyContent: "flex-end" }}>
|
<h4 style={{ display: "flex", justifyContent: "flex-end" }}>
|
||||||
<AnimatedNumber includeComma={true} animateToNumber={totalSongs} />
|
<AnimatedNumber includeComma={true} animateToNumber={totalSongs} />
|
||||||
Songs
|
Songs
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,21 @@
|
||||||
version: '3.1'
|
version: '3.8'
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
seek-tune-db:
|
seek-tune-db:
|
||||||
|
driver: local
|
||||||
seek-tune-songs:
|
seek-tune-songs:
|
||||||
|
driver: local
|
||||||
|
seek-tune-recordings:
|
||||||
|
driver: local
|
||||||
|
|
||||||
services:
|
services:
|
||||||
seek-tune:
|
seek-tune:
|
||||||
image: 'seek-tune'
|
image: seek-tune:latest
|
||||||
|
container_name: seek-tune-app
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
ports:
|
ports:
|
||||||
- 8080:5000
|
- "${HOST_PORT:-8080}:5000"
|
||||||
|
|
||||||
environment:
|
environment:
|
||||||
DB_TYPE: ${DB_TYPE:-sqlite}
|
DB_TYPE: ${DB_TYPE:-sqlite}
|
||||||
|
|
@ -19,13 +25,66 @@ services:
|
||||||
DB_HOST: ${DB_HOST:-localhost}
|
DB_HOST: ${DB_HOST:-localhost}
|
||||||
DB_PORT: ${DB_PORT:-27017}
|
DB_PORT: ${DB_PORT:-27017}
|
||||||
|
|
||||||
|
ENV: production
|
||||||
REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080}
|
REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080}
|
||||||
|
|
||||||
|
SPOTIFY_CLIENT_ID: ${SPOTIFY_CLIENT_ID:-}
|
||||||
|
SPOTIFY_CLIENT_SECRET: ${SPOTIFY_CLIENT_SECRET:-}
|
||||||
|
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
args:
|
args:
|
||||||
REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080}
|
REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080}
|
||||||
|
cache_from:
|
||||||
|
- seek-tune:latest
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
- seek-tune-db:/home/seek-tune/db
|
- seek-tune-db:/app/db
|
||||||
- seek-tune-songs:/home/seek-tune/songs
|
- seek-tune-songs:/app/songs
|
||||||
|
- seek-tune-recordings:/app/recordings
|
||||||
|
# Optional: Mount local songs directory for development
|
||||||
|
# - ./server/songs:/app/songs
|
||||||
|
|
||||||
|
# Resource limits (adjust based on your needs)
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '2.0'
|
||||||
|
memory: 2G
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5000/"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 10s
|
||||||
|
|
||||||
|
logging:
|
||||||
|
driver: "json-file"
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
|
||||||
|
# Optional: MongoDB service (if using MongoDB instead of SQLite)
|
||||||
|
# mongodb:
|
||||||
|
# image: mongo:7
|
||||||
|
# container_name: seek-tune-mongo
|
||||||
|
# restart: unless-stopped
|
||||||
|
# environment:
|
||||||
|
# MONGO_INITDB_ROOT_USERNAME: ${DB_USER:-root}
|
||||||
|
# MONGO_INITDB_ROOT_PASSWORD: ${DB_PASSWORD:-password}
|
||||||
|
# MONGO_INITDB_DATABASE: ${DB_NAME:-seek_tune_db}
|
||||||
|
# ports:
|
||||||
|
# - "27017:27017"
|
||||||
|
# volumes:
|
||||||
|
# - seek-tune-db:/data/db
|
||||||
|
# healthcheck:
|
||||||
|
# test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
|
||||||
|
# interval: 10s
|
||||||
|
# timeout: 10s
|
||||||
|
# retries: 5
|
||||||
|
# start_period: 40s
|
||||||
|
|
|
||||||
14
server/.env.example
Normal file
14
server/.env.example
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
DB_TYPE=mongo # or sqlite
|
||||||
|
DB_USER=user
|
||||||
|
DB_PASS=password
|
||||||
|
DB_NAME=seek-tune
|
||||||
|
DB_HOST=192.168.0.1
|
||||||
|
DB_PORT=27017
|
||||||
|
|
||||||
|
# Set to true to enable stereo fingerprinting (uses more storage but may improve accuracy)
|
||||||
|
FINGERPRINT_STEREO=false
|
||||||
|
|
||||||
|
SPOTIFY_CLIENT_ID=yourclientid
|
||||||
|
SPOTIFY_CLIENT_SECRET=yoursecret
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
"song-recognition/db"
|
"song-recognition/db"
|
||||||
"song-recognition/shazam"
|
"song-recognition/shazam"
|
||||||
"song-recognition/spotify"
|
"song-recognition/spotify"
|
||||||
|
|
@ -34,19 +35,24 @@ const (
|
||||||
var yellow = color.New(color.FgYellow)
|
var yellow = color.New(color.FgYellow)
|
||||||
|
|
||||||
func find(filePath string) {
|
func find(filePath string) {
|
||||||
wavInfo, err := wav.ReadWavInfo(filePath)
|
wavFilePath, err := wav.ConvertToWAV(filePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
yellow.Println("Error reading wave info:", err)
|
yellow.Println("Error converting to WAV:", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
samples, err := wav.WavBytesToSamples(wavInfo.Data)
|
fingerprint, err := shazam.FingerprintAudio(wavFilePath, utils.GenerateUniqueID())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
yellow.Println("Error converting to samples:", err)
|
yellow.Println("Error generating fingerprint for sample: ", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
matches, searchDuration, err := shazam.FindMatches(samples, wavInfo.Duration, wavInfo.SampleRate)
|
sampleFingerprint := make(map[uint32]uint32)
|
||||||
|
for address, couple := range fingerprint {
|
||||||
|
sampleFingerprint[address] = couple.AnchorTimeMs
|
||||||
|
}
|
||||||
|
|
||||||
|
matches, searchDuration, err := shazam.FindMatchesFGP(sampleFingerprint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
yellow.Println("Error finding matches:", err)
|
yellow.Println("Error finding matches:", err)
|
||||||
return
|
return
|
||||||
|
|
@ -193,7 +199,7 @@ func serveHTTP(socketServer *socketio.Server, serveHTTPS bool, port string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func erase(songsDir string) {
|
func erase(songsDir string, dbOnly bool, all bool) {
|
||||||
logger := utils.GetLogger()
|
logger := utils.GetLogger()
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|
||||||
|
|
@ -216,26 +222,31 @@ func erase(songsDir string) {
|
||||||
logger.ErrorContext(ctx, msg, slog.Any("error", err))
|
logger.ErrorContext(ctx, msg, slog.Any("error", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
// delete song files
|
fmt.Println("Database cleared")
|
||||||
err = filepath.Walk(songsDir, func(path string, info os.FileInfo, err error) error {
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if !info.IsDir() {
|
// delete song files only when `all` is true (CLI: `erase all`)
|
||||||
ext := filepath.Ext(path)
|
if all {
|
||||||
if ext == ".wav" || ext == ".m4a" {
|
err = filepath.Walk(songsDir, func(path string, info os.FileInfo, err error) error {
|
||||||
err := os.Remove(path)
|
if err != nil {
|
||||||
if err != nil {
|
return err
|
||||||
return err
|
}
|
||||||
|
|
||||||
|
if !info.IsDir() {
|
||||||
|
ext := filepath.Ext(path)
|
||||||
|
if ext == ".wav" || ext == ".m4a" {
|
||||||
|
err := os.Remove(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
msg := fmt.Sprintf("Error walking through directory %s: %v\n", songsDir, err)
|
||||||
|
logger.ErrorContext(ctx, msg, slog.Any("error", err))
|
||||||
}
|
}
|
||||||
return nil
|
fmt.Println("Songs folder cleared")
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
msg := fmt.Sprintf("Error walking through directory %s: %v\n", songsDir, err)
|
|
||||||
logger.ErrorContext(ctx, msg, slog.Any("error", err))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println("Erase complete")
|
fmt.Println("Erase complete")
|
||||||
|
|
@ -249,6 +260,7 @@ func save(path string, force bool) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if fileInfo.IsDir() {
|
if fileInfo.IsDir() {
|
||||||
|
var filePaths []string
|
||||||
err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error {
|
err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Error walking the path %v: %v\n", filePath, err)
|
fmt.Printf("Error walking the path %v: %v\n", filePath, err)
|
||||||
|
|
@ -256,16 +268,16 @@ func save(path string, force bool) {
|
||||||
}
|
}
|
||||||
// Process only files, skip directories
|
// Process only files, skip directories
|
||||||
if !info.IsDir() {
|
if !info.IsDir() {
|
||||||
err := saveSong(filePath, force)
|
filePaths = append(filePaths, filePath)
|
||||||
if err != nil {
|
|
||||||
fmt.Printf("Error saving song (%v): %v\n", filePath, err)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("Error walking the directory %v: %v\n", path, err)
|
fmt.Printf("Error walking the directory %v: %v\n", path, err)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
processFilesConCurrently(filePaths, force)
|
||||||
} else {
|
} else {
|
||||||
err := saveSong(path, force)
|
err := saveSong(path, force)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -274,6 +286,50 @@ func save(path string, force bool) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func processFilesConCurrently(filePaths []string, force bool) {
|
||||||
|
maxWorkers := runtime.NumCPU() / 2
|
||||||
|
numFiles := len(filePaths)
|
||||||
|
|
||||||
|
if numFiles == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if numFiles < maxWorkers {
|
||||||
|
maxWorkers = numFiles
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs := make(chan string, numFiles)
|
||||||
|
results := make(chan error, numFiles)
|
||||||
|
|
||||||
|
for w := 0; w < maxWorkers; w++ {
|
||||||
|
go func(workerID int) {
|
||||||
|
for filePath := range jobs {
|
||||||
|
err := saveSong(filePath, force)
|
||||||
|
results <- err
|
||||||
|
}
|
||||||
|
}(w + 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, filePath := range filePaths {
|
||||||
|
jobs <- filePath
|
||||||
|
}
|
||||||
|
close(jobs)
|
||||||
|
|
||||||
|
successCount := 0
|
||||||
|
errorCount := 0
|
||||||
|
for i := 0; i < numFiles; i++ {
|
||||||
|
err := <-results
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Error: %v\n", err)
|
||||||
|
errorCount++
|
||||||
|
} else {
|
||||||
|
successCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("\n ->> Processed %d files: %d successful, %d failed\n", numFiles, successCount, errorCount)
|
||||||
|
}
|
||||||
|
|
||||||
func saveSong(filePath string, force bool) error {
|
func saveSong(filePath string, force bool) error {
|
||||||
metadata, err := wav.GetMetadata(filePath)
|
metadata, err := wav.GetMetadata(filePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,8 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"song-recognition/utils"
|
"song-recognition/utils"
|
||||||
|
|
||||||
"github.com/mdobak/go-xerrors"
|
|
||||||
"github.com/joho/godotenv"
|
"github.com/joho/godotenv"
|
||||||
|
"github.com/mdobak/go-xerrors"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
|
@ -32,6 +32,12 @@ func main() {
|
||||||
|
|
||||||
if len(os.Args) < 2 {
|
if len(os.Args) < 2 {
|
||||||
fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands")
|
fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands")
|
||||||
|
fmt.Println("\nUsage examples:")
|
||||||
|
fmt.Println(" find <path_to_wav_file>")
|
||||||
|
fmt.Println(" download <spotify_url>")
|
||||||
|
fmt.Println(" erase [db | all] (default: db)")
|
||||||
|
fmt.Println(" save [-f|--force] <path_to_file_or_dir>")
|
||||||
|
fmt.Println(" serve [-proto <http|https>] [-p <port>]")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
_ = godotenv.Load()
|
_ = godotenv.Load()
|
||||||
|
|
@ -58,7 +64,28 @@ func main() {
|
||||||
serveCmd.Parse(os.Args[2:])
|
serveCmd.Parse(os.Args[2:])
|
||||||
serve(*protocol, *port)
|
serve(*protocol, *port)
|
||||||
case "erase":
|
case "erase":
|
||||||
erase(SONGS_DIR)
|
// Default is to clear only database (db mode)
|
||||||
|
dbOnly := true
|
||||||
|
all := false
|
||||||
|
|
||||||
|
if len(os.Args) > 2 {
|
||||||
|
subCmd := os.Args[2]
|
||||||
|
switch subCmd {
|
||||||
|
case "db":
|
||||||
|
dbOnly = true
|
||||||
|
all = false
|
||||||
|
case "all":
|
||||||
|
dbOnly = false
|
||||||
|
all = true
|
||||||
|
default:
|
||||||
|
fmt.Println("Usage: main.go erase [db | all]")
|
||||||
|
fmt.Println(" db : only clear the database (default)")
|
||||||
|
fmt.Println(" all : clear database and songs folder")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
erase(SONGS_DIR, dbOnly, all)
|
||||||
case "save":
|
case "save":
|
||||||
indexCmd := flag.NewFlagSet("save", flag.ExitOnError)
|
indexCmd := flag.NewFlagSet("save", flag.ExitOnError)
|
||||||
force := indexCmd.Bool("force", false, "save song with or without YouTube ID")
|
force := indexCmd.Bool("force", false, "save song with or without YouTube ID")
|
||||||
|
|
@ -72,6 +99,12 @@ func main() {
|
||||||
save(filePath, *force)
|
save(filePath, *force)
|
||||||
default:
|
default:
|
||||||
fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands")
|
fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands")
|
||||||
|
fmt.Println("\nUsage examples:")
|
||||||
|
fmt.Println(" find <path_to_wav_file>")
|
||||||
|
fmt.Println(" download <spotify_url>")
|
||||||
|
fmt.Println(" erase [db | all] (default: db)")
|
||||||
|
fmt.Println(" save [-f|--force] <path_to_file_or_dir>")
|
||||||
|
fmt.Println(" serve [-proto <http|https>] [-p <port>]")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,10 @@
|
||||||
package shazam
|
package shazam
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"song-recognition/models"
|
"song-recognition/models"
|
||||||
|
"song-recognition/utils"
|
||||||
|
"song-recognition/wav"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|
@ -23,7 +26,10 @@ func Fingerprint(peaks []Peak, songID uint32) map[uint32]models.Couple {
|
||||||
address := createAddress(anchor, target)
|
address := createAddress(anchor, target)
|
||||||
anchorTimeMs := uint32(anchor.Time * 1000)
|
anchorTimeMs := uint32(anchor.Time * 1000)
|
||||||
|
|
||||||
fingerprints[address] = models.Couple{anchorTimeMs, songID}
|
fingerprints[address] = models.Couple{
|
||||||
|
AnchorTimeMs: anchorTimeMs,
|
||||||
|
SongID: songID,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -35,12 +41,52 @@ func Fingerprint(peaks []Peak, songID uint32) map[uint32]models.Couple {
|
||||||
// the anchor and target points, and other bits represent the time difference (delta time)
|
// the anchor and target points, and other bits represent the time difference (delta time)
|
||||||
// between them. This function combines these components into a single address (a hash).
|
// between them. This function combines these components into a single address (a hash).
|
||||||
func createAddress(anchor, target Peak) uint32 {
|
func createAddress(anchor, target Peak) uint32 {
|
||||||
anchorFreq := int(real(anchor.Freq))
|
anchorFreqBin := uint32(anchor.Freq / 10) // Scale down to fit in 9 bits
|
||||||
targetFreq := int(real(target.Freq))
|
targetFreqBin := uint32(target.Freq / 10)
|
||||||
deltaMs := uint32((target.Time - anchor.Time) * 1000)
|
|
||||||
|
|
||||||
// Combine the frequency of the anchor, target, and delta time into a 32-bit address
|
deltaMsRaw := uint32((target.Time - anchor.Time) * 1000)
|
||||||
address := uint32(anchorFreq<<23) | uint32(targetFreq<<14) | deltaMs
|
|
||||||
|
// Mask to fit within bit constraints
|
||||||
|
anchorFreqBits := anchorFreqBin & ((1 << maxFreqBits) - 1) // 9 bits
|
||||||
|
targetFreqBits := targetFreqBin & ((1 << maxFreqBits) - 1) // 9 bits
|
||||||
|
deltaBits := deltaMsRaw & ((1 << maxDeltaBits) - 1) // 14 bits (max ~16 seconds)
|
||||||
|
|
||||||
|
// Combine into 32-bit address
|
||||||
|
address := (anchorFreqBits << 23) | (targetFreqBits << 14) | deltaBits
|
||||||
|
|
||||||
return address
|
return address
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func FingerprintAudio(songFilePath string, songID uint32) (map[uint32]models.Couple, error) {
|
||||||
|
wavFilePath, err := wav.ConvertToWAV(songFilePath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error converting input file to WAV: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
wavInfo, err := wav.ReadWavInfo(wavFilePath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error reading WAV info: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fingerprint := make(map[uint32]models.Couple)
|
||||||
|
|
||||||
|
spectro, err := Spectrogram(wavInfo.LeftChannelSamples, wavInfo.SampleRate)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error creating spectrogram: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
peaks := ExtractPeaks(spectro, wavInfo.Duration, wavInfo.SampleRate)
|
||||||
|
utils.ExtendMap(fingerprint, Fingerprint(peaks, songID))
|
||||||
|
|
||||||
|
if wavInfo.Channels == 2 {
|
||||||
|
spectro, err = Spectrogram(wavInfo.RightChannelSamples, wavInfo.SampleRate)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error creating spectrogram for right channel: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
peaks = ExtractPeaks(spectro, wavInfo.Duration, wavInfo.SampleRate)
|
||||||
|
utils.ExtendMap(fingerprint, Fingerprint(peaks, songID))
|
||||||
|
}
|
||||||
|
|
||||||
|
return fingerprint, nil
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,6 @@ package shazam
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
|
||||||
"song-recognition/db"
|
"song-recognition/db"
|
||||||
"song-recognition/utils"
|
"song-recognition/utils"
|
||||||
"sort"
|
"sort"
|
||||||
|
|
@ -30,7 +29,8 @@ func FindMatches(audioSample []float64, audioDuration float64, sampleRate int) (
|
||||||
return nil, time.Since(startTime), fmt.Errorf("failed to get spectrogram of samples: %v", err)
|
return nil, time.Since(startTime), fmt.Errorf("failed to get spectrogram of samples: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
peaks := ExtractPeaks(spectrogram, audioDuration)
|
peaks := ExtractPeaks(spectrogram, audioDuration, sampleRate)
|
||||||
|
// peaks := ExtractPeaksLMX(spectrogram, true)
|
||||||
sampleFingerprint := Fingerprint(peaks, utils.GenerateUniqueID())
|
sampleFingerprint := Fingerprint(peaks, utils.GenerateUniqueID())
|
||||||
|
|
||||||
sampleFingerprintMap := make(map[uint32]uint32)
|
sampleFingerprintMap := make(map[uint32]uint32)
|
||||||
|
|
@ -38,7 +38,7 @@ func FindMatches(audioSample []float64, audioDuration float64, sampleRate int) (
|
||||||
sampleFingerprintMap[address] = couple.AnchorTimeMs
|
sampleFingerprintMap[address] = couple.AnchorTimeMs
|
||||||
}
|
}
|
||||||
|
|
||||||
matches, _, err := FindMatchesFGP(sampleFingerprintMap)
|
matches, _, _ := FindMatchesFGP(sampleFingerprintMap)
|
||||||
|
|
||||||
return matches, time.Since(startTime), nil
|
return matches, time.Since(startTime), nil
|
||||||
}
|
}
|
||||||
|
|
@ -142,21 +142,32 @@ func filterMatches(
|
||||||
}
|
}
|
||||||
|
|
||||||
// analyzeRelativeTiming calculates a score for each song based on the
|
// analyzeRelativeTiming calculates a score for each song based on the
|
||||||
// relative timing between the song and the sample's anchor times.
|
// consistency of time offsets between the sample and database.
|
||||||
func analyzeRelativeTiming(matches map[uint32][][2]uint32) map[uint32]float64 {
|
func analyzeRelativeTiming(matches map[uint32][][2]uint32) map[uint32]float64 {
|
||||||
scores := make(map[uint32]float64)
|
scores := make(map[uint32]float64)
|
||||||
|
|
||||||
for songID, times := range matches {
|
for songID, times := range matches {
|
||||||
count := 0
|
offsetCounts := make(map[int32]int)
|
||||||
for i := 0; i < len(times); i++ {
|
|
||||||
for j := i + 1; j < len(times); j++ {
|
for _, timePair := range times {
|
||||||
sampleDiff := math.Abs(float64(times[i][0] - times[j][0]))
|
sampleTime := int32(timePair[0])
|
||||||
dbDiff := math.Abs(float64(times[i][1] - times[j][1]))
|
dbTime := int32(timePair[1])
|
||||||
if math.Abs(sampleDiff-dbDiff) < 100 { // Allow some tolerance
|
offset := dbTime - sampleTime
|
||||||
count++
|
|
||||||
}
|
// Bin offsets in 100ms buckets to allow for small timing variations
|
||||||
|
offsetBucket := offset / 100
|
||||||
|
offsetCounts[offsetBucket]++
|
||||||
|
}
|
||||||
|
|
||||||
|
maxCount := 0
|
||||||
|
for _, count := range offsetCounts {
|
||||||
|
if count > maxCount {
|
||||||
|
maxCount = count
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
scores[songID] = float64(count)
|
|
||||||
|
scores[songID] = float64(maxCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
return scores
|
return scores
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -8,13 +8,14 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
dspRatio = 4
|
dspRatio = 4
|
||||||
freqBinSize = 1024
|
windowSize = 1024
|
||||||
maxFreq = 5000.0 // 5kHz
|
maxFreq = 5000.0 // 5kHz
|
||||||
hopSize = freqBinSize / 32
|
hopSize = windowSize / 2 // 50% overlap for better time-frequency resolution
|
||||||
|
windowType = "hanning" // choices: "hanning" or "hamming"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Spectrogram(sample []float64, sampleRate int) ([][]complex128, error) {
|
func Spectrogram(sample []float64, sampleRate int) ([][]float64, error) {
|
||||||
filteredSample := LowPassFilter(maxFreq, float64(sampleRate), sample)
|
filteredSample := LowPassFilter(maxFreq, float64(sampleRate), sample)
|
||||||
|
|
||||||
downsampledSample, err := Downsample(filteredSample, sampleRate, sampleRate/dspRatio)
|
downsampledSample, err := Downsample(filteredSample, sampleRate, sampleRate/dspRatio)
|
||||||
|
|
@ -22,31 +23,42 @@ func Spectrogram(sample []float64, sampleRate int) ([][]complex128, error) {
|
||||||
return nil, fmt.Errorf("couldn't downsample audio sample: %v", err)
|
return nil, fmt.Errorf("couldn't downsample audio sample: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
numOfWindows := len(downsampledSample) / (freqBinSize - hopSize)
|
window := make([]float64, windowSize)
|
||||||
spectrogram := make([][]complex128, numOfWindows)
|
|
||||||
|
|
||||||
window := make([]float64, freqBinSize)
|
|
||||||
for i := range window {
|
for i := range window {
|
||||||
window[i] = 0.54 - 0.46*math.Cos(2*math.Pi*float64(i)/(float64(freqBinSize)-1))
|
theta := 2 * math.Pi * float64(i) / float64(windowSize-1)
|
||||||
|
switch windowType {
|
||||||
|
case "hamming":
|
||||||
|
window[i] = 0.54 - 0.46*math.Cos(theta)
|
||||||
|
default: // Hanning window
|
||||||
|
window[i] = 0.5 - 0.5*math.Cos(theta)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize spectrogram slice
|
||||||
|
spectrogram := make([][]float64, 0)
|
||||||
|
|
||||||
// Perform STFT
|
// Perform STFT
|
||||||
for i := 0; i < numOfWindows; i++ {
|
for start := 0; start+windowSize <= len(downsampledSample); start += hopSize {
|
||||||
start := i * hopSize
|
end := start + windowSize
|
||||||
end := start + freqBinSize
|
|
||||||
if end > len(downsampledSample) {
|
|
||||||
end = len(downsampledSample)
|
|
||||||
}
|
|
||||||
|
|
||||||
bin := make([]float64, freqBinSize)
|
frame := make([]float64, windowSize)
|
||||||
copy(bin, downsampledSample[start:end])
|
copy(frame, downsampledSample[start:end])
|
||||||
|
|
||||||
// Apply Hamming window
|
// Apply window
|
||||||
for j := range window {
|
for j := range window {
|
||||||
bin[j] *= window[j]
|
frame[j] *= window[j]
|
||||||
}
|
}
|
||||||
|
|
||||||
spectrogram[i] = FFT(bin)
|
// Perform FFT
|
||||||
|
fftResult := FFT(frame)
|
||||||
|
|
||||||
|
// Convert complex spectrum to magnitude spectrum
|
||||||
|
magnitude := make([]float64, len(fftResult)/2)
|
||||||
|
for j := range magnitude {
|
||||||
|
magnitude[j] = cmplx.Abs(fftResult[j])
|
||||||
|
}
|
||||||
|
|
||||||
|
spectrogram = append(spectrogram, magnitude)
|
||||||
}
|
}
|
||||||
|
|
||||||
return spectrogram, nil
|
return spectrogram, nil
|
||||||
|
|
@ -107,43 +119,47 @@ func Downsample(input []float64, originalSampleRate, targetSampleRate int) ([]fl
|
||||||
return resampled, nil
|
return resampled, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Peak represents a significant point in the spectrogram.
|
||||||
type Peak struct {
|
type Peak struct {
|
||||||
Time float64
|
Freq float64 // Frequency in Hz
|
||||||
Freq complex128
|
Time float64 // Time in seconds
|
||||||
}
|
}
|
||||||
|
|
||||||
// ExtractPeaks analyzes a spectrogram and extracts significant peaks in the frequency domain over time.
|
// ExtractPeaks analyzes a spectrogram and extracts significant peaks in the frequency domain over time.
|
||||||
func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
|
func ExtractPeaks(spectrogram [][]float64, audioDuration float64, sampleRate int) []Peak {
|
||||||
if len(spectrogram) < 1 {
|
if len(spectrogram) < 1 {
|
||||||
return []Peak{}
|
return []Peak{}
|
||||||
}
|
}
|
||||||
|
|
||||||
type maxies struct {
|
type maxies struct {
|
||||||
maxMag float64
|
maxMag float64
|
||||||
maxFreq complex128
|
|
||||||
freqIdx int
|
freqIdx int
|
||||||
}
|
}
|
||||||
|
|
||||||
bands := []struct{ min, max int }{{0, 10}, {10, 20}, {20, 40}, {40, 80}, {80, 160}, {160, 512}}
|
bands := []struct{ min, max int }{
|
||||||
|
{0, 10}, {10, 20}, {20, 40}, {40, 80}, {80, 160}, {160, 512},
|
||||||
|
}
|
||||||
|
|
||||||
var peaks []Peak
|
var peaks []Peak
|
||||||
binDuration := audioDuration / float64(len(spectrogram))
|
frameDuration := audioDuration / float64(len(spectrogram))
|
||||||
|
|
||||||
for binIdx, bin := range spectrogram {
|
// Calculate frequency resolution (Hz per bin)
|
||||||
|
effectiveSampleRate := float64(sampleRate) / float64(dspRatio)
|
||||||
|
freqResolution := effectiveSampleRate / float64(windowSize)
|
||||||
|
|
||||||
|
for frameIdx, frame := range spectrogram {
|
||||||
var maxMags []float64
|
var maxMags []float64
|
||||||
var maxFreqs []complex128
|
var freqIndices []int
|
||||||
var freqIndices []float64
|
|
||||||
|
|
||||||
binBandMaxies := []maxies{}
|
binBandMaxies := []maxies{}
|
||||||
for _, band := range bands {
|
for _, band := range bands {
|
||||||
var maxx maxies
|
var maxx maxies
|
||||||
var maxMag float64
|
var maxMag float64
|
||||||
for idx, freq := range bin[band.min:band.max] {
|
for idx, mag := range frame[band.min:band.max] {
|
||||||
magnitude := cmplx.Abs(freq)
|
if mag > maxMag {
|
||||||
if magnitude > maxMag {
|
maxMag = mag
|
||||||
maxMag = magnitude
|
|
||||||
freqIdx := band.min + idx
|
freqIdx := band.min + idx
|
||||||
maxx = maxies{magnitude, freq, freqIdx}
|
maxx = maxies{mag, freqIdx}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
binBandMaxies = append(binBandMaxies, maxx)
|
binBandMaxies = append(binBandMaxies, maxx)
|
||||||
|
|
@ -151,8 +167,7 @@ func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
|
||||||
|
|
||||||
for _, value := range binBandMaxies {
|
for _, value := range binBandMaxies {
|
||||||
maxMags = append(maxMags, value.maxMag)
|
maxMags = append(maxMags, value.maxMag)
|
||||||
maxFreqs = append(maxFreqs, value.maxFreq)
|
freqIndices = append(freqIndices, value.freqIdx)
|
||||||
freqIndices = append(freqIndices, float64(value.freqIdx))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate the average magnitude
|
// Calculate the average magnitude
|
||||||
|
|
@ -160,17 +175,15 @@ func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
|
||||||
for _, max := range maxMags {
|
for _, max := range maxMags {
|
||||||
maxMagsSum += max
|
maxMagsSum += max
|
||||||
}
|
}
|
||||||
avg := maxMagsSum / float64(len(maxFreqs)) // * coefficient
|
avg := maxMagsSum / float64(len(maxMags))
|
||||||
|
|
||||||
// Add peaks that exceed the average magnitude
|
// Add peaks that exceed the average magnitude
|
||||||
for i, value := range maxMags {
|
for i, value := range maxMags {
|
||||||
if value > avg {
|
if value > avg {
|
||||||
peakTimeInBin := freqIndices[i] * binDuration / float64(len(bin))
|
peakTime := float64(frameIdx) * frameDuration
|
||||||
|
peakFreq := float64(freqIndices[i]) * freqResolution
|
||||||
|
|
||||||
// Calculate the absolute time of the peak
|
peaks = append(peaks, Peak{Time: peakTime, Freq: peakFreq})
|
||||||
peakTime := float64(binIdx)*binDuration + peakTimeInBin
|
|
||||||
|
|
||||||
peaks = append(peaks, Peak{Time: peakTime, Freq: maxFreqs[i]})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,7 @@ package spotify
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
|
@ -13,17 +11,15 @@ import (
|
||||||
"song-recognition/db"
|
"song-recognition/db"
|
||||||
"song-recognition/shazam"
|
"song-recognition/shazam"
|
||||||
"song-recognition/utils"
|
"song-recognition/utils"
|
||||||
"song-recognition/wav"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/fatih/color"
|
"github.com/fatih/color"
|
||||||
"github.com/kkdai/youtube/v2"
|
|
||||||
"github.com/mdobak/go-xerrors"
|
"github.com/mdobak/go-xerrors"
|
||||||
)
|
)
|
||||||
|
|
||||||
const DELETE_SONG_FILE = false
|
const DELETE_SONG_FILE = false // Set true to delete the song file after fingerprinting
|
||||||
|
|
||||||
var yellow = color.New(color.FgYellow)
|
var yellow = color.New(color.FgYellow)
|
||||||
|
|
||||||
|
|
@ -135,9 +131,9 @@ func dlTrack(tracks []Track, path string) (int, error) {
|
||||||
|
|
||||||
trackCopy.Title, trackCopy.Artist = correctFilename(trackCopy.Title, trackCopy.Artist)
|
trackCopy.Title, trackCopy.Artist = correctFilename(trackCopy.Title, trackCopy.Artist)
|
||||||
fileName := fmt.Sprintf("%s - %s", trackCopy.Title, trackCopy.Artist)
|
fileName := fmt.Sprintf("%s - %s", trackCopy.Title, trackCopy.Artist)
|
||||||
filePath := filepath.Join(path, fileName+".m4a")
|
filePath := filepath.Join(path, fileName)
|
||||||
|
|
||||||
err = downloadYTaudio(ytID, path, filePath)
|
filePath, err = downloadYTaudio(ytID, filePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logMessage := fmt.Sprintf("'%s' by '%s' could not be downloaded", trackCopy.Title, trackCopy.Artist)
|
logMessage := fmt.Sprintf("'%s' by '%s' could not be downloaded", trackCopy.Title, trackCopy.Artist)
|
||||||
logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err)))
|
logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err)))
|
||||||
|
|
@ -151,12 +147,10 @@ func dlTrack(tracks []Track, path string) (int, error) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
utils.DeleteFile(filepath.Join(path, fileName+".m4a"))
|
|
||||||
|
|
||||||
wavFilePath := filepath.Join(path, fileName+".wav")
|
wavFilePath := filepath.Join(path, fileName+".wav")
|
||||||
|
|
||||||
if err := addTags(wavFilePath, *trackCopy); err != nil {
|
if err := addTags(wavFilePath, *trackCopy); err != nil {
|
||||||
logMessage := fmt.Sprintf("Error adding tags: %s", filePath+".wav")
|
logMessage := fmt.Sprintf("Error adding tags: %s", wavFilePath)
|
||||||
logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err)))
|
logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err)))
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
@ -186,65 +180,6 @@ func dlTrack(tracks []Track, path string) (int, error) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* github.com/kkdai/youtube */
|
|
||||||
func downloadYTaudio(id, path, filePath string) error {
|
|
||||||
logger := utils.GetLogger()
|
|
||||||
dir, err := os.Stat(path)
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("Error accessing path", slog.Any("error", err))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if !dir.IsDir() {
|
|
||||||
err := errors.New("the path is not valid (not a dir)")
|
|
||||||
logger.Error("Invalid directory path", slog.Any("error", err))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
client := youtube.Client{}
|
|
||||||
video, err := client.GetVideo(id)
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("Error getting YouTube video", slog.Any("error", err))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
itag code: 140, container: m4a, content: audio, bitrate: 128k
|
|
||||||
change the FindByItag parameter to 139 if you want smaller files (but with a bitrate of 48k)
|
|
||||||
https://gist.github.com/sidneys/7095afe4da4ae58694d128b1034e01e2
|
|
||||||
*/
|
|
||||||
formats := video.Formats.Itag(140)
|
|
||||||
|
|
||||||
/* in some cases, when attempting to download the audio
|
|
||||||
using the library github.com/kkdai/youtube,
|
|
||||||
the download fails (and shows the file size as 0 bytes)
|
|
||||||
until the second or third attempt. */
|
|
||||||
var fileSize int64
|
|
||||||
file, err := os.Create(filePath)
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("Error creating file", slog.Any("error", err))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for fileSize == 0 {
|
|
||||||
stream, _, err := client.GetStream(video, &formats[0])
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("Error getting stream", slog.Any("error", err))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, err = io.Copy(file, stream); err != nil {
|
|
||||||
logger.Error("Error copying stream to file", slog.Any("error", err))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
fileSize, _ = GetFileSize(filePath)
|
|
||||||
}
|
|
||||||
defer file.Close()
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func addTags(file string, track Track) error {
|
func addTags(file string, track Track) error {
|
||||||
logger := utils.GetLogger()
|
logger := utils.GetLogger()
|
||||||
// Create a temporary file name by appending "2" before the extension
|
// Create a temporary file name by appending "2" before the extension
|
||||||
|
|
@ -255,7 +190,7 @@ func addTags(file string, track Track) error {
|
||||||
tempFile = baseName + "2" + ".wav" // Temporary filename ('/path/to/title - artist2.wav')
|
tempFile = baseName + "2" + ".wav" // Temporary filename ('/path/to/title - artist2.wav')
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute FFmpeg command to add metadata tags
|
// FFmpeg command to add metadata tags
|
||||||
cmd := exec.Command(
|
cmd := exec.Command(
|
||||||
"ffmpeg",
|
"ffmpeg",
|
||||||
"-i", file, // Input file path
|
"-i", file, // Input file path
|
||||||
|
|
@ -291,40 +226,20 @@ func ProcessAndSaveSong(songFilePath, songTitle, songArtist, ytID string) error
|
||||||
}
|
}
|
||||||
defer dbclient.Close()
|
defer dbclient.Close()
|
||||||
|
|
||||||
wavFilePath, err := wav.ConvertToWAV(songFilePath, 1)
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("Failed to convert to WAV", slog.Any("error", err))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
wavInfo, err := wav.ReadWavInfo(wavFilePath)
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("Failed to read WAV info", slog.Any("error", err))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
samples, err := wav.WavBytesToSamples(wavInfo.Data)
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("Error converting WAV bytes to samples", slog.Any("error", err))
|
|
||||||
return fmt.Errorf("error converting wav bytes to float64: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
spectro, err := shazam.Spectrogram(samples, wavInfo.SampleRate)
|
|
||||||
if err != nil {
|
|
||||||
logger.Error("Error creating spectrogram", slog.Any("error", err))
|
|
||||||
return fmt.Errorf("error creating spectrogram: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
songID, err := dbclient.RegisterSong(songTitle, songArtist, ytID)
|
songID, err := dbclient.RegisterSong(songTitle, songArtist, ytID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Failed to register song", slog.Any("error", err))
|
logger.Error("Failed to register song", slog.Any("error", err))
|
||||||
return err
|
return fmt.Errorf("error registering song '%s' by '%s': %v", songTitle, songArtist, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
peaks := shazam.ExtractPeaks(spectro, wavInfo.Duration)
|
fingerprint, err := shazam.FingerprintAudio(songFilePath, songID)
|
||||||
fingerprints := shazam.Fingerprint(peaks, songID)
|
if err != nil {
|
||||||
|
dbclient.DeleteSongByID(songID)
|
||||||
|
logger.Error("Failed to create fingerprint", slog.String("wavFilePath", songFilePath))
|
||||||
|
return fmt.Errorf("error generating fingerprint for %s by %s", songTitle, songArtist)
|
||||||
|
}
|
||||||
|
|
||||||
err = dbclient.StoreFingerprints(fingerprints)
|
err = dbclient.StoreFingerprints(fingerprint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
dbclient.DeleteSongByID(songID)
|
dbclient.DeleteSongByID(songID)
|
||||||
logger.Error("Failed to store fingerprints", slog.Any("error", err))
|
logger.Error("Failed to store fingerprints", slog.Any("error", err))
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,11 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"song-recognition/utils"
|
||||||
|
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
|
|
@ -215,3 +220,37 @@ func ytSearch(searchTerm string, limit int) (results []*SearchResult, err error)
|
||||||
|
|
||||||
return results, nil
|
return results, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// downloadYTaudio downloads audio from a YouTube video using yt-dlp command line tool.
|
||||||
|
func downloadYTaudio(videoURL, outputFilePath string) (string, error) {
|
||||||
|
logger := utils.GetLogger()
|
||||||
|
|
||||||
|
dir := filepath.Dir(outputFilePath)
|
||||||
|
if stat, err := os.Stat(dir); err != nil || !stat.IsDir() {
|
||||||
|
logger.Error("Invalid directory for output file", slog.Any("error", err))
|
||||||
|
return "", errors.New("output directory does not exist or is not a directory")
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err := exec.LookPath("yt-dlp")
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("yt-dlp not found in PATH", slog.Any("error", err))
|
||||||
|
return "", errors.New("yt-dlp is not installed or not in PATH")
|
||||||
|
}
|
||||||
|
|
||||||
|
audioFmt := "wav"
|
||||||
|
cmd := exec.Command(
|
||||||
|
"yt-dlp",
|
||||||
|
"-f", "bestaudio",
|
||||||
|
"--extract-audio",
|
||||||
|
"--audio-format", audioFmt,
|
||||||
|
"-o", outputFilePath,
|
||||||
|
videoURL,
|
||||||
|
)
|
||||||
|
|
||||||
|
output, err := cmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("yt-dlp command failed", slog.String("output", string(output)), slog.Any("error", err))
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return outputFilePath + "." + audioFmt, nil
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -27,3 +27,9 @@ func GetEnv(key string, fallback ...string) string {
|
||||||
}
|
}
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ExtendMap[K comparable, V any](dest, src map[K]V) {
|
||||||
|
for k, v := range src {
|
||||||
|
dest[k] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,64 +0,0 @@
|
||||||
//go:build js && wasm
|
|
||||||
// +build js,wasm
|
|
||||||
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"song-recognition/shazam"
|
|
||||||
"song-recognition/utils"
|
|
||||||
"syscall/js"
|
|
||||||
)
|
|
||||||
|
|
||||||
func generateFingerprint(this js.Value, args []js.Value) interface{} {
|
|
||||||
if len(args) < 2 {
|
|
||||||
return js.ValueOf(map[string]interface{}{
|
|
||||||
"error": 1,
|
|
||||||
"data": "Expected audio array and sample rate",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
if args[0].Type() != js.TypeObject || args[1].Type() != js.TypeNumber {
|
|
||||||
return js.ValueOf(map[string]interface{}{
|
|
||||||
"error": 2,
|
|
||||||
"data": "Invalid argument types; Expected audio array and samplerate (type: int)",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
inputArray := args[0]
|
|
||||||
sampleRate := args[1].Int()
|
|
||||||
|
|
||||||
audioData := make([]float64, inputArray.Length())
|
|
||||||
for i := 0; i < inputArray.Length(); i++ {
|
|
||||||
audioData[i] = inputArray.Index(i).Float()
|
|
||||||
}
|
|
||||||
|
|
||||||
spectrogram, err := shazam.Spectrogram(audioData, sampleRate)
|
|
||||||
if err != nil {
|
|
||||||
return js.ValueOf(map[string]interface{}{
|
|
||||||
"error": 3,
|
|
||||||
"data": "Error generating spectrogram: " + err.Error(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
peaks := shazam.ExtractPeaks(spectrogram, float64(len(audioData)/sampleRate))
|
|
||||||
fingerprint := shazam.Fingerprint(peaks, utils.GenerateUniqueID())
|
|
||||||
|
|
||||||
fingerprintArray := []interface{}{}
|
|
||||||
for address, couple := range fingerprint {
|
|
||||||
entry := map[string]interface{}{
|
|
||||||
"address": address,
|
|
||||||
"anchorTime": couple.AnchorTimeMs,
|
|
||||||
}
|
|
||||||
fingerprintArray = append(fingerprintArray, entry)
|
|
||||||
}
|
|
||||||
|
|
||||||
return js.ValueOf(map[string]interface{}{
|
|
||||||
"error": 0,
|
|
||||||
"data": fingerprintArray,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
js.Global().Set("generateFingerprint", js.FuncOf(generateFingerprint))
|
|
||||||
select {}
|
|
||||||
}
|
|
||||||
|
|
@ -6,21 +6,33 @@ import (
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"song-recognition/utils"
|
"song-recognition/utils"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ConvertToWAV converts an input audio file to WAV format with specified channels.
|
// ConvertToWAV converts an input audio file to WAV format with specified channels.
|
||||||
func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, err error) {
|
func ConvertToWAV(inputFilePath string) (wavFilePath string, err error) {
|
||||||
_, err = os.Stat(inputFilePath)
|
_, err = os.Stat(inputFilePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("input file does not exist: %v", err)
|
return "", fmt.Errorf("input file does not exist: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if channels < 1 || channels > 2 {
|
to_stereoStr := utils.GetEnv("FINGERPRINT_STEREO", "false")
|
||||||
channels = 1
|
to_stereo, err := strconv.ParseBool(to_stereoStr)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to convert env variable (%s) to bool: %v", "FINGERPRINT_STEREO", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
channels := 1
|
||||||
|
if to_stereo {
|
||||||
|
channels = 2
|
||||||
}
|
}
|
||||||
|
|
||||||
fileExt := filepath.Ext(inputFilePath)
|
fileExt := filepath.Ext(inputFilePath)
|
||||||
|
if fileExt != ".wav" {
|
||||||
|
defer os.Remove(inputFilePath)
|
||||||
|
}
|
||||||
|
|
||||||
outputFile := strings.TrimSuffix(inputFilePath, fileExt) + ".wav"
|
outputFile := strings.TrimSuffix(inputFilePath, fileExt) + ".wav"
|
||||||
|
|
||||||
// Output file may already exists. If it does FFmpeg will fail as
|
// Output file may already exists. If it does FFmpeg will fail as
|
||||||
|
|
|
||||||
|
|
@ -94,49 +94,83 @@ func WriteWavFile(filename string, data []byte, sampleRate int, channels int, bi
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// WavInfo defines a struct containing information extracted from the WAV header
|
|
||||||
type WavInfo struct {
|
type WavInfo struct {
|
||||||
Channels int
|
Channels int
|
||||||
SampleRate int
|
SampleRate int
|
||||||
Data []byte
|
Duration float64
|
||||||
Duration float64
|
Data []byte
|
||||||
|
LeftChannelSamples []float64
|
||||||
|
RightChannelSamples []float64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReadWavInfo reads a 16-bit PCM WAV file and returns its metadata and audio samples.
|
||||||
|
// Supports mono and stereo files. Note that it only supports 16-bit PCM format.
|
||||||
func ReadWavInfo(filename string) (*WavInfo, error) {
|
func ReadWavInfo(filename string) (*WavInfo, error) {
|
||||||
data, err := ioutil.ReadFile(filename)
|
data, err := ioutil.ReadFile(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(data) < 44 {
|
if len(data) < 44 {
|
||||||
return nil, errors.New("invalid WAV file size (too small)")
|
return nil, errors.New("invalid WAV file size (too small)")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read header chunks
|
// Parse PCM header to extract metadata
|
||||||
|
// https://en.wikipedia.org/wiki/WAV#WAV_file_header
|
||||||
var header WavHeader
|
var header WavHeader
|
||||||
err = binary.Read(bytes.NewReader(data[:44]), binary.LittleEndian, &header)
|
if err := binary.Read(bytes.NewReader(data[:44]), binary.LittleEndian, &header); err != nil {
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
if string(header.ChunkID[:]) != "RIFF" ||
|
||||||
if string(header.ChunkID[:]) != "RIFF" || string(header.Format[:]) != "WAVE" || header.AudioFormat != 1 {
|
string(header.Format[:]) != "WAVE" ||
|
||||||
|
header.AudioFormat != 1 {
|
||||||
return nil, errors.New("invalid WAV header format")
|
return nil, errors.New("invalid WAV header format")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract information
|
|
||||||
info := &WavInfo{
|
info := &WavInfo{
|
||||||
Channels: int(header.NumChannels),
|
Channels: int(header.NumChannels),
|
||||||
SampleRate: int(header.SampleRate),
|
SampleRate: int(header.SampleRate),
|
||||||
Data: data[44:],
|
Data: data[44:],
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate audio duration (assuming data contains PCM data)
|
if header.BitsPerSample != 16 {
|
||||||
if header.BitsPerSample == 16 {
|
return nil, errors.New("unsupported bits‑per‑sample (expect 16‑bit PCM)")
|
||||||
info.Duration = float64(len(info.Data)) / float64(int(header.NumChannels)*2*int(header.SampleRate))
|
|
||||||
} else {
|
|
||||||
return nil, errors.New("unsupported bits per sample format")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sampleCount := len(info.Data) / 2
|
||||||
|
int16Buf := make([]int16, sampleCount)
|
||||||
|
if err := binary.Read(bytes.NewReader(info.Data), binary.LittleEndian, int16Buf); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
const scale = 1.0 / 32768.0 // 16‑bit normalisation factor
|
||||||
|
|
||||||
|
switch header.NumChannels {
|
||||||
|
case 1:
|
||||||
|
left := make([]float64, sampleCount)
|
||||||
|
for i, s := range int16Buf {
|
||||||
|
left[i] = float64(s) * scale
|
||||||
|
}
|
||||||
|
info.LeftChannelSamples = left
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
frameCount := sampleCount / 2
|
||||||
|
left := make([]float64, frameCount)
|
||||||
|
right := make([]float64, frameCount)
|
||||||
|
for i := 0; i < frameCount; i++ {
|
||||||
|
left[i] = float64(int16Buf[2*i]) * scale
|
||||||
|
right[i] = float64(int16Buf[2*i+1]) * scale
|
||||||
|
}
|
||||||
|
info.LeftChannelSamples = left
|
||||||
|
info.RightChannelSamples = right
|
||||||
|
|
||||||
|
default:
|
||||||
|
return nil, errors.New("unsupported channel count (only mono/stereo)")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute audio duration in seconds
|
||||||
|
info.Duration = float64(sampleCount) /
|
||||||
|
(float64(header.NumChannels) * float64(header.SampleRate))
|
||||||
|
|
||||||
return info, nil
|
return info, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
22
wasm/build.sh
Executable file
22
wasm/build.sh
Executable file
|
|
@ -0,0 +1,22 @@
|
||||||
|
#!/bin/bash

# Build script for the WASM fingerprint generator.
#
# Compiles wasm_main.go into fingerprint.wasm (GOOS=js, GOARCH=wasm) and copies
# the artifact into ../client/public/. Exits with status 1 if either the build
# or the copy fails, so callers never see a success message for a stale or
# missing artifact.

# The paths below are relative to this script's directory, so run from there
# regardless of where the script was invoked.
cd "$(dirname "$0")" || exit 1

echo "Building WASM module..."

export GOOS=js
export GOARCH=wasm

# Test the build command directly instead of inspecting $? afterwards.
if ! go build -o fingerprint.wasm wasm_main.go; then
    echo "x WASM build failed"
    exit 1
fi

echo "✓ WASM build successful: fingerprint.wasm"

# Only report the copy as done when cp actually succeeded.
if ! cp fingerprint.wasm ../client/public/; then
    echo "x Failed to copy fingerprint.wasm to client/public/"
    exit 1
fi

echo "✓ Copied fingerprint.wasm to client/public/"
|
||||||
25
wasm/go.mod
Normal file
25
wasm/go.mod
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
module wasm-fingerprint
|
||||||
|
|
||||||
|
go 1.23.0
|
||||||
|
|
||||||
|
toolchain go1.24.3
|
||||||
|
|
||||||
|
require song-recognition v0.0.0-00010101000000-000000000000
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/golang/snappy v0.0.4 // indirect
|
||||||
|
github.com/klauspost/compress v1.17.6 // indirect
|
||||||
|
github.com/mattn/go-sqlite3 v1.14.22 // indirect
|
||||||
|
github.com/mdobak/go-xerrors v0.3.1 // indirect
|
||||||
|
github.com/montanaflynn/stats v0.7.1 // indirect
|
||||||
|
github.com/xdg-go/pbkdf2 v1.0.0 // indirect
|
||||||
|
github.com/xdg-go/scram v1.1.2 // indirect
|
||||||
|
github.com/xdg-go/stringprep v1.0.4 // indirect
|
||||||
|
github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect
|
||||||
|
go.mongodb.org/mongo-driver v1.14.0 // indirect
|
||||||
|
golang.org/x/crypto v0.33.0 // indirect
|
||||||
|
golang.org/x/sync v0.11.0 // indirect
|
||||||
|
golang.org/x/text v0.22.0 // indirect
|
||||||
|
)
|
||||||
|
|
||||||
|
replace song-recognition => ../server
|
||||||
59
wasm/go.sum
Normal file
59
wasm/go.sum
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
|
||||||
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||||
|
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||||
|
github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM=
|
||||||
|
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
|
github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI=
|
||||||
|
github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
|
||||||
|
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
|
||||||
|
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||||
|
github.com/mdobak/go-xerrors v0.3.1 h1:XfqaLMNN5T4qsHSlLHGJ35f6YlDTVeINSYYeeuK4VpQ=
|
||||||
|
github.com/mdobak/go-xerrors v0.3.1/go.mod h1:nIR+HMAJuj/uNqyp5+MTN6PJ7ymuIJq3UVs9QCgAHbY=
|
||||||
|
github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
|
||||||
|
github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
|
||||||
|
github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
|
||||||
|
github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
|
||||||
|
github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
|
||||||
|
github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
|
||||||
|
github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
|
||||||
|
github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
|
||||||
|
github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a h1:fZHgsYlfvtyqToslyjUt3VOPF4J7aK/3MPcK7xp3PDk=
|
||||||
|
github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a/go.mod h1:ul22v+Nro/R083muKhosV54bj5niojjWZvU8xrevuH4=
|
||||||
|
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||||
|
go.mongodb.org/mongo-driver v1.14.0 h1:P98w8egYRjYe3XDjxhYJagTokP/H6HzlsnojRgZRd80=
|
||||||
|
go.mongodb.org/mongo-driver v1.14.0/go.mod h1:Vzb0Mk/pa7e6cWw85R4F/endUC3u0U9jGcNU603k65c=
|
||||||
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
|
golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||||
|
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||||
|
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
|
||||||
|
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
|
||||||
|
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||||
|
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||||
|
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
|
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||||
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||||
|
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
|
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
|
||||||
|
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
|
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
|
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||||
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||||
|
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
|
||||||
|
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
|
||||||
|
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
|
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||||
|
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||||
|
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||||
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
111
wasm/wasm_main.go
Normal file
111
wasm/wasm_main.go
Normal file
|
|
@ -0,0 +1,111 @@
|
||||||
|
//go:build js && wasm
|
||||||
|
// +build js,wasm
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"song-recognition/models"
|
||||||
|
"song-recognition/shazam"
|
||||||
|
"song-recognition/utils"
|
||||||
|
"syscall/js"
|
||||||
|
)
|
||||||
|
|
||||||
|
// generateFingerprint takes audio data from the frontend and generates fingerprints
|
||||||
|
// Arguments: [audioArray, sampleRate, channels]
|
||||||
|
// Returns: { error: number, data: fingerprintArray or error message }
|
||||||
|
func generateFingerprint(this js.Value, args []js.Value) interface{} {
|
||||||
|
if len(args) < 3 {
|
||||||
|
return js.ValueOf(map[string]interface{}{
|
||||||
|
"error": 1,
|
||||||
|
"data": "Expected audio array, sample rate, and number of channels",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if args[0].Type() != js.TypeObject || args[1].Type() != js.TypeNumber {
|
||||||
|
return js.ValueOf(map[string]interface{}{
|
||||||
|
"error": 2,
|
||||||
|
"data": "Invalid argument types; Expected audio array and samplerate (type: int)",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
channels := args[2].Int()
|
||||||
|
if args[2].Type() != js.TypeNumber || (channels != 1 && channels != 2) {
|
||||||
|
return js.ValueOf(map[string]interface{}{
|
||||||
|
"error": 2,
|
||||||
|
"data": "Invalid number of channels; expected 1 or 2",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
inputArray := args[0]
|
||||||
|
sampleRate := args[1].Int()
|
||||||
|
|
||||||
|
audioData := make([]float64, inputArray.Length())
|
||||||
|
for i := 0; i < inputArray.Length(); i++ {
|
||||||
|
audioData[i] = inputArray.Index(i).Float()
|
||||||
|
}
|
||||||
|
|
||||||
|
fingerprint := make(map[uint32]models.Couple)
|
||||||
|
var leftChannel, rightChannel []float64
|
||||||
|
|
||||||
|
if channels == 1 {
|
||||||
|
leftChannel = audioData
|
||||||
|
spectrogram, err := shazam.Spectrogram(audioData, sampleRate)
|
||||||
|
if err != nil {
|
||||||
|
return js.ValueOf(map[string]interface{}{
|
||||||
|
"error": 3,
|
||||||
|
"data": "Error generating spectrogram: " + err.Error(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
peaks := shazam.ExtractPeaks(spectrogram, float64(len(audioData))/float64(sampleRate), sampleRate)
|
||||||
|
fingerprint = shazam.Fingerprint(peaks, utils.GenerateUniqueID())
|
||||||
|
} else {
|
||||||
|
for i := 0; i < len(audioData); i += 2 {
|
||||||
|
leftChannel = append(leftChannel, audioData[i])
|
||||||
|
rightChannel = append(rightChannel, audioData[i+1])
|
||||||
|
}
|
||||||
|
|
||||||
|
// LEFT
|
||||||
|
spectrogram, err := shazam.Spectrogram(leftChannel, sampleRate)
|
||||||
|
if err != nil {
|
||||||
|
return js.ValueOf(map[string]interface{}{
|
||||||
|
"error": 3,
|
||||||
|
"data": "Error generating spectrogram: " + err.Error(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
peaks := shazam.ExtractPeaks(spectrogram, float64(len(leftChannel))/float64(sampleRate), sampleRate)
|
||||||
|
utils.ExtendMap(fingerprint, shazam.Fingerprint(peaks, utils.GenerateUniqueID()))
|
||||||
|
|
||||||
|
// RIGHT
|
||||||
|
spectrogram, err = shazam.Spectrogram(rightChannel, sampleRate)
|
||||||
|
if err != nil {
|
||||||
|
return js.ValueOf(map[string]interface{}{
|
||||||
|
"error": 3,
|
||||||
|
"data": "Error generating spectrogram: " + err.Error(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
peaks = shazam.ExtractPeaks(spectrogram, float64(len(rightChannel))/float64(sampleRate), sampleRate)
|
||||||
|
utils.ExtendMap(fingerprint, shazam.Fingerprint(peaks, utils.GenerateUniqueID()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fingerprintArray := []interface{}{}
|
||||||
|
for address, couple := range fingerprint {
|
||||||
|
entry := map[string]interface{}{
|
||||||
|
"address": address,
|
||||||
|
"anchorTime": couple.AnchorTimeMs,
|
||||||
|
}
|
||||||
|
fingerprintArray = append(fingerprintArray, entry)
|
||||||
|
}
|
||||||
|
|
||||||
|
return js.ValueOf(map[string]interface{}{
|
||||||
|
"error": 0,
|
||||||
|
"data": fingerprintArray,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
js.Global().Set("generateFingerprint", js.FuncOf(generateFingerprint))
|
||||||
|
|
||||||
|
js.Global().Call("dispatchEvent", js.Global().Get("Event").New("wasmReady"))
|
||||||
|
|
||||||
|
select {}
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue