mirror of
https://github.com/cgzirim/seek-tune.git
synced 2025-12-16 16:34:21 +00:00
Merge pull request #44 from cgzirim/development
Fix Critical Audio Fingerprinting Algorithm Bugs
This commit is contained in:
commit
e825099e17
26 changed files with 831 additions and 360 deletions
106
.dockerignore
106
.dockerignore
|
|
@ -1,28 +1,88 @@
|
|||
# Binaries for programs and plugins
|
||||
*.exe
|
||||
*.ogg
|
||||
*.m4a
|
||||
*.zip
|
||||
*.exe~
|
||||
*.dll
|
||||
*.so
|
||||
*.dylib
|
||||
# Git
|
||||
.git
|
||||
.gitignore
|
||||
.gitattributes
|
||||
|
||||
# Test binary, built with `go test -c`
|
||||
*.test
|
||||
# Documentation
|
||||
*.md
|
||||
!README.md
|
||||
LICENSE
|
||||
|
||||
# Output of the go coverage tool, specifically when used with LiteIDE
|
||||
*.out
|
||||
|
||||
# Dependency directories (remove the comment below to include it)
|
||||
# vendor/
|
||||
|
||||
# Go workspace file
|
||||
go.work
|
||||
**/songs
|
||||
# IDE
|
||||
.vscode
|
||||
.idea
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
package-lock.json
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
*sqlite3
|
||||
.env
|
||||
# Node
|
||||
client/node_modules
|
||||
client/.env.local
|
||||
client/.env.development.local
|
||||
client/.env.test.local
|
||||
client/.env.production.local
|
||||
client/build
|
||||
client/coverage
|
||||
client/npm-debug.log*
|
||||
client/yarn-debug.log*
|
||||
client/yarn-error.log*
|
||||
|
||||
# Go
|
||||
server/seek-tune
|
||||
server/*.exe
|
||||
server/*.test
|
||||
server/*.out
|
||||
server/vendor/
|
||||
|
||||
# Application data (don't copy into image)
|
||||
server/songs/**
|
||||
server/recordings/**
|
||||
server/snippets/**
|
||||
server/tmp/**
|
||||
server/db/*.sqlite3
|
||||
server/db/*.db
|
||||
|
||||
# Audio files
|
||||
*.mp3
|
||||
*.wav
|
||||
*.m4a
|
||||
*.ogg
|
||||
*.flac
|
||||
*.aac
|
||||
|
||||
# Archives
|
||||
*.zip
|
||||
*.tar
|
||||
*.gz
|
||||
*.rar
|
||||
|
||||
# Environment
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
|
||||
# CI/CD
|
||||
.github
|
||||
.gitlab-ci.yml
|
||||
.travis.yml
|
||||
|
||||
# Docker
|
||||
docker-compose*.yml
|
||||
!docker-compose.yml
|
||||
Dockerfile*
|
||||
!Dockerfile
|
||||
|
||||
# WASM (already built separately if needed)
|
||||
wasm/fingerprint.wasm
|
||||
wasm/go.sum
|
||||
|
||||
# Scripts
|
||||
scripts/
|
||||
appspec.yml
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
DB_TYPE=mongo
|
||||
DB_USER=user
|
||||
DB_PASS=password
|
||||
DB_NAME=seek-tune
|
||||
DB_HOST=192.168.0.1
|
||||
DB_PORT=27017
|
||||
REACT_APP_BACKEND_URL=http://localhost:5000
|
||||
SPOTIFY_CLIENT_ID=yourclientid
|
||||
SPOTIFY_CLIENT_SECRET=yoursecret
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -28,4 +28,6 @@ go.work
|
|||
|
||||
package-lock.json
|
||||
*sqlite3
|
||||
.env
|
||||
.env
|
||||
|
||||
token.json
|
||||
56
Dockerfile
56
Dockerfile
|
|
@ -1,33 +1,63 @@
|
|||
# build react
|
||||
# Build React frontend
|
||||
FROM node:20-alpine AS build_react_stage
|
||||
|
||||
RUN mkdir -p /home/react
|
||||
WORKDIR /home/react
|
||||
WORKDIR /app/client
|
||||
|
||||
COPY client/package.json ./
|
||||
RUN npm install
|
||||
COPY client/package*.json ./
|
||||
RUN npm ci --only=production && npm cache clean --force
|
||||
|
||||
COPY client/ ./
|
||||
ARG REACT_APP_BACKEND_URL
|
||||
ENV REACT_APP_BACKEND_URL=${REACT_APP_BACKEND_URL}
|
||||
RUN npm run build
|
||||
|
||||
# build go
|
||||
FROM golang:1.21.6
|
||||
# Build Go backend
|
||||
FROM golang:1.24-alpine AS build_go_stage
|
||||
|
||||
WORKDIR /home/seek-tune
|
||||
RUN apk add --no-cache git ca-certificates tzdata gcc musl-dev
|
||||
|
||||
WORKDIR /app/server
|
||||
|
||||
COPY server/go.mod server/go.sum ./
|
||||
RUN go mod download
|
||||
RUN go mod download && go mod verify
|
||||
|
||||
COPY server/ ./
|
||||
ENV ENV=production
|
||||
RUN go build -ldflags="-w -s" -o seek-tune
|
||||
|
||||
# Final runtime image
|
||||
FROM alpine:latest
|
||||
|
||||
# Install runtime dependencies
|
||||
RUN apk add --no-cache \
|
||||
ca-certificates \
|
||||
tzdata \
|
||||
ffmpeg \
|
||||
python3 \
|
||||
py3-pip \
|
||||
&& pip3 install --no-cache-dir yt-dlp --break-system-packages
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY --from=build_go_stage /app/server/seek-tune .
|
||||
|
||||
RUN mkdir -p static
|
||||
COPY --from=build_react_stage /home/react/build static
|
||||
COPY --from=build_react_stage /app/client/build ./static
|
||||
|
||||
RUN go build -o seek-tune
|
||||
RUN mkdir -p db songs recordings snippets tmp && \
|
||||
chmod -R 755 db songs recordings snippets tmp
|
||||
|
||||
ENV ENV=production
|
||||
|
||||
EXPOSE 5000
|
||||
|
||||
CMD [ "/home/seek-tune/seek-tune", "serve" ]
|
||||
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
||||
CMD wget --no-verbose --tries=1 --spider http://localhost:5000/ || exit 1
|
||||
|
||||
# Run as non-root user for security
|
||||
RUN addgroup -g 1001 -S appuser && \
|
||||
adduser -u 1001 -S appuser -G appuser && \
|
||||
chown -R appuser:appuser /app
|
||||
|
||||
USER appuser
|
||||
|
||||
CMD ["./seek-tune", "serve", "http", "5000"]
|
||||
41
README.md
41
README.md
|
|
@ -8,21 +8,17 @@
|
|||
</a>
|
||||
</p>
|
||||
|
||||
<p align="center"><a href="https://drive.google.com/file/d/1I2esH2U4DtXHsNgYbUi4OL-ukV5i_1PI/view" target="_blank">Demo in Video</a></p>
|
||||
<p align="center"><a href="https://drive.google.com/file/d/1I2esH2U4DtXHsNgYbUi4OL-ukV5i_1PI/view" target="_blank">Demo in Video</a> | <a href="https://www.youtube.com/watch?v=a0CVCcb0RJM" target="_blank">How it was made (YouTube)</a></p>
|
||||
|
||||
## Description 🎼
|
||||
SeekTune is an implementation of Shazam's song recognition algorithm based on insights from these [resources](#resources--card_file_box). It integrates Spotify and YouTube APIs to find and download songs.
|
||||
|
||||
[//]: # (## Current Limitations
|
||||
While the algorithm works excellently in matching a song with its exact file, it doesn't always find the right match from a recording. However, this project is still a work in progress. I'm hopeful about making it work, but I could definitely use some help :slightly_smiling_face:.
|
||||
Additionally, it currently only supports song files in WAV format.
|
||||
)
|
||||
|
||||
## Installation :desktop_computer:
|
||||
### Prerequisites
|
||||
- Golang: [Install Golang](https://golang.org/dl/)
|
||||
- FFmpeg: [Install FFmpeg](https://ffmpeg.org/download.html)
|
||||
- NPM: To run the client (frontend).
|
||||
- NPM: [Install Node](https://nodejs.org/en/download)
|
||||
- YT-DLP: [Install YT-DLP](https://github.com/yt-dlp/yt-dlp/wiki/Installation)
|
||||
|
||||
### Steps
|
||||
📦 Clone the repository:
|
||||
|
|
@ -42,27 +38,17 @@ Prerequisites: [Docker](https://docs.docker.com/get-docker/) and [Docker Compose
|
|||
docker-compose down
|
||||
```
|
||||
|
||||
#### 🎧 Spotify API
|
||||
#### 🎧 Spotify API Setup
|
||||
|
||||
To access Spotify metadata, the project now uses the official [Spotify Web API](https://developer.spotify.com/documentation/web-api/). This requires creating a developer application and retrieving a client ID and client secret.
|
||||
1. Get credentials: Follow the [official getting started guide](https://developer.spotify.com/documentation/web-api/tutorials/getting-started) to create a Spotify app and obtain your **Client ID** and **Client Secret**.
|
||||
|
||||
Follow the [official getting started guide](https://developer.spotify.com/documentation/web-api/tutorials/getting-started#request-an-access-token) to:
|
||||
2. Configure: Create a `.env` file in the `server` directory:
|
||||
```bash
|
||||
SPOTIFY_CLIENT_ID=your-client-id
|
||||
SPOTIFY_CLIENT_SECRET=your-client-secret
|
||||
```
|
||||
|
||||
1. Create a Spotify developer app.
|
||||
2. Copy your **Client ID** and **Client Secret**.
|
||||
|
||||
##### Setting up Credentials
|
||||
Instead of using a credentials.json file, the application now reads these values from environment variables.
|
||||
|
||||
Create a .env file in the server directory with the following content:
|
||||
|
||||
```
|
||||
SPOTIFY_CLIENT_ID=your-client-id
|
||||
SPOTIFY_CLIENT_SECRET=your-client-secret
|
||||
```
|
||||
|
||||
Make sure this .env file is loaded into your environment before running the server.
|
||||
The application will automatically read this file to fetch and cache access tokens. If the token is expired or missing, a new one will be requested.
|
||||
The app will automatically fetch and cache access tokens as needed.
|
||||
|
||||
#### 💻 Set Up Natively
|
||||
Install dependencies for the backend
|
||||
|
|
@ -109,7 +95,12 @@ go run *.go find <path-to-wav-file>
|
|||
```
|
||||
#### ▸ Delete fingerprints and songs 🗑️
|
||||
```
|
||||
# Delete only database (default)
|
||||
go run *.go erase
|
||||
go run *.go erase db
|
||||
|
||||
# Delete both database and song files
|
||||
go run *.go erase all
|
||||
```
|
||||
|
||||
## Example :film_projector:
|
||||
|
|
|
|||
1
client/.env.example
Normal file
1
client/.env.example
Normal file
|
|
@ -0,0 +1 @@
|
|||
REACT_APP_BACKEND_URL=http://localhost:5000
|
||||
BIN
client/public/fingerprint.wasm
Executable file
BIN
client/public/fingerprint.wasm
Executable file
Binary file not shown.
Binary file not shown.
|
|
@ -15,7 +15,8 @@ import { fetchFile } from '@ffmpeg/util';
|
|||
|
||||
import AnimatedNumber from "./components/AnimatedNumber";
|
||||
|
||||
const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000";
|
||||
const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5500";
|
||||
const recordStereo = process.env.REACT_APP_RECORD_STEREO === "true" || false;
|
||||
// https://seek-tune-rq4gn.ondigitalocean.app/
|
||||
|
||||
var socket = io(server);
|
||||
|
|
@ -91,7 +92,7 @@ function App() {
|
|||
try {
|
||||
const go = new window.Go();
|
||||
const result = await WebAssembly.instantiateStreaming(
|
||||
fetch("/main.wasm"),
|
||||
fetch("/fingerprint.wasm"),
|
||||
go.importObject
|
||||
);
|
||||
go.run(result.instance);
|
||||
|
|
@ -175,15 +176,15 @@ function App() {
|
|||
cleanUp();
|
||||
|
||||
const inputFile = 'input.wav';
|
||||
const outputFile = 'output_mono.wav';
|
||||
const outputFile = 'output_formatted.wav';
|
||||
|
||||
// Convert audio to mono with a sample rate of 44100 Hz
|
||||
await ffmpeg.writeFile(inputFile, await fetchFile(blob))
|
||||
const exitCode = await ffmpeg.exec([
|
||||
'-i', inputFile,
|
||||
'-c', 'pcm_s16le',
|
||||
'-ar', '44100',
|
||||
'-ac', '1',
|
||||
'-ac', recordStereo ? '2' : '1',
|
||||
'-acodec', 'pcm_s16le',
|
||||
'-f', 'wav',
|
||||
outputFile
|
||||
]);
|
||||
|
|
@ -191,11 +192,11 @@ function App() {
|
|||
throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`);
|
||||
}
|
||||
|
||||
const monoData = await ffmpeg.readFile(outputFile);
|
||||
const monoBlob = new Blob([monoData.buffer], { type: 'audio/wav' });
|
||||
const audioData = await ffmpeg.readFile(outputFile);
|
||||
const audioBlob = new Blob([audioData.buffer], { type: 'audio/wav' });
|
||||
|
||||
const reader = new FileReader();
|
||||
reader.readAsArrayBuffer(monoBlob);
|
||||
reader.readAsArrayBuffer(audioBlob);
|
||||
reader.onload = async (event) => {
|
||||
const arrayBuffer = event.target.result;
|
||||
const audioContext = new AudioContext();
|
||||
|
|
@ -205,7 +206,11 @@ function App() {
|
|||
const audioData = audioBufferDecoded.getChannelData(0);
|
||||
const audioArray = Array.from(audioData);
|
||||
|
||||
const result = genFingerprint(audioArray, audioBufferDecoded.sampleRate);
|
||||
const result = genFingerprint(
|
||||
audioArray,
|
||||
audioBufferDecoded.sampleRate,
|
||||
audioBufferDecoded.numberOfChannels
|
||||
);
|
||||
if (result.error !== 0) {
|
||||
toast["error"](() => <div>An error occured</div>)
|
||||
console.log("An error occured: ", result)
|
||||
|
|
@ -288,7 +293,7 @@ function App() {
|
|||
return (
|
||||
<div className="App">
|
||||
<div className="TopHeader">
|
||||
<h2 style={{ color: "#374151" }}>!Shazam</h2>
|
||||
<h2 style={{ color: "#374151" }}>SeekTune</h2>
|
||||
<h4 style={{ display: "flex", justifyContent: "flex-end" }}>
|
||||
<AnimatedNumber includeComma={true} animateToNumber={totalSongs} />
|
||||
Songs
|
||||
|
|
|
|||
|
|
@ -1,15 +1,21 @@
|
|||
version: '3.1'
|
||||
version: '3.8'
|
||||
|
||||
volumes:
|
||||
seek-tune-db:
|
||||
driver: local
|
||||
seek-tune-songs:
|
||||
driver: local
|
||||
seek-tune-recordings:
|
||||
driver: local
|
||||
|
||||
services:
|
||||
seek-tune:
|
||||
image: 'seek-tune'
|
||||
image: seek-tune:latest
|
||||
container_name: seek-tune-app
|
||||
restart: unless-stopped
|
||||
|
||||
ports:
|
||||
- 8080:5000
|
||||
- "${HOST_PORT:-8080}:5000"
|
||||
|
||||
environment:
|
||||
DB_TYPE: ${DB_TYPE:-sqlite}
|
||||
|
|
@ -18,14 +24,67 @@ services:
|
|||
DB_NAME: ${DB_NAME:-seek_tune_db}
|
||||
DB_HOST: ${DB_HOST:-localhost}
|
||||
DB_PORT: ${DB_PORT:-27017}
|
||||
|
||||
|
||||
ENV: production
|
||||
REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080}
|
||||
|
||||
SPOTIFY_CLIENT_ID: ${SPOTIFY_CLIENT_ID:-}
|
||||
SPOTIFY_CLIENT_SECRET: ${SPOTIFY_CLIENT_SECRET:-}
|
||||
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
REACT_APP_BACKEND_URL: ${REACT_APP_BACKEND_URL:-http://localhost:8080}
|
||||
cache_from:
|
||||
- seek-tune:latest
|
||||
|
||||
volumes:
|
||||
- seek-tune-db:/home/seek-tune/db
|
||||
- seek-tune-songs:/home/seek-tune/songs
|
||||
- seek-tune-db:/app/db
|
||||
- seek-tune-songs:/app/songs
|
||||
- seek-tune-recordings:/app/recordings
|
||||
# Optional: Mount local songs directory for development
|
||||
# - ./server/songs:/app/songs
|
||||
|
||||
# Resource limits (adjust based on your needs)
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2.0'
|
||||
memory: 2G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5000/"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
logging:
|
||||
driver: "json-file"
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
|
||||
# Optional: MongoDB service (if using MongoDB instead of SQLite)
|
||||
# mongodb:
|
||||
# image: mongo:7
|
||||
# container_name: seek-tune-mongo
|
||||
# restart: unless-stopped
|
||||
# environment:
|
||||
# MONGO_INITDB_ROOT_USERNAME: ${DB_USER:-root}
|
||||
# MONGO_INITDB_ROOT_PASSWORD: ${DB_PASSWORD:-password}
|
||||
# MONGO_INITDB_DATABASE: ${DB_NAME:-seek_tune_db}
|
||||
# ports:
|
||||
# - "27017:27017"
|
||||
# volumes:
|
||||
# - seek-tune-db:/data/db
|
||||
# healthcheck:
|
||||
# test: echo 'db.runCommand("ping").ok' | mongosh localhost:27017/test --quiet
|
||||
# interval: 10s
|
||||
# timeout: 10s
|
||||
# retries: 5
|
||||
# start_period: 40s
|
||||
|
|
|
|||
14
server/.env.example
Normal file
14
server/.env.example
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
DB_TYPE=mongo # or sqlite
|
||||
DB_USER=user
|
||||
DB_PASS=password
|
||||
DB_NAME=seek-tune
|
||||
DB_HOST=192.168.0.1
|
||||
DB_PORT=27017
|
||||
|
||||
# Set to true to enable stereo fingerprinting (uses more storage but may improve accuracy)
|
||||
FINGERPRINT_STEREO=false
|
||||
|
||||
SPOTIFY_CLIENT_ID=yourclientid
|
||||
SPOTIFY_CLIENT_SECRET=yoursecret
|
||||
|
||||
|
||||
|
|
@ -10,6 +10,7 @@ import (
|
|||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"song-recognition/db"
|
||||
"song-recognition/shazam"
|
||||
"song-recognition/spotify"
|
||||
|
|
@ -34,19 +35,24 @@ const (
|
|||
var yellow = color.New(color.FgYellow)
|
||||
|
||||
func find(filePath string) {
|
||||
wavInfo, err := wav.ReadWavInfo(filePath)
|
||||
wavFilePath, err := wav.ConvertToWAV(filePath)
|
||||
if err != nil {
|
||||
yellow.Println("Error reading wave info:", err)
|
||||
yellow.Println("Error converting to WAV:", err)
|
||||
return
|
||||
}
|
||||
|
||||
samples, err := wav.WavBytesToSamples(wavInfo.Data)
|
||||
fingerprint, err := shazam.FingerprintAudio(wavFilePath, utils.GenerateUniqueID())
|
||||
if err != nil {
|
||||
yellow.Println("Error converting to samples:", err)
|
||||
yellow.Println("Error generating fingerprint for sample: ", err)
|
||||
return
|
||||
}
|
||||
|
||||
matches, searchDuration, err := shazam.FindMatches(samples, wavInfo.Duration, wavInfo.SampleRate)
|
||||
sampleFingerprint := make(map[uint32]uint32)
|
||||
for address, couple := range fingerprint {
|
||||
sampleFingerprint[address] = couple.AnchorTimeMs
|
||||
}
|
||||
|
||||
matches, searchDuration, err := shazam.FindMatchesFGP(sampleFingerprint)
|
||||
if err != nil {
|
||||
yellow.Println("Error finding matches:", err)
|
||||
return
|
||||
|
|
@ -193,7 +199,7 @@ func serveHTTP(socketServer *socketio.Server, serveHTTPS bool, port string) {
|
|||
}
|
||||
}
|
||||
|
||||
func erase(songsDir string) {
|
||||
func erase(songsDir string, dbOnly bool, all bool) {
|
||||
logger := utils.GetLogger()
|
||||
ctx := context.Background()
|
||||
|
||||
|
|
@ -216,26 +222,31 @@ func erase(songsDir string) {
|
|||
logger.ErrorContext(ctx, msg, slog.Any("error", err))
|
||||
}
|
||||
|
||||
// delete song files
|
||||
err = filepath.Walk(songsDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Println("Database cleared")
|
||||
|
||||
if !info.IsDir() {
|
||||
ext := filepath.Ext(path)
|
||||
if ext == ".wav" || ext == ".m4a" {
|
||||
err := os.Remove(path)
|
||||
if err != nil {
|
||||
return err
|
||||
// delete song files only if -all flag is set
|
||||
if all {
|
||||
err = filepath.Walk(songsDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !info.IsDir() {
|
||||
ext := filepath.Ext(path)
|
||||
if ext == ".wav" || ext == ".m4a" {
|
||||
err := os.Remove(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
msg := fmt.Sprintf("Error walking through directory %s: %v\n", songsDir, err)
|
||||
logger.ErrorContext(ctx, msg, slog.Any("error", err))
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
msg := fmt.Sprintf("Error walking through directory %s: %v\n", songsDir, err)
|
||||
logger.ErrorContext(ctx, msg, slog.Any("error", err))
|
||||
fmt.Println("Songs folder cleared")
|
||||
}
|
||||
|
||||
fmt.Println("Erase complete")
|
||||
|
|
@ -249,6 +260,7 @@ func save(path string, force bool) {
|
|||
}
|
||||
|
||||
if fileInfo.IsDir() {
|
||||
var filePaths []string
|
||||
err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
fmt.Printf("Error walking the path %v: %v\n", filePath, err)
|
||||
|
|
@ -256,16 +268,16 @@ func save(path string, force bool) {
|
|||
}
|
||||
// Process only files, skip directories
|
||||
if !info.IsDir() {
|
||||
err := saveSong(filePath, force)
|
||||
if err != nil {
|
||||
fmt.Printf("Error saving song (%v): %v\n", filePath, err)
|
||||
}
|
||||
filePaths = append(filePaths, filePath)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
fmt.Printf("Error walking the directory %v: %v\n", path, err)
|
||||
return
|
||||
}
|
||||
|
||||
processFilesConCurrently(filePaths, force)
|
||||
} else {
|
||||
err := saveSong(path, force)
|
||||
if err != nil {
|
||||
|
|
@ -274,6 +286,50 @@ func save(path string, force bool) {
|
|||
}
|
||||
}
|
||||
|
||||
func processFilesConCurrently(filePaths []string, force bool) {
|
||||
maxWorkers := runtime.NumCPU() / 2
|
||||
numFiles := len(filePaths)
|
||||
|
||||
if numFiles == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if numFiles < maxWorkers {
|
||||
maxWorkers = numFiles
|
||||
}
|
||||
|
||||
jobs := make(chan string, numFiles)
|
||||
results := make(chan error, numFiles)
|
||||
|
||||
for w := 0; w < maxWorkers; w++ {
|
||||
go func(workerID int) {
|
||||
for filePath := range jobs {
|
||||
err := saveSong(filePath, force)
|
||||
results <- err
|
||||
}
|
||||
}(w + 1)
|
||||
}
|
||||
|
||||
for _, filePath := range filePaths {
|
||||
jobs <- filePath
|
||||
}
|
||||
close(jobs)
|
||||
|
||||
successCount := 0
|
||||
errorCount := 0
|
||||
for i := 0; i < numFiles; i++ {
|
||||
err := <-results
|
||||
if err != nil {
|
||||
fmt.Printf("Error: %v\n", err)
|
||||
errorCount++
|
||||
} else {
|
||||
successCount++
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("\n ->> Processed %d files: %d successful, %d failed\n", numFiles, successCount, errorCount)
|
||||
}
|
||||
|
||||
func saveSong(filePath string, force bool) error {
|
||||
metadata, err := wav.GetMetadata(filePath)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@ import (
|
|||
"os"
|
||||
"song-recognition/utils"
|
||||
|
||||
"github.com/mdobak/go-xerrors"
|
||||
"github.com/joho/godotenv"
|
||||
"github.com/mdobak/go-xerrors"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
|
@ -32,10 +32,16 @@ func main() {
|
|||
|
||||
if len(os.Args) < 2 {
|
||||
fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands")
|
||||
fmt.Println("\nUsage examples:")
|
||||
fmt.Println(" find <path_to_wav_file>")
|
||||
fmt.Println(" download <spotify_url>")
|
||||
fmt.Println(" erase [db | all] (default: db)")
|
||||
fmt.Println(" save [-f|--force] <path_to_file_or_dir>")
|
||||
fmt.Println(" serve [-proto <http|https>] [-p <port>]")
|
||||
os.Exit(1)
|
||||
}
|
||||
_ = godotenv.Load()
|
||||
|
||||
|
||||
switch os.Args[1] {
|
||||
case "find":
|
||||
if len(os.Args) < 3 {
|
||||
|
|
@ -58,7 +64,28 @@ func main() {
|
|||
serveCmd.Parse(os.Args[2:])
|
||||
serve(*protocol, *port)
|
||||
case "erase":
|
||||
erase(SONGS_DIR)
|
||||
// Default is to clear only database (db mode)
|
||||
dbOnly := true
|
||||
all := false
|
||||
|
||||
if len(os.Args) > 2 {
|
||||
subCmd := os.Args[2]
|
||||
switch subCmd {
|
||||
case "db":
|
||||
dbOnly = true
|
||||
all = false
|
||||
case "all":
|
||||
dbOnly = false
|
||||
all = true
|
||||
default:
|
||||
fmt.Println("Usage: main.go erase [db | all]")
|
||||
fmt.Println(" db : only clear the database (default)")
|
||||
fmt.Println(" all : clear database and songs folder")
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
erase(SONGS_DIR, dbOnly, all)
|
||||
case "save":
|
||||
indexCmd := flag.NewFlagSet("save", flag.ExitOnError)
|
||||
force := indexCmd.Bool("force", false, "save song with or without YouTube ID")
|
||||
|
|
@ -72,6 +99,12 @@ func main() {
|
|||
save(filePath, *force)
|
||||
default:
|
||||
fmt.Println("Expected 'find', 'download', 'erase', 'save', or 'serve' subcommands")
|
||||
fmt.Println("\nUsage examples:")
|
||||
fmt.Println(" find <path_to_wav_file>")
|
||||
fmt.Println(" download <spotify_url>")
|
||||
fmt.Println(" erase [db | all] (default: db)")
|
||||
fmt.Println(" save [-f|--force] <path_to_file_or_dir>")
|
||||
fmt.Println(" serve [-proto <http|https>] [-p <port>]")
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
package shazam
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"song-recognition/models"
|
||||
"song-recognition/utils"
|
||||
"song-recognition/wav"
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
@ -23,7 +26,10 @@ func Fingerprint(peaks []Peak, songID uint32) map[uint32]models.Couple {
|
|||
address := createAddress(anchor, target)
|
||||
anchorTimeMs := uint32(anchor.Time * 1000)
|
||||
|
||||
fingerprints[address] = models.Couple{anchorTimeMs, songID}
|
||||
fingerprints[address] = models.Couple{
|
||||
AnchorTimeMs: anchorTimeMs,
|
||||
SongID: songID,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -35,12 +41,52 @@ func Fingerprint(peaks []Peak, songID uint32) map[uint32]models.Couple {
|
|||
// the anchor and target points, and other bits represent the time difference (delta time)
|
||||
// between them. This function combines these components into a single address (a hash).
|
||||
func createAddress(anchor, target Peak) uint32 {
|
||||
anchorFreq := int(real(anchor.Freq))
|
||||
targetFreq := int(real(target.Freq))
|
||||
deltaMs := uint32((target.Time - anchor.Time) * 1000)
|
||||
anchorFreqBin := uint32(anchor.Freq / 10) // Scale down to fit in 9 bits
|
||||
targetFreqBin := uint32(target.Freq / 10)
|
||||
|
||||
// Combine the frequency of the anchor, target, and delta time into a 32-bit address
|
||||
address := uint32(anchorFreq<<23) | uint32(targetFreq<<14) | deltaMs
|
||||
deltaMsRaw := uint32((target.Time - anchor.Time) * 1000)
|
||||
|
||||
// Mask to fit within bit constraints
|
||||
anchorFreqBits := anchorFreqBin & ((1 << maxFreqBits) - 1) // 9 bits
|
||||
targetFreqBits := targetFreqBin & ((1 << maxFreqBits) - 1) // 9 bits
|
||||
deltaBits := deltaMsRaw & ((1 << maxDeltaBits) - 1) // 14 bits (max ~16 seconds)
|
||||
|
||||
// Combine into 32-bit address
|
||||
address := (anchorFreqBits << 23) | (targetFreqBits << 14) | deltaBits
|
||||
|
||||
return address
|
||||
}
|
||||
|
||||
func FingerprintAudio(songFilePath string, songID uint32) (map[uint32]models.Couple, error) {
|
||||
wavFilePath, err := wav.ConvertToWAV(songFilePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error converting input file to WAV: %v", err)
|
||||
}
|
||||
|
||||
wavInfo, err := wav.ReadWavInfo(wavFilePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading WAV info: %v", err)
|
||||
}
|
||||
|
||||
fingerprint := make(map[uint32]models.Couple)
|
||||
|
||||
spectro, err := Spectrogram(wavInfo.LeftChannelSamples, wavInfo.SampleRate)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error creating spectrogram: %v", err)
|
||||
}
|
||||
|
||||
peaks := ExtractPeaks(spectro, wavInfo.Duration, wavInfo.SampleRate)
|
||||
utils.ExtendMap(fingerprint, Fingerprint(peaks, songID))
|
||||
|
||||
if wavInfo.Channels == 2 {
|
||||
spectro, err = Spectrogram(wavInfo.RightChannelSamples, wavInfo.SampleRate)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error creating spectrogram for right channel: %v", err)
|
||||
}
|
||||
|
||||
peaks = ExtractPeaks(spectro, wavInfo.Duration, wavInfo.SampleRate)
|
||||
utils.ExtendMap(fingerprint, Fingerprint(peaks, songID))
|
||||
}
|
||||
|
||||
return fingerprint, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ package shazam
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"song-recognition/db"
|
||||
"song-recognition/utils"
|
||||
"sort"
|
||||
|
|
@ -30,7 +29,8 @@ func FindMatches(audioSample []float64, audioDuration float64, sampleRate int) (
|
|||
return nil, time.Since(startTime), fmt.Errorf("failed to get spectrogram of samples: %v", err)
|
||||
}
|
||||
|
||||
peaks := ExtractPeaks(spectrogram, audioDuration)
|
||||
peaks := ExtractPeaks(spectrogram, audioDuration, sampleRate)
|
||||
// peaks := ExtractPeaksLMX(spectrogram, true)
|
||||
sampleFingerprint := Fingerprint(peaks, utils.GenerateUniqueID())
|
||||
|
||||
sampleFingerprintMap := make(map[uint32]uint32)
|
||||
|
|
@ -38,7 +38,7 @@ func FindMatches(audioSample []float64, audioDuration float64, sampleRate int) (
|
|||
sampleFingerprintMap[address] = couple.AnchorTimeMs
|
||||
}
|
||||
|
||||
matches, _, err := FindMatchesFGP(sampleFingerprintMap)
|
||||
matches, _, _ := FindMatchesFGP(sampleFingerprintMap)
|
||||
|
||||
return matches, time.Since(startTime), nil
|
||||
}
|
||||
|
|
@ -142,21 +142,32 @@ func filterMatches(
|
|||
}
|
||||
|
||||
// analyzeRelativeTiming calculates a score for each song based on the
|
||||
// relative timing between the song and the sample's anchor times.
|
||||
// consistency of time offsets between the sample and database.
|
||||
func analyzeRelativeTiming(matches map[uint32][][2]uint32) map[uint32]float64 {
|
||||
scores := make(map[uint32]float64)
|
||||
|
||||
for songID, times := range matches {
|
||||
count := 0
|
||||
for i := 0; i < len(times); i++ {
|
||||
for j := i + 1; j < len(times); j++ {
|
||||
sampleDiff := math.Abs(float64(times[i][0] - times[j][0]))
|
||||
dbDiff := math.Abs(float64(times[i][1] - times[j][1]))
|
||||
if math.Abs(sampleDiff-dbDiff) < 100 { // Allow some tolerance
|
||||
count++
|
||||
}
|
||||
offsetCounts := make(map[int32]int)
|
||||
|
||||
for _, timePair := range times {
|
||||
sampleTime := int32(timePair[0])
|
||||
dbTime := int32(timePair[1])
|
||||
offset := dbTime - sampleTime
|
||||
|
||||
// Bin offsets in 100ms buckets to allow for small timing variations
|
||||
offsetBucket := offset / 100
|
||||
offsetCounts[offsetBucket]++
|
||||
}
|
||||
|
||||
maxCount := 0
|
||||
for _, count := range offsetCounts {
|
||||
if count > maxCount {
|
||||
maxCount = count
|
||||
}
|
||||
}
|
||||
scores[songID] = float64(count)
|
||||
|
||||
scores[songID] = float64(maxCount)
|
||||
}
|
||||
|
||||
return scores
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,13 +8,14 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
dspRatio = 4
|
||||
freqBinSize = 1024
|
||||
maxFreq = 5000.0 // 5kHz
|
||||
hopSize = freqBinSize / 32
|
||||
dspRatio = 4
|
||||
windowSize = 1024
|
||||
maxFreq = 5000.0 // 5kHz
|
||||
hopSize = windowSize / 2 // 50% overlap for better time-frequency resolution
|
||||
windowType = "hanning" // choices: "hanning" or "hamming"
|
||||
)
|
||||
|
||||
func Spectrogram(sample []float64, sampleRate int) ([][]complex128, error) {
|
||||
func Spectrogram(sample []float64, sampleRate int) ([][]float64, error) {
|
||||
filteredSample := LowPassFilter(maxFreq, float64(sampleRate), sample)
|
||||
|
||||
downsampledSample, err := Downsample(filteredSample, sampleRate, sampleRate/dspRatio)
|
||||
|
|
@ -22,31 +23,42 @@ func Spectrogram(sample []float64, sampleRate int) ([][]complex128, error) {
|
|||
return nil, fmt.Errorf("couldn't downsample audio sample: %v", err)
|
||||
}
|
||||
|
||||
numOfWindows := len(downsampledSample) / (freqBinSize - hopSize)
|
||||
spectrogram := make([][]complex128, numOfWindows)
|
||||
|
||||
window := make([]float64, freqBinSize)
|
||||
window := make([]float64, windowSize)
|
||||
for i := range window {
|
||||
window[i] = 0.54 - 0.46*math.Cos(2*math.Pi*float64(i)/(float64(freqBinSize)-1))
|
||||
theta := 2 * math.Pi * float64(i) / float64(windowSize-1)
|
||||
switch windowType {
|
||||
case "hamming":
|
||||
window[i] = 0.54 - 0.46*math.Cos(theta)
|
||||
default: // Hanning window
|
||||
window[i] = 0.5 - 0.5*math.Cos(theta)
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize spectrogram slice
|
||||
spectrogram := make([][]float64, 0)
|
||||
|
||||
// Perform STFT
|
||||
for i := 0; i < numOfWindows; i++ {
|
||||
start := i * hopSize
|
||||
end := start + freqBinSize
|
||||
if end > len(downsampledSample) {
|
||||
end = len(downsampledSample)
|
||||
}
|
||||
for start := 0; start+windowSize <= len(downsampledSample); start += hopSize {
|
||||
end := start + windowSize
|
||||
|
||||
bin := make([]float64, freqBinSize)
|
||||
copy(bin, downsampledSample[start:end])
|
||||
frame := make([]float64, windowSize)
|
||||
copy(frame, downsampledSample[start:end])
|
||||
|
||||
// Apply Hamming window
|
||||
// Apply window
|
||||
for j := range window {
|
||||
bin[j] *= window[j]
|
||||
frame[j] *= window[j]
|
||||
}
|
||||
|
||||
spectrogram[i] = FFT(bin)
|
||||
// Perform FFT
|
||||
fftResult := FFT(frame)
|
||||
|
||||
// Convert complex spectrum to magnitude spectrum
|
||||
magnitude := make([]float64, len(fftResult)/2)
|
||||
for j := range magnitude {
|
||||
magnitude[j] = cmplx.Abs(fftResult[j])
|
||||
}
|
||||
|
||||
spectrogram = append(spectrogram, magnitude)
|
||||
}
|
||||
|
||||
return spectrogram, nil
|
||||
|
|
@ -107,43 +119,47 @@ func Downsample(input []float64, originalSampleRate, targetSampleRate int) ([]fl
|
|||
return resampled, nil
|
||||
}
|
||||
|
||||
// Peak represents a significant point in the spectrogram.
|
||||
type Peak struct {
|
||||
Time float64
|
||||
Freq complex128
|
||||
Freq float64 // Frequency in Hz
|
||||
Time float64 // Time in seconds
|
||||
}
|
||||
|
||||
// ExtractPeaks analyzes a spectrogram and extracts significant peaks in the frequency domain over time.
|
||||
func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
|
||||
func ExtractPeaks(spectrogram [][]float64, audioDuration float64, sampleRate int) []Peak {
|
||||
if len(spectrogram) < 1 {
|
||||
return []Peak{}
|
||||
}
|
||||
|
||||
type maxies struct {
|
||||
maxMag float64
|
||||
maxFreq complex128
|
||||
freqIdx int
|
||||
}
|
||||
|
||||
bands := []struct{ min, max int }{{0, 10}, {10, 20}, {20, 40}, {40, 80}, {80, 160}, {160, 512}}
|
||||
bands := []struct{ min, max int }{
|
||||
{0, 10}, {10, 20}, {20, 40}, {40, 80}, {80, 160}, {160, 512},
|
||||
}
|
||||
|
||||
var peaks []Peak
|
||||
binDuration := audioDuration / float64(len(spectrogram))
|
||||
frameDuration := audioDuration / float64(len(spectrogram))
|
||||
|
||||
for binIdx, bin := range spectrogram {
|
||||
// Calculate frequency resolution (Hz per bin)
|
||||
effectiveSampleRate := float64(sampleRate) / float64(dspRatio)
|
||||
freqResolution := effectiveSampleRate / float64(windowSize)
|
||||
|
||||
for frameIdx, frame := range spectrogram {
|
||||
var maxMags []float64
|
||||
var maxFreqs []complex128
|
||||
var freqIndices []float64
|
||||
var freqIndices []int
|
||||
|
||||
binBandMaxies := []maxies{}
|
||||
for _, band := range bands {
|
||||
var maxx maxies
|
||||
var maxMag float64
|
||||
for idx, freq := range bin[band.min:band.max] {
|
||||
magnitude := cmplx.Abs(freq)
|
||||
if magnitude > maxMag {
|
||||
maxMag = magnitude
|
||||
for idx, mag := range frame[band.min:band.max] {
|
||||
if mag > maxMag {
|
||||
maxMag = mag
|
||||
freqIdx := band.min + idx
|
||||
maxx = maxies{magnitude, freq, freqIdx}
|
||||
maxx = maxies{mag, freqIdx}
|
||||
}
|
||||
}
|
||||
binBandMaxies = append(binBandMaxies, maxx)
|
||||
|
|
@ -151,8 +167,7 @@ func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
|
|||
|
||||
for _, value := range binBandMaxies {
|
||||
maxMags = append(maxMags, value.maxMag)
|
||||
maxFreqs = append(maxFreqs, value.maxFreq)
|
||||
freqIndices = append(freqIndices, float64(value.freqIdx))
|
||||
freqIndices = append(freqIndices, value.freqIdx)
|
||||
}
|
||||
|
||||
// Calculate the average magnitude
|
||||
|
|
@ -160,17 +175,15 @@ func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak {
|
|||
for _, max := range maxMags {
|
||||
maxMagsSum += max
|
||||
}
|
||||
avg := maxMagsSum / float64(len(maxFreqs)) // * coefficient
|
||||
avg := maxMagsSum / float64(len(maxMags))
|
||||
|
||||
// Add peaks that exceed the average magnitude
|
||||
for i, value := range maxMags {
|
||||
if value > avg {
|
||||
peakTimeInBin := freqIndices[i] * binDuration / float64(len(bin))
|
||||
peakTime := float64(frameIdx) * frameDuration
|
||||
peakFreq := float64(freqIndices[i]) * freqResolution
|
||||
|
||||
// Calculate the absolute time of the peak
|
||||
peakTime := float64(binIdx)*binDuration + peakTimeInBin
|
||||
|
||||
peaks = append(peaks, Peak{Time: peakTime, Freq: maxFreqs[i]})
|
||||
peaks = append(peaks, Peak{Time: peakTime, Freq: peakFreq})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,9 +2,7 @@ package spotify
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
|
@ -13,17 +11,15 @@ import (
|
|||
"song-recognition/db"
|
||||
"song-recognition/shazam"
|
||||
"song-recognition/utils"
|
||||
"song-recognition/wav"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/fatih/color"
|
||||
"github.com/kkdai/youtube/v2"
|
||||
"github.com/mdobak/go-xerrors"
|
||||
)
|
||||
|
||||
const DELETE_SONG_FILE = false
|
||||
const DELETE_SONG_FILE = false // Set true to delete the song file after fingerprinting
|
||||
|
||||
var yellow = color.New(color.FgYellow)
|
||||
|
||||
|
|
@ -135,9 +131,9 @@ func dlTrack(tracks []Track, path string) (int, error) {
|
|||
|
||||
trackCopy.Title, trackCopy.Artist = correctFilename(trackCopy.Title, trackCopy.Artist)
|
||||
fileName := fmt.Sprintf("%s - %s", trackCopy.Title, trackCopy.Artist)
|
||||
filePath := filepath.Join(path, fileName+".m4a")
|
||||
filePath := filepath.Join(path, fileName)
|
||||
|
||||
err = downloadYTaudio(ytID, path, filePath)
|
||||
filePath, err = downloadYTaudio(ytID, filePath)
|
||||
if err != nil {
|
||||
logMessage := fmt.Sprintf("'%s' by '%s' could not be downloaded", trackCopy.Title, trackCopy.Artist)
|
||||
logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err)))
|
||||
|
|
@ -151,12 +147,10 @@ func dlTrack(tracks []Track, path string) (int, error) {
|
|||
return
|
||||
}
|
||||
|
||||
utils.DeleteFile(filepath.Join(path, fileName+".m4a"))
|
||||
|
||||
wavFilePath := filepath.Join(path, fileName+".wav")
|
||||
|
||||
if err := addTags(wavFilePath, *trackCopy); err != nil {
|
||||
logMessage := fmt.Sprintf("Error adding tags: %s", filePath+".wav")
|
||||
logMessage := fmt.Sprintf("Error adding tags: %s", wavFilePath)
|
||||
logger.ErrorContext(ctx, logMessage, slog.Any("error", xerrors.New(err)))
|
||||
|
||||
return
|
||||
|
|
@ -186,65 +180,6 @@ func dlTrack(tracks []Track, path string) (int, error) {
|
|||
|
||||
}
|
||||
|
||||
/* github.com/kkdai/youtube */
|
||||
func downloadYTaudio(id, path, filePath string) error {
|
||||
logger := utils.GetLogger()
|
||||
dir, err := os.Stat(path)
|
||||
if err != nil {
|
||||
logger.Error("Error accessing path", slog.Any("error", err))
|
||||
return err
|
||||
}
|
||||
|
||||
if !dir.IsDir() {
|
||||
err := errors.New("the path is not valid (not a dir)")
|
||||
logger.Error("Invalid directory path", slog.Any("error", err))
|
||||
return err
|
||||
}
|
||||
|
||||
client := youtube.Client{}
|
||||
video, err := client.GetVideo(id)
|
||||
if err != nil {
|
||||
logger.Error("Error getting YouTube video", slog.Any("error", err))
|
||||
return err
|
||||
}
|
||||
|
||||
/*
|
||||
itag code: 140, container: m4a, content: audio, bitrate: 128k
|
||||
change the FindByItag parameter to 139 if you want smaller files (but with a bitrate of 48k)
|
||||
https://gist.github.com/sidneys/7095afe4da4ae58694d128b1034e01e2
|
||||
*/
|
||||
formats := video.Formats.Itag(140)
|
||||
|
||||
/* in some cases, when attempting to download the audio
|
||||
using the library github.com/kkdai/youtube,
|
||||
the download fails (and shows the file size as 0 bytes)
|
||||
until the second or third attempt. */
|
||||
var fileSize int64
|
||||
file, err := os.Create(filePath)
|
||||
if err != nil {
|
||||
logger.Error("Error creating file", slog.Any("error", err))
|
||||
return err
|
||||
}
|
||||
|
||||
for fileSize == 0 {
|
||||
stream, _, err := client.GetStream(video, &formats[0])
|
||||
if err != nil {
|
||||
logger.Error("Error getting stream", slog.Any("error", err))
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err = io.Copy(file, stream); err != nil {
|
||||
logger.Error("Error copying stream to file", slog.Any("error", err))
|
||||
return err
|
||||
}
|
||||
|
||||
fileSize, _ = GetFileSize(filePath)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func addTags(file string, track Track) error {
|
||||
logger := utils.GetLogger()
|
||||
// Create a temporary file name by appending "2" before the extension
|
||||
|
|
@ -255,7 +190,7 @@ func addTags(file string, track Track) error {
|
|||
tempFile = baseName + "2" + ".wav" // Temporary filename ('/path/to/title - artist2.wav')
|
||||
}
|
||||
|
||||
// Execute FFmpeg command to add metadata tags
|
||||
// FFmpeg command to add metadata tags
|
||||
cmd := exec.Command(
|
||||
"ffmpeg",
|
||||
"-i", file, // Input file path
|
||||
|
|
@ -291,40 +226,20 @@ func ProcessAndSaveSong(songFilePath, songTitle, songArtist, ytID string) error
|
|||
}
|
||||
defer dbclient.Close()
|
||||
|
||||
wavFilePath, err := wav.ConvertToWAV(songFilePath, 1)
|
||||
if err != nil {
|
||||
logger.Error("Failed to convert to WAV", slog.Any("error", err))
|
||||
return err
|
||||
}
|
||||
|
||||
wavInfo, err := wav.ReadWavInfo(wavFilePath)
|
||||
if err != nil {
|
||||
logger.Error("Failed to read WAV info", slog.Any("error", err))
|
||||
return err
|
||||
}
|
||||
|
||||
samples, err := wav.WavBytesToSamples(wavInfo.Data)
|
||||
if err != nil {
|
||||
logger.Error("Error converting WAV bytes to samples", slog.Any("error", err))
|
||||
return fmt.Errorf("error converting wav bytes to float64: %v", err)
|
||||
}
|
||||
|
||||
spectro, err := shazam.Spectrogram(samples, wavInfo.SampleRate)
|
||||
if err != nil {
|
||||
logger.Error("Error creating spectrogram", slog.Any("error", err))
|
||||
return fmt.Errorf("error creating spectrogram: %v", err)
|
||||
}
|
||||
|
||||
songID, err := dbclient.RegisterSong(songTitle, songArtist, ytID)
|
||||
if err != nil {
|
||||
logger.Error("Failed to register song", slog.Any("error", err))
|
||||
return err
|
||||
return fmt.Errorf("error registering song '%s' by '%s': %v", songTitle, songArtist, err)
|
||||
}
|
||||
|
||||
peaks := shazam.ExtractPeaks(spectro, wavInfo.Duration)
|
||||
fingerprints := shazam.Fingerprint(peaks, songID)
|
||||
fingerprint, err := shazam.FingerprintAudio(songFilePath, songID)
|
||||
if err != nil {
|
||||
dbclient.DeleteSongByID(songID)
|
||||
logger.Error("Failed to create fingerprint", slog.String("wavFilePath", songFilePath))
|
||||
return fmt.Errorf("error generating fingerprint for %s by %s", songTitle, songArtist)
|
||||
}
|
||||
|
||||
err = dbclient.StoreFingerprints(fingerprints)
|
||||
err = dbclient.StoreFingerprints(fingerprint)
|
||||
if err != nil {
|
||||
dbclient.DeleteSongByID(songID)
|
||||
logger.Error("Failed to store fingerprints", slog.Any("error", err))
|
||||
|
|
|
|||
|
|
@ -4,6 +4,11 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"song-recognition/utils"
|
||||
|
||||
"errors"
|
||||
"io"
|
||||
|
|
@ -215,3 +220,37 @@ func ytSearch(searchTerm string, limit int) (results []*SearchResult, err error)
|
|||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// downloadYTaudio downloads audio from a YouTube video using yt-dlp command line tool.
|
||||
func downloadYTaudio(videoURL, outputFilePath string) (string, error) {
|
||||
logger := utils.GetLogger()
|
||||
|
||||
dir := filepath.Dir(outputFilePath)
|
||||
if stat, err := os.Stat(dir); err != nil || !stat.IsDir() {
|
||||
logger.Error("Invalid directory for output file", slog.Any("error", err))
|
||||
return "", errors.New("output directory does not exist or is not a directory")
|
||||
}
|
||||
|
||||
_, err := exec.LookPath("yt-dlp")
|
||||
if err != nil {
|
||||
logger.Error("yt-dlp not found in PATH", slog.Any("error", err))
|
||||
return "", errors.New("yt-dlp is not installed or not in PATH")
|
||||
}
|
||||
|
||||
audioFmt := "wav"
|
||||
cmd := exec.Command(
|
||||
"yt-dlp",
|
||||
"-f", "bestaudio",
|
||||
"--extract-audio",
|
||||
"--audio-format", audioFmt,
|
||||
"-o", outputFilePath,
|
||||
videoURL,
|
||||
)
|
||||
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
logger.Error("yt-dlp command failed", slog.String("output", string(output)), slog.Any("error", err))
|
||||
return "", err
|
||||
}
|
||||
return outputFilePath + "." + audioFmt, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,3 +27,9 @@ func GetEnv(key string, fallback ...string) string {
|
|||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// ExtendMap copies every key/value pair of src into dest. When a key is
// present in both maps, the value from src replaces the one in dest.
// src is left unmodified; dest must be non-nil if src has any entries.
func ExtendMap[K comparable, V any](dest, src map[K]V) {
	for key, value := range src {
		dest[key] = value
	}
}
|
||||
|
|
|
|||
|
|
@ -1,64 +0,0 @@
|
|||
//go:build js && wasm
|
||||
// +build js,wasm
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"song-recognition/shazam"
|
||||
"song-recognition/utils"
|
||||
"syscall/js"
|
||||
)
|
||||
|
||||
func generateFingerprint(this js.Value, args []js.Value) interface{} {
|
||||
if len(args) < 2 {
|
||||
return js.ValueOf(map[string]interface{}{
|
||||
"error": 1,
|
||||
"data": "Expected audio array and sample rate",
|
||||
})
|
||||
}
|
||||
|
||||
if args[0].Type() != js.TypeObject || args[1].Type() != js.TypeNumber {
|
||||
return js.ValueOf(map[string]interface{}{
|
||||
"error": 2,
|
||||
"data": "Invalid argument types; Expected audio array and samplerate (type: int)",
|
||||
})
|
||||
}
|
||||
|
||||
inputArray := args[0]
|
||||
sampleRate := args[1].Int()
|
||||
|
||||
audioData := make([]float64, inputArray.Length())
|
||||
for i := 0; i < inputArray.Length(); i++ {
|
||||
audioData[i] = inputArray.Index(i).Float()
|
||||
}
|
||||
|
||||
spectrogram, err := shazam.Spectrogram(audioData, sampleRate)
|
||||
if err != nil {
|
||||
return js.ValueOf(map[string]interface{}{
|
||||
"error": 3,
|
||||
"data": "Error generating spectrogram: " + err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
peaks := shazam.ExtractPeaks(spectrogram, float64(len(audioData)/sampleRate))
|
||||
fingerprint := shazam.Fingerprint(peaks, utils.GenerateUniqueID())
|
||||
|
||||
fingerprintArray := []interface{}{}
|
||||
for address, couple := range fingerprint {
|
||||
entry := map[string]interface{}{
|
||||
"address": address,
|
||||
"anchorTime": couple.AnchorTimeMs,
|
||||
}
|
||||
fingerprintArray = append(fingerprintArray, entry)
|
||||
}
|
||||
|
||||
return js.ValueOf(map[string]interface{}{
|
||||
"error": 0,
|
||||
"data": fingerprintArray,
|
||||
})
|
||||
}
|
||||
|
||||
func main() {
|
||||
js.Global().Set("generateFingerprint", js.FuncOf(generateFingerprint))
|
||||
select {}
|
||||
}
|
||||
|
|
@ -6,21 +6,33 @@ import (
|
|||
"os/exec"
|
||||
"path/filepath"
|
||||
"song-recognition/utils"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ConvertToWAV converts an input audio file to WAV format with specified channels.
|
||||
func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, err error) {
|
||||
func ConvertToWAV(inputFilePath string) (wavFilePath string, err error) {
|
||||
_, err = os.Stat(inputFilePath)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("input file does not exist: %v", err)
|
||||
}
|
||||
|
||||
if channels < 1 || channels > 2 {
|
||||
channels = 1
|
||||
to_stereoStr := utils.GetEnv("FINGERPRINT_STEREO", "false")
|
||||
to_stereo, err := strconv.ParseBool(to_stereoStr)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to convert env variable (%s) to bool: %v", "FINGERPRINT_STEREO", err)
|
||||
}
|
||||
|
||||
channels := 1
|
||||
if to_stereo {
|
||||
channels = 2
|
||||
}
|
||||
|
||||
fileExt := filepath.Ext(inputFilePath)
|
||||
if fileExt != ".wav" {
|
||||
defer os.Remove(inputFilePath)
|
||||
}
|
||||
|
||||
outputFile := strings.TrimSuffix(inputFilePath, fileExt) + ".wav"
|
||||
|
||||
// Output file may already exist. If it does, FFmpeg will fail as
|
||||
|
|
|
|||
|
|
@ -94,49 +94,83 @@ func WriteWavFile(filename string, data []byte, sampleRate int, channels int, bi
|
|||
return err
|
||||
}
|
||||
|
||||
// WavInfo defines a struct containing information extracted from the WAV header
|
||||
type WavInfo struct {
|
||||
Channels int
|
||||
SampleRate int
|
||||
Data []byte
|
||||
Duration float64
|
||||
Channels int
|
||||
SampleRate int
|
||||
Duration float64
|
||||
Data []byte
|
||||
LeftChannelSamples []float64
|
||||
RightChannelSamples []float64
|
||||
}
|
||||
|
||||
// ReadWavInfo reads a 16-bit PCM WAV file and returns its metadata and audio samples.
|
||||
// Supports mono and stereo files. Note that it only supports 16-bit PCM format.
|
||||
func ReadWavInfo(filename string) (*WavInfo, error) {
|
||||
data, err := ioutil.ReadFile(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(data) < 44 {
|
||||
return nil, errors.New("invalid WAV file size (too small)")
|
||||
}
|
||||
|
||||
// Read header chunks
|
||||
// Parse PCM header to extract metadata
|
||||
// https://en.wikipedia.org/wiki/WAV#WAV_file_header
|
||||
var header WavHeader
|
||||
err = binary.Read(bytes.NewReader(data[:44]), binary.LittleEndian, &header)
|
||||
if err != nil {
|
||||
if err := binary.Read(bytes.NewReader(data[:44]), binary.LittleEndian, &header); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if string(header.ChunkID[:]) != "RIFF" || string(header.Format[:]) != "WAVE" || header.AudioFormat != 1 {
|
||||
if string(header.ChunkID[:]) != "RIFF" ||
|
||||
string(header.Format[:]) != "WAVE" ||
|
||||
header.AudioFormat != 1 {
|
||||
return nil, errors.New("invalid WAV header format")
|
||||
}
|
||||
|
||||
// Extract information
|
||||
info := &WavInfo{
|
||||
Channels: int(header.NumChannels),
|
||||
SampleRate: int(header.SampleRate),
|
||||
Data: data[44:],
|
||||
}
|
||||
|
||||
// Calculate audio duration (assuming data contains PCM data)
|
||||
if header.BitsPerSample == 16 {
|
||||
info.Duration = float64(len(info.Data)) / float64(int(header.NumChannels)*2*int(header.SampleRate))
|
||||
} else {
|
||||
return nil, errors.New("unsupported bits per sample format")
|
||||
if header.BitsPerSample != 16 {
|
||||
return nil, errors.New("unsupported bits‑per‑sample (expect 16‑bit PCM)")
|
||||
}
|
||||
|
||||
sampleCount := len(info.Data) / 2
|
||||
int16Buf := make([]int16, sampleCount)
|
||||
if err := binary.Read(bytes.NewReader(info.Data), binary.LittleEndian, int16Buf); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
const scale = 1.0 / 32768.0 // 16‑bit normalisation factor
|
||||
|
||||
switch header.NumChannels {
|
||||
case 1:
|
||||
left := make([]float64, sampleCount)
|
||||
for i, s := range int16Buf {
|
||||
left[i] = float64(s) * scale
|
||||
}
|
||||
info.LeftChannelSamples = left
|
||||
|
||||
case 2:
|
||||
frameCount := sampleCount / 2
|
||||
left := make([]float64, frameCount)
|
||||
right := make([]float64, frameCount)
|
||||
for i := 0; i < frameCount; i++ {
|
||||
left[i] = float64(int16Buf[2*i]) * scale
|
||||
right[i] = float64(int16Buf[2*i+1]) * scale
|
||||
}
|
||||
info.LeftChannelSamples = left
|
||||
info.RightChannelSamples = right
|
||||
|
||||
default:
|
||||
return nil, errors.New("unsupported channel count (only mono/stereo)")
|
||||
}
|
||||
|
||||
// Compute audio duration in seconds
|
||||
info.Duration = float64(sampleCount) /
|
||||
(float64(header.NumChannels) * float64(header.SampleRate))
|
||||
|
||||
return info, nil
|
||||
}
|
||||
|
||||
|
|
|
|||
22
wasm/build.sh
Executable file
22
wasm/build.sh
Executable file
|
|
@ -0,0 +1,22 @@
|
|||
#!/bin/bash

# Build script for the WASM fingerprint generator.
# Compiles wasm_main.go into fingerprint.wasm and copies the result into the
# client's public directory. Exits with status 1 if either step fails.

# Work from the directory containing this script so the relative paths below
# (wasm_main.go, ../client/public/) resolve no matter where it is invoked from.
cd "$(dirname "$0")" || exit 1

echo "Building WASM module..."

export GOOS=js
export GOARCH=wasm

if ! go build -o fingerprint.wasm wasm_main.go; then
    echo "x WASM build failed"
    exit 1
fi

echo "✓ WASM build successful: fingerprint.wasm"

# Report a failed copy instead of announcing success unconditionally.
if ! cp fingerprint.wasm ../client/public/; then
    echo "x Failed to copy fingerprint.wasm to client/public/"
    exit 1
fi

echo "✓ Copied fingerprint.wasm to client/public/"
|
||||
25
wasm/go.mod
Normal file
25
wasm/go.mod
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
module wasm-fingerprint
|
||||
|
||||
go 1.23.0
|
||||
|
||||
toolchain go1.24.3
|
||||
|
||||
require song-recognition v0.0.0-00010101000000-000000000000
|
||||
|
||||
require (
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/klauspost/compress v1.17.6 // indirect
|
||||
github.com/mattn/go-sqlite3 v1.14.22 // indirect
|
||||
github.com/mdobak/go-xerrors v0.3.1 // indirect
|
||||
github.com/montanaflynn/stats v0.7.1 // indirect
|
||||
github.com/xdg-go/pbkdf2 v1.0.0 // indirect
|
||||
github.com/xdg-go/scram v1.1.2 // indirect
|
||||
github.com/xdg-go/stringprep v1.0.4 // indirect
|
||||
github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect
|
||||
go.mongodb.org/mongo-driver v1.14.0 // indirect
|
||||
golang.org/x/crypto v0.33.0 // indirect
|
||||
golang.org/x/sync v0.11.0 // indirect
|
||||
golang.org/x/text v0.22.0 // indirect
|
||||
)
|
||||
|
||||
replace song-recognition => ../server
|
||||
59
wasm/go.sum
Normal file
59
wasm/go.sum
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM=
|
||||
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/klauspost/compress v1.17.6 h1:60eq2E/jlfwQXtvZEeBUYADs+BwKBWURIY+Gj2eRGjI=
|
||||
github.com/klauspost/compress v1.17.6/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
|
||||
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
|
||||
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mdobak/go-xerrors v0.3.1 h1:XfqaLMNN5T4qsHSlLHGJ35f6YlDTVeINSYYeeuK4VpQ=
|
||||
github.com/mdobak/go-xerrors v0.3.1/go.mod h1:nIR+HMAJuj/uNqyp5+MTN6PJ7ymuIJq3UVs9QCgAHbY=
|
||||
github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
|
||||
github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
|
||||
github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
|
||||
github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
|
||||
github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
|
||||
github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
|
||||
github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
|
||||
github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
|
||||
github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a h1:fZHgsYlfvtyqToslyjUt3VOPF4J7aK/3MPcK7xp3PDk=
|
||||
github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a/go.mod h1:ul22v+Nro/R083muKhosV54bj5niojjWZvU8xrevuH4=
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
go.mongodb.org/mongo-driver v1.14.0 h1:P98w8egYRjYe3XDjxhYJagTokP/H6HzlsnojRgZRd80=
|
||||
go.mongodb.org/mongo-driver v1.14.0/go.mod h1:Vzb0Mk/pa7e6cWw85R4F/endUC3u0U9jGcNU603k65c=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||
golang.org/x/crypto v0.33.0 h1:IOBPskki6Lysi0lo9qQvbxiQ+FvsCC/YWOecCHAixus=
|
||||
golang.org/x/crypto v0.33.0/go.mod h1:bVdXmD7IV/4GdElGPozy6U7lWdRXA4qyRVGJV57uQ5M=
|
||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
|
||||
golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
|
||||
golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
|
||||
golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
111
wasm/wasm_main.go
Normal file
111
wasm/wasm_main.go
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
//go:build js && wasm
|
||||
// +build js,wasm
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"song-recognition/models"
|
||||
"song-recognition/shazam"
|
||||
"song-recognition/utils"
|
||||
"syscall/js"
|
||||
)
|
||||
|
||||
// generateFingerprint takes audio data from the frontend and generates fingerprints
|
||||
// Arguments: [audioArray, sampleRate, channels]
|
||||
// Returns: { error: number, data: fingerprintArray or error message }
|
||||
func generateFingerprint(this js.Value, args []js.Value) interface{} {
|
||||
if len(args) < 3 {
|
||||
return js.ValueOf(map[string]interface{}{
|
||||
"error": 1,
|
||||
"data": "Expected audio array, sample rate, and number of channels",
|
||||
})
|
||||
}
|
||||
|
||||
if args[0].Type() != js.TypeObject || args[1].Type() != js.TypeNumber {
|
||||
return js.ValueOf(map[string]interface{}{
|
||||
"error": 2,
|
||||
"data": "Invalid argument types; Expected audio array and samplerate (type: int)",
|
||||
})
|
||||
}
|
||||
|
||||
channels := args[2].Int()
|
||||
if args[2].Type() != js.TypeNumber || (channels != 1 && channels != 2) {
|
||||
return js.ValueOf(map[string]interface{}{
|
||||
"error": 2,
|
||||
"data": "Invalid number of channels; expected 1 or 2",
|
||||
})
|
||||
}
|
||||
|
||||
inputArray := args[0]
|
||||
sampleRate := args[1].Int()
|
||||
|
||||
audioData := make([]float64, inputArray.Length())
|
||||
for i := 0; i < inputArray.Length(); i++ {
|
||||
audioData[i] = inputArray.Index(i).Float()
|
||||
}
|
||||
|
||||
fingerprint := make(map[uint32]models.Couple)
|
||||
var leftChannel, rightChannel []float64
|
||||
|
||||
if channels == 1 {
|
||||
leftChannel = audioData
|
||||
spectrogram, err := shazam.Spectrogram(audioData, sampleRate)
|
||||
if err != nil {
|
||||
return js.ValueOf(map[string]interface{}{
|
||||
"error": 3,
|
||||
"data": "Error generating spectrogram: " + err.Error(),
|
||||
})
|
||||
}
|
||||
peaks := shazam.ExtractPeaks(spectrogram, float64(len(audioData))/float64(sampleRate), sampleRate)
|
||||
fingerprint = shazam.Fingerprint(peaks, utils.GenerateUniqueID())
|
||||
} else {
|
||||
for i := 0; i < len(audioData); i += 2 {
|
||||
leftChannel = append(leftChannel, audioData[i])
|
||||
rightChannel = append(rightChannel, audioData[i+1])
|
||||
}
|
||||
|
||||
// LEFT
|
||||
spectrogram, err := shazam.Spectrogram(leftChannel, sampleRate)
|
||||
if err != nil {
|
||||
return js.ValueOf(map[string]interface{}{
|
||||
"error": 3,
|
||||
"data": "Error generating spectrogram: " + err.Error(),
|
||||
})
|
||||
}
|
||||
peaks := shazam.ExtractPeaks(spectrogram, float64(len(leftChannel))/float64(sampleRate), sampleRate)
|
||||
utils.ExtendMap(fingerprint, shazam.Fingerprint(peaks, utils.GenerateUniqueID()))
|
||||
|
||||
// RIGHT
|
||||
spectrogram, err = shazam.Spectrogram(rightChannel, sampleRate)
|
||||
if err != nil {
|
||||
return js.ValueOf(map[string]interface{}{
|
||||
"error": 3,
|
||||
"data": "Error generating spectrogram: " + err.Error(),
|
||||
})
|
||||
}
|
||||
peaks = shazam.ExtractPeaks(spectrogram, float64(len(rightChannel))/float64(sampleRate), sampleRate)
|
||||
utils.ExtendMap(fingerprint, shazam.Fingerprint(peaks, utils.GenerateUniqueID()))
|
||||
}
|
||||
|
||||
fingerprintArray := []interface{}{}
|
||||
for address, couple := range fingerprint {
|
||||
entry := map[string]interface{}{
|
||||
"address": address,
|
||||
"anchorTime": couple.AnchorTimeMs,
|
||||
}
|
||||
fingerprintArray = append(fingerprintArray, entry)
|
||||
}
|
||||
|
||||
return js.ValueOf(map[string]interface{}{
|
||||
"error": 0,
|
||||
"data": fingerprintArray,
|
||||
})
|
||||
}
|
||||
|
||||
func main() {
|
||||
js.Global().Set("generateFingerprint", js.FuncOf(generateFingerprint))
|
||||
|
||||
js.Global().Call("dispatchEvent", js.Global().Get("Event").New("wasmReady"))
|
||||
|
||||
select {}
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue