Merge pull request #35 from cgzirim/development

Add WASM support for fingerprint generation on the client side
This commit is contained in:
Chigozirim Igweamaka 2025-04-01 18:12:07 +01:00 committed by GitHub
commit be76a55c52
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 145 additions and 65 deletions

View file

@ -3,6 +3,8 @@
"version": "0.1.0", "version": "0.1.0",
"private": true, "private": true,
"dependencies": { "dependencies": {
"@ffmpeg/ffmpeg": "^0.12.15",
"@ffmpeg/util": "^0.12.2",
"@testing-library/jest-dom": "^5.17.0", "@testing-library/jest-dom": "^5.17.0",
"@testing-library/react": "^13.4.0", "@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.5.0", "@testing-library/user-event": "^13.5.0",

View file

@ -9,20 +9,28 @@ import { ToastContainer, toast, Slide } from "react-toastify";
import "react-toastify/dist/ReactToastify.css"; import "react-toastify/dist/ReactToastify.css";
import { MediaRecorder, register } from "extendable-media-recorder"; import { MediaRecorder, register } from "extendable-media-recorder";
import { connect } from "extendable-media-recorder-wav-encoder"; import { connect } from "extendable-media-recorder-wav-encoder";
import { FFmpeg } from '@ffmpeg/ffmpeg';
import { fetchFile } from '@ffmpeg/util';
import AnimatedNumber from "./components/AnimatedNumber"; import AnimatedNumber from "./components/AnimatedNumber";
const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000"; const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000";
// https://seek-tune-rq4gn.ondigitalocean.app/
var socket = io(server); var socket = io(server);
function App() { function App() {
let ffmpegLoaded = false;
const ffmpeg = new FFmpeg();
const uploadRecording = true
const isPhone = window.innerWidth <= 550
const [stream, setStream] = useState(); const [stream, setStream] = useState();
const [matches, setMatches] = useState([]); const [matches, setMatches] = useState([]);
const [totalSongs, setTotalSongs] = useState(10); const [totalSongs, setTotalSongs] = useState(10);
const [isListening, setisListening] = useState(false); const [isListening, setisListening] = useState(false);
const [audioInput, setAudioInput] = useState("device"); // or "mic" const [audioInput, setAudioInput] = useState("device"); // or "mic"
const [isPhone, setIsPhone] = useState(window.innerWidth <= 550); const [genFingerprint, setGenFingerprint] = useState(null);
const [registeredMediaEncoder, setRegisteredMediaEncoder] = useState(false); const [registeredMediaEncoder, setRegisteredMediaEncoder] = useState(false);
const streamRef = useRef(stream); const streamRef = useRef(stream);
@ -78,8 +86,38 @@ function App() {
return () => clearInterval(intervalId); return () => clearInterval(intervalId);
}, []); }, []);
// Load the WASM module once on mount and publish its fingerprint function
// through component state so record() can tell when it is ready.
useEffect(() => {
  // Flipped by the cleanup below so a load that finishes after unmount
  // does not call the state setter on a dead component.
  let cancelled = false;

  (async () => {
    try {
      // NOTE(review): `window.Go` is presumably defined by Go's wasm_exec.js
      // glue script loaded outside this file — confirm it is included.
      if (typeof window.Go !== "function") {
        throw new Error("Go WASM runtime (window.Go) is not available.");
      }

      const go = new window.Go();
      const result = await WebAssembly.instantiateStreaming(
        fetch("/main.wasm"),
        go.importObject
      );

      // Intentionally not awaited, matching the original flow: the code
      // below runs right after the module is started.
      // NOTE(review): assumes the module registers window.generateFingerprint
      // synchronously during start-up — confirm against the Go side.
      go.run(result.instance);

      if (cancelled) {
        return;
      }

      if (typeof window.generateFingerprint === "function") {
        // Wrap in an arrow: a bare function passed to a state setter is
        // treated by React as an updater and would be invoked, not stored.
        setGenFingerprint(() => window.generateFingerprint);
      } else {
        // Surface the failure instead of silently leaving state as null.
        console.error(
          "Error loading WASM:",
          new Error("window.generateFingerprint is not a function.")
        );
      }
    } catch (error) {
      console.error("Error loading WASM:", error);
    }
  })();

  return () => {
    cancelled = true;
  };
}, []);
async function record() { async function record() {
try { try {
if (!genFingerprint) {
console.error("WASM is not loaded yet.");
return;
}
if (!ffmpegLoaded) {
await ffmpeg.load();
ffmpegLoaded = true;
}
const mediaDevice = const mediaDevice =
audioInput === "device" audioInput === "device"
? navigator.mediaDevices.getDisplayMedia.bind(navigator.mediaDevices) ? navigator.mediaDevices.getDisplayMedia.bind(navigator.mediaDevices)
@ -113,33 +151,6 @@ function App() {
track.stop(); track.stop();
} }
/** Attempt to change sampleRate
const audioContext = new AudioContext({
sampleRate: 44100,
});
const mediaStreamAudioSourceNode = new MediaStreamAudioSourceNode(
audioContext,
{ mediaStream: audioStream }
);
const mediaStreamAudioDestinationNode =
new MediaStreamAudioDestinationNode(audioContext, {
channelCount: 1,
});
mediaStreamAudioSourceNode.connect(mediaStreamAudioDestinationNode);
const mediaRecorder = new MediaRecorder(
mediaStreamAudioDestinationNode.stream,
{ mimeType: "audio/wav" }
);
const settings = mediaStreamAudioDestinationNode.stream
.getAudioTracks()[0]
.getSettings();
console.log("Settings: ", settings);
*/
const mediaRecorder = new MediaRecorder(audioStream, { const mediaRecorder = new MediaRecorder(audioStream, {
mimeType: "audio/wav", mimeType: "audio/wav",
}); });
@ -158,45 +169,77 @@ function App() {
mediaRecorder.stop(); mediaRecorder.stop();
}, 20000); }, 20000);
mediaRecorder.addEventListener("stop", () => { mediaRecorder.addEventListener("stop", async () => {
const blob = new Blob(chunks, { type: "audio/wav" }); const blob = new Blob(chunks, { type: "audio/wav" });
const reader = new FileReader();
cleanUp(); cleanUp();
// downloadRecording(blob);
reader.readAsArrayBuffer(blob); const inputFile = 'input.wav';
const outputFile = 'output_mono.wav';
// Convert audio to mono with a sample rate of 44100 Hz
await ffmpeg.writeFile(inputFile, await fetchFile(blob))
const exitCode = await ffmpeg.exec([
'-i', inputFile,
'-c', 'pcm_s16le',
'-ar', '44100',
'-ac', '1',
'-f', 'wav',
outputFile
]);
if (exitCode !== 0) {
throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`);
}
const monoData = await ffmpeg.readFile(outputFile);
const monoBlob = new Blob([monoData.buffer], { type: 'audio/wav' });
const reader = new FileReader();
reader.readAsArrayBuffer(monoBlob);
reader.onload = async (event) => { reader.onload = async (event) => {
const arrayBuffer = event.target.result; const arrayBuffer = event.target.result;
// get record duration
const arrayBufferCopy = arrayBuffer.slice(0);
const audioContext = new AudioContext(); const audioContext = new AudioContext();
const audioBufferDecoded = await audioContext.decodeAudioData( const arrayBufferCopy = arrayBuffer.slice(0);
arrayBufferCopy const audioBufferDecoded = await audioContext.decodeAudioData(arrayBufferCopy);
);
const recordDuration = audioBufferDecoded.duration; const audioData = audioBufferDecoded.getChannelData(0);
const audioArray = Array.from(audioData);
var binary = ""; const result = genFingerprint(audioArray, audioBufferDecoded.sampleRate);
var bytes = new Uint8Array(arrayBuffer); if (result.error !== 0) {
var len = bytes.byteLength; toast["error"](() => <div>An error occured</div>)
for (var i = 0; i < len; i++) { console.log("An error occured: ", result)
binary += String.fromCharCode(bytes[i]); return
} }
// Convert byte array to base64 const fingerprintMap = result.data.reduce((dict, item) => {
const rawAudio = btoa(binary); dict[item.address] = item.anchorTime;
const audioConfig = audioStream.getAudioTracks()[0].getSettings(); return dict;
}, {});
const recordData = {
audio: rawAudio,
duration: recordDuration,
channels: audioConfig.channelCount,
sampleRate: audioConfig.sampleRate,
sampleSize: audioConfig.sampleSize,
};
if (sendRecordingRef.current) { if (sendRecordingRef.current) {
socket.emit("newFingerprint", JSON.stringify({ fingerprint: fingerprintMap }));
}
if (uploadRecording) {
var bytes = new Uint8Array(arrayBuffer);
var rawAudio = "";
for (var i = 0; i < bytes.byteLength; i++) {
rawAudio += String.fromCharCode(bytes[i]);
}
const dataView = new DataView(arrayBuffer);
const recordData = {
audio: btoa(rawAudio),
channels: dataView.getUint16(22, true),
sampleRate: dataView.getUint16(24, true),
sampleSize: dataView.getUint16(34, true),
duration: audioBufferDecoded.duration,
};
console.log("Record data: ", recordData);
socket.emit("newRecording", JSON.stringify(recordData)); socket.emit("newRecording", JSON.stringify(recordData));
} }
}; };
@ -207,10 +250,11 @@ function App() {
} }
} }
function downloadRecording(blob) { function downloadRecording(blob) {
const blobUrl = URL.createObjectURL(blob); const blobUrl = URL.createObjectURL(blob);
// Create a download link
const downloadLink = document.createElement("a"); const downloadLink = document.createElement("a");
downloadLink.href = blobUrl; downloadLink.href = blobUrl;
downloadLink.download = "recorded_audio.wav"; downloadLink.download = "recorded_audio.wav";
@ -244,7 +288,7 @@ function App() {
return ( return (
<div className="App"> <div className="App">
<div className="TopHeader"> <div className="TopHeader">
<h2 style={{ color: "#374151" }}>SeekTune</h2> <h2 style={{ color: "#374151" }}>!Shazam</h2>
<h4 style={{ display: "flex", justifyContent: "flex-end" }}> <h4 style={{ display: "flex", justifyContent: "flex-end" }}>
<AnimatedNumber includeComma={true} animateToNumber={totalSongs} /> <AnimatedNumber includeComma={true} animateToNumber={totalSongs} />
&nbsp;Songs &nbsp;Songs
@ -302,4 +346,4 @@ function App() {
); );
} }
export default App; export default App;

View file

@ -136,6 +136,7 @@ func serve(protocol, port string) {
server.OnEvent("/", "totalSongs", handleTotalSongs) server.OnEvent("/", "totalSongs", handleTotalSongs)
server.OnEvent("/", "newDownload", handleSongDownload) server.OnEvent("/", "newDownload", handleSongDownload)
server.OnEvent("/", "newRecording", handleNewRecording) server.OnEvent("/", "newRecording", handleNewRecording)
server.OnEvent("/", "newFingerprint", handleNewFingerprint)
server.OnError("/", func(s socketio.Conn, e error) { server.OnError("/", func(s socketio.Conn, e error) {
log.Println("meet error:", e) log.Println("meet error:", e)

View file

@ -2,6 +2,7 @@ package main
import ( import (
"context" "context"
"encoding/base64"
"encoding/json" "encoding/json"
"fmt" "fmt"
"log/slog" "log/slog"
@ -12,6 +13,7 @@ import (
"song-recognition/utils" "song-recognition/utils"
"song-recognition/wav" "song-recognition/wav"
"strings" "strings"
"time"
socketio "github.com/googollee/go-socket.io" socketio "github.com/googollee/go-socket.io"
"github.com/mdobak/go-xerrors" "github.com/mdobak/go-xerrors"
@ -176,6 +178,7 @@ func handleSongDownload(socket socketio.Conn, spotifyURL string) {
} }
} }
// handleNewRecording saves a newly recorded audio snippet to a WAV file.
func handleNewRecording(socket socketio.Conn, recordData string) { func handleNewRecording(socket socketio.Conn, recordData string) {
logger := utils.GetLogger() logger := utils.GetLogger()
ctx := context.Background() ctx := context.Background()
@ -187,14 +190,46 @@ func handleNewRecording(socket socketio.Conn, recordData string) {
return return
} }
samples, err := wav.ProcessRecording(&recData, true) err := utils.CreateFolder("recordings")
if err != nil { if err != nil {
err := xerrors.New(err) err := xerrors.New(err)
logger.ErrorContext(ctx, "Failed to process recording.", slog.Any("error", err)) logger.ErrorContext(ctx, "Failed create folder.", slog.Any("error", err))
}
now := time.Now()
fileName := fmt.Sprintf("%04d_%02d_%02d_%02d_%02d_%02d.wav",
now.Second(), now.Minute(), now.Hour(),
now.Day(), now.Month(), now.Year(),
)
filePath := "recordings/" + fileName
decodedAudioData, err := base64.StdEncoding.DecodeString(recData.Audio)
if err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "Failed to decode base64", slog.Any("error", err))
}
err = wav.WriteWavFile(filePath, decodedAudioData, recData.SampleRate, recData.Channels, recData.SampleSize)
if err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "Failed write wav file.", slog.Any("error", err))
}
}
func handleNewFingerprint(socket socketio.Conn, fingerprintData string) {
logger := utils.GetLogger()
ctx := context.Background()
var data struct {
Fingerprint map[uint32]uint32 `json:"fingerprint"`
}
if err := json.Unmarshal([]byte(fingerprintData), &data); err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "Failed to unmarshal fingerprint data.", slog.Any("error", err))
return return
} }
matches, _, err := shazam.FindMatches(samples, recData.Duration, recData.SampleRate) matches, _, err := shazam.FindMatchesFGP(data.Fingerprint)
if err != nil { if err != nil {
err := xerrors.New(err) err := xerrors.New(err)
logger.ErrorContext(ctx, "failed to get matches.", slog.Any("error", err)) logger.ErrorContext(ctx, "failed to get matches.", slog.Any("error", err))

View file

@ -60,13 +60,11 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
switch bitsPerSample { switch bitsPerSample {
case 8: case 8:
for _, sample := range data { for _, sample := range data {
// Convert float to 8-bit unsigned integer
val := uint8((sample + 1.0) * 127.5) val := uint8((sample + 1.0) * 127.5)
byteData = append(byteData, byte(val)) byteData = append(byteData, byte(val))
} }
case 16: case 16:
for _, sample := range data { for _, sample := range data {
// Convert float to 16-bit signed integer
val := int16(sample * 32767.0) val := int16(sample * 32767.0)
buf := make([]byte, 2) buf := make([]byte, 2)
binary.LittleEndian.PutUint16(buf, uint16(val)) binary.LittleEndian.PutUint16(buf, uint16(val))
@ -74,7 +72,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
} }
case 24: case 24:
for _, sample := range data { for _, sample := range data {
// Convert float to 24-bit signed integer
val := int32(sample * 8388607.0) val := int32(sample * 8388607.0)
buf := make([]byte, 4) buf := make([]byte, 4)
binary.LittleEndian.PutUint32(buf, uint32(val)<<8) // Shift by 8 bits to fit 24-bit binary.LittleEndian.PutUint32(buf, uint32(val)<<8) // Shift by 8 bits to fit 24-bit
@ -82,7 +79,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
} }
case 32: case 32:
for _, sample := range data { for _, sample := range data {
// Convert float to 32-bit signed integer
val := int32(sample * 2147483647.0) val := int32(sample * 2147483647.0)
buf := make([]byte, 4) buf := make([]byte, 4)
binary.LittleEndian.PutUint32(buf, uint32(val)) binary.LittleEndian.PutUint32(buf, uint32(val))

View file

@ -52,6 +52,8 @@ func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, err e
return outputFile, nil return outputFile, nil
} }
// ReformatWAV converts a given WAV file to the specified number of channels,
// either mono (1 channel) or stereo (2 channels).
func ReformatWAV(inputFilePath string, channels int) (reformatedFilePath string, errr error) { func ReformatWAV(inputFilePath string, channels int) (reformatedFilePath string, errr error) {
if channels < 1 || channels > 2 { if channels < 1 || channels > 2 {
channels = 1 channels = 1