From 66b0071698de67b695ae5f2c084c08502163b922 Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Tue, 1 Apr 2025 17:44:54 +0100 Subject: [PATCH 1/5] feat: integrate FFmpeg for audio processing and add WASM support for fingerprint generation --- client/src/App.js | 160 +++++++++++++++++++++++++++++----------------- 1 file changed, 102 insertions(+), 58 deletions(-) diff --git a/client/src/App.js b/client/src/App.js index 904c618..cfa0e1f 100644 --- a/client/src/App.js +++ b/client/src/App.js @@ -9,20 +9,28 @@ import { ToastContainer, toast, Slide } from "react-toastify"; import "react-toastify/dist/ReactToastify.css"; import { MediaRecorder, register } from "extendable-media-recorder"; import { connect } from "extendable-media-recorder-wav-encoder"; +import { FFmpeg } from '@ffmpeg/ffmpeg'; +import { fetchFile } from '@ffmpeg/util'; + import AnimatedNumber from "./components/AnimatedNumber"; const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000"; +// https://seek-tune-rq4gn.ondigitalocean.app/ var socket = io(server); function App() { + let ffmpegLoaded = false; + const ffmpeg = new FFmpeg(); + const uploadRecording = true + const isPhone = window.innerWidth <= 550 const [stream, setStream] = useState(); const [matches, setMatches] = useState([]); const [totalSongs, setTotalSongs] = useState(10); const [isListening, setisListening] = useState(false); const [audioInput, setAudioInput] = useState("device"); // or "mic" - const [isPhone, setIsPhone] = useState(window.innerWidth <= 550); + const [genFingerprint, setGenFingerprint] = useState(null); const [registeredMediaEncoder, setRegisteredMediaEncoder] = useState(false); const streamRef = useRef(stream); @@ -78,8 +86,38 @@ function App() { return () => clearInterval(intervalId); }, []); + useEffect(() => { + (async () => { + try { + const go = new window.Go(); + const result = await WebAssembly.instantiateStreaming( + fetch("/main.wasm"), + go.importObject + ); + go.run(result.instance); + + if (typeof window.generateFingerprint === "function") { + setGenFingerprint(() => window.generateFingerprint); + } + + } catch (error) { + console.error("Error loading WASM:", error); + } + })(); + }, []); + async function record() { try { + if (!genFingerprint) { + console.error("WASM is not loaded yet."); + return; + } + + if (!ffmpegLoaded) { + await ffmpeg.load(); + ffmpegLoaded = true; + } + const mediaDevice = audioInput === "device" ? navigator.mediaDevices.getDisplayMedia.bind(navigator.mediaDevices) @@ -113,33 +151,6 @@ function App() { track.stop(); } - /** Attempt to change sampleRate - const audioContext = new AudioContext({ - sampleRate: 44100, - }); - const mediaStreamAudioSourceNode = new MediaStreamAudioSourceNode( - audioContext, - { mediaStream: audioStream } - ); - const mediaStreamAudioDestinationNode = - new MediaStreamAudioDestinationNode(audioContext, { - channelCount: 1, - }); - - mediaStreamAudioSourceNode.connect(mediaStreamAudioDestinationNode); - - const mediaRecorder = new MediaRecorder( - mediaStreamAudioDestinationNode.stream, - { mimeType: "audio/wav" } - ); - - const settings = mediaStreamAudioDestinationNode.stream - .getAudioTracks()[0] - .getSettings(); - - console.log("Settings: ", settings); - */ - const mediaRecorder = new MediaRecorder(audioStream, { mimeType: "audio/wav", }); @@ -158,45 +169,77 @@ function App() { mediaRecorder.stop(); }, 20000); - mediaRecorder.addEventListener("stop", () => { + mediaRecorder.addEventListener("stop", async () => { const blob = new Blob(chunks, { type: "audio/wav" }); - const reader = new FileReader(); cleanUp(); - // downloadRecording(blob); - reader.readAsArrayBuffer(blob); + const inputFile = 'input.wav'; + const outputFile = 'output_mono.wav'; + + // Convert audio to mono with a sample rate of 44100 Hz + await ffmpeg.writeFile(inputFile, await fetchFile(blob)) + const exitCode = await ffmpeg.exec([ + '-i', inputFile, + '-c', 'pcm_s16le', + '-ar', '44100', + '-ac', '1', + '-f', 'wav', + outputFile + ]); + if (exitCode !== 0) { + throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`); + } + + const monoData = await ffmpeg.readFile(outputFile); + const monoBlob = new Blob([monoData.buffer], { type: 'audio/wav' }); + + const reader = new FileReader(); + reader.readAsArrayBuffer(monoBlob); reader.onload = async (event) => { const arrayBuffer = event.target.result; - - // get record duration - const arrayBufferCopy = arrayBuffer.slice(0); const audioContext = new AudioContext(); - const audioBufferDecoded = await audioContext.decodeAudioData( - arrayBufferCopy - ); - const recordDuration = audioBufferDecoded.duration; + const arrayBufferCopy = arrayBuffer.slice(0); + const audioBufferDecoded = await audioContext.decodeAudioData(arrayBufferCopy); + + const audioData = audioBufferDecoded.getChannelData(0); + const audioArray = Array.from(audioData); - var binary = ""; - var bytes = new Uint8Array(arrayBuffer); - var len = bytes.byteLength; - for (var i = 0; i < len; i++) { - binary += String.fromCharCode(bytes[i]); + const result = genFingerprint(audioArray, audioBufferDecoded.sampleRate); + if (result.error !== 0) { + toast["error"](() =>
An error occured
) + console.log("An error occured: ", result) + return } - // Convert byte array to base64 - const rawAudio = btoa(binary); - const audioConfig = audioStream.getAudioTracks()[0].getSettings(); - - const recordData = { - audio: rawAudio, - duration: recordDuration, - channels: audioConfig.channelCount, - sampleRate: audioConfig.sampleRate, - sampleSize: audioConfig.sampleSize, - }; + const fingerprintMap = result.data.reduce((dict, item) => { + dict[item.address] = item.anchorTime; + return dict; + }, {}); if (sendRecordingRef.current) { + socket.emit("newFingerprint", JSON.stringify({ fingerprint: fingerprintMap })); + } + + if (uploadRecording) { + var bytes = new Uint8Array(arrayBuffer); + var rawAudio = ""; + for (var i = 0; i < bytes.byteLength; i++) { + rawAudio += String.fromCharCode(bytes[i]); + } + + const dataView = new DataView(arrayBuffer); + + const recordData = { + audio: btoa(rawAudio), + channels: dataView.getUint16(22, true), + sampleRate: dataView.getUint16(24, true), + sampleSize: dataView.getUint16(34, true), + duration: audioBufferDecoded.duration, + }; + + console.log("Record data: ", recordData); + socket.emit("newRecording", JSON.stringify(recordData)); } }; @@ -207,10 +250,11 @@ function App() { } } + + function downloadRecording(blob) { const blobUrl = URL.createObjectURL(blob); - // Create a download link const downloadLink = document.createElement("a"); downloadLink.href = blobUrl; downloadLink.download = "recorded_audio.wav"; @@ -244,7 +288,7 @@ function App() { return (
-

SeekTune

+

!Shazam

 Songs @@ -302,4 +346,4 @@ function App() { ); } -export default App; +export default App; \ No newline at end of file From 8e1e34a7f5f67f42dfa95f053ad3649f52f35f8d Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Tue, 1 Apr 2025 17:45:14 +0100 Subject: [PATCH 2/5] feat: add FFmpeg dependencies --- client/package.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/client/package.json b/client/package.json index ebc9936..98c99a2 100644 --- a/client/package.json +++ b/client/package.json @@ -3,6 +3,8 @@ "version": "0.1.0", "private": true, "dependencies": { + "@ffmpeg/ffmpeg": "^0.12.15", + "@ffmpeg/util": "^0.12.2", "@testing-library/jest-dom": "^5.17.0", "@testing-library/react": "^13.4.0", "@testing-library/user-event": "^13.5.0", From 6647fa1af7d12badd55c9777d52a876a990dc07c Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Tue, 1 Apr 2025 17:46:30 +0100 Subject: [PATCH 3/5] feat: add event handler to process new fingerprints --- cmdHandlers.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmdHandlers.go b/cmdHandlers.go index 71e64d0..c336eaf 100644 --- a/cmdHandlers.go +++ b/cmdHandlers.go @@ -136,6 +136,7 @@ func serve(protocol, port string) { server.OnEvent("/", "totalSongs", handleTotalSongs) server.OnEvent("/", "newDownload", handleSongDownload) server.OnEvent("/", "newRecording", handleNewRecording) + server.OnEvent("/", "newFingerprint", handleNewFingerprint) server.OnError("/", func(s socketio.Conn, e error) { log.Println("meet error:", e) From 8a918c74cd3c5f58ef56a8aab0ac2f94f40a8c1f Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Tue, 1 Apr 2025 17:53:17 +0100 Subject: [PATCH 4/5] feat: implement audio snippet recording and fingerprint processing --- socketHandlers.go | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/socketHandlers.go b/socketHandlers.go index 75ce0c6..d3f34c2 100644 --- a/socketHandlers.go +++ b/socketHandlers.go @@ -2,6 +2,7 @@ package main import ( "context" + "encoding/base64" "encoding/json" "fmt" "log/slog" @@ -10,7 +11,9 @@ import ( "song-recognition/shazam" "song-recognition/spotify" "song-recognition/utils" + "song-recognition/wav" "strings" + "time" socketio "github.com/googollee/go-socket.io" "github.com/mdobak/go-xerrors" @@ -175,6 +178,7 @@ func handleSongDownload(socket socketio.Conn, spotifyURL string) { } } +// handleNewRecording saves new recorded audio snippet to a WAV file. func handleNewRecording(socket socketio.Conn, recordData string) { logger := utils.GetLogger() ctx := context.Background() @@ -186,14 +190,46 @@ func handleNewRecording(socket socketio.Conn, recordData string) { return } - samples, err := utils.ProcessRecording(&recData, true) + err := utils.CreateFolder("recordings") if err != nil { err := xerrors.New(err) - logger.ErrorContext(ctx, "Failed to process recording.", slog.Any("error", err)) + logger.ErrorContext(ctx, "Failed create folder.", slog.Any("error", err)) + } + + now := time.Now() + fileName := fmt.Sprintf("%04d_%02d_%02d_%02d_%02d_%02d.wav", + now.Second(), now.Minute(), now.Hour(), + now.Day(), now.Month(), now.Year(), + ) + filePath := "recordings/" + fileName + + decodedAudioData, err := base64.StdEncoding.DecodeString(recData.Audio) + if err != nil { + err := xerrors.New(err) + logger.ErrorContext(ctx, "Failed to decode base64", slog.Any("error", err)) + } + + err = wav.WriteWavFile(filePath, decodedAudioData, recData.SampleRate, recData.Channels, recData.SampleSize) + if err != nil { + err := xerrors.New(err) + logger.ErrorContext(ctx, "Failed write wav file.", slog.Any("error", err)) + } +} + +func handleNewFingerprint(socket socketio.Conn, fingerprintData string) { + logger := utils.GetLogger() + ctx := context.Background() + + var data struct { + Fingerprint map[uint32]uint32 `json:"fingerprint"` + } + if err := json.Unmarshal([]byte(fingerprintData), &data); err != nil { + err := xerrors.New(err) + logger.ErrorContext(ctx, "Failed to unmarshal fingerprint data.", slog.Any("error", err)) return } - matches, _, err := shazam.FindMatches(samples, recData.Duration, recData.SampleRate) + matches, _, err := shazam.FindMatchesFGP(data.Fingerprint) if err != nil { err := xerrors.New(err) logger.ErrorContext(ctx, "failed to get matches.", slog.Any("error", err)) From 77e544ce4121810cb347d4d6ce2e5507cde3550c Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Tue, 1 Apr 2025 17:57:25 +0100 Subject: [PATCH 5/5] Add ReformatWAV function for channel conversion --- utils/helpers.go | 4 ---- wav/convert.go | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/utils/helpers.go b/utils/helpers.go index cd7ceb0..98c1e56 100644 --- a/utils/helpers.go +++ b/utils/helpers.go @@ -38,13 +38,11 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) { switch bitsPerSample { case 8: for _, sample := range data { - // Convert float to 8-bit unsigned integer val := uint8((sample + 1.0) * 127.5) byteData = append(byteData, byte(val)) } case 16: for _, sample := range data { - // Convert float to 16-bit signed integer val := int16(sample * 32767.0) buf := make([]byte, 2) binary.LittleEndian.PutUint16(buf, uint16(val)) @@ -52,7 +50,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) { } case 24: for _, sample := range data { - // Convert float to 24-bit signed integer val := int32(sample * 8388607.0) buf := make([]byte, 4) binary.LittleEndian.PutUint32(buf, uint32(val)<<8) // Shift by 8 bits to fit 24-bit @@ -60,7 +57,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) { } case 32: for _, sample := range data { - // Convert float to 32-bit signed integer val := int32(sample * 2147483647.0) buf := make([]byte, 4) binary.LittleEndian.PutUint32(buf, uint32(val)) diff --git a/wav/convert.go b/wav/convert.go index aa5366b..1c815ba 100644 --- a/wav/convert.go +++ b/wav/convert.go @@ -51,6 +51,8 @@ func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, err e return outputFile, nil } +// ReformatWAV converts a given WAV file to the specified number of channels, +// either mono (1 channel) or stereo (2 channels). func ReformatWAV(inputFilePath string, channels int) (reformatedFilePath string, errr error) { if channels < 1 || channels > 2 { channels = 1