Mirror of https://github.com/cgzirim/seek-tune.git (synced 2025-12-17 08:54:19 +00:00)
Merge pull request #35 from cgzirim/development

Add WASM support for fingerprint generation on the client side

Commit be76a55c52 · 6 changed files with 145 additions and 65 deletions
@@ -3,6 +3,8 @@
  "version": "0.1.0",
  "private": true,
  "dependencies": {
    "@ffmpeg/ffmpeg": "^0.12.15",
    "@ffmpeg/util": "^0.12.2",
    "@testing-library/jest-dom": "^5.17.0",
    "@testing-library/react": "^13.4.0",
    "@testing-library/user-event": "^13.5.0",
@@ -9,20 +9,28 @@ import { ToastContainer, toast, Slide } from "react-toastify";
import "react-toastify/dist/ReactToastify.css";
import { MediaRecorder, register } from "extendable-media-recorder";
import { connect } from "extendable-media-recorder-wav-encoder";
import { FFmpeg } from '@ffmpeg/ffmpeg';
import { fetchFile } from '@ffmpeg/util';

import AnimatedNumber from "./components/AnimatedNumber";

const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000";
// https://seek-tune-rq4gn.ondigitalocean.app/

var socket = io(server);

function App() {
  let ffmpegLoaded = false;
  const ffmpeg = new FFmpeg();
  const uploadRecording = true
  const isPhone = window.innerWidth <= 550
  const [stream, setStream] = useState();
  const [matches, setMatches] = useState([]);
  const [totalSongs, setTotalSongs] = useState(10);
  const [isListening, setisListening] = useState(false);
  const [audioInput, setAudioInput] = useState("device"); // or "mic"
  const [isPhone, setIsPhone] = useState(window.innerWidth <= 550);
  const [genFingerprint, setGenFingerprint] = useState(null);
  const [registeredMediaEncoder, setRegisteredMediaEncoder] = useState(false);

  const streamRef = useRef(stream);
@@ -78,8 +86,38 @@ function App() {
    return () => clearInterval(intervalId);
  }, []);

  useEffect(() => {
    (async () => {
      try {
        const go = new window.Go();
        const result = await WebAssembly.instantiateStreaming(
          fetch("/main.wasm"),
          go.importObject
        );
        go.run(result.instance);

        if (typeof window.generateFingerprint === "function") {
          setGenFingerprint(() => window.generateFingerprint);
        }
      } catch (error) {
        console.error("Error loading WASM:", error);
      }
    })();
  }, []);

  async function record() {
    try {
      if (!genFingerprint) {
        console.error("WASM is not loaded yet.");
        return;
      }

      if (!ffmpegLoaded) {
        await ffmpeg.load();
        ffmpegLoaded = true;
      }

      const mediaDevice =
        audioInput === "device"
          ? navigator.mediaDevices.getDisplayMedia.bind(navigator.mediaDevices)
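Two things the effect above relies on: window.Go comes from Go's wasm_exec.js support script, which the page has to load before instantiating /main.wasm, and window.generateFingerprint is registered by the WASM module itself. The module's Go source is not part of this diff; the sketch below is an illustrative guess at how such an export can be wired up with syscall/js, returning the { error, data: [{ address, anchorTime }] } shape the record() handler consumes. The couple type and fingerprintSamples stub are placeholders for the project's real fingerprinting code.

```go
//go:build js && wasm

package main

import "syscall/js"

// couple mirrors the { address, anchorTime } objects that App.js reads from
// result.data. The real type lives in the project's fingerprinting code;
// this sketch only reproduces the shape.
type couple struct {
	Address    uint32
	AnchorTime uint32
}

// fingerprintSamples is a stand-in for the project's actual fingerprinting
// routine (not shown in this diff).
func fingerprintSamples(samples []float64, sampleRate int) ([]couple, error) {
	return nil, nil // placeholder
}

// generateFingerprint adapts the Go routine to a JavaScript-callable
// function: window.generateFingerprint(audioArray, sampleRate).
func generateFingerprint(this js.Value, args []js.Value) interface{} {
	jsSamples := args[0]
	sampleRate := args[1].Int()

	// Copy the JS number array into a Go slice.
	samples := make([]float64, jsSamples.Length())
	for i := range samples {
		samples[i] = jsSamples.Index(i).Float()
	}

	couples, err := fingerprintSamples(samples, sampleRate)
	if err != nil {
		return map[string]interface{}{"error": 1}
	}

	data := make([]interface{}, 0, len(couples))
	for _, c := range couples {
		data = append(data, map[string]interface{}{
			"address":    c.Address,
			"anchorTime": c.AnchorTime,
		})
	}
	return map[string]interface{}{"error": 0, "data": data}
}

func main() {
	js.Global().Set("generateFingerprint", js.FuncOf(generateFingerprint))
	select {} // block forever so the exported function stays callable
}
```

A module like this is built with GOOS=js GOARCH=wasm go build -o main.wasm and served as a static asset next to wasm_exec.js.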
@@ -113,33 +151,6 @@ function App() {
        track.stop();
      }

      /** Attempt to change sampleRate
      const audioContext = new AudioContext({
        sampleRate: 44100,
      });
      const mediaStreamAudioSourceNode = new MediaStreamAudioSourceNode(
        audioContext,
        { mediaStream: audioStream }
      );
      const mediaStreamAudioDestinationNode =
        new MediaStreamAudioDestinationNode(audioContext, {
          channelCount: 1,
        });

      mediaStreamAudioSourceNode.connect(mediaStreamAudioDestinationNode);

      const mediaRecorder = new MediaRecorder(
        mediaStreamAudioDestinationNode.stream,
        { mimeType: "audio/wav" }
      );

      const settings = mediaStreamAudioDestinationNode.stream
        .getAudioTracks()[0]
        .getSettings();

      console.log("Settings: ", settings);
      */

      const mediaRecorder = new MediaRecorder(audioStream, {
        mimeType: "audio/wav",
      });
@@ -158,45 +169,77 @@ function App() {
        mediaRecorder.stop();
      }, 20000);

      mediaRecorder.addEventListener("stop", () => {
      mediaRecorder.addEventListener("stop", async () => {
        const blob = new Blob(chunks, { type: "audio/wav" });
        const reader = new FileReader();

        cleanUp();
        // downloadRecording(blob);

        reader.readAsArrayBuffer(blob);
        const inputFile = 'input.wav';
        const outputFile = 'output_mono.wav';

        // Convert audio to mono with a sample rate of 44100 Hz
        await ffmpeg.writeFile(inputFile, await fetchFile(blob))
        const exitCode = await ffmpeg.exec([
          '-i', inputFile,
          '-c', 'pcm_s16le',
          '-ar', '44100',
          '-ac', '1',
          '-f', 'wav',
          outputFile
        ]);
        if (exitCode !== 0) {
          throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`);
        }

        const monoData = await ffmpeg.readFile(outputFile);
        const monoBlob = new Blob([monoData.buffer], { type: 'audio/wav' });

        const reader = new FileReader();
        reader.readAsArrayBuffer(monoBlob);
        reader.onload = async (event) => {
          const arrayBuffer = event.target.result;

          // get record duration
          const arrayBufferCopy = arrayBuffer.slice(0);
          const audioContext = new AudioContext();
          const audioBufferDecoded = await audioContext.decodeAudioData(
            arrayBufferCopy
          );
          const recordDuration = audioBufferDecoded.duration;
          const arrayBufferCopy = arrayBuffer.slice(0);
          const audioBufferDecoded = await audioContext.decodeAudioData(arrayBufferCopy);

          const audioData = audioBufferDecoded.getChannelData(0);
          const audioArray = Array.from(audioData);

          var binary = "";
          var bytes = new Uint8Array(arrayBuffer);
          var len = bytes.byteLength;
          for (var i = 0; i < len; i++) {
            binary += String.fromCharCode(bytes[i]);
          const result = genFingerprint(audioArray, audioBufferDecoded.sampleRate);
          if (result.error !== 0) {
            toast["error"](() => <div>An error occured</div>)
            console.log("An error occured: ", result)
            return
          }

          // Convert byte array to base64
          const rawAudio = btoa(binary);
          const audioConfig = audioStream.getAudioTracks()[0].getSettings();

          const recordData = {
            audio: rawAudio,
            duration: recordDuration,
            channels: audioConfig.channelCount,
            sampleRate: audioConfig.sampleRate,
            sampleSize: audioConfig.sampleSize,
          };
          const fingerprintMap = result.data.reduce((dict, item) => {
            dict[item.address] = item.anchorTime;
            return dict;
          }, {});

          if (sendRecordingRef.current) {
            socket.emit("newFingerprint", JSON.stringify({ fingerprint: fingerprintMap }));
          }

          if (uploadRecording) {
            var bytes = new Uint8Array(arrayBuffer);
            var rawAudio = "";
            for (var i = 0; i < bytes.byteLength; i++) {
              rawAudio += String.fromCharCode(bytes[i]);
            }

            const dataView = new DataView(arrayBuffer);

            const recordData = {
              audio: btoa(rawAudio),
              channels: dataView.getUint16(22, true),
              sampleRate: dataView.getUint16(24, true),
              sampleSize: dataView.getUint16(34, true),
              duration: audioBufferDecoded.duration,
            };

            console.log("Record data: ", recordData);

            socket.emit("newRecording", JSON.stringify(recordData));
          }
        };
@@ -207,10 +250,11 @@ function App() {
    }
  }

  function downloadRecording(blob) {
    const blobUrl = URL.createObjectURL(blob);

    // Create a download link
    const downloadLink = document.createElement("a");
    downloadLink.href = blobUrl;
    downloadLink.download = "recorded_audio.wav";
@@ -244,7 +288,7 @@ function App() {
  return (
    <div className="App">
      <div className="TopHeader">
        <h2 style={{ color: "#374151" }}>SeekTune</h2>
        <h2 style={{ color: "#374151" }}>!Shazam</h2>
        <h4 style={{ display: "flex", justifyContent: "flex-end" }}>
          <AnimatedNumber includeComma={true} animateToNumber={totalSongs} />
          Songs
@@ -302,4 +346,4 @@ function App() {
  );
}

export default App;
export default App;
@@ -136,6 +136,7 @@ func serve(protocol, port string) {
	server.OnEvent("/", "totalSongs", handleTotalSongs)
	server.OnEvent("/", "newDownload", handleSongDownload)
	server.OnEvent("/", "newRecording", handleNewRecording)
	server.OnEvent("/", "newFingerprint", handleNewFingerprint)

	server.OnError("/", func(s socketio.Conn, e error) {
		log.Println("meet error:", e)
@@ -2,6 +2,7 @@ package main

import (
	"context"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"log/slog"

@@ -12,6 +13,7 @@ import (
	"song-recognition/utils"
	"song-recognition/wav"
	"strings"
	"time"

	socketio "github.com/googollee/go-socket.io"
	"github.com/mdobak/go-xerrors"

@@ -176,6 +178,7 @@ func handleSongDownload(socket socketio.Conn, spotifyURL string) {
	}
}

// handleNewRecording saves new recorded audio snippet to a WAV file.
func handleNewRecording(socket socketio.Conn, recordData string) {
	logger := utils.GetLogger()
	ctx := context.Background()

@@ -187,14 +190,46 @@ func handleNewRecording(socket socketio.Conn, recordData string) {
		return
	}

	samples, err := wav.ProcessRecording(&recData, true)
	err := utils.CreateFolder("recordings")
	if err != nil {
		err := xerrors.New(err)
		logger.ErrorContext(ctx, "Failed to process recording.", slog.Any("error", err))
		logger.ErrorContext(ctx, "Failed create folder.", slog.Any("error", err))
	}

	now := time.Now()
	fileName := fmt.Sprintf("%04d_%02d_%02d_%02d_%02d_%02d.wav",
		now.Second(), now.Minute(), now.Hour(),
		now.Day(), now.Month(), now.Year(),
	)
	filePath := "recordings/" + fileName

	decodedAudioData, err := base64.StdEncoding.DecodeString(recData.Audio)
	if err != nil {
		err := xerrors.New(err)
		logger.ErrorContext(ctx, "Failed to decode base64", slog.Any("error", err))
	}

	err = wav.WriteWavFile(filePath, decodedAudioData, recData.SampleRate, recData.Channels, recData.SampleSize)
	if err != nil {
		err := xerrors.New(err)
		logger.ErrorContext(ctx, "Failed write wav file.", slog.Any("error", err))
	}
}

func handleNewFingerprint(socket socketio.Conn, fingerprintData string) {
	logger := utils.GetLogger()
	ctx := context.Background()

	var data struct {
		Fingerprint map[uint32]uint32 `json:"fingerprint"`
	}
	if err := json.Unmarshal([]byte(fingerprintData), &data); err != nil {
		err := xerrors.New(err)
		logger.ErrorContext(ctx, "Failed to unmarshal fingerprint data.", slog.Any("error", err))
		return
	}

	matches, _, err := shazam.FindMatches(samples, recData.Duration, recData.SampleRate)
	matches, _, err := shazam.FindMatchesFGP(data.Fingerprint)
	if err != nil {
		err := xerrors.New(err)
		logger.ErrorContext(ctx, "failed to get matches.", slog.Any("error", err))
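The fingerprint payload parsed in handleNewFingerprint above is the JSON body the client emits on the newFingerprint event: fingerprint addresses as object keys, anchor times as values. encoding/json converts the string keys of a JSON object into the integer keys of a map[uint32]uint32, so the struct decodes the client's map directly. A self-contained check of that wire format (the address and anchorTime values below are made-up examples):

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
)

func main() {
	// Shape of the "newFingerprint" event body emitted by the client:
	// JSON.stringify({ fingerprint: { "<address>": <anchorTime>, ... } })
	payload := `{"fingerprint": {"123456789": 1500, "987654321": 2750}}`

	var data struct {
		Fingerprint map[uint32]uint32 `json:"fingerprint"`
	}
	// encoding/json converts the string object keys into uint32 map keys.
	if err := json.Unmarshal([]byte(payload), &data); err != nil {
		log.Fatal(err)
	}

	for address, anchorTime := range data.Fingerprint {
		fmt.Printf("address=%d anchorTime=%d\n", address, anchorTime)
	}
}
```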
@@ -60,13 +60,11 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
	switch bitsPerSample {
	case 8:
		for _, sample := range data {
			// Convert float to 8-bit unsigned integer
			val := uint8((sample + 1.0) * 127.5)
			byteData = append(byteData, byte(val))
		}
	case 16:
		for _, sample := range data {
			// Convert float to 16-bit signed integer
			val := int16(sample * 32767.0)
			buf := make([]byte, 2)
			binary.LittleEndian.PutUint16(buf, uint16(val))

@@ -74,7 +72,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
		}
	case 24:
		for _, sample := range data {
			// Convert float to 24-bit signed integer
			val := int32(sample * 8388607.0)
			buf := make([]byte, 4)
			binary.LittleEndian.PutUint32(buf, uint32(val)<<8) // Shift by 8 bits to fit 24-bit

@@ -82,7 +79,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
		}
	case 32:
		for _, sample := range data {
			// Convert float to 32-bit signed integer
			val := int32(sample * 2147483647.0)
			buf := make([]byte, 4)
			binary.LittleEndian.PutUint32(buf, uint32(val))
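For reference, the case 16 branch above matches the sample format the client requests from ffmpeg.wasm (pcm_s16le at 44.1 kHz). A self-contained sketch of the same float-to-16-bit-PCM conversion, separate from the project's FloatsToBytes helper:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// floatsToPCM16 converts samples in the range [-1, 1] to little-endian
// 16-bit signed PCM, mirroring the case 16 branch of FloatsToBytes above.
func floatsToPCM16(data []float64) []byte {
	out := make([]byte, 0, len(data)*2)
	for _, sample := range data {
		val := int16(sample * 32767.0)
		buf := make([]byte, 2)
		binary.LittleEndian.PutUint16(buf, uint16(val))
		out = append(out, buf...)
	}
	return out
}

func main() {
	samples := []float64{0.0, 0.5, -0.5, 1.0, -1.0}
	fmt.Printf("% x\n", floatsToPCM16(samples))
}
```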
@@ -52,6 +52,8 @@ func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, err e
	return outputFile, nil
}

// ReformatWAV converts a given WAV file to the specified number of channels,
// either mono (1 channel) or stereo (2 channels).
func ReformatWAV(inputFilePath string, channels int) (reformatedFilePath string, errr error) {
	if channels < 1 || channels > 2 {
		channels = 1
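ReformatWAV's body is not shown in this diff, only its new doc comment. As a point of comparison with the client-side ffmpeg.wasm call, converting a WAV file to mono, 44.1 kHz, 16-bit PCM on the server can be sketched by shelling out to the ffmpeg CLI. Everything below (function and file names) is illustrative, not the project's implementation:

```go
package main

import (
	"fmt"
	"os/exec"
)

// reformatWithFFmpeg converts inputPath into a mono (or stereo), 44.1 kHz,
// 16-bit PCM WAV at outputPath by invoking the ffmpeg binary on the PATH.
// Illustrative only; this is not the project's ReformatWAV implementation.
func reformatWithFFmpeg(inputPath, outputPath string, channels int) error {
	if channels < 1 || channels > 2 {
		channels = 1
	}
	cmd := exec.Command(
		"ffmpeg",
		"-y", // overwrite the output file if it already exists
		"-i", inputPath,
		"-ac", fmt.Sprint(channels), // channel count: 1 = mono, 2 = stereo
		"-ar", "44100", // resample to 44.1 kHz
		"-c:a", "pcm_s16le", // 16-bit little-endian PCM
		outputPath,
	)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("ffmpeg failed: %w: %s", err, out)
	}
	return nil
}

func main() {
	// Hypothetical file names, for demonstration only.
	if err := reformatWithFFmpeg("input.wav", "output_mono.wav", 1); err != nil {
		fmt.Println(err)
	}
}
```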