diff --git a/client/package.json b/client/package.json
index ebc9936..98c99a2 100644
--- a/client/package.json
+++ b/client/package.json
@@ -3,6 +3,8 @@
"version": "0.1.0",
"private": true,
"dependencies": {
+ "@ffmpeg/ffmpeg": "^0.12.15",
+ "@ffmpeg/util": "^0.12.2",
"@testing-library/jest-dom": "^5.17.0",
"@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.5.0",
diff --git a/client/src/App.js b/client/src/App.js
index 904c618..cfa0e1f 100644
--- a/client/src/App.js
+++ b/client/src/App.js
@@ -9,20 +9,28 @@ import { ToastContainer, toast, Slide } from "react-toastify";
import "react-toastify/dist/ReactToastify.css";
import { MediaRecorder, register } from "extendable-media-recorder";
import { connect } from "extendable-media-recorder-wav-encoder";
+import { FFmpeg } from '@ffmpeg/ffmpeg';
+import { fetchFile } from '@ffmpeg/util';
+
import AnimatedNumber from "./components/AnimatedNumber";
const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000";
+// https://seek-tune-rq4gn.ondigitalocean.app/
var socket = io(server);
function App() {
+ let ffmpegLoaded = false;
+ const ffmpeg = new FFmpeg();
+ const uploadRecording = true
+ const isPhone = window.innerWidth <= 550
const [stream, setStream] = useState();
const [matches, setMatches] = useState([]);
const [totalSongs, setTotalSongs] = useState(10);
const [isListening, setisListening] = useState(false);
const [audioInput, setAudioInput] = useState("device"); // or "mic"
- const [isPhone, setIsPhone] = useState(window.innerWidth <= 550);
+ const [genFingerprint, setGenFingerprint] = useState(null);
const [registeredMediaEncoder, setRegisteredMediaEncoder] = useState(false);
const streamRef = useRef(stream);
@@ -78,8 +86,38 @@ function App() {
return () => clearInterval(intervalId);
}, []);
+ useEffect(() => {
+ (async () => {
+ try {
+ const go = new window.Go();
+ const result = await WebAssembly.instantiateStreaming(
+ fetch("/main.wasm"),
+ go.importObject
+ );
+ go.run(result.instance);
+
+ if (typeof window.generateFingerprint === "function") {
+ setGenFingerprint(() => window.generateFingerprint);
+ }
+
+ } catch (error) {
+ console.error("Error loading WASM:", error);
+ }
+ })();
+ }, []);
+
async function record() {
try {
+ if (!genFingerprint) {
+ console.error("WASM is not loaded yet.");
+ return;
+ }
+
+ if (!ffmpegLoaded) {
+ await ffmpeg.load();
+ ffmpegLoaded = true;
+ }
+
const mediaDevice =
audioInput === "device"
? navigator.mediaDevices.getDisplayMedia.bind(navigator.mediaDevices)
@@ -113,33 +151,6 @@ function App() {
track.stop();
}
- /** Attempt to change sampleRate
- const audioContext = new AudioContext({
- sampleRate: 44100,
- });
- const mediaStreamAudioSourceNode = new MediaStreamAudioSourceNode(
- audioContext,
- { mediaStream: audioStream }
- );
- const mediaStreamAudioDestinationNode =
- new MediaStreamAudioDestinationNode(audioContext, {
- channelCount: 1,
- });
-
- mediaStreamAudioSourceNode.connect(mediaStreamAudioDestinationNode);
-
- const mediaRecorder = new MediaRecorder(
- mediaStreamAudioDestinationNode.stream,
- { mimeType: "audio/wav" }
- );
-
- const settings = mediaStreamAudioDestinationNode.stream
- .getAudioTracks()[0]
- .getSettings();
-
- console.log("Settings: ", settings);
- */
-
const mediaRecorder = new MediaRecorder(audioStream, {
mimeType: "audio/wav",
});
@@ -158,45 +169,77 @@ function App() {
mediaRecorder.stop();
}, 20000);
- mediaRecorder.addEventListener("stop", () => {
+ mediaRecorder.addEventListener("stop", async () => {
const blob = new Blob(chunks, { type: "audio/wav" });
- const reader = new FileReader();
cleanUp();
- // downloadRecording(blob);
- reader.readAsArrayBuffer(blob);
+ const inputFile = 'input.wav';
+ const outputFile = 'output_mono.wav';
+
+ // Convert audio to mono with a sample rate of 44100 Hz
+ await ffmpeg.writeFile(inputFile, await fetchFile(blob))
+ const exitCode = await ffmpeg.exec([
+ '-i', inputFile,
+ '-c', 'pcm_s16le',
+ '-ar', '44100',
+ '-ac', '1',
+ '-f', 'wav',
+ outputFile
+ ]);
+ if (exitCode !== 0) {
+ throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`);
+ }
+
+ const monoData = await ffmpeg.readFile(outputFile);
+ const monoBlob = new Blob([monoData.buffer], { type: 'audio/wav' });
+
+ const reader = new FileReader();
+ reader.readAsArrayBuffer(monoBlob);
reader.onload = async (event) => {
const arrayBuffer = event.target.result;
-
- // get record duration
- const arrayBufferCopy = arrayBuffer.slice(0);
const audioContext = new AudioContext();
- const audioBufferDecoded = await audioContext.decodeAudioData(
- arrayBufferCopy
- );
- const recordDuration = audioBufferDecoded.duration;
+ const arrayBufferCopy = arrayBuffer.slice(0);
+ const audioBufferDecoded = await audioContext.decodeAudioData(arrayBufferCopy);
+
+ const audioData = audioBufferDecoded.getChannelData(0);
+ const audioArray = Array.from(audioData);
- var binary = "";
- var bytes = new Uint8Array(arrayBuffer);
- var len = bytes.byteLength;
- for (var i = 0; i < len; i++) {
- binary += String.fromCharCode(bytes[i]);
+ const result = genFingerprint(audioArray, audioBufferDecoded.sampleRate);
+ if (result.error !== 0) {
+ toast["error"](() =>
An error occured
)
+ console.log("An error occured: ", result)
+ return
}
- // Convert byte array to base64
- const rawAudio = btoa(binary);
- const audioConfig = audioStream.getAudioTracks()[0].getSettings();
-
- const recordData = {
- audio: rawAudio,
- duration: recordDuration,
- channels: audioConfig.channelCount,
- sampleRate: audioConfig.sampleRate,
- sampleSize: audioConfig.sampleSize,
- };
+ const fingerprintMap = result.data.reduce((dict, item) => {
+ dict[item.address] = item.anchorTime;
+ return dict;
+ }, {});
if (sendRecordingRef.current) {
+ socket.emit("newFingerprint", JSON.stringify({ fingerprint: fingerprintMap }));
+ }
+
+ if (uploadRecording) {
+ var bytes = new Uint8Array(arrayBuffer);
+ var rawAudio = "";
+ for (var i = 0; i < bytes.byteLength; i++) {
+ rawAudio += String.fromCharCode(bytes[i]);
+ }
+
+ const dataView = new DataView(arrayBuffer);
+
+ const recordData = {
+ audio: btoa(rawAudio),
+ channels: dataView.getUint16(22, true),
+ sampleRate: dataView.getUint16(24, true),
+ sampleSize: dataView.getUint16(34, true),
+ duration: audioBufferDecoded.duration,
+ };
+
+ console.log("Record data: ", recordData);
+
socket.emit("newRecording", JSON.stringify(recordData));
}
};
@@ -207,10 +250,11 @@ function App() {
}
}
+
+
function downloadRecording(blob) {
const blobUrl = URL.createObjectURL(blob);
- // Create a download link
const downloadLink = document.createElement("a");
downloadLink.href = blobUrl;
downloadLink.download = "recorded_audio.wav";
@@ -244,7 +288,7 @@ function App() {
return (
-
SeekTune
+
!Shazam
Songs
@@ -302,4 +346,4 @@ function App() {
);
}
-export default App;
+export default App;
\ No newline at end of file
diff --git a/cmdHandlers.go b/cmdHandlers.go
index 3330450..105d46d 100644
--- a/cmdHandlers.go
+++ b/cmdHandlers.go
@@ -136,6 +136,7 @@ func serve(protocol, port string) {
server.OnEvent("/", "totalSongs", handleTotalSongs)
server.OnEvent("/", "newDownload", handleSongDownload)
server.OnEvent("/", "newRecording", handleNewRecording)
+ server.OnEvent("/", "newFingerprint", handleNewFingerprint)
server.OnError("/", func(s socketio.Conn, e error) {
log.Println("meet error:", e)
diff --git a/socketHandlers.go b/socketHandlers.go
index 406f2b4..d3f34c2 100644
--- a/socketHandlers.go
+++ b/socketHandlers.go
@@ -2,6 +2,7 @@ package main
import (
"context"
+ "encoding/base64"
"encoding/json"
"fmt"
"log/slog"
@@ -12,6 +13,7 @@ import (
"song-recognition/utils"
"song-recognition/wav"
"strings"
+ "time"
socketio "github.com/googollee/go-socket.io"
"github.com/mdobak/go-xerrors"
@@ -176,6 +178,7 @@ func handleSongDownload(socket socketio.Conn, spotifyURL string) {
}
}
+// handleNewRecording decodes a base64-encoded recording sent by the client
+// and saves it as a WAV file in the recordings folder.
func handleNewRecording(socket socketio.Conn, recordData string) {
logger := utils.GetLogger()
ctx := context.Background()
@@ -187,14 +190,46 @@ func handleNewRecording(socket socketio.Conn, recordData string) {
return
}
- samples, err := wav.ProcessRecording(&recData, true)
+	// The recording cannot be saved without its destination folder, so stop
+	// here if it cannot be created.
+	err := utils.CreateFolder("recordings")
 	if err != nil {
 		err := xerrors.New(err)
-		logger.ErrorContext(ctx, "Failed to process recording.", slog.Any("error", err))
+		logger.ErrorContext(ctx, "Failed create folder.", slog.Any("error", err))
+		return
+	}
+
+	// Timestamped file name, year first (YYYY_MM_DD_HH_MM_SS.wav), so that
+	// names sort chronologically and every field is zero-padded correctly.
+	fileName := time.Now().Format("2006_01_02_15_04_05") + ".wav"
+	filePath := "recordings/" + fileName
+
+	decodedAudioData, err := base64.StdEncoding.DecodeString(recData.Audio)
+	if err != nil {
+		err := xerrors.New(err)
+		logger.ErrorContext(ctx, "Failed to decode base64", slog.Any("error", err))
+		// Do not write a file from undecodable audio data.
+		return
+	}
+
+	err = wav.WriteWavFile(filePath, decodedAudioData, recData.SampleRate, recData.Channels, recData.SampleSize)
+	if err != nil {
+		err := xerrors.New(err)
+		logger.ErrorContext(ctx, "Failed write wav file.", slog.Any("error", err))
+	}
+}
+
+func handleNewFingerprint(socket socketio.Conn, fingerprintData string) {
+ logger := utils.GetLogger()
+ ctx := context.Background()
+
+ var data struct {
+ Fingerprint map[uint32]uint32 `json:"fingerprint"`
+ }
+ if err := json.Unmarshal([]byte(fingerprintData), &data); err != nil {
+ err := xerrors.New(err)
+ logger.ErrorContext(ctx, "Failed to unmarshal fingerprint data.", slog.Any("error", err))
return
}
- matches, _, err := shazam.FindMatches(samples, recData.Duration, recData.SampleRate)
+ matches, _, err := shazam.FindMatchesFGP(data.Fingerprint)
if err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "failed to get matches.", slog.Any("error", err))
diff --git a/utils/helpers.go b/utils/helpers.go
index 4502ca7..b1d2ac5 100644
--- a/utils/helpers.go
+++ b/utils/helpers.go
@@ -60,13 +60,11 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
switch bitsPerSample {
case 8:
for _, sample := range data {
- // Convert float to 8-bit unsigned integer
val := uint8((sample + 1.0) * 127.5)
byteData = append(byteData, byte(val))
}
case 16:
for _, sample := range data {
- // Convert float to 16-bit signed integer
val := int16(sample * 32767.0)
buf := make([]byte, 2)
binary.LittleEndian.PutUint16(buf, uint16(val))
@@ -74,7 +72,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
}
case 24:
for _, sample := range data {
- // Convert float to 24-bit signed integer
val := int32(sample * 8388607.0)
buf := make([]byte, 4)
binary.LittleEndian.PutUint32(buf, uint32(val)<<8) // Shift by 8 bits to fit 24-bit
@@ -82,7 +79,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
}
case 32:
for _, sample := range data {
- // Convert float to 32-bit signed integer
val := int32(sample * 2147483647.0)
buf := make([]byte, 4)
binary.LittleEndian.PutUint32(buf, uint32(val))
diff --git a/wav/convert.go b/wav/convert.go
index b8ca3ca..0d6dc20 100644
--- a/wav/convert.go
+++ b/wav/convert.go
@@ -52,6 +52,8 @@ func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, err e
return outputFile, nil
}
+// ReformatWAV converts a given WAV file to the specified number of channels,
+// either mono (1 channel) or stereo (2 channels). Any other value falls back
+// to mono.
func ReformatWAV(inputFilePath string, channels int) (reformatedFilePath string, errr error) {
if channels < 1 || channels > 2 {
channels = 1