diff --git a/client/package.json b/client/package.json index ebc9936..98c99a2 100644 --- a/client/package.json +++ b/client/package.json @@ -3,6 +3,8 @@ "version": "0.1.0", "private": true, "dependencies": { + "@ffmpeg/ffmpeg": "^0.12.15", + "@ffmpeg/util": "^0.12.2", "@testing-library/jest-dom": "^5.17.0", "@testing-library/react": "^13.4.0", "@testing-library/user-event": "^13.5.0", diff --git a/client/src/App.js b/client/src/App.js index 904c618..cfa0e1f 100644 --- a/client/src/App.js +++ b/client/src/App.js @@ -9,20 +9,28 @@ import { ToastContainer, toast, Slide } from "react-toastify"; import "react-toastify/dist/ReactToastify.css"; import { MediaRecorder, register } from "extendable-media-recorder"; import { connect } from "extendable-media-recorder-wav-encoder"; +import { FFmpeg } from '@ffmpeg/ffmpeg'; +import { fetchFile } from '@ffmpeg/util'; + import AnimatedNumber from "./components/AnimatedNumber"; const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000"; +// https://seek-tune-rq4gn.ondigitalocean.app/ var socket = io(server); function App() { + let ffmpegLoaded = false; + const ffmpeg = new FFmpeg(); + const uploadRecording = true + const isPhone = window.innerWidth <= 550 const [stream, setStream] = useState(); const [matches, setMatches] = useState([]); const [totalSongs, setTotalSongs] = useState(10); const [isListening, setisListening] = useState(false); const [audioInput, setAudioInput] = useState("device"); // or "mic" - const [isPhone, setIsPhone] = useState(window.innerWidth <= 550); + const [genFingerprint, setGenFingerprint] = useState(null); const [registeredMediaEncoder, setRegisteredMediaEncoder] = useState(false); const streamRef = useRef(stream); @@ -78,8 +86,38 @@ function App() { return () => clearInterval(intervalId); }, []); + useEffect(() => { + (async () => { + try { + const go = new window.Go(); + const result = await WebAssembly.instantiateStreaming( + fetch("/main.wasm"), + go.importObject + ); + go.run(result.instance); + + if (typeof window.generateFingerprint === "function") { + setGenFingerprint(() => window.generateFingerprint); + } + + } catch (error) { + console.error("Error loading WASM:", error); + } + })(); + }, []); + async function record() { try { + if (!genFingerprint) { + console.error("WASM is not loaded yet."); + return; + } + + if (!ffmpegLoaded) { + await ffmpeg.load(); + ffmpegLoaded = true; + } + const mediaDevice = audioInput === "device" ? navigator.mediaDevices.getDisplayMedia.bind(navigator.mediaDevices) @@ -113,33 +151,6 @@ function App() { track.stop(); } - /** Attempt to change sampleRate - const audioContext = new AudioContext({ - sampleRate: 44100, - }); - const mediaStreamAudioSourceNode = new MediaStreamAudioSourceNode( - audioContext, - { mediaStream: audioStream } - ); - const mediaStreamAudioDestinationNode = - new MediaStreamAudioDestinationNode(audioContext, { - channelCount: 1, - }); - - mediaStreamAudioSourceNode.connect(mediaStreamAudioDestinationNode); - - const mediaRecorder = new MediaRecorder( - mediaStreamAudioDestinationNode.stream, - { mimeType: "audio/wav" } - ); - - const settings = mediaStreamAudioDestinationNode.stream - .getAudioTracks()[0] - .getSettings(); - - console.log("Settings: ", settings); - */ - const mediaRecorder = new MediaRecorder(audioStream, { mimeType: "audio/wav", }); @@ -158,45 +169,77 @@ function App() { mediaRecorder.stop(); }, 20000); - mediaRecorder.addEventListener("stop", () => { + mediaRecorder.addEventListener("stop", async () => { const blob = new Blob(chunks, { type: "audio/wav" }); - const reader = new FileReader(); cleanUp(); - // downloadRecording(blob); - reader.readAsArrayBuffer(blob); + const inputFile = 'input.wav'; + const outputFile = 'output_mono.wav'; + + // Convert audio to mono with a sample rate of 44100 Hz + await ffmpeg.writeFile(inputFile, await fetchFile(blob)) + const exitCode = await ffmpeg.exec([ + '-i', inputFile, + '-c', 'pcm_s16le', + '-ar', '44100', + '-ac', '1', + '-f', 'wav', + outputFile + ]); + if (exitCode !== 0) { + throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`); + } + + const monoData = await ffmpeg.readFile(outputFile); + const monoBlob = new Blob([monoData.buffer], { type: 'audio/wav' }); + + const reader = new FileReader(); + reader.readAsArrayBuffer(monoBlob); reader.onload = async (event) => { const arrayBuffer = event.target.result; - - // get record duration - const arrayBufferCopy = arrayBuffer.slice(0); const audioContext = new AudioContext(); - const audioBufferDecoded = await audioContext.decodeAudioData( - arrayBufferCopy - ); - const recordDuration = audioBufferDecoded.duration; + const arrayBufferCopy = arrayBuffer.slice(0); + const audioBufferDecoded = await audioContext.decodeAudioData(arrayBufferCopy); + + const audioData = audioBufferDecoded.getChannelData(0); + const audioArray = Array.from(audioData); - var binary = ""; - var bytes = new Uint8Array(arrayBuffer); - var len = bytes.byteLength; - for (var i = 0; i < len; i++) { - binary += String.fromCharCode(bytes[i]); + const result = genFingerprint(audioArray, audioBufferDecoded.sampleRate); + if (result.error !== 0) { + toast["error"](() =>
An error occured
) + console.log("An error occured: ", result) + return } - // Convert byte array to base64 - const rawAudio = btoa(binary); - const audioConfig = audioStream.getAudioTracks()[0].getSettings(); - - const recordData = { - audio: rawAudio, - duration: recordDuration, - channels: audioConfig.channelCount, - sampleRate: audioConfig.sampleRate, - sampleSize: audioConfig.sampleSize, - }; + const fingerprintMap = result.data.reduce((dict, item) => { + dict[item.address] = item.anchorTime; + return dict; + }, {}); if (sendRecordingRef.current) { + socket.emit("newFingerprint", JSON.stringify({ fingerprint: fingerprintMap })); + } + + if (uploadRecording) { + var bytes = new Uint8Array(arrayBuffer); + var rawAudio = ""; + for (var i = 0; i < bytes.byteLength; i++) { + rawAudio += String.fromCharCode(bytes[i]); + } + + const dataView = new DataView(arrayBuffer); + + const recordData = { + audio: btoa(rawAudio), + channels: dataView.getUint16(22, true), + sampleRate: dataView.getUint16(24, true), + sampleSize: dataView.getUint16(34, true), + duration: audioBufferDecoded.duration, + }; + + console.log("Record data: ", recordData); + socket.emit("newRecording", JSON.stringify(recordData)); } }; @@ -207,10 +250,11 @@ function App() { } } + + function downloadRecording(blob) { const blobUrl = URL.createObjectURL(blob); - // Create a download link const downloadLink = document.createElement("a"); downloadLink.href = blobUrl; downloadLink.download = "recorded_audio.wav"; @@ -244,7 +288,7 @@ function App() { return (
-

SeekTune

+

!Shazam

 Songs @@ -302,4 +346,4 @@ function App() { ); } -export default App; +export default App; \ No newline at end of file diff --git a/cmdHandlers.go b/cmdHandlers.go index 3330450..105d46d 100644 --- a/cmdHandlers.go +++ b/cmdHandlers.go @@ -136,6 +136,7 @@ func serve(protocol, port string) { server.OnEvent("/", "totalSongs", handleTotalSongs) server.OnEvent("/", "newDownload", handleSongDownload) server.OnEvent("/", "newRecording", handleNewRecording) + server.OnEvent("/", "newFingerprint", handleNewFingerprint) server.OnError("/", func(s socketio.Conn, e error) { log.Println("meet error:", e) diff --git a/socketHandlers.go b/socketHandlers.go index 406f2b4..d3f34c2 100644 --- a/socketHandlers.go +++ b/socketHandlers.go @@ -2,6 +2,7 @@ package main import ( "context" + "encoding/base64" "encoding/json" "fmt" "log/slog" @@ -12,6 +13,7 @@ import ( "song-recognition/utils" "song-recognition/wav" "strings" + "time" socketio "github.com/googollee/go-socket.io" "github.com/mdobak/go-xerrors" @@ -176,6 +178,7 @@ func handleSongDownload(socket socketio.Conn, spotifyURL string) { } } +// handleNewRecording saves new recorded audio snippet to a WAV file. func handleNewRecording(socket socketio.Conn, recordData string) { logger := utils.GetLogger() ctx := context.Background() @@ -187,14 +190,46 @@ func handleNewRecording(socket socketio.Conn, recordData string) { return } - samples, err := wav.ProcessRecording(&recData, true) + err := utils.CreateFolder("recordings") if err != nil { err := xerrors.New(err) - logger.ErrorContext(ctx, "Failed to process recording.", slog.Any("error", err)) + logger.ErrorContext(ctx, "Failed create folder.", slog.Any("error", err)) + } + + now := time.Now() + fileName := fmt.Sprintf("%04d_%02d_%02d_%02d_%02d_%02d.wav", + now.Second(), now.Minute(), now.Hour(), + now.Day(), now.Month(), now.Year(), + ) + filePath := "recordings/" + fileName + + decodedAudioData, err := base64.StdEncoding.DecodeString(recData.Audio) + if err != nil { + err := xerrors.New(err) + logger.ErrorContext(ctx, "Failed to decode base64", slog.Any("error", err)) + } + + err = wav.WriteWavFile(filePath, decodedAudioData, recData.SampleRate, recData.Channels, recData.SampleSize) + if err != nil { + err := xerrors.New(err) + logger.ErrorContext(ctx, "Failed write wav file.", slog.Any("error", err)) + } +} + +func handleNewFingerprint(socket socketio.Conn, fingerprintData string) { + logger := utils.GetLogger() + ctx := context.Background() + + var data struct { + Fingerprint map[uint32]uint32 `json:"fingerprint"` + } + if err := json.Unmarshal([]byte(fingerprintData), &data); err != nil { + err := xerrors.New(err) + logger.ErrorContext(ctx, "Failed to unmarshal fingerprint data.", slog.Any("error", err)) return } - matches, _, err := shazam.FindMatches(samples, recData.Duration, recData.SampleRate) + matches, _, err := shazam.FindMatchesFGP(data.Fingerprint) if err != nil { err := xerrors.New(err) logger.ErrorContext(ctx, "failed to get matches.", slog.Any("error", err)) diff --git a/utils/helpers.go b/utils/helpers.go index 4502ca7..b1d2ac5 100644 --- a/utils/helpers.go +++ b/utils/helpers.go @@ -60,13 +60,11 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) { switch bitsPerSample { case 8: for _, sample := range data { - // Convert float to 8-bit unsigned integer val := uint8((sample + 1.0) * 127.5) byteData = append(byteData, byte(val)) } case 16: for _, sample := range data { - // Convert float to 16-bit signed integer val := int16(sample * 32767.0) buf := make([]byte, 2) binary.LittleEndian.PutUint16(buf, uint16(val)) @@ -74,7 +72,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) { } case 24: for _, sample := range data { - // Convert float to 24-bit signed integer val := int32(sample * 8388607.0) buf := make([]byte, 4) binary.LittleEndian.PutUint32(buf, uint32(val)<<8) // Shift by 8 bits to fit 24-bit @@ -82,7 +79,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) { } case 32: for _, sample := range data { - // Convert float to 32-bit signed integer val := int32(sample * 2147483647.0) buf := make([]byte, 4) binary.LittleEndian.PutUint32(buf, uint32(val)) diff --git a/wav/convert.go b/wav/convert.go index b8ca3ca..0d6dc20 100644 --- a/wav/convert.go +++ b/wav/convert.go @@ -52,6 +52,8 @@ func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, err e return outputFile, nil } +// ReformatWAV converts a given WAV file to the specified number of channels, +// either mono (1 channel) or stereo (2 channels). func ReformatWAV(inputFilePath string, channels int) (reformatedFilePath string, errr error) { if channels < 1 || channels > 2 { channels = 1