mirror of
https://github.com/cgzirim/seek-tune.git
synced 2025-12-17 08:54:19 +00:00
Merge pull request #35 from cgzirim/development
Add WASM support for fingerprint generation on the client side
This commit is contained in:
commit
be76a55c52
6 changed files with 145 additions and 65 deletions
|
|
@ -3,6 +3,8 @@
|
||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
"private": true,
|
"private": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@ffmpeg/ffmpeg": "^0.12.15",
|
||||||
|
"@ffmpeg/util": "^0.12.2",
|
||||||
"@testing-library/jest-dom": "^5.17.0",
|
"@testing-library/jest-dom": "^5.17.0",
|
||||||
"@testing-library/react": "^13.4.0",
|
"@testing-library/react": "^13.4.0",
|
||||||
"@testing-library/user-event": "^13.5.0",
|
"@testing-library/user-event": "^13.5.0",
|
||||||
|
|
|
||||||
|
|
@ -9,20 +9,28 @@ import { ToastContainer, toast, Slide } from "react-toastify";
|
||||||
import "react-toastify/dist/ReactToastify.css";
|
import "react-toastify/dist/ReactToastify.css";
|
||||||
import { MediaRecorder, register } from "extendable-media-recorder";
|
import { MediaRecorder, register } from "extendable-media-recorder";
|
||||||
import { connect } from "extendable-media-recorder-wav-encoder";
|
import { connect } from "extendable-media-recorder-wav-encoder";
|
||||||
|
import { FFmpeg } from '@ffmpeg/ffmpeg';
|
||||||
|
import { fetchFile } from '@ffmpeg/util';
|
||||||
|
|
||||||
|
|
||||||
import AnimatedNumber from "./components/AnimatedNumber";
|
import AnimatedNumber from "./components/AnimatedNumber";
|
||||||
|
|
||||||
const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000";
|
const server = process.env.REACT_APP_BACKEND_URL || "http://localhost:5000";
|
||||||
|
// https://seek-tune-rq4gn.ondigitalocean.app/
|
||||||
|
|
||||||
var socket = io(server);
|
var socket = io(server);
|
||||||
|
|
||||||
function App() {
|
function App() {
|
||||||
|
let ffmpegLoaded = false;
|
||||||
|
const ffmpeg = new FFmpeg();
|
||||||
|
const uploadRecording = true
|
||||||
|
const isPhone = window.innerWidth <= 550
|
||||||
const [stream, setStream] = useState();
|
const [stream, setStream] = useState();
|
||||||
const [matches, setMatches] = useState([]);
|
const [matches, setMatches] = useState([]);
|
||||||
const [totalSongs, setTotalSongs] = useState(10);
|
const [totalSongs, setTotalSongs] = useState(10);
|
||||||
const [isListening, setisListening] = useState(false);
|
const [isListening, setisListening] = useState(false);
|
||||||
const [audioInput, setAudioInput] = useState("device"); // or "mic"
|
const [audioInput, setAudioInput] = useState("device"); // or "mic"
|
||||||
const [isPhone, setIsPhone] = useState(window.innerWidth <= 550);
|
const [genFingerprint, setGenFingerprint] = useState(null);
|
||||||
const [registeredMediaEncoder, setRegisteredMediaEncoder] = useState(false);
|
const [registeredMediaEncoder, setRegisteredMediaEncoder] = useState(false);
|
||||||
|
|
||||||
const streamRef = useRef(stream);
|
const streamRef = useRef(stream);
|
||||||
|
|
@ -78,8 +86,38 @@ function App() {
|
||||||
return () => clearInterval(intervalId);
|
return () => clearInterval(intervalId);
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
(async () => {
|
||||||
|
try {
|
||||||
|
const go = new window.Go();
|
||||||
|
const result = await WebAssembly.instantiateStreaming(
|
||||||
|
fetch("/main.wasm"),
|
||||||
|
go.importObject
|
||||||
|
);
|
||||||
|
go.run(result.instance);
|
||||||
|
|
||||||
|
if (typeof window.generateFingerprint === "function") {
|
||||||
|
setGenFingerprint(() => window.generateFingerprint);
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Error loading WASM:", error);
|
||||||
|
}
|
||||||
|
})();
|
||||||
|
}, []);
|
||||||
|
|
||||||
async function record() {
|
async function record() {
|
||||||
try {
|
try {
|
||||||
|
if (!genFingerprint) {
|
||||||
|
console.error("WASM is not loaded yet.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ffmpegLoaded) {
|
||||||
|
await ffmpeg.load();
|
||||||
|
ffmpegLoaded = true;
|
||||||
|
}
|
||||||
|
|
||||||
const mediaDevice =
|
const mediaDevice =
|
||||||
audioInput === "device"
|
audioInput === "device"
|
||||||
? navigator.mediaDevices.getDisplayMedia.bind(navigator.mediaDevices)
|
? navigator.mediaDevices.getDisplayMedia.bind(navigator.mediaDevices)
|
||||||
|
|
@ -113,33 +151,6 @@ function App() {
|
||||||
track.stop();
|
track.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Attempt to change sampleRate
|
|
||||||
const audioContext = new AudioContext({
|
|
||||||
sampleRate: 44100,
|
|
||||||
});
|
|
||||||
const mediaStreamAudioSourceNode = new MediaStreamAudioSourceNode(
|
|
||||||
audioContext,
|
|
||||||
{ mediaStream: audioStream }
|
|
||||||
);
|
|
||||||
const mediaStreamAudioDestinationNode =
|
|
||||||
new MediaStreamAudioDestinationNode(audioContext, {
|
|
||||||
channelCount: 1,
|
|
||||||
});
|
|
||||||
|
|
||||||
mediaStreamAudioSourceNode.connect(mediaStreamAudioDestinationNode);
|
|
||||||
|
|
||||||
const mediaRecorder = new MediaRecorder(
|
|
||||||
mediaStreamAudioDestinationNode.stream,
|
|
||||||
{ mimeType: "audio/wav" }
|
|
||||||
);
|
|
||||||
|
|
||||||
const settings = mediaStreamAudioDestinationNode.stream
|
|
||||||
.getAudioTracks()[0]
|
|
||||||
.getSettings();
|
|
||||||
|
|
||||||
console.log("Settings: ", settings);
|
|
||||||
*/
|
|
||||||
|
|
||||||
const mediaRecorder = new MediaRecorder(audioStream, {
|
const mediaRecorder = new MediaRecorder(audioStream, {
|
||||||
mimeType: "audio/wav",
|
mimeType: "audio/wav",
|
||||||
});
|
});
|
||||||
|
|
@ -158,45 +169,77 @@ function App() {
|
||||||
mediaRecorder.stop();
|
mediaRecorder.stop();
|
||||||
}, 20000);
|
}, 20000);
|
||||||
|
|
||||||
mediaRecorder.addEventListener("stop", () => {
|
mediaRecorder.addEventListener("stop", async () => {
|
||||||
const blob = new Blob(chunks, { type: "audio/wav" });
|
const blob = new Blob(chunks, { type: "audio/wav" });
|
||||||
const reader = new FileReader();
|
|
||||||
|
|
||||||
cleanUp();
|
cleanUp();
|
||||||
// downloadRecording(blob);
|
|
||||||
|
|
||||||
reader.readAsArrayBuffer(blob);
|
const inputFile = 'input.wav';
|
||||||
|
const outputFile = 'output_mono.wav';
|
||||||
|
|
||||||
|
// Convert audio to mono with a sample rate of 44100 Hz
|
||||||
|
await ffmpeg.writeFile(inputFile, await fetchFile(blob))
|
||||||
|
const exitCode = await ffmpeg.exec([
|
||||||
|
'-i', inputFile,
|
||||||
|
'-c', 'pcm_s16le',
|
||||||
|
'-ar', '44100',
|
||||||
|
'-ac', '1',
|
||||||
|
'-f', 'wav',
|
||||||
|
outputFile
|
||||||
|
]);
|
||||||
|
if (exitCode !== 0) {
|
||||||
|
throw new Error(`FFmpeg exec failed with exit code: ${exitCode}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const monoData = await ffmpeg.readFile(outputFile);
|
||||||
|
const monoBlob = new Blob([monoData.buffer], { type: 'audio/wav' });
|
||||||
|
|
||||||
|
const reader = new FileReader();
|
||||||
|
reader.readAsArrayBuffer(monoBlob);
|
||||||
reader.onload = async (event) => {
|
reader.onload = async (event) => {
|
||||||
const arrayBuffer = event.target.result;
|
const arrayBuffer = event.target.result;
|
||||||
|
|
||||||
// get record duration
|
|
||||||
const arrayBufferCopy = arrayBuffer.slice(0);
|
|
||||||
const audioContext = new AudioContext();
|
const audioContext = new AudioContext();
|
||||||
const audioBufferDecoded = await audioContext.decodeAudioData(
|
const arrayBufferCopy = arrayBuffer.slice(0);
|
||||||
arrayBufferCopy
|
const audioBufferDecoded = await audioContext.decodeAudioData(arrayBufferCopy);
|
||||||
);
|
|
||||||
const recordDuration = audioBufferDecoded.duration;
|
const audioData = audioBufferDecoded.getChannelData(0);
|
||||||
|
const audioArray = Array.from(audioData);
|
||||||
|
|
||||||
var binary = "";
|
const result = genFingerprint(audioArray, audioBufferDecoded.sampleRate);
|
||||||
var bytes = new Uint8Array(arrayBuffer);
|
if (result.error !== 0) {
|
||||||
var len = bytes.byteLength;
|
toast["error"](() => <div>An error occured</div>)
|
||||||
for (var i = 0; i < len; i++) {
|
console.log("An error occured: ", result)
|
||||||
binary += String.fromCharCode(bytes[i]);
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert byte array to base64
|
const fingerprintMap = result.data.reduce((dict, item) => {
|
||||||
const rawAudio = btoa(binary);
|
dict[item.address] = item.anchorTime;
|
||||||
const audioConfig = audioStream.getAudioTracks()[0].getSettings();
|
return dict;
|
||||||
|
}, {});
|
||||||
const recordData = {
|
|
||||||
audio: rawAudio,
|
|
||||||
duration: recordDuration,
|
|
||||||
channels: audioConfig.channelCount,
|
|
||||||
sampleRate: audioConfig.sampleRate,
|
|
||||||
sampleSize: audioConfig.sampleSize,
|
|
||||||
};
|
|
||||||
|
|
||||||
if (sendRecordingRef.current) {
|
if (sendRecordingRef.current) {
|
||||||
|
socket.emit("newFingerprint", JSON.stringify({ fingerprint: fingerprintMap }));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (uploadRecording) {
|
||||||
|
var bytes = new Uint8Array(arrayBuffer);
|
||||||
|
var rawAudio = "";
|
||||||
|
for (var i = 0; i < bytes.byteLength; i++) {
|
||||||
|
rawAudio += String.fromCharCode(bytes[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const dataView = new DataView(arrayBuffer);
|
||||||
|
|
||||||
|
const recordData = {
|
||||||
|
audio: btoa(rawAudio),
|
||||||
|
channels: dataView.getUint16(22, true),
|
||||||
|
sampleRate: dataView.getUint16(24, true),
|
||||||
|
sampleSize: dataView.getUint16(34, true),
|
||||||
|
duration: audioBufferDecoded.duration,
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log("Record data: ", recordData);
|
||||||
|
|
||||||
socket.emit("newRecording", JSON.stringify(recordData));
|
socket.emit("newRecording", JSON.stringify(recordData));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
@ -207,10 +250,11 @@ function App() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function downloadRecording(blob) {
|
function downloadRecording(blob) {
|
||||||
const blobUrl = URL.createObjectURL(blob);
|
const blobUrl = URL.createObjectURL(blob);
|
||||||
|
|
||||||
// Create a download link
|
|
||||||
const downloadLink = document.createElement("a");
|
const downloadLink = document.createElement("a");
|
||||||
downloadLink.href = blobUrl;
|
downloadLink.href = blobUrl;
|
||||||
downloadLink.download = "recorded_audio.wav";
|
downloadLink.download = "recorded_audio.wav";
|
||||||
|
|
@ -244,7 +288,7 @@ function App() {
|
||||||
return (
|
return (
|
||||||
<div className="App">
|
<div className="App">
|
||||||
<div className="TopHeader">
|
<div className="TopHeader">
|
||||||
<h2 style={{ color: "#374151" }}>SeekTune</h2>
|
<h2 style={{ color: "#374151" }}>!Shazam</h2>
|
||||||
<h4 style={{ display: "flex", justifyContent: "flex-end" }}>
|
<h4 style={{ display: "flex", justifyContent: "flex-end" }}>
|
||||||
<AnimatedNumber includeComma={true} animateToNumber={totalSongs} />
|
<AnimatedNumber includeComma={true} animateToNumber={totalSongs} />
|
||||||
Songs
|
Songs
|
||||||
|
|
@ -302,4 +346,4 @@ function App() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export default App;
|
export default App;
|
||||||
|
|
@ -136,6 +136,7 @@ func serve(protocol, port string) {
|
||||||
server.OnEvent("/", "totalSongs", handleTotalSongs)
|
server.OnEvent("/", "totalSongs", handleTotalSongs)
|
||||||
server.OnEvent("/", "newDownload", handleSongDownload)
|
server.OnEvent("/", "newDownload", handleSongDownload)
|
||||||
server.OnEvent("/", "newRecording", handleNewRecording)
|
server.OnEvent("/", "newRecording", handleNewRecording)
|
||||||
|
server.OnEvent("/", "newFingerprint", handleNewFingerprint)
|
||||||
|
|
||||||
server.OnError("/", func(s socketio.Conn, e error) {
|
server.OnError("/", func(s socketio.Conn, e error) {
|
||||||
log.Println("meet error:", e)
|
log.Println("meet error:", e)
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
|
@ -12,6 +13,7 @@ import (
|
||||||
"song-recognition/utils"
|
"song-recognition/utils"
|
||||||
"song-recognition/wav"
|
"song-recognition/wav"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
socketio "github.com/googollee/go-socket.io"
|
socketio "github.com/googollee/go-socket.io"
|
||||||
"github.com/mdobak/go-xerrors"
|
"github.com/mdobak/go-xerrors"
|
||||||
|
|
@ -176,6 +178,7 @@ func handleSongDownload(socket socketio.Conn, spotifyURL string) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handleNewRecording saves a newly recorded audio snippet to a WAV file.
|
||||||
func handleNewRecording(socket socketio.Conn, recordData string) {
|
func handleNewRecording(socket socketio.Conn, recordData string) {
|
||||||
logger := utils.GetLogger()
|
logger := utils.GetLogger()
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|
@ -187,14 +190,46 @@ func handleNewRecording(socket socketio.Conn, recordData string) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
samples, err := wav.ProcessRecording(&recData, true)
|
err := utils.CreateFolder("recordings")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err := xerrors.New(err)
|
err := xerrors.New(err)
|
||||||
logger.ErrorContext(ctx, "Failed to process recording.", slog.Any("error", err))
|
logger.ErrorContext(ctx, "Failed create folder.", slog.Any("error", err))
|
||||||
|
}
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
fileName := fmt.Sprintf("%04d_%02d_%02d_%02d_%02d_%02d.wav",
|
||||||
|
now.Second(), now.Minute(), now.Hour(),
|
||||||
|
now.Day(), now.Month(), now.Year(),
|
||||||
|
)
|
||||||
|
filePath := "recordings/" + fileName
|
||||||
|
|
||||||
|
decodedAudioData, err := base64.StdEncoding.DecodeString(recData.Audio)
|
||||||
|
if err != nil {
|
||||||
|
err := xerrors.New(err)
|
||||||
|
logger.ErrorContext(ctx, "Failed to decode base64", slog.Any("error", err))
|
||||||
|
}
|
||||||
|
|
||||||
|
err = wav.WriteWavFile(filePath, decodedAudioData, recData.SampleRate, recData.Channels, recData.SampleSize)
|
||||||
|
if err != nil {
|
||||||
|
err := xerrors.New(err)
|
||||||
|
logger.ErrorContext(ctx, "Failed write wav file.", slog.Any("error", err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func handleNewFingerprint(socket socketio.Conn, fingerprintData string) {
|
||||||
|
logger := utils.GetLogger()
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
var data struct {
|
||||||
|
Fingerprint map[uint32]uint32 `json:"fingerprint"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal([]byte(fingerprintData), &data); err != nil {
|
||||||
|
err := xerrors.New(err)
|
||||||
|
logger.ErrorContext(ctx, "Failed to unmarshal fingerprint data.", slog.Any("error", err))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
matches, _, err := shazam.FindMatches(samples, recData.Duration, recData.SampleRate)
|
matches, _, err := shazam.FindMatchesFGP(data.Fingerprint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err := xerrors.New(err)
|
err := xerrors.New(err)
|
||||||
logger.ErrorContext(ctx, "failed to get matches.", slog.Any("error", err))
|
logger.ErrorContext(ctx, "failed to get matches.", slog.Any("error", err))
|
||||||
|
|
|
||||||
|
|
@ -60,13 +60,11 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
|
||||||
switch bitsPerSample {
|
switch bitsPerSample {
|
||||||
case 8:
|
case 8:
|
||||||
for _, sample := range data {
|
for _, sample := range data {
|
||||||
// Convert float to 8-bit unsigned integer
|
|
||||||
val := uint8((sample + 1.0) * 127.5)
|
val := uint8((sample + 1.0) * 127.5)
|
||||||
byteData = append(byteData, byte(val))
|
byteData = append(byteData, byte(val))
|
||||||
}
|
}
|
||||||
case 16:
|
case 16:
|
||||||
for _, sample := range data {
|
for _, sample := range data {
|
||||||
// Convert float to 16-bit signed integer
|
|
||||||
val := int16(sample * 32767.0)
|
val := int16(sample * 32767.0)
|
||||||
buf := make([]byte, 2)
|
buf := make([]byte, 2)
|
||||||
binary.LittleEndian.PutUint16(buf, uint16(val))
|
binary.LittleEndian.PutUint16(buf, uint16(val))
|
||||||
|
|
@ -74,7 +72,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
|
||||||
}
|
}
|
||||||
case 24:
|
case 24:
|
||||||
for _, sample := range data {
|
for _, sample := range data {
|
||||||
// Convert float to 24-bit signed integer
|
|
||||||
val := int32(sample * 8388607.0)
|
val := int32(sample * 8388607.0)
|
||||||
buf := make([]byte, 4)
|
buf := make([]byte, 4)
|
||||||
binary.LittleEndian.PutUint32(buf, uint32(val)<<8) // Shift by 8 bits to fit 24-bit
|
binary.LittleEndian.PutUint32(buf, uint32(val)<<8) // Shift by 8 bits to fit 24-bit
|
||||||
|
|
@ -82,7 +79,6 @@ func FloatsToBytes(data []float64, bitsPerSample int) ([]byte, error) {
|
||||||
}
|
}
|
||||||
case 32:
|
case 32:
|
||||||
for _, sample := range data {
|
for _, sample := range data {
|
||||||
// Convert float to 32-bit signed integer
|
|
||||||
val := int32(sample * 2147483647.0)
|
val := int32(sample * 2147483647.0)
|
||||||
buf := make([]byte, 4)
|
buf := make([]byte, 4)
|
||||||
binary.LittleEndian.PutUint32(buf, uint32(val))
|
binary.LittleEndian.PutUint32(buf, uint32(val))
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,8 @@ func ConvertToWAV(inputFilePath string, channels int) (wavFilePath string, err e
|
||||||
return outputFile, nil
|
return outputFile, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReformatWAV converts a given WAV file to the specified number of channels,
|
||||||
|
// either mono (1 channel) or stereo (2 channels).
|
||||||
func ReformatWAV(inputFilePath string, channels int) (reformatedFilePath string, errr error) {
|
func ReformatWAV(inputFilePath string, channels int) (reformatedFilePath string, errr error) {
|
||||||
if channels < 1 || channels > 2 {
|
if channels < 1 || channels > 2 {
|
||||||
channels = 1
|
channels = 1
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue