FFmpeg command to downsample and convert to mono

This commit is contained in:
Chigozirim Igweamaka 2024-07-09 23:16:54 +01:00
parent e459099775
commit cf88253c00
2 changed files with 39 additions and 18 deletions

View file

@ -11,6 +11,7 @@ import (
"song-recognition/utils"
"song-recognition/wav"
"strings"
"time"
socketio "github.com/googollee/go-socket.io"
"github.com/mdobak/go-xerrors"
@ -207,37 +208,36 @@ func handleNewRecording(socket socketio.Conn, recordData string) {
sampleRate := recData.SampleRate
bitsPerSample := recData.SampleSize
fmt.Printf("Channels: %v, sampleRate: %v, bitsPerSample: %v\n", channels, sampleRate, bitsPerSample)
samples, err := wav.WavBytesToSamples(decodedAudioData)
if err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "failed to convert decodedData to samples.", slog.Any("error", err))
}
/** this operation alters the audio, adds some level of bass to it.
if sampleRate != 44100 {
samples, err = shazam.Downsample(samples, sampleRate, 44100)
if err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "failed to downsample.", slog.Any("error", err))
}
sampleRate = 44100
}
// Save recording
recordingInBytes, err := utils.FloatsToBytes(samples, bitsPerSample)
if err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "failed to convert bytes.", slog.Any("error", err))
}
decodedAudioData = recordingInBytes
*/
now := time.Now()
fileName := fmt.Sprintf("%04d_%02d_%02d_%02d_%02d_%02d.wav",
now.Second(), now.Minute(), now.Hour(),
now.Day(), now.Month(), now.Year(),
)
err = wav.WriteWavFile("blob.wav", decodedAudioData, sampleRate, channels, bitsPerSample)
err = wav.WriteWavFile(fileName, decodedAudioData, sampleRate, channels, bitsPerSample)
if err != nil {
err := xerrors.New(err)
logger.ErrorContext(ctx, "failed to write wav file.", slog.Any("error", err))
}
/*
wav.FFmpegConvertWAV(fileName, fileName, 44100, true)
wavInfo, _ := wav.ReadWavInfo("mono_" + fileName)
samples, _ = wav.WavBytesToSamples(wavInfo.Data)
// spotify.DeleteFile(fileName)
spotify.DeleteFile("mono_" + fileName)
*/
matches, _, err := shazam.FindMatches(samples, recData.Duration, sampleRate)
if err != nil {
err := xerrors.New(err)

View file

@ -7,6 +7,7 @@ import (
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
)
// WavHeader defines the structure of a WAV header
@ -148,3 +149,23 @@ func WavBytesToSamples(input []byte) ([]float64, error) {
return output, nil
}
// FFmpegConvertWAV re-encodes a WAV file by running the "ffmpeg" executable
// found on PATH.
//
// The audio is resampled to targetSampleRate (in Hz). When toMono is true the
// output is additionally downmixed to one channel, encoded as 16-bit
// little-endian PCM, and written to a file whose base name is outputFile's
// base name prefixed with "mono_", located in outputFile's directory. An
// existing output file is overwritten.
//
// An error is returned when a file name is empty, when targetSampleRate is
// not positive, when the resolved output path is the same as inputFile
// (ffmpeg cannot edit a file in place), or when ffmpeg itself fails. In the
// last case the error wraps the underlying error and carries ffmpeg's
// combined stdout/stderr so the cause is visible to the caller.
func FFmpegConvertWAV(inputFile, outputFile string, targetSampleRate int, toMono bool) error {
	if inputFile == "" || outputFile == "" {
		return fmt.Errorf("ffmpeg convert: input and output file names must not be empty")
	}
	if targetSampleRate <= 0 {
		return fmt.Errorf("ffmpeg convert: invalid target sample rate %d", targetSampleRate)
	}

	cmdArgs := []string{
		// Keep the captured output limited to real diagnostics.
		"-hide_banner", "-loglevel", "error",
		"-i", inputFile,
		"-ar", fmt.Sprintf("%d", targetSampleRate),
		"-y",
	}

	if toMono {
		// Prefix only the base name: prepending "mono_" to the whole path
		// would corrupt any directory component ("dir/a.wav" -> "mono_dir/a.wav").
		outputFile = filepath.Join(filepath.Dir(outputFile), "mono_"+filepath.Base(outputFile))
		cmdArgs = append(cmdArgs, "-ac", "1", "-c:a", "pcm_s16le")
	}

	// ffmpeg refuses to overwrite its own input; fail early with a clear message.
	if filepath.Clean(inputFile) == filepath.Clean(outputFile) {
		return fmt.Errorf("ffmpeg convert: output %q is the same file as the input", outputFile)
	}

	cmdArgs = append(cmdArgs, outputFile)

	out, err := exec.Command("ffmpeg", cmdArgs...).CombinedOutput()
	if err != nil {
		return fmt.Errorf("ffmpeg convert %q -> %q: %w: %s", inputFile, outputFile, err, out)
	}
	return nil
}