From e283d2ae3c4360d81bd8addea244b85a968c83df Mon Sep 17 00:00:00 2001 From: Chigozirim Igweamaka Date: Fri, 3 May 2024 05:29:48 +0100 Subject: [PATCH] Reimplement algorithm based of coding-geek's article. --- go.mod | 11 ++++ go.sum | 28 +++++++++ models/models.go | 6 ++ shazam/fingerprint.go | 58 +++++++++++++++++++ shazam/image.go | 54 ++++++++++++++++++ shazam/shazam.go | 4 +- shazam/spectrogram.go | 129 ++++++++++++++++++++++++++++++------------ utils/dbClient.go | 88 +++++++++++++++++++++++++++- utils/wav.go | 15 ++++- 9 files changed, 352 insertions(+), 41 deletions(-) create mode 100644 models/models.go create mode 100644 shazam/fingerprint.go create mode 100644 shazam/image.go diff --git a/go.mod b/go.mod index 83eff5a..cc303e4 100644 --- a/go.mod +++ b/go.mod @@ -5,12 +5,16 @@ go 1.21.6 require ( cloud.google.com/go/compute v1.23.4 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect + git.sr.ht/~sbinet/gg v0.5.0 // indirect github.com/BharatKalluri/spotifydl v0.1.0 // indirect github.com/adrg/strutil v0.3.1 // indirect + github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b // indirect + github.com/auroraapi/aurora-go v0.1.2 // indirect github.com/bitly/go-simplejson v0.5.1 // indirect github.com/bogem/id3v2 v1.1.1 // indirect github.com/buger/jsonparser v1.1.1 // indirect github.com/bytedance/sonic v1.9.1 // indirect + github.com/campoy/embedmd v1.0.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dlclark/regexp2 v1.11.0 // indirect @@ -26,14 +30,18 @@ require ( github.com/go-audio/wav v1.1.0 // indirect github.com/go-fingerprint/fingerprint v0.0.0-20140803133125-29397256b7ff // indirect github.com/go-fingerprint/gochroma v0.0.0-20211004000611-a294aa5ccab6 // indirect + github.com/go-fonts/liberation v0.3.1 // indirect + github.com/go-latex/latex v0.0.0-20230307184459-12ec69307ad9 // indirect github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-pdf/fpdf v0.8.0 // indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.14.0 // indirect github.com/go-sourcemap/sourcemap v2.1.4+incompatible // indirect github.com/goccy/go-json v0.10.2 // indirect github.com/gofrs/uuid v4.0.0+incompatible // indirect + github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.4 // indirect @@ -44,6 +52,7 @@ require ( github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect github.com/googleapis/gax-go/v2 v2.12.1 // indirect github.com/googollee/go-socket.io v1.7.0 // indirect + github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5 // indirect github.com/gorilla/websocket v1.4.2 // indirect github.com/hajimehoshi/go-mp3 v0.3.4 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect @@ -100,11 +109,13 @@ require ( go.opentelemetry.io/otel/trace v1.23.0 // indirect golang.org/x/arch v0.3.0 // indirect golang.org/x/crypto v0.21.0 // indirect + golang.org/x/image v0.11.0 // indirect golang.org/x/net v0.22.0 // indirect golang.org/x/oauth2 v0.17.0 // indirect golang.org/x/sync v0.6.0 // indirect golang.org/x/sys v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect + gonum.org/v1/plot v0.14.0 // indirect google.golang.org/api v0.166.0 // indirect google.golang.org/appengine v1.6.8 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240213162025-012b6fc9bca9 // indirect diff --git a/go.sum b/go.sum index 6231683..7f246bf 100644 --- a/go.sum +++ b/go.sum @@ -5,6 +5,8 @@ cloud.google.com/go/compute v1.23.4 h1:EBT9Nw4q3zyE7G45Wvv3MzolIrCJEuHys5muLY0wv cloud.google.com/go/compute v1.23.4/go.mod h1:/EJMj55asU6kAFnuZET8zqgwgJ9FvXWXOkkfQZa4ioI= cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= +git.sr.ht/~sbinet/gg v0.5.0 h1:6V43j30HM623V329xA9Ntq+WJrMjDxRjuAB1LFWF5m8= +git.sr.ht/~sbinet/gg v0.5.0/go.mod h1:G2C0eRESqlKhS7ErsNey6HHrqU1PwsnCQlekFi9Q2Oo= github.com/BharatKalluri/spotifydl v0.1.0 h1:BzaukOeFenfmSFFSfHPUIF839+lIPr7G8MRaw0Q1b2Q= github.com/BharatKalluri/spotifydl v0.1.0/go.mod h1:NBlYj+lhmo/TaL4w1c1nsVcU2/prqcqvKYrrsEUYcjA= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= @@ -12,7 +14,13 @@ github.com/ZekeLu/go-mp3 v0.3.5-pre h1:D2Ttzfp/ZazLKVryN0Hv0kyuKbej0ofU1Cagwu+e8 github.com/ZekeLu/go-mp3 v0.3.5-pre/go.mod h1:fRtZraRFcWb0pu7ok0LqyFhCUrPeMsGRSVop0eemFmo= github.com/adrg/strutil v0.3.1 h1:OLvSS7CSJO8lBii4YmBt8jiK9QOtB9CzCzwl4Ic/Fz4= github.com/adrg/strutil v0.3.1/go.mod h1:8h90y18QLrs11IBffcGX3NW/GFBXCMcNg4M7H6MspPA= +github.com/ajstarks/deck v0.0.0-20200831202436-30c9fc6549a9/go.mod h1:JynElWSGnm/4RlzPXRlREEwqTHAN3T56Bv2ITsFT3gY= +github.com/ajstarks/deck/generate v0.0.0-20210309230005-c3f852c02e19/go.mod h1:T13YZdzov6OU0A1+RfKZiZN9ca6VeKdBdyDV+BY97Tk= +github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b h1:slYM766cy2nI3BwyRiyQj/Ud48djTMtMebDqepE95rw= +github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b/go.mod h1:1KcenG0jGWcpt8ov532z81sp/kMMUG485J2InIOyADM= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/auroraapi/aurora-go v0.1.2 h1:Ic231WCsZvpTqjBeih/DE7tq/bJqm9LNapmXZhx7kEs= +github.com/auroraapi/aurora-go v0.1.2/go.mod h1:PLOCtL9ZC1Sd9bCNr9b3i4J2vglqzQIjKfUNPhYDqis= github.com/bitly/go-simplejson v0.5.1 h1:xgwPbetQScXt1gh9BmoJ6j9JMr3TElvuIyjR8pgdoow= github.com/bitly/go-simplejson v0.5.1/go.mod h1:YOPVLzCfwK14b4Sff3oP1AmGhI9T9Vsg84etUnlyp+Q= github.com/bogem/id3v2 v1.1.1 h1:FnjS2vytMeEb39tOMG09uz852MaEccA2A3asRM3XxbE= @@ -22,6 +30,8 @@ github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx2 github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= +github.com/campoy/embedmd v1.0.0 h1:V4kI2qTJJLf4J29RzI/MAt2c3Bl4dQSYPuflzwFH2hY= +github.com/campoy/embedmd v1.0.0/go.mod h1:oxyr9RCiSXg0M3VJ3ks0UGfp98BpSSGr0kpiX3MzVl8= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= @@ -83,11 +93,17 @@ github.com/go-fingerprint/fingerprint v0.0.0-20140803133125-29397256b7ff h1:MVMR github.com/go-fingerprint/fingerprint v0.0.0-20140803133125-29397256b7ff/go.mod h1:p+iFTUBRUOKBOZtWQCQAZHhLI7fC5bMdiDc5B4PPBU0= github.com/go-fingerprint/gochroma v0.0.0-20211004000611-a294aa5ccab6 h1:ofe4/jf63isEPwFgoy81WKWzuA3gfegrthdJXgdXKJI= github.com/go-fingerprint/gochroma v0.0.0-20211004000611-a294aa5ccab6/go.mod h1:zsgLdL2ov2nW56GWAEjcSp75I6ZIZjxnToJGZ/ouYwQ= +github.com/go-fonts/liberation v0.3.1 h1:9RPT2NhUpxQ7ukUvz3jeUckmN42T9D9TpjtQcqK/ceM= +github.com/go-fonts/liberation v0.3.1/go.mod h1:jdJ+cqF+F4SUL2V+qxBth8fvBpBDS7yloUL5Fi8GTGY= +github.com/go-latex/latex v0.0.0-20230307184459-12ec69307ad9 h1:NxXI5pTAtpEaU49bpLpQoDsu1zrteW/vxzTz8Cd2UAs= +github.com/go-latex/latex v0.0.0-20230307184459-12ec69307ad9/go.mod h1:gWuR/CrFDDeVRFQwHPvsv9soJVB/iqymhuZQuJ3a9OM= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-pdf/fpdf v0.8.0 h1:IJKpdaagnWUeSkUFUjTcSzTppFxmv8ucGQyNPQWxYOQ= +github.com/go-pdf/fpdf v0.8.0/go.mod h1:gfqhcNwXrsd3XYKte9a7vM3smvU/jB4ZRDrmWSxpfdc= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= @@ -103,6 +119,8 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gofrs/uuid v4.0.0+incompatible h1:1SD/1F5pU8p29ybwgQSwpQk+mwdRrXCYuPhW6m+TnJw= github.com/gofrs/uuid v4.0.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= @@ -157,6 +175,8 @@ github.com/googleapis/gax-go/v2 v2.12.1 h1:9F8GV9r9ztXyAi00gsMQHNoF51xPZm8uj1dpY github.com/googleapis/gax-go/v2 v2.12.1/go.mod h1:61M8vcyyXR2kqKFxKrfA22jaA8JGF7Dc8App1U3H6jc= github.com/googollee/go-socket.io v1.7.0 h1:ODcQSAvVIPvKozXtUGuJDV3pLwdpBLDs1Uoq/QHIlY8= github.com/googollee/go-socket.io v1.7.0/go.mod h1:0vGP8/dXR9SZUMMD4+xxaGo/lohOw3YWMh2WRiWeKxg= +github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5 h1:5AlozfqaVjGYGhms2OsdUyfdJME76E6rx5MdGpjzZpc= +github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5/go.mod h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es= github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gosuri/uilive v0.0.0-20170323041506-ac356e6e42cd/go.mod h1:qkLSc0A5EXSP6B04TrN4oQoxqFI7A8XvoXSlJi8cwk8= @@ -175,6 +195,7 @@ github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLf github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kkdai/youtube/v2 v2.10.0 h1:s8gSWo3AxIafK560XwDVnha9aPXp3N2HQAh1x81R5Og= github.com/kkdai/youtube/v2 v2.10.0/go.mod h1:H5MLUXiXYuovcEhQT/uZf7BC/syIbAJlDKCDsG+WDsU= github.com/kkdai/youtube/v2 v2.10.1 h1:jdPho4R7VxWoRi9Wx4ULMq4+hlzSVOXxh4Zh83f2F9M= @@ -361,6 +382,8 @@ golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDf golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/image v0.11.0 h1:ds2RoQvBvYTiJkwpSFDwCcDFNX7DqjL2WsUgTNk0Ooo= +golang.org/x/image v0.11.0/go.mod h1:bglhjqbqVuEb9e9+eNR45Jfu7D+T4Qan+NhQk8Ck2P8= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -424,6 +447,7 @@ golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -476,12 +500,15 @@ golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBn golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/plot v0.14.0 h1:+LBDVFYwFe4LHhdP8coW6296MBEY4nQ+Y4vuUpJopcE= +gonum.org/v1/plot v0.14.0/go.mod h1:MLdR9424SJed+5VqC6MsouEpig9pZX2VZ57H9ko2bXU= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.166.0 h1:6m4NUwrZYhAaVIHZWxaKjw1L1vNAjtMwORmKRyEEo24= @@ -539,4 +566,5 @@ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.1.3/go.mod h1:NgwopIslSNH47DimFoV78dnkksY2EFtX0ajyb3K/las= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/models/models.go b/models/models.go new file mode 100644 index 0000000..74cf015 --- /dev/null +++ b/models/models.go @@ -0,0 +1,6 @@ +package models + +type Table struct { + AnchorTimeMs uint32 + SongID string +} diff --git a/shazam/fingerprint.go b/shazam/fingerprint.go new file mode 100644 index 0000000..fd394ef --- /dev/null +++ b/shazam/fingerprint.go @@ -0,0 +1,58 @@ +package shazam + +import ( + "fmt" + "song-recognition/models" +) + +const ( + maxFreqBits = 9 + maxDeltaBits = 14 + targetZoneSize = 5 +) + +// Fingerprint generates fingerprints from a list of peaks and stores them in an array. +// The fingerprints are encoded using a 32-bit integer format and stored in an array. +// Each fingerprint consists of an address and a table value. +// The address is calculated based on the frequency of the anchor and target points, +// as well as the delta time between them. +// The table value contains the anchor time and the song ID. +func Fingerprint(peaks []Peak, songID string) map[uint32]models.Table { + fingerprints := map[uint32]models.Table{} + duplicates := 0 + + for i, anchor := range peaks { + for j := i + 1; j < len(peaks) && j <= i+targetZoneSize; j++ { + target := peaks[j] + + address := createAddress(anchor, target) + + anchorTimeMs := uint32(anchor.Time * 1000) + + _, ok := fingerprints[address] + if ok { + duplicates++ + } + fingerprints[address] = models.Table{anchorTimeMs, songID} + } + } + + fmt.Println("Duplicates: ", duplicates) + + return fingerprints +} + +// createAddress generates a unique address for a pair of anchor and target points. +// The address is a 32-bit integer where certain bits represent the frequency of +// the anchor and target points, and other bits represent the time difference (delta time) +// between them. This function combines these components into a single address. +func createAddress(anchor, target Peak) uint32 { + anchorFreq := int(real(anchor.Freq)) + targetFreq := int(real(target.Freq)) + deltaMs := uint32((target.Time - anchor.Time) * 1000) + + // Combine the frequency of the anchor, target, and delta time into a 32-bit address + address := uint32(anchorFreq<<23) | uint32(targetFreq<<14) | deltaMs + + return address +} diff --git a/shazam/image.go b/shazam/image.go new file mode 100644 index 0000000..fb87a22 --- /dev/null +++ b/shazam/image.go @@ -0,0 +1,54 @@ +package shazam + +import ( + "image" + "image/color" + "image/png" + "math" + "math/cmplx" + "os" +) + +// ConvertSpectrogramToImage converts a spectrogram to a heat map image +func SpectrogramToImage(spectrogram [][]complex128, outputPath string) error { + // Determine dimensions of the spectrogram + numWindows := len(spectrogram) + numFreqBins := len(spectrogram[0]) + + // Create a new grayscale image + img := image.NewGray(image.Rect(0, 0, numFreqBins, numWindows)) + + // Scale the values in the spectrogram to the range [0, 255] + maxMagnitude := 0.0 + for i := 0; i < numWindows; i++ { + for j := 0; j < numFreqBins; j++ { + magnitude := cmplx.Abs(spectrogram[i][j]) + if magnitude > maxMagnitude { + maxMagnitude = magnitude + } + } + } + + // Convert spectrogram values to pixel intensities + for i := 0; i < numWindows; i++ { + for j := 0; j < numFreqBins; j++ { + magnitude := cmplx.Abs(spectrogram[i][j]) + intensity := uint8(math.Floor(255 * (magnitude / maxMagnitude))) + img.SetGray(j, i, color.Gray{Y: intensity}) + } + } + + // Save the image to a PNG file + file, err := os.Create(outputPath) + if err != nil { + return err + } + defer file.Close() + + err = png.Encode(file, img) + if err != nil { + return err + } + + return nil +} diff --git a/shazam/shazam.go b/shazam/shazam.go index 55326aa..e5a2ad6 100644 --- a/shazam/shazam.go +++ b/shazam/shazam.go @@ -17,8 +17,8 @@ import ( // Constants const ( - chunkSize = 4096 // 4KB - hopSize = 128 + chunkSize = 4096 // 4KB + // hopSize = 128 fuzzFactor = 2 bitDepth = 2 channels = 1 diff --git a/shazam/spectrogram.go b/shazam/spectrogram.go index d129da0..bbc500c 100644 --- a/shazam/spectrogram.go +++ b/shazam/spectrogram.go @@ -4,88 +4,147 @@ import ( "errors" "fmt" "math" + "math/cmplx" ) const ( - dspRatio = 4 - lowPassFilter = 5000.0 // 5kHz - samplesPerWindow = 1024 + dspRatio = 4 + freqBinSize = 1024 + maxFreq = 5000.0 // 5kHz + hopSize = freqBinSize / 32 ) -func Spectrogram(samples []float64, channels, sampleRate int) [][]complex128 { - lpf := NewLowPassFilter(lowPassFilter, float64(sampleRate)) +func Spectrogram(samples []float64, sampleRate int) ([][]complex128, error) { + lpf := NewLowPassFilter(maxFreq, float64(sampleRate)) filteredSamples := lpf.Filter(samples) downsampledSamples, err := downsample(filteredSamples, dspRatio) if err != nil { - fmt.Println("Couldn't downsample audio samples: ", err) + return nil, fmt.Errorf("couldn't downsample audio samples: %v", err) } - hopSize := samplesPerWindow / 32 - numOfWindows := len(downsampledSamples) / (samplesPerWindow - hopSize) + numOfWindows := len(downsampledSamples) / (freqBinSize - hopSize) spectrogram := make([][]complex128, numOfWindows) // Apply Hamming window function - windowSize := len(samples) - for i := 0; i < len(downsampledSamples); i++ { - downsampledSamples[i] = 0.54 - 0.46*math.Cos(2*math.Pi*float64(i)/(float64(windowSize)-1)) + window := make([]float64, freqBinSize) + for i := range window { + window[i] = 0.54 - 0.46*math.Cos(2*math.Pi*float64(i)/(float64(freqBinSize)-1)) } // Perform STFT for i := 0; i < numOfWindows; i++ { start := i * hopSize - end := start + samplesPerWindow + end := start + freqBinSize if end > len(downsampledSamples) { end = len(downsampledSamples) } - spec := make([]float64, samplesPerWindow) - for j := start; j < end; j++ { - spec[j-start] = downsampledSamples[j] + bin := make([]float64, freqBinSize) + copy(bin, downsampledSamples[start:end]) + + // Apply Hamming window + for j := range window { + bin[j] *= window[j] } - applyHammingWindow(spec) - spectrogram[i] = FFT(spec) + spectrogram[i] = FFT(bin) } - return spectrogram + return spectrogram, nil } -func applyHammingWindow(samples []float64) { - windowSize := len(samples) - - for i := 0; i < windowSize; i++ { - samples[i] *= 0.54 - 0.46*math.Cos(2*math.Pi*float64(i)/(float64(windowSize)-1)) - } -} - -// Downsample downsamples a list of float64 values from 44100 Hz to a specified ratio by averaging groups of samples +// Downsample downsamples a list of float64 values to a specified ratio by averaging groups of samples func downsample(input []float64, ratio int) ([]float64, error) { - // Ensure the ratio is valid and compatible with the input length - if ratio <= 0 || len(input)%ratio != 0 { + // if ratio <= 0 || len(input)%ratio != 0 { + // return nil, errors.New("invalid or incompatible ratio") + // } + if ratio <= 0 { return nil, errors.New("invalid or incompatible ratio") } - // Calculate the size of the output slice outputSize := len(input) / ratio - - // Create the output slice output := make([]float64, outputSize) - // Iterate over the input and calculate averages for each group of samples for i := 0; i < outputSize; i++ { startIndex := i * ratio endIndex := startIndex + ratio sum := 0.0 - // Sum up the values in the current group of samples for j := startIndex; j < endIndex; j++ { sum += input[j] } - // Calculate the average for the current group output[i] = sum / float64(ratio) } return output, nil } + +type Peak struct { + Time float64 + Freq complex128 +} + +// ExtractPeaks extracts peaks from a spectrogram based on a specified algorithm +func ExtractPeaks(spectrogram [][]complex128, audioDuration float64) []Peak { + type maxies struct { + maxMag float64 + maxFreq complex128 + freqIdx int + } + + bands := []struct{ min, max int }{{0, 10}, {10, 20}, {20, 40}, {40, 80}, {80, 160}, {160, 512}} + + var peaks []Peak + binDuration := audioDuration / float64(len(spectrogram)) + + for binIdx, bin := range spectrogram { + var maxMags []float64 + var maxFreqs []complex128 + var freqIndices []float64 + + binBandMaxies := map[string]maxies{} + for freqIdx, freq := range bin { + magnitude := cmplx.Abs(freq) + + for _, band := range bands { + if magnitude >= float64(band.min) && magnitude < float64(band.max) { + key := fmt.Sprintf("%d-%d", band.min, band.max) + value, ok := binBandMaxies[key] + + if !ok || magnitude > value.maxMag { + binBandMaxies[key] = maxies{magnitude, freq, freqIdx} + } + } + } + } + + for _, value := range binBandMaxies { + maxMags = append(maxMags, value.maxMag) + maxFreqs = append(maxFreqs, value.maxFreq) + freqIndices = append(freqIndices, float64(value.freqIdx)) + } + + // Calculate the average magnitude + var maxMagsSum float64 + for _, max := range maxMags { + maxMagsSum += max + } + avg := maxMagsSum / float64(len(maxFreqs)) // * coefficient + + // Add peaks that exceed the average magnitude + for i, value := range maxMags { + if value > avg { + peakTimeInBin := freqIndices[i] * binDuration / float64(len(bin)) + + // Calculate the absolute time of the peak + peakTime := float64(binIdx)*binDuration + peakTimeInBin + + peaks = append(peaks, Peak{Time: peakTime, Freq: maxFreqs[i]}) + } + } + } + + return peaks +} diff --git a/utils/dbClient.go b/utils/dbClient.go index 19053f8..6e191a9 100644 --- a/utils/dbClient.go +++ b/utils/dbClient.go @@ -3,6 +3,7 @@ package utils import ( "context" "fmt" + "song-recognition/models" "go.mongodb.org/mongo-driver/bson" "go.mongodb.org/mongo-driver/bson/primitive" @@ -35,6 +36,91 @@ func (db *DbClient) Close() error { return nil } +func (db *DbClient) StoreFingerprints(fingerprints map[uint32]models.Table) error { + collection := db.client.Database("song-recognition").Collection("fingerprints") + + for address, table := range fingerprints { + // Check if the address already exists in the database + var existingDoc bson.M + err := collection.FindOne(context.Background(), bson.M{"_id": address}).Decode(&existingDoc) + if err != nil { + if err == mongo.ErrNoDocuments { + // If address doesn't exist, insert a new document + doc := bson.M{ + "_id": address, + "tables": []interface{}{ + bson.M{ + "anchorTimeMs": table.AnchorTimeMs, + "songID": table.SongID, + }, + }, + } + + _, err := collection.InsertOne(context.Background(), doc) + if err != nil { + return fmt.Errorf("error inserting document: %s", err) + } + } else { + return fmt.Errorf("error checking if document exists: %s", err) + } + } else { + // If address exists, append the new table to the existing tables list + + _, err := collection.UpdateOne( + context.Background(), + bson.M{"_id": address}, + bson.M{"$push": bson.M{"tables": bson.M{"anchorTimeMs": table.AnchorTimeMs, "songID": table.SongID}}}, + ) + if err != nil { + return fmt.Errorf("error updating document: %s", err) + } + } + } + + return nil +} + +func (db *DbClient) GetTables(addresses []uint32) (map[uint32][]models.Table, error) { + collection := db.client.Database("song-recognition").Collection("fingerprints") + + tables := make(map[uint32][]models.Table) + + for _, address := range addresses { + // Find the document corresponding to the address + var result bson.M + err := collection.FindOne(context.Background(), bson.M{"_id": address}).Decode(&result) + if err != nil { + if err == mongo.ErrNoDocuments { + continue + } + return nil, fmt.Errorf("error retrieving document for address %d: %s", address, err) + } + + // Extract tables from the document and append them to the tables map + var docTables []models.Table + tableArray, ok := result["tables"].(primitive.A) + if !ok { + return nil, fmt.Errorf("tables field in document for address %d is not valid", address) + } + + for _, item := range tableArray { + itemMap, ok := item.(primitive.M) + if !ok { + return nil, fmt.Errorf("invalid table format in document for address %d", address) + } + + table := models.Table{ + AnchorTimeMs: uint32(itemMap["anchorTimeMs"].(int64)), + SongID: itemMap["songID"].(string), + } + docTables = append(docTables, table) + } + tables[address] = docTables + } + + return tables, nil +} + func (db *DbClient) TotalSongs() (int, error) { existingSongsCollection := db.client.Database("song-recognition").Collection("existing-songs") total, err := existingSongsCollection.CountDocuments(context.Background(), bson.D{}) @@ -68,7 +154,7 @@ func (db *DbClient) SongExists(songTitle, songArtist, ytID string) (bool, error) } func (db *DbClient) RegisterSong(songTitle, songArtist, ytID string) error { - existingSongsCollection := db.client.Database("song-recognition").Collection("existing-songs") + existingSongsCollection := db.client.Database("song-recognition").Collection("songs") // Create a compound unique index on ytID and key, if it doesn't already exist indexModel := mongo.IndexModel{ diff --git a/utils/wav.go b/utils/wav.go index 3aa215a..857efcd 100644 --- a/utils/wav.go +++ b/utils/wav.go @@ -80,6 +80,7 @@ type WavInfo struct { Channels int SampleRate int Data []byte + Duration float64 } func ReadWavInfo(filename string) (*WavInfo, error) { @@ -99,17 +100,25 @@ func ReadWavInfo(filename string) (*WavInfo, error) { return nil, err } - // Validate header if string(header.ChunkID[:]) != "RIFF" || string(header.Format[:]) != "WAVE" || header.AudioFormat != 1 { return nil, errors.New("invalid WAV header format") } // Extract information - return &WavInfo{ + info := &WavInfo{ Channels: int(header.NumChannels), SampleRate: int(header.SampleRate), Data: data[44:], - }, nil + } + + // Calculate audio duration (assuming data contains PCM data) + if header.BitsPerSample == 16 { + info.Duration = float64(len(info.Data)) / float64(int(header.NumChannels)*2*int(header.SampleRate)) + } else { + return nil, errors.New("unsupported bits per sample format") + } + + return info, nil } // WavBytesToFloat64 converts a slice of bytes from a .wav file to a slice of float64 samples