misc/py-jiwer: New port: Evaluate speech-to-text system with similarity measures

2024-08-05 17:12:46 -07:00 · 2024-08-05 17:12:46 -07:00 · 219e72a7c3
commit 219e72a7c3
parent d5bf0a77eb
4 changed files with 45 additions and 0 deletions
--- a/misc/Makefile
+++ b/misc/Makefile
@ -439,6 +439,7 @@
    SUBDIR += py-icoextract
    SUBDIR += py-instructor
    SUBDIR += py-ipyfastscape
+    SUBDIR += py-jiwer
    SUBDIR += py-kartograph
    SUBDIR += py-laspy
    SUBDIR += py-lazrs
--- a/misc/py-jiwer/Makefile
+++ b/misc/py-jiwer/Makefile
@ -0,0 +1,29 @@
+PORTNAME=	jiwer
+#DISTVERSIONPREFIX=	v
+DISTVERSION=	3.0.4 # see https://github.com/jitsi/jiwer/issues/91 (NOTE(review): presumably explains the version choice - confirm)
+CATEGORIES=	misc python # machine-learning is not active here (NOTE(review): presumably a category to add later - confirm)
+MASTER_SITES=	PYPI # no tests (NOTE(review): presumably the PyPI distfile ships no test suite - confirm)
+PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	Evaluate speech-to-text system with similarity measures
+WWW=		https://github.com/jitsi/jiwer
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/LICENSE
+
+BUILD_DEPENDS=	${PYTHON_PKGNAMEPREFIX}poetry-core>0:devel/py-poetry-core@${PY_FLAVOR}
+RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}click>=8.1.3:devel/py-click@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}rapidfuzz>=3:devel/py-rapidfuzz@${PY_FLAVOR}
+
+USES=		python
+USE_PYTHON=	pep517 concurrent autoplist # pytest is intentionally left disabled
+
+#USE_GITHUB=	yes
+#GH_ACCOUNT=	jitsi
+
+TEST_ENV=	${MAKE_ENV} PYTHONPATH=${STAGEDIR}${PYTHONPREFIX_SITELIBDIR}
+
+NO_ARCH=	yes
+
+.include <bsd.port.mk>
--- a/misc/py-jiwer/distinfo
+++ b/misc/py-jiwer/distinfo
@ -0,0 +1,3 @@
+TIMESTAMP = 1722887112
+SHA256 (jiwer-3.0.4.tar.gz) = 2438acdc7ca22128fcab4be60db595809d2b5e73785b736de36dc3281a2a6ae8
+SIZE (jiwer-3.0.4.tar.gz) = 17515
--- a/misc/py-jiwer/pkg-descr
+++ b/misc/py-jiwer/pkg-descr
@ -0,0 +1,12 @@
+JiWER is a simple and fast Python package to evaluate an automatic speech
+recognition system. It supports the following measures:
+* word error rate (WER)
+* match error rate (MER)
+* word information lost (WIL)
+* word information preserved (WIP)
+* character error rate (CER)
+
+These measures are computed with the use of the minimum-edit distance between
+one or more reference and hypothesis sentences. The minimum-edit distance is
+calculated using RapidFuzz, which uses C++ under the hood, and is therefore
+faster than a pure Python implementation.