You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
147 lines
4.1 KiB
147 lines
4.1 KiB
# Copyright 1999-2017 Gentoo Foundation
|
|
# Distributed under the terms of the GNU General Public License v2
|
|
|
|
EAPI=6
|
|
|
|
MY_PN="tesseract-ocr"
|
|
LANGPACKV="4.00"
|
|
URI_PREFIX="https://github.com/${MY_PN}/tessdata/raw/${LANGPACKV}/"
|
|
JAVA_PKG_OPT_USE="scrollview"
|
|
|
|
inherit autotools java-pkg-opt-2 toolchain-funcs
|
|
|
|
DESCRIPTION="An OCR Engine, orginally developed at HP, now open source."
|
|
HOMEPAGE="https://github.com/tesseract-ocr"
|
|
SRC_URI="https://github.com/${MY_PN}/${PN}/archive/${PV/_}.tar.gz -> ${P}.tar.gz
|
|
${URI_PREFIX}eng.traineddata -> eng.traineddata-${LANGPACKV}
|
|
math? ( ${URI_PREFIX}equ.traineddata -> equ.traineddata-${LANGPACKV} )
|
|
osd? ( ${URI_PREFIX}osd.traineddata -> osd.traineddata-${LANGPACKV} )"
|
|
|
|
LICENSE="Apache-2.0"
|
|
SLOT="0"
|
|
KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~mips ~ppc ~ppc64 ~sparc ~x86"
|
|
IUSE="doc examples jpeg math opencl openmp osd png scrollview static-libs tiff training webp"
|
|
|
|
# List of supported Gentoo linguas and their upstream mapping
|
|
# https://github.com/tesseract-ocr/tesseract/wiki/Data-Files
|
|
# "old" variants were regrouped in the matching modern locale
|
|
LANGUAGES="af:afr am:amh ar:ara as:asm az:aze,aze_cyrl be:bel bn:ben bo:bod bs:bos bg:bul ca:cat cs:ces zh:chi_sim,chi_tra cy:cym da:dan de:deu,frk dz:dzo el:ell,grc en:enm eo:epo et:est eu:eus fa:fas fi:fin fr:fra,frm ga:gle gl:glg gu:guj he:heb hi:hin hr:hrv hu:hun id:ind is:isl it:ita,ita_old ja:jpn kn:kan ka:kat,kat_old kk:kaz km:khm ky:kir ko:kor ku:kur lo:lao la:lat lv:lav lt:lit ml:mal mr:mar mk:mkd ms:msa my:mya ne:nep nl:nld no:nor or:ori pa:pan pl:pol pt:por ro:ron ru:rus sa:san si:sin sk:slk sl:slv es:spa,spa_old sq:sqi sr:srp,srp_latn sw:swa sv:swe syc:syr ta:tam te:tel tg:tgk tl:tgl th:tha tr:tur ug:uig uk:ukr uz:uzb,uzb_cyrl vi:vie"
|
|
# Missing matches:
|
|
# ceb Cebuano
|
|
# chr Cherokee
|
|
# hat Haitian; Haitian Creole
|
|
# iku Inuktitut
|
|
# jav Javanese
|
|
# mlt Maltese
|
|
# pus Pushto; Pashto
|
|
# tir Tigrinya
|
|
# urd Urdu
|
|
# yid Yiddish
|
|
# l10n_en provides the additional data:
|
|
# enm English, Middle (1100-1500)
|
|
|
|
for lang in ${LANGUAGES}; do
|
|
gentoo_lang=${lang%:*}
|
|
tess_langs=${lang#*:}
|
|
for tess_lang in ${tess_langs//,/ }; do
|
|
SRC_URI+=" l10n_${gentoo_lang}? ( ${URI_PREFIX}${tess_lang}.traineddata -> ${tess_lang}.traineddata-${LANGPACKV} )"
|
|
done
|
|
IUSE+=" l10n_${gentoo_lang}"
|
|
done
|
|
|
|
# With opencl USE=tiff is necessary in leptonica
|
|
CDEPEND=">=media-libs/leptonica-1.74:=[zlib,tiff?,jpeg?,png?,webp?]
|
|
opencl? (
|
|
virtual/opencl
|
|
media-libs/tiff:0=
|
|
media-libs/leptonica:=[tiff]
|
|
)
|
|
scrollview? (
|
|
>=dev-java/piccolo2d-3.0:0
|
|
)
|
|
training? (
|
|
dev-libs/icu:=
|
|
x11-libs/pango:=
|
|
x11-libs/cairo:=
|
|
)"
|
|
|
|
DEPEND="${CDEPEND}
|
|
doc? ( app-doc/doxygen )
|
|
scrollview? ( >=virtual/jdk-1.7 )"
|
|
|
|
RDEPEND="${CDEPEND}
|
|
scrollview? ( >=virtual/jre-1.7 )"
|
|
|
|
DOCS=( AUTHORS ChangeLog NEWS README.md )
|
|
|
|
PATCHES=(
|
|
"${FILESDIR}/${PN}-3.04.01-use-system-piccolo2d.patch"
|
|
"${FILESDIR}/${P}-isnan.patch"
|
|
"${FILESDIR}/${P}-openmp.patch"
|
|
"${FILESDIR}/${P}-no_graphics.patch"
|
|
)
|
|
|
|
S=${WORKDIR}/${P/_}
|
|
|
|
pkg_pretend() {
|
|
[[ ${MERGE_TYPE} != binary ]] && use openmp && tc-check-openmp
|
|
}
|
|
|
|
pkg_setup() {
|
|
[[ ${MERGE_TYPE} != binary ]] && use openmp && tc-check-openmp
|
|
}
|
|
|
|
src_unpack() {
|
|
unpack ${P}.tar.gz
|
|
for file in ${A}; do
|
|
if [[ "${file}" == *traineddata* ]]; then
|
|
cp "${DISTDIR}/${file}" "${S}/tessdata/${file%-*}" || die
|
|
fi
|
|
done
|
|
}
|
|
|
|
src_prepare() {
|
|
default
|
|
eautoreconf
|
|
|
|
java-pkg-opt-2_src_prepare
|
|
}
|
|
|
|
src_configure() {
|
|
local myeconfargs=(
|
|
--enable-shared
|
|
$(use_enable opencl)
|
|
$(use_enable openmp)
|
|
$(use_enable scrollview graphics)
|
|
$(use_enable static-libs static)
|
|
)
|
|
|
|
econf "${myeconfargs[@]}"
|
|
}
|
|
|
|
src_compile() {
|
|
default
|
|
use doc && emake doc
|
|
use scrollview && emake ScrollView.jar JAVAC="javac $(java-pkg_javac-args)"
|
|
use training && emake training
|
|
}
|
|
|
|
src_install() {
|
|
use doc && HTML_DOCS=( doc/html/. )
|
|
default
|
|
prune_libtool_files
|
|
|
|
if use training; then
|
|
emake DESTDIR="${D}" training-install
|
|
fi
|
|
|
|
if use examples; then
|
|
insinto /usr/share/doc/${PF}/examples
|
|
doins testing/eurotext.tif testing/phototest.tif
|
|
fi
|
|
|
|
insinto /usr/share/tessdata
|
|
doins tessdata/*traineddata* # language files
|
|
use scrollview && doins java/ScrollView.jar # scrollview
|
|
}
|