You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
gentoo-overlay/media-gfx/iscan/files/iscan-3.62.0-tests-tesserac...

112 lines
3.7 KiB

commit 9ce60e8df3b613950c483f0ae5ec772afc329104
Author: Olaf Meeuwissen <paddy-hack@member.fsf.org>
Date: Fri Jun 21 22:20:30 2019 +0900
Fix tesseract command-line invocation. Re #78
diff --git a/filters/get-text-orientation b/filters/get-text-orientation
index 847f2c6..6f0978c 100755
--- a/filters/get-text-orientation
+++ b/filters/get-text-orientation
@@ -47,26 +47,14 @@ if test $? != 0; then
exit 1
fi
-tmpfile=$(mktemp -q .reorient.XXX)
-trap "rm -f $tmpfile" 0 1 2 15
-
case "$engine" in
*/tesseract|tesseract)
- # Notwithstanding what the manual page says, tesseract
- # doesn't support reading from standard input with the
- # `-psm 0` option. We stuff incoming image data into a
- # temporary file to work around this limitation.
- # See https://github.com/tesseract-ocr/tesseract/issues/85
-
- cat - > $tmpfile
+ # Tesseract 3.03 outputs the results we want to standard
+ # error; Tesseract 3.04 dumps it on standard output. We
+ # want it on the latter.
- # We don't care about the "regular" tesseract output so
- # divert that to /dev/null. The output that we do care
- # about ends up on standard error, but our caller looks
- # for it on standard output. Redirect to handle that.
-
- $engine $tmpfile /dev/null -psm 0 -l osd 2>&1
+ $engine - - -psm 0 -l osd 2>&1
;;
*/ocr-engine-getrotate)
@@ -74,6 +62,9 @@ case "$engine" in
# The ocr-engine-getrotate utility expects an uncompressed
# BMP image.
+ tmpfile=$(mktemp -q .reorient.XXX)
+ trap "rm -f $tmpfile" 0 1 2 15
+
$convert - -compress None bmp3:$tmpfile
$engine $tmpfile
;;
commit 56f1d8ed51cc7140b961ef8ab8c7501f69d87fd5
Author: Olaf Meeuwissen <paddy-hack@member.fsf.org>
Date: Mon Oct 21 12:41:20 2019 +0900
Fix get-text-orientation for newer Tesseract versions. Fixes #86
diff --git a/filters/get-text-orientation b/filters/get-text-orientation
index 6f0978c..eb64e4b 100755
--- a/filters/get-text-orientation
+++ b/filters/get-text-orientation
@@ -51,10 +51,21 @@ case "$engine" in
*/tesseract|tesseract)
# Tesseract 3.03 outputs the results we want to standard
- # error; Tesseract 3.04 dumps it on standard output. We
- # want it on the latter.
+ # error; Tesseract 3.04 dumps it on standard output. So
+ # do later versions. We want it on standard output.
+ # Command-line options changed in 3.05.00.
- $engine - - -psm 0 -l osd 2>&1
+ version=$($engine --version 2>&1 | sed -n 's/.*tesseract *//p')
+
+ case "$version" in
+ 3.0[34].* )
+ $engine - - -psm 0 -l osd 2>&1
+ ;;
+
+ 3.05.* | [45].* )
+ $engine - - --psm 0 -l osd 2>&1
+ ;;
+ esac
;;
*/ocr-engine-getrotate)
commit 9d5edc4c52e5a6b59d61a43ddcc13353b82992f5
Author: Olaf Meeuwissen <paddy-hack@member.fsf.org>
Date: Sun Jun 23 17:22:22 2019 +0900
Fix reorientation logic for newer Tesseract versions. Re #78
diff --git a/filters/reorient.cpp b/filters/reorient.cpp
index e0c1dc1..60c5173 100644
--- a/filters/reorient.cpp
+++ b/filters/reorient.cpp
@@ -483,6 +483,13 @@ reorient::finalize (const context& ctx)
log::alert
(format ("unexpected document orientation: %1% degrees")
% degrees);
+
+ if (engine_ == "tesseract"
+ && !tesseract_version_before_("3.04")) {
+ // Orientation reporting changed direction with 3.04. See #78
+ /**/ if ( 90 == degrees) rv.orientation (context::left_bottom);
+ else if (270 == degrees) rv.orientation (context::right_top);
+ }
}
return rv;
}