diff --git a/.github/workflows/autotools-macos.yml b/.github/workflows/autotools-macos.yml index e904d71b1d..6b29fc4fd5 100644 --- a/.github/workflows/autotools-macos.yml +++ b/.github/workflows/autotools-macos.yml @@ -14,8 +14,7 @@ jobs: fail-fast: false matrix: config: - - { name: macos-12-clang-14-autotools, os: macos-12, cxx: clang++ } - #- { name: macos-12-gcc-11-autotools, os: macos-12, cxx: g++-11 } + - { name: macos-latest-clang-autotools, os: macos-latest, cxx: clang++ } steps: - uses: actions/checkout@v4 @@ -30,15 +29,11 @@ jobs: - name: Install dependencies run: | - brew install autoconf automake - brew install leptonica - brew install cairo pango icu4c - brew install cabextract - brew install libarchive curl + brew install autoconf automake cabextract libtool + brew install curl icu4c leptonica libarchive pango - name: Setup Tesseract run: | - mkdir -p m4 ./autogen.sh - name: Configure Tesseract @@ -115,7 +110,7 @@ jobs: fail-fast: false matrix: config: - - { name: macos-12-clang-14-autotools, os: macos-12, cxx: clang++ } + - { name: macos-latest-clang-autotools, os: macos-latest, cxx: clang++ } steps: - uses: actions/checkout@v4 @@ -130,7 +125,7 @@ jobs: - name: Install Macports run: | - curl -LO https://raw.githubusercontent.com/GiovanniBussi/macports-ci/master/macports-ci; source ./macports-ci install + curl -sSLO https://raw.githubusercontent.com/GiovanniBussi/macports-ci/master/macports-ci; source ./macports-ci install # --remove-brew does not remove the Homebrew entries in bin, # so remove them now. 
rm -v $(brew --prefix)/bin/* @@ -145,7 +140,6 @@ jobs: - name: Setup Tesseract run: | - mkdir -p m4 ./autogen.sh - name: Configure Tesseract diff --git a/.github/workflows/autotools-openmp.yml b/.github/workflows/autotools-openmp.yml index c95ff100c1..4719eb12db 100644 --- a/.github/workflows/autotools-openmp.yml +++ b/.github/workflows/autotools-openmp.yml @@ -37,7 +37,6 @@ jobs: - name: Setup Tesseract run: | - mkdir -p m4 ./autogen.sh - name: Configure Tesseract diff --git a/.github/workflows/autotools.yml b/.github/workflows/autotools.yml index dde846785d..b657556fb8 100644 --- a/.github/workflows/autotools.yml +++ b/.github/workflows/autotools.yml @@ -45,7 +45,6 @@ jobs: - name: Setup Tesseract run: | - mkdir -p m4 ./autogen.sh - name: Configure Tesseract diff --git a/.github/workflows/cmake-win64.yml b/.github/workflows/cmake-win64.yml index eacefc6c6a..261814fbb2 100644 --- a/.github/workflows/cmake-win64.yml +++ b/.github/workflows/cmake-win64.yml @@ -117,8 +117,8 @@ jobs: - name: Display Tesseract Version and Test Command Line Usage shell: cmd run: | - curl -L https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata --output ${{env.ILOC}}/share/tessdata/eng.traineddata - curl -L https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata --output ${{env.ILOC}}/share/tessdata/osd.traineddata + curl -sSL https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata --output ${{env.ILOC}}/share/tessdata/eng.traineddata + curl -sSL https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata --output ${{env.ILOC}}/share/tessdata/osd.traineddata echo "Setting TESSDATA_PREFIX..." set TESSDATA_PREFIX=${{env.ILOC}}/share/tessdata echo "Setting PATH..." 
diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index de0af27835..e723871178 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -14,11 +14,9 @@ jobs: fail-fast: false matrix: config: - - - { name: macos-12-clang-14-cmake, os: macos-12, cxx: clang++ } # default - - { name: macos-11-clang-13-cmake, os: macos-11, cxx: clang++ } # default - - - { name: macos-11-gcc-12-cmake, os: macos-11, cxx: g++-12 } #installed + - { name: macos-14-clang-15-cmake, os: macos-14, cxx: clang++ } # default + - { name: macos-14-gcc-14-cmake, os: macos-14, cxx: g++-14 } #installed + - { name: macos-15-clang-cmake, os: macos-15, cxx: clang++ } # default - { name: ubuntu-22.04-clang-15-cmake, os: ubuntu-22.04, cxx: clang++-15 } #installed @@ -55,6 +53,8 @@ jobs: brew install ninja ninja --version cmake --version + clang++ --version + g++ --version if: runner.os == 'macOS' - name: Checkout Source diff --git a/.github/workflows/installer-for-windows.yml b/.github/workflows/installer-for-windows.yml new file mode 100644 index 0000000000..3aa44dad53 --- /dev/null +++ b/.github/workflows/installer-for-windows.yml @@ -0,0 +1,27 @@ +# GitHub actions - Create Tesseract installer for Windows + +name: Cross build for Windows + +on: + # Trigger workflow in GitHub web frontend or from API. 
+ workflow_dispatch: + inputs: + targets: + description: 'Target operating system' + required: true + default: 'Windows (64 bit)' + type: choice + options: + - 'Windows (64 bit)' + +jobs: + build64: + runs-on: [ubuntu-24.04] + steps: + - uses: actions/checkout@v4 + - name: Build Tesseract installer (64 bit) + run: nsis/build.sh x86_64 + - uses: actions/upload-artifact@v4 + with: + name: Tesseract Installer for Windows (64 bit) + path: dist diff --git a/.github/workflows/msys2-4.1.1.yml b/.github/workflows/msys2-4.1.1.yml deleted file mode 100644 index 47da44a437..0000000000 --- a/.github/workflows/msys2-4.1.1.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: msys2-4.1.1 -on: - #push: - schedule: - - cron: 0 18 1 * * -jobs: - windows: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - include: - - os: windows-2019 - msystem: MINGW32 - mingw_package_prefix: mingw-w64-i686 - - os: windows-2019 - msystem: MINGW64 - mingw_package_prefix: mingw-w64-x86_64 - defaults: - run: - shell: msys2 {0} - steps: - - uses: msys2/setup-msys2@v2 - with: - msystem: ${{ matrix.msystem }} - - run: pacman --noconfirm -S ${{ matrix.mingw_package_prefix }}-tesseract-ocr - - name: Display version - run: | - tesseract -v - text2image -v - lstmtraining -v diff --git a/.github/workflows/msys2.yml b/.github/workflows/msys2.yml index ceb45225c6..4ebb7a72a9 100644 --- a/.github/workflows/msys2.yml +++ b/.github/workflows/msys2.yml @@ -41,7 +41,6 @@ jobs: - name: Setup Tesseract run: | - mkdir -p m4 ./autogen.sh - name: Configure Tesseract diff --git a/.github/workflows/sw.yml b/.github/workflows/sw.yml index c744369086..db11bb5b57 100644 --- a/.github/workflows/sw.yml +++ b/.github/workflows/sw.yml @@ -1,23 +1,9 @@ name: sw on: - push: - paths: - - '**.cpp' - - '**.h' - - '**/sw.yml' - - 'unittest/**.c' - - 'unittest/**.cc' - pull_request: - paths: - - '**.cpp' - - '**.h' - - '**/sw.yml' - - 'unittest/**.c' - - 'unittest/**.cc' schedule: - # every day - - cron: 0 0 * * * + # every 3rd 
day + - cron: 0 0 */3 * * jobs: build: diff --git a/.github/workflows/unittest-disablelegacy.yml b/.github/workflows/unittest-disablelegacy.yml index 2c0145b0d0..12daaa84fb 100644 --- a/.github/workflows/unittest-disablelegacy.yml +++ b/.github/workflows/unittest-disablelegacy.yml @@ -31,7 +31,6 @@ jobs: - name: Setup run: | - mkdir -p m4 ./autogen.sh - name: Configure diff --git a/.github/workflows/unittest-macos.yml b/.github/workflows/unittest-macos.yml index d4414f583c..e34fae90b6 100644 --- a/.github/workflows/unittest-macos.yml +++ b/.github/workflows/unittest-macos.yml @@ -14,8 +14,8 @@ jobs: matrix: config: - { name: macos-arm-14-clang-unittest, os: macos-14, cxx: clang++ } # Apple silicon - - { name: macos-12-clang-unittest, os: macos-12, cxx: clang++ } - - { name: macos-12-gcc-unittest, os: macos-12, cxx: g++ } + - { name: macos-latest-clang-unittest, os: macos-latest, cxx: clang++ } + - { name: macos-latest-gcc-unittest, os: macos-latest, cxx: g++ } steps: - uses: actions/checkout@v4 @@ -24,13 +24,10 @@ jobs: - name: Install dependencies (macOS Homebrew) run: | - brew install autoconf automake libarchive - brew install leptonica cairo pango - brew install cabextract libtool - + brew install autoconf automake cabextract libtool + brew install curl icu4c leptonica libarchive pango - name: Setup run: | - mkdir -p m4 ./autogen.sh - name: Configure (macOS Homebrew) diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index f179d1cb4c..2e268b6d0f 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -37,7 +37,6 @@ jobs: - name: Setup run: | - mkdir -p m4 ./autogen.sh - name: Configure (Linux) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0dec189626..6cb5a6c84e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -471,9 +471,7 @@ else() set(DOTPRODUCT_FLAGS "${DOTPRODUCT_FLAGS} -O3 -ffast-math") endif() -if(NOT DEFINED CMAKE_INSTALL_LIBDIR) - set(CMAKE_INSTALL_LIBDIR lib) -endif(NOT DEFINED 
CMAKE_INSTALL_LIBDIR) +include (GNUInstallDirs) set(AUTOCONFIG_SRC ${CMAKE_CURRENT_BINARY_DIR}/config_auto.h.in) set(AUTOCONFIG ${CMAKE_CURRENT_BINARY_DIR}/config_auto.h) @@ -484,13 +482,13 @@ if(GRAPHICS_DISABLED) endif() set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} "${CMAKE_PREFIX_PATH}/include" - "${CMAKE_INSTALL_PREFIX}/include") + ${CMAKE_INSTALL_INCLUDEDIR}) include(Configure) configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY) -set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include") -set(LIBRARY_DIRS "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}") +set(INCLUDE_DIR ${CMAKE_INSTALL_INCLUDEDIR}) +set(LIBRARY_DIRS ${CMAKE_INSTALL_LIBDIR}) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/tesseract/version.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/tesseract/version.h @ONLY) @@ -829,7 +827,7 @@ set_target_properties( ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}) set_target_properties( libtesseract PROPERTIES SOVERSION - ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}) + ${VERSION_MAJOR}.${VERSION_MINOR}) set_target_properties( libtesseract @@ -937,7 +935,7 @@ install( RUNTIME DESTINATION bin LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -if (MSVC) +if (MSVC AND BUILD_SHARED_LIBS) install(FILES $<TARGET_PDB_FILE:libtesseract> DESTINATION bin OPTIONAL) endif() install( @@ -965,9 +963,9 @@ install( if(INSTALL_CONFIGS) install(FILES ${TESSERACT_CONFIGS} - DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/configs) + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/tessdata/configs) install(FILES ${TESSERACT_TESSCONFIGS} - DESTINATION ${CMAKE_INSTALL_PREFIX}/share/tessdata/tessconfigs) + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/tessdata/tessconfigs) endif() # ############################################################################## diff --git a/ChangeLog b/ChangeLog index a331550d2d..9e7ec162cf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +2024-11-10 - V5.5.0 +* Set hOCR capabilities ocrp_dir and ocrp_lang unconditionally. 
+* Calculate row bounding box in single-word mode per (issue #4304). +* Reduce clock syscalls (#4303). +* Several small performance and other code fixes. +* Modernized code. +* Print time for tessedit_timing_debug in milliseconds. +* Print time for ErrorCounter::ComputeErrorRate in milliseconds. +* cmake: Correctly set the soversion based on SemVer properties. +* Do not export PDBs for static libraries (issue #4279). +* Several other small fixes and improvements for builds and CI. +* Modernize code for renderers and remove filename conversion for Windows (#4330). +* Add build rule for Windows installer. +* Support symbolic values for --oem and --psm options. +* Remove Tensorflow support. +* Add RISC-V V support (#4346). +* Remove broken GitHub action msys2-4.1.1. + 2024-06-11 - V5.4.1 * Avoid FP overflow in NormEvidenceOf (fixes issue #4257) (#4259) * Small build fixes and code improvements (#4262, #4263, #4266, #4267) diff --git a/Makefile.am b/Makefile.am index a611335c91..85ff31a081 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,6 +6,9 @@ ACLOCAL_AMFLAGS = -I m4 CLEANFILES = SUBDIRS = . tessdata +if MINGW +SUBDIRS += nsis +endif EXTRA_DIST = README.md LICENSE EXTRA_DIST += aclocal.m4 config configure.ac autogen.sh @@ -23,7 +26,6 @@ uninstall-hook: rm -rf $(DESTDIR)$(pkgincludedir) dist-hook: -# Need to remove .svn directories from directories # added using EXTRA_DIST. $(distdir)/tessdata would in # theory suffice. 
rm -rf `find $(distdir) -name .deps -type d` @@ -51,6 +53,11 @@ doc-pack: doc doc-clean: rm -rf $(top_builddir)/doc/html/* +if MINGW +winsetup: training ScrollView.jar + @cd "$(top_builddir)/nsis" && $(MAKE) winsetup +endif + pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = tesseract.pc @@ -103,7 +110,6 @@ lib_LTLIBRARIES = libtesseract.la libtesseract_la_LDFLAGS = $(LEPTONICA_LIBS) libtesseract_la_LDFLAGS += $(libarchive_LIBS) libtesseract_la_LDFLAGS += $(libcurl_LIBS) -libtesseract_la_LDFLAGS += $(TENSORFLOW_LIBS) if T_WIN libtesseract_la_LDFLAGS += -no-undefined -lws2_32 else @@ -193,6 +199,15 @@ libtesseract_la_LIBADD += libtesseract_neon.la noinst_LTLIBRARIES += libtesseract_neon.la endif +if HAVE_RVV +libtesseract_rvv_la_CXXFLAGS = $(RVV_CXXFLAGS) +libtesseract_rvv_la_CXXFLAGS += -O3 +libtesseract_rvv_la_CXXFLAGS += -I$(top_srcdir)/src/ccutil +libtesseract_rvv_la_SOURCES = src/arch/intsimdmatrixrvv.cpp +libtesseract_la_LIBADD += libtesseract_rvv.la +noinst_LTLIBRARIES += libtesseract_rvv.la +endif + libtesseract_la_SOURCES += src/arch/intsimdmatrix.cpp libtesseract_la_SOURCES += src/arch/simddetect.cpp @@ -502,10 +517,6 @@ libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/cutil libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/dict libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/lstm libtesseract_lstm_la_CPPFLAGS += -I$(top_srcdir)/src/viewer -if TENSORFLOW -libtesseract_lstm_la_CPPFLAGS += -DINCLUDE_TENSORFLOW -libtesseract_lstm_la_CPPFLAGS += -I/usr/include/tensorflow -endif if !NO_TESSDATA_PREFIX libtesseract_lstm_la_CPPFLAGS += -DTESSDATA_PREFIX='"@datadir@"' endif @@ -528,7 +539,6 @@ noinst_HEADERS += src/lstm/reversed.h noinst_HEADERS += src/lstm/series.h noinst_HEADERS += src/lstm/static_shape.h noinst_HEADERS += src/lstm/stridemap.h -noinst_HEADERS += src/lstm/tfnetwork.h noinst_HEADERS += src/lstm/weightmatrix.h noinst_LTLIBRARIES += libtesseract_lstm.la @@ -549,11 +559,7 @@ libtesseract_lstm_la_SOURCES += src/lstm/reconfig.cpp 
libtesseract_lstm_la_SOURCES += src/lstm/reversed.cpp libtesseract_lstm_la_SOURCES += src/lstm/series.cpp libtesseract_lstm_la_SOURCES += src/lstm/stridemap.cpp -libtesseract_lstm_la_SOURCES += src/lstm/tfnetwork.cpp libtesseract_lstm_la_SOURCES += src/lstm/weightmatrix.cpp -if TENSORFLOW -libtesseract_lstm_la_SOURCES += src/lstm/tfnetwork.pb.cc -endif # Rules for src/textord. @@ -723,7 +729,6 @@ tesseract_LDFLAGS = $(OPENMP_CXXFLAGS) tesseract_LDADD = libtesseract.la tesseract_LDADD += $(LEPTONICA_LIBS) -tesseract_LDADD += $(TENSORFLOW_LIBS) tesseract_LDADD += $(libarchive_LIBS) tesseract_LDADD += $(libcurl_LIBS) @@ -893,7 +898,6 @@ EXTRA_PROGRAMS += $(trainingtools) extralib = libtesseract.la extralib += $(libarchive_LIBS) extralib += $(LEPTONICA_LIBS) -extralib += $(TENSORFLOW_LIBS) if T_WIN extralib += -lws2_32 endif @@ -1001,7 +1005,6 @@ fuzzer-api: unittest/fuzzers/fuzzer-api.cpp $< \ $(builddir)/.libs/libtesseract.a \ $(LEPTONICA_LIBS) \ - $(TENSORFLOW_LIBS) \ $(libarchive_LIBS) \ $(libcurl_LIBS) \ -o $@ @@ -1021,7 +1024,6 @@ fuzzer-api-512x256: unittest/fuzzers/fuzzer-api.cpp $< \ $(builddir)/.libs/libtesseract.a \ $(LEPTONICA_LIBS) \ - $(TENSORFLOW_LIBS) \ $(libarchive_LIBS) \ $(libcurl_LIBS) \ -o $@ @@ -1134,10 +1136,6 @@ endif # ENABLE_TRAINING unittest_CPPFLAGS += -I$(top_srcdir)/src/viewer unittest_CPPFLAGS += -I$(top_srcdir)/src/wordrec unittest_CPPFLAGS += -I$(top_srcdir)/unittest -if TENSORFLOW -unittest_CPPFLAGS += -DINCLUDE_TENSORFLOW -unittest_CPPFLAGS += -I/usr/include/tensorflow -endif # TENSORFLOW # Build googletest: check_LTLIBRARIES = libgtest.la libgtest_main.la libgmock.la libgmock_main.la @@ -1165,7 +1163,6 @@ GTEST_LIBS = libgtest.la libgtest_main.la -lpthread GMOCK_LIBS = libgmock.la libgmock_main.la TESS_LIBS = $(GTEST_LIBS) TESS_LIBS += libtesseract.la $(libarchive_LIBS) -TESS_LIBS += $(TENSORFLOW_LIBS) TRAINING_LIBS = libtesseract_training.la TRAINING_LIBS += $(TESS_LIBS) unittest_CPPFLAGS += -isystem 
$(top_srcdir)/unittest/third_party/googletest/googletest/include @@ -1412,10 +1409,6 @@ networkio_test_CPPFLAGS = $(unittest_CPPFLAGS) networkio_test_LDADD = $(TESS_LIBS) normstrngs_test_SOURCES = unittest/normstrngs_test.cc -if TENSORFLOW -normstrngs_test_SOURCES += unittest/third_party/utf/rune.c -normstrngs_test_SOURCES += unittest/util/utf8/unilib.cc -endif # TENSORFLOW normstrngs_test_CPPFLAGS = $(unittest_CPPFLAGS) normstrngs_test_LDADD = $(TRAINING_LIBS) $(ICU_I18N_LIBS) $(ICU_UC_LIBS) @@ -1434,11 +1427,6 @@ pagesegmode_test_CPPFLAGS = $(unittest_CPPFLAGS) pagesegmode_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS) pango_font_info_test_SOURCES = unittest/pango_font_info_test.cc -if TENSORFLOW -pango_font_info_test_SOURCES += unittest/third_party/utf/rune.c -pango_font_info_test_SOURCES += unittest/util/utf8/unicodetext.cc -pango_font_info_test_SOURCES += unittest/util/utf8/unilib.cc -endif # TENSORFLOW pango_font_info_test_CPPFLAGS = $(unittest_CPPFLAGS) pango_font_info_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS) pango_font_info_test_LDADD += $(ICU_I18N_LIBS) diff --git a/README.md b/README.md index 99da6d275d..24bb138c57 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ # Tesseract OCR -[![Build status](https://ci.appveyor.com/api/projects/status/miah0ikfsf0j3819/branch/master?svg=true)](https://ci.appveyor.com/project/zdenop/tesseract/) -[![Build status](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml/badge.svg)](https://github.com/tesseract-ocr/tesseract/actions/workflows/sw.yml)\ [![Coverity Scan Build Status](https://scan.coverity.com/projects/tesseract-ocr/badge.svg)](https://scan.coverity.com/projects/tesseract-ocr) [![CodeQL](https://github.com/tesseract-ocr/tesseract/workflows/CodeQL/badge.svg)](https://github.com/tesseract-ocr/tesseract/security/code-scanning) 
-[![OSS-Fuzz](https://img.shields.io/badge/oss--fuzz-fuzzing-brightgreen)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=2&q=proj:tesseract-ocr) +[![OSS-Fuzz](https://img.shields.io/badge/oss--fuzz-fuzzing-brightgreen)](https://issues.oss-fuzz.com/issues?q=is:open%20title:tesseract-ocr) \ [![GitHub license](https://img.shields.io/badge/license-Apache--2.0-blue.svg)](https://raw.githubusercontent.com/tesseract-ocr/tesseract/main/LICENSE) [![Downloads](https://img.shields.io/badge/download-all%20releases-brightgreen.svg)](https://github.com/tesseract-ocr/tesseract/releases/) diff --git a/VERSION b/VERSION index ade65226e0..d50359de18 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.4.1 +5.5.0 diff --git a/configure.ac b/configure.ac index 7ec2f0c4ec..cb853e6665 100644 --- a/configure.ac +++ b/configure.ac @@ -29,7 +29,7 @@ AM_INIT_AUTOMAKE([foreign subdir-objects nostdinc]) # Define date of package, etc. Could be useful in auto-generated # documentation. PACKAGE_YEAR=2024 -PACKAGE_DATE="06/11" +PACKAGE_DATE="11/10" abs_top_srcdir=`AS_DIRNAME([$0])` @@ -72,6 +72,7 @@ AC_CONFIG_HEADERS([include/config_auto.h:config/config.h.in]) # default conditional AM_CONDITIONAL([T_WIN], false) +AM_CONDITIONAL([MINGW], false) AM_CONDITIONAL([GRAPHICS_DISABLED], false) AC_SUBST([AM_CPPFLAGS]) @@ -87,7 +88,9 @@ AM_SILENT_RULES([yes]) AC_CANONICAL_HOST case "${host_os}" in mingw*) + AC_DEFINE_UNQUOTED([MINGW], 1, [This is a MinGW system]) AM_CONDITIONAL([T_WIN], true) + AM_CONDITIONAL([MINGW], true) AM_CONDITIONAL([ADD_RT], false) AC_SUBST([AM_LDFLAGS], ['-no-undefined']) ;; @@ -128,6 +131,7 @@ AM_CONDITIONAL([HAVE_AVX512F], false) AM_CONDITIONAL([HAVE_FMA], false) AM_CONDITIONAL([HAVE_SSE4_1], false) AM_CONDITIONAL([HAVE_NEON], false) +AM_CONDITIONAL([HAVE_RVV], false) case "${host_cpu}" in @@ -185,6 +189,16 @@ case "${host_cpu}" in ;; + riscv*) + + AX_CHECK_COMPILE_FLAG([-march=rv64gcv], [rvv=true], [rvv=false], [$WERROR]) + AM_CONDITIONAL([HAVE_RVV], [$rvv]) + 
if $rvv; then + AC_DEFINE([HAVE_RVV], [1], [Enable RVV instructions]) + check_for_rvv=1 + fi + ;; + *) AC_MSG_WARN([No compiler options for $host_cpu]) @@ -204,6 +218,16 @@ if test x$check_for_neon = x1; then fi fi +# additional checks for RVV targets +if test x$check_for_rvv = x1; then + AC_MSG_NOTICE([checking how to detect RVV availability]) + AC_CHECK_FUNCS([getauxval]) + + if test $ac_cv_func_getauxval = no; then + AC_MSG_WARN([RVV is available, but we don't know how to check for it. Will not be able to use RVV.]) + fi +fi + AX_CHECK_COMPILE_FLAG([-fopenmp-simd], [openmp_simd=true], [openmp_simd=false], [$WERROR]) AM_CONDITIONAL([OPENMP_SIMD], $openmp_simd) @@ -267,25 +291,6 @@ AC_ARG_WITH([curl], AS_HELP_STRING([--with-curl], [Build with libcurl which supports processing an image URL @<:@default=check@:>@]), [], [with_curl=check]) -AC_ARG_WITH([tensorflow], - AS_HELP_STRING([--with-tensorflow], - [support TensorFlow @<:@default=check@:>@]), - [], [with_tensorflow=check]) - -# Check whether to build with support for TensorFlow. 
-AM_CONDITIONAL([TENSORFLOW], false) -TENSORFLOW_LIBS= -AS_IF([test "x$with_tensorflow" != xno], - [AC_CHECK_HEADERS([tensorflow/core/framework/graph.pb.h], - [AC_SUBST([TENSORFLOW_LIBS], ["-lprotobuf -ltensorflow_cc"]) - AM_CONDITIONAL([TENSORFLOW], true) - ], - [if test "x$with_tensorflow" != xcheck; then - AC_MSG_FAILURE( - [--with-tensorflow was given, but test for libtensorflow-dev failed]) - fi - ]) - ]) # https://lists.apple.com/archives/unix-porting/2009/Jan/msg00026.html m4_define([MY_CHECK_FRAMEWORK], @@ -559,6 +564,7 @@ AC_CONFIG_FILES([java/com/google/Makefile]) AC_CONFIG_FILES([java/com/google/scrollview/Makefile]) AC_CONFIG_FILES([java/com/google/scrollview/events/Makefile]) AC_CONFIG_FILES([java/com/google/scrollview/ui/Makefile]) +AC_CONFIG_FILES([nsis/Makefile]) AC_OUTPUT # Final message diff --git a/doc/tesseract.1.asc b/doc/tesseract.1.asc index cb5d8837d2..b4730ea18e 100644 --- a/doc/tesseract.1.asc +++ b/doc/tesseract.1.asc @@ -15,7 +15,7 @@ DESCRIPTION tesseract(1) is a commercial quality OCR engine originally developed at HP between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed -at Google since then. +at Google until 2018. 
IN/OUT ARGUMENTS diff --git a/java/Makefile.am b/java/Makefile.am index 6fdde1897e..92b91b220d 100644 --- a/java/Makefile.am +++ b/java/Makefile.am @@ -51,9 +51,9 @@ $(SCROLLVIEW_CLASSES) : $(SCROLLVIEW_FILES) $(SCROLLVIEW_LIBS) .PHONY: fetch-jars fetch-jars $(SCROLLVIEW_LIBS): - curl -s -S -L -O https://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-core/3.0.1/piccolo2d-core-3.0.1.jar - curl -s -S -L -O https://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-extras/3.0.1/piccolo2d-extras-3.0.1.jar - curl -s -S -L -O https://search.maven.org/remotecontent?filepath=javax/xml/bind/jaxb-api/2.3.1/jaxb-api-2.3.1.jar + curl -sSLO https://repo1.maven.org/maven2/org/piccolo2d/piccolo2d-core/3.0.1/piccolo2d-core-3.0.1.jar + curl -sSLO https://repo1.maven.org/maven2/org/piccolo2d/piccolo2d-extras/3.0.1/piccolo2d-extras-3.0.1.jar + curl -sSLO https://repo1.maven.org/maven2/javax/xml/bind/jaxb-api/2.3.1/jaxb-api-2.3.1.jar .PHONY: install-jars install-jars : ScrollView.jar diff --git a/nsis/Makefile.am b/nsis/Makefile.am new file mode 100644 index 0000000000..917f1431e7 --- /dev/null +++ b/nsis/Makefile.am @@ -0,0 +1,22 @@ +AUTOMAKE_OPTIONS = subdir-objects + +all: + +if MINGW + +gitrev="$(shell git --git-dir=${abs_top_srcdir}/.git --work-tree=${abs_top_srcdir} describe --always --tags | sed s/^v//)" + +.PHONY: winsetup + +Plugins/x86-unicode/INetC.dll: + curl -OsS https://nsis.sourceforge.io/mediawiki/images/c/c9/Inetc.zip + unzip Inetc.zip $@ + +winpath.exe: winpath.cpp + x86_64-w64-mingw32-g++ -Os -o $@ $< + x86_64-w64-mingw32-strip --strip-unneeded $@ + +winsetup: Plugins/x86-unicode/INetC.dll winpath.exe + makensis -DCROSSBUILD -DSHARED -DSIGNCODE=$(SIGNCODE) -DSRCDIR=$(top_srcdir) -DVERSION=${gitrev} $(shell test "$(host_cpu)" = x86_64 && echo "-DW64") -NOCD $(top_srcdir)/nsis/tesseract.nsi + +endif diff --git a/nsis/build.sh b/nsis/build.sh new file mode 100755 index 0000000000..49245a1b43 --- /dev/null +++ b/nsis/build.sh @@ -0,0 +1,111 
@@ +#!/bin/bash + +# GitHub actions - Create Tesseract installer for Windows + +# Author: Stefan Weil (2010-2024) + +set -e +set -x + +LANG=C.UTF-8 + +ARCH=$1 + +if [ "$ARCH" = "i686" ]; then + MINGW=/mingw32 +else + ARCH=x86_64 + MINGW=/mingw64 +fi + +ROOTDIR=$PWD +DISTDIR=$ROOTDIR/dist +HOST=$ARCH-w64-mingw32 +TAG=$(cat VERSION).$(date +%Y%m%d) +BUILDDIR=bin/ndebug/$HOST-$TAG +PKG_ARCH=mingw-w64-${ARCH/_/-} + +# Install packages. +sudo apt-get update --quiet +sudo apt-get install --assume-yes --no-install-recommends --quiet \ + asciidoc curl xsltproc docbook-xml docbook-xsl \ + automake dpkg-dev libtool pkg-config default-jdk-headless \ + mingw-w64-tools nsis g++-"$PKG_ARCH" \ + makepkg pacman-package-manager python3-venv unzip + +# Configure pacman. + +# Enable mirrorlist. +sudo sed -Ei 's/^#.*(Include.*mirrorlist)/\1/' /etc/pacman.conf +( +# Add msys key for pacman. +cd /usr/share/keyrings +sudo curl -OsS https://raw.githubusercontent.com/msys2/MSYS2-keyring/master/msys2.gpg +sudo curl -OsS https://raw.githubusercontent.com/msys2/MSYS2-keyring/master/msys2-revoked +sudo curl -OsS https://raw.githubusercontent.com/msys2/MSYS2-keyring/master/msys2-trusted +) +( +# Add active environments for pacman. +# See https://www.msys2.org/docs/repos-mirrors/. +sudo mkdir -p /etc/pacman.d +cd /etc/pacman.d +cat </dev/null +[mingw64] +Include = /etc/pacman.d/mirrorlist.mingw +eod +sudo curl -OsS https://raw.githubusercontent.com/msys2/MSYS2-packages/master/pacman-mirrors/mirrorlist.mingw +# sudo curl -OsS https://raw.githubusercontent.com/msys2/MSYS2-packages/master/pacman-mirrors/mirrorlist.msys +) + +sudo pacman-key --init +sudo pacman-key --populate msys2 +sudo pacman -Syu --noconfirm + +# Install required pacman packages. 
+sudo pacman -S --noconfirm \ + mingw-w64-x86_64-curl-winssl \ + mingw-w64-x86_64-giflib \ + mingw-w64-x86_64-icu \ + mingw-w64-x86_64-leptonica \ + mingw-w64-x86_64-libarchive \ + mingw-w64-x86_64-libidn2 \ + mingw-w64-x86_64-openjpeg2 \ + mingw-w64-x86_64-openssl \ + mingw-w64-x86_64-pango \ + mingw-w64-x86_64-libpng \ + mingw-w64-x86_64-libtiff \ + mingw-w64-x86_64-libwebp + +git config --global user.email "sw@weilnetz.de" +git config --global user.name "Stefan Weil" +git tag -a "v$TAG" -m "Tesseract $TAG" + +# Run autogen. +./autogen.sh + +# Build Tesseract installer. +mkdir -p "$BUILDDIR" && cd "$BUILDDIR" + +# Run configure. +PKG_CONFIG_PATH=$MINGW/lib/pkgconfig +export PKG_CONFIG_PATH +# Disable OpenMP (see https://github.com/tesseract-ocr/tesseract/issues/1662). +../../../configure --disable-openmp --host="$HOST" --prefix="/usr/$HOST" \ + CXX="$HOST-g++-posix" \ + CXXFLAGS="-fno-math-errno -Wall -Wextra -Wpedantic -g -O2 -isystem $MINGW/include" \ + LDFLAGS="-L$MINGW/lib" + +make all training +MINGW_INSTALL=${PWD}${MINGW} +make install-jars install training-install html prefix="$MINGW_INSTALL" INSTALL_STRIP_FLAG=-s +test -d venv || python3 -m venv venv +source venv/bin/activate +pip install pefile +mkdir -p dll +ln -sv $("$ROOTDIR/nsis/find_deps.py" "$MINGW_INSTALL"/bin/*.exe "$MINGW_INSTALL"/bin/*.dll) dll/ +ln -svf /usr/lib/gcc/x86_64-w64-mingw32/*-win32/libstdc++-6.dll dll/ +ln -svf /usr/lib/gcc/x86_64-w64-mingw32/*-win32/libgcc_s_seh-1.dll dll/ +make winsetup prefix="$MINGW_INSTALL" + +# Copy result for upload. +mkdir -p "$DISTDIR" && cp nsis/tesseract-ocr-w*-setup-*.exe "$DISTDIR" diff --git a/nsis/find_deps.py b/nsis/find_deps.py new file mode 100755 index 0000000000..0aa12a91b7 --- /dev/null +++ b/nsis/find_deps.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# +# Copyright (C) 2024 Stefan Weil +# +# SPDX-License-Identifier: MIT +# +# Find the DLL files which are required for a given set of +# Windows executables and libraries. 
+ +import argparse +import os +import pefile + +VERBOSE = False + +def find_dependencies(binary, search_path, analyzed_deps): + pe = pefile.PE(binary) + pe.parse_data_directories() + if VERBOSE: + print(f'{binary}:') + # print(pe.dump_info()) + + for entry in pe.DIRECTORY_ENTRY_IMPORT: + name = entry.dll.decode('utf-8') + if name in analyzed_deps: + if VERBOSE: + print(f'skip {name} (already analyzed)') + continue + analyzed_deps.add(name) + fullpath = os.path.join(search_path, name) + if not os.path.exists(fullpath): + # Not found, maybe system DLL. Skip it. + if VERBOSE: + print(f'skip {name} (not found, maybe system DLL)') + continue + print(fullpath) + analyzed_deps = find_dependencies(fullpath, search_path, analyzed_deps) + + return analyzed_deps + +def main(): + """ + Command-line interface for universal dependency scanner. + """ + + parser = argparse.ArgumentParser(description='Find and copy DLL dependencies') + parser.add_argument('files', nargs='+', help='Paths to executable or library files') + parser.add_argument('--dlldir', dest='dlldir', default='/mingw64/bin/', + help='path to dll files') + + args = parser.parse_args() + + # try: + # Find dependencies + analyzed_deps = set() + for binary in args.files: + if True: + analyzed_deps = find_dependencies(binary, args.dlldir, analyzed_deps) + # except: + # print(f'error: failed to find dependencies for {binary}') + + +if __name__ == '__main__': + main() diff --git a/nsis/include/EnvVarUpdate.nsh b/nsis/include/EnvVarUpdate.nsh new file mode 100644 index 0000000000..81a888ad25 --- /dev/null +++ b/nsis/include/EnvVarUpdate.nsh @@ -0,0 +1,327 @@ +/** + * EnvVarUpdate.nsh + * : Environmental Variables: append, prepend, and remove entries + * + * WARNING: If you use StrFunc.nsh header then include it before this file + * with all required definitions. 
This is to avoid conflicts + * + * Usage: + * ${EnvVarUpdate} "ResultVar" "EnvVarName" "Action" "RegLoc" "PathString" + * + * Credits: + * Version 1.0 + * * Cal Turney (turnec2) + * * Amir Szekely (KiCHiK) and e-circ for developing the forerunners of this + * function: AddToPath, un.RemoveFromPath, AddToEnvVar, un.RemoveFromEnvVar, + * WriteEnvStr, and un.DeleteEnvStr + * * Diego Pedroso (deguix) for StrTok + * * Kevin English (kenglish_hi) for StrContains + * * Hendri Adriaens (Smile2Me), Diego Pedroso (deguix), and Dan Fuhry + * (dandaman32) for StrReplace + * + * Version 1.1 (compatibility with StrFunc.nsh) + * * techtonik + * + * http://nsis.sourceforge.net/Environmental_Variables:_append%2C_prepend%2C_and_remove_entries + * + */ + + +!ifndef ENVVARUPDATE_FUNCTION +!define ENVVARUPDATE_FUNCTION +!verbose push +!verbose 3 +!include "LogicLib.nsh" +!include "WinMessages.NSH" +!include "StrFunc.nsh" + +; ---- Fix for conflict if StrFunc.nsh is already included in main file ----------------------- +!macro _IncludeStrFunction StrFuncName + !ifndef ${StrFuncName}_INCLUDED + ${${StrFuncName}} + !endif + !ifndef Un${StrFuncName}_INCLUDED + ${Un${StrFuncName}} + !endif + !define un.${StrFuncName} "${Un${StrFuncName}}" +!macroend + +!insertmacro _IncludeStrFunction StrTok +!insertmacro _IncludeStrFunction StrStr +!insertmacro _IncludeStrFunction StrRep + +; ---------------------------------- Macro Definitions ---------------------------------------- +!macro _EnvVarUpdateConstructor ResultVar EnvVarName Action Regloc PathString + Push "${EnvVarName}" + Push "${Action}" + Push "${RegLoc}" + Push "${PathString}" + Call EnvVarUpdate + Pop "${ResultVar}" +!macroend +!define EnvVarUpdate '!insertmacro "_EnvVarUpdateConstructor"' + +!macro _unEnvVarUpdateConstructor ResultVar EnvVarName Action Regloc PathString + Push "${EnvVarName}" + Push "${Action}" + Push "${RegLoc}" + Push "${PathString}" + Call un.EnvVarUpdate + Pop "${ResultVar}" +!macroend +!define un.EnvVarUpdate 
'!insertmacro "_unEnvVarUpdateConstructor"' +; ---------------------------------- Macro Definitions end------------------------------------- + +;----------------------------------- EnvVarUpdate start---------------------------------------- +!define hklm_all_users 'HKLM "SYSTEM\CurrentControlSet\Control\Session Manager\Environment"' +!define hkcu_current_user 'HKCU "Environment"' + +!macro EnvVarUpdate UN + +Function ${UN}EnvVarUpdate + + Push $0 + Exch 4 + Exch $1 + Exch 3 + Exch $2 + Exch 2 + Exch $3 + Exch + Exch $4 + Push $5 + Push $6 + Push $7 + Push $8 + Push $9 + Push $R0 + + /* After this point: + ------------------------- + $0 = ResultVar (returned) + $1 = EnvVarName (input) + $2 = Action (input) + $3 = RegLoc (input) + $4 = PathString (input) + $5 = Orig EnvVar (read from registry) + $6 = Len of $0 (temp) + $7 = tempstr1 (temp) + $8 = Entry counter (temp) + $9 = tempstr2 (temp) + $R0 = tempChar (temp) */ + + ; Step 1: Read contents of EnvVarName from RegLoc + ; + ; Check for empty EnvVarName + ${If} $1 == "" + SetErrors + DetailPrint "ERROR: EnvVarName is blank" + Goto EnvVarUpdate_Restore_Vars + ${EndIf} + + ; Check for valid Action + ${If} $2 != "A" + ${AndIf} $2 != "P" + ${AndIf} $2 != "R" + SetErrors + DetailPrint "ERROR: Invalid Action - must be A, P, or R" + Goto EnvVarUpdate_Restore_Vars + ${EndIf} + + ${If} $3 == HKLM + ReadRegStr $5 ${hklm_all_users} $1 ; Get EnvVarName from all users into $5 + ${ElseIf} $3 == HKCU + ReadRegStr $5 ${hkcu_current_user} $1 ; Read EnvVarName from current user into $5 + ${Else} + SetErrors + DetailPrint 'ERROR: Action is [$3] but must be "HKLM" or HKCU"' + Goto EnvVarUpdate_Restore_Vars + ${EndIf} + + ; Check for empty PathString + ${If} $4 == "" + SetErrors + DetailPrint "ERROR: PathString is blank" + Goto EnvVarUpdate_Restore_Vars + ${EndIf} + + ; Make sure we've got some work to do + ${If} $5 == "" + ${AndIf} $2 == "R" + SetErrors + DetailPrint "$1 is empty - Nothing to remove" + Goto EnvVarUpdate_Restore_Vars + 
${EndIf} + + ; Step 2: Scrub EnvVar + ; + StrCpy $0 $5 ; Copy the contents to $0 + ; Remove spaces around semicolons (NOTE: spaces before the 1st entry or + ; after the last one are not removed here but instead in Step 3) + ${If} $0 != "" ; If EnvVar is not empty ... + ${Do} + ${${UN}StrStr} $7 $0 " ;" + ${If} $7 == "" + ${ExitDo} + ${EndIf} + ${${UN}StrRep} $0 $0 " ;" ";" ; Remove ';' + ${Loop} + ${Do} + ${${UN}StrStr} $7 $0 "; " + ${If} $7 == "" + ${ExitDo} + ${EndIf} + ${${UN}StrRep} $0 $0 "; " ";" ; Remove ';' + ${Loop} + ${Do} + ${${UN}StrStr} $7 $0 ";;" + ${If} $7 == "" + ${ExitDo} + ${EndIf} + ${${UN}StrRep} $0 $0 ";;" ";" + ${Loop} + + ; Remove a leading or trailing semicolon from EnvVar + StrCpy $7 $0 1 0 + ${If} $7 == ";" + StrCpy $0 $0 "" 1 ; Change ';' to '' + ${EndIf} + StrLen $6 $0 + IntOp $6 $6 - 1 + StrCpy $7 $0 1 $6 + ${If} $7 == ";" + StrCpy $0 $0 $6 ; Change ';' to '' + ${EndIf} + ; DetailPrint "Scrubbed $1: [$0]" ; Uncomment to debug + ${EndIf} + + /* Step 3. Remove all instances of the target path/string (even if "A" or "P") + $6 = bool flag (1 = found and removed PathString) + $7 = a string (e.g. path) delimited by semicolon(s) + $8 = entry counter starting at 0 + $9 = copy of $0 + $R0 = tempChar */ + + ${If} $5 != "" ; If EnvVar is not empty ... 
+ StrCpy $9 $0 + StrCpy $0 "" + StrCpy $8 0 + StrCpy $6 0 + + ${Do} + ${${UN}StrTok} $7 $9 ";" $8 "0" ; $7 = next entry, $8 = entry counter + + ${If} $7 == "" ; If we've run out of entries, + ${ExitDo} ; were done + ${EndIf} ; + + ; Remove leading and trailing spaces from this entry (critical step for Action=Remove) + ${Do} + StrCpy $R0 $7 1 + ${If} $R0 != " " + ${ExitDo} + ${EndIf} + StrCpy $7 $7 "" 1 ; Remove leading space + ${Loop} + ${Do} + StrCpy $R0 $7 1 -1 + ${If} $R0 != " " + ${ExitDo} + ${EndIf} + StrCpy $7 $7 -1 ; Remove trailing space + ${Loop} + ${If} $7 == $4 ; If string matches, remove it by not appending it + StrCpy $6 1 ; Set 'found' flag + ${ElseIf} $7 != $4 ; If string does NOT match + ${AndIf} $0 == "" ; and the 1st string being added to $0, + StrCpy $0 $7 ; copy it to $0 without a prepended semicolon + ${ElseIf} $7 != $4 ; If string does NOT match + ${AndIf} $0 != "" ; and this is NOT the 1st string to be added to $0, + StrCpy $0 $0;$7 ; append path to $0 with a prepended semicolon + ${EndIf} ; + + IntOp $8 $8 + 1 ; Bump counter + ${Loop} ; Check for duplicates until we run out of paths + ${EndIf} + + ; Step 4: Perform the requested Action + ; + ${If} $2 != "R" ; If Append or Prepend + ${If} $6 == 1 ; And if we found the target + DetailPrint "Target is already present in $1. 
It will be removed and" + ${EndIf} + ${If} $0 == "" ; If EnvVar is (now) empty + StrCpy $0 $4 ; just copy PathString to EnvVar + ${If} $6 == 0 ; If found flag is either 0 + ${OrIf} $6 == "" ; or blank (if EnvVarName is empty) + DetailPrint "$1 was empty and has been updated with the target" + ${EndIf} + ${ElseIf} $2 == "A" ; If Append (and EnvVar is not empty), + StrCpy $0 $0;$4 ; append PathString + ${If} $6 == 1 + DetailPrint "appended to $1" + ${Else} + DetailPrint "Target was appended to $1" + ${EndIf} + ${Else} ; If Prepend (and EnvVar is not empty), + StrCpy $0 $4;$0 ; prepend PathString + ${If} $6 == 1 + DetailPrint "prepended to $1" + ${Else} + DetailPrint "Target was prepended to $1" + ${EndIf} + ${EndIf} + ${Else} ; If Action = Remove + ${If} $6 == 1 ; and we found the target + DetailPrint "Target was found and removed from $1" + ${Else} + DetailPrint "Target was NOT found in $1 (nothing to remove)" + ${EndIf} + ${If} $0 == "" + DetailPrint "$1 is now empty" + ${EndIf} + ${EndIf} + + ; Step 5: Update the registry at RegLoc with the updated EnvVar and announce the change + ; + ClearErrors + ${If} $3 == HKLM + WriteRegExpandStr ${hklm_all_users} $1 $0 ; Write it in all users section + ${ElseIf} $3 == HKCU + WriteRegExpandStr ${hkcu_current_user} $1 $0 ; Write it to current user section + ${EndIf} + + IfErrors 0 +4 + MessageBox MB_OK|MB_ICONEXCLAMATION "Could not write updated $1 to $3" + DetailPrint "Could not write updated $1 to $3" + Goto EnvVarUpdate_Restore_Vars + + ; "Export" our change + SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=5000 + + EnvVarUpdate_Restore_Vars: + ; + ; Restore the user's variables and return ResultVar + Pop $R0 + Pop $9 + Pop $8 + Pop $7 + Pop $6 + Pop $5 + Pop $4 + Pop $3 + Pop $2 + Pop $1 + Push $0 ; Push my $0 (ResultVar) + Exch + Pop $0 ; Restore his $0 + +FunctionEnd + +!macroend ; EnvVarUpdate UN +!insertmacro EnvVarUpdate "" +!insertmacro EnvVarUpdate "un." 
+;----------------------------------- EnvVarUpdate end----------------------------------------
+
+; Restore the verbosity saved by "!verbose push" and close the include guard.
+!verbose pop
+!endif
diff --git a/nsis/tesseract.nsi b/nsis/tesseract.nsi
new file mode 100644
index 0000000000..0d94d458c6
--- /dev/null
+++ b/nsis/tesseract.nsi
@@ -0,0 +1,1445 @@
+; (C) Copyright 2010, Sergey Bronnikov
+; (C) Copyright 2010-2012, Zdenko Podobný
+; (C) Copyright 2015-2024 Stefan Weil
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+; http://www.apache.org/licenses/LICENSE-2.0
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+; Links to NSIS documentation:
+; https://nsis.sourceforge.io/Docs/Modern%20UI%202/Readme.html
+
+; TODO:
+; * Fix PreventMultipleInstances.
+; * Add Tesseract icon and images for installer.
+
+SetCompressor /FINAL /SOLID lzma
+SetCompressorDictSize 32
+
+Unicode true
+
+; Settings which normally should be passed as command line arguments.
+; Each !ifndef block below only supplies a default: a symbol that is already
+; defined when this point is reached keeps its value.
+; The three commented-out lines name switches that are tested with !ifdef
+; further down (CROSSBUILD, W64) or are reserved (SHARED).
+;define CROSSBUILD
+;define SHARED
+;define W64
+!ifndef COMMENTS
+!define COMMENTS "GitHub CI build"
+!endif
+!ifndef COMPANYNAME
+!define COMPANYNAME "Open Source Community"
+!endif
+!ifndef SRCDIR
+!define SRCDIR .
+!endif
+; Placeholder used when the build did not supply a version.
+!ifndef VERSION
+!define VERSION undefined
+!endif
+
+!define PRODUCT_NAME "Tesseract-OCR"
+!define PRODUCT_VERSION "${VERSION}"
+!define PRODUCT_PUBLISHER "Tesseract-OCR community"
+!ifndef PRODUCT_WEB_SITE
+!define PRODUCT_WEB_SITE "https://github.com/tesseract-ocr/tesseract"
+!endif
+; Base URL from which the language data files are downloaded at install time.
+!define GITHUB_RAW_FILE_URL \
+  "https://raw.githubusercontent.com/tesseract-ocr/tessdata_fast/main"
+
+; Cross builds add their own include and plugin search directories.
+!ifdef CROSSBUILD
+!addincludedir ${SRCDIR}\nsis\include
+!addplugindir Plugins/x86-unicode
+!endif
+
+; W64 selects the 64-bit names; without it the 32-bit names are used.
+!ifdef W64
+!define ARCH "x86_64"
+!define SETUP "tesseract-ocr-w64-setup"
+!else
+!define ARCH "i686"
+!define SETUP "tesseract-ocr-w32-setup"
+!endif
+
+# Name of program and file
+!define OUTFILE "${SETUP}-${VERSION}.exe"
+OutFile ${OUTFILE}
+
+; Optional code signing: when SIGNCODE is defined it is run on the generated
+; installer and uninstaller, with %1 standing for the file to sign.
+!ifdef SIGNCODE
+!finalize "${SIGNCODE} %1"
+!uninstfinalize "${SIGNCODE} %1"
+!endif
+
+; PREFIX is the root of the build tree whose bin/ and share/ directories
+; provide the files that are packaged.
+!ifndef PREFIX
+!define PREFIX "../mingw64"
+!endif
+!define BINDIR "${PREFIX}/bin"
+
+# General Definitions
+Name "${PRODUCT_NAME}"
+Caption "${PRODUCT_NAME} ${VERSION}"
+; NOTE(review): the year range in the branding text is hard-coded.
+!ifndef CROSSBUILD
+BrandingText /TRIMCENTER "(c) 2010-2019 ${PRODUCT_NAME}"
+!endif
+
+; File properties.
+!define /date DATEVERSION "%Y%m%d%H%M%S"
+; NOTE(review): VIProductVersion expects a numeric x.x.x.x value - confirm
+; that the build always passes a suitable VERSION.
+VIProductVersion "${VERSION}"
+VIAddVersionKey "ProductName" "${PRODUCT_NAME}"
+VIAddVersionKey "Comments" "${COMMENTS}"
+VIAddVersionKey "CompanyName" "${COMPANYNAME}"
+VIAddVersionKey "FileDescription" "Tesseract OCR"
+; The file version is the build time stamp, not the product version.
+!define /date DATETIME "%Y-%m-%d-%H-%M-%S"
+VIAddVersionKey "FileVersion" "${DATETIME}"
+VIAddVersionKey "InternalName" "Tesseract"
+VIAddVersionKey "LegalCopyright" "Apache-2.0"
+#VIAddVersionKey "LegalTrademarks" ""
+VIAddVersionKey "OriginalFilename" "${OUTFILE}"
+VIAddVersionKey "ProductVersion" "${VERSION}"
+
+!define REGKEY "SOFTWARE\${PRODUCT_NAME}"
+; HKLM (all users) vs HKCU (current user) defines
+!define env_hklm 'HKLM "SYSTEM\CurrentControlSet\Control\Session Manager\Environment"'
+!define env_hkcu 'HKCU "Environment"'
+
+# MultiUser Symbol Definitions
+# https://nsis.sourceforge.io/Docs/MultiUser/Readme.html
+!define MULTIUSER_EXECUTIONLEVEL Highest
+!define MULTIUSER_MUI
+!define MULTIUSER_INSTALLMODE_DEFAULT_REGISTRY_KEY "${REGKEY}"
+!define MULTIUSER_INSTALLMODE_DEFAULT_REGISTRY_VALUENAME MultiUserInstallMode
+!define MULTIUSER_INSTALLMODE_COMMANDLINE
+!define MULTIUSER_INSTALLMODE_INSTDIR ${PRODUCT_NAME}
+!define MULTIUSER_INSTALLMODE_INSTDIR_REGISTRY_KEY "${REGKEY}"
+; The MultiUser readme names this symbol ..._REGISTRY_VALUENAME. The older
+; ..._REGISTRY_VALUE spelling is kept in case other code refers to it.
+!define MULTIUSER_INSTALLMODE_INSTDIR_REGISTRY_VALUENAME "Path"
+!define MULTIUSER_INSTALLMODE_INSTDIR_REGISTRY_VALUE "Path"
+!ifdef W64
+!define MULTIUSER_USE_PROGRAMFILES64
+!endif
+
+# MUI Symbol Definitions
+!define MUI_ABORTWARNING
+!define MUI_COMPONENTSPAGE_SMALLDESC
+!define MUI_HEADERIMAGE
+!define MUI_HEADERIMAGE_BITMAP_NOSTRETCH
+!define MUI_ICON "${NSISDIR}\Contrib\Graphics\Icons\modern-install-blue-full.ico"
+!define MUI_FINISHPAGE_LINK "View Tesseract on GitHub"
+!define MUI_FINISHPAGE_LINK_LOCATION "https://github.com/tesseract-ocr/tesseract"
+!define MUI_FINISHPAGE_NOAUTOCLOSE
+!ifdef SHOW_README
+; Showing the README does not work.
+!define MUI_FINISHPAGE_SHOWREADME "$INSTDIR\doc\README.md"
+!define MUI_FINISHPAGE_SHOWREADME_FUNCTION ShowReadme
+!define MUI_FINISHPAGE_SHOWREADME_TEXT "Show README"
+!endif
+!define MUI_STARTMENUPAGE_REGISTRY_ROOT HKLM
+!define MUI_STARTMENUPAGE_REGISTRY_KEY ${REGKEY}
+!define MUI_STARTMENUPAGE_REGISTRY_VALUENAME StartMenuGroup
+!define MUI_STARTMENUPAGE_DEFAULTFOLDER ${PRODUCT_NAME}
+!define MUI_UNICON "${NSISDIR}\Contrib\Graphics\Icons\orange-uninstall.ico"
+!define MUI_UNFINISHPAGE_NOAUTOCLOSE
+!define MUI_WELCOMEPAGE_TITLE_3LINES
+
+# Included files
+!include MultiUser.nsh
+!include Sections.nsh
+!include MUI2.nsh
+!include LogicLib.nsh
+!include winmessages.nsh # include for some of the windows messages defines
+
+# Variables
+Var StartMenuGroup
+; Define user variables
+Var OLD_KEY
+
+# Installer pages
+!insertmacro MUI_PAGE_WELCOME
+!insertmacro MUI_PAGE_LICENSE "${SRCDIR}\LICENSE"
+!insertmacro MULTIUSER_PAGE_INSTALLMODE
+  Page custom PageReinstall PageLeaveReinstall
+!insertmacro MUI_PAGE_COMPONENTS
+!insertmacro MUI_PAGE_DIRECTORY
+!insertmacro MUI_PAGE_STARTMENU Application $StartMenuGroup
+!insertmacro MUI_PAGE_INSTFILES
+!insertmacro MUI_PAGE_FINISH
+!insertmacro MUI_UNPAGE_CONFIRM
+!insertmacro MUI_UNPAGE_INSTFILES
+
+# Languages
+!insertmacro MUI_LANGUAGE "English"
+!insertmacro MUI_LANGUAGE "French"
+!insertmacro MUI_LANGUAGE "German"
+!insertmacro MUI_LANGUAGE "Italian"
+!insertmacro MUI_LANGUAGE "Russian"
+!insertmacro MUI_LANGUAGE "Slovak"
+!insertmacro MUI_LANGUAGE "Spanish"
+!insertmacro MUI_LANGUAGE "SpanishInternational"
+
+# Installer attributes
+# (InstProgressFlags and CRCCheck used to be stated twice with the same value;
+# each is now set once.)
+ShowInstDetails hide
+InstProgressFlags smooth colored
+XPStyle on
+SpaceTexts
+CRCCheck on # Do a CRC check before installing
+
+; Download_Lang_Data <Lang>
+;   Downloads ${GITHUB_RAW_FILE_URL}/<Lang>.traineddata into
+;   $INSTDIR/tessdata with the inetc plugin. <Lang> may contain a
+;   sub-directory (for example script/Latin). A failed download is reported
+;   in a message box (answered with OK automatically in silent mode) and the
+;   installation continues. Uses $0 for the plugin status.
+!macro Download_Lang_Data Lang
+  ; Download traineddata file.
+  DetailPrint "Download: ${Lang} language file"
+  inetc::get /caption "Downloading ${Lang} language file" \
+      "${GITHUB_RAW_FILE_URL}/${Lang}.traineddata" $INSTDIR/tessdata/${Lang}.traineddata \
+      /END
+  Pop $0 # return value = exit code, "OK" if OK
+  StrCmp $0 "OK" +2
+  MessageBox MB_OK|MB_ICONEXCLAMATION \
+    "Download error. Status of ${Lang}: $0. Click OK to continue." /SD IDOK
+!macroend
+
+; Lang_Data_Section <title> <section id> <size in KB> <data name>
+;   Declares one optional (unselected by default) component which downloads
+;   a single traineddata file. The size is added to the space the
+;   components page reports for the section.
+!macro Lang_Data_Section LangTitle LangSecId LangSizeKB LangData
+  Section /o "${LangTitle}" ${LangSecId}
+    AddSize ${LangSizeKB}
+    !insertmacro Download_Lang_Data ${LangData}
+  SectionEnd
+!macroend
+
+; Hidden, mandatory section: program files, documentation and the
+; configuration files below tessdata.
+Section -Main SEC0000
+  ; mark as read only component
+  SectionIn RO
+  SetOutPath "$INSTDIR"
+  # files included in distribution
+  File ${BINDIR}/tesseract.exe
+  File ${BINDIR}/libtesseract-*.dll
+!ifdef CROSSBUILD
+  File ../dll/*.dll
+!endif
+  File winpath.exe
+  File ../doc/*.html
+  CreateDirectory "$INSTDIR\tessdata"
+  SetOutPath "$INSTDIR\tessdata"
+  File ${PREFIX}/share/tessdata/pdf.ttf
+  CreateDirectory "$INSTDIR\tessdata\configs"
+  SetOutPath "$INSTDIR\tessdata\configs"
+  File ${PREFIX}/share/tessdata/configs/*
+  ; Target directory for the script/<Name>.traineddata downloads.
+  CreateDirectory "$INSTDIR\tessdata\script"
+  CreateDirectory "$INSTDIR\tessdata\tessconfigs"
+  SetOutPath "$INSTDIR\tessdata\tessconfigs"
+  File ${PREFIX}/share/tessdata/tessconfigs/*
+  CreateDirectory "$INSTDIR\doc"
+  SetOutPath "$INSTDIR\doc"
+  File ${SRCDIR}\AUTHORS
+  File ${SRCDIR}\LICENSE
+  File ${SRCDIR}\README.md
+## File ${SRCDIR}\ReleaseNotes
+SectionEnd
+
+Section "ScrollView" SecScrollView
+  SectionIn 1
+  SetOutPath "$INSTDIR\tessdata"
+  File ${PREFIX}/share/tessdata/*.jar
+SectionEnd
+
+; Every executable in BINDIR except tesseract.exe, which the main section
+; already installs.
+Section "Training Tools" SecTr
+  SectionIn 1
+  SetOutPath "$INSTDIR"
+  File /x tesseract.exe ${BINDIR}/*.exe
+SectionEnd
+
+!define UNINST_EXE "$INSTDIR\tesseract-uninstall.exe"
+!define UNINST_KEY "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}"
+
+; Hidden section: records the installation in the registry, registers the
+; Add/Remove Programs entry and writes the uninstaller.
+Section -post SEC0001
+!ifdef W64
+  SetRegView 64
+!endif
+  ;Store installation folder - we always use HKLM!
+  WriteRegStr HKLM "${REGKEY}" "Path" "$INSTDIR"
+  WriteRegStr HKLM "${REGKEY}" "Mode" $MultiUser.InstallMode
+  WriteRegStr HKLM "${REGKEY}" "InstallDir" "$INSTDIR"
+  WriteRegStr HKLM "${REGKEY}" "CurrentVersion" "${VERSION}"
+  WriteRegStr HKLM "${REGKEY}" "Uninstaller" "${UNINST_EXE}"
+  ;WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\App Paths\tesseract.exe" "$INSTDIR\tesseract.exe"
+  ;WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Run" "Tesseract-OCR" "$INSTDIR\tesseract.exe"
+  ; Register to Add/Remove program in control panel
+  WriteRegStr HKLM "${UNINST_KEY}" "DisplayName" "${PRODUCT_NAME} - open source OCR engine"
+  WriteRegStr HKLM "${UNINST_KEY}" "DisplayVersion" "${VERSION}"
+  WriteRegStr HKLM "${UNINST_KEY}" "Publisher" "${PRODUCT_PUBLISHER}"
+  WriteRegStr HKLM "${UNINST_KEY}" "URLInfoAbout" "${PRODUCT_WEB_SITE}"
+  WriteRegStr HKLM "${UNINST_KEY}" "DisplayIcon" "${UNINST_EXE}"
+  WriteRegStr HKLM "${UNINST_KEY}" "UninstallString" "${UNINST_EXE}"
+  WriteRegStr HKLM "${UNINST_KEY}" "QuietUninstallString" '"${UNINST_EXE}" /S'
+  WriteRegDWORD HKLM "${UNINST_KEY}" "NoModify" 1
+  WriteRegDWORD HKLM "${UNINST_KEY}" "NoRepair" 1
+  ;Create uninstaller
+  WriteUninstaller "${UNINST_EXE}"
+  ;ExecShell "open" "https://github.com/tesseract-ocr/tesseract"
+  ;ExecShell "open" '"$INSTDIR"'
+  ;BringToFront
+SectionEnd
+
+; NOTE(review): the shortcuts go to a fixed "$SMPROGRAMS\${PRODUCT_NAME}"
+; folder; $StartMenuGroup chosen on the start menu page is not used here.
+; NOTE(review): "Dokumentation.lnk" uses the German spelling while the other
+; shortcut names are English.
+Section "Shortcuts creation" SecCS
+  SetOutPath $INSTDIR
+  CreateDirectory "$SMPROGRAMS\${PRODUCT_NAME}"
+  CreateShortCut "$SMPROGRAMS\${PRODUCT_NAME}\Console.lnk" "$INSTDIR\winpath.exe" "cmd"
+  CreateShortCut "$SMPROGRAMS\${PRODUCT_NAME}\Dokumentation.lnk" "$INSTDIR\tesseract.1.html"
+  CreateShortCut "$SMPROGRAMS\${PRODUCT_NAME}\Homepage.lnk" "${PRODUCT_WEB_SITE}"
+  CreateShortCut "$SMPROGRAMS\${PRODUCT_NAME}\ReadMe.lnk" "${PRODUCT_WEB_SITE}/wiki/ReadMe"
+  CreateShortCut "$SMPROGRAMS\${PRODUCT_NAME}\FAQ.lnk" "${PRODUCT_WEB_SITE}/wiki/FAQ"
+  CreateShortCut "$SMPROGRAMS\${PRODUCT_NAME}\Uninstall.lnk" "${UNINST_EXE}" "" "${UNINST_EXE}" 0
+  ;CreateShortCut "$DESKTOP\Tesseract-OCR.lnk" "$INSTDIR\tesseract.exe" "" "$INSTDIR\tesseract.exe" 0
+  ;CreateShortCut "$QUICKLAUNCH\.lnk" "$INSTDIR\tesseract.exe" "" "$INSTDIR\tesseract.exe" 0
+SectionEnd
+
+; Language files
+; English is mandatory (read only); orientation and script detection is
+; selected by default.
+SectionGroup "Language data" SecGrp_LD
+  Section "English" SecLang_eng
+    SectionIn RO
+    !insertmacro Download_Lang_Data eng
+  SectionEnd
+
+  Section "Orientation and script detection" SecLang_osd
+    SectionIn 1
+    !insertmacro Download_Lang_Data osd
+  SectionEnd
+SectionGroupEnd
+
+; Download script files
+SectionGroup "Additional script data (download)" SecGrp_ASD
+  !insertmacro Lang_Data_Section "Arabic script" SecLang_Arabic 8880 script/Arabic
+  !insertmacro Lang_Data_Section "Armenian script" SecLang_Armenian 7510 script/Armenian
+  !insertmacro Lang_Data_Section "Bengali script" SecLang_Bengali 5450 script/Bengali
+  !insertmacro Lang_Data_Section "Canadian Aboriginal script" SecLang_Canadian_Aboriginal 6850 script/Canadian_Aboriginal
+  !insertmacro Lang_Data_Section "Cherokee script" SecLang_Cherokee 4040 script/Cherokee
+  !insertmacro Lang_Data_Section "Cyrillic script" SecLang_Cyrillic 27900 script/Cyrillic
+  !insertmacro Lang_Data_Section "Devanagari script" SecLang_Devanagari 17100 script/Devanagari
+  !insertmacro Lang_Data_Section "Ethiopic script" SecLang_Ethiopic 8650 script/Ethiopic
+  !insertmacro Lang_Data_Section "Fraktur script" SecLang_Fraktur 10400 script/Fraktur
+  !insertmacro Lang_Data_Section "Georgian script" SecLang_Georgian 6630 script/Georgian
+  !insertmacro Lang_Data_Section "Greek script" SecLang_Greek 2900 script/Greek
+  !insertmacro Lang_Data_Section "Gujarati script" SecLang_Gujarati 4780 script/Gujarati
+  !insertmacro Lang_Data_Section "Gurmukhi script" SecLang_Gurmukhi 4020 script/Gurmukhi
+  !insertmacro Lang_Data_Section "Han Simplified script" SecLang_HanS 5700 script/HanS
+  !insertmacro Lang_Data_Section "Han Simplified vertical script" SecLang_HanS_vert 5304 script/HanS_vert
+  !insertmacro Lang_Data_Section "Han Traditional script" SecLang_HanT 5200 script/HanT
+  !insertmacro Lang_Data_Section "Han Traditional vertical script" SecLang_HanT_vert 5200 script/HanT_vert
+  !insertmacro Lang_Data_Section "Hangul script" SecLang_Hangul 4620 script/Hangul
+  !insertmacro Lang_Data_Section "Hangul vertical script" SecLang_Hangul_vert 4510 script/Hangul_vert
+  !insertmacro Lang_Data_Section "Hebrew script" SecLang_Hebrew 4640 script/Hebrew
+  !insertmacro Lang_Data_Section "Japanese script" SecLang_Japanese 5610 script/Japanese
+  !insertmacro Lang_Data_Section "Japanese vertical script" SecLang_Japanese_vert 6150 script/Japanese_vert
+  !insertmacro Lang_Data_Section "Kannada script" SecLang_Kannada 6460 script/Kannada
+  !insertmacro Lang_Data_Section "Khmer script" SecLang_Khmer 4270 script/Khmer
+  !insertmacro Lang_Data_Section "Lao script" SecLang_Script_Lao 9640 script/Lao
+  !insertmacro Lang_Data_Section "Latin script" SecLang_Latin 85200 script/Latin
+  !insertmacro Lang_Data_Section "Malayalam script" SecLang_Malayalam 8590 script/Malayalam
+  !insertmacro Lang_Data_Section "Myanmar script" SecLang_Myanmar 7480 script/Myanmar
+  !insertmacro Lang_Data_Section "Oriya script" SecLang_Oriya 5480 script/Oriya
+  !insertmacro Lang_Data_Section "Sinhala script" SecLang_Sinhala 4560 script/Sinhala
+  !insertmacro Lang_Data_Section "Syriac script" SecLang_Syriac 5530 script/Syriac
+  !insertmacro Lang_Data_Section "Tamil script" SecLang_Tamil 6760 script/Tamil
+  !insertmacro Lang_Data_Section "Telugu script" SecLang_Telugu 6180 script/Telugu
+  !insertmacro Lang_Data_Section "Thaana script" SecLang_Thaana 5770 script/Thaana
+  !insertmacro Lang_Data_Section "Thai script" SecLang_Thai 4050 script/Thai
+  !insertmacro Lang_Data_Section "Tibetan script" SecLang_Tibetan 5440 script/Tibetan
+  !insertmacro Lang_Data_Section "Vietnamese script" SecLang_Vietnamese 1590 script/Vietnamese
+
+SectionGroupEnd
+
+; Download language files
+SectionGroup "Additional language data (download)" SecGrp_ALD
+  !insertmacro Lang_Data_Section "Math / equation detection module" SecLang_equ 2200 equ
+
+  ; The language names are documented here:
+  ; https://github.com/tesseract-ocr/tesseract/blob/main/doc/tesseract.1.asc#languages
+
+  !insertmacro Lang_Data_Section "Afrikaans" SecLang_afr 2530 afr
+  !insertmacro Lang_Data_Section "Amharic" SecLang_amh 5220 amh
+  !insertmacro Lang_Data_Section "Arabic" SecLang_ara 1370 ara
+  !insertmacro Lang_Data_Section "Assamese" SecLang_asm 1950 asm
+  !insertmacro Lang_Data_Section "Azerbaijani" SecLang_aze 3360 aze
+  !insertmacro Lang_Data_Section "Azerbaijani (Cyrillic)" SecLang_aze_cyrl 1850 aze_cyrl
+  !insertmacro Lang_Data_Section "Belarusian" SecLang_bel 3520 bel
+  !insertmacro Lang_Data_Section "Bengali" SecLang_ben 836 ben
+  !insertmacro Lang_Data_Section "Tibetan" SecLang_bod 1880 bod
+  !insertmacro Lang_Data_Section "Bosnian" SecLang_bos 2380 bos
+  !insertmacro Lang_Data_Section "Breton" SecLang_bre 6188 bre
+  !insertmacro Lang_Data_Section "Bulgarian" SecLang_bul 1600 bul
+  !insertmacro Lang_Data_Section "Catalan" SecLang_cat 1090 cat
+  !insertmacro Lang_Data_Section "Cebuano" SecLang_ceb 699 ceb
+  !insertmacro Lang_Data_Section "Czech" SecLang_ces 3620 ces
+  !insertmacro Lang_Data_Section "Chinese (Simplified)" SecLang_chi_sim 2350 chi_sim
+  !insertmacro Lang_Data_Section "Chinese (Simplified vertical)" SecLang_chi_sim_vert 1840 chi_sim_vert
+  !insertmacro Lang_Data_Section "Chinese (Traditional)" SecLang_chi_tra 2260 chi_tra
+  !insertmacro Lang_Data_Section "Chinese (Traditional vertical)" SecLang_chi_tra_vert 1740 chi_tra_vert
+  !insertmacro Lang_Data_Section "Cherokee" SecLang_chr 366 chr
+  !insertmacro Lang_Data_Section "Corsican" SecLang_cos 2190 cos
+  !insertmacro Lang_Data_Section "Welsh" SecLang_cym 2110 cym
+  !insertmacro Lang_Data_Section "Danish" SecLang_dan 2460 dan
+  !insertmacro Lang_Data_Section "German" SecLang_deu 1450 deu
+  !insertmacro Lang_Data_Section "German Fraktur" SecLang_deu_latf 6130 deu_latf
+  !insertmacro Lang_Data_Section "Divehi" SecLang_div 1690 div
+  !insertmacro Lang_Data_Section "Dzongkha" SecLang_dzo 439 dzo
+  !insertmacro Lang_Data_Section "Greek" SecLang_ell 1350 ell
+  !insertmacro Lang_Data_Section "English - Middle (1100-1500)" SecLang_enm 2960 enm
+  !insertmacro Lang_Data_Section "Esperanto" SecLang_epo 4510 epo
+  !insertmacro Lang_Data_Section "Estonian" SecLang_est 4250 est
+  !insertmacro Lang_Data_Section "Basque" SecLang_eus 4940 eus
+  !insertmacro Lang_Data_Section "Faroese" SecLang_fao 3280 fao
+  !insertmacro Lang_Data_Section "Persian" SecLang_fas 421 fas
+  !insertmacro Lang_Data_Section "Filipino" SecLang_fil 1760 fil
+  !insertmacro Lang_Data_Section "Finnish" SecLang_fin 7500 fin
+  !insertmacro Lang_Data_Section "French" SecLang_fra 1080 fra
+  !insertmacro Lang_Data_Section "French - Middle (ca. 1400-1600)" SecLang_frm 1930 frm
+  !insertmacro Lang_Data_Section "Frisian (Western)" SecLang_fry 1820 fry
+  !insertmacro Lang_Data_Section "Gaelic (Scots)" SecLang_gla 2930 gla
+  !insertmacro Lang_Data_Section "Irish" SecLang_gle 1130 gle
+  !insertmacro Lang_Data_Section "Galician" SecLang_glg 2440 glg
+  !insertmacro Lang_Data_Section "Greek, Ancient (-1453)" SecLang_grc 2140 grc
+  !insertmacro Lang_Data_Section "Gujarati" SecLang_guj 1350 guj
+  !insertmacro Lang_Data_Section "Haitian" SecLang_hat 1890 hat
+  !insertmacro Lang_Data_Section "Hebrew" SecLang_heb 939 heb
+  !insertmacro Lang_Data_Section "Hindi" SecLang_hin 1070 hin
+  !insertmacro Lang_Data_Section "Croatian" SecLang_hrv 3910 hrv
+  !insertmacro Lang_Data_Section "Hungarian" SecLang_hun 5050 hun
+  !insertmacro Lang_Data_Section "Armenian" SecLang_hye 3300 hye
+  !insertmacro Lang_Data_Section "Inuktitut" SecLang_iku 2670 iku
+  !insertmacro Lang_Data_Section "Indonesian" SecLang_ind 1070 ind
+  !insertmacro Lang_Data_Section "Icelandic" SecLang_isl 2170 isl
+  !insertmacro Lang_Data_Section "Italian" SecLang_ita 2580 ita
+  !insertmacro Lang_Data_Section "Italian (Old)" SecLang_ita_old 3130 ita_old
+  !insertmacro Lang_Data_Section "Javanese" SecLang_jav 2840 jav
+
+  ; Lang_Data_Section <title> <section id> <size in KB> <data name>
+  ;   Declares one optional (unselected by default) component which downloads
+  ;   a single traineddata file with Download_Lang_Data. It is defined here
+  ;   only when no macro of that name exists yet.
+  !ifmacrondef Lang_Data_Section
+  !macro Lang_Data_Section LangTitle LangSecId LangSizeKB LangData
+    Section /o "${LangTitle}" ${LangSecId}
+      AddSize ${LangSizeKB}
+      !insertmacro Download_Lang_Data ${LangData}
+    SectionEnd
+  !macroend
+  !endif
+
+  !insertmacro Lang_Data_Section "Japanese" SecLang_jpn 2360 jpn
+  !insertmacro Lang_Data_Section "Japanese (vertical)" SecLang_jpn_vert 2900 jpn_vert
+  !insertmacro Lang_Data_Section "Kannada" SecLang_kan 3440 kan
+  !insertmacro Lang_Data_Section "Georgian" SecLang_kat 2410 kat
+  !insertmacro Lang_Data_Section "Georgian (Old)" SecLang_kat_old 413 kat_old
+  !insertmacro Lang_Data_Section "Kazakh" SecLang_kaz 4520 kaz
+  !insertmacro Lang_Data_Section "Central Khmer" SecLang_khm 1380 khm
+  !insertmacro Lang_Data_Section "Kirghiz" SecLang_kir 9470 kir
+  !insertmacro Lang_Data_Section "Korean" SecLang_kor 1600 kor
+  !insertmacro Lang_Data_Section "Kurdish (Kurmanji)" SecLang_kmr 3400 kmr
+  !insertmacro Lang_Data_Section "Lao" SecLang_lao 6090 lao
+  !insertmacro Lang_Data_Section "Latin" SecLang_lat 3040 lat
+  !insertmacro Lang_Data_Section "Latvian" SecLang_lav 2590 lav
+  !insertmacro Lang_Data_Section "Lithuanian" SecLang_lit 3010 lit
+  !insertmacro Lang_Data_Section "Luxembourgish" SecLang_ltz 2490 ltz
+  !insertmacro Lang_Data_Section "Malayalam" SecLang_mal 5030 mal
+  !insertmacro Lang_Data_Section "Marathi" SecLang_mar 2020 mar
+  !insertmacro Lang_Data_Section "Macedonian" SecLang_mkd 1530 mkd
+  !insertmacro Lang_Data_Section "Maltese" SecLang_mlt 2200 mlt
+  !insertmacro Lang_Data_Section "Mongolian" SecLang_mon 2040 mon
+  !insertmacro Lang_Data_Section "Maori" SecLang_mri 843 mri
+  !insertmacro Lang_Data_Section "Malay" SecLang_msa 1670 msa
+  !insertmacro Lang_Data_Section "Burmese" SecLang_mya 4430 mya
+  !insertmacro Lang_Data_Section "Nepali" SecLang_nep 979 nep
+  !insertmacro Lang_Data_Section "Dutch; Flemish" SecLang_nld 5770 nld
+  !insertmacro Lang_Data_Section "Norwegian" SecLang_nor 3440 nor
+  !insertmacro Lang_Data_Section "Occitan (post 1500)" SecLang_oci 6030 oci
+  !insertmacro Lang_Data_Section "Oriya" SecLang_ori 1410 ori
+  !insertmacro Lang_Data_Section "Panjabi / Punjabi" SecLang_pan 4860 pan
+  !insertmacro Lang_Data_Section "Polish" SecLang_pol 4540 pol
+  !insertmacro Lang_Data_Section "Portuguese" SecLang_por 1890 por
+  !insertmacro Lang_Data_Section "Pushto / Pashto" SecLang_pus 1690 pus
+  !insertmacro Lang_Data_Section "Quechua" SecLang_que 4790 que
+  !insertmacro Lang_Data_Section "Romanian" SecLang_ron 2270 ron
+  !insertmacro Lang_Data_Section "Russian" SecLang_rus 3680 rus
+  !insertmacro Lang_Data_Section "Sanskrit" SecLang_san 1180 san
+  !insertmacro Lang_Data_Section "Sinhala / Sinhalese" SecLang_sin 1650 sin
+  !insertmacro Lang_Data_Section "Slovak" SecLang_slk 4220 slk
+  !insertmacro Lang_Data_Section "Slovenian" SecLang_slv 2860 slv
+  !insertmacro Lang_Data_Section "Sindhi" SecLang_snd 1620 snd
+  !insertmacro Lang_Data_Section "Spanish" SecLang_spa 2190 spa
+  !insertmacro Lang_Data_Section "Spanish (Old)" SecLang_spa_old 2760 spa_old
+  !insertmacro Lang_Data_Section "Albanian" SecLang_sqi 1790 sqi
+  !insertmacro Lang_Data_Section "Serbian" SecLang_srp 2050 srp
+  !insertmacro Lang_Data_Section "Serbian (Latin)" SecLang_srp_latn 3130 srp_latn
+  !insertmacro Lang_Data_Section "Sundanese" SecLang_sun 1310 sun
+  !insertmacro Lang_Data_Section "Swahili" SecLang_swa 2070 swa
+  !insertmacro Lang_Data_Section "Swedish" SecLang_swe 3970 swe
+  !insertmacro Lang_Data_Section "Syriac" SecLang_syr 2100 syr
+  !insertmacro Lang_Data_Section "Tamil" SecLang_tam 3090 tam
+  !insertmacro Lang_Data_Section "Tatar" SecLang_tat 1020 tat
+  !insertmacro Lang_Data_Section "Telugu" SecLang_tel 2640 tel
+  !insertmacro Lang_Data_Section "Tajik" SecLang_tgk 2480 tgk
+  !insertmacro Lang_Data_Section "Thai" SecLang_tha 1020 tha
+  !insertmacro Lang_Data_Section "Tigrinya" SecLang_tir 370 tir
+  !insertmacro Lang_Data_Section "Tonga" SecLang_ton 925 ton
+  !insertmacro Lang_Data_Section "Turkish" SecLang_tur 4240 tur
+  !insertmacro Lang_Data_Section "Uighur" SecLang_uig 2660 uig
+  !insertmacro Lang_Data_Section "Ukrainian" SecLang_ukr 3650 ukr
+  !insertmacro Lang_Data_Section "Urdu" SecLang_urd 1330 urd
+  !insertmacro Lang_Data_Section "Uzbek" SecLang_uzb 6170 uzb
+  !insertmacro Lang_Data_Section "Uzbek (Cyrillic)" SecLang_uzb_cyrl 1490 uzb_cyrl
+  !insertmacro Lang_Data_Section "Vietnamese" SecLang_vie 519 vie
+  !insertmacro Lang_Data_Section "Yiddish" SecLang_yid 533 yid
+  !insertmacro Lang_Data_Section "Yoruba" SecLang_yor 941 yor
+
+SectionGroupEnd
+
+;--------------------------------
+;Descriptions
+  ; The installer texts are localized first for the languages which are well
+  ; supported by Tesseract (English, Spanish, German, Italian, Dutch) and for
+  ; Russian (the original author's native language).
+  ;Language strings
+  LangString DESC_SEC0001 ${LANG_RUSSIAN} "Установочные файлы."
+  ;LangString DESC_SecHelp ${LANG_RUSSIAN} "Справочная информация."
+  LangString DESC_SecCS ${LANG_RUSSIAN} "Добавить ярлыки в меню Пуск"
+
+  LangString DESC_SEC0001 ${LANG_ENGLISH} "Installation files."
+  ;LangString DESC_SecHelp ${LANG_ENGLISH} "Help information."
+  LangString DESC_SecCS ${LANG_ENGLISH} "Add shortcuts to Start menu."
+
+  LangString DESC_SEC0001 ${LANG_FRENCH} "Fichier d'installation."
+  ;LangString DESC_SecHelp ${LANG_FRENCH} "Aide."
+  LangString DESC_SecCS ${LANG_FRENCH} "Ajouter des raccourcis vers le menu démarrer."
+
+  LangString DESC_SEC0001 ${LANG_GERMAN} "Dateien für die Installation."
+  ;LangString DESC_SecHelp ${LANG_GERMAN} "Hilfe."
+  LangString DESC_SecCS ${LANG_GERMAN} "Einträge im Startmenü hinzufügen."
+
+  LangString DESC_SEC0001 ${LANG_ITALIAN} "File di installazione."
+  ;LangString DESC_SecHelp ${LANG_ITALIAN} "Guida di informazioni."
+  LangString DESC_SecCS ${LANG_ITALIAN} "Aggiungere collegamenti al menu Start."
+
+  LangString DESC_SEC0001 ${LANG_SLOVAK} "Súbory inštalácie."
+  ;LangString DESC_SecHelp ${LANG_SLOVAK} "Pomocné informácie."
+  LangString DESC_SecCS ${LANG_SLOVAK} "Pridať odkaz do Start menu."
+
+  LangString DESC_SEC0001 ${LANG_SPANISH} "Los archivos de instalación."
+  ;LangString DESC_SecHelp ${LANG_SPANISH} "Información de ayuda."
+  LangString DESC_SecCS ${LANG_SPANISH} "Añadir accesos directos al menú Inicio."
+
+  LangString DESC_SEC0001 ${LANG_SPANISHINTERNATIONAL} "Los archivos de instalación."
+  ;LangString DESC_SecHelp ${LANG_SPANISHINTERNATIONAL} "Información de ayuda."
+  LangString DESC_SecCS ${LANG_SPANISHINTERNATIONAL} "Añadir accesos directos al menú Inicio."
+
+  ;Assign language strings to sections
+  !insertmacro MUI_FUNCTION_DESCRIPTION_BEGIN
+    !insertmacro MUI_DESCRIPTION_TEXT ${SEC0001} $(DESC_SEC0001)
+    !insertmacro MUI_DESCRIPTION_TEXT ${SecCS} $(DESC_SecCS)
+  !insertmacro MUI_FUNCTION_DESCRIPTION_END
+
+;--------------------------------
+;Uninstaller Section
+; Removes the Start menu entries, the registry keys and the installed files.
+;Section /o -un.Main UNSEC0000
+Section -un.Main UNSEC0000
+!ifdef W64
+  SetRegView 64
+!endif
+  DetailPrint "Removing everything"
+  Delete "$SMPROGRAMS\${PRODUCT_NAME}\*.*"
+  RMDir "$SMPROGRAMS\${PRODUCT_NAME}"
+  DetailPrint "Removing registry info"
+  DeleteRegKey HKLM "Software\Tesseract-OCR"
+  SendMessage ${HWND_BROADCAST} ${WM_WININICHANGE} 0 "STR:Environment" /TIMEOUT=1000
+
+  # remove the Add/Remove information
+  DeleteRegKey HKLM "${UNINST_KEY}"
+  Delete "${UNINST_EXE}"
+  DeleteRegValue HKLM "${REGKEY}" Path
+  DeleteRegKey /IfEmpty HKLM "${REGKEY}\Components"
+  DeleteRegKey /IfEmpty HKLM "${REGKEY}"
+  Delete "$INSTDIR\*.dll"
+  Delete "$INSTDIR\*.exe"
+  Delete "$INSTDIR\*.html"
+  Delete "$INSTDIR\doc\AUTHORS"
+  Delete "$INSTDIR\doc\LICENSE"
+  Delete "$INSTDIR\doc\README.md"
+  RMDir "$INSTDIR\doc"
+  RMDir /r "$INSTDIR\tessdata"
+  RMDir "$INSTDIR"
+SectionEnd
+
+Function PageReinstall
+
+FunctionEnd
+
+Function PageLeaveReinstall
+
+FunctionEnd
+; REMOVE_REGKEY: delete ${REGKEY} from HKLM if OLD_KEY is "HKLM", else from HKCU. Defines label End, so insert it only once per function.
+!macro REMOVE_REGKEY OLD_KEY
+  StrCmp ${OLD_KEY} HKLM 0 +3
+    DeleteRegKey HKLM "${REGKEY}"
+    Goto End
+  DeleteRegKey HKCU "${REGKEY}"
+  End:
+!macroend
+; .onInit: offer to uninstall an existing version, then preselect the language section matching the system language ID.
+Function .onInit
+!ifdef W64
+  SetRegView 64
+!endif
+  Call PreventMultipleInstances
+  !insertmacro MUI_LANGDLL_DISPLAY
+  ;RequestExecutionLevel admin
+  !insertmacro MULTIUSER_INIT
+
+  ; is tesseract already installed?
+  ReadRegStr $R0 HKCU "${REGKEY}" "CurrentVersion"
+  StrCpy $OLD_KEY HKCU
+  StrCmp $R0 "" TestHKLM AskUninstall
+  TestHKLM:
+    ReadRegStr $R0 HKLM "${REGKEY}" "CurrentVersion"
+    StrCpy $OLD_KEY HKLM
+    StrCmp $R0 "" SkipUnInstall
+  AskUninstall:
+    MessageBox MB_YESNO|MB_ICONEXCLAMATION \
+      "Tesseract-ocr version $R0 is installed (in $OLD_KEY)! Do you want to uninstall it first?$\nUninstall will delete all files in '$INSTDIR'!" \
+      /SD IDYES IDNO SkipUnInstall IDYES UnInstall
+  UnInstall:
+    StrCmp $OLD_KEY "HKLM" UnInst_hklm
+    DetailPrint "Uninstall: current user"
+    readRegStr $R1 HKCU "${UNINST_KEY}" "UninstallString"
+    Goto try_uninstall
+  UnInst_hklm:
+    DetailPrint "UnInstall: all users"
+    readRegStr $R1 HKLM "${UNINST_KEY}" "UninstallString"
+  try_uninstall:
+    ClearErrors
+    ExecWait '$R1 _?=$INSTDIR'$0
+    ; If the uninstaller finished ok, remove the old registry key; otherwise report the failure. A label is used because a relative jump would land inside the macro expansion.
+    StrCmp $0 0 0 UninstallFailed
+    !insertmacro REMOVE_REGKEY $OLD_KEY
+    Goto SkipUnInstall
+  UninstallFailed: messagebox mb_ok "Uninstaller failed:$\n$0$\n$\nYou need to remove program manually." /SD IDOK
+  SkipUnInstall:
+    ;InitPluginsDir
+    ;File /oname=$PLUGINSDIR\splash.bmp "${NSISDIR}\Contrib\Graphics\Header\nsis.bmp"
+    ;File /oname=$PLUGINSDIR\splash.bmp "new.bmp"
+    ;advsplash::show 1000 600 400 -1 $PLUGINSDIR\splash
+    ;Pop $0 ; $0 has '1' if the user closed the splash screen early,
+    ; '0' if everything closed normal, and '-1' if some error occurred.
+    ;IfFileExists $INSTDIR\loadmain.exe PathGood
+  ;done:
+  ; Make selection based on System language ID
+  System::Call 'kernel32::GetSystemDefaultLangID() i .r0'
+  ;http://msdn.microsoft.com/en-us/library/dd318693%28v=VS.85%29.aspx
+  StrCmp $0 "1078" Afrikaans
+  StrCmp $0 "1052" Albanian
+  StrCmp $0 "5121" Arabic
+  StrCmp $0 "1068" Azerbaijani
+  StrCmp $0 "1069" Basque
+  StrCmp $0 "1059" Belarusian
+  StrCmp $0 "1093" Bengali
+  StrCmp $0 "1026" Bulgarian
+  StrCmp $0 "1027" Catalan
+  StrCmp $0 "1116" Cherokee
+  StrCmp $0 "31748" Chinese_tra
+  StrCmp $0 "4" Chinese_sim
+  StrCmp $0 "26" Croatian
+  StrCmp $0 "1029" Czech
+  StrCmp $0 "1030" Danish
+  StrCmp $0 "2067" Dutch
+  StrCmp $0 "1061" Estonian
+  StrCmp $0 "3079" German
+  StrCmp $0 "1032" Greek
+  StrCmp $0 "1035" Finnish
+  StrCmp $0 "2060" French
+  StrCmp $0 "1037" Hebrew
+  StrCmp $0 "1081" Hindi
+  StrCmp $0 "1038" Hungarian
+  StrCmp $0 "1039" Icelandic
+  StrCmp $0 "1057" Indonesian
+  StrCmp $0 "1040" Italian
+  StrCmp $0 "1041" Japanese
+  StrCmp $0 "1099" Kannada
+  StrCmp $0 "1042" Korean
+  StrCmp $0 "1062" Latvian
+  StrCmp $0 "1063" Lithuanian
+  StrCmp $0 "1071" Macedonian
+  StrCmp $0 "1100" Malayalam
+  StrCmp $0 "2110" Malay
+  StrCmp $0 "1082" Maltese
+  StrCmp $0 "1044" Norwegian
+  StrCmp $0 "1045" Polish
+  StrCmp $0 "1046" Portuguese
+  StrCmp $0 "1048" Romanian
+  StrCmp $0 "1049" Russian
+  StrCmp $0 "1051" Slovak
+  StrCmp $0 "1060" Slovenian
+  StrCmp $0 "11274" Spanish
+  StrCmp $0 "2074" Serbian
+  StrCmp $0 "1089" Swahili
+  StrCmp $0 "2077" Swedish
+  StrCmp $0 "1097" Tamil
+  StrCmp $0 "1098" Telugu
+  StrCmp $0 "1054" Thai
+  StrCmp $0 "1055" Turkish
+  StrCmp $0 "1058" Ukrainian
+  StrCmp $0 "1066" Vietnamese
+  ; NOTE(review): several IDs above look like one regional variant only (e.g. 3079 presumably German/Austria, not 1031) - confirm whether matching the primary language ID was intended.
+  Goto lang_end
+
+  Afrikaans: !insertmacro SelectSection ${SecLang_afr}
+            Goto lang_end
+  Albanian: !insertmacro SelectSection ${SecLang_sqi}
+            Goto lang_end
+  Arabic: !insertmacro SelectSection ${SecLang_ara}
+            Goto lang_end
+  ;Assamese: !insertmacro SelectSection ${SecLang_asm}
+  ;          Goto lang_end
+  Azerbaijani: !insertmacro SelectSection ${SecLang_aze}
+            Goto lang_end
+  Basque: !insertmacro SelectSection ${SecLang_eus}
+            Goto lang_end
+  Belarusian: !insertmacro SelectSection ${SecLang_bel}
+            Goto lang_end
+  Bengali: !insertmacro SelectSection ${SecLang_ben}
+            Goto lang_end
+  Bulgarian: !insertmacro SelectSection ${SecLang_bul}
+            Goto lang_end
+  Catalan: !insertmacro SelectSection ${SecLang_cat}
+            Goto lang_end
+  Cherokee: !insertmacro SelectSection ${SecLang_chr}
+            Goto lang_end
+  Chinese_tra: !insertmacro SelectSection ${SecLang_chi_tra}
+            Goto lang_end
+  Chinese_sim: !insertmacro SelectSection ${SecLang_chi_sim}
+            Goto lang_end
+  Croatian: !insertmacro SelectSection ${SecLang_hrv}
+            Goto lang_end
+  Czech: !insertmacro SelectSection ${SecLang_ces}
+            Goto lang_end
+  Danish: !insertmacro SelectSection ${SecLang_dan}
+            Goto lang_end
+  Dutch: !insertmacro SelectSection ${SecLang_nld}
+            Goto lang_end
+  Estonian: !insertmacro SelectSection ${SecLang_est}
+            Goto lang_end
+  German: !insertmacro SelectSection ${SecLang_deu}
+            Goto lang_end
+  Greek: !insertmacro SelectSection ${SecLang_ell}
+            !insertmacro SelectSection ${SecLang_grc}
+            Goto lang_end
+  Finnish: !insertmacro SelectSection ${SecLang_fin}
+            Goto lang_end
+  French: !insertmacro SelectSection ${SecLang_fra}
+            !insertmacro SelectSection ${SecLang_frm}
+            Goto lang_end
+  Hebrew: !insertmacro SelectSection ${SecLang_heb}
+            ;!insertmacro SelectSection ${SecLang_heb_com}
+            Goto lang_end
+  Hungarian: !insertmacro SelectSection ${SecLang_hun}
+            Goto lang_end
+  Hindi: !insertmacro SelectSection ${SecLang_hin}
+            Goto lang_end
+  Icelandic: !insertmacro SelectSection ${SecLang_isl}
+            Goto lang_end
+  Indonesian: !insertmacro SelectSection ${SecLang_ind}
+            Goto lang_end
+  Italian: !insertmacro SelectSection ${SecLang_ita}
+            !insertmacro SelectSection ${SecLang_ita_old}
+            Goto lang_end
+  Japanese: !insertmacro SelectSection ${SecLang_jpn}
+            Goto lang_end
+  Kannada: !insertmacro SelectSection ${SecLang_kan}
+            Goto lang_end
+  Korean: !insertmacro SelectSection ${SecLang_kor}
+            Goto lang_end
+  Latvian: !insertmacro SelectSection ${SecLang_lav}
+            Goto lang_end
+  Lithuanian: !insertmacro SelectSection ${SecLang_lit}
+            Goto lang_end
+  Macedonian: !insertmacro SelectSection ${SecLang_mkd}
+            Goto lang_end
+  Malayalam: !insertmacro SelectSection ${SecLang_mal}
+            Goto lang_end
+  Malay: !insertmacro SelectSection ${SecLang_msa}
+            Goto lang_end
+  Maltese: !insertmacro SelectSection ${SecLang_mlt}
+            Goto lang_end
+  Norwegian: !insertmacro SelectSection ${SecLang_nor}
+            Goto lang_end
+  Polish: !insertmacro SelectSection ${SecLang_pol}
+            Goto lang_end
+  Portuguese: !insertmacro SelectSection ${SecLang_por}
+            Goto lang_end
+  Romanian: !insertmacro SelectSection ${SecLang_ron}
+            Goto lang_end
+  Russian: !insertmacro SelectSection ${SecLang_rus}
+            Goto lang_end
+  Slovak: !insertmacro SelectSection ${SecLang_slk}
+            Goto lang_end
+  Slovenian: !insertmacro SelectSection ${SecLang_slv}
+            Goto lang_end
+  Spanish: !insertmacro SelectSection ${SecLang_spa}
+            !insertmacro SelectSection ${SecLang_spa_old}
+            Goto lang_end
+  Serbian: !insertmacro SelectSection ${SecLang_srp}
+            Goto lang_end
+  Swahili: !insertmacro SelectSection ${SecLang_swa}
+            Goto lang_end
+  Swedish: !insertmacro SelectSection ${SecLang_swe}
+            Goto lang_end
+  Tamil: !insertmacro SelectSection ${SecLang_tam}
+            Goto lang_end
+  Telugu: !insertmacro SelectSection ${SecLang_tel}
+            Goto lang_end
+  Thai: !insertmacro SelectSection ${SecLang_tha}
+            Goto lang_end
+  Turkish: !insertmacro SelectSection ${SecLang_tur}
+            Goto lang_end
+  Ukrainian: !insertmacro SelectSection ${SecLang_ukr}
+            Goto lang_end
+  Vietnamese: !insertmacro SelectSection ${SecLang_vie}
+
+  lang_end:
+FunctionEnd
+
+Function un.onInit
+  !insertmacro MUI_LANGDLL_DISPLAY
+  !insertmacro MULTIUSER_UNINIT
+  ;!insertmacro SELECT_UNSECTION Main ${UNSEC0000}
+  ;!insertmacro MUI_UNGETLANGUAGE
+FunctionEnd
+
+Function .onInstFailed
+  MessageBox MB_OK "Installation failed."
+FunctionEnd
+
+!ifdef SHOW_README
+Function ShowReadme
+  Exec '"wordpad" "doc\README.md"'
+  ;BringToFront
+FunctionEnd
+!endif
+
+; Prevent running multiple instances of the installer
+Function PreventMultipleInstances
+  ; TODO: Does not work. Creates a mutex named ${PRODUCT_NAME}; "?e" pushes the call's last error, which is compared with 0 below.
+  Push $R0
+  System::Call 'kernel32::CreateMutexA(i 0, i 0, t ${PRODUCT_NAME}) ?e'
+  Pop $R0
+  StrCmp $R0 0 +3
+    MessageBox MB_OK|MB_ICONEXCLAMATION "The installer is already running." /SD IDOK
+    Abort
+  Pop $R0
+FunctionEnd
diff --git a/nsis/winpath.cpp b/nsis/winpath.cpp
new file mode 100644
index 0000000000..e8e1bfae83
--- /dev/null
+++ b/nsis/winpath.cpp
@@ -0,0 +1,47 @@
+// Copyright (C) 2024 Stefan Weil
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// winpath - run a Windows program with extended PATH
+//
+// Usage:
+//
+//     winpath [CMD [ARGUMENT ...]]
+//
+// Example:
+//
+//     winpath cmd
+//
+// This will start a Windows command line with PATH extended by
+// the location of the winpath executable.
+
+#include <process.h> // _spawnvp
+#include <stdlib.h>  // _putenv_s, getenv
+#include <string.h>  // strrchr
+
+#include <string>    // std::string
+
+// Runs argv[1] with the remaining arguments after prepending argv[0],
+// cut at its last backslash, to the PATH environment variable.
+// Does nothing when no command is given. Always returns 0.
+int main(int argc, char *argv[]) {
+  if (argc > 1) {
+    char *dir = argv[0];
+    char *last = strrchr(dir, '\\');
+    if (last != nullptr) {
+      *last = '\0';
+    }
+    // Build the new value in a std::string: PATH can be longer than any
+    // fixed-size buffer, and getenv returns nullptr when PATH is unset.
+    std::string path(dir);
+    const char *old_path = getenv("PATH");
+    if (old_path != nullptr) {
+      path += ';';
+      path += old_path;
+    }
+    _putenv_s("PATH", path.c_str());
+    _spawnvp(_P_WAIT, argv[1], argv + 1);
+    //~ _spawnvp(_P_OVERLAY, argv[1], argv + 1);
+  }
+  return 0;
+}
diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp
index 4a17a24820..e373f73aa4 100644
--- a/src/api/altorenderer.cpp
+++ b/src/api/altorenderer.cpp
@@ -15,9 +15,6 @@
 
 #include "errcode.h" // for ASSERT_HOST
 #include "helpers.h" // for copy_string
-#ifdef _WIN32
-#  include "host.h" // windows.h for MultiByteToWideChar, ...
-#endif #include "tprintf.h" // for tprintf #include @@ -145,20 +142,6 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { SetInputName(nullptr); } -#ifdef _WIN32 - // convert input name from ANSI encoding to utf-8 - int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0); - wchar_t *uni16_str = new WCHAR[str16_len]; - str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, str16_len); - int utf8_len = - WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, nullptr, nullptr); - char *utf8_str = new char[utf8_len]; - WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, nullptr, nullptr); - input_file_ = utf8_str; - delete[] uni16_str; - delete[] utf8_str; -#endif - std::stringstream alto_str; // Use "C" locale (needed for int values larger than 999). alto_str.imbue(std::locale::classic()); @@ -169,7 +152,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { << " WIDTH=\"" << rect_width_ << "\"" << " HEIGHT=\"" << rect_height_ << "\">\n"; - ResultIterator *res_it = GetIterator(); + std::unique_ptr res_it(GetIterator()); while (!res_it->Empty(RIL_BLOCK)) { if (res_it->Empty(RIL_WORD)) { res_it->Next(RIL_WORD); @@ -186,7 +169,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { // Handle all kinds of images. // TODO: optionally add TYPE, for example TYPE="photo". alto_str << "\t\t\t\t\n"; res_it->Next(RIL_BLOCK); continue; @@ -195,7 +178,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { case PT_VERT_LINE: // Handle horizontal and vertical lines. 
alto_str << "\t\t\t\t\n"; res_it->Next(RIL_BLOCK); continue; @@ -208,24 +191,24 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { if (res_it->IsAtBeginningOf(RIL_BLOCK)) { alto_str << "\t\t\t\tIsAtBeginningOf(RIL_PARA)) { alto_str << "\t\t\t\t\tIsAtBeginningOf(RIL_TEXTLINE)) { alto_str << "\t\t\t\t\t\tIsAtFinalElement(RIL_TEXTLINE, RIL_WORD); @@ -272,7 +255,6 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { alto_str << "\t\t\t\n" << "\t\t\n"; - delete res_it; return copy_string(alto_str.str()); } diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 72503636c0..bae30ab8bb 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -64,6 +64,7 @@ #include // for round, M_PI #include // for int32_t #include // for strcmp, strcpy +#include // for std::filesystem #include // for size_t #include // for std::cin #include // for std::locale::classic @@ -82,15 +83,9 @@ #endif #if defined(_WIN32) -# include -# include -#else -# include // for closedir, opendir, readdir, DIR, dirent -# include -# include // for stat, S_IFDIR -# include -# include -#endif // _WIN32 +# include // for _O_BINARY +# include // for _setmode +#endif namespace tesseract { @@ -149,61 +144,18 @@ static void ExtractFontName(const char* filename, std::string* fontname) { /* Add all available languages recursively. 
*/ -static void addAvailableLanguages(const std::string &datadir, const std::string &base, +static void addAvailableLanguages(const std::string &datadir, std::vector *langs) { - auto base2 = base; - if (!base2.empty()) { - base2 += "/"; - } - const size_t extlen = sizeof(kTrainedDataSuffix); -#ifdef _WIN32 - WIN32_FIND_DATA data; - HANDLE handle = FindFirstFile((datadir + base2 + "*").c_str(), &data); - if (handle != INVALID_HANDLE_VALUE) { - BOOL result = TRUE; - for (; result;) { - char *name = data.cFileName; - // Skip '.', '..', and hidden files - if (name[0] != '.') { - if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == FILE_ATTRIBUTE_DIRECTORY) { - addAvailableLanguages(datadir, base2 + name, langs); - } else { - size_t len = strlen(name); - if (len > extlen && name[len - extlen] == '.' && - strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { - name[len - extlen] = '\0'; - langs->push_back(base2 + name); - } - } - } - result = FindNextFile(handle, &data); - } - FindClose(handle); - } -#else // _WIN32 - DIR *dir = opendir((datadir + base).c_str()); - if (dir != nullptr) { - dirent *de; - while ((de = readdir(dir))) { - char *name = de->d_name; - // Skip '.', '..', and hidden files - if (name[0] != '.') { - struct stat st; - if (stat((datadir + base2 + name).c_str(), &st) == 0 && (st.st_mode & S_IFDIR) == S_IFDIR) { - addAvailableLanguages(datadir, base2 + name, langs); - } else { - size_t len = strlen(name); - if (len > extlen && name[len - extlen] == '.' 
&& - strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) { - name[len - extlen] = '\0'; - langs->push_back(base2 + name); - } - } - } + for (const auto& entry : + std::filesystem::recursive_directory_iterator(datadir, + std::filesystem::directory_options::follow_directory_symlink | + std::filesystem::directory_options::skip_permission_denied)) { + auto path = entry.path().lexically_relative(datadir).string(); + auto extPos = path.rfind(".traineddata"); + if (extPos != std::string::npos) { + langs->push_back(path.substr(0, extPos)); } - closedir(dir); } -#endif } TessBaseAPI::TessBaseAPI() @@ -444,7 +396,7 @@ void TessBaseAPI::GetLoadedLanguagesAsVector(std::vector *langs) co void TessBaseAPI::GetAvailableLanguagesAsVector(std::vector *langs) const { langs->clear(); if (tesseract_ != nullptr) { - addAvailableLanguages(tesseract_->datadir, "", langs); + addAvailableLanguages(tesseract_->datadir, langs); std::sort(langs->begin(), langs->end()); } } diff --git a/src/api/hocrrenderer.cpp b/src/api/hocrrenderer.cpp index ea9d7cef40..5f319ecac1 100644 --- a/src/api/hocrrenderer.cpp +++ b/src/api/hocrrenderer.cpp @@ -21,9 +21,6 @@ #include // for std::locale::classic #include // for std::unique_ptr #include // for std::stringstream -#ifdef _WIN32 -# include "host.h" // windows.h for MultiByteToWideChar, ... 
-#endif #include #include "helpers.h" // for copy_string #include "tesseractclass.h" // for Tesseract @@ -151,23 +148,6 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) { SetInputName(nullptr); } -#ifdef _WIN32 - // convert input name from ANSI encoding to utf-8 - int str16_len = - MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0); - wchar_t *uni16_str = new WCHAR[str16_len]; - str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, - str16_len); - int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, - 0, nullptr, nullptr); - char *utf8_str = new char[utf8_len]; - WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, - nullptr, nullptr); - input_file_ = utf8_str; - delete[] uni16_str; - delete[] utf8_str; -#endif - std::stringstream hocr_str; // Use "C" locale (needed for double values x_size and x_descenders). hocr_str.imbue(std::locale::classic()); @@ -512,9 +492,9 @@ bool TessHOcrRenderer::BeginDocumentHandler() { " \n" " \n" diff --git a/src/api/pagerenderer.cpp b/src/api/pagerenderer.cpp index a611341628..cd2d0a5ed9 100644 --- a/src/api/pagerenderer.cpp +++ b/src/api/pagerenderer.cpp @@ -2,7 +2,7 @@ // Description: PAGE XML rendering interface // Author: Jan Kamlah -// (C) Copyright 2021 +// (C) Copyright 2024 // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -15,9 +15,6 @@ #include "errcode.h" // for ASSERT_HOST #include "helpers.h" // for copy_string -#ifdef _WIN32 -# include "host.h" // windows.h for MultiByteToWideChar, ... -#endif #include "tprintf.h" // for tprintf #include @@ -496,7 +493,7 @@ Pta *FitBaselineIntoLinePolygon(Pta *bottom_pts, Pta *baseline_pts, } num_pts = ptaGetCount(bottom_pts); - // Create a interpolated polygon with stepsize 1 + // Create an interpolated polygon with stepsize 1. 
for (int index = 0; index < num_pts - 1; ++index) { ptaGetIPt(bottom_pts, index, &x0, &y0); ptaGetIPt(bottom_pts, index + 1, &x1, &y1); @@ -639,7 +636,7 @@ bool TessPAGERenderer::AddImageHandler(TessBaseAPI *api) { "pagecontent.xsd\">\n" "\t if (std::regex_search(api->GetInputName(), std::regex("^(https?|ftp|ssh):"))) { @@ -717,23 +714,6 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { SetInputName(nullptr); } -#ifdef _WIN32 - // convert input name from ANSI encoding to utf-8 - int str16_len = - MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0); - wchar_t *uni16_str = new WCHAR[str16_len]; - str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, - str16_len); - int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, - 0, nullptr, nullptr); - char *utf8_str = new char[utf8_len]; - WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, - nullptr, nullptr); - input_file_ = utf8_str; - delete[] uni16_str; - delete[] utf8_str; -#endif - // Used variables std::stringstream reading_order_str; @@ -788,7 +768,11 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { << "\t\t\t\n"; - ResultIterator *res_it = GetIterator(); + std::unique_ptr res_it(GetIterator()); + + float block_conf = 0; + float line_conf = 0; + while (!res_it->Empty(RIL_BLOCK)) { if (res_it->Empty(RIL_WORD)) { res_it->Next(RIL_WORD); @@ -804,7 +788,7 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { // Handle all kinds of images. page_str << "\t\t\n"; page_str << "\t\t\t"; - AddBoxToPAGE(res_it, RIL_BLOCK, page_str); + AddBoxToPAGE(res_it.get(), RIL_BLOCK, page_str); page_str << "\t\t\n"; res_it->Next(RIL_BLOCK); continue; @@ -814,7 +798,7 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { // Handle horizontal and vertical lines. 
page_str << "\t\t\n"; page_str << "\t\t\t"; - AddBoxToPAGE(res_it, RIL_BLOCK, page_str); + AddBoxToPAGE(res_it.get(), RIL_BLOCK, page_str); page_str << "\t\t\n"; res_it->Next(RIL_BLOCK); continue; @@ -825,7 +809,6 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { break; } - float block_conf = 0; if (res_it->IsAtBeginningOf(RIL_BLOCK)) { // Add Block to reading order reading_order_str << "\t\t\t\tIsAtBeginningOf(RIL_TEXTLINE)) { // writing_direction_before = writing_direction; line_conf = ((res_it->Confidence(RIL_TEXTLINE)) / 100.); @@ -890,9 +872,9 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { line_str << "custom=\"" << "readingOrder {index:" << lcnt << ";}\">\n"; // If level is linebased, get the line polygon and baseline if (LEVELFLAG == 0 && (!POLYGONFLAG || skewed_flag)) { - AddPointToWordPolygon(res_it, RIL_TEXTLINE, line_top_ltr_pts, + AddPointToWordPolygon(res_it.get(), RIL_TEXTLINE, line_top_ltr_pts, line_bottom_ltr_pts, writing_direction); - AddBaselineToPTA(res_it, RIL_TEXTLINE, line_baseline_pts); + AddBaselineToPTA(res_it.get(), RIL_TEXTLINE, line_baseline_pts); if (ttb_flag) { line_baseline_pts = TransposePolygonline(line_baseline_pts); } @@ -912,18 +894,18 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { << WritingDirectionToStr(writing_direction) << "\" " << "custom=\"" << "readingOrder {index:" << wcnt << ";}\">\n"; if ((!POLYGONFLAG || skewed_flag) || ttb_flag) { - AddPointToWordPolygon(res_it, RIL_WORD, word_top_pts, word_bottom_pts, + AddPointToWordPolygon(res_it.get(), RIL_WORD, word_top_pts, word_bottom_pts, writing_direction); } } if (POLYGONFLAG && !skewed_flag && ttb_flag && LEVELFLAG == 0) { - AddPointToWordPolygon(res_it, RIL_WORD, word_top_pts, word_bottom_pts, + AddPointToWordPolygon(res_it.get(), RIL_WORD, word_top_pts, word_bottom_pts, writing_direction); } // Get the word baseline information - AddBaselineToPTA(res_it, RIL_WORD, word_baseline_pts); + 
AddBaselineToPTA(res_it.get(), RIL_WORD, word_baseline_pts); // Get the word text content and polygon do { @@ -932,7 +914,7 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { if (grapheme && grapheme[0] != 0) { word_content << HOcrEscape(grapheme.get()).c_str(); if (POLYGONFLAG && !skewed_flag && !ttb_flag) { - AddPointToWordPolygon(res_it, RIL_SYMBOL, word_top_pts, + AddPointToWordPolygon(res_it.get(), RIL_SYMBOL, word_top_pts, word_bottom_pts, writing_direction); } } @@ -1144,7 +1126,6 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) { const std::string &text = reading_order_str.str(); reading_order_str.str(""); - delete res_it; return copy_string(text); } diff --git a/src/arch/intsimdmatrix.h b/src/arch/intsimdmatrix.h index d93f928dbc..af88ab49c7 100644 --- a/src/arch/intsimdmatrix.h +++ b/src/arch/intsimdmatrix.h @@ -115,6 +115,8 @@ struct TESS_API IntSimdMatrix { static const IntSimdMatrix *intSimdMatrix; // Only available with NEON. static const IntSimdMatrix intSimdMatrixNEON; + // Only available with RVV. + static const IntSimdMatrix intSimdMatrixRVV; // Only available with AVX2 / AVX / FMA / SSE. static const IntSimdMatrix intSimdMatrixAVX2; static const IntSimdMatrix intSimdMatrixSSE; diff --git a/src/arch/intsimdmatrixrvv.cpp b/src/arch/intsimdmatrixrvv.cpp new file mode 100644 index 0000000000..cd0ee68098 --- /dev/null +++ b/src/arch/intsimdmatrixrvv.cpp @@ -0,0 +1,88 @@ +/////////////////////////////////////////////////////////////////////// +// File: intsimdmatrixrvv.cpp +// Description: matrix-vector product for 8-bit data on rvv. +// Author: sunyuechi +// +// Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS). +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#  include "config_auto.h" // for HAVE_RVV, ...
+#endif
+
+#if HAVE_RVV
+#  include "intsimdmatrix.h"
+#  include "tesstypes.h"
+
+namespace tesseract {
+// Returns the dot product of the int8 vectors u and v (num elements each), computed with RVV inline assembly.
+static int DotProduct(const int8_t *u, const int8_t *v, int num) {
+  int total = 0;
+  // NOTE(review): the asm also writes v0-v31; they are not listed as clobbers - confirm the toolchain accepts and needs them.
+  asm __volatile__ (
+    "  .option         arch, +v                   \n\t"
+    "  vsetvli t0,zero,e32,m8,ta,ma               \n\t"
+    "  vmv.v.i v0,0                               \n\t"
+    "1:                                           \n\t"
+    "  vsetvli t0,%[num],e8,m2,ta,ma              \n\t"
+    "  vle8.v v16,0(%[u])                         \n\t"
+    "  vle8.v v24,0(%[v])                         \n\t"
+    "  sub %[num],%[num],t0                       \n\t"
+    "  vwmul.vv v8,v24,v16                        \n\t"
+    "  add %[u],%[u],t0                           \n\t"
+    "  add %[v],%[v],t0                           \n\t"
+    "  vsetvli zero,zero,e16,m4,tu,ma             \n\t"
+    "  vwadd.wv v0,v0,v8                          \n\t"
+    "  bnez %[num],1b                             \n\t"
+    "  vsetvli t0,zero,e32,m8,ta,ma               \n\t"
+    "  vmv.s.x v8,zero                            \n\t"
+    "  vredsum.vs v0,v0,v8                        \n\t"
+    "  vmv.x.s %[total],v0                        \n\t"
+    :  [u] "+r" (u),
+       [v] "+r" (v),
+       [num] "+r" (num),
+       [total] "+r" (total)
+    :
+    :  "cc", "memory", "t0" // t0 is written by vsetvli; without the clobber an operand could be allocated to it.
+  );
+
+  return total;
+}
+// Computes v[i] = (row_i . u + bias_i * INT8_MAX) * scales[i] for dim1 rows of dim2 weights; the last weight of each row is the bias.
+static void matrixDotVector(int dim1, int dim2, const int8_t *wi, const TFloat *scales,
+                            const int8_t *u, TFloat *v) {
+  int num_out = dim1;
+  int num_in = dim2 - 1;
+  for (int i = 0; i < num_out; ++i) {
+    const int8_t *wi_start = wi + i * dim2;
+    int total = DotProduct(wi_start, u, num_in);
+    // Add in the bias and apply scaling.
+    v[i] = (total + wi_start[num_in] * INT8_MAX) * scales[i];
+  }
+}
+
+const IntSimdMatrix IntSimdMatrix::intSimdMatrixRVV = {
+    // Function.
+    matrixDotVector,
+    // Number of 32 bit outputs held in each register.
+ 1, + // Maximum number of registers that we will use to hold outputs. + 1, + // Number of 8 bit inputs in the inputs register. + 1, + // Number of inputs in each weight group. + 1 +}; + +} // namespace tesseract. + +#endif /* HAVE_RVV */ diff --git a/src/arch/simddetect.cpp b/src/arch/simddetect.cpp index 1afe5a5d81..9acd78a886 100644 --- a/src/arch/simddetect.cpp +++ b/src/arch/simddetect.cpp @@ -65,6 +65,13 @@ # endif #endif +#if defined(HAVE_RVV) +# if defined(HAVE_GETAUXVAL) +# include +# define HWCAP_RV(letter) (1ul << ((letter) - 'A')) +# endif +#endif + namespace tesseract { // Computes and returns the dot product of the two n-vectors u and v. @@ -89,6 +96,8 @@ bool SIMDDetect::neon_available_ = true; #elif defined(HAVE_NEON) // If true, then Neon has been detected. bool SIMDDetect::neon_available_; +#elif defined(HAVE_RVV) +bool SIMDDetect::rvv_available_; #else // If true, then AVX has been detected. bool SIMDDetect::avx_available_; @@ -229,6 +238,13 @@ SIMDDetect::SIMDDetect() { elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap); neon_available_ = hwcap & HWCAP_NEON; # endif +#endif + +#if defined(HAVE_RVV) +# if defined(HAVE_GETAUXVAL) + const unsigned long hwcap = getauxval(AT_HWCAP); + rvv_available_ = hwcap & HWCAP_RV('V'); +# endif #endif // Select code for calculation of dot product based on autodetection. @@ -258,6 +274,10 @@ SIMDDetect::SIMDDetect() { } else if (neon_available_) { // NEON detected. SetDotProduct(DotProductNEON, &IntSimdMatrix::intSimdMatrixNEON); +#endif +#if defined(HAVE_RVV) + } else if (rvv_available_) { + SetDotProduct(DotProductGeneric, &IntSimdMatrix::intSimdMatrixRVV); #endif } diff --git a/src/arch/simddetect.h b/src/arch/simddetect.h index fcb0f53eca..5d4eb33880 100644 --- a/src/arch/simddetect.h +++ b/src/arch/simddetect.h @@ -63,6 +63,10 @@ class SIMDDetect { static inline bool IsNEONAvailable() { return detector.neon_available_; } + // Returns true if RVV is available on this system. 
+ static inline bool IsRVVAvailable() { + return detector.rvv_available_; + } // Update settings after config variable was set. static TESS_API void Update(); @@ -86,6 +90,8 @@ class SIMDDetect { static TESS_API bool sse_available_; // If true, then NEON has been detected. static TESS_API bool neon_available_; + // If true, then RVV has been detected. + static TESS_API bool rvv_available_; }; } // namespace tesseract diff --git a/src/ccmain/applybox.cpp b/src/ccmain/applybox.cpp index e50bda50f6..452da1a4e0 100644 --- a/src/ccmain/applybox.cpp +++ b/src/ccmain/applybox.cpp @@ -26,6 +26,7 @@ #include #include "pageres.h" #include "tesseractclass.h" +#include "tesserrstream.h" // for tesserr #include "unicharset.h" #ifndef DISABLED_LEGACY_ENGINE @@ -652,9 +653,10 @@ void Tesseract::SearchForText(const std::vector *choices, in if (choices_pos + length == choices_length && text_index + 1 == target_text.size()) { // This is a complete match. If the rating is good record a new best. if (applybox_debug > 2) { - tprintf("Complete match, rating = %g, best=%g, seglength=%zu, best=%zu\n", - rating + choice_rating, *best_rating, segmentation->size(), - best_segmentation->size()); + tesserr << "Complete match, rating = " << rating + choice_rating + << ", best=" << *best_rating + << ", seglength=" << segmentation->size() + << ", best=" << best_segmentation->size() << '\n'; } if (best_segmentation->empty() || rating + choice_rating < *best_rating) { *best_segmentation = *segmentation; diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index 30afc47763..454aa94f2a 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -41,6 +41,7 @@ #endif #include "sorthelper.h" #include "tesseractclass.h" +#include "tesserrstream.h" // for tesserr #include "tessvars.h" #include "werdit.h" @@ -51,13 +52,14 @@ const char *const kBackUpConfigFile = "tempconfigdata.config"; const double kMinRefitXHeightFraction = 0.5; #endif // ! 
DISABLED_LEGACY_ENGINE +namespace tesseract { + /** * Make a word from the selected blobs and run Tess on them. * * @param page_res recognise blobs * @param selection_box within this box */ -namespace tesseract { void Tesseract::recog_pseudo_word(PAGE_RES *page_res, TBOX &selection_box) { PAGE_RES_IT *it = make_pseudo_word(page_res, selection_box); @@ -1312,7 +1314,11 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD PointerVector best_words; // Points to the best result. May be word or in lang_words. const WERD_RES *word = word_data->word; - clock_t start_t = clock(); + clock_t total_time = 0; + const bool timing_debug = tessedit_timing_debug; + if (timing_debug) { + total_time = clock(); + } const bool debug = classify_debug_level > 0 || multilang_debug_level > 0; if (debug) { tprintf("%s word with lang %s at:", word->done ? "Already done" : "Processing", @@ -1364,10 +1370,10 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD } else { tprintf("no best words!!\n"); } - clock_t ocr_t = clock(); - if (tessedit_timing_debug) { - tprintf("%s (ocr took %.2f sec)\n", word_data->word->best_choice->unichar_string().c_str(), - static_cast(ocr_t - start_t) / CLOCKS_PER_SEC); + if (timing_debug) { + total_time = clock() - total_time; + tesserr << word_data->word->best_choice->unichar_string() + << " (ocr took " << 1000 * total_time / CLOCKS_PER_SEC << " ms)\n"; } } diff --git a/src/ccmain/ltrresultiterator.cpp b/src/ccmain/ltrresultiterator.cpp index 4ff498fa37..84ed913a2f 100644 --- a/src/ccmain/ltrresultiterator.cpp +++ b/src/ccmain/ltrresultiterator.cpp @@ -99,13 +99,11 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const { float mean_certainty = 0.0f; int certainty_count = 0; PAGE_RES_IT res_it(*it_); - WERD_CHOICE *best_choice = res_it.word()->best_choice; - ASSERT_HOST(best_choice != nullptr); + WERD_CHOICE *best_choice; switch (level) { case RIL_BLOCK: do { best_choice = 
res_it.word()->best_choice; - ASSERT_HOST(best_choice != nullptr); mean_certainty += best_choice->certainty(); ++certainty_count; res_it.forward(); @@ -114,7 +112,6 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const { case RIL_PARA: do { best_choice = res_it.word()->best_choice; - ASSERT_HOST(best_choice != nullptr); mean_certainty += best_choice->certainty(); ++certainty_count; res_it.forward(); @@ -124,19 +121,20 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const { case RIL_TEXTLINE: do { best_choice = res_it.word()->best_choice; - ASSERT_HOST(best_choice != nullptr); mean_certainty += best_choice->certainty(); ++certainty_count; res_it.forward(); } while (res_it.row() == res_it.prev_row()); break; case RIL_WORD: - mean_certainty += best_choice->certainty(); - ++certainty_count; + best_choice = res_it.word()->best_choice; + mean_certainty = best_choice->certainty(); + certainty_count = 1; break; case RIL_SYMBOL: - mean_certainty += best_choice->certainty(blob_index_); - ++certainty_count; + best_choice = res_it.word()->best_choice; + mean_certainty = best_choice->certainty(blob_index_); + certainty_count = 1; } if (certainty_count > 0) { mean_certainty /= certainty_count; @@ -320,7 +318,6 @@ char *LTRResultIterator::WordNormedUTF8Text() const { std::string ocr_text; WERD_CHOICE *best_choice = it_->word()->best_choice; const UNICHARSET *unicharset = it_->word()->uch_set; - ASSERT_HOST(best_choice != nullptr); for (unsigned i = 0; i < best_choice->length(); ++i) { ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i)); } diff --git a/src/ccmain/pagesegmain.cpp b/src/ccmain/pagesegmain.cpp index 5ebcb3f70a..acd6a5aaa5 100644 --- a/src/ccmain/pagesegmain.cpp +++ b/src/ccmain/pagesegmain.cpp @@ -108,8 +108,10 @@ int Tesseract::SegmentPage(const char *input_file, BLOCK_LIST *blocks, Tesseract // If a UNLV zone file can be found, use that instead of segmentation. 
if (!PSM_COL_FIND_ENABLED(pageseg_mode) && input_file != nullptr && input_file[0] != '\0') { std::string name = input_file; - std::size_t lastdot = name.find_last_of("."); - name = name.substr(0, lastdot); + auto lastdot = name.find_last_of('.'); + if (lastdot != std::string::npos) { + name.resize(lastdot); + } read_unlv_file(name, width, height, blocks); } if (blocks->empty()) { diff --git a/src/ccmain/paragraphs.cpp b/src/ccmain/paragraphs.cpp index ed05b73f42..bf802a61a4 100644 --- a/src/ccmain/paragraphs.cpp +++ b/src/ccmain/paragraphs.cpp @@ -31,6 +31,7 @@ #include "ratngs.h" // for WERD_CHOICE #include "rect.h" // for TBOX #include "statistc.h" // for STATS +#include "tesserrstream.h" // for tesserr #include "tprintf.h" // for tprintf #include "unicharset.h" // for UNICHARSET #include "werd.h" // for WERD, W_REP_CHAR @@ -74,8 +75,8 @@ static bool AcceptableRowArgs(int debug_level, int min_num_rows, const char *fun const std::vector *rows, int row_start, int row_end) { if (row_start < 0 || static_cast(row_end) > rows->size() || row_start > row_end) { - tprintf("Invalid arguments rows[%d, %d) while rows is of size %zu.\n", row_start, row_end, - rows->size()); + tesserr << "Invalid arguments rows[" << row_start << ", " << row_end + << ") while rows is of size " << rows->size() << ".\n"; return false; } if (row_end - row_start < min_num_rows) { @@ -915,10 +916,9 @@ struct GeometricClassifierState { tolerance = InterwordSpace(*r, r_start, r_end); CalculateTabStops(r, r_start, r_end, tolerance, &left_tabs, &right_tabs); if (debug_level >= 3) { - tprintf( - "Geometry: TabStop cluster tolerance = %d; " - "%zu left tabs; %zu right tabs\n", - tolerance, left_tabs.size(), right_tabs.size()); + tesserr << "Geometry: TabStop cluster tolerance = " << tolerance << "; " + << left_tabs.size() << " left tabs; " + << right_tabs.size() << " right tabs\n"; } ltr = (*r)[r_start].ri_->ltr; } diff --git a/src/ccstruct/blamer.cpp b/src/ccstruct/blamer.cpp index 92260054ee..ff7a7c4d22 
100644 --- a/src/ccstruct/blamer.cpp +++ b/src/ccstruct/blamer.cpp @@ -92,7 +92,7 @@ void BlamerBundle::SetSymbolTruth(const UNICHARSET &unicharset, const char *char if (id != INVALID_UNICHAR_ID) { std::string normed_uch(unicharset.get_normed_unichar(id)); if (normed_uch.length() > 0) { - symbol_str = normed_uch; + symbol_str = std::move(normed_uch); } } int length = truth_word_.length(); diff --git a/src/ccstruct/blobbox.cpp b/src/ccstruct/blobbox.cpp index 6201a31ce4..7356fdb4fd 100644 --- a/src/ccstruct/blobbox.cpp +++ b/src/ccstruct/blobbox.cpp @@ -883,7 +883,7 @@ void vertical_cblob_projection( // project outlines /********************************************************************** * vertical_coutline_projection * - * Compute the vertical projection of a outline from its outlines + * Compute the vertical projection of an outline from its outlines * and add to the given STATS. **********************************************************************/ diff --git a/src/ccstruct/detlinefit.cpp b/src/ccstruct/detlinefit.cpp index d100aa502c..fbbc9c474d 100644 --- a/src/ccstruct/detlinefit.cpp +++ b/src/ccstruct/detlinefit.cpp @@ -17,9 +17,9 @@ /////////////////////////////////////////////////////////////////////// #include "detlinefit.h" -#include "helpers.h" // for IntCastRounded +#include "helpers.h" // for IntCastRounded #include "statistc.h" -#include "tprintf.h" +#include "tesserrstream.h" // for tesserr #include #include // for FLT_MAX @@ -143,13 +143,17 @@ double DetLineFit::ConstrainedFit(const FCOORD &direction, double min_dist, doub std::nth_element(distances_.begin(), distances_.begin() + median_index, distances_.end()); *line_pt = distances_[median_index].data(); if (debug) { - tprintf("Constrained fit to dir %g, %g = %d, %d :%zu distances:\n", direction.x(), direction.y(), - line_pt->x(), line_pt->y(), distances_.size()); + tesserr << "Constrained fit to dir " << direction.x() << ", " + << direction.y() << " = " + << line_pt->x() << ", " << 
line_pt->y() + << " :" << distances_.size() << " distances:\n"; for (unsigned i = 0; i < distances_.size(); ++i) { - tprintf("%d: %d, %d -> %g\n", i, distances_[i].data().x(), distances_[i].data().y(), - distances_[i].key()); + tesserr << i << ": " + << distances_[i].data().x() << ", " + << distances_[i].data().y() << " -> " + << distances_[i].key() << '\n'; } - tprintf("Result = %zu\n", median_index); + tesserr << "Result = " << median_index << '\n'; } // Center distances on the fitted point. double dist_origin = direction * *line_pt; diff --git a/src/ccstruct/imagedata.cpp b/src/ccstruct/imagedata.cpp index 10de5fbe3a..03c5049c42 100644 --- a/src/ccstruct/imagedata.cpp +++ b/src/ccstruct/imagedata.cpp @@ -27,6 +27,7 @@ #include "rect.h" // for TBOX #include "scrollview.h" // for ScrollView, ScrollView::CYAN, ScrollView::NONE #include "tprintf.h" // for tprintf +#include "tesserrstream.h" // for tesserr #include "helpers.h" // for IntCastRounded, TRand, ClipToRange, Modulo #include "serialis.h" // for TFile @@ -618,9 +619,10 @@ bool DocumentData::ReCachePages() { pages_.clear(); } else if (loaded_pages > 1) { // Avoid lots of messages for training with single line images. - tprintf("Loaded %zu/%d lines (%d-%zu) of document %s\n", pages_.size(), - loaded_pages, pages_offset_ + 1, pages_offset_ + pages_.size(), - document_name_.c_str()); + tesserr << "Loaded " << pages_.size() << '/' << loaded_pages << " lines (" + << pages_offset_ + 1 << '-' + << pages_offset_ + pages_.size() << ") of document " + << document_name_ << '\n'; } set_total_pages(loaded_pages); return !pages_.empty(); diff --git a/src/ccstruct/rect.h b/src/ccstruct/rect.h index ab4f57beb5..b6bb7fbf53 100644 --- a/src/ccstruct/rect.h +++ b/src/ccstruct/rect.h @@ -123,7 +123,7 @@ class TESS_API TBOX { // bounding box } } - TDimension width() const { // how high is it? + TDimension width() const { // how wide is it? 
if (!null_box()) { return top_right.x() - bot_left.x(); } else { diff --git a/src/ccutil/ambigs.cpp b/src/ccutil/ambigs.cpp index 34d68968f8..5aafedb187 100644 --- a/src/ccutil/ambigs.cpp +++ b/src/ccutil/ambigs.cpp @@ -39,12 +39,9 @@ static const char kIllegalUnicharMsg[] = "Illegal unichar %s in ambiguity specif // UNICHAR_LEN * (MAX_AMBIG_SIZE + 1) for each part of the ambig const int kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1); -AmbigSpec::AmbigSpec() { +AmbigSpec::AmbigSpec() : correct_ngram_id(INVALID_UNICHAR_ID), type(NOT_AMBIG), wrong_ngram_size(0) { wrong_ngram[0] = INVALID_UNICHAR_ID; correct_fragments[0] = INVALID_UNICHAR_ID; - correct_ngram_id = INVALID_UNICHAR_ID; - type = NOT_AMBIG; - wrong_ngram_size = 0; } // Initializes the ambigs by adding a nullptr pointer to each table. diff --git a/src/ccutil/ccutil.cpp b/src/ccutil/ccutil.cpp index 7cf57f2ee9..930aa2636e 100644 --- a/src/ccutil/ccutil.cpp +++ b/src/ccutil/ccutil.cpp @@ -10,14 +10,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#if defined(_WIN32) -# include // for _access -#endif - #include "ccutil.h" +#include "tprintf.h" // for tprintf #include -#include // for std::strrchr +#include // for std::strrchrA +#include // for std::filesystem namespace tesseract { @@ -48,6 +46,12 @@ void CCUtil::main_setup(const std::string &argv0, const std::string &basename) { const char *tessdata_prefix = getenv("TESSDATA_PREFIX"); + // Ignore TESSDATA_PREFIX if there is no matching filesystem entry. + if (tessdata_prefix != nullptr && !std::filesystem::exists(tessdata_prefix)) { + tprintf("Warning: TESSDATA_PREFIX %s does not exist, ignore it\n", tessdata_prefix); + tessdata_prefix = nullptr; + } + if (!argv0.empty()) { /* Use tessdata prefix from the command line. */ datadir = argv0; @@ -55,7 +59,7 @@ void CCUtil::main_setup(const std::string &argv0, const std::string &basename) { /* Use tessdata prefix from the environment. 
*/ datadir = tessdata_prefix; #if defined(_WIN32) - } else if (datadir.empty() || _access(datadir.c_str(), 0) != 0) { + } else if (datadir.empty() || !std::filesystem::exists(datadir)) { /* Look for tessdata in directory of executable. */ char path[_MAX_PATH]; DWORD length = GetModuleFileName(nullptr, path, sizeof(path)); @@ -65,7 +69,7 @@ void CCUtil::main_setup(const std::string &argv0, const std::string &basename) { *separator = '\0'; std::string subdir = path; subdir += "/tessdata"; - if (_access(subdir.c_str(), 0) == 0) { + if (std::filesystem::exists(subdir)) { datadir = subdir; } } diff --git a/src/ccutil/tesserrstream.h b/src/ccutil/tesserrstream.h new file mode 100644 index 0000000000..80da99da0e --- /dev/null +++ b/src/ccutil/tesserrstream.h @@ -0,0 +1,68 @@ +// File: tesserrstream.h +// Description: C++ stream which enhances tprintf +// Author: Stefan Weil +// +// (C) Copyright 2024 +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TESSERACT_CCUTIL_TESSERRSTREAM_H +#define TESSERACT_CCUTIL_TESSERRSTREAM_H + +#include "tprintf.h" +#include // for TESS_API + +#include // for std::ostream + +namespace tesseract { + +class TessStreamBuf : public std::streambuf { +public: + TessStreamBuf() = default; + +protected: + virtual int_type overflow(int_type c) override { + if (c != EOF) { + if (debugfp == nullptr) { + debugfp = get_debugfp(); + } + if (fputc(c, debugfp) == EOF) { + return EOF; + } + } + return c; + } + + virtual std::streamsize xsputn(const char* s, std::streamsize n) override { + if (debugfp == nullptr) { + debugfp = get_debugfp(); + } + return fwrite(s, 1, n, debugfp); + } + +private: + FILE *debugfp = nullptr; +}; + +class TessErrStream : public std::ostream { +private: + TessStreamBuf buf; + +public: + TessErrStream() : std::ostream(nullptr), buf() { + rdbuf(&buf); + } +}; + +extern TESS_API TessErrStream tesserr; + +} // namespace tesseract + +#endif // TESSERACT_CCUTIL_TESSERRSTREAM_H diff --git a/src/ccutil/tprintf.cpp b/src/ccutil/tprintf.cpp index 3c5f7e2fca..2739b6cec4 100644 --- a/src/ccutil/tprintf.cpp +++ b/src/ccutil/tprintf.cpp @@ -21,6 +21,7 @@ # include "config_auto.h" #endif +#include "tesserrstream.h" #include "tprintf.h" #include "params.h" @@ -31,45 +32,53 @@ namespace tesseract { -#define MAX_MSG_LEN 2048 - INT_VAR(log_level, INT_MAX, "Logging level"); static STRING_VAR(debug_file, "", "File to send tprintf output to"); -// Trace printf -void tprintf(const char *format, ...) { - const char *debug_file_name = debug_file.c_str(); - static FILE *debugfp = nullptr; // debug file - - if (debug_file_name == nullptr) { - // This should not happen. - return; - } +// File for debug output. +FILE *debugfp; +// Set output for log messages. +// The output is written to stderr if debug_file is empty. +// Otherwise it is written to debug_file. 
+// It is possible to switch between stderr and debug_file output: +// tprintf("write to configured output\n"); +// debug_file = ""; +// tprintf("write to stderr\n"); +// debug_file = "/tmp/log"; +// tprintf("write to /tmp/log\n"); +// debug_file = ""; +// tprintf("write to stderr\n"); +FILE *get_debugfp() { + if (debug_file.empty()) { + // Write to stderr. + if (debugfp != stderr && debugfp != nullptr) { + fclose(debugfp); + } + debugfp = stderr; + } else if (debugfp == stderr || debugfp == nullptr) { + // Write to file. #ifdef _WIN32 - // Replace /dev/null by nul for Windows. - if (strcmp(debug_file_name, "/dev/null") == 0) { - debug_file_name = "nul"; - debug_file.set_value(debug_file_name); - } + if (debug_file == "/dev/null") { + // Replace /dev/null by nul for Windows. + debug_file = "nul"; + } #endif - - if (debugfp == nullptr && debug_file_name[0] != '\0') { - debugfp = fopen(debug_file_name, "wb"); - } else if (debugfp != nullptr && debug_file_name[0] == '\0') { - fclose(debugfp); - debugfp = nullptr; + debugfp = fopen(debug_file.c_str(), "wb"); } + return debugfp; +} +// Trace printf. +void tprintf(const char *format, ...) { + FILE *f = get_debugfp(); va_list args; // variable args va_start(args, format); // variable list - if (debugfp != nullptr) { - vfprintf(debugfp, format, args); - } else { - vfprintf(stderr, format, args); - } + vfprintf(f, format, args); va_end(args); } +TessErrStream tesserr; + } // namespace tesseract diff --git a/src/ccutil/tprintf.h b/src/ccutil/tprintf.h index bb995cd408..574cbbb708 100644 --- a/src/ccutil/tprintf.h +++ b/src/ccutil/tprintf.h @@ -19,7 +19,7 @@ #ifndef TESSERACT_CCUTIL_TPRINTF_H #define TESSERACT_CCUTIL_TPRINTF_H -#include "params.h" // for BOOL_VAR_H +#include "params.h" // for INT_VAR_H #include // for TESS_API namespace tesseract { @@ -36,6 +36,9 @@ extern TESS_API void tprintf( // Trace printf const char *format, ...) // Message __attribute__((format(printf, 1, 2))); +// Get file for debug output. 
+FILE *get_debugfp(); + } // namespace tesseract #undef __attribute__ diff --git a/src/ccutil/unicharset.cpp b/src/ccutil/unicharset.cpp index c0aea1f3e6..b29ec3b7fe 100644 --- a/src/ccutil/unicharset.cpp +++ b/src/ccutil/unicharset.cpp @@ -824,7 +824,7 @@ bool UNICHARSET::load_via_fgets( stream >> std::setw(255) >> unichar >> std::hex >> properties >> std::dec; // stream.flags(std::ios::dec); if (stream.fail()) { - fprintf(stderr, "%s:%u failed\n", __FILE__, __LINE__); + fprintf(stderr, "%s:%d failed\n", __FILE__, __LINE__); return false; } auto position = stream.tellg(); diff --git a/src/ccutil/unicity_table.h b/src/ccutil/unicity_table.h index 54f740a3b3..905d34cce8 100644 --- a/src/ccutil/unicity_table.h +++ b/src/ccutil/unicity_table.h @@ -80,7 +80,7 @@ class UnicityTable { int push_back(T object) { auto idx = get_index(object); if (idx == -1) { - idx = table_.push_back(object); + idx = table_.push_back(std::move(object)); } return idx; } diff --git a/src/classify/adaptive.cpp b/src/classify/adaptive.cpp index 3139cce9be..1b7bf5fb0a 100644 --- a/src/classify/adaptive.cpp +++ b/src/classify/adaptive.cpp @@ -59,13 +59,12 @@ PERM_CONFIG_STRUCT::~PERM_CONFIG_STRUCT() { delete[] Ambigs; } -ADAPT_CLASS_STRUCT::ADAPT_CLASS_STRUCT() { - NumPermConfigs = 0; - MaxNumTimesSeen = 0; - TempProtos = NIL_LIST; - - PermProtos = NewBitVector(MAX_NUM_PROTOS); - PermConfigs = NewBitVector(MAX_NUM_CONFIGS); +ADAPT_CLASS_STRUCT::ADAPT_CLASS_STRUCT() : + NumPermConfigs(0), + MaxNumTimesSeen(0), + PermProtos(NewBitVector(MAX_NUM_PROTOS)), + PermConfigs(NewBitVector(MAX_NUM_CONFIGS)), + TempProtos(NIL_LIST) { zero_all_bits(PermProtos, WordsInVectorOfSize(MAX_NUM_PROTOS)); zero_all_bits(PermConfigs, WordsInVectorOfSize(MAX_NUM_CONFIGS)); @@ -124,16 +123,13 @@ int Classify::GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId) { /// /// @param MaxProtoId max id of any proto in new config /// @param FontinfoId font information from pre-trained templates 
-TEMP_CONFIG_STRUCT::TEMP_CONFIG_STRUCT(int maxProtoId, int fontinfoId) { - int NumProtos = maxProtoId + 1; - - Protos = NewBitVector(NumProtos); - - NumTimesSeen = 1; - MaxProtoId = maxProtoId; - ProtoVectorSize = WordsInVectorOfSize(NumProtos); +TEMP_CONFIG_STRUCT::TEMP_CONFIG_STRUCT(int maxProtoId, int fontinfoId) : + NumTimesSeen(1), + ProtoVectorSize(WordsInVectorOfSize(maxProtoId + 1)), + MaxProtoId(maxProtoId), + Protos(NewBitVector(maxProtoId + 1)), + FontinfoId(fontinfoId) { zero_all_bits(Protos, ProtoVectorSize); - FontinfoId = fontinfoId; } TEMP_CONFIG_STRUCT::~TEMP_CONFIG_STRUCT() { diff --git a/src/classify/adaptmatch.cpp b/src/classify/adaptmatch.cpp index 5c02c6f363..273259f918 100644 --- a/src/classify/adaptmatch.cpp +++ b/src/classify/adaptmatch.cpp @@ -644,12 +644,12 @@ void Classify::StartBackupAdaptiveClassifier() { * - #EnableLearning * set to true by this routine */ -void Classify::SettupPass1() { +void Classify::SetupPass1() { EnableLearning = classify_enable_learning; - getDict().SettupStopperPass1(); + getDict().SetupStopperPass1(); -} /* SettupPass1 */ +} /* SetupPass1 */ /*---------------------------------------------------------------------------*/ /** @@ -660,11 +660,11 @@ void Classify::SettupPass1() { * Globals: * - #EnableLearning set to false by this routine */ -void Classify::SettupPass2() { +void Classify::SetupPass2() { EnableLearning = false; - getDict().SettupStopperPass2(); + getDict().SetupStopperPass2(); -} /* SettupPass2 */ +} /* SetupPass2 */ /*---------------------------------------------------------------------------*/ /** diff --git a/src/classify/classify.h b/src/classify/classify.h index 2225e5feab..e42d5674e3 100644 --- a/src/classify/classify.h +++ b/src/classify/classify.h @@ -243,8 +243,8 @@ class TESS_API Classify : public CCStruct { void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class); bool AdaptableWord(WERD_RES *word); void EndAdaptiveClassifier(); - void SettupPass1(); - void SettupPass2(); + 
void SetupPass1(); + void SetupPass2(); void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices); void ClassifyAsNoise(ADAPT_RESULTS *Results); void ResetAdaptiveClassifierInternal(); diff --git a/src/classify/clusttool.cpp b/src/classify/clusttool.cpp index 543378d6b3..de8b1f5cb8 100644 --- a/src/classify/clusttool.cpp +++ b/src/classify/clusttool.cpp @@ -277,7 +277,7 @@ void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto) { fprintf(File, "insignificant "); } WriteProtoStyle(File, static_cast(Proto->Style)); - fprintf(File, "%6d\n\t", Proto->NumSamples); + fprintf(File, "%6u\n\t", Proto->NumSamples); WriteNFloats(File, N, &Proto->Mean[0]); fprintf(File, "\t"); diff --git a/src/classify/intproto.cpp b/src/classify/intproto.cpp index 854026bcc9..f5f08cf15a 100644 --- a/src/classify/intproto.cpp +++ b/src/classify/intproto.cpp @@ -511,7 +511,7 @@ INT_TEMPLATES_STRUCT *Classify::CreateIntTemplates(CLASSES FloatProtos, for (unsigned i = 0; i < fs_size; ++i) { fs.push_back(FClass->font_set[i]); } - IClass->font_set_id = this->fontset_table_.push_back(fs); + IClass->font_set_id = this->fontset_table_.push_back(std::move(fs)); AddIntClass(IntTemplates, ClassId, IClass); for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) { diff --git a/src/classify/shapetable.h b/src/classify/shapetable.h index 0d2cd41c1b..8497b7864a 100644 --- a/src/classify/shapetable.h +++ b/src/classify/shapetable.h @@ -26,6 +26,7 @@ #include "fontinfo.h" #include "genericheap.h" #include "intmatcher.h" +#include "tesserrstream.h" // for tesserr namespace tesseract { @@ -41,10 +42,10 @@ struct UnicharRating { // Print debug info. 
void Print() const { - tprintf( - "Unichar-id=%d, rating=%g, adapted=%d, config=%d, misses=%u," - " %zu fonts\n", - unichar_id, static_cast(rating), adapted, config, feature_misses, fonts.size()); + tesserr << "Unichar-id=" << unichar_id << ", rating=" << rating + << ", adapted=" << adapted << ", config=" << config + << ", misses=" << feature_misses << ", " + << fonts.size() << " fonts\n"; } // Helper function to get the index of the first result with the required diff --git a/src/dict/dict.cpp b/src/dict/dict.cpp index be3cc1372f..2dc5dcb733 100644 --- a/src/dict/dict.cpp +++ b/src/dict/dict.cpp @@ -18,6 +18,7 @@ #include "dict.h" +#include "tesserrstream.h" // for tesserr #include "tprintf.h" #include @@ -410,10 +411,10 @@ int Dict::def_letter_is_okay(void *void_dawg_args, const UNICHARSET &unicharset, ASSERT_HOST(unicharset.contains_unichar_id(unichar_id)); if (dawg_debug_level >= 3) { - tprintf( - "def_letter_is_okay: current unichar=%s word_end=%d" - " num active dawgs=%zu\n", - getUnicharset().debug_str(unichar_id).c_str(), word_end, dawg_args->active_dawgs->size()); + tesserr << "def_letter_is_okay: current unichar=" + << getUnicharset().debug_str(unichar_id) + << " word_end=" << word_end + << " num active dawgs=" << dawg_args->active_dawgs->size() << '\n'; } // Do not accept words that contain kPatternUnicharID. diff --git a/src/dict/dict.h b/src/dict/dict.h index 136308153c..3290dd0bb0 100644 --- a/src/dict/dict.h +++ b/src/dict/dict.h @@ -267,9 +267,9 @@ class TESS_API Dict { /// Prints the current choices for this word to stdout. void DebugWordChoices(); /// Sets up stopper variables in preparation for the first pass. - void SettupStopperPass1(); + void SetupStopperPass1(); /// Sets up stopper variables in preparation for the second pass. - void SettupStopperPass2(); + void SetupStopperPass2(); /* context.cpp *************************************************************/ /// Check a string to see if it matches a set of lexical rules. 
int case_ok(const WERD_CHOICE &word) const; diff --git a/src/dict/stopper.cpp b/src/dict/stopper.cpp index 4c33fa89ce..a1885daf37 100644 --- a/src/dict/stopper.cpp +++ b/src/dict/stopper.cpp @@ -359,11 +359,11 @@ void Dict::EndDangerousAmbigs() {} #endif // !defined(DISABLED_LEGACY_ENGINE) -void Dict::SettupStopperPass1() { +void Dict::SetupStopperPass1() { reject_offset_ = 0.0; } -void Dict::SettupStopperPass2() { +void Dict::SetupStopperPass2() { reject_offset_ = stopper_phase2_certainty_rejection_offset; } diff --git a/src/lstm/network.cpp b/src/lstm/network.cpp index 6e13e4963a..cfddbfd43a 100644 --- a/src/lstm/network.cpp +++ b/src/lstm/network.cpp @@ -38,9 +38,6 @@ #include "scrollview.h" #include "series.h" #include "statistc.h" -#ifdef INCLUDE_TENSORFLOW -# include "tfnetwork.h" -#endif #include "tprintf.h" namespace tesseract { @@ -287,11 +284,7 @@ Network *Network::CreateFromFile(TFile *fp) { network = new Series(name); break; case NT_TENSORFLOW: -#ifdef INCLUDE_TENSORFLOW - network = new TFNetwork(name); -#else - tprintf("TensorFlow not compiled in! -DINCLUDE_TENSORFLOW\n"); -#endif + tprintf("Unsupported TensorFlow model\n"); break; // All variants of FullyConnected. 
case NT_SOFTMAX: diff --git a/src/lstm/series.cpp b/src/lstm/series.cpp index 990698613c..25b0d2743f 100644 --- a/src/lstm/series.cpp +++ b/src/lstm/series.cpp @@ -20,6 +20,7 @@ #include "fullyconnected.h" #include "networkscratch.h" #include "scrollview.h" +#include "tesserrstream.h" // for tesserr #include "tprintf.h" namespace tesseract { @@ -164,7 +165,8 @@ void Series::SplitAt(unsigned last_start, Series **start, Series **end) { *start = nullptr; *end = nullptr; if (last_start >= stack_.size()) { - tprintf("Invalid split index %u must be in range [0,%zu]!\n", last_start, stack_.size() - 1); + tesserr << "Invalid split index " << last_start + << " must be in range [0," << stack_.size() - 1 << "]!\n"; return; } auto *master_series = new Series("MasterSeries"); diff --git a/src/lstm/tfnetwork.cpp b/src/lstm/tfnetwork.cpp deleted file mode 100644 index d7b1441eec..0000000000 --- a/src/lstm/tfnetwork.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tfnetwork.cpp -// Description: Encapsulation of an entire tensorflow graph as a -// Tesseract Network. -// Author: Ray Smith -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-/////////////////////////////////////////////////////////////////////// -#ifdef INCLUDE_TENSORFLOW - -# include "tfnetwork.h" - -# include -# include "input.h" -# include "networkscratch.h" - -using tensorflow::Status; -using tensorflow::Tensor; -using tensorflow::TensorShape; - -namespace tesseract { - -TFNetwork::TFNetwork(const std::string &name) : Network(NT_TENSORFLOW, name, 0, 0) {} - -int TFNetwork::InitFromProtoStr(const std::string &proto_str) { - if (!model_proto_.ParseFromString(proto_str)) - return 0; - return InitFromProto(); -} - -// Writes to the given file. Returns false in case of error. -// Should be overridden by subclasses, but called by their Serialize. -bool TFNetwork::Serialize(TFile *fp) const { - if (!Network::Serialize(fp)) - return false; - std::string proto_str; - model_proto_.SerializeToString(&proto_str); - // TODO: optimize and avoid copy from proto_str to data. - std::vector data(proto_str.size()); - memcpy(&data[0], proto_str.data(), proto_str.size()); - return fp->Serialize(data); -} - -// Reads from the given file. Returns false in case of error. -// Should be overridden by subclasses, but NOT called by their DeSerialize. -bool TFNetwork::DeSerialize(TFile *fp) { - std::vector data; - if (!fp->DeSerialize(data)) - return false; - if (!model_proto_.ParseFromArray(&data[0], data.size())) { - return false; - } - return InitFromProto(); -} - -// Runs forward propagation of activations on the input line. -// See Network for a detailed discussion of the arguments. -void TFNetwork::Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, - NetworkScratch *scratch, NetworkIO *output) { - std::vector> tf_inputs; - int depth = input_shape_.depth(); - ASSERT_HOST(depth == input.NumFeatures()); - // TODO(rays) Allow batching. For now batch_size = 1. - const StrideMap &stride_map = input.stride_map(); - // TF requires a tensor of shape float[batch, height, width, depth]. 
- TensorShape shape{1, stride_map.Size(FD_HEIGHT), stride_map.Size(FD_WIDTH), depth}; - Tensor input_tensor(tensorflow::DT_FLOAT, shape); - // The flat() member gives a 1d array, with a data() member to get the data. - auto eigen_tensor = input_tensor.flat(); - memcpy(eigen_tensor.data(), input.f(0), input.Width() * depth * sizeof(input.f(0)[0])); - // Add the tensor to the vector of inputs. - tf_inputs.emplace_back(model_proto_.image_input(), input_tensor); - - // Provide tensors giving the width and/or height of the image if they are - // required. Some tf ops require a separate tensor with knowledge of the - // size of the input as they cannot obtain it from the input tensor. This is - // usually true in the case of ops that process a batch of variable-sized - // objects. - if (!model_proto_.image_widths().empty()) { - TensorShape size_shape{1}; - Tensor width_tensor(tensorflow::DT_INT64, size_shape); - auto eigen_wtensor = width_tensor.flat(); - *eigen_wtensor.data() = stride_map.Size(FD_WIDTH); - tf_inputs.emplace_back(model_proto_.image_widths(), width_tensor); - } - if (!model_proto_.image_heights().empty()) { - TensorShape size_shape{1}; - Tensor height_tensor(tensorflow::DT_INT64, size_shape); - auto eigen_htensor = height_tensor.flat(); - *eigen_htensor.data() = stride_map.Size(FD_HEIGHT); - tf_inputs.emplace_back(model_proto_.image_heights(), height_tensor); - } - std::vector target_layers = {model_proto_.output_layer()}; - std::vector outputs; - Status s = session_->Run(tf_inputs, target_layers, {}, &outputs); - if (!s.ok()) - tprintf("session->Run failed:%s\n", s.error_message().c_str()); - ASSERT_HOST(s.ok()); - ASSERT_HOST(outputs.size() == 1); - const Tensor &output_tensor = outputs[0]; - // Check the dimensions of the output. 
- ASSERT_HOST(output_tensor.shape().dims() == 3); - int output_batch = output_tensor.shape().dim_size(0); - int output_steps = output_tensor.shape().dim_size(1); - int output_depth = output_tensor.shape().dim_size(2); - ASSERT_HOST(output_batch == 1); - ASSERT_HOST(output_depth == output_shape_.depth()); - output->Resize2d(false, output_steps, output_depth); - auto eigen_output = output_tensor.flat(); - memcpy(output->f(0), eigen_output.data(), output_steps * output_depth * sizeof(output->f(0)[0])); -} - -int TFNetwork::InitFromProto() { - spec_ = model_proto_.spec(); - input_shape_.SetShape(model_proto_.batch_size(), std::max(0, model_proto_.y_size()), - std::max(0, model_proto_.x_size()), model_proto_.depth()); - output_shape_.SetShape(model_proto_.batch_size(), 1, 0, model_proto_.num_classes()); - output_shape_.set_loss_type(model_proto_.using_ctc() ? LT_CTC : LT_SOFTMAX); - ni_ = input_shape_.height(); - no_ = output_shape_.depth(); - // Initialize the session_ with the graph. Since we can't get the graph - // back from the session_, we have to keep the proto as well - tensorflow::SessionOptions options; - session_.reset(NewSession(options)); - Status s = session_->Create(model_proto_.graph()); - if (s.ok()) - return model_proto_.global_step(); - tprintf("Session_->Create returned '%s'\n", s.error_message().c_str()); - return 0; -} - -} // namespace tesseract - -#endif // ifdef INCLUDE_TENSORFLOW diff --git a/src/lstm/tfnetwork.h b/src/lstm/tfnetwork.h deleted file mode 100644 index 7fbd6042cd..0000000000 --- a/src/lstm/tfnetwork.h +++ /dev/null @@ -1,106 +0,0 @@ -/////////////////////////////////////////////////////////////////////// -// File: tfnetwork.h -// Description: Encapsulation of an entire tensorflow graph as a -// Tesseract Network. -// Author: Ray Smith -// -// (C) Copyright 2016, Google Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -/////////////////////////////////////////////////////////////////////// - -#ifndef TESSERACT_LSTM_TFNETWORK_H_ -#define TESSERACT_LSTM_TFNETWORK_H_ - -#ifdef INCLUDE_TENSORFLOW - -# include -# include - -# include "network.h" -# include "static_shape.h" -# include "tensorflow/core/framework/graph.pb.h" -# include "tensorflow/core/public/session.h" -# include "tfnetwork.pb.h" - -namespace tesseract { - -class TFNetwork : public Network { -public: - explicit TFNetwork(const std::string &name); - virtual ~TFNetwork() = default; - - // Returns the required shape input to the network. - StaticShape InputShape() const override { - return input_shape_; - } - // Returns the shape output from the network given an input shape (which may - // be partially unknown ie zero). - StaticShape OutputShape(const StaticShape &input_shape) const override { - return output_shape_; - } - - std::string spec() const override { - return spec_; - } - - // Deserializes *this from a serialized TFNetwork proto. Returns 0 if failed, - // otherwise the global step of the serialized graph. - int InitFromProtoStr(const std::string &proto_str); - // The number of classes in this network should be equal to those in the - // recoder_ in LSTMRecognizer. - int num_classes() const { - return output_shape_.depth(); - } - - // Writes to the given file. Returns false in case of error. - // Should be overridden by subclasses, but called by their Serialize. - bool Serialize(TFile *fp) const override; - // Reads from the given file. Returns false in case of error. 
- // Should be overridden by subclasses, but NOT called by their DeSerialize. - bool DeSerialize(TFile *fp) override; - - // Runs forward propagation of activations on the input line. - // See Network for a detailed discussion of the arguments. - void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, - NetworkScratch *scratch, NetworkIO *output) override; - -private: - // Runs backward propagation of errors on the deltas line. - // See Network for a detailed discussion of the arguments. - bool Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch, - NetworkIO *back_deltas) override { - tprintf("Must override Network::Backward for type %d\n", type_); - return false; - } - - void DebugWeights() override { - tprintf("Must override Network::DebugWeights for type %d\n", type_); - } - - int InitFromProto(); - - // The original network definition for reference. - std::string spec_; - // Input tensor parameters. - StaticShape input_shape_; - // Output tensor parameters. - StaticShape output_shape_; - // The tensor flow graph is contained in here. - std::unique_ptr session_; - // The serialized graph is also contained in here. - TFNetworkModel model_proto_; -}; - -} // namespace tesseract. - -#endif // ifdef INCLUDE_TENSORFLOW - -#endif // TESSERACT_TENSORFLOW_TFNETWORK_H_ diff --git a/src/lstm/tfnetwork.pb.cc b/src/lstm/tfnetwork.pb.cc deleted file mode 100644 index 8c2537cecb..0000000000 --- a/src/lstm/tfnetwork.pb.cc +++ /dev/null @@ -1,929 +0,0 @@ -// Generated by the protocol buffer compiler. DO NOT EDIT! 
-// source: tfnetwork.proto - -#include "tfnetwork.pb.h" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -// This is a temporary google only hack -#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS -# include "third_party/protobuf/version.h" -#endif -// @@protoc_insertion_point(includes) - -namespace protobuf_tensorflow_2fcore_2fframework_2fgraph_2eproto { -extern PROTOBUF_INTERNAL_EXPORT_protobuf_tensorflow_2fcore_2fframework_2fgraph_2eproto ::google:: - protobuf::internal::SCCInfo<3> - scc_info_GraphDef; -} // namespace protobuf_tensorflow_2fcore_2fframework_2fgraph_2eproto -namespace tesseract { -class TFNetworkModelDefaultTypeInternal { -public: - ::google::protobuf::internal::ExplicitlyConstructed _instance; -} _TFNetworkModel_default_instance_; -} // namespace tesseract -namespace protobuf_tfnetwork_2eproto { -static void InitDefaultsTFNetworkModel() { - GOOGLE_PROTOBUF_VERIFY_VERSION; - - { - void *ptr = &::tesseract::_TFNetworkModel_default_instance_; - new (ptr)::tesseract::TFNetworkModel(); - ::google::protobuf::internal::OnShutdownDestroyMessage(ptr); - } - ::tesseract::TFNetworkModel::InitAsDefaultInstance(); -} - -::google::protobuf::internal::SCCInfo<1> scc_info_TFNetworkModel = { - {ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 1, - InitDefaultsTFNetworkModel}, - { - &protobuf_tensorflow_2fcore_2fframework_2fgraph_2eproto::scc_info_GraphDef.base, - }}; - -void InitDefaults() { - ::google::protobuf::internal::InitSCC(&scc_info_TFNetworkModel.base); -} - -::google::protobuf::Metadata file_level_metadata[1]; - -const ::google::protobuf::uint32 TableStruct::offsets[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE( - protodesc_cold) = { - ~0u, // no _has_bits_ - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, - _internal_metadata_), - ~0u, // no _extensions_ - ~0u, // no _oneof_case_ - ~0u, // no _weak_field_map_ - 
GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, graph_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, global_step_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, spec_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, depth_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, x_size_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, y_size_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, batch_size_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, num_classes_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, using_ctc_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, image_input_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, image_widths_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, image_heights_), - GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(::tesseract::TFNetworkModel, output_layer_), -}; -static const ::google::protobuf::internal::MigrationSchema - schemas[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = { - {0, -1, sizeof(::tesseract::TFNetworkModel)}, -}; - -static ::google::protobuf::Message const *const file_default_instances[] = { - reinterpret_cast( - &::tesseract::_TFNetworkModel_default_instance_), -}; - -void protobuf_AssignDescriptors() { - AddDescriptors(); - AssignDescriptors("tfnetwork.proto", schemas, file_default_instances, TableStruct::offsets, - file_level_metadata, NULL, NULL); -} - -void protobuf_AssignDescriptorsOnce() { - static ::google::protobuf::internal::once_flag once; - ::google::protobuf::internal::call_once(once, protobuf_AssignDescriptors); -} - -void protobuf_RegisterTypes(const ::std::string &) GOOGLE_PROTOBUF_ATTRIBUTE_COLD; -void 
protobuf_RegisterTypes(const ::std::string &) { - protobuf_AssignDescriptorsOnce(); - ::google::protobuf::internal::RegisterAllTypes(file_level_metadata, 1); -} - -void AddDescriptorsImpl() { - InitDefaults(); - static const char descriptor[] GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(protodesc_cold) = { - "\n\017tfnetwork.proto\022\ttesseract\032%tensorflow" - "/core/framework/graph.proto\"\233\002\n\016TFNetwor" - "kModel\022#\n\005graph\030\001 \001(\0132\024.tensorflow.Graph" - "Def\022\023\n\013global_step\030\002 \001(\003\022\014\n\004spec\030\003 \001(\t\022\r" - "\n\005depth\030\004 \001(\005\022\016\n\006x_size\030\005 \001(\005\022\016\n\006y_size\030" - "\006 \001(\005\022\022\n\nbatch_size\030\010 \001(\005\022\023\n\013num_classes" - "\030\t \001(\005\022\021\n\tusing_ctc\030\n \001(\010\022\023\n\013image_input" - "\030\013 \001(\t\022\024\n\014image_widths\030\014 \001(\t\022\025\n\rimage_he" - "ights\030\r \001(\t\022\024\n\014output_layer\030\016 \001(\tb\006proto" - "3"}; - ::google::protobuf::DescriptorPool::InternalAddGeneratedFile(descriptor, 361); - ::google::protobuf::MessageFactory::InternalRegisterGeneratedFile("tfnetwork.proto", - &protobuf_RegisterTypes); - ::protobuf_tensorflow_2fcore_2fframework_2fgraph_2eproto::AddDescriptors(); -} - -void AddDescriptors() { - static ::google::protobuf::internal::once_flag once; - ::google::protobuf::internal::call_once(once, AddDescriptorsImpl); -} -// Force AddDescriptors() to be called at dynamic initialization time. 
-struct StaticDescriptorInitializer { - StaticDescriptorInitializer() { - AddDescriptors(); - } -} static_descriptor_initializer; -} // namespace protobuf_tfnetwork_2eproto -namespace tesseract { - -// =================================================================== - -void TFNetworkModel::InitAsDefaultInstance() { - ::tesseract::_TFNetworkModel_default_instance_._instance.get_mutable()->graph_ = - const_cast<::tensorflow::GraphDef *>(::tensorflow::GraphDef::internal_default_instance()); -} -void TFNetworkModel::clear_graph() { - if (GetArenaNoVirtual() == NULL && graph_ != NULL) { - delete graph_; - } - graph_ = NULL; -} -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int TFNetworkModel::kGraphFieldNumber; -const int TFNetworkModel::kGlobalStepFieldNumber; -const int TFNetworkModel::kSpecFieldNumber; -const int TFNetworkModel::kDepthFieldNumber; -const int TFNetworkModel::kXSizeFieldNumber; -const int TFNetworkModel::kYSizeFieldNumber; -const int TFNetworkModel::kBatchSizeFieldNumber; -const int TFNetworkModel::kNumClassesFieldNumber; -const int TFNetworkModel::kUsingCtcFieldNumber; -const int TFNetworkModel::kImageInputFieldNumber; -const int TFNetworkModel::kImageWidthsFieldNumber; -const int TFNetworkModel::kImageHeightsFieldNumber; -const int TFNetworkModel::kOutputLayerFieldNumber; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 - -TFNetworkModel::TFNetworkModel() : ::google::protobuf::Message(), _internal_metadata_(NULL) { - ::google::protobuf::internal::InitSCC(&protobuf_tfnetwork_2eproto::scc_info_TFNetworkModel.base); - SharedCtor(); - // @@protoc_insertion_point(constructor:tesseract.TFNetworkModel) -} -TFNetworkModel::TFNetworkModel(const TFNetworkModel &from) - : ::google::protobuf::Message(), _internal_metadata_(NULL) { - _internal_metadata_.MergeFrom(from._internal_metadata_); - spec_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.spec().size() > 0) { - 
spec_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - from.spec_); - } - image_input_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.image_input().size() > 0) { - image_input_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - from.image_input_); - } - image_widths_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.image_widths().size() > 0) { - image_widths_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - from.image_widths_); - } - image_heights_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.image_heights().size() > 0) { - image_heights_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - from.image_heights_); - } - output_layer_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.output_layer().size() > 0) { - output_layer_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - from.output_layer_); - } - if (from.has_graph()) { - graph_ = new ::tensorflow::GraphDef(*from.graph_); - } else { - graph_ = NULL; - } - ::memcpy(&global_step_, &from.global_step_, - static_cast(reinterpret_cast(&using_ctc_) - - reinterpret_cast(&global_step_)) + - sizeof(using_ctc_)); - // @@protoc_insertion_point(copy_constructor:tesseract.TFNetworkModel) -} - -void TFNetworkModel::SharedCtor() { - spec_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - image_input_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - image_widths_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - image_heights_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - output_layer_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - 
::memset(&graph_, 0, - static_cast(reinterpret_cast(&using_ctc_) - - reinterpret_cast(&graph_)) + - sizeof(using_ctc_)); -} - -TFNetworkModel::~TFNetworkModel() { - // @@protoc_insertion_point(destructor:tesseract.TFNetworkModel) - SharedDtor(); -} - -void TFNetworkModel::SharedDtor() { - spec_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - image_input_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - image_widths_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - image_heights_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - output_layer_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (this != internal_default_instance()) - delete graph_; -} - -void TFNetworkModel::SetCachedSize(int size) const { - _cached_size_.Set(size); -} -const ::google::protobuf::Descriptor *TFNetworkModel::descriptor() { - ::protobuf_tfnetwork_2eproto::protobuf_AssignDescriptorsOnce(); - return ::protobuf_tfnetwork_2eproto::file_level_metadata[kIndexInFileMessages].descriptor; -} - -const TFNetworkModel &TFNetworkModel::default_instance() { - ::google::protobuf::internal::InitSCC(&protobuf_tfnetwork_2eproto::scc_info_TFNetworkModel.base); - return *internal_default_instance(); -} - -void TFNetworkModel::Clear() { - // @@protoc_insertion_point(message_clear_start:tesseract.TFNetworkModel) - ::google::protobuf::uint32 cached_has_bits = 0; - // Prevent compiler warnings about cached_has_bits being unused - (void)cached_has_bits; - - spec_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - image_input_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - image_widths_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - image_heights_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - 
output_layer_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (GetArenaNoVirtual() == NULL && graph_ != NULL) { - delete graph_; - } - graph_ = NULL; - ::memset(&global_step_, 0, - static_cast(reinterpret_cast(&using_ctc_) - - reinterpret_cast(&global_step_)) + - sizeof(using_ctc_)); - _internal_metadata_.Clear(); -} - -bool TFNetworkModel::MergePartialFromCodedStream(::google::protobuf::io::CodedInputStream *input) { -#define DO_(EXPRESSION) \ - if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) \ - goto failure - ::google::protobuf::uint32 tag; - // @@protoc_insertion_point(parse_start:tesseract.TFNetworkModel) - for (;;) { - ::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); - tag = p.first; - if (!p.second) - goto handle_unusual; - switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // .tensorflow.GraphDef graph = 1; - case 1: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(10u /* 10 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadMessage(input, mutable_graph())); - } else { - goto handle_unusual; - } - break; - } - - // int64 global_step = 2; - case 2: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(16u /* 16 & 0xFF */)) { - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int64, ::google::protobuf::internal::WireFormatLite::TYPE_INT64>( - input, &global_step_))); - } else { - goto handle_unusual; - } - break; - } - - // string spec = 3; - case 3: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(26u /* 26 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString(input, - this->mutable_spec())); - DO_(::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->spec().data(), static_cast(this->spec().length()), - 
::google::protobuf::internal::WireFormatLite::PARSE, - "tesseract.TFNetworkModel.spec")); - } else { - goto handle_unusual; - } - break; - } - - // int32 depth = 4; - case 4: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(32u /* 32 & 0xFF */)) { - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &depth_))); - } else { - goto handle_unusual; - } - break; - } - - // int32 x_size = 5; - case 5: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(40u /* 40 & 0xFF */)) { - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &x_size_))); - } else { - goto handle_unusual; - } - break; - } - - // int32 y_size = 6; - case 6: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(48u /* 48 & 0xFF */)) { - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &y_size_))); - } else { - goto handle_unusual; - } - break; - } - - // int32 batch_size = 8; - case 8: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(64u /* 64 & 0xFF */)) { - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &batch_size_))); - } else { - goto handle_unusual; - } - break; - } - - // int32 num_classes = 9; - case 9: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(72u /* 72 & 0xFF */)) { - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( 
- input, &num_classes_))); - } else { - goto handle_unusual; - } - break; - } - - // bool using_ctc = 10; - case 10: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(80u /* 80 & 0xFF */)) { - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>(input, &using_ctc_))); - } else { - goto handle_unusual; - } - break; - } - - // string image_input = 11; - case 11: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(90u /* 90 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_image_input())); - DO_(::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->image_input().data(), static_cast(this->image_input().length()), - ::google::protobuf::internal::WireFormatLite::PARSE, - "tesseract.TFNetworkModel.image_input")); - } else { - goto handle_unusual; - } - break; - } - - // string image_widths = 12; - case 12: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(98u /* 98 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_image_widths())); - DO_(::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->image_widths().data(), static_cast(this->image_widths().length()), - ::google::protobuf::internal::WireFormatLite::PARSE, - "tesseract.TFNetworkModel.image_widths")); - } else { - goto handle_unusual; - } - break; - } - - // string image_heights = 13; - case 13: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(106u /* 106 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_image_heights())); - DO_(::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->image_heights().data(), static_cast(this->image_heights().length()), - 
::google::protobuf::internal::WireFormatLite::PARSE, - "tesseract.TFNetworkModel.image_heights")); - } else { - goto handle_unusual; - } - break; - } - - // string output_layer = 14; - case 14: { - if (static_cast<::google::protobuf::uint8>(tag) == - static_cast<::google::protobuf::uint8>(114u /* 114 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_output_layer())); - DO_(::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->output_layer().data(), static_cast(this->output_layer().length()), - ::google::protobuf::internal::WireFormatLite::PARSE, - "tesseract.TFNetworkModel.output_layer")); - } else { - goto handle_unusual; - } - break; - } - - default: { - handle_unusual: - if (tag == 0) { - goto success; - } - DO_(::google::protobuf::internal::WireFormat::SkipField( - input, tag, _internal_metadata_.mutable_unknown_fields())); - break; - } - } - } -success: - // @@protoc_insertion_point(parse_success:tesseract.TFNetworkModel) - return true; -failure: - // @@protoc_insertion_point(parse_failure:tesseract.TFNetworkModel) - return false; -#undef DO_ -} - -void TFNetworkModel::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream *output) const { - // @@protoc_insertion_point(serialize_start:tesseract.TFNetworkModel) - ::google::protobuf::uint32 cached_has_bits = 0; - (void)cached_has_bits; - - // .tensorflow.GraphDef graph = 1; - if (this->has_graph()) { - ::google::protobuf::internal::WireFormatLite::WriteMessageMaybeToArray( - 1, this->_internal_graph(), output); - } - - // int64 global_step = 2; - if (this->global_step() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteInt64(2, this->global_step(), output); - } - - // string spec = 3; - if (this->spec().size() > 0) { - ::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->spec().data(), static_cast(this->spec().length()), - ::google::protobuf::internal::WireFormatLite::SERIALIZE, 
"tesseract.TFNetworkModel.spec"); - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased(3, this->spec(), output); - } - - // int32 depth = 4; - if (this->depth() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(4, this->depth(), output); - } - - // int32 x_size = 5; - if (this->x_size() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(5, this->x_size(), output); - } - - // int32 y_size = 6; - if (this->y_size() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(6, this->y_size(), output); - } - - // int32 batch_size = 8; - if (this->batch_size() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(8, this->batch_size(), output); - } - - // int32 num_classes = 9; - if (this->num_classes() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(9, this->num_classes(), output); - } - - // bool using_ctc = 10; - if (this->using_ctc() != 0) { - ::google::protobuf::internal::WireFormatLite::WriteBool(10, this->using_ctc(), output); - } - - // string image_input = 11; - if (this->image_input().size() > 0) { - ::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->image_input().data(), static_cast(this->image_input().length()), - ::google::protobuf::internal::WireFormatLite::SERIALIZE, - "tesseract.TFNetworkModel.image_input"); - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased(11, this->image_input(), - output); - } - - // string image_widths = 12; - if (this->image_widths().size() > 0) { - ::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->image_widths().data(), static_cast(this->image_widths().length()), - ::google::protobuf::internal::WireFormatLite::SERIALIZE, - "tesseract.TFNetworkModel.image_widths"); - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased(12, this->image_widths(), - output); - } - - // string image_heights = 13; - if (this->image_heights().size() > 0) { - 
::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->image_heights().data(), static_cast(this->image_heights().length()), - ::google::protobuf::internal::WireFormatLite::SERIALIZE, - "tesseract.TFNetworkModel.image_heights"); - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased(13, this->image_heights(), - output); - } - - // string output_layer = 14; - if (this->output_layer().size() > 0) { - ::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->output_layer().data(), static_cast(this->output_layer().length()), - ::google::protobuf::internal::WireFormatLite::SERIALIZE, - "tesseract.TFNetworkModel.output_layer"); - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased(14, this->output_layer(), - output); - } - - if ((_internal_metadata_.have_unknown_fields() && - ::google::protobuf::internal::GetProto3PreserveUnknownsDefault())) { - ::google::protobuf::internal::WireFormat::SerializeUnknownFields( - (::google::protobuf::internal::GetProto3PreserveUnknownsDefault() - ? 
_internal_metadata_.unknown_fields() - : _internal_metadata_.default_instance()), - output); - } - // @@protoc_insertion_point(serialize_end:tesseract.TFNetworkModel) -} - -::google::protobuf::uint8 *TFNetworkModel::InternalSerializeWithCachedSizesToArray( - bool deterministic, ::google::protobuf::uint8 *target) const { - (void)deterministic; // Unused - // @@protoc_insertion_point(serialize_to_array_start:tesseract.TFNetworkModel) - ::google::protobuf::uint32 cached_has_bits = 0; - (void)cached_has_bits; - - // .tensorflow.GraphDef graph = 1; - if (this->has_graph()) { - target = ::google::protobuf::internal::WireFormatLite::InternalWriteMessageToArray( - 1, this->_internal_graph(), deterministic, target); - } - - // int64 global_step = 2; - if (this->global_step() != 0) { - target = ::google::protobuf::internal::WireFormatLite::WriteInt64ToArray(2, this->global_step(), - target); - } - - // string spec = 3; - if (this->spec().size() > 0) { - ::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->spec().data(), static_cast(this->spec().length()), - ::google::protobuf::internal::WireFormatLite::SERIALIZE, "tesseract.TFNetworkModel.spec"); - target = - ::google::protobuf::internal::WireFormatLite::WriteStringToArray(3, this->spec(), target); - } - - // int32 depth = 4; - if (this->depth() != 0) { - target = - ::google::protobuf::internal::WireFormatLite::WriteInt32ToArray(4, this->depth(), target); - } - - // int32 x_size = 5; - if (this->x_size() != 0) { - target = - ::google::protobuf::internal::WireFormatLite::WriteInt32ToArray(5, this->x_size(), target); - } - - // int32 y_size = 6; - if (this->y_size() != 0) { - target = - ::google::protobuf::internal::WireFormatLite::WriteInt32ToArray(6, this->y_size(), target); - } - - // int32 batch_size = 8; - if (this->batch_size() != 0) { - target = ::google::protobuf::internal::WireFormatLite::WriteInt32ToArray(8, this->batch_size(), - target); - } - - // int32 num_classes = 9; - if (this->num_classes() 
!= 0) { - target = ::google::protobuf::internal::WireFormatLite::WriteInt32ToArray(9, this->num_classes(), - target); - } - - // bool using_ctc = 10; - if (this->using_ctc() != 0) { - target = ::google::protobuf::internal::WireFormatLite::WriteBoolToArray(10, this->using_ctc(), - target); - } - - // string image_input = 11; - if (this->image_input().size() > 0) { - ::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->image_input().data(), static_cast(this->image_input().length()), - ::google::protobuf::internal::WireFormatLite::SERIALIZE, - "tesseract.TFNetworkModel.image_input"); - target = ::google::protobuf::internal::WireFormatLite::WriteStringToArray( - 11, this->image_input(), target); - } - - // string image_widths = 12; - if (this->image_widths().size() > 0) { - ::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->image_widths().data(), static_cast(this->image_widths().length()), - ::google::protobuf::internal::WireFormatLite::SERIALIZE, - "tesseract.TFNetworkModel.image_widths"); - target = ::google::protobuf::internal::WireFormatLite::WriteStringToArray( - 12, this->image_widths(), target); - } - - // string image_heights = 13; - if (this->image_heights().size() > 0) { - ::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->image_heights().data(), static_cast(this->image_heights().length()), - ::google::protobuf::internal::WireFormatLite::SERIALIZE, - "tesseract.TFNetworkModel.image_heights"); - target = ::google::protobuf::internal::WireFormatLite::WriteStringToArray( - 13, this->image_heights(), target); - } - - // string output_layer = 14; - if (this->output_layer().size() > 0) { - ::google::protobuf::internal::WireFormatLite::VerifyUtf8String( - this->output_layer().data(), static_cast(this->output_layer().length()), - ::google::protobuf::internal::WireFormatLite::SERIALIZE, - "tesseract.TFNetworkModel.output_layer"); - target = ::google::protobuf::internal::WireFormatLite::WriteStringToArray( - 
14, this->output_layer(), target); - } - - if ((_internal_metadata_.have_unknown_fields() && - ::google::protobuf::internal::GetProto3PreserveUnknownsDefault())) { - target = ::google::protobuf::internal::WireFormat::SerializeUnknownFieldsToArray( - (::google::protobuf::internal::GetProto3PreserveUnknownsDefault() - ? _internal_metadata_.unknown_fields() - : _internal_metadata_.default_instance()), - target); - } - // @@protoc_insertion_point(serialize_to_array_end:tesseract.TFNetworkModel) - return target; -} - -size_t TFNetworkModel::ByteSizeLong() const { - // @@protoc_insertion_point(message_byte_size_start:tesseract.TFNetworkModel) - size_t total_size = 0; - - if ((_internal_metadata_.have_unknown_fields() && - ::google::protobuf::internal::GetProto3PreserveUnknownsDefault())) { - total_size += ::google::protobuf::internal::WireFormat::ComputeUnknownFieldsSize( - (::google::protobuf::internal::GetProto3PreserveUnknownsDefault() - ? _internal_metadata_.unknown_fields() - : _internal_metadata_.default_instance())); - } - // string spec = 3; - if (this->spec().size() > 0) { - total_size += 1 + ::google::protobuf::internal::WireFormatLite::StringSize(this->spec()); - } - - // string image_input = 11; - if (this->image_input().size() > 0) { - total_size += 1 + ::google::protobuf::internal::WireFormatLite::StringSize(this->image_input()); - } - - // string image_widths = 12; - if (this->image_widths().size() > 0) { - total_size += - 1 + ::google::protobuf::internal::WireFormatLite::StringSize(this->image_widths()); - } - - // string image_heights = 13; - if (this->image_heights().size() > 0) { - total_size += - 1 + ::google::protobuf::internal::WireFormatLite::StringSize(this->image_heights()); - } - - // string output_layer = 14; - if (this->output_layer().size() > 0) { - total_size += - 1 + ::google::protobuf::internal::WireFormatLite::StringSize(this->output_layer()); - } - - // .tensorflow.GraphDef graph = 1; - if (this->has_graph()) { - total_size += 1 + 
::google::protobuf::internal::WireFormatLite::MessageSize(*graph_); - } - - // int64 global_step = 2; - if (this->global_step() != 0) { - total_size += 1 + ::google::protobuf::internal::WireFormatLite::Int64Size(this->global_step()); - } - - // int32 depth = 4; - if (this->depth() != 0) { - total_size += 1 + ::google::protobuf::internal::WireFormatLite::Int32Size(this->depth()); - } - - // int32 x_size = 5; - if (this->x_size() != 0) { - total_size += 1 + ::google::protobuf::internal::WireFormatLite::Int32Size(this->x_size()); - } - - // int32 y_size = 6; - if (this->y_size() != 0) { - total_size += 1 + ::google::protobuf::internal::WireFormatLite::Int32Size(this->y_size()); - } - - // int32 batch_size = 8; - if (this->batch_size() != 0) { - total_size += 1 + ::google::protobuf::internal::WireFormatLite::Int32Size(this->batch_size()); - } - - // int32 num_classes = 9; - if (this->num_classes() != 0) { - total_size += 1 + ::google::protobuf::internal::WireFormatLite::Int32Size(this->num_classes()); - } - - // bool using_ctc = 10; - if (this->using_ctc() != 0) { - total_size += 1 + 1; - } - - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); - SetCachedSize(cached_size); - return total_size; -} - -void TFNetworkModel::MergeFrom(const ::google::protobuf::Message &from) { - // @@protoc_insertion_point(generalized_merge_from_start:tesseract.TFNetworkModel) - GOOGLE_DCHECK_NE(&from, this); - const TFNetworkModel *source = - ::google::protobuf::internal::DynamicCastToGenerated(&from); - if (source == NULL) { - // @@protoc_insertion_point(generalized_merge_from_cast_fail:tesseract.TFNetworkModel) - ::google::protobuf::internal::ReflectionOps::Merge(from, this); - } else { - // @@protoc_insertion_point(generalized_merge_from_cast_success:tesseract.TFNetworkModel) - MergeFrom(*source); - } -} - -void TFNetworkModel::MergeFrom(const TFNetworkModel &from) { - // @@protoc_insertion_point(class_specific_merge_from_start:tesseract.TFNetworkModel) - 
GOOGLE_DCHECK_NE(&from, this); - _internal_metadata_.MergeFrom(from._internal_metadata_); - ::google::protobuf::uint32 cached_has_bits = 0; - (void)cached_has_bits; - - if (from.spec().size() > 0) { - spec_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - from.spec_); - } - if (from.image_input().size() > 0) { - image_input_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - from.image_input_); - } - if (from.image_widths().size() > 0) { - image_widths_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - from.image_widths_); - } - if (from.image_heights().size() > 0) { - image_heights_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - from.image_heights_); - } - if (from.output_layer().size() > 0) { - output_layer_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - from.output_layer_); - } - if (from.has_graph()) { - mutable_graph()->::tensorflow::GraphDef::MergeFrom(from.graph()); - } - if (from.global_step() != 0) { - set_global_step(from.global_step()); - } - if (from.depth() != 0) { - set_depth(from.depth()); - } - if (from.x_size() != 0) { - set_x_size(from.x_size()); - } - if (from.y_size() != 0) { - set_y_size(from.y_size()); - } - if (from.batch_size() != 0) { - set_batch_size(from.batch_size()); - } - if (from.num_classes() != 0) { - set_num_classes(from.num_classes()); - } - if (from.using_ctc() != 0) { - set_using_ctc(from.using_ctc()); - } -} - -void TFNetworkModel::CopyFrom(const ::google::protobuf::Message &from) { - // @@protoc_insertion_point(generalized_copy_from_start:tesseract.TFNetworkModel) - if (&from == this) - return; - Clear(); - MergeFrom(from); -} - -void TFNetworkModel::CopyFrom(const TFNetworkModel &from) { - // @@protoc_insertion_point(class_specific_copy_from_start:tesseract.TFNetworkModel) - if (&from == this) - return; - Clear(); - MergeFrom(from); -} - -bool 
TFNetworkModel::IsInitialized() const { - return true; -} - -void TFNetworkModel::Swap(TFNetworkModel *other) { - if (other == this) - return; - InternalSwap(other); -} -void TFNetworkModel::InternalSwap(TFNetworkModel *other) { - using std::swap; - spec_.Swap(&other->spec_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - image_input_.Swap(&other->image_input_, - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - image_widths_.Swap(&other->image_widths_, - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - image_heights_.Swap(&other->image_heights_, - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - output_layer_.Swap(&other->output_layer_, - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - swap(graph_, other->graph_); - swap(global_step_, other->global_step_); - swap(depth_, other->depth_); - swap(x_size_, other->x_size_); - swap(y_size_, other->y_size_); - swap(batch_size_, other->batch_size_); - swap(num_classes_, other->num_classes_); - swap(using_ctc_, other->using_ctc_); - _internal_metadata_.Swap(&other->_internal_metadata_); -} - -::google::protobuf::Metadata TFNetworkModel::GetMetadata() const { - protobuf_tfnetwork_2eproto::protobuf_AssignDescriptorsOnce(); - return ::protobuf_tfnetwork_2eproto::file_level_metadata[kIndexInFileMessages]; -} - -// @@protoc_insertion_point(namespace_scope) -} // namespace tesseract -namespace google { -namespace protobuf { -template <> -GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::tesseract::TFNetworkModel * -Arena::CreateMaybeMessage<::tesseract::TFNetworkModel>(Arena *arena) { - return Arena::CreateInternal<::tesseract::TFNetworkModel>(arena); -} -} // namespace protobuf -} // namespace google - -// @@protoc_insertion_point(global_scope) diff --git a/src/lstm/tfnetwork.pb.h b/src/lstm/tfnetwork.pb.h deleted file mode 100644 index 
f7ff2f054c..0000000000 --- a/src/lstm/tfnetwork.pb.h +++ /dev/null @@ -1,703 +0,0 @@ -// Generated by the protocol buffer compiler. DO NOT EDIT! -// source: tfnetwork.proto - -#ifndef PROTOBUF_INCLUDED_tfnetwork_2eproto -#define PROTOBUF_INCLUDED_tfnetwork_2eproto - -#include - -#include - -#if GOOGLE_PROTOBUF_VERSION < 3006001 -# error This file was generated by a newer version of protoc which is -# error incompatible with your Protocol Buffer headers. Please update -# error your headers. -#endif -#if 3006001 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION -# error This file was generated by an older version of protoc which is -# error incompatible with your Protocol Buffer headers. Please -# error regenerate this file with a newer version of protoc. -#endif - -#include -#include -#include // IWYU pragma: export -#include -#include -#include -#include -#include -#include -#include // IWYU pragma: export -#include -#include "tensorflow/core/framework/graph.pb.h" -// @@protoc_insertion_point(includes) -#define PROTOBUF_INTERNAL_EXPORT_protobuf_tfnetwork_2eproto - -namespace protobuf_tfnetwork_2eproto { -// Internal implementation detail -- do not use these members. 
-struct TableStruct { - static const ::google::protobuf::internal::ParseTableField entries[]; - static const ::google::protobuf::internal::AuxillaryParseTableField aux[]; - static const ::google::protobuf::internal::ParseTable schema[1]; - static const ::google::protobuf::internal::FieldMetadata field_metadata[]; - static const ::google::protobuf::internal::SerializationTable serialization_table[]; - static const ::google::protobuf::uint32 offsets[]; -}; -void AddDescriptors(); -} // namespace protobuf_tfnetwork_2eproto -namespace tesseract { -class TFNetworkModel; -class TFNetworkModelDefaultTypeInternal; -extern TFNetworkModelDefaultTypeInternal _TFNetworkModel_default_instance_; -} // namespace tesseract -namespace google { -namespace protobuf { -template <> -::tesseract::TFNetworkModel *Arena::CreateMaybeMessage<::tesseract::TFNetworkModel>(Arena *); -} // namespace protobuf -} // namespace google -namespace tesseract { - -// =================================================================== - -class TFNetworkModel - : public ::google::protobuf:: - Message /* @@protoc_insertion_point(class_definition:tesseract.TFNetworkModel) - */ -{ -public: - TFNetworkModel(); - virtual ~TFNetworkModel(); - - TFNetworkModel(const TFNetworkModel &from); - - inline TFNetworkModel &operator=(const TFNetworkModel &from) { - CopyFrom(from); - return *this; - } -#if LANG_CXX11 - TFNetworkModel(TFNetworkModel &&from) noexcept : TFNetworkModel() { - *this = ::std::move(from); - } - - inline TFNetworkModel &operator=(TFNetworkModel &&from) noexcept { - if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) { - if (this != &from) - InternalSwap(&from); - } else { - CopyFrom(from); - } - return *this; - } -#endif - static const ::google::protobuf::Descriptor *descriptor(); - static const TFNetworkModel &default_instance(); - - static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY - static inline const TFNetworkModel *internal_default_instance() { - return 
reinterpret_cast(&_TFNetworkModel_default_instance_); - } - static constexpr int kIndexInFileMessages = 0; - - void Swap(TFNetworkModel *other); - friend void swap(TFNetworkModel &a, TFNetworkModel &b) { - a.Swap(&b); - } - - // implements Message ---------------------------------------------- - - inline TFNetworkModel *New() const final { - return CreateMaybeMessage(NULL); - } - - TFNetworkModel *New(::google::protobuf::Arena *arena) const final { - return CreateMaybeMessage(arena); - } - void CopyFrom(const ::google::protobuf::Message &from) final; - void MergeFrom(const ::google::protobuf::Message &from) final; - void CopyFrom(const TFNetworkModel &from); - void MergeFrom(const TFNetworkModel &from); - void Clear() final; - bool IsInitialized() const final; - - size_t ByteSizeLong() const final; - bool MergePartialFromCodedStream(::google::protobuf::io::CodedInputStream *input) final; - void SerializeWithCachedSizes(::google::protobuf::io::CodedOutputStream *output) const final; - ::google::protobuf::uint8 *InternalSerializeWithCachedSizesToArray( - bool deterministic, ::google::protobuf::uint8 *target) const final; - int GetCachedSize() const final { - return _cached_size_.Get(); - } - -private: - void SharedCtor(); - void SharedDtor(); - void SetCachedSize(int size) const final; - void InternalSwap(TFNetworkModel *other); - -private: - inline ::google::protobuf::Arena *GetArenaNoVirtual() const { - return NULL; - } - inline void *MaybeArenaPtr() const { - return NULL; - } - -public: - ::google::protobuf::Metadata GetMetadata() const final; - - // nested types ---------------------------------------------------- - - // accessors ------------------------------------------------------- - - // string spec = 3; - void clear_spec(); - static const int kSpecFieldNumber = 3; - const ::std::string &spec() const; - void set_spec(const ::std::string &value); -#if LANG_CXX11 - void set_spec(::std::string &&value); -#endif - void set_spec(const char *value); - void 
set_spec(const char *value, size_t size); - ::std::string *mutable_spec(); - ::std::string *release_spec(); - void set_allocated_spec(::std::string *spec); - - // string image_input = 11; - void clear_image_input(); - static const int kImageInputFieldNumber = 11; - const ::std::string &image_input() const; - void set_image_input(const ::std::string &value); -#if LANG_CXX11 - void set_image_input(::std::string &&value); -#endif - void set_image_input(const char *value); - void set_image_input(const char *value, size_t size); - ::std::string *mutable_image_input(); - ::std::string *release_image_input(); - void set_allocated_image_input(::std::string *image_input); - - // string image_widths = 12; - void clear_image_widths(); - static const int kImageWidthsFieldNumber = 12; - const ::std::string &image_widths() const; - void set_image_widths(const ::std::string &value); -#if LANG_CXX11 - void set_image_widths(::std::string &&value); -#endif - void set_image_widths(const char *value); - void set_image_widths(const char *value, size_t size); - ::std::string *mutable_image_widths(); - ::std::string *release_image_widths(); - void set_allocated_image_widths(::std::string *image_widths); - - // string image_heights = 13; - void clear_image_heights(); - static const int kImageHeightsFieldNumber = 13; - const ::std::string &image_heights() const; - void set_image_heights(const ::std::string &value); -#if LANG_CXX11 - void set_image_heights(::std::string &&value); -#endif - void set_image_heights(const char *value); - void set_image_heights(const char *value, size_t size); - ::std::string *mutable_image_heights(); - ::std::string *release_image_heights(); - void set_allocated_image_heights(::std::string *image_heights); - - // string output_layer = 14; - void clear_output_layer(); - static const int kOutputLayerFieldNumber = 14; - const ::std::string &output_layer() const; - void set_output_layer(const ::std::string &value); -#if LANG_CXX11 - void 
set_output_layer(::std::string &&value); -#endif - void set_output_layer(const char *value); - void set_output_layer(const char *value, size_t size); - ::std::string *mutable_output_layer(); - ::std::string *release_output_layer(); - void set_allocated_output_layer(::std::string *output_layer); - - // .tensorflow.GraphDef graph = 1; - bool has_graph() const; - void clear_graph(); - static const int kGraphFieldNumber = 1; - -private: - const ::tensorflow::GraphDef &_internal_graph() const; - -public: - const ::tensorflow::GraphDef &graph() const; - ::tensorflow::GraphDef *release_graph(); - ::tensorflow::GraphDef *mutable_graph(); - void set_allocated_graph(::tensorflow::GraphDef *graph); - - // int64 global_step = 2; - void clear_global_step(); - static const int kGlobalStepFieldNumber = 2; - ::google::protobuf::int64 global_step() const; - void set_global_step(::google::protobuf::int64 value); - - // int32 depth = 4; - void clear_depth(); - static const int kDepthFieldNumber = 4; - ::google::protobuf::int32 depth() const; - void set_depth(::google::protobuf::int32 value); - - // int32 x_size = 5; - void clear_x_size(); - static const int kXSizeFieldNumber = 5; - ::google::protobuf::int32 x_size() const; - void set_x_size(::google::protobuf::int32 value); - - // int32 y_size = 6; - void clear_y_size(); - static const int kYSizeFieldNumber = 6; - ::google::protobuf::int32 y_size() const; - void set_y_size(::google::protobuf::int32 value); - - // int32 batch_size = 8; - void clear_batch_size(); - static const int kBatchSizeFieldNumber = 8; - ::google::protobuf::int32 batch_size() const; - void set_batch_size(::google::protobuf::int32 value); - - // int32 num_classes = 9; - void clear_num_classes(); - static const int kNumClassesFieldNumber = 9; - ::google::protobuf::int32 num_classes() const; - void set_num_classes(::google::protobuf::int32 value); - - // bool using_ctc = 10; - void clear_using_ctc(); - static const int kUsingCtcFieldNumber = 10; - bool using_ctc() 
const; - void set_using_ctc(bool value); - - // @@protoc_insertion_point(class_scope:tesseract.TFNetworkModel) -private: - ::google::protobuf::internal::InternalMetadataWithArena _internal_metadata_; - ::google::protobuf::internal::ArenaStringPtr spec_; - ::google::protobuf::internal::ArenaStringPtr image_input_; - ::google::protobuf::internal::ArenaStringPtr image_widths_; - ::google::protobuf::internal::ArenaStringPtr image_heights_; - ::google::protobuf::internal::ArenaStringPtr output_layer_; - ::tensorflow::GraphDef *graph_; - ::google::protobuf::int64 global_step_; - ::google::protobuf::int32 depth_; - ::google::protobuf::int32 x_size_; - ::google::protobuf::int32 y_size_; - ::google::protobuf::int32 batch_size_; - ::google::protobuf::int32 num_classes_; - bool using_ctc_; - mutable ::google::protobuf::internal::CachedSize _cached_size_; - friend struct ::protobuf_tfnetwork_2eproto::TableStruct; -}; -// =================================================================== - -// =================================================================== - -#ifdef __GNUC__ -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wstrict-aliasing" -#endif // __GNUC__ -// TFNetworkModel - -// .tensorflow.GraphDef graph = 1; -inline bool TFNetworkModel::has_graph() const { - return this != internal_default_instance() && graph_ != NULL; -} -inline const ::tensorflow::GraphDef &TFNetworkModel::_internal_graph() const { - return *graph_; -} -inline const ::tensorflow::GraphDef &TFNetworkModel::graph() const { - const ::tensorflow::GraphDef *p = graph_; - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.graph) - return p != NULL ? 
*p - : *reinterpret_cast( - &::tensorflow::_GraphDef_default_instance_); -} -inline ::tensorflow::GraphDef *TFNetworkModel::release_graph() { - // @@protoc_insertion_point(field_release:tesseract.TFNetworkModel.graph) - - ::tensorflow::GraphDef *temp = graph_; - graph_ = NULL; - return temp; -} -inline ::tensorflow::GraphDef *TFNetworkModel::mutable_graph() { - if (graph_ == NULL) { - auto *p = CreateMaybeMessage<::tensorflow::GraphDef>(GetArenaNoVirtual()); - graph_ = p; - } - // @@protoc_insertion_point(field_mutable:tesseract.TFNetworkModel.graph) - return graph_; -} -inline void TFNetworkModel::set_allocated_graph(::tensorflow::GraphDef *graph) { - ::google::protobuf::Arena *message_arena = GetArenaNoVirtual(); - if (message_arena == NULL) { - delete reinterpret_cast<::google::protobuf::MessageLite *>(graph_); - } - if (graph) { - ::google::protobuf::Arena *submessage_arena = - reinterpret_cast<::google::protobuf::MessageLite *>(graph)->GetArena(); - if (message_arena != submessage_arena) { - graph = ::google::protobuf::internal::GetOwnedMessage(message_arena, graph, submessage_arena); - } - - } else { - } - graph_ = graph; - // @@protoc_insertion_point(field_set_allocated:tesseract.TFNetworkModel.graph) -} - -// int64 global_step = 2; -inline void TFNetworkModel::clear_global_step() { - global_step_ = GOOGLE_LONGLONG(0); -} -inline ::google::protobuf::int64 TFNetworkModel::global_step() const { - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.global_step) - return global_step_; -} -inline void TFNetworkModel::set_global_step(::google::protobuf::int64 value) { - global_step_ = value; - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.global_step) -} - -// string spec = 3; -inline void TFNetworkModel::clear_spec() { - spec_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline const ::std::string &TFNetworkModel::spec() const { - // 
@@protoc_insertion_point(field_get:tesseract.TFNetworkModel.spec) - return spec_.GetNoArena(); -} -inline void TFNetworkModel::set_spec(const ::std::string &value) { - spec_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.spec) -} -#if LANG_CXX11 -inline void TFNetworkModel::set_spec(::std::string &&value) { - spec_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::move(value)); - // @@protoc_insertion_point(field_set_rvalue:tesseract.TFNetworkModel.spec) -} -#endif -inline void TFNetworkModel::set_spec(const char *value) { - GOOGLE_DCHECK(value != NULL); - - spec_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(value)); - // @@protoc_insertion_point(field_set_char:tesseract.TFNetworkModel.spec) -} -inline void TFNetworkModel::set_spec(const char *value, size_t size) { - spec_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); - // @@protoc_insertion_point(field_set_pointer:tesseract.TFNetworkModel.spec) -} -inline ::std::string *TFNetworkModel::mutable_spec() { - // @@protoc_insertion_point(field_mutable:tesseract.TFNetworkModel.spec) - return spec_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline ::std::string *TFNetworkModel::release_spec() { - // @@protoc_insertion_point(field_release:tesseract.TFNetworkModel.spec) - - return spec_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline void TFNetworkModel::set_allocated_spec(::std::string *spec) { - if (spec != NULL) { - } else { - } - spec_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), spec); - // @@protoc_insertion_point(field_set_allocated:tesseract.TFNetworkModel.spec) -} - -// int32 depth = 4; -inline void TFNetworkModel::clear_depth() { - depth_ = 0; -} 
-inline ::google::protobuf::int32 TFNetworkModel::depth() const { - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.depth) - return depth_; -} -inline void TFNetworkModel::set_depth(::google::protobuf::int32 value) { - depth_ = value; - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.depth) -} - -// int32 x_size = 5; -inline void TFNetworkModel::clear_x_size() { - x_size_ = 0; -} -inline ::google::protobuf::int32 TFNetworkModel::x_size() const { - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.x_size) - return x_size_; -} -inline void TFNetworkModel::set_x_size(::google::protobuf::int32 value) { - x_size_ = value; - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.x_size) -} - -// int32 y_size = 6; -inline void TFNetworkModel::clear_y_size() { - y_size_ = 0; -} -inline ::google::protobuf::int32 TFNetworkModel::y_size() const { - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.y_size) - return y_size_; -} -inline void TFNetworkModel::set_y_size(::google::protobuf::int32 value) { - y_size_ = value; - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.y_size) -} - -// int32 batch_size = 8; -inline void TFNetworkModel::clear_batch_size() { - batch_size_ = 0; -} -inline ::google::protobuf::int32 TFNetworkModel::batch_size() const { - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.batch_size) - return batch_size_; -} -inline void TFNetworkModel::set_batch_size(::google::protobuf::int32 value) { - batch_size_ = value; - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.batch_size) -} - -// int32 num_classes = 9; -inline void TFNetworkModel::clear_num_classes() { - num_classes_ = 0; -} -inline ::google::protobuf::int32 TFNetworkModel::num_classes() const { - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.num_classes) - return num_classes_; -} -inline void TFNetworkModel::set_num_classes(::google::protobuf::int32 value) { - num_classes_ = 
value; - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.num_classes) -} - -// bool using_ctc = 10; -inline void TFNetworkModel::clear_using_ctc() { - using_ctc_ = false; -} -inline bool TFNetworkModel::using_ctc() const { - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.using_ctc) - return using_ctc_; -} -inline void TFNetworkModel::set_using_ctc(bool value) { - using_ctc_ = value; - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.using_ctc) -} - -// string image_input = 11; -inline void TFNetworkModel::clear_image_input() { - image_input_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline const ::std::string &TFNetworkModel::image_input() const { - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.image_input) - return image_input_.GetNoArena(); -} -inline void TFNetworkModel::set_image_input(const ::std::string &value) { - image_input_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.image_input) -} -#if LANG_CXX11 -inline void TFNetworkModel::set_image_input(::std::string &&value) { - image_input_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::move(value)); - // @@protoc_insertion_point(field_set_rvalue:tesseract.TFNetworkModel.image_input) -} -#endif -inline void TFNetworkModel::set_image_input(const char *value) { - GOOGLE_DCHECK(value != NULL); - - image_input_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(value)); - // @@protoc_insertion_point(field_set_char:tesseract.TFNetworkModel.image_input) -} -inline void TFNetworkModel::set_image_input(const char *value, size_t size) { - image_input_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); - // 
@@protoc_insertion_point(field_set_pointer:tesseract.TFNetworkModel.image_input) -} -inline ::std::string *TFNetworkModel::mutable_image_input() { - // @@protoc_insertion_point(field_mutable:tesseract.TFNetworkModel.image_input) - return image_input_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline ::std::string *TFNetworkModel::release_image_input() { - // @@protoc_insertion_point(field_release:tesseract.TFNetworkModel.image_input) - - return image_input_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline void TFNetworkModel::set_allocated_image_input(::std::string *image_input) { - if (image_input != NULL) { - } else { - } - image_input_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - image_input); - // @@protoc_insertion_point(field_set_allocated:tesseract.TFNetworkModel.image_input) -} - -// string image_widths = 12; -inline void TFNetworkModel::clear_image_widths() { - image_widths_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline const ::std::string &TFNetworkModel::image_widths() const { - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.image_widths) - return image_widths_.GetNoArena(); -} -inline void TFNetworkModel::set_image_widths(const ::std::string &value) { - image_widths_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.image_widths) -} -#if LANG_CXX11 -inline void TFNetworkModel::set_image_widths(::std::string &&value) { - image_widths_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::move(value)); - // @@protoc_insertion_point(field_set_rvalue:tesseract.TFNetworkModel.image_widths) -} -#endif -inline void TFNetworkModel::set_image_widths(const char *value) { - GOOGLE_DCHECK(value != NULL); - - 
image_widths_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(value)); - // @@protoc_insertion_point(field_set_char:tesseract.TFNetworkModel.image_widths) -} -inline void TFNetworkModel::set_image_widths(const char *value, size_t size) { - image_widths_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); - // @@protoc_insertion_point(field_set_pointer:tesseract.TFNetworkModel.image_widths) -} -inline ::std::string *TFNetworkModel::mutable_image_widths() { - // @@protoc_insertion_point(field_mutable:tesseract.TFNetworkModel.image_widths) - return image_widths_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline ::std::string *TFNetworkModel::release_image_widths() { - // @@protoc_insertion_point(field_release:tesseract.TFNetworkModel.image_widths) - - return image_widths_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline void TFNetworkModel::set_allocated_image_widths(::std::string *image_widths) { - if (image_widths != NULL) { - } else { - } - image_widths_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - image_widths); - // @@protoc_insertion_point(field_set_allocated:tesseract.TFNetworkModel.image_widths) -} - -// string image_heights = 13; -inline void TFNetworkModel::clear_image_heights() { - image_heights_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline const ::std::string &TFNetworkModel::image_heights() const { - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.image_heights) - return image_heights_.GetNoArena(); -} -inline void TFNetworkModel::set_image_heights(const ::std::string &value) { - image_heights_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.image_heights) -} -#if 
LANG_CXX11 -inline void TFNetworkModel::set_image_heights(::std::string &&value) { - image_heights_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::move(value)); - // @@protoc_insertion_point(field_set_rvalue:tesseract.TFNetworkModel.image_heights) -} -#endif -inline void TFNetworkModel::set_image_heights(const char *value) { - GOOGLE_DCHECK(value != NULL); - - image_heights_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(value)); - // @@protoc_insertion_point(field_set_char:tesseract.TFNetworkModel.image_heights) -} -inline void TFNetworkModel::set_image_heights(const char *value, size_t size) { - image_heights_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); - // @@protoc_insertion_point(field_set_pointer:tesseract.TFNetworkModel.image_heights) -} -inline ::std::string *TFNetworkModel::mutable_image_heights() { - // @@protoc_insertion_point(field_mutable:tesseract.TFNetworkModel.image_heights) - return image_heights_.MutableNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline ::std::string *TFNetworkModel::release_image_heights() { - // @@protoc_insertion_point(field_release:tesseract.TFNetworkModel.image_heights) - - return image_heights_.ReleaseNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline void TFNetworkModel::set_allocated_image_heights(::std::string *image_heights) { - if (image_heights != NULL) { - } else { - } - image_heights_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - image_heights); - // @@protoc_insertion_point(field_set_allocated:tesseract.TFNetworkModel.image_heights) -} - -// string output_layer = 14; -inline void TFNetworkModel::clear_output_layer() { - output_layer_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline const ::std::string 
&TFNetworkModel::output_layer() const { - // @@protoc_insertion_point(field_get:tesseract.TFNetworkModel.output_layer) - return output_layer_.GetNoArena(); -} -inline void TFNetworkModel::set_output_layer(const ::std::string &value) { - output_layer_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); - // @@protoc_insertion_point(field_set:tesseract.TFNetworkModel.output_layer) -} -#if LANG_CXX11 -inline void TFNetworkModel::set_output_layer(::std::string &&value) { - output_layer_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::move(value)); - // @@protoc_insertion_point(field_set_rvalue:tesseract.TFNetworkModel.output_layer) -} -#endif -inline void TFNetworkModel::set_output_layer(const char *value) { - GOOGLE_DCHECK(value != NULL); - - output_layer_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(value)); - // @@protoc_insertion_point(field_set_char:tesseract.TFNetworkModel.output_layer) -} -inline void TFNetworkModel::set_output_layer(const char *value, size_t size) { - output_layer_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); - // @@protoc_insertion_point(field_set_pointer:tesseract.TFNetworkModel.output_layer) -} -inline ::std::string *TFNetworkModel::mutable_output_layer() { - // @@protoc_insertion_point(field_mutable:tesseract.TFNetworkModel.output_layer) - return output_layer_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline ::std::string *TFNetworkModel::release_output_layer() { - // @@protoc_insertion_point(field_release:tesseract.TFNetworkModel.output_layer) - - return output_layer_.ReleaseNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); -} -inline void TFNetworkModel::set_allocated_output_layer(::std::string *output_layer) { - if (output_layer != NULL) { - } else { - } - 
output_layer_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - output_layer); - // @@protoc_insertion_point(field_set_allocated:tesseract.TFNetworkModel.output_layer) -} - -#ifdef __GNUC__ -# pragma GCC diagnostic pop -#endif // __GNUC__ - -// @@protoc_insertion_point(namespace_scope) - -} // namespace tesseract - -// @@protoc_insertion_point(global_scope) - -#endif // PROTOBUF_INCLUDED_tfnetwork_2eproto diff --git a/src/lstm/tfnetwork.proto b/src/lstm/tfnetwork.proto deleted file mode 100644 index d9cf33908b..0000000000 --- a/src/lstm/tfnetwork.proto +++ /dev/null @@ -1,74 +0,0 @@ -// Protocol description for Tesseract - -// Compile this file with the Protocol Compiler protoc to generate -// the files tfnetwork.pb.cc and tfnetwork.pb.h. - -// This requires the protocol descriptions for TensorFlow -// (included in the TensorFlow sources). - -// With TensorFlow sources installed in /usr/src/tensorflow/tensorflow-1.10.1, -// this command was used on Debian to generate the files: - -// protoc --cpp_out=$PWD --proto_path=/usr/src/tensorflow/tensorflow-1.10.1 \ -// --proto_path=$PWD src/lstm/tfnetwork.proto - -syntax = "proto3"; - -package tesseract; - -import "tensorflow/core/framework/graph.proto"; - -// This proto is the interface between a python TF graph builder/trainer and -// the C++ world. The writer of this proto must provide fields as documented -// by the comments below. -// The graph must have a placeholder for NetworkIO, Widths and Heights. The -// following python code creates the appropriate placeholders: -// -// input_layer = tf.placeholder(tf.float32, -// shape=[batch_size, xsize, ysize, depth_dim], -// name='NetworkIO') -// widths = tf.placeholder(tf.int32, shape=[batch_size], name='Widths') -// heights = tf.placeholder(tf.int32, shape=[batch_size], name='Heights') -// # Flip x and y to the TF convention. 
-// input_layer = tf.transpose(input_layer, [0, 2, 1, 3]) -// -// The widths and heights will be set to indicate the post-scaling size of the -// input image(s). -// For now batch_size is ignored and set to 1. -// The graph should return a 2-dimensional float32 tensor called 'softmax' of -// shape [sequence_length, num_classes], where sequence_length is allowed to -// be variable, given by the tensor itself. -// TODO(rays) determine whether it is worth providing for batch_size >1 and if -// so, how. -message TFNetworkModel { - // The TF graph definition. Required. - tensorflow.GraphDef graph = 1; - // The training index. Required to be > 0. - int64 global_step = 2; - // The original network definition for reference. Optional - string spec = 3; - // Input tensor parameters. - // Values per pixel. Required to be 1 or 3. Inputs assumed to be float32. - int32 depth = 4; - // Image size. Required. Zero implies flexible sizes, fixed if non-zero. - // If x_size > 0, images will be cropped/padded to the given size, after - // any scaling required by the y_size. - // If y_size > 0, images will be scaled isotropically to the given height. - int32 x_size = 5; - int32 y_size = 6; - // Number of images in a batch. Optional. - int32 batch_size = 8; - // Output tensor parameters. - // Number of output classes. Required to match the depth of the softmax. - int32 num_classes = 9; - // True if this network needs CTC-like decoding, dropping duplicated labels. - // The decoder always drops the null character. - bool using_ctc = 10; - // Name of input image tensor. - string image_input = 11; - // Name of image height and width tensors. - string image_widths = 12; - string image_heights = 13; - // Name of output (softmax) tensor. 
- string output_layer = 14; -} diff --git a/src/tesseract.cpp b/src/tesseract.cpp index 1ed7fcf398..2c27d2a04f 100644 --- a/src/tesseract.cpp +++ b/src/tesseract.cpp @@ -78,8 +78,8 @@ static void Win32WarningHandler(const char *module, const char *fmt, va_list ap) class AutoWin32ConsoleOutputCP { public: - explicit AutoWin32ConsoleOutputCP(UINT codeCP) { - oldCP_ = GetConsoleOutputCP(); + explicit AutoWin32ConsoleOutputCP(UINT codeCP) : + oldCP_(GetConsoleOutputCP()) { SetConsoleOutputCP(codeCP); } ~AutoWin32ConsoleOutputCP() { @@ -112,6 +112,9 @@ static void PrintVersionInfo() { #if defined(HAVE_NEON) || defined(__aarch64__) if (tesseract::SIMDDetect::IsNEONAvailable()) printf(" Found NEON\n"); +#elif defined(HAVE_RVV) + if (tesseract::SIMDDetect::IsRVVAvailable()) + printf(" Found RVV\n"); #else if (tesseract::SIMDDetect::IsAVX512BWAvailable()) { printf(" Found AVX512BW\n"); @@ -151,44 +154,41 @@ static void PrintVersionInfo() { } static void PrintHelpForPSM() { - const char *msg = - "Page segmentation modes:\n" - " 0 Orientation and script detection (OSD) only.\n" - " 1 Automatic page segmentation with OSD.\n" - " 2 Automatic page segmentation, but no OSD, or OCR. (not " + printf( + "Page segmentation modes (PSM):\n" + " 0|osd_only Orientation and script detection (OSD) only.\n" + " 1|auto_osd Automatic page segmentation with OSD.\n" + " 2|auto_only Automatic page segmentation, but no OSD, or OCR. (not " "implemented)\n" - " 3 Fully automatic page segmentation, but no OSD. (Default)\n" - " 4 Assume a single column of text of variable sizes.\n" - " 5 Assume a single uniform block of vertically aligned text.\n" - " 6 Assume a single uniform block of text.\n" - " 7 Treat the image as a single text line.\n" - " 8 Treat the image as a single word.\n" - " 9 Treat the image as a single word in a circle.\n" - " 10 Treat the image as a single character.\n" - " 11 Sparse text. Find as much text as possible in no" + " 3|auto Fully automatic page segmentation, but no OSD. 
(Default)\n" + " 4|single_column Assume a single column of text of variable sizes.\n" + " 5|single_block_vert_text Assume a single uniform block of vertically aligned text.\n" + " 6|single_block Assume a single uniform block of text.\n" + " 7|single_line Treat the image as a single text line.\n" + " 8|single_word Treat the image as a single word.\n" + " 9|circle_word Treat the image as a single word in a circle.\n" + " 10|single_char Treat the image as a single character.\n" + " 11|sparse_text Sparse text. Find as much text as possible in no" " particular order.\n" - " 12 Sparse text with OSD.\n" - " 13 Raw line. Treat the image as a single text line,\n" - " bypassing hacks that are Tesseract-specific.\n"; + " 12|sparse_text_osd Sparse text with OSD.\n" + " 13|raw_line Raw line. Treat the image as a single text line,\n" + " bypassing hacks that are Tesseract-specific.\n" + ); #ifdef DISABLED_LEGACY_ENGINE - const char *disabled_osd_msg = "\nNOTE: The OSD modes are currently disabled.\n"; - printf("%s%s", msg, disabled_osd_msg); -#else - printf("%s", msg); + printf("\nNOTE: The OSD modes are currently disabled.\n"); #endif } #ifndef DISABLED_LEGACY_ENGINE static void PrintHelpForOEM() { - const char *msg = - "OCR Engine modes:\n" - " 0 Legacy engine only.\n" - " 1 Neural nets LSTM engine only.\n" - " 2 Legacy + LSTM engines.\n" - " 3 Default, based on what is available.\n"; - - printf("%s", msg); + printf( + "OCR Engine modes (OEM):\n" + " 0|tesseract_only Legacy engine only.\n" + " 1|lstm_only Neural nets LSTM engine only.\n" + " 2|tesseract_lstm_combined Legacy + LSTM engines.\n" + " 3|default Default, based on what is available.\n" + ); } #endif // ndef DISABLED_LEGACY_ENGINE @@ -218,9 +218,9 @@ static void PrintHelpExtra(const char *program) { " -l LANG[+LANG] Specify language(s) used for OCR.\n" " -c VAR=VALUE Set value for config variables.\n" " Multiple -c arguments are allowed.\n" - " --psm NUM Specify page segmentation mode.\n" + " --psm PSM|NUM Specify 
page segmentation mode.\n" #ifndef DISABLED_LEGACY_ENGINE - " --oem NUM Specify OCR Engine mode.\n" + " --oem OEM|NUM Specify OCR Engine mode.\n" #endif "NOTE: These options must occur before any configfile.\n" "\n", @@ -332,12 +332,63 @@ static void FixPageSegMode(tesseract::TessBaseAPI &api, tesseract::PageSegMode p static bool checkArgValues(int arg, const char *mode, int count) { if (arg >= count || arg < 0) { - printf("Invalid %s value, please enter a number between 0-%d\n", mode, count - 1); + printf("Invalid %s value, please enter a symbolic %s value or a number between 0-%d\n", mode, mode, count - 1); return false; } return true; } +// Convert a symbolic or numeric string to an OEM value. +static int stringToOEM(const std::string arg) { + std::map oem_map = { + {"0", 0}, + {"1", 1}, + {"2", 2}, + {"3", 3}, + {"tesseract_only", 0}, + {"lstm_only", 1}, + {"tesseract_lstm_combined", 2}, + {"default", 3}, + }; + auto it = oem_map.find(arg); + return it == oem_map.end() ? -1 : it->second; +} + +static int stringToPSM(const std::string arg) { + std::map psm_map = { + {"0", 0}, + {"1", 1}, + {"2", 2}, + {"3", 3}, + {"4", 4}, + {"5", 5}, + {"6", 6}, + {"7", 7}, + {"8", 8}, + {"9", 9}, + {"10", 10}, + {"11", 11}, + {"12", 12}, + {"13", 13}, + {"osd_only", 0}, + {"auto_osd", 1}, + {"auto_only", 2}, + {"auto", 3}, + {"single_column", 4}, + {"single_block_vert_text", 5}, + {"single_block", 6}, + {"single_line", 7}, + {"single_word", 8}, + {"circle_word", 9}, + {"single_char", 10}, + {"sparse_text", 11}, + {"sparse_text_osd", 12}, + {"raw_line", 13}, + }; + auto it = psm_map.find(arg); + return it == psm_map.end() ? 
-1 : it->second; +} + // NOTE: arg_i is used here to avoid ugly *i so many times in this function static bool ParseArgs(int argc, char **argv, const char **lang, const char **image, const char **outputbase, const char **datapath, l_int32 *dpi, @@ -410,14 +461,15 @@ static bool ParseArgs(int argc, char **argv, const char **lang, const char **ima noocr = true; *list_langs = true; } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) { - if (!checkArgValues(atoi(argv[i + 1]), "PSM", tesseract::PSM_COUNT)) { + int psm = stringToPSM(argv[i + 1]); + if (!checkArgValues(psm, "PSM", tesseract::PSM_COUNT)) { return false; } - *pagesegmode = static_cast(atoi(argv[i + 1])); + *pagesegmode = static_cast(psm); ++i; } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) { #ifndef DISABLED_LEGACY_ENGINE - int oem = atoi(argv[i + 1]); + int oem = stringToOEM(argv[i + 1]); if (!checkArgValues(oem, "OEM", tesseract::OEM_COUNT)) { return false; } diff --git a/src/textord/baselinedetect.cpp b/src/textord/baselinedetect.cpp index 82df673b6e..dea1873593 100644 --- a/src/textord/baselinedetect.cpp +++ b/src/textord/baselinedetect.cpp @@ -34,6 +34,7 @@ #include "helpers.h" #include "linlsq.h" #include "makerow.h" +#include "tesserrstream.h" // for tesserr #include "textord.h" #include "tprintf.h" #include "underlin.h" @@ -671,9 +672,10 @@ bool BaselineBlock::ComputeLineSpacing() { } } if (debug_level_ > 0) { - tprintf("Spacing %g, in %zu rows, %d gaps fitted out of %d non-trivial\n", - line_spacing_, row_positions.size(), fitting_gaps, - non_trivial_gaps); + tesserr << "Spacing " << line_spacing_ << ", in " + << row_positions.size() << " rows, " + << fitting_gaps << " gaps fitted out of " + << non_trivial_gaps << " non-trivial\n"; } return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings; } diff --git a/src/textord/blkocc.cpp b/src/textord/blkocc.cpp index 69e18f6a74..60283e744f 100644 --- a/src/textord/blkocc.cpp +++ b/src/textord/blkocc.cpp @@ -125,7 +125,7 @@ static 
void horizontal_cblob_projection( // project outlines /** * horizontal_coutline_projection * - * Compute the horizontal projection of a outline from its outlines + * Compute the horizontal projection of an outline from its outlines * and add to the given STATS. */ diff --git a/src/textord/devanagari_processing.cpp b/src/textord/devanagari_processing.cpp index bad6edb50f..13da1374c0 100644 --- a/src/textord/devanagari_processing.cpp +++ b/src/textord/devanagari_processing.cpp @@ -38,15 +38,16 @@ INT_VAR(devanagari_split_debuglevel, 0, "Debug level for split shiro-rekha proce BOOL_VAR(devanagari_split_debugimage, 0, "Whether to create a debug image for split shiro-rekha process."); -ShiroRekhaSplitter::ShiroRekhaSplitter() { - orig_pix_ = nullptr; - segmentation_block_list_ = nullptr; - splitted_image_ = nullptr; - global_xheight_ = kUnspecifiedXheight; - perform_close_ = false; - debug_image_ = nullptr; - pageseg_split_strategy_ = NO_SPLIT; - ocr_split_strategy_ = NO_SPLIT; +ShiroRekhaSplitter::ShiroRekhaSplitter() : + orig_pix_(nullptr), + splitted_image_(nullptr), + pageseg_split_strategy_(NO_SPLIT), + ocr_split_strategy_(NO_SPLIT), + debug_image_(nullptr), + segmentation_block_list_(nullptr), + global_xheight_(kUnspecifiedXheight), + perform_close_(false) +{ } ShiroRekhaSplitter::~ShiroRekhaSplitter() { diff --git a/src/textord/tabfind.cpp b/src/textord/tabfind.cpp index f6ba2b8477..fb6d99ae04 100644 --- a/src/textord/tabfind.cpp +++ b/src/textord/tabfind.cpp @@ -67,8 +67,8 @@ TabFind::TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVec : AlignedBlob(gridsize, bleft, tright) , resolution_(resolution) , image_origin_(0, tright.y() - 1) - , v_it_(&vectors_) { - width_cb_ = nullptr; + , v_it_(&vectors_) + , width_cb_(nullptr) { v_it_.add_list_after(vlines); SetVerticalSkewAndParallelize(vertical_x, vertical_y); using namespace std::placeholders; // for _1 diff --git a/src/textord/wordseg.cpp b/src/textord/wordseg.cpp index cb9202eb04..b3db3445c3 
100644 --- a/src/textord/wordseg.cpp +++ b/src/textord/wordseg.cpp @@ -87,6 +87,7 @@ void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows) { word->set_flag(W_EOL, true); word->set_flag(W_DONT_CHOP, one_blob); word_it.add_after_then_move(word); + real_row->recalc_bounding_box(); row_it.add_after_then_move(real_row); } } diff --git a/src/training/CMakeLists.txt b/src/training/CMakeLists.txt index 7fbf02120e..c764442070 100644 --- a/src/training/CMakeLists.txt +++ b/src/training/CMakeLists.txt @@ -126,7 +126,7 @@ install( ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) generate_export_header(common_training EXPORT_MACRO_NAME TESS_COMMON_TRAINING_API) -if (MSVC) +if (MSVC AND BUILD_SHARED_LIBS) install(FILES $ DESTINATION bin OPTIONAL) endif() project_group(common_training "Training Tools") @@ -298,7 +298,7 @@ if(ICU_FOUND) RUNTIME DESTINATION bin LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) - if (MSVC) + if (MSVC AND BUILD_SHARED_LIBS) install(FILES $ DESTINATION bin OPTIONAL) endif() generate_export_header(unicharset_training EXPORT_MACRO_NAME diff --git a/src/training/classifier_tester.cpp b/src/training/classifier_tester.cpp index 3c40a9ae30..6398dedf36 100644 --- a/src/training/classifier_tester.cpp +++ b/src/training/classifier_tester.cpp @@ -35,13 +35,13 @@ enum ClassifierName { CN_PRUNER, CN_FULL, CN_COUNT }; static const char *names[] = {"pruner", "full"}; -static tesseract::ShapeClassifier *InitializeClassifier(const char *classifer_name, +static tesseract::ShapeClassifier *InitializeClassifier(const char *classifier_name, const UNICHARSET &unicharset, int argc, char **argv, tesseract::TessBaseAPI **api) { // Decode the classifier string. 
ClassifierName classifier = CN_COUNT; for (int c = 0; c < CN_COUNT; ++c) { - if (strcmp(classifer_name, names[c]) == 0) { + if (strcmp(classifier_name, names[c]) == 0) { classifier = static_cast(c); break; } @@ -75,7 +75,7 @@ static tesseract::ShapeClassifier *InitializeClassifier(const char *classifer_na } else if (classifier == CN_FULL) { shape_classifier = new tesseract::TessClassifier(false, classify); } - tprintf("Testing classifier %s:\n", classifer_name); + tprintf("Testing classifier %s:\n", classifier_name); return shape_classifier; } diff --git a/src/training/combine_lang_model.cpp b/src/training/combine_lang_model.cpp index 3f97238d61..8ddfbd89b9 100644 --- a/src/training/combine_lang_model.cpp +++ b/src/training/combine_lang_model.cpp @@ -19,6 +19,7 @@ #include "commandlineflags.h" #include "commontraining.h" // CheckSharedLibraryVersion #include "lang_model_helpers.h" +#include "tesserrstream.h" // for tesserr #include "tprintf.h" #include "unicharset_training_utils.h" @@ -60,8 +61,8 @@ int main(int argc, char **argv) { tprintf("Failed to load unicharset from %s\n", FLAGS_input_unicharset.c_str()); return EXIT_FAILURE; } - tprintf("Loaded unicharset of size %zu from file %s\n", unicharset.size(), - FLAGS_input_unicharset.c_str()); + tesserr << "Loaded unicharset of size " << unicharset.size() + << " from file " << FLAGS_input_unicharset.c_str() << '\n'; // Set unichar properties tprintf("Setting unichar properties\n"); diff --git a/src/training/common/errorcounter.cpp b/src/training/common/errorcounter.cpp index 3d5a5bbb4f..5fb83cca29 100644 --- a/src/training/common/errorcounter.cpp +++ b/src/training/common/errorcounter.cpp @@ -23,6 +23,7 @@ #include "sampleiterator.h" #include "shapeclassifier.h" #include "shapetable.h" +#include "tesserrstream.h" #include "trainingsample.h" #include "trainingsampleset.h" #include "unicity_table.h" @@ -50,7 +51,10 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le ErrorCounter 
counter(classifier->GetUnicharset(), fontsize); std::vector results; - clock_t start = clock(); + clock_t total_time = 0; + if (report_level > 1) { + total_time = clock(); + } unsigned total_samples = 0; double unscaled_error = 0.0; // Set a number of samples on which to run the classify debug mode. @@ -85,7 +89,6 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le } ++total_samples; } - const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC; // Create the appropriate error report. unscaled_error = counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it, unichar_error, fonts_report); @@ -94,8 +97,9 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le } if (report_level > 1 && total_samples > 0) { // It is useful to know the time in microseconds/char. - tprintf("Errors computed in %.2fs at %.1f μs/char\n", total_time, - 1000000.0 * total_time / total_samples); + total_time = 1000 * (clock() - total_time) / CLOCKS_PER_SEC; + tesserr << "Errors computed in " << total_time << " ms at " + << 1000 * total_time / total_samples << " μs/char\n"; } return unscaled_error; } diff --git a/src/training/common/trainingsampleset.cpp b/src/training/common/trainingsampleset.cpp index 118febb6a3..bf1e40ceb6 100644 --- a/src/training/common/trainingsampleset.cpp +++ b/src/training/common/trainingsampleset.cpp @@ -28,6 +28,7 @@ #include "intfeaturemap.h" #include "intfeaturespace.h" #include "shapetable.h" +#include "tesserrstream.h" // for tesserr #include "trainingsample.h" #include "trainingsampleset.h" #include "unicity_table.h" @@ -566,8 +567,9 @@ void TrainingSampleSet::OrganizeByFontAndClass() { int font_id = samples_[s]->font_id(); int class_id = samples_[s]->class_id(); if (font_id < 0 || font_id >= font_id_map_.SparseSize()) { - tprintf("Font id = %d/%d, class id = %d/%d on sample %zu\n", font_id, - font_id_map_.SparseSize(), class_id, unicharset_size_, s); + tesserr << "Font id = " << 
font_id << '/' << font_id_map_.SparseSize() + << ", class id = " << class_id << '/' << unicharset_size_ + << " on sample " << s << '\n'; } ASSERT_HOST(font_id >= 0 && font_id < font_id_map_.SparseSize()); ASSERT_HOST(class_id >= 0 && class_id < unicharset_size_); diff --git a/src/training/pango/boxchar.cpp b/src/training/pango/boxchar.cpp index d31c5a8b4e..35c1a2b7fb 100644 --- a/src/training/pango/boxchar.cpp +++ b/src/training/pango/boxchar.cpp @@ -23,6 +23,7 @@ #include "fileio.h" #include "normstrngs.h" +#include "tesserrstream.h" // for tesserr #include "tprintf.h" #include "unicharset.h" #include "unicode/uchar.h" // from libicu @@ -100,7 +101,7 @@ void BoxChar::PrepareToWrite(std::vector *boxes) { InsertSpaces(rtl_rules, vertical_rules, boxes); for (size_t i = 0; i < boxes->size(); ++i) { if ((*boxes)[i]->box_ == nullptr) { - tprintf("Null box at index %zu\n", i); + tesserr << "Null box at index " << i << '\n'; } } if (rtl_rules) { diff --git a/src/training/pango/pango_font_info.cpp b/src/training/pango/pango_font_info.cpp index 5adbb26f9e..aefe73417e 100644 --- a/src/training/pango/pango_font_info.cpp +++ b/src/training/pango/pango_font_info.cpp @@ -521,9 +521,9 @@ bool FontUtils::IsAvailableFont(const char *input_query_desc, std::string *best_ *best_match = selected_desc_str; // Clip the ending ' 0' if there is one. It seems that, if there is no // point size on the end of the fontname, then Pango always appends ' 0'. 
- int len = best_match->size(); + auto len = best_match->size(); if (len > 2 && best_match->at(len - 1) == '0' && best_match->at(len - 2) == ' ') { - *best_match = best_match->substr(0, len - 2); + best_match->resize(len - 2); } } g_free(selected_desc_str); diff --git a/src/training/unicharset/lstmtrainer.cpp b/src/training/unicharset/lstmtrainer.cpp index 0f6035f609..eeb7943535 100644 --- a/src/training/unicharset/lstmtrainer.cpp +++ b/src/training/unicharset/lstmtrainer.cpp @@ -36,9 +36,6 @@ #include "networkbuilder.h" #include "ratngs.h" #include "recodebeam.h" -#ifdef INCLUDE_TENSORFLOW -# include "tfnetwork.h" -#endif #include "tprintf.h" namespace tesseract { @@ -186,23 +183,6 @@ bool LSTMTrainer::InitNetwork(const char *network_spec, int append_index, return true; } -// Initializes a trainer from a serialized TFNetworkModel proto. -// Returns the global step of TensorFlow graph or 0 if failed. -#ifdef INCLUDE_TENSORFLOW -int LSTMTrainer::InitTensorFlowNetwork(const std::string &tf_proto) { - delete network_; - TFNetwork *tf_net = new TFNetwork("TensorFlow"); - training_iteration_ = tf_net->InitFromProtoStr(tf_proto); - if (training_iteration_ == 0) { - tprintf("InitFromProtoStr failed!!\n"); - return 0; - } - network_ = tf_net; - ASSERT_HOST(recoder_.code_range() == tf_net->num_classes()); - return training_iteration_; -} -#endif - // Resets all the iteration counters for fine tuning or traininng a head, // where we want the error reporting to reset. 
void LSTMTrainer::InitIterations() { diff --git a/src/training/unicharset/lstmtrainer.h b/src/training/unicharset/lstmtrainer.h index 1d90423a48..e10514c6dc 100644 --- a/src/training/unicharset/lstmtrainer.h +++ b/src/training/unicharset/lstmtrainer.h @@ -119,10 +119,6 @@ class TESS_UNICHARSET_TRAINING_API LSTMTrainer : public LSTMRecognizer { bool InitNetwork(const char *network_spec, int append_index, int net_flags, float weight_range, float learning_rate, float momentum, float adam_beta); - // Initializes a trainer from a serialized TFNetworkModel proto. - // Returns the global step of TensorFlow graph or 0 if failed. - // Building a compatible TF graph: See tfnetwork.proto. - int InitTensorFlowNetwork(const std::string &tf_proto); // Resets all the iteration counters for fine tuning or training a head, // where we want the error reporting to reset. void InitIterations(); diff --git a/src/training/unicharset/unicharset_training_utils.cpp b/src/training/unicharset/unicharset_training_utils.cpp index 853bee8e8e..a6e38a90cd 100644 --- a/src/training/unicharset/unicharset_training_utils.cpp +++ b/src/training/unicharset/unicharset_training_utils.cpp @@ -2,7 +2,6 @@ // File: unicharset_training_utils.cpp // Description: Training utilities for UNICHARSET. // Author: Ray Smith -// Created: Fri Oct 17 17:09:01 PDT 2014 // // (C) Copyright 2014, Google Inc. 
// Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,6 +28,7 @@ #include "icuerrorcode.h" #include "normstrngs.h" #include "statistc.h" +#include "tesserrstream.h" // for tesserr #include "unicharset.h" #include "unicode/uchar.h" // from libicu #include "unicode/uscript.h" // from libicu @@ -189,8 +189,8 @@ void SetPropertiesForInputFile(const std::string &script_dir, // Load the input unicharset unicharset.load_from_file(input_unicharset_file.c_str()); - tprintf("Loaded unicharset of size %zu from file %s\n", unicharset.size(), - input_unicharset_file.c_str()); + tesserr << "Loaded unicharset of size " << unicharset.size() + << " from file " << input_unicharset_file << '\n'; // Set unichar properties tprintf("Setting unichar properties\n"); diff --git a/src/viewer/scrollview.cpp b/src/viewer/scrollview.cpp index f5336c589a..73bd3c5fc2 100644 --- a/src/viewer/scrollview.cpp +++ b/src/viewer/scrollview.cpp @@ -307,7 +307,7 @@ void ScrollView::Initialize(const char *name, int x_pos, int y_pos, int x_size, // Set up an actual Window on the client side. char message[kMaxMsgSize]; snprintf(message, sizeof(message), - "w%u = luajava.newInstance('com.google.scrollview.ui" + "w%d = luajava.newInstance('com.google.scrollview.ui" ".SVWindow','%s',%u,%u,%u,%u,%u,%u,%u)\n", window_id_, window_name_, window_id_, x_pos, y_pos, x_size, y_size, x_canvas_size, y_canvas_size); @@ -393,7 +393,7 @@ void ScrollView::SendMsg(const char *format, ...) { va_end(args); char form[kMaxMsgSize]; - snprintf(form, sizeof(form), "w%u:%s\n", window_id_, message); + snprintf(form, sizeof(form), "w%d:%s\n", window_id_, message); stream_->Send(form); } @@ -532,7 +532,7 @@ void ScrollView::AlwaysOnTop(bool b) { // Adds a message entry to the message box. 
void ScrollView::AddMessage(const char *message) { char form[kMaxMsgSize]; - snprintf(form, sizeof(form), "w%u:%s", window_id_, message); + snprintf(form, sizeof(form), "w%d:%s", window_id_, message); char *esc = AddEscapeChars(form); SendMsg("addMessage(\"%s\")", esc); diff --git a/src/viewer/svutil.cpp b/src/viewer/svutil.cpp index 7fe6825988..3acce98040 100644 --- a/src/viewer/svutil.cpp +++ b/src/viewer/svutil.cpp @@ -273,8 +273,6 @@ SVNetwork::SVNetwork(const char *hostname, int port) { buffer_ptr_ = nullptr; - struct addrinfo *addr_info = nullptr; - struct addrinfo hints = {0, PF_INET, SOCK_STREAM}; auto port_string = std::to_string(port); # ifdef _WIN32 // Initialize Winsock @@ -285,6 +283,10 @@ SVNetwork::SVNetwork(const char *hostname, int port) { } # endif // _WIN32 + struct addrinfo *addr_info = nullptr; + struct addrinfo hints = {}; + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; if (getaddrinfo(hostname, port_string.c_str(), &hints, &addr_info) != 0) { std::cerr << "Error resolving name for ScrollView host " << std::string(hostname) << ":" << port << std::endl; diff --git a/src/wordrec/associate.h b/src/wordrec/associate.h index 0559d9ae1b..2df85085ea 100644 --- a/src/wordrec/associate.h +++ b/src/wordrec/associate.h @@ -55,13 +55,13 @@ struct AssociateStats { float shape_cost; // cost of blob shape bool bad_shape; // true if the shape of the blob is unacceptable - float full_wh_ratio; // width-to-hight ratio + gap on the right - float full_wh_ratio_total; // sum of width-to-hight ratios + float full_wh_ratio; // width-to-height ratio + gap on the right + float full_wh_ratio_total; // sum of width-to-height ratios // on the path terminating at this blob float full_wh_ratio_var; // variance of full_wh_ratios on the path bool bad_fixed_pitch_right_gap; // true if there is no gap before // the blob on the right - bool bad_fixed_pitch_wh_ratio; // true if the blobs has width-to-hight + bool bad_fixed_pitch_wh_ratio; // true if the blobs has 
width-to-height // ratio > kMaxFixedPitchCharAspectRatio int gap_sum; // sum of gaps within the blob }; diff --git a/src/wordrec/language_model.h b/src/wordrec/language_model.h index 010cf5b4f4..98f90f748a 100644 --- a/src/wordrec/language_model.h +++ b/src/wordrec/language_model.h @@ -227,7 +227,7 @@ class LanguageModel { // (used by ComputeNgramCost()). float ComputeDenom(BLOB_CHOICE_LIST *curr_list); - // Fills the given consistenty_info based on parent_vse.consistency_info + // Fills the given consistency_info based on parent_vse.consistency_info // and on the consistency of the given unichar_id with parent_vse. void FillConsistencyInfo(int curr_col, bool word_end, BLOB_CHOICE *b, ViterbiStateEntry *parent_vse, WERD_RES *word_res, diff --git a/src/wordrec/tface.cpp b/src/wordrec/tface.cpp index 06b9e9275a..107085d83c 100644 --- a/src/wordrec/tface.cpp +++ b/src/wordrec/tface.cpp @@ -97,7 +97,7 @@ int Wordrec::dict_word(const WERD_CHOICE &word) { void Wordrec::set_pass1() { chop_ok_split.set_value(70.0); language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS1); - SettupPass1(); + SetupPass1(); } /** @@ -108,7 +108,7 @@ void Wordrec::set_pass1() { void Wordrec::set_pass2() { chop_ok_split.set_value(pass2_ok_split); language_model_->getParamsModel().SetPass(ParamsModel::PTRAIN_PASS2); - SettupPass2(); + SetupPass2(); } /** diff --git a/src/wordrec/wordrec.cpp b/src/wordrec/wordrec.cpp index ceaad04431..be6590688a 100644 --- a/src/wordrec/wordrec.cpp +++ b/src/wordrec/wordrec.cpp @@ -99,10 +99,10 @@ Wordrec::Wordrec() "Save alternative paths found during chopping" " and segmentation search", params()) - , pass2_ok_split(0.0f) { - prev_word_best_choice_ = nullptr; - language_model_ = std::make_unique(&get_fontinfo_table(), &(getDict())); - fill_lattice_ = nullptr; + , language_model_(std::make_unique(&get_fontinfo_table(), &(getDict()))) + , pass2_ok_split(0.0f) + , prev_word_best_choice_(nullptr) + , fill_lattice_(nullptr) { } } // namespace tesseract 
diff --git a/tesseract.pc.cmake b/tesseract.pc.cmake index 5469a3987a..dd801aeb13 100644 --- a/tesseract.pc.cmake +++ b/tesseract.pc.cmake @@ -1,13 +1,13 @@ prefix=@CMAKE_INSTALL_PREFIX@ -exec_prefix=${prefix}/bin -libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ -includedir=${prefix}/include +exec_prefix=@CMAKE_INSTALL_PREFIX@ +libdir=@CMAKE_INSTALL_FULL_LIBDIR@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ Name: @tesseract_NAME@ -Description: An OCR Engine that was developed at HP Labs between 1985 and 1995... and now at Google. +Description: An OCR Engine that was developed at HP Labs (1985-1995) and Google (2006-2018). URL: https://github.com/tesseract-ocr/tesseract Version: @tesseract_VERSION@ Requires.private: lept -Libs: -L${libdir} -l@tesseract_OUTPUT_NAME@ @libarchive_LIBS@ @libcurl_LIBS@ @TENSORFLOW_LIBS@ +Libs: -L${libdir} -l@tesseract_OUTPUT_NAME@ @libarchive_LIBS@ @libcurl_LIBS@ Libs.private: Cflags: -I${includedir} diff --git a/tesseract.pc.in b/tesseract.pc.in index 81b1531f7a..59f606ce4e 100644 --- a/tesseract.pc.in +++ b/tesseract.pc.in @@ -7,10 +7,10 @@ libdir=@libdir@ includedir=@includedir@ Name: @PACKAGE_NAME@ -Description: An OCR Engine that was developed at HP Labs between 1985 and 1995... and now at Google. +Description: An OCR Engine that was developed at HP Labs (1985-1995) and Google (2006-2018). 
URL: https://github.com/tesseract-ocr/tesseract Version: @VERSION@ Requires.private: lept -Libs: -L${libdir} -ltesseract @libarchive_LIBS@ @libcurl_LIBS@ @TENSORFLOW_LIBS@ +Libs: -L${libdir} -ltesseract @libarchive_LIBS@ @libcurl_LIBS@ Libs.private: -lpthread Cflags: -I${includedir} diff --git a/unittest/fuzzers/oss-fuzz-build.sh b/unittest/fuzzers/oss-fuzz-build.sh index 5c7a37bb49..491e19ee00 100755 --- a/unittest/fuzzers/oss-fuzz-build.sh +++ b/unittest/fuzzers/oss-fuzz-build.sh @@ -32,7 +32,7 @@ mkdir -p "$OUT"/tessdata ( cd "$OUT"/tessdata test -f eng.traineddata || \ - curl -L -O https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata + curl -sSL -O https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata ) # OSS-Fuzz requires static linking for the project specific libraries, diff --git a/unittest/mastertrainer_test.cc b/unittest/mastertrainer_test.cc index b6dcc5bb5a..30f4bf4e04 100644 --- a/unittest/mastertrainer_test.cc +++ b/unittest/mastertrainer_test.cc @@ -156,9 +156,9 @@ class MasterTrainerTest : public testing::Test { return file::JoinPath(FLAGS_test_tmpdir, name); } - MasterTrainerTest() { - shape_table_ = nullptr; - master_trainer_ = nullptr; + MasterTrainerTest() : + shape_table_(nullptr), + master_trainer_(nullptr) { } ~MasterTrainerTest() override { delete shape_table_; diff --git a/unittest/pagesegmode_test.cc b/unittest/pagesegmode_test.cc index 9689e407e1..781e67d3f9 100644 --- a/unittest/pagesegmode_test.cc +++ b/unittest/pagesegmode_test.cc @@ -9,13 +9,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#if defined(_WIN32) -# include // for _access -#else -# include // for access -#endif #include #include +#include #include #include "helpers.h" #include "include_gunit.h" @@ -24,15 +20,6 @@ namespace tesseract { -// Replacement for std::filesystem::exists (C++-17) -static bool file_exists(const char *filename) { -#if defined(_WIN32) - return _access(filename, 0) == 0; -#else - return access(filename, 0) == 0; -#endif -} - // The fixture for testing Tesseract. class PageSegModeTest : public testing::Test { protected: @@ -86,7 +73,7 @@ class PageSegModeTest : public testing::Test { // and differently to line and block mode. TEST_F(PageSegModeTest, WordTest) { std::string filename = file::JoinPath(TESTING_DIR, "segmodeimg.tif"); - if (!file_exists(filename.c_str())) { + if (!std::filesystem::exists(filename)) { LOG(INFO) << "Skip test because of missing " << filename << '\n'; GTEST_SKIP(); } else { diff --git a/unittest/tatweel_test.cc b/unittest/tatweel_test.cc index d0d8f2ae6f..10e673b217 100644 --- a/unittest/tatweel_test.cc +++ b/unittest/tatweel_test.cc @@ -9,12 +9,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#if defined(_WIN32) -# include // for _access -#else -# include // for access -#endif - +#include #include "dawg.h" #include "include_gunit.h" #include "trie.h" @@ -23,15 +18,6 @@ namespace tesseract { -// Replacement for std::filesystem::exists (C++-17) -static bool file_exists(const char *filename) { -#if defined(_WIN32) - return _access(filename, 0) == 0; -#else - return access(filename, 0) == 0; -#endif -} - class TatweelTest : public ::testing::Test { protected: void SetUp() override { @@ -41,7 +27,7 @@ class TatweelTest : public ::testing::Test { TatweelTest() { std::string filename = TestDataNameToPath("ara.wordlist"); - if (file_exists(filename.c_str())) { + if (std::filesystem::exists(filename)) { std::string wordlist("\u0640"); CHECK_OK(file::GetContents(filename, &wordlist, file::Defaults())); // Put all the unicodes in the unicharset_. @@ -77,7 +63,7 @@ TEST_F(TatweelTest, DictIgnoresTatweel) { // This test verifies that the dictionary ignores the Tatweel character. tesseract::Trie trie(tesseract::DAWG_TYPE_WORD, "ara", SYSTEM_DAWG_PERM, unicharset_.size(), 0); std::string filename = TestDataNameToPath("ara.wordlist"); - if (!file_exists(filename.c_str())) { + if (!std::filesystem::exists(filename)) { LOG(INFO) << "Skip test because of missing " << filename; GTEST_SKIP(); } else { @@ -91,7 +77,7 @@ TEST_F(TatweelTest, UnicharsetLoadKeepsTatweel) { // This test verifies that a load of an existing unicharset keeps any // existing tatweel for backwards compatibility. 
std::string filename = TestDataNameToPath("ara.unicharset"); - if (!file_exists(filename.c_str())) { + if (!std::filesystem::exists(filename)) { LOG(INFO) << "Skip test because of missing " << filename; GTEST_SKIP(); } else { diff --git a/unittest/third_party/googletest b/unittest/third_party/googletest index e2239ee604..b514bdc898 160000 --- a/unittest/third_party/googletest +++ b/unittest/third_party/googletest @@ -1 +1 @@ -Subproject commit e2239ee6043f73722e7aa812a459f54a28552929 +Subproject commit b514bdc898e2951020cbdca1304b75f5950d1f59 diff --git a/unittest/third_party/utf/rune.c b/unittest/third_party/utf/rune.c index 4b4f069742..6c4801141c 100644 --- a/unittest/third_party/utf/rune.c +++ b/unittest/third_party/utf/rune.c @@ -12,9 +12,9 @@ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. */ #include +#include #include #include "third_party/utf/utf.h" -#include "third_party/utf/utfdef.h" enum { Bit1 = 7, @@ -74,7 +74,7 @@ int charntorune(Rune *rune, const char *str, int length) { * one character sequence (7-bit value) * 00000-0007F => T1 */ - c = *(uchar *)str; + c = *(uint8_t *)str; if (c < Tx) { *rune = c; return 1; @@ -89,7 +89,7 @@ int charntorune(Rune *rune, const char *str, int length) { * two character sequence (11-bit value) * 0080-07FF => T2 Tx */ - c1 = *(uchar *)(str + 1) ^ Tx; + c1 = *(uint8_t *)(str + 1) ^ Tx; if (c1 & Testx) goto bad; if (c < T3) { @@ -111,7 +111,7 @@ int charntorune(Rune *rune, const char *str, int length) { * three character sequence (16-bit value) * 0800-FFFF => T3 Tx Tx */ - c2 = *(uchar *)(str + 2) ^ Tx; + c2 = *(uint8_t *)(str + 2) ^ Tx; if (c2 & Testx) goto bad; if (c < T4) { @@ -129,7 +129,7 @@ int charntorune(Rune *rune, const char *str, int length) { * four character sequence (21-bit value) * 10000-1FFFFF => T4 Tx Tx Tx */ - c3 = *(uchar *)(str + 3) ^ Tx; + c3 = *(uint8_t *)(str + 3) ^ Tx; if (c3 & Testx) goto bad; if (c < T5) { @@ -168,7 +168,7 @@ int chartorune(Rune *rune, const char *str) { * one 
character sequence * 00000-0007F => T1 */ - c = *(uchar *)str; + c = *(uint8_t *)str; if (c < Tx) { *rune = c; return 1; @@ -178,7 +178,7 @@ int chartorune(Rune *rune, const char *str) { * two character sequence * 0080-07FF => T2 Tx */ - c1 = *(uchar *)(str + 1) ^ Tx; + c1 = *(uint8_t *)(str + 1) ^ Tx; if (c1 & Testx) goto bad; if (c < T3) { @@ -195,7 +195,7 @@ int chartorune(Rune *rune, const char *str) { * three character sequence * 0800-FFFF => T3 Tx Tx */ - c2 = *(uchar *)(str + 2) ^ Tx; + c2 = *(uint8_t *)(str + 2) ^ Tx; if (c2 & Testx) goto bad; if (c < T4) { @@ -210,7 +210,7 @@ int chartorune(Rune *rune, const char *str) { * four character sequence (21-bit value) * 10000-1FFFFF => T4 Tx Tx Tx */ - c3 = *(uchar *)(str + 3) ^ Tx; + c3 = *(uint8_t *)(str + 3) ^ Tx; if (c3 & Testx) goto bad; if (c < T5) { @@ -304,7 +304,7 @@ int runelen(Rune rune) { int runenlen(const Rune *r, int nrune) { int nb; - ulong c; /* Rune is signed, so use unsigned for range check. */ + unsigned long c; /* Rune is signed, so use unsigned for range check. */ nb = 0; while (nrune--) { @@ -325,7 +325,7 @@ int runenlen(const Rune *r, int nrune) { int fullrune(const char *str, int n) { if (n > 0) { - int c = *(uchar *)str; + int c = *(uint8_t *)str; if (c < Tx) return 1; if (n > 1) { diff --git a/unittest/third_party/utf/utfdef.h b/unittest/third_party/utf/utfdef.h deleted file mode 100644 index deaf396b32..0000000000 --- a/unittest/third_party/utf/utfdef.h +++ /dev/null @@ -1,14 +0,0 @@ -#define uchar _utfuchar -#define ushort _utfushort -#define uint _utfuint -#define ulong _utfulong -#define vlong _utfvlong -#define uvlong _utfuvlong - -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; - -#define nelem(x) (sizeof(x) / sizeof((x)[0])) -#define nil ((void *)0)