diff --git a/.github/workflows/autotools-openmp.yml b/.github/workflows/autotools-openmp.yml index 4719eb12db..f92d7fd69b 100644 --- a/.github/workflows/autotools-openmp.yml +++ b/.github/workflows/autotools-openmp.yml @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: config: - - { name: 20.04-openmp, os: ubuntu-20.04 } + - { name: 24.04-openmp, os: ubuntu-24.04 } - { name: 22.04-openmp, os: ubuntu-22.04 } steps: diff --git a/.github/workflows/autotools.yml b/.github/workflows/autotools.yml index b657556fb8..c5a9ca07e3 100644 --- a/.github/workflows/autotools.yml +++ b/.github/workflows/autotools.yml @@ -15,10 +15,9 @@ jobs: config: - { name: ubuntu-22.04-clang-15-autotools, os: ubuntu-22.04, cxx: clang++-15 } #installed + - { name: ubuntu-24.04-gcc-14-autotools, os: ubuntu-24.04, cxx: g++-14 } #installed - { name: ubuntu-22.04-gcc-12-autotools, os: ubuntu-22.04, cxx: g++-12 } #installed - { name: ubuntu-22.04-gcc-11-autotools, os: ubuntu-22.04, cxx: g++-11 } #installed - - { name: ubuntu-20.04-gcc-10-autotools, os: ubuntu-20.04, cxx: g++-10 } #installed - - { name: ubuntu-20.04-gcc-9-autotools, os: ubuntu-20.04, cxx: g++-9 } #installed steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/cmake-win64.yml b/.github/workflows/cmake-win64.yml index 261814fbb2..9f450370c6 100644 --- a/.github/workflows/cmake-win64.yml +++ b/.github/workflows/cmake-win64.yml @@ -4,12 +4,12 @@ name: cmake-win64 on: #push: schedule: - - cron: 0 23 * * * + - cron: 0 5 * * * workflow_dispatch: env: ILOC: d:/a/local - png_ver: 1643 + png_ver: 1644 jobs: build: @@ -37,9 +37,9 @@ jobs: run: | mkdir ${{env.ILOC}} - - name: Uninstall Perl - run: | - choco uninstall strawberryperl + #- name: Uninstall Perl + # run: | + # choco uninstall strawberryperl - name: Build and Install zlib-ng shell: cmd diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index e723871178..380bdefab2 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -19,11 +19,8 
@@ jobs: - { name: macos-15-clang-cmake, os: macos-15, cxx: clang++ } # default - { name: ubuntu-22.04-clang-15-cmake, os: ubuntu-22.04, cxx: clang++-15 } #installed - + - { name: ubuntu-24.04-gcc-14-cmake, os: ubuntu-24.04, cxx: g++-14 } #installed - { name: ubuntu-22.04-gcc-12-cmake, os: ubuntu-22.04, cxx: g++-12 } #installed - - { name: ubuntu-22.04-gcc-11-cmake, os: ubuntu-22.04, cxx: g++-11 } #installed - - { name: ubuntu-20.04-gcc-10-cmake, os: ubuntu-20.04, cxx: g++-10 } #installed - - { name: ubuntu-20.04-gcc-9-cmake, os: ubuntu-20.04, cxx: g++-9 } #installed steps: - name: Install compilers on Linux diff --git a/.github/workflows/unittest-disablelegacy.yml b/.github/workflows/unittest-disablelegacy.yml index 12daaa84fb..cfa206f0e4 100644 --- a/.github/workflows/unittest-disablelegacy.yml +++ b/.github/workflows/unittest-disablelegacy.yml @@ -14,8 +14,8 @@ jobs: strategy: fail-fast: false matrix: - compiler: [ g++, clang++-15 ] - os: [ ubuntu-22.04 ] + compiler: [ g++, clang++-18 ] + os: [ ubuntu-24.04 ] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 2e268b6d0f..c0be830072 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -3,6 +3,14 @@ name: unittest # ubuntu-20.04-gcc-unittest - CI runs out of diskspace. 
on: #push: + pull_request: + paths: + - '**.cpp' + - '**.h' + - '**Makefile.am' + - '/configure.ac' + - 'unittest/**.c' + - 'unittest/**.cc' schedule: - cron: 0 0 * * * workflow_dispatch: @@ -15,7 +23,7 @@ jobs: fail-fast: false matrix: config: - - { name: ubuntu-20.04-gcc-unittest, os: ubuntu-20.04, cxx: g++, cxxflags: '-g -O2 -fsanitize=address,undefined' } + - { name: ubuntu-24.04-gcc-unittest, os: ubuntu-24.04, cxx: g++, cxxflags: '-g -O2 -fsanitize=address,undefined' } - { name: ubuntu-22.04-clang-unittest, os: ubuntu-22.04, cxx: clang++, cxxflags: '-g -O2 -fsanitize=address,undefined -stdlib=libc++' } steps: - uses: actions/checkout@v4 diff --git a/CMakeLists.txt b/CMakeLists.txt index 6cb5a6c84e..6727af290b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -282,7 +282,7 @@ endif() # Compiler specific environment if(CMAKE_COMPILER_IS_GNUCXX OR MINGW) set(CMAKE_CXX_FLAGS_DEBUG - "${CMAKE_CXX_FLAGS_DEBUG} -Wall -DDEBUG -pedantic -Og") + "${CMAKE_CXX_FLAGS_DEBUG} -Wall -DDEBUG -pedantic -Og -Wno-unknown-pragmas") elseif(MSVC) add_definitions(-D_CRT_SECURE_NO_WARNINGS) add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE) # strdup @@ -306,6 +306,10 @@ elseif(MSVC) set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") message(STATUS "Building with static CRT.") endif() + # Workaround: When building on VS 2022 17.10 or newer, but using an older runtime, + # mutexes can crash + # https://stackoverflow.com/questions/78598141/first-stdmutexlock-crashes-in-application-built-with-latest-visual-studio + add_definitions(-D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR) endif() if(CLANG) # clang all platforms set(CMAKE_CXX_FLAGS_RELEASE @@ -320,7 +324,10 @@ if(OPENMP_BUILD set(OPENMP_BUILD OFF) endif() if(OPENMP_BUILD) - find_package(OpenMP QUIET) + if(MSVC) # supported from cmake 3.30 + set(OpenMP_RUNTIME_MSVC "llvm") + endif(MSVC) + find_package(OpenMP) # https://stackoverflow.com/questions/12399422 # how-to-set-linker-flags-for-openmp-in-cmakes-try-compile-function if(NOT 
OpenMP_FOUND @@ -519,6 +526,7 @@ message(STATUS "General configuration for Tesseract ${PACKAGE_VERSION}") message(STATUS "--------------------------------------------------------") message(STATUS "Build type: ${CMAKE_BUILD_TYPE} ${BUILD_ARCH}") message(STATUS "Compiler: ${CMAKE_CXX_COMPILER_ID}") +message(STATUS "Compiler version: ${CMAKE_CXX_COMPILER_VERSION}") message(STATUS "Used standard: C++${CMAKE_CXX_STANDARD}") message(STATUS "CXX compiler options: ${COMPILER_FLAGS}") get_directory_property(DirCompDefs COMPILE_DEFINITIONS) @@ -891,7 +899,9 @@ if(BUILD_TESTS AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/unittest/third_party/googletest/CMakeLists.txt ) + enable_testing() add_subdirectory(unittest/third_party/googletest) + add_subdirectory(unittest) endif() if(BUILD_TRAINING_TOOLS) diff --git a/ChangeLog b/ChangeLog index 9e7ec162cf..87f713a227 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,466 +1,7 @@ -2024-11-10 - V5.5.0 -* Set hOCR capabilities ocrp_dir and ocrp_lang unconditionally. -* Calculate row bounding box in single-word mode per (issue #4304). -* Reduce clock syscalls (#4303). -* Several small performance and other code fixes. -* Modernized code. -* Print time for tessedit_timing_debug in milliseconds. -* Print time for ErrorCounter::ComputeErrorRate in milliseconds. -* cmake: Correctly set the soversion based on SemVer properties. -* Do not export PDBs for static libraries (issue #4279). -* Several other small fixes and improvements for builds and CI. -* Modernize code for renderers and remove filename conversion for Windows (#4330). -* Add build rule for Windows installer. -* Support symbolic values for --oem and --psm options. -* Remove Tensorflow support. -* Add RISC-V V support (#4346). -* Remove broken GitHub action msys2-4.1.1. 
+The ChangeLog for all releases from 1.0 (2006-06-16) up to 5.5.0 (2024-11-10) +is available here: -2024-06-11 - V5.4.1 -* Avoid FP overflow in NormEvidenceOf (fixes issue #4257) (#4259) -* Small build fixes and code improvements (#4262, #4263, #4266, #4267) +https://github.com/tesseract-ocr/tesseract/blob/64eab6c457b2337dd690746a5fde5c222b40d5f8/ChangeLog -2024-06-06 - V5.4.0 -* Small build fixes and code improvements - (#4241, #4243, #4244, #4245, #4246, #4248, #4249, #4250, #4253) +See https://github.com/tesseract-ocr/tesseract/releases for the latest release notes. -2024-05-19 - V5.4.0-rc2 -* Fix setup of datadir on installations with Conda (issue #4230) (#4240) -* Fix FP exception in Wordrec::angle_change (issue #4242) (#4243) - -2024-05-12 - V5.4.0-rc1 -* Build fixes, code refactoring and other smaller changes. -* Fix grey result of indexed PNG in pdfrenderer. -* Rename frk -> deu_latf (ISO 639-3, ISO 15924). -* Remove broken Dockerfile. -* Fixes for several issues reported by Coverity Scan. -* Remove unsupported OpenCL code and related API functions (#4220). -* Facilitate vectorization for generic build (#4223). -* Add PAGE XML renderer / export (#4214). -* Support training without lstmf files. -* Improve CCUtil::main_setup (fixes issue #4230 related to Coda). -* Allow for text angle/gradient to be retrieved (#4070). - -2024-01-18 - V5.3.4 -* Fixes for scrollview -* Fixes for autoconf, clang and sw builds -* Improve OCR for an image URL - * Fail on curl download errors - * New parameter curl_cookiefile - * Set User-Agent: header field in HTTP request for curl downloads -* Output directory list from "combine_tessdata -d" to stdout -* Other small improvements for code and documentation. - -2023-10-05 - V5.3.3 -* Small code fixes and improvements to fix Coverity Scan issues. -* Disable -mfpu=neon for aarch64. -* Fix build without git clone in cloned directory (required for FreeBSD). -* Other build fixes for autotools, cmake and sw. 
-* Fix regression in layout detection which was introduced in release 5.0.0. -* Fix regression which prevented loading of submodels, introduced in release 5.0.0-rc2. -* Other small improvements for code and documentation. - -2023-07-11 - V5.3.2 -* Updates for snap package building. -* Support for Sgaw and W Pwo Karen languages in the Myanmar validator (#4065). -* Improve format of logging from lstmtraining. -* Use less digits in filenames of checkpoints written by lstmtraining. -* Replace deprecated sprintf. -* Remove unused code in function fix_rep_char. -* Avoid 32 bit overflow in multiplication (fixes 3 CodeQL CI alerts). -* Avoid conversions from std::string to char* to std::string. -* Abort with error message if OSD is requested with LSTM-only model. -* cmake: allow to disable tiff (-DDISABLE_TIFF=ON). -* cmake: provide info about disabled LibArchive and CURL. -* cmake: check if leptonica was build with tiff support. -* Remove old broken GitHub action vcpkg-4.1.1 (fixes issue #4078). -* Create config.yml. -* Fix typos. - -2023-04-01 - V5.3.1 - * Bug fixes for some special scenarios: - * Fix issue #4010. - * textord: Catch empty rows in block iterator (fixes #4039). - * Fix FP division by zero (issue #3995). - * Improve documentation and log messages. - * Build fixes and improvements (mainly for cmake). - -2022-12-22 - V5.3.0 - * Minor updates for documentation and cmake builds. - -2022-12-13 - V5.3.0-rc1 - * Fix the training tools for the legacy OCR engine (fix issue #3925). - * PDF renderer: Ignore non-text blocks (fix issue #3957). - * Remove colormap before thresholding (fix issue #3940). - * Fix a number of performance issues reported by Coverity Scan. - * Training tools: Replace call of exit function by return statement in main function. - * Fix double free in function vigorous_noise_removal (fix issue #3876). - * Create to_win if needed in Textord::make_spline_rows (fix issue #3875). 
- * Bug fixes for ScrollView viewer: - * Fix memory issues in ScrollView::MessageReceiver. - * Catch potential nullptr in SVNetwork::SVNetwork. - * Move svpaint.cpp from src/viewer to src/. - * Add rule for svpaint executable in Autotools. - * Bug fixes and improvements for build tools: - * Fix AMD64 detection with autobuild on FreeBSD (fix issue #3964). - * Fix tesseract.pc generated from CMake to match Autotools. - * Detect availability of AVX512-VNNI. - * configure.ac: fix build on aarch64_be. - * Drop CI for old versions of macOS and Ubuntu. - -2022-07-06 - V5.2.0 - * Improvements and fixes for continuous integration, - autoconf and cmake builds. - * Set /Os for some 32 bit MS compilers (fixes #3769). - * Improve comments and other documentation. - * Add initial support for Intel AVX512F. - * Fix for very large PDF files on 32 bit hosts (fixes #3805). - * Fix NEON detection on FreeBSD. - * Fix regression with UZN files (fixes #3837). - * Fix calling delete[] for memory allocated by malloc in C API. - * Add an API function to init tesseract with traineddata from memory - (fixes #3691). - * Replace direct access to Leptonica internal data structures by - function calls and support latest releases of Leptonica. - * Replace std::regex by std::string functions (fixes issue #3830). - * Use compiled-in TESSDATA_PREFIX also on Windows (fixes #3767). - * Add new parameter 'invert_threshold', change the default threshold - from 0.5 to 0.7 and mark parameter 'tessedit_do_invert' as deprecated. - -2022-03-01 - V5.1.0 - * Handle image and line regions in output formats ALTO, hOCR and text. - * New parameter curl_timeout for curl_easy_setop. - * Build fixes and improvements. - * Catch nullptr in PageIterator::Orientation to improve robustness. - * Remove unused code. - -2022-01-06 - V5.0.1 - * Add SPDX-License-Identifier to public include files. - * Support redirections when running OCR on a URL. - * Lots of fixes and improvements for cmake builds. 
- Distributions should use the autoconf build. - * Fix broken msys2 build with gcc 11. - * Fix parameter certainty_scale (was duplicated). - * Fix some compiler warnings and clean code. - * Correctly detect amd64 and i386 on FreeBSD. - * Add libarchive and libcurl in continuous integration actions. - * Update submodule googletest to release v1.11.0. - -2021-11-22 - V5.0.0 - * Faster training and recognition by default (float instead of - double calculations) - * More options for binarization - * Improved support for ARM NEON - * Modernized code - * Removed proprietary data types like GenericVector and STRING - from public API - * pdf.ttf no longer needed, now integrated into the code - * Faster flat build with automake - * New options for combine_tessdata to show details of traineddata files - * Improved training messages - * Improved unit tests and fuzzing tests - * Lots of bug fixes - -2021-11-15 - V4.1.3 - * Fix build regression for autoconf build - -2021-11-14 - V4.1.2 - * Add RowAttributes getter to PageIterator - * Allow line images with larger width for training - * Fix memory leaks - * Improve build process - * Don't output empty ALTO sourceImageInformation (issue #2700) - * Extend URI support for Tesseract with libcurl - * Abort LSTM training with integer model (fixes issue #1573) - * Update documentation - * Make automake builds less noisy by default - * Don't use -march=native in automake builds - -2019-12-26 - V4.1.1 - * Implemented sw build (cppan is depreciated) - * Improved cmake build - * Code cleanup and optimization - * A lot of bug fixes... - -2019-07-07 - V4.1.0 - * Added new renders Alto, LSTMBox, WordStrBox. - * Added character boxes in hOCR output. - * Added python training scripts (experimental) as alternative shell scripts. - * Better support AVX / AVX2 / SSE. - * Disable OpenMP support by default (see e.g. #1171, #1081). - * Fix for bounding box problem. - * Implemented support for whitelist/blacklist in LSTM engine. 
- * Improved cmake configuration. - * Code modernization and improvements. - * A lot of bug fixes... - -2018-10-29 - V4.0.0 - * Added new neural network system based on LSTMs, with major accuracy gains. - * Improvements to PDF rendering. - * Fixes to trainingdata rendering. - * Added LSTM models+lang models to 101 languages. (tessdata repository) - * Improved multi-page TIFF handling. - * Fixed damage to binary images when processing PDFs. - * Fixes to training process to allow incremental training from a recognition model. - * Made LSTM the default engine, pushed cube out. - * Deleted cube code. - * Changed OEModes --oem 0 for legacy tesseract engine, --oem 1 for LSTM, --oem 2 for both, --oem 3 for default. - * Avoid use of Leptonica debug parameters or functions. - * Fixed multi-language mode. - * Removed support for VS2010. - * Added Support for VS2015 and VS2017 with CPPAN. - * Implemented invisible text only for PDF. - * Added AVX / SSE support for Windows. - * Enabled OpenMP support. - * Parameter unlv_tilde_crunching change to false. - * Miscellaneous Fixes. - * Detailed Changelog can be found at https://tesseract-ocr.github.io/tessdoc/4.0x-Changelog.html and https://tesseract-ocr.github.io/tessdoc/ReleaseNotes.html#tesseract-release-notes-oct-29-2018---v400 - -2017-02-16 - V3.05.00 - * Made some fine tuning to the hOCR output. - * Added TSV as another optional output format. - * Fixed ABI break introduced in 3.04.00 with the AnalyseLayout() method. - * text2image tool - Enable all OpenType ligatures available in a font. This feature requires Pango 1.38 or newer. - * Training tools - Replaced asserts with tprintf() and exit(1). - * Fixed Cygwin compatibility. - * Improved multipage tiff processing. - * Improved the embedded pdf font (pdf.ttf). - * Enable selection of OCR engine mode from command line. - * Changed tesseract command line parameter '-psm' to '--psm'. - * Write output of tesseract --help, --version and --list-langs to stdout instead of stderr. 
- * Added new C API for orientation and script detection, removed the old one. - * Increased minimum autoconf version to 2.59. - * Removed dead code. - * Require Leptonica 1.74 or higher. - * Fixed many compiler warning. - * Fixed memory and resource leaks. - * Fixed some issues with the 'Cube' OCR engine. - * Fixed some openCL issues. - * Added option to build Tesseract with CMake build system. - * Implemented CPPAN support for easy Windows building. - -2016-02-17 - V3.04.01 - * Added OSD renderer for psm 0. Works for single page and multi-page images. - * Improve tesstrain.sh script. - * Simplify build and run of ScrollView. - * Improved PDF output for OS X Preview utility. - * INCOMPATIBLE fix to hOCR line height information - commit 134ebc3. - * Added option to build Tesseract without Cube OCR engine (-DNO_CUBE_BUILD). - * Enable OpenMP support. - * Many bug fixes. - -2015-07-11 - V3.04.00 - * Tesseract development is now done with Git and hosted at github.com (Previously we used Subversion as a VCS and code.google.com for hosting). - * Tesseract now requires leptonica 1.71 or a higher version. - * Removed official support for VS 2008. - * Added support for 39 additional scripts/languages, including: amh, asm, aze_cyrl, bod, bos, ceb, cym, dzo, fas, gle, guj, hat, iku, jav, kat, kat_old, kaz, khm, kir, kur, lao, lat, mar, mya, nep, ori, pan, pus, san, sin, srp_latn, syr, tgk, tir, uig, urd, uzb, uzb_cyrl, yid - * Major updates to training system as a result of extensive testing on 100 languages. - * New training data for over 100 languages - * Improved performance with PIC compilation option. - * Significant change to invisible font system in pdf output to improve correctness and compatibility with external programs, particularly ghostscript. - * Improved font identification. - * Major change to improve layout analysis for heavily diacritic languages: Thai, Vietnamese, Kannada, Telugu etc. 
- * Fixed problems with shifted baselines so recognition can recover from layout analysis errors. - * Major refactor to improve speed on difficult images, especially when running a heap checker. - * Moved params from global in page layout to tesseractclass. - * Improved single column layout analysis. - * Allow ocr output to multiple formats using tesseract command line executable. - * Fixed issues with mixed eng+ara scripts. - * Improved script consistency in numbers. - * Major refactor of control.cpp to enable line recognition. - * Added tesstrain.sh - a master training script. - * Added ability to text2image training tool to just list available fonts. - * Added ability to text2image to underline words. - * Improved efficiency of image processing for PDF output. - * Added parameter description for each parameter listed with 'print-parameters' command line option. - * Added font info to hOCR output. - * Enabled streaming input and output of multi-page documents. - * Many bug fixes. - -2014-02-04 - V3.03(rc1) - * Added new training tool text2image to generate box/tif file pairs from - text and truetype fonts. - * Added support for PDF output with searchable text. - * Removed entire IMAGE class and all code in image directory. - * Tesseract executable: support for output to stdout; limited support for one - page images from stdin (especially on Windows) - * Added Renderer to API to allow document-level processing and output - of document formats, like hOCR, PDF. - * Major refactor of word-level recognition, beam search, eliminating dead code. - * Refactored classifier to make it easier to add new ones. - * Generalized feature extractor to allow feature extraction from greyscale. - * Improved sub/superscript treatment. - * Improved baseline fit. - * Added set_unicharset_properties to training tools. - * Many bug fixes. - * More training source data included. - -2012-02-01 - V3.02 - * Moved ResultIterator/PageIterator to ccmain. 
- * Added Right-to-left/Bidi capability in the output iterators for Hebrew/Arabic. - * Added paragraph detection in layout analysis/post OCR. - * Fixed inconsistent xheight during training and over-chopping. - * Added simultaneous multi-language capability. - * Refactored top-level word recognition module. - * Added experimental equation detector. - * Improved handling of resolution from input images. - * Blamer module added for error analysis. - * Cleaned up externally used namespace by removing includes from baseapi.h. - * Removed dead memory mangagement code. - * Tidied up constraints on control parameters. - * Added support for ShapeTable in classifier and training. - * Refactored class pruner. - * Fixed training leaks and randomness. - * Major improvements to layout analysis for better image detection, diacritic detection, better textline finding, better tabstop finding. - * Improved line detection and removal. - * Added fixed pitch chopper for CJK. - * Added UNICHARSET to WERD_CHOICE to make mult-language handling easier. - * Fixed problems with internally scaled images. - * Added page and bbox to string in tr files to identify source of training data better. - * Fixes to Hindi Shiroreka splitter. - * Added word bigram correction. - * Reduced stack memory consumption and eliminated some ugly typedefs. - * Added new uniform classifier API. - * Added new training error counter. - * Fixed endian bug in dawg reader. - * Many other fixes, including the way in which the chopper finds chops and messes with the outline while it does so. - -2010-11-29 - V3.01 - * Removed old/dead serialise/deserialize methods on *LISTIZED classes. - * Total rewrite of DENORM to better encapsulate operation and make - for potential to extract features from images. - * Thread-safety! Moved all critical global and static variables to members of the appropriate class. Tesseract is now thread-safe (multiple instances can be used in parallel in multiple threads.) 
with the minor exception that some control parameters are still global and affect all threads. - * Added Cube, a new recognizer for Arabic. Cube can also be used in combination with normal Tesseract for other languages with an improvement in accuracy at the cost of (much) lower speed. *There is no training module for Cube yet.* - * `OcrEngineMode` in `Init` replaces `AccuracyVSpeed` to control cube. - * Greatly improved segmentation search with consequent accuracy and speed improvements, especially for Chinese. - * Added `PageIterator` and `ResultIterator` as cleaner ways to get the full results out of Tesseract, that are not currently provided by any of the `TessBaseAPI::Get*` methods. All other methods, such as the `ETEXT_STRUCT` in particular are deprecated and will be deleted in the future. - * ApplyBoxes totally rewritten to make training easier. It can now cope with touching/overlapping training characters, and a new boxfile format allows word boxes instead of character boxes, BUT to use that you have to have already bootstrapped the language with character boxes. "Cyclic dependency" on traineddata. - * Auto orientation and script detection added to page layout analysis. - * Deleted *lots* of dead code. - * Fixxht module replaced with scalable data-driven module. - * Output font characteristics accuracy improved. - * Removed the double conversion at each classification. - * Upgraded oldest structs to be classes and deprecated PBLOB. - * Removed non-deterministic baseline fit. - * Added fixed length dawgs for Chinese. - * Handling of vertical text improved. - * Handling of leader dots improved. - * Table detection greatly improved. - * Fixed a couple of memory leaks. - * Fixed font labels on output text. (Not perfect, but a lot better than before.) - * Cleanup and more bug fixes - * Special treatments for Hindi. 
- * Support for build in VS2010 with Microsoft Windows SDK for Windows 7 (thanks to Michael Lutz) - -2010-09-21 - V3.00 - * Preparations for thread safety: - * Changed TessBaseAPI methods to be non-static - * Created a class hierarchy for the directories to hold instance data, - and began moving code into the classes. - * Moved thresholding code to a separate class. - * Added major new page layout analysis module. - * Added HOCR output (issues 221, 263: thanks to amkryukov). - * Added Leptonica as main image I/O and handling. Currently optional, - but in future releases linking with Leptonica will be mandatory. - * Ambiguity table rewritten to allow definite replacements in place - of fix_quotes. - * Added TessdataManager to combine data files into a single file. - * Some dead code deleted. - * VC++6 no longer supported. It can't cope with the use of templates. - * Many more languages added. - * Doxygenation of most of the function header comments. - * Added man pages. - * Added bash completion script (issue 247: thanks to neskiem) - * Fix integer overview in thresholding (issue 366: thanks to Cyanide.Drake) - * Add Danish Fraktur support (issues 300, 360: thanks to - dsl602230@vip.cybercity.dk) - * Fix file pointer leak (issue 359, thanks to yukihiro.nakadaira) - * Fix an error using user-words (Issue 345: thanks to max.markin) - * Fix a memory leak in tablefind.cpp (Issue 342, thanks to zdravco) - * Fix a segfault due to double fclose (Issue 320, thanks to souther) - * Fix an automake error (Issue 318, thanks to ichanjz) - * Fix a Win32 crash on fileFormatIsTiff() (Issues 304, 316, 317, 330, 347, - 349, 352: thanks to nguyenq87, max.markin, zdenop) - * Fixed a number of errors in newer (stricter) versions of VC++ (Issues - 301, among others) - -2009-06-30 - V2.04 - * Integrated bug fixes and patches and misc changes for portability. - * Integrated a patch to remove some of the "access" macros. 
- * Removed dependence on lua from the viewer, speeding it up - dramatically. - * Fixed the viewer so it compiles and runs properly! - * Specifically fixing issues: 1, 63, 67, 71, 76, 81, 82, 106, 111, - 112, 128, 129, 130, 133, 135, 142, 143, 145, 147, 153, 154, 160, - 165, 170, 175, 177, 187, 192, 195, 199, 201, 205, 209, 108, 169 - -2008-04-22 - V2.03 - * Fixed crash introduced in 2.02. - * Fixed lack of tessembedded.cpp in distribution. - * Added test for leptonica header files and conditional test for lib. - -2008-04-21 - V2.02 (again) - * Fixed namespace collisions with jpeg library (INT32). - * Portability fixes for Windows for new code. - * Updates to autoconf system for new code. - -2008-01-23 - V2.02 - * Improvements to clustering, training and classifier. - * Major internationalization improvements for large-character-set - * languages, eg Kannada. - * Removed some compiler warnings. - * Added multipage tiff support for training and running. - * Updated graphics output to talk to new java-based viewer. - * Added ability to save n-best lists. - * Added leptonica support for more file types. - * Improved Init/End to make them safe. - * Reduced memory use of dictionaries. - * Added some new APIs to TessBaseAPI. - -2007-08-27 - V2.01 - * Fixed UTF8 input problems with box file reader. - * Fixed various infinite loops and crashes in dawg code. - * Removed include of config_auto.h from host.h. - * Added automatic wctype encoding to unicharset_extractor. - * Fixed dawg table too full error. - * Removed svn files from tarball. - * Added new functions to tessdll. - * Increased maximum utf8 string in a classification result to 8. - -2007-07-02 - V2.00 - * Converted internal character handling to UTF8. - * Trained with 6 languages. - * Added unicharset_extractor, wordlist2dawg. - * Added boxfile creation mode. - * Added UNLV regression test capability. - * Fixed problems with copyright and registered symbols. - * Fixed extern "C" declarations problem. 
- -2007-05-15 - V1.04 - * Added dll exports for Windows. - * Fixed name collisions with stl etc. - * Made some preliminary changes ready for unicodeization. - * Several bug fixes discovered during unicodeization. - -2007-02-02 - V1.03 - * Added mftraining and cntraining. - * Added baseapi with adaptive thresholding for grey and color. - * Fixed many memory leaks. - * Fixed several bugs including lack of use of adaptive classifier. - * Added ifdefs to eliminate graphics code and add embedded platform support. - * Incorporated several patches, including 64-bit builds, Mac builds. - * Minor accuracy improvements. - -2006-10-04 - V1.02 - * Removed dependency on Aspirin. - * Fixed a few missing Apache license headers. - * Removed $log. - -2006-09-07 - V1.01. - * Added mfcpch.cpp and getopt.cpp for VC++. - * Fixed problem with greyscale images and no libtiff. - * Stopped debug window from being used for the usage output. - * Fixed load of inttemp for big-endian architectures. - * Fixed some Mac compilation issues. - -2006-06-16 - V1.0 of open source Tesseract checked-in. 
diff --git a/Makefile.am b/Makefile.am index 85ff31a081..9f2a367d9c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -394,9 +394,6 @@ endif noinst_LTLIBRARIES += libtesseract_ccutil.la libtesseract_ccutil_la_SOURCES = src/ccutil/ccutil.cpp -libtesseract_ccutil_la_SOURCES += src/ccutil/clst.cpp -libtesseract_ccutil_la_SOURCES += src/ccutil/elst2.cpp -libtesseract_ccutil_la_SOURCES += src/ccutil/elst.cpp libtesseract_ccutil_la_SOURCES += src/ccutil/errcode.cpp libtesseract_ccutil_la_SOURCES += src/ccutil/serialis.cpp libtesseract_ccutil_la_SOURCES += src/ccutil/scanutils.cpp @@ -1429,7 +1426,7 @@ pagesegmode_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS) pango_font_info_test_SOURCES = unittest/pango_font_info_test.cc pango_font_info_test_CPPFLAGS = $(unittest_CPPFLAGS) pango_font_info_test_LDADD = $(TRAINING_LIBS) $(LEPTONICA_LIBS) -pango_font_info_test_LDADD += $(ICU_I18N_LIBS) +pango_font_info_test_LDADD += $(ICU_I18N_LIBS) $(ICU_UC_LIBS) pango_font_info_test_LDADD += $(pangocairo_LIBS) pango_font_info_test_LDADD += $(pangoft2_LIBS) diff --git a/VERSION b/VERSION index d50359de18..7acd1cb0e1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.5.0 +5.5.1 diff --git a/configure.ac b/configure.ac index cb853e6665..bffc1d465c 100644 --- a/configure.ac +++ b/configure.ac @@ -28,8 +28,8 @@ AM_INIT_AUTOMAKE([foreign subdir-objects nostdinc]) # Define date of package, etc. Could be useful in auto-generated # documentation. -PACKAGE_YEAR=2024 -PACKAGE_DATE="11/10" +PACKAGE_YEAR=2025 +PACKAGE_DATE="05/25" abs_top_srcdir=`AS_DIRNAME([$0])` @@ -221,9 +221,9 @@ fi # additional checks for RVV targets if test x$check_for_rvv = x1; then AC_MSG_NOTICE([checking how to detect RVV availability]) - AC_CHECK_FUNCS([getauxval]) + AC_CHECK_FUNCS([getauxval elf_aux_info]) - if test $ac_cv_func_getauxval = no; then + if test $ac_cv_func_getauxval = no && test $ac_cv_func_elf_aux_info = no; then AC_MSG_WARN([RVV is available, but we don't know how to check for it. 
Will not be able to use RVV.]) fi fi diff --git a/src/api/altorenderer.cpp b/src/api/altorenderer.cpp index e373f73aa4..42faecb7f4 100644 --- a/src/api/altorenderer.cpp +++ b/src/api/altorenderer.cpp @@ -51,6 +51,20 @@ static void AddBoxToAlto(const ResultIterator *it, PageIteratorLevel level, } } +static std::string GetID(const char *prefix, int page_number, int counter) { + std::stringstream idstr; + // IDs will only have the counter for the first page to keep them consistent + // with the IDs assigned before this change was made. + // From the second page on, IDs will also contain the page number to make them unique. + if (page_number == 0) { + idstr << prefix << "_" << counter; + } else { + idstr << prefix << "_" << page_number << "_" << counter; + } + + return idstr.str(); +} + /// /// Append the ALTO XML for the beginning of the document /// @@ -168,7 +182,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { case PT_PULLOUT_IMAGE: { // Handle all kinds of images. // TODO: optionally add TYPE, for example TYPE="photo". - alto_str << "\t\t\t\t\n"; res_it->Next(RIL_BLOCK); @@ -177,7 +191,7 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { case PT_HORZ_LINE: case PT_VERT_LINE: // Handle horizontal and vertical lines. 
- alto_str << "\t\t\t\t\n"; res_it->Next(RIL_BLOCK); @@ -190,24 +204,24 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) { } if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - alto_str << "\t\t\t\tIsAtBeginningOf(RIL_PARA)) { - alto_str << "\t\t\t\t\tIsAtBeginningOf(RIL_TEXTLINE)) { - alto_str << "\t\t\t\t\t\tpush_back(path.substr(0, extPos)); + auto path = entry.path().lexically_relative(datadir); + if (path.extension() == ".traineddata") { + langs->push_back(path.replace_extension("").string()); } } } diff --git a/src/arch/simddetect.cpp b/src/arch/simddetect.cpp index 9acd78a886..0031556163 100644 --- a/src/arch/simddetect.cpp +++ b/src/arch/simddetect.cpp @@ -61,12 +61,11 @@ # include # elif defined(HAVE_ELF_AUX_INFO) # include -# include # endif #endif #if defined(HAVE_RVV) -# if defined(HAVE_GETAUXVAL) +# if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO) # include # define HWCAP_RV(letter) (1ul << ((letter) - 'A')) # endif @@ -244,6 +243,10 @@ SIMDDetect::SIMDDetect() { # if defined(HAVE_GETAUXVAL) const unsigned long hwcap = getauxval(AT_HWCAP); rvv_available_ = hwcap & HWCAP_RV('V'); +# elif defined(HAVE_ELF_AUX_INFO) + unsigned long hwcap = 0; + elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap); + rvv_available_ = hwcap & HWCAP_RV('V'); # endif #endif diff --git a/src/ccmain/fixspace.cpp b/src/ccmain/fixspace.cpp index 7f03ff03ea..dd9e3663fe 100644 --- a/src/ccmain/fixspace.cpp +++ b/src/ccmain/fixspace.cpp @@ -55,12 +55,9 @@ class ROW; **********************************************************************/ static int c_blob_comparator( // sort blobs - const void *blob1p, // ptr to ptr to blob1 - const void *blob2p // ptr to ptr to blob2 + const C_BLOB *blob1, + const C_BLOB *blob2 ) { - const C_BLOB *blob1 = *reinterpret_cast(blob1p); - const C_BLOB *blob2 = *reinterpret_cast(blob2p); - return blob1->bounding_box().left() - blob2->bounding_box().left(); } diff --git a/src/ccmain/paramsd.cpp b/src/ccmain/paramsd.cpp index 
85e596d100..14f220f8e6 100644 --- a/src/ccmain/paramsd.cpp +++ b/src/ccmain/paramsd.cpp @@ -191,9 +191,7 @@ void ParamsEditor::GetPrefixes(const char *s, std::string *level_one, std::strin } // Compare two VC objects by their name. -int ParamContent::Compare(const void *v1, const void *v2) { - const ParamContent *one = *static_cast(v1); - const ParamContent *two = *static_cast(v2); +int ParamContent::Compare(const ParamContent *one, const ParamContent *two) { return strcmp(one->GetName(), two->GetName()); } diff --git a/src/ccmain/paramsd.h b/src/ccmain/paramsd.h index 5e2a57e489..f898d51bf7 100644 --- a/src/ccmain/paramsd.h +++ b/src/ccmain/paramsd.h @@ -44,10 +44,10 @@ enum ParamType { VT_INTEGER, VT_BOOLEAN, VT_STRING, VT_DOUBLE }; // comparisond or getting its value. It is used in the context of the // ParamsEditor as a bridge from the internal tesseract parameters to the // ones displayed by the ScrollView server. -class ParamContent : public ELIST_LINK { +class ParamContent : public ELIST::LINK { public: // Compare two VC objects by their name. - static int Compare(const void *v1, const void *v2); + static int Compare(const ParamContent *v1, const ParamContent *v2); // Gets a VC object identified by its ID. static ParamContent *GetParamContentById(int id); diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp index 44cf29b461..20c73649a8 100644 --- a/src/ccmain/thresholder.cpp +++ b/src/ccmain/thresholder.cpp @@ -207,7 +207,10 @@ std::tuple ImageThresholder::Threshold( tprintf("\nimage width: %d height: %d ppi: %d\n", pix_w, pix_h, yres_); } - if (method == ThresholdMethod::Sauvola) { + if (method == ThresholdMethod::Sauvola && pix_w > 6 && pix_h > 6) { + // pixSauvolaBinarizeTiled requires half_window_size >= 2. + // Therefore window_size must be at least 4 which requires + // pix_w and pix_h to be at least 7. 
int window_size; double window_size_factor; api->GetDoubleVariable("thresholding_window_size", &window_size_factor); @@ -283,30 +286,24 @@ bool ImageThresholder::ThresholdToPix(Image *pix) { tprintf("Image too large: (%d, %d)\n", image_width_, image_height_); return false; } - Image original = GetPixRect(); + // Handle binary image if (pix_channels_ == 0) { // We have a binary image, but it still has to be copied, as this API // allows the caller to modify the output. + Image original = GetPixRect(); *pix = original.copy(); - } else { - if (pixGetColormap(original)) { - Image tmp; - Image without_cmap = - pixRemoveColormap(original, REMOVE_CMAP_BASED_ON_SRC); - int depth = pixGetDepth(without_cmap); - if (depth > 1 && depth < 8) { - tmp = pixConvertTo8(without_cmap, false); - } else { - tmp = without_cmap.copy(); - } - without_cmap.destroy(); - OtsuThresholdRectToPix(tmp, pix); - tmp.destroy(); - } else { - OtsuThresholdRectToPix(pix_, pix); - } + original.destroy(); + return true; + } + // Handle colormaps + Image src = pix_; + if (pixGetColormap(src)) { + src = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC); + } + OtsuThresholdRectToPix(src, pix); + if (src != pix_) { + src.destroy(); } - original.destroy(); return true; } diff --git a/src/ccstruct/blobbox.h b/src/ccstruct/blobbox.h index 03e73ec9d7..f25d00fe13 100644 --- a/src/ccstruct/blobbox.h +++ b/src/ccstruct/blobbox.h @@ -138,7 +138,7 @@ class ColPartition; class BLOBNBOX; ELISTIZEH(BLOBNBOX) -class BLOBNBOX : public ELIST_LINK { +class BLOBNBOX : public ELIST::LINK { public: BLOBNBOX() { ReInit(); @@ -552,7 +552,7 @@ class BLOBNBOX : public ELIST_LINK { bool owns_cblob_ = false; }; -class TO_ROW : public ELIST2_LINK { +class TO_ROW : public ELIST2::LINK { public: static const int kErrorWeight = 3; @@ -695,7 +695,7 @@ class TO_ROW : public ELIST2_LINK { }; ELIST2IZEH(TO_ROW) -class TESS_API TO_BLOCK : public ELIST_LINK { +class TESS_API TO_BLOCK : public ELIST::LINK { public: TO_BLOCK() : 
pitch_decision(PITCH_DUNNO) { clear(); diff --git a/src/ccstruct/coutln.h b/src/ccstruct/coutln.h index 3aa25519de..fd08fd6d69 100644 --- a/src/ccstruct/coutln.h +++ b/src/ccstruct/coutln.h @@ -72,7 +72,7 @@ struct EdgeOffset { class C_OUTLINE; // forward declaration ELISTIZEH(C_OUTLINE) -class C_OUTLINE : public ELIST_LINK { +class C_OUTLINE : public ELIST::LINK { public: C_OUTLINE() { stepcount = 0; diff --git a/src/ccstruct/imagedata.cpp b/src/ccstruct/imagedata.cpp index 03c5049c42..e45a5dedb2 100644 --- a/src/ccstruct/imagedata.cpp +++ b/src/ccstruct/imagedata.cpp @@ -526,7 +526,8 @@ void DocumentData::Shuffle() { TRand random; // Different documents get shuffled differently, but the same for the same // name. - random.set_seed(document_name_.c_str()); + std::hash hasher; + random.set_seed(static_cast(hasher(document_name_))); int num_pages = pages_.size(); // Execute one random swap for each page in the document. for (int i = 0; i < num_pages; ++i) { diff --git a/src/ccstruct/ocrblock.cpp b/src/ccstruct/ocrblock.cpp index 1b222f6e07..7281e0c97e 100644 --- a/src/ccstruct/ocrblock.cpp +++ b/src/ccstruct/ocrblock.cpp @@ -68,9 +68,9 @@ BLOCK::BLOCK(const char *name, ///< filename * Sort Comparator: Return <0 if row1 top < row2 top */ -static int decreasing_top_order(const void *row1, const void *row2) { - return (*reinterpret_cast(row2))->bounding_box().top() - - (*reinterpret_cast(row1))->bounding_box().top(); +static int decreasing_top_order(const ROW *row1, const ROW *row2) { + return row2->bounding_box().top() - + row1->bounding_box().top(); } /** @@ -222,7 +222,7 @@ void BLOCK::print( // print list of sides BLOCK &BLOCK::operator=( // assignment const BLOCK &source // from this ) { - this->ELIST_LINK::operator=(source); + this->ELIST::LINK::operator=(source); pdblk = source.pdblk; proportional = source.proportional; kerning = source.kerning; diff --git a/src/ccstruct/ocrblock.h b/src/ccstruct/ocrblock.h index 88753b4b47..38a830d706 100644 --- 
a/src/ccstruct/ocrblock.h +++ b/src/ccstruct/ocrblock.h @@ -29,7 +29,7 @@ class BLOCK; // forward decl ELISTIZEH(BLOCK) -class TESS_API BLOCK : public ELIST_LINK +class TESS_API BLOCK : public ELIST::LINK // page block { friend class BLOCK_RECT_IT; // block iterator diff --git a/src/ccstruct/ocrpara.h b/src/ccstruct/ocrpara.h index e0bb6ab276..473ba6c674 100644 --- a/src/ccstruct/ocrpara.h +++ b/src/ccstruct/ocrpara.h @@ -27,7 +27,7 @@ namespace tesseract { class ParagraphModel; -struct PARA : public ELIST_LINK { +struct PARA : public ELIST::LINK { public: PARA() : model(nullptr) diff --git a/src/ccstruct/ocrrow.cpp b/src/ccstruct/ocrrow.cpp index 5e6eba4010..a2c284eb0c 100644 --- a/src/ccstruct/ocrrow.cpp +++ b/src/ccstruct/ocrrow.cpp @@ -223,7 +223,7 @@ void ROW::plot( // draw it **********************************************************************/ ROW &ROW::operator=(const ROW &source) { - this->ELIST_LINK::operator=(source); + this->ELIST::LINK::operator=(source); kerning = source.kerning; spacing = source.spacing; xheight = source.xheight; diff --git a/src/ccstruct/ocrrow.h b/src/ccstruct/ocrrow.h index 4955cd283e..637cb0c5e7 100644 --- a/src/ccstruct/ocrrow.h +++ b/src/ccstruct/ocrrow.h @@ -36,7 +36,7 @@ class TO_ROW; struct PARA; -class ROW : public ELIST_LINK { +class ROW : public ELIST::LINK { friend void tweak_row_baseline(ROW *, double, double); public: diff --git a/src/ccstruct/pageres.cpp b/src/ccstruct/pageres.cpp index 65ea748ff7..1bc5e1635f 100644 --- a/src/ccstruct/pageres.cpp +++ b/src/ccstruct/pageres.cpp @@ -184,7 +184,7 @@ ROW_RES::ROW_RES(bool merge_similar_words, ROW *the_row) { } WERD_RES &WERD_RES::operator=(const WERD_RES &source) { - this->ELIST_LINK::operator=(source); + this->ELIST::LINK::operator=(source); Clear(); if (source.combination) { word = new WERD; diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index 48e70b73d4..c7176f7a6c 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -115,7 +115,7 @@ 
class PAGE_RES { // page result * BLOCK_RES - Block results *************************************************************************/ -class BLOCK_RES : public ELIST_LINK { +class BLOCK_RES : public ELIST::LINK { public: BLOCK *block; // real block int32_t char_count; // chars in block @@ -139,7 +139,7 @@ class BLOCK_RES : public ELIST_LINK { * ROW_RES - Row results *************************************************************************/ -class ROW_RES : public ELIST_LINK { +class ROW_RES : public ELIST::LINK { public: ROW *row; // real row int32_t char_count; // chars in block @@ -161,7 +161,7 @@ enum CRUNCH_MODE { CR_NONE, CR_KEEP_SPACE, CR_LOOSE_SPACE, CR_DELETE }; // WERD_RES is a collection of publicly accessible members that gathers // information about a word result. -class TESS_API WERD_RES : public ELIST_LINK { +class TESS_API WERD_RES : public ELIST::LINK { public: // Which word is which? // There are 3 coordinate spaces in use here: a possibly rotated pixel space, @@ -345,7 +345,7 @@ class TESS_API WERD_RES : public ELIST_LINK { } // Deep copies everything except the ratings MATRIX. // To get that use deep_copy below. - WERD_RES(const WERD_RES &source) : ELIST_LINK(source) { + WERD_RES(const WERD_RES &source) : ELIST::LINK(source) { // combination is used in function Clear which is called from operator=. 
combination = false; *this = source; // see operator= diff --git a/src/ccstruct/points.h b/src/ccstruct/points.h index 59793592a1..f3adfbf623 100644 --- a/src/ccstruct/points.h +++ b/src/ccstruct/points.h @@ -160,7 +160,7 @@ class ICOORD { TDimension ycoord; ///< y value }; -class ICOORDELT : public ELIST_LINK, +class ICOORDELT : public ELIST::LINK, public ICOORD // embedded coord list { diff --git a/src/ccstruct/polyblk.cpp b/src/ccstruct/polyblk.cpp index 8c21fd8dc0..993e10b683 100644 --- a/src/ccstruct/polyblk.cpp +++ b/src/ccstruct/polyblk.cpp @@ -34,8 +34,6 @@ namespace tesseract { #define INTERSECTING INT16_MAX -int lessthan(const void *first, const void *second); - POLY_BLOCK::POLY_BLOCK(ICOORDELT_LIST *points, PolyBlockType t) { ICOORDELT_IT v = &vertices; @@ -357,7 +355,15 @@ ICOORDELT_LIST *PB_LINE_IT::get_line(TDimension y) { } if (!r.empty()) { - r.sort(lessthan); + r.sort([](const ICOORDELT *p1, const ICOORDELT *p2) { + if (p1->x() < p2->x()) { + return (-1); + } else if (p1->x() > p2->x()) { + return (1); + } else { + return (0); + } + }); for (r.mark_cycle_pt(); !r.cycled_list(); r.forward()) { x = r.data(); } @@ -371,19 +377,6 @@ ICOORDELT_LIST *PB_LINE_IT::get_line(TDimension y) { return result; } -int lessthan(const void *first, const void *second) { - const ICOORDELT *p1 = *reinterpret_cast(first); - const ICOORDELT *p2 = *reinterpret_cast(second); - - if (p1->x() < p2->x()) { - return (-1); - } else if (p1->x() > p2->x()) { - return (1); - } else { - return (0); - } -} - #ifndef GRAPHICS_DISABLED /// Returns a color to draw the given type. ScrollView::Color POLY_BLOCK::ColorForPolyBlockType(PolyBlockType type) { diff --git a/src/ccstruct/ratngs.cpp b/src/ccstruct/ratngs.cpp index 0b3cda09af..66d032bd56 100644 --- a/src/ccstruct/ratngs.cpp +++ b/src/ccstruct/ratngs.cpp @@ -110,7 +110,7 @@ BLOB_CHOICE::BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id * * Constructor to build a BLOB_CHOICE from another BLOB_CHOICE. 
*/ -BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) : ELIST_LINK(other) { +BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) : ELIST::LINK(other) { unichar_id_ = other.unichar_id(); rating_ = other.rating(); certainty_ = other.certainty(); @@ -129,7 +129,7 @@ BLOB_CHOICE::BLOB_CHOICE(const BLOB_CHOICE &other) : ELIST_LINK(other) { // Copy assignment operator. BLOB_CHOICE &BLOB_CHOICE::operator=(const BLOB_CHOICE &other) { - ELIST_LINK::operator=(other); + ELIST::LINK::operator=(other); unichar_id_ = other.unichar_id(); rating_ = other.rating(); certainty_ = other.certainty(); @@ -370,7 +370,7 @@ void WERD_CHOICE::punct_stripped(unsigned *start, unsigned *end) const { while (*start < length() && unicharset()->get_ispunctuation(unichar_id(*start))) { (*start)++; } - while (*end > 0 && unicharset()->get_ispunctuation(unichar_id(*end - 1))) { + while (*end > *start && unicharset()->get_ispunctuation(unichar_id(*end - 1))) { (*end)--; } } diff --git a/src/ccstruct/ratngs.h b/src/ccstruct/ratngs.h index 2107e90342..e9371c9c18 100644 --- a/src/ccstruct/ratngs.h +++ b/src/ccstruct/ratngs.h @@ -53,7 +53,7 @@ enum BlobChoiceClassifier { BCC_FAKE, // From some other process. 
}; -class BLOB_CHOICE : public ELIST_LINK { +class BLOB_CHOICE : public ELIST::LINK { public: BLOB_CHOICE() { unichar_id_ = UNICHAR_SPACE; @@ -255,7 +255,7 @@ enum ScriptPos { SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP }; const char *ScriptPosToString(ScriptPos script_pos); -class TESS_API WERD_CHOICE : public ELIST_LINK { +class TESS_API WERD_CHOICE : public ELIST::LINK { public: static const float kBadRating; static const char *permuter_name(uint8_t permuter); @@ -272,7 +272,7 @@ class TESS_API WERD_CHOICE : public ELIST_LINK { this->init(src_string, src_lengths, src_rating, src_certainty, src_permuter); } WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset); - WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) { + WERD_CHOICE(const WERD_CHOICE &word) : ELIST::LINK(word), unicharset_(word.unicharset_) { this->init(word.length()); this->operator=(word); } diff --git a/src/ccstruct/stepblob.h b/src/ccstruct/stepblob.h index 31d267104c..b7d90fec0e 100644 --- a/src/ccstruct/stepblob.h +++ b/src/ccstruct/stepblob.h @@ -36,8 +36,7 @@ class C_BLOB; class DENORM; ELISTIZEH(C_BLOB) - -class TESS_API C_BLOB : public ELIST_LINK { +class TESS_API C_BLOB : public ELIST::LINK { public: C_BLOB() = default; explicit C_BLOB(C_OUTLINE_LIST *outline_list); @@ -121,9 +120,7 @@ class TESS_API C_BLOB : public ELIST_LINK { return blob; } - static int SortByXMiddle(const void *v1, const void *v2) { - const C_BLOB *blob1 = *static_cast(v1); - const C_BLOB *blob2 = *static_cast(v2); + static int SortByXMiddle(const C_BLOB *blob1, const C_BLOB *blob2) { return blob1->bounding_box().x_middle() - blob2->bounding_box().x_middle(); } diff --git a/src/ccstruct/werd.cpp b/src/ccstruct/werd.cpp index 22f9cda710..36aa0b605f 100644 --- a/src/ccstruct/werd.cpp +++ b/src/ccstruct/werd.cpp @@ -355,7 +355,7 @@ WERD *WERD::shallow_copy() { */ WERD &WERD::operator=(const WERD &source) { - this->ELIST2_LINK::operator=(source); + 
this->ELIST2::LINK::operator=(source); blanks = source.blanks; flags = source.flags; script_id_ = source.script_id_; @@ -374,9 +374,7 @@ WERD &WERD::operator=(const WERD &source) { * order of left edge. */ -int word_comparator(const void *word1p, const void *word2p) { - const WERD *word1 = *reinterpret_cast(word1p); - const WERD *word2 = *reinterpret_cast(word2p); +int word_comparator(const WERD *word1, const WERD *word2) { return word1->bounding_box().left() - word2->bounding_box().left(); } diff --git a/src/ccstruct/werd.h b/src/ccstruct/werd.h index db1b0ee4e4..2c19ef1eb0 100644 --- a/src/ccstruct/werd.h +++ b/src/ccstruct/werd.h @@ -55,7 +55,7 @@ enum DISPLAY_FLAGS { class ROW; // forward decl -class TESS_API WERD : public ELIST2_LINK { +class TESS_API WERD : public ELIST2::LINK { public: WERD() = default; // WERD constructed with: @@ -205,7 +205,7 @@ ELIST2IZEH(WERD) namespace tesseract { // compare words by increasing order of left edge, suitable for qsort(3) -int word_comparator(const void *word1p, const void *word2p); +int word_comparator(const WERD *word1, const WERD *word2); } // namespace tesseract diff --git a/src/ccutil/ambigs.h b/src/ccutil/ambigs.h index fee1c067b7..effedbf7f6 100644 --- a/src/ccutil/ambigs.h +++ b/src/ccutil/ambigs.h @@ -109,7 +109,7 @@ class UnicharIdArrayUtils { // AMBIG_SPEC_LIST stores a list of dangerous ambigs that // start with the same unichar (e.g. r->t rn->m rr1->m). -class AmbigSpec : public ELIST_LINK { +class AmbigSpec : public ELIST::LINK { public: AmbigSpec(); ~AmbigSpec() = default; @@ -117,9 +117,7 @@ class AmbigSpec : public ELIST_LINK { // Comparator function for sorting AmbigSpec_LISTs. The lists will // be sorted by their wrong_ngram arrays. Example of wrong_ngram vectors // in a sorted AmbigSpec_LIST: [9 1 3], [9 3 4], [9 8], [9, 8 1]. 
- static int compare_ambig_specs(const void *spec1, const void *spec2) { - const AmbigSpec *s1 = *static_cast(spec1); - const AmbigSpec *s2 = *static_cast(spec2); + static int compare_ambig_specs(const AmbigSpec *s1, const AmbigSpec *s2) { int result = UnicharIdArrayUtils::compare(s1->wrong_ngram, s2->wrong_ngram); if (result != 0) { return result; diff --git a/src/ccutil/clst.cpp b/src/ccutil/clst.cpp deleted file mode 100644 index c80eed8336..0000000000 --- a/src/ccutil/clst.cpp +++ /dev/null @@ -1,444 +0,0 @@ -/********************************************************************** - * File: clst.cpp (Formerly clist.c) - * Description: CONS cell list handling code which is not in the include file. - * Author: Phil Cheatle - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "clst.h" -#include - -namespace tesseract { - -/*********************************************************************** - * CLIST::internal_deep_clear - * - * Used by the "deep_clear" member function of derived list - * classes to destroy all the elements on the list. - * The calling function passes a "zapper" function which can be called to - * delete each data element of the list, regardless of its class. 
This - * technique permits a generic clear function to destroy elements of - * different derived types correctly, without requiring virtual functions and - * the consequential memory overhead. - **********************************************************************/ - -void CLIST::internal_deep_clear( // destroy all links - void (*zapper)(void *)) { // ptr to zapper functn - if (!empty()) { - auto ptr = last->next; // set to first - last->next = nullptr; // break circle - last = nullptr; // set list empty - while (ptr) { - auto next = ptr->next; - zapper(ptr->data); - delete (ptr); - ptr = next; - } - } -} - -/*********************************************************************** - * CLIST::shallow_clear - * - * Used by the destructor and the "shallow_clear" member function of derived - * list classes to destroy the list. - * The data elements are NOT destroyed. - * - **********************************************************************/ - -void CLIST::shallow_clear() { // destroy all links - if (!empty()) { - auto ptr = last->next; // set to first - last->next = nullptr; // break circle - last = nullptr; // set list empty - while (ptr) { - auto next = ptr->next; - delete (ptr); - ptr = next; - } - } -} - -/*********************************************************************** - * CLIST::assign_to_sublist - * - * The list is set to a sublist of another list. "This" list must be empty - * before this function is invoked. The two iterators passed must refer to - * the same list, different from "this" one. The sublist removed is the - * inclusive list from start_it's current position to end_it's current - * position. If this range passes over the end of the source list then the - * source list has its end set to the previous element of start_it. The - * extracted sublist is unaffected by the end point of the source list, its - * end point is always the end_it position. 
- **********************************************************************/ - -void CLIST::assign_to_sublist( // to this list - CLIST_ITERATOR *start_it, // from list start - CLIST_ITERATOR *end_it) { // from list end - constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist"); - - if (!empty()) { - LIST_NOT_EMPTY.error("CLIST.assign_to_sublist", ABORT); - } - - last = start_it->extract_sublist(end_it); -} - -/*********************************************************************** - * CLIST::sort - * - * Sort elements on list - **********************************************************************/ - -void CLIST::sort( // sort elements - int comparator( // comparison routine - const void *, const void *)) { - // Allocate an array of pointers, one per list element. - auto count = length(); - if (count > 0) { - // ptr array to sort - std::vector base; - base.reserve(count); - - CLIST_ITERATOR it(this); - - // Extract all elements, putting the pointers in the array. - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - base.push_back(it.extract()); - } - - // Sort the pointer array. - qsort(&base[0], count, sizeof(base[0]), comparator); - - // Rebuild the list from the sorted pointers. - for (auto current : base) { - it.add_to_end(current); - } - } -} - -// Assuming list has been sorted already, insert new_data to -// keep the list sorted according to the same comparison function. -// Comparison function is the same as used by sort, i.e. uses double -// indirection. Time is O(1) to add to beginning or end. -// Time is linear to add pre-sorted items to an empty list. -// If unique, then don't add duplicate entries. -// Returns true if the element was added to the list. -bool CLIST::add_sorted(int comparator(const void *, const void *), bool unique, void *new_data) { - // Check for adding at the end. 
- if (last == nullptr || comparator(&last->data, &new_data) < 0) { - auto *new_element = new CLIST_LINK; - new_element->data = new_data; - if (last == nullptr) { - new_element->next = new_element; - } else { - new_element->next = last->next; - last->next = new_element; - } - last = new_element; - return true; - } else if (!unique || last->data != new_data) { - // Need to use an iterator. - CLIST_ITERATOR it(this); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - void *data = it.data(); - if (data == new_data && unique) { - return false; - } - if (comparator(&data, &new_data) > 0) { - break; - } - } - if (it.cycled_list()) { - it.add_to_end(new_data); - } else { - it.add_before_then_move(new_data); - } - return true; - } - return false; -} - -// Assuming that the minuend and subtrahend are already sorted with -// the same comparison function, shallow clears this and then copies -// the set difference minuend - subtrahend to this, being the elements -// of minuend that do not compare equal to anything in subtrahend. -// If unique is true, any duplicates in minuend are also eliminated. -void CLIST::set_subtract(int comparator(const void *, const void *), bool unique, CLIST *minuend, - CLIST *subtrahend) { - shallow_clear(); - CLIST_ITERATOR m_it(minuend); - CLIST_ITERATOR s_it(subtrahend); - // Since both lists are sorted, finding the subtras that are not - // minus is a case of a parallel iteration. 
- for (m_it.mark_cycle_pt(); !m_it.cycled_list(); m_it.forward()) { - void *minu = m_it.data(); - void *subtra = nullptr; - if (!s_it.empty()) { - subtra = s_it.data(); - while (!s_it.at_last() && comparator(&subtra, &minu) < 0) { - s_it.forward(); - subtra = s_it.data(); - } - } - if (subtra == nullptr || comparator(&subtra, &minu) != 0) { - add_sorted(comparator, unique, minu); - } - } -} - -/*********************************************************************** - * MEMBER FUNCTIONS OF CLASS: CLIST_ITERATOR - * ========================================= - **********************************************************************/ - -/*********************************************************************** - * CLIST_ITERATOR::forward - * - * Move the iterator to the next element of the list. - * REMEMBER: ALL LISTS ARE CIRCULAR. - **********************************************************************/ - -void *CLIST_ITERATOR::forward() { - if (list->empty()) { - return nullptr; - } - - if (current) { // not removed so - // set previous - prev = current; - started_cycling = true; - // In case next is deleted by another iterator, get next from current. - current = current->next; - } else { - if (ex_current_was_cycle_pt) { - cycle_pt = next; - } - current = next; - } - - next = current->next; - return current->data; -} - -/*********************************************************************** - * CLIST_ITERATOR::data_relative - * - * Return the data pointer to the element "offset" elements from current. - * "offset" must not be less than -1. - * (This function can't be INLINEd because it contains a loop) - **********************************************************************/ - -void *CLIST_ITERATOR::data_relative( // get data + or - ... 
- int8_t offset) { // offset from current - CLIST_LINK *ptr; - -#ifndef NDEBUG - if (!list) - NO_LIST.error("CLIST_ITERATOR::data_relative", ABORT); - if (list->empty()) - EMPTY_LIST.error("CLIST_ITERATOR::data_relative", ABORT); - if (offset < -1) - BAD_PARAMETER.error("CLIST_ITERATOR::data_relative", ABORT, "offset < -l"); -#endif - - if (offset == -1) { - ptr = prev; - } else { - for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) { - ; - } - } - - return ptr->data; -} - -/*********************************************************************** - * CLIST_ITERATOR::move_to_last() - * - * Move current so that it is set to the end of the list. - * Return data just in case anyone wants it. - * (This function can't be INLINEd because it contains a loop) - **********************************************************************/ - -void *CLIST_ITERATOR::move_to_last() { - while (current != list->last) { - forward(); - } - - if (current == nullptr) { - return nullptr; - } else { - return current->data; - } -} - -/*********************************************************************** - * CLIST_ITERATOR::exchange() - * - * Given another iterator, whose current element is a different element on - * the same list list OR an element of another list, exchange the two current - * elements. On return, each iterator points to the element which was the - * other iterators current on entry. - * (This function hasn't been in-lined because its a bit big!) 
- **********************************************************************/ - -void CLIST_ITERATOR::exchange( // positions of 2 links - CLIST_ITERATOR *other_it) { // other iterator - constexpr ERRCODE DONT_EXCHANGE_DELETED("Can't exchange deleted elements of lists"); - - /* Do nothing if either list is empty or if both iterators reference the same -link */ - - if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current)) { - return; - } - - /* Error if either current element is deleted */ - - if (!current || !other_it->current) { - DONT_EXCHANGE_DELETED.error("CLIST_ITERATOR.exchange", ABORT); - } - - /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements -(other before this); non-doubleton adjacent elements (this before other); -non-adjacent elements. */ - - // adjacent links - if ((next == other_it->current) || (other_it->next == current)) { - // doubleton list - if ((next == other_it->current) && (other_it->next == current)) { - prev = next = current; - other_it->prev = other_it->next = other_it->current; - } else { // non-doubleton with - // adjacent links - // other before this - if (other_it->next == current) { - other_it->prev->next = current; - other_it->current->next = next; - current->next = other_it->current; - other_it->next = other_it->current; - prev = current; - } else { // this before other - prev->next = other_it->current; - current->next = other_it->next; - other_it->current->next = current; - next = current; - other_it->prev = other_it->current; - } - } - } else { // no overlap - prev->next = other_it->current; - current->next = other_it->next; - other_it->prev->next = current; - other_it->current->next = next; - } - - /* update end of list pointer when necessary (remember that the 2 iterators - may iterate over different lists!) 
*/ - - if (list->last == current) { - list->last = other_it->current; - } - if (other_it->list->last == other_it->current) { - other_it->list->last = current; - } - - if (current == cycle_pt) { - cycle_pt = other_it->cycle_pt; - } - if (other_it->current == other_it->cycle_pt) { - other_it->cycle_pt = cycle_pt; - } - - /* The actual exchange - in all cases*/ - - auto old_current = current; - current = other_it->current; - other_it->current = old_current; -} - -/*********************************************************************** - * CLIST_ITERATOR::extract_sublist() - * - * This is a private member, used only by CLIST::assign_to_sublist. - * Given another iterator for the same list, extract the links from THIS to - * OTHER inclusive, link them into a new circular list, and return a - * pointer to the last element. - * (Can't inline this function because it contains a loop) - **********************************************************************/ - -CLIST_LINK *CLIST_ITERATOR::extract_sublist( // from this current - CLIST_ITERATOR *other_it) { // to other current - CLIST_ITERATOR temp_it = *this; - - constexpr ERRCODE BAD_SUBLIST("Can't find sublist end point in original list"); -#ifndef NDEBUG - constexpr ERRCODE BAD_EXTRACTION_PTS("Can't extract sublist from points on different lists"); - constexpr ERRCODE DONT_EXTRACT_DELETED("Can't extract a sublist marked by deleted points"); - - if (list != other_it->list) - BAD_EXTRACTION_PTS.error("CLIST_ITERATOR.extract_sublist", ABORT); - if (list->empty()) - EMPTY_LIST.error("CLIST_ITERATOR::extract_sublist", ABORT); - - if (!current || !other_it->current) - DONT_EXTRACT_DELETED.error("CLIST_ITERATOR.extract_sublist", ABORT); -#endif - - ex_current_was_last = other_it->ex_current_was_last = false; - ex_current_was_cycle_pt = false; - other_it->ex_current_was_cycle_pt = false; - - temp_it.mark_cycle_pt(); - do { // walk sublist - if (temp_it.cycled_list()) { // can't find end pt - 
BAD_SUBLIST.error("CLIST_ITERATOR.extract_sublist", ABORT); - } - - if (temp_it.at_last()) { - list->last = prev; - ex_current_was_last = other_it->ex_current_was_last = true; - } - - if (temp_it.current == cycle_pt) { - ex_current_was_cycle_pt = true; - } - - if (temp_it.current == other_it->cycle_pt) { - other_it->ex_current_was_cycle_pt = true; - } - - temp_it.forward(); - } while (temp_it.prev != other_it->current); - - // circularise sublist - other_it->current->next = current; - auto end_of_new_list = other_it->current; - - // sublist = whole list - if (prev == other_it->current) { - list->last = nullptr; - prev = current = next = nullptr; - other_it->prev = other_it->current = other_it->next = nullptr; - } else { - prev->next = other_it->next; - current = other_it->current = nullptr; - next = other_it->next; - other_it->prev = prev; - } - return end_of_new_list; -} - -} // namespace tesseract diff --git a/src/ccutil/clst.h b/src/ccutil/clst.h index a731cc55ed..140d555bef 100644 --- a/src/ccutil/clst.h +++ b/src/ccutil/clst.h @@ -19,696 +19,980 @@ #ifndef CLST_H #define CLST_H -#include "list.h" #include "lsterr.h" #include "serialis.h" +#include #include namespace tesseract { -class CLIST_ITERATOR; - -/********************************************************************** - * CLASS - CLIST_LINK - * - * Generic link class for singly linked CONS cell lists - * - * Note: No destructor - elements are assumed to be destroyed EITHER after - * they have been extracted from a list OR by the CLIST destructor which - * walks the list. 
- **********************************************************************/ - -class CLIST_LINK { - friend class CLIST_ITERATOR; - friend class CLIST; - - CLIST_LINK *next; - void *data; - -public: - CLIST_LINK() { // constructor - data = next = nullptr; - } - - CLIST_LINK(const CLIST_LINK &) = delete; - void operator=(const CLIST_LINK &) = delete; -}; - /********************************************************************** * CLASS - CLIST * * Generic list class for singly linked CONS cell lists **********************************************************************/ -class TESS_API CLIST { - friend class CLIST_ITERATOR; - - CLIST_LINK *last = nullptr; // End of list +template +class ConsList { + friend class Link; - //(Points to head) - CLIST_LINK *First() { // return first - return last != nullptr ? last->next : nullptr; - } +public: + /********************************************************************** + * CLASS - Link + * + * Generic link class for singly linked CONS cell lists + * + * Note: No destructor - elements are assumed to be destroyed EITHER after + * they have been extracted from a list OR by the ConsList destructor which + * walks the list. + **********************************************************************/ + struct Link { + Link *next{}; + T *data{}; + + Link() = default; + Link(const Link &) = delete; + void operator=(const Link &) = delete; + }; - const CLIST_LINK *First() const { // return first - return last != nullptr ? last->next : nullptr; - } + /*********************************************************************** + * CLASS - Iterator + * + * Generic iterator class for singly linked lists with embedded + *links + **********************************************************************/ + class Iterator { + ConsList *list; // List being iterated + Link *prev; // prev element + Link *current; // current element + Link *next; // next element + Link *cycle_pt; // point we are cycling the list to. 
+ bool ex_current_was_last; // current extracted was end of list + bool ex_current_was_cycle_pt; // current extracted was cycle point + bool started_cycling; // Have we moved off the start? + + /*********************************************************************** + * Iterator::extract_sublist() + * + * This is a private member, used only by ConsList::assign_to_sublist. + * Given another iterator for the same list, extract the links from THIS to + * OTHER inclusive, link them into a new circular list, and return a + * pointer to the last element. + * (Can't inline this function because it contains a loop) + **********************************************************************/ + Link *extract_sublist( // from this current + Iterator *other_it) { // to other current + Iterator temp_it = *this; + + constexpr ERRCODE BAD_SUBLIST("Can't find sublist end point in original list"); +#ifndef NDEBUG + constexpr ERRCODE BAD_EXTRACTION_PTS("Can't extract sublist from points on different lists"); + constexpr ERRCODE DONT_EXTRACT_DELETED("Can't extract a sublist marked by deleted points"); -public: - ~CLIST() { // destructor - shallow_clear(); - } + if (list != other_it->list) + BAD_EXTRACTION_PTS.error("Iterator.extract_sublist", ABORT); + if (list->empty()) + EMPTY_LIST.error("Iterator::extract_sublist", ABORT); - void internal_deep_clear( // destroy all links - void (*zapper)(void *)); // ptr to zapper functn + if (!current || !other_it->current) + DONT_EXTRACT_DELETED.error("Iterator.extract_sublist", ABORT); +#endif - void shallow_clear(); // clear list but don't - // delete data elements + ex_current_was_last = other_it->ex_current_was_last = false; + ex_current_was_cycle_pt = false; + other_it->ex_current_was_cycle_pt = false; - bool empty() const { // is list empty? 
- return !last; - } + temp_it.mark_cycle_pt(); + do { // walk sublist + if (temp_it.cycled_list()) { // can't find end pt + BAD_SUBLIST.error("Iterator.extract_sublist", ABORT); + } - bool singleton() const { - return last != nullptr ? (last == last->next) : false; - } + if (temp_it.at_last()) { + list->last = prev; + ex_current_was_last = other_it->ex_current_was_last = true; + } - void shallow_copy( // dangerous!! - CLIST *from_list) { // beware destructors!! - last = from_list->last; - } + if (temp_it.current == cycle_pt) { + ex_current_was_cycle_pt = true; + } - void assign_to_sublist( // to this list - CLIST_ITERATOR *start_it, // from list start - CLIST_ITERATOR *end_it); // from list end + if (temp_it.current == other_it->cycle_pt) { + other_it->ex_current_was_cycle_pt = true; + } - int32_t length() const { //# elements in list - int32_t count = 0; - if (last != nullptr) { - count = 1; - for (auto it = last->next; it != last; it = it->next) { - count++; + temp_it.forward(); + } while (temp_it.prev != other_it->current); + + // circularise sublist + other_it->current->next = current; + auto end_of_new_list = other_it->current; + + // sublist = whole list + if (prev == other_it->current) { + list->last = nullptr; + prev = current = next = nullptr; + other_it->prev = other_it->current = other_it->next = nullptr; + } else { + prev->next = other_it->next; + current = other_it->current = nullptr; + next = other_it->next; + other_it->prev = prev; } + return end_of_new_list; } - return count; - } - void sort( // sort elements - int comparator( // comparison routine - const void *, const void *)); - - // Assuming list has been sorted already, insert new_data to - // keep the list sorted according to the same comparison function. - // Comparison function is the same as used by sort, i.e. uses double - // indirection. Time is O(1) to add to beginning or end. - // Time is linear to add pre-sorted items to an empty list. - // If unique, then don't add duplicate entries. 
- // Returns true if the element was added to the list. - bool add_sorted(int comparator(const void *, const void *), bool unique, void *new_data); - - // Assuming that the minuend and subtrahend are already sorted with - // the same comparison function, shallow clears this and then copies - // the set difference minuend - subtrahend to this, being the elements - // of minuend that do not compare equal to anything in subtrahend. - // If unique is true, any duplicates in minuend are also eliminated. - void set_subtract(int comparator(const void *, const void *), bool unique, CLIST *minuend, - CLIST *subtrahend); -}; - -/*********************************************************************** - * CLASS - CLIST_ITERATOR - * - * Generic iterator class for singly linked lists with embedded - *links - **********************************************************************/ - -class TESS_API CLIST_ITERATOR { - friend void CLIST::assign_to_sublist(CLIST_ITERATOR *, CLIST_ITERATOR *); - - CLIST *list; // List being iterated - CLIST_LINK *prev; // prev element - CLIST_LINK *current; // current element - CLIST_LINK *next; // next element - CLIST_LINK *cycle_pt; // point we are cycling the list to. - bool ex_current_was_last; // current extracted was end of list - bool ex_current_was_cycle_pt; // current extracted was cycle point - bool started_cycling; // Have we moved off the start? - - CLIST_LINK *extract_sublist( // from this current... 
- CLIST_ITERATOR *other_it); // to other current - -public: - CLIST_ITERATOR() { // constructor - list = nullptr; - } // unassigned list - - CLIST_ITERATOR( // constructor - CLIST *list_to_iterate); - - void set_to_list( // change list - CLIST *list_to_iterate); - - void add_after_then_move( // add after current & - void *new_data); // move to new - - void add_after_stay_put( // add after current & - void *new_data); // stay at current - - void add_before_then_move( // add before current & - void *new_data); // move to new - - void add_before_stay_put( // add before current & - void *new_data); // stay at current - - void add_list_after( // add a list & - CLIST *list_to_add); // stay at current + public: + Iterator() { // constructor + list = nullptr; + } // unassigned list + + /*********************************************************************** + * Iterator::Iterator + * + * CONSTRUCTOR - set iterator to specified list; + **********************************************************************/ + Iterator( // constructor + ConsList *list_to_iterate) { + set_to_list(list_to_iterate); + } - void add_list_before( // add a list & - CLIST *list_to_add); // move to it 1st item + /*********************************************************************** + * Iterator::set_to_list + * + * (Re-)initialise the iterator to point to the start of the list_to_iterate + * over. + **********************************************************************/ + void set_to_list( // change list + ConsList *list_to_iterate) { + list = list_to_iterate; + prev = list->last; + current = list->First(); + next = current != nullptr ? 
current->next : nullptr; + cycle_pt = nullptr; // await explicit set + started_cycling = false; + ex_current_was_last = false; + ex_current_was_cycle_pt = false; + } - void *data() { // get current data + /*********************************************************************** + * Iterator::add_after_then_move + * + * Add a new element to the list after the current element and move the + * iterator to the new element. + **********************************************************************/ + void add_after_then_move( // add after current & + T *new_data) { #ifndef NDEBUG - if (!list) { - NO_LIST.error("CLIST_ITERATOR::data", ABORT); - } + if (!new_data) { + BAD_PARAMETER.error("Iterator::add_after_then_move", ABORT, "new_data is nullptr"); + } #endif - return current->data; - } - - void *data_relative( // get data + or - ... - int8_t offset); // offset from current - - void *forward(); // move to next element - - void *extract(); // remove from list - void *move_to_first(); // go to start of list + auto new_element = new Link; + new_element->data = new_data; - void *move_to_last(); // go to end of list + if (list->empty()) { + new_element->next = new_element; + list->last = new_element; + prev = next = new_element; + } else { + new_element->next = next; + + if (current) { // not extracted + current->next = new_element; + prev = current; + if (current == list->last) { + list->last = new_element; + } + } else { // current extracted + prev->next = new_element; + if (ex_current_was_last) { + list->last = new_element; + } + if (ex_current_was_cycle_pt) { + cycle_pt = new_element; + } + } + } + current = new_element; + } // move to new + + /*********************************************************************** + * Iterator::add_after_stay_put + * + * Add a new element to the list after the current element but do not move + * the iterator to the new element. 
+ **********************************************************************/ + void add_after_stay_put( // add after current & + T *new_data) { +#ifndef NDEBUG + if (!new_data) { + BAD_PARAMETER.error("Iterator::add_after_stay_put", ABORT, "new_data is nullptr"); + } +#endif - void mark_cycle_pt(); // remember current + auto new_element = new Link; + new_element->data = new_data; - bool empty() const { // is list empty? - return list->empty(); - } + if (list->empty()) { + new_element->next = new_element; + list->last = new_element; + prev = next = new_element; + ex_current_was_last = false; + current = nullptr; + } else { + new_element->next = next; + + if (current) { // not extracted + current->next = new_element; + if (prev == current) { + prev = new_element; + } + if (current == list->last) { + list->last = new_element; + } + } else { // current extracted + prev->next = new_element; + if (ex_current_was_last) { + list->last = new_element; + ex_current_was_last = false; + } + } + next = new_element; + } + } // stay at current + + /*********************************************************************** + * Iterator::add_before_then_move + * + * Add a new element to the list before the current element and move the + * iterator to the new element. + **********************************************************************/ + void add_before_then_move( // add before current & + T *new_data) { +#ifndef NDEBUG + if (!new_data) { + BAD_PARAMETER.error("Iterator::add_before_then_move", ABORT, "new_data is nullptr"); + } +#endif - bool current_extracted() const { // current extracted? - return !current; - } + auto new_element = new Link; + new_element->data = new_data; - bool at_first() const; // Current is first? 
+ if (list->empty()) { + new_element->next = new_element; + list->last = new_element; + prev = next = new_element; + } else { + prev->next = new_element; + if (current) { // not extracted + new_element->next = current; + next = current; + } else { // current extracted + new_element->next = next; + if (ex_current_was_last) { + list->last = new_element; + } + if (ex_current_was_cycle_pt) { + cycle_pt = new_element; + } + } + } + current = new_element; + } // move to new + + /*********************************************************************** + * Iterator::add_before_stay_put + * + * Add a new element to the list before the current element but don't move the + * iterator to the new element. + **********************************************************************/ + void add_before_stay_put( // add before current & + T *new_data) { +#ifndef NDEBUG + if (!new_data) { + BAD_PARAMETER.error("Iterator::add_before_stay_put", ABORT, "new_data is nullptr"); + } +#endif - bool at_last() const; // Current is last? + auto new_element = new Link; + new_element->data = new_data; - bool cycled_list() const; // Completed a cycle? + if (list->empty()) { + new_element->next = new_element; + list->last = new_element; + prev = next = new_element; + ex_current_was_last = true; + current = nullptr; + } else { + prev->next = new_element; + if (current) { // not extracted + new_element->next = current; + if (next == current) { + next = new_element; + } + } else { // current extracted + new_element->next = next; + if (ex_current_was_last) { + list->last = new_element; + } + } + prev = new_element; + } + } // stay at current + + /*********************************************************************** + * Iterator::add_list_after + * + * Insert another list to this list after the current element but don't move + *the + * iterator. 
+ **********************************************************************/ + void add_list_after( // add a list & + ConsList *list_to_add) { + if (!list_to_add->empty()) { + if (list->empty()) { + list->last = list_to_add->last; + prev = list->last; + next = list->First(); + ex_current_was_last = true; + current = nullptr; + } else { + if (current) { // not extracted + current->next = list_to_add->First(); + if (current == list->last) { + list->last = list_to_add->last; + } + list_to_add->last->next = next; + next = current->next; + } else { // current extracted + prev->next = list_to_add->First(); + if (ex_current_was_last) { + list->last = list_to_add->last; + ex_current_was_last = false; + } + list_to_add->last->next = next; + next = prev->next; + } + } + list_to_add->last = nullptr; + } + } // stay at current + + /*********************************************************************** + * Iterator::add_list_before + * + * Insert another list to this list before the current element. Move the + * iterator to the start of the inserted elements + * iterator. 
+ **********************************************************************/ + void add_list_before( // add a list & + ConsList *list_to_add) { + if (!list_to_add->empty()) { + if (list->empty()) { + list->last = list_to_add->last; + prev = list->last; + current = list->First(); + next = current->next; + ex_current_was_last = false; + } else { + prev->next = list_to_add->First(); + if (current) { // not extracted + list_to_add->last->next = current; + } else { // current extracted + list_to_add->last->next = next; + if (ex_current_was_last) { + list->last = list_to_add->last; + } + if (ex_current_was_cycle_pt) { + cycle_pt = prev->next; + } + } + current = prev->next; + next = current->next; + } + list_to_add->last = nullptr; + } + } // move to it 1st item - void add_to_end( // add at end & - void *new_data); // don't move + T *data() { // get current data +#ifndef NDEBUG + if (!list) { + NO_LIST.error("Iterator::data", ABORT); + } +#endif + return current->data; + } - void exchange( // positions of 2 links - CLIST_ITERATOR *other_it); // other iterator + /*********************************************************************** + * Iterator::data_relative + * + * Return the data pointer to the element "offset" elements from current. + * "offset" must not be less than -1. + * (This function can't be INLINEd because it contains a loop) + **********************************************************************/ + T *data_relative( // get data + or - ... 
+ int8_t offset) { // offset from current + Link *ptr; - int32_t length() const; //# elements in list +#ifndef NDEBUG + if (!list) + NO_LIST.error("Iterator::data_relative", ABORT); + if (list->empty()) + EMPTY_LIST.error("Iterator::data_relative", ABORT); + if (offset < -1) + BAD_PARAMETER.error("Iterator::data_relative", ABORT, "offset < -l"); +#endif - void sort( // sort elements - int comparator( // comparison routine - const void *, const void *)); -}; + if (offset == -1) { + ptr = prev; + } else { + for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) { + ; + } + } -/*********************************************************************** - * CLIST_ITERATOR::set_to_list - * - * (Re-)initialise the iterator to point to the start of the list_to_iterate - * over. - **********************************************************************/ + return ptr->data; + } -inline void CLIST_ITERATOR::set_to_list( // change list - CLIST *list_to_iterate) { - list = list_to_iterate; - prev = list->last; - current = list->First(); - next = current != nullptr ? current->next : nullptr; - cycle_pt = nullptr; // await explicit set - started_cycling = false; - ex_current_was_last = false; - ex_current_was_cycle_pt = false; -} - -/*********************************************************************** - * CLIST_ITERATOR::CLIST_ITERATOR - * - * CONSTRUCTOR - set iterator to specified list; - **********************************************************************/ + /*********************************************************************** + * Iterator::forward + * + * Move the iterator to the next element of the list. + * REMEMBER: ALL LISTS ARE CIRCULAR. 
+ **********************************************************************/ + T *forward() { + if (list->empty()) { + return nullptr; + } -inline CLIST_ITERATOR::CLIST_ITERATOR(CLIST *list_to_iterate) { - set_to_list(list_to_iterate); -} + if (current) { // not removed so + // set previous + prev = current; + started_cycling = true; + // In case next is deleted by another iterator, get next from current. + current = current->next; + } else { + if (ex_current_was_cycle_pt) { + cycle_pt = next; + } + current = next; + } -/*********************************************************************** - * CLIST_ITERATOR::add_after_then_move - * - * Add a new element to the list after the current element and move the - * iterator to the new element. - **********************************************************************/ + next = current->next; + return current->data; + } -inline void CLIST_ITERATOR::add_after_then_move( // element to add - void *new_data) { + /*********************************************************************** + * Iterator::extract + * + * Do extraction by removing current from the list, deleting the cons cell + * and returning the data to the caller, but NOT updating the iterator. (So + * that any calling loop can do this.) The iterator's current points to + * nullptr. If the data is to be deleted, this is the callers responsibility. 
+ **********************************************************************/ + T *extract() { #ifndef NDEBUG - if (!new_data) { - BAD_PARAMETER.error("CLIST_ITERATOR::add_after_then_move", ABORT, "new_data is nullptr"); - } + if (!current) { // list empty or + // element extracted + NULL_CURRENT.error("Iterator::extract", ABORT); + } #endif - auto new_element = new CLIST_LINK; - new_element->data = new_data; - - if (list->empty()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - } else { - new_element->next = next; + if (list->singleton()) { + // Special case where we do need to change the iterator. + prev = next = list->last = nullptr; + } else { + prev->next = next; // remove from list - if (current) { // not extracted - current->next = new_element; - prev = current; - if (current == list->last) { - list->last = new_element; + if (current == list->last) { + list->last = prev; + ex_current_was_last = true; + } else { + ex_current_was_last = false; + } } - } else { // current extracted - prev->next = new_element; - if (ex_current_was_last) { - list->last = new_element; + // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. + ex_current_was_cycle_pt = (current == cycle_pt); + auto extracted_data = current->data; + delete (current); // destroy CONS cell + current = nullptr; + return extracted_data; + } // remove from list + + /*********************************************************************** + * Iterator::move_to_first() + * + * Move current so that it is set to the start of the list. + * Return data just in case anyone wants it. + **********************************************************************/ + T *move_to_first() { + current = list->First(); + prev = list->last; + next = current != nullptr ? current->next : nullptr; + return current != nullptr ? 
current->data : nullptr; + } // go to start of list + + /*********************************************************************** + * Iterator::move_to_last() + * + * Move current so that it is set to the end of the list. + * Return data just in case anyone wants it. + * (This function can't be INLINEd because it contains a loop) + **********************************************************************/ + T *move_to_last() { + while (current != list->last) { + forward(); } - if (ex_current_was_cycle_pt) { - cycle_pt = new_element; + + if (current == nullptr) { + return nullptr; + } else { + return current->data; } } - } - current = new_element; -} - -/*********************************************************************** - * CLIST_ITERATOR::add_after_stay_put - * - * Add a new element to the list after the current element but do not move - * the iterator to the new element. - **********************************************************************/ -inline void CLIST_ITERATOR::add_after_stay_put( // element to add - void *new_data) { + /*********************************************************************** + * Iterator::mark_cycle_pt() + * + * Remember the current location so that we can tell whether we've returned + * to this point later. + * + * If the current point is deleted either now, or in the future, the cycle + * point will be set to the next item which is set to current. This could be + * by a forward, add_after_then_move or add_after_then_move. 
+ **********************************************************************/ + void mark_cycle_pt() { #ifndef NDEBUG - if (!new_data) { - BAD_PARAMETER.error("CLIST_ITERATOR::add_after_stay_put", ABORT, "new_data is nullptr"); - } + if (!list) { + NO_LIST.error("Iterator::mark_cycle_pt", ABORT); + } #endif - auto new_element = new CLIST_LINK; - new_element->data = new_data; - - if (list->empty()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = false; - current = nullptr; - } else { - new_element->next = next; - - if (current) { // not extracted - current->next = new_element; - if (prev == current) { - prev = new_element; - } - if (current == list->last) { - list->last = new_element; - } - } else { // current extracted - prev->next = new_element; - if (ex_current_was_last) { - list->last = new_element; - ex_current_was_last = false; + if (current) { + cycle_pt = current; + } else { + ex_current_was_cycle_pt = true; } + started_cycling = false; + } // remember current + + bool empty() const { // is list empty? + return list->empty(); } - next = new_element; - } -} -/*********************************************************************** - * CLIST_ITERATOR::add_before_then_move - * - * Add a new element to the list before the current element and move the - * iterator to the new element. - **********************************************************************/ + bool current_extracted() const { // current extracted? + return !current; + } + + /*********************************************************************** + * Iterator::at_first() + * + * Are we at the start of the list? + * + **********************************************************************/ + bool at_first() const { + // we're at a deleted + return ((list->empty()) || (current == list->First()) || + ((current == nullptr) && (prev == list->last) && // NON-last pt between + !ex_current_was_last)); // first and last + } // Current is first? 
+ + /*********************************************************************** + * Iterator::at_last() + * + * Are we at the end of the list? + * + **********************************************************************/ + bool at_last() const { + // we're at a deleted + return ((list->empty()) || (current == list->last) || + ((current == nullptr) && (prev == list->last) && // last point between + ex_current_was_last)); // first and last + } // Current is last? + + /*********************************************************************** + * Iterator::cycled_list() + * + * Have we returned to the cycle_pt since it was set? + * + **********************************************************************/ + bool cycled_list() const { // Completed a cycle? + return ((list->empty()) || ((current == cycle_pt) && started_cycling)); + } -inline void CLIST_ITERATOR::add_before_then_move( // element to add - void *new_data) { + /*********************************************************************** + * Iterator::add_to_end + * + * Add a new element to the end of the list without moving the iterator. + * This is provided because a single linked list cannot move to the last as + * the iterator couldn't set its prev pointer. Adding to the end is + * essential for implementing + queues. 
+ **********************************************************************/ + void add_to_end( // element to add + T *new_data) { #ifndef NDEBUG - if (!new_data) { - BAD_PARAMETER.error("CLIST_ITERATOR::add_before_then_move", ABORT, "new_data is nullptr"); - } + if (!list) { + NO_LIST.error("Iterator::add_to_end", ABORT); + } + if (!new_data) { + BAD_PARAMETER.error("Iterator::add_to_end", ABORT, "new_data is nullptr"); + } #endif - auto new_element = new CLIST_LINK; - new_element->data = new_data; - - if (list->empty()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - } else { - prev->next = new_element; - if (current) { // not extracted - new_element->next = current; - next = current; - } else { // current extracted - new_element->next = next; - if (ex_current_was_last) { - list->last = new_element; - } - if (ex_current_was_cycle_pt) { - cycle_pt = new_element; + if (this->at_last()) { + this->add_after_stay_put(new_data); + } else { + if (this->at_first()) { + this->add_before_stay_put(new_data); + list->last = prev; + } else { // Iteratr is elsewhere + auto new_element = new Link; + new_element->data = new_data; + + new_element->next = list->last->next; + list->last->next = new_element; + list->last = new_element; + } } } - } - current = new_element; -} -/*********************************************************************** - * CLIST_ITERATOR::add_before_stay_put - * - * Add a new element to the list before the current element but don't move the - * iterator to the new element. - **********************************************************************/ + /*********************************************************************** + * Iterator::exchange() + * + * Given another iterator, whose current element is a different element on + * the same list list OR an element of another list, exchange the two current + * elements. On return, each iterator points to the element which was the + * other iterators current on entry. 
+ * (This function hasn't been in-lined because its a bit big!) + **********************************************************************/ + void exchange( // positions of 2 links + Iterator *other_it) { // other iterator + constexpr ERRCODE DONT_EXCHANGE_DELETED("Can't exchange deleted elements of lists"); + + /* Do nothing if either list is empty or if both iterators reference the same + link */ + + if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current)) { + return; + } -inline void CLIST_ITERATOR::add_before_stay_put( // element to add - void *new_data) { -#ifndef NDEBUG - if (!new_data) { - BAD_PARAMETER.error("CLIST_ITERATOR::add_before_stay_put", ABORT, "new_data is nullptr"); - } -#endif + /* Error if either current element is deleted */ - auto new_element = new CLIST_LINK; - new_element->data = new_data; - - if (list->empty()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = true; - current = nullptr; - } else { - prev->next = new_element; - if (current) { // not extracted - new_element->next = current; - if (next == current) { - next = new_element; - } - } else { // current extracted - new_element->next = next; - if (ex_current_was_last) { - list->last = new_element; + if (!current || !other_it->current) { + DONT_EXCHANGE_DELETED.error("Iterator.exchange", ABORT); } - } - prev = new_element; - } -} - -/*********************************************************************** - * CLIST_ITERATOR::add_list_after - * - * Insert another list to this list after the current element but don't move - *the - * iterator. 
- **********************************************************************/ -inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) { - if (!list_to_add->empty()) { - if (list->empty()) { - list->last = list_to_add->last; - prev = list->last; - next = list->First(); - ex_current_was_last = true; - current = nullptr; - } else { - if (current) { // not extracted - current->next = list_to_add->First(); - if (current == list->last) { - list->last = list_to_add->last; + /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements + (other before this); non-doubleton adjacent elements (this before other); + non-adjacent elements. */ + + // adjacent links + if ((next == other_it->current) || (other_it->next == current)) { + // doubleton list + if ((next == other_it->current) && (other_it->next == current)) { + prev = next = current; + other_it->prev = other_it->next = other_it->current; + } else { // non-doubleton with + // adjacent links + // other before this + if (other_it->next == current) { + other_it->prev->next = current; + other_it->current->next = next; + current->next = other_it->current; + other_it->next = other_it->current; + prev = current; + } else { // this before other + prev->next = other_it->current; + current->next = other_it->next; + other_it->current->next = current; + next = current; + other_it->prev = other_it->current; + } } - list_to_add->last->next = next; - next = current->next; - } else { // current extracted - prev->next = list_to_add->First(); - if (ex_current_was_last) { - list->last = list_to_add->last; - ex_current_was_last = false; - } - list_to_add->last->next = next; - next = prev->next; + } else { // no overlap + prev->next = other_it->current; + current->next = other_it->next; + other_it->prev->next = current; + other_it->current->next = next; } - } - list_to_add->last = nullptr; - } -} -/*********************************************************************** - * CLIST_ITERATOR::add_list_before - * - * Insert 
another list to this list before the current element. Move the - * iterator to the start of the inserted elements - * iterator. - **********************************************************************/ + /* update end of list pointer when necessary (remember that the 2 iterators + may iterate over different lists!) */ -inline void CLIST_ITERATOR::add_list_before(CLIST *list_to_add) { - if (!list_to_add->empty()) { - if (list->empty()) { - list->last = list_to_add->last; - prev = list->last; - current = list->First(); - next = current->next; - ex_current_was_last = false; - } else { - prev->next = list_to_add->First(); - if (current) { // not extracted - list_to_add->last->next = current; - } else { // current extracted - list_to_add->last->next = next; - if (ex_current_was_last) { - list->last = list_to_add->last; - } - if (ex_current_was_cycle_pt) { - cycle_pt = prev->next; - } + if (list->last == current) { + list->last = other_it->current; + } + if (other_it->list->last == other_it->current) { + other_it->list->last = current; } - current = prev->next; - next = current->next; - } - list_to_add->last = nullptr; - } -} -/*********************************************************************** - * CLIST_ITERATOR::extract - * - * Do extraction by removing current from the list, deleting the cons cell - * and returning the data to the caller, but NOT updating the iterator. (So - * that any calling loop can do this.) The iterator's current points to - * nullptr. If the data is to be deleted, this is the callers responsibility. 
- **********************************************************************/ + if (current == cycle_pt) { + cycle_pt = other_it->cycle_pt; + } + if (other_it->current == other_it->cycle_pt) { + other_it->cycle_pt = cycle_pt; + } -inline void *CLIST_ITERATOR::extract() { -#ifndef NDEBUG - if (!current) { // list empty or - // element extracted - NULL_CURRENT.error("CLIST_ITERATOR::extract", ABORT); - } -#endif + /* The actual exchange - in all cases*/ - if (list->singleton()) { - // Special case where we do need to change the iterator. - prev = next = list->last = nullptr; - } else { - prev->next = next; // remove from list + auto old_current = current; + current = other_it->current; + other_it->current = old_current; + } - if (current == list->last) { - list->last = prev; - ex_current_was_last = true; - } else { - ex_current_was_last = false; + /*********************************************************************** + * Iterator::length() + * + * Return the length of the list + * + **********************************************************************/ + int32_t length() const { + return list->length(); } - } - // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. - ex_current_was_cycle_pt = (current == cycle_pt); - auto extracted_data = current->data; - delete (current); // destroy CONS cell - current = nullptr; - return extracted_data; -} - -/*********************************************************************** - * CLIST_ITERATOR::move_to_first() - * - * Move current so that it is set to the start of the list. - * Return data just in case anyone wants it. - **********************************************************************/ -inline void *CLIST_ITERATOR::move_to_first() { - current = list->First(); - prev = list->last; - next = current != nullptr ? current->next : nullptr; - return current != nullptr ? 
current->data : nullptr; -} + /*********************************************************************** + * Iterator::sort() + * + * Sort the elements of the list, then reposition at the start. + * + **********************************************************************/ + void sort( // sort elements + int comparator( // comparison routine + const T *, const T *)) { + list->sort(comparator); + move_to_first(); + } + }; + using ITERATOR = Iterator; // compat -/*********************************************************************** - * CLIST_ITERATOR::mark_cycle_pt() - * - * Remember the current location so that we can tell whether we've returned - * to this point later. - * - * If the current point is deleted either now, or in the future, the cycle - * point will be set to the next item which is set to current. This could be - * by a forward, add_after_then_move or add_after_then_move. - **********************************************************************/ +private: + Link *last = nullptr; // End of list -inline void CLIST_ITERATOR::mark_cycle_pt() { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("CLIST_ITERATOR::mark_cycle_pt", ABORT); + //(Points to head) + Link *First() { // return first + return last != nullptr ? last->next : nullptr; } -#endif - if (current) { - cycle_pt = current; - } else { - ex_current_was_cycle_pt = true; + const Link *First() const { // return first + return last != nullptr ? last->next : nullptr; } - started_cycling = false; -} -/*********************************************************************** - * CLIST_ITERATOR::at_first() - * - * Are we at the start of the list? 
- * - **********************************************************************/ +public: + ~ConsList() { // destructor + shallow_clear(); + } -inline bool CLIST_ITERATOR::at_first() const { - // we're at a deleted - return ((list->empty()) || (current == list->First()) || - ((current == nullptr) && (prev == list->last) && // NON-last pt between - !ex_current_was_last)); // first and last -} + /*********************************************************************** + * ConsList::internal_deep_clear + * + * Used by the "deep_clear" member function of derived list + * classes to destroy all the elements on the list. + * The calling function passes a "zapper" function which can be called to + * delete each data element of the list, regardless of its class. This + * technique permits a generic clear function to destroy elements of + * different derived types correctly, without requiring virtual functions and + * the consequential memory overhead. + **********************************************************************/ + void internal_deep_clear() { // ptr to zapper functn + if (!empty()) { + auto ptr = last->next; // set to first + last->next = nullptr; // break circle + last = nullptr; // set list empty + while (ptr) { + auto next = ptr->next; + delete ptr->data; + delete (ptr); + ptr = next; + } + } + } + void deep_clear() { + internal_deep_clear(); + } -/*********************************************************************** - * CLIST_ITERATOR::at_last() - * - * Are we at the end of the list? - * - **********************************************************************/ + /*********************************************************************** + * ConsList::shallow_clear + * + * Used by the destructor and the "shallow_clear" member function of derived + * list classes to destroy the list. + * The data elements are NOT destroyed. 
+ * + **********************************************************************/ + void shallow_clear() { // destroy all links + if (!empty()) { + auto ptr = last->next; // set to first + last->next = nullptr; // break circle + last = nullptr; // set list empty + while (ptr) { + auto next = ptr->next; + delete (ptr); + ptr = next; + } + } + } -inline bool CLIST_ITERATOR::at_last() const { - // we're at a deleted - return ((list->empty()) || (current == list->last) || - ((current == nullptr) && (prev == list->last) && // last point between - ex_current_was_last)); // first and last -} + bool empty() const { // is list empty? + return !last; + } -/*********************************************************************** - * CLIST_ITERATOR::cycled_list() - * - * Have we returned to the cycle_pt since it was set? - * - **********************************************************************/ + bool singleton() const { + return last != nullptr ? (last == last->next) : false; + } -inline bool CLIST_ITERATOR::cycled_list() const { - return ((list->empty()) || ((current == cycle_pt) && started_cycling)); -} + void shallow_copy( // dangerous!! + ConsList *from_list) { // beware destructors!! + last = from_list->last; + } -/*********************************************************************** - * CLIST_ITERATOR::length() - * - * Return the length of the list - * - **********************************************************************/ + /*********************************************************************** + * ConsList::assign_to_sublist + * + * The list is set to a sublist of another list. "This" list must be empty + * before this function is invoked. The two iterators passed must refer to + * the same list, different from "this" one. The sublist removed is the + * inclusive list from start_it's current position to end_it's current + * position. If this range passes over the end of the source list then the + * source list has its end set to the previous element of start_it. 
The + * extracted sublist is unaffected by the end point of the source list, its + * end point is always the end_it position. + **********************************************************************/ + void assign_to_sublist( // to this list + Iterator *start_it, // from list start + Iterator *end_it) { // from list end + constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist"); + + if (!empty()) { + LIST_NOT_EMPTY.error("ConsList.assign_to_sublist", ABORT); + } -inline int32_t CLIST_ITERATOR::length() const { - return list->length(); -} + last = start_it->extract_sublist(end_it); + } -/*********************************************************************** - * CLIST_ITERATOR::sort() - * - * Sort the elements of the list, then reposition at the start. - * - **********************************************************************/ + int32_t length() const { //# elements in list + int32_t count = 0; + if (last != nullptr) { + count = 1; + for (auto it = last->next; it != last; it = it->next) { + count++; + } + } + return count; + } -inline void CLIST_ITERATOR::sort( // sort elements - int comparator( // comparison routine - const void *, const void *)) { - list->sort(comparator); - move_to_first(); -} + /*********************************************************************** + * ConsList::sort + * + * Sort elements on list + **********************************************************************/ + void sort( // sort elements + int comparator( // comparison routine + const T *, const T *)) { + // Allocate an array of pointers, one per list element. + auto count = length(); + if (count > 0) { + // ptr array to sort + std::vector base; + base.reserve(count); + + Iterator it(this); + + // Extract all elements, putting the pointers in the array. 
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + base.push_back(it.extract()); + } -/*********************************************************************** - * CLIST_ITERATOR::add_to_end - * - * Add a new element to the end of the list without moving the iterator. - * This is provided because a single linked list cannot move to the last as - * the iterator couldn't set its prev pointer. Adding to the end is - * essential for implementing - queues. -**********************************************************************/ - -inline void CLIST_ITERATOR::add_to_end( // element to add - void *new_data) { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("CLIST_ITERATOR::add_to_end", ABORT); - } - if (!new_data) { - BAD_PARAMETER.error("CLIST_ITERATOR::add_to_end", ABORT, "new_data is nullptr"); + // Sort the pointer array. + std::sort(base.begin(), base.end(), + // all current comparators return -1,0,1, so we handle this correctly for std::sort + [&](auto &&l, auto &&r) {return comparator(l, r) < 0; }); + + // Rebuild the list from the sorted pointers. + for (auto current : base) { + it.add_to_end(current); + } + } } -#endif - if (this->at_last()) { - this->add_after_stay_put(new_data); - } else { - if (this->at_first()) { - this->add_before_stay_put(new_data); - list->last = prev; - } else { // Iteratr is elsewhere - auto new_element = new CLIST_LINK; + // Assuming list has been sorted already, insert new_data to + // keep the list sorted according to the same comparison function. + // Comparison function is the same as used by sort, i.e. uses double + // indirection. Time is O(1) to add to beginning or end. + // Time is linear to add pre-sorted items to an empty list. + // If unique, then don't add duplicate entries. + // Returns true if the element was added to the list. + bool add_sorted(int comparator(const T *, const T *), bool unique, T *new_data) { + // Check for adding at the end. 
+ if (last == nullptr || comparator(last->data, new_data) < 0) { + auto *new_element = new Link; new_element->data = new_data; - - new_element->next = list->last->next; - list->last->next = new_element; - list->last = new_element; + if (last == nullptr) { + new_element->next = new_element; + } else { + new_element->next = last->next; + last->next = new_element; + } + last = new_element; + return true; + } else if (!unique || last->data != new_data) { + // Need to use an iterator. + Iterator it(this); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + auto data = it.data(); + if (data == new_data && unique) { + return false; + } + if (comparator(data, new_data) > 0) { + break; + } + } + if (it.cycled_list()) { + it.add_to_end(new_data); + } else { + it.add_before_then_move(new_data); + } + return true; } + return false; } -} - -template -class X_CLIST : public CLIST { -public: - X_CLIST() = default; - X_CLIST(const X_CLIST &) = delete; - X_CLIST &operator=(const X_CLIST &) = delete; - void deep_clear() { - internal_deep_clear([](void *link) {delete static_cast(link);}); + // Assuming that the minuend and subtrahend are already sorted with + // the same comparison function, shallow clears this and then copies + // the set difference minuend - subtrahend to this, being the elements + // of minuend that do not compare equal to anything in subtrahend. + // If unique is true, any duplicates in minuend are also eliminated. + void set_subtract(int comparator(const T *, const T *), bool unique, ConsList *minuend, + ConsList *subtrahend) { + shallow_clear(); + Iterator m_it(minuend); + Iterator s_it(subtrahend); + // Since both lists are sorted, finding the subtras that are not + // minus is a case of a parallel iteration. 
+ for (m_it.mark_cycle_pt(); !m_it.cycled_list(); m_it.forward()) { + auto minu = m_it.data(); + T *subtra = nullptr; + if (!s_it.empty()) { + subtra = s_it.data(); + while (!s_it.at_last() && comparator(subtra, minu) < 0) { + s_it.forward(); + subtra = s_it.data(); + } + } + if (subtra == nullptr || comparator(subtra, minu) != 0) { + add_sorted(comparator, unique, minu); + } + } } }; -#define CLISTIZEH(CLASSNAME) \ - class CLASSNAME##_CLIST : public X_CLIST { \ - using X_CLIST::X_CLIST; \ - }; \ - struct CLASSNAME##_C_IT : X_ITER { \ - using X_ITER::X_ITER; \ - }; +#define CLISTIZEH(T) \ + class T##_CLIST : public ConsList { \ + using ConsList::ConsList; \ + }; \ + using T##_C_IT = ConsList::Iterator; } // namespace tesseract diff --git a/src/ccutil/elst.cpp b/src/ccutil/elst.cpp deleted file mode 100644 index 2cac5fd14d..0000000000 --- a/src/ccutil/elst.cpp +++ /dev/null @@ -1,440 +0,0 @@ -/********************************************************************** - * File: elst.cpp (Formerly elist.c) - * Description: Embedded list handling code which is not in the include file. - * Author: Phil Cheatle - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#include "elst.h" -#include - -namespace tesseract { - -/*********************************************************************** - * ELIST::internal_clear - * - * Used by the destructor and the "clear" member function of derived list - * classes to destroy all the elements on the list. - * The calling function passes a "zapper" function which can be called to - * delete each element of the list, regardless of its derived type. This - * technique permits a generic clear function to destroy elements of - * different derived types correctly, without requiring virtual functions and - * the consequential memory overhead. - **********************************************************************/ - -void ELIST::internal_clear( // destroy all links - void (*zapper)(void *)) { - // ptr to zapper functn - ELIST_LINK *ptr; - ELIST_LINK *next; - - if (!empty()) { - ptr = last->next; // set to first - last->next = nullptr; // break circle - last = nullptr; // set list empty - while (ptr) { - next = ptr->next; - zapper(ptr); - ptr = next; - } - } -} - -/*********************************************************************** - * ELIST::assign_to_sublist - * - * The list is set to a sublist of another list. "This" list must be empty - * before this function is invoked. The two iterators passed must refer to - * the same list, different from "this" one. The sublist removed is the - * inclusive list from start_it's current position to end_it's current - * position. If this range passes over the end of the source list then the - * source list has its end set to the previous element of start_it. The - * extracted sublist is unaffected by the end point of the source list, its - * end point is always the end_it position. 
- **********************************************************************/ - -void ELIST::assign_to_sublist( // to this list - ELIST_ITERATOR *start_it, // from list start - ELIST_ITERATOR *end_it) { // from list end - constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist"); - - if (!empty()) { - LIST_NOT_EMPTY.error("ELIST.assign_to_sublist", ABORT); - } - - last = start_it->extract_sublist(end_it); -} - -/*********************************************************************** - * ELIST::sort - * - * Sort elements on list - * NB If you don't like the const declarations in the comparator, coerce yours: - * ( int (*)(const void *, const void *) - **********************************************************************/ - -void ELIST::sort( // sort elements - int comparator( // comparison routine - const void *, const void *)) { - // Allocate an array of pointers, one per list element. - auto count = length(); - - if (count > 0) { - // ptr array to sort - std::vector base; - base.reserve(count); - - ELIST_ITERATOR it(this); - - // Extract all elements, putting the pointers in the array. - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - base.push_back(it.extract()); - } - - // Sort the pointer array. - qsort(&base[0], count, sizeof(base[0]), comparator); - - // Rebuild the list from the sorted pointers. - for (auto current : base) { - it.add_to_end(current); - } - } -} - -// Assuming list has been sorted already, insert new_link to -// keep the list sorted according to the same comparison function. -// Comparison function is the same as used by sort, i.e. uses double -// indirection. Time is O(1) to add to beginning or end. -// Time is linear to add pre-sorted items to an empty list. 
-// If unique is set to true and comparator() returns 0 (an entry with the -// same information as the one contained in new_link is already in the -// list) - new_link is not added to the list and the function returns the -// pointer to the identical entry that already exists in the list -// (otherwise the function returns new_link). -ELIST_LINK *ELIST::add_sorted_and_find(int comparator(const void *, const void *), bool unique, - ELIST_LINK *new_link) { - // Check for adding at the end. - if (last == nullptr || comparator(&last, &new_link) < 0) { - if (last == nullptr) { - new_link->next = new_link; - } else { - new_link->next = last->next; - last->next = new_link; - } - last = new_link; - } else { - // Need to use an iterator. - ELIST_ITERATOR it(this); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ELIST_LINK *link = it.data(); - int compare = comparator(&link, &new_link); - if (compare > 0) { - break; - } else if (unique && compare == 0) { - return link; - } - } - if (it.cycled_list()) { - it.add_to_end(new_link); - } else { - it.add_before_then_move(new_link); - } - } - return new_link; -} - -/*********************************************************************** - * MEMBER FUNCTIONS OF CLASS: ELIST_ITERATOR - * ========================================= - **********************************************************************/ - -/*********************************************************************** - * ELIST_ITERATOR::forward - * - * Move the iterator to the next element of the list. - * REMEMBER: ALL LISTS ARE CIRCULAR. - **********************************************************************/ - -ELIST_LINK *ELIST_ITERATOR::forward() { -#ifndef NDEBUG - if (!list) - NO_LIST.error("ELIST_ITERATOR::forward", ABORT); -#endif - if (list->empty()) { - return nullptr; - } - - if (current) { // not removed so - // set previous - prev = current; - started_cycling = true; - // In case next is deleted by another iterator, get next from current. 
- current = current->next; - } else { - if (ex_current_was_cycle_pt) { - cycle_pt = next; - } - current = next; - } -#ifndef NDEBUG - if (!current) - NULL_DATA.error("ELIST_ITERATOR::forward", ABORT); -#endif - next = current->next; - -#ifndef NDEBUG - if (!next) { - NULL_NEXT.error("ELIST_ITERATOR::forward", ABORT, - "This is: %p Current is: %p", - static_cast(this), - static_cast(current)); - } -#endif - return current; -} - -/*********************************************************************** - * ELIST_ITERATOR::data_relative - * - * Return the data pointer to the element "offset" elements from current. - * "offset" must not be less than -1. - * (This function can't be INLINEd because it contains a loop) - **********************************************************************/ - -ELIST_LINK *ELIST_ITERATOR::data_relative( // get data + or - ... - int8_t offset) { // offset from current - ELIST_LINK *ptr; - -#ifndef NDEBUG - if (!list) - NO_LIST.error("ELIST_ITERATOR::data_relative", ABORT); - if (list->empty()) - EMPTY_LIST.error("ELIST_ITERATOR::data_relative", ABORT); - if (offset < -1) - BAD_PARAMETER.error("ELIST_ITERATOR::data_relative", ABORT, "offset < -l"); -#endif - - if (offset == -1) { - ptr = prev; - } else { - for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) { - ; - } - } - -#ifndef NDEBUG - if (!ptr) - NULL_DATA.error("ELIST_ITERATOR::data_relative", ABORT); -#endif - - return ptr; -} - -/*********************************************************************** - * ELIST_ITERATOR::move_to_last() - * - * Move current so that it is set to the end of the list. - * Return data just in case anyone wants it. 
- * (This function can't be INLINEd because it contains a loop) - **********************************************************************/ - -ELIST_LINK *ELIST_ITERATOR::move_to_last() { -#ifndef NDEBUG - if (!list) - NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT); -#endif - - while (current != list->last) { - forward(); - } - - return current; -} - -/*********************************************************************** - * ELIST_ITERATOR::exchange() - * - * Given another iterator, whose current element is a different element on - * the same list list OR an element of another list, exchange the two current - * elements. On return, each iterator points to the element which was the - * other iterators current on entry. - * (This function hasn't been in-lined because its a bit big!) - **********************************************************************/ - -void ELIST_ITERATOR::exchange( // positions of 2 links - ELIST_ITERATOR *other_it) { // other iterator - constexpr ERRCODE DONT_EXCHANGE_DELETED("Can't exchange deleted elements of lists"); - - ELIST_LINK *old_current; - -#ifndef NDEBUG - if (!list) - NO_LIST.error("ELIST_ITERATOR::exchange", ABORT); - if (!other_it) - BAD_PARAMETER.error("ELIST_ITERATOR::exchange", ABORT, "other_it nullptr"); - if (!(other_it->list)) - NO_LIST.error("ELIST_ITERATOR::exchange", ABORT, "other_it"); -#endif - - /* Do nothing if either list is empty or if both iterators reference the same -link */ - - if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current)) { - return; - } - - /* Error if either current element is deleted */ - - if (!current || !other_it->current) { - DONT_EXCHANGE_DELETED.error("ELIST_ITERATOR.exchange", ABORT); - } - - /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements -(other before this); non-doubleton adjacent elements (this before other); -non-adjacent elements. 
*/ - - // adjacent links - if ((next == other_it->current) || (other_it->next == current)) { - // doubleton list - if ((next == other_it->current) && (other_it->next == current)) { - prev = next = current; - other_it->prev = other_it->next = other_it->current; - } else { // non-doubleton with - // adjacent links - // other before this - if (other_it->next == current) { - other_it->prev->next = current; - other_it->current->next = next; - current->next = other_it->current; - other_it->next = other_it->current; - prev = current; - } else { // this before other - prev->next = other_it->current; - current->next = other_it->next; - other_it->current->next = current; - next = current; - other_it->prev = other_it->current; - } - } - } else { // no overlap - prev->next = other_it->current; - current->next = other_it->next; - other_it->prev->next = current; - other_it->current->next = next; - } - - /* update end of list pointer when necessary (remember that the 2 iterators - may iterate over different lists!) */ - - if (list->last == current) { - list->last = other_it->current; - } - if (other_it->list->last == other_it->current) { - other_it->list->last = current; - } - - if (current == cycle_pt) { - cycle_pt = other_it->cycle_pt; - } - if (other_it->current == other_it->cycle_pt) { - other_it->cycle_pt = cycle_pt; - } - - /* The actual exchange - in all cases*/ - - old_current = current; - current = other_it->current; - other_it->current = old_current; -} - -/*********************************************************************** - * ELIST_ITERATOR::extract_sublist() - * - * This is a private member, used only by ELIST::assign_to_sublist. - * Given another iterator for the same list, extract the links from THIS to - * OTHER inclusive, link them into a new circular list, and return a - * pointer to the last element. 
- * (Can't inline this function because it contains a loop) - **********************************************************************/ - -ELIST_LINK *ELIST_ITERATOR::extract_sublist( // from this current - ELIST_ITERATOR *other_it) { // to other current -#ifndef NDEBUG - constexpr ERRCODE BAD_EXTRACTION_PTS("Can't extract sublist from points on different lists"); - constexpr ERRCODE DONT_EXTRACT_DELETED("Can't extract a sublist marked by deleted points"); -#endif - constexpr ERRCODE BAD_SUBLIST("Can't find sublist end point in original list"); - - ELIST_ITERATOR temp_it = *this; - ELIST_LINK *end_of_new_list; - -#ifndef NDEBUG - if (!other_it) - BAD_PARAMETER.error("ELIST_ITERATOR::extract_sublist", ABORT, "other_it nullptr"); - if (!list) - NO_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT); - if (list != other_it->list) - BAD_EXTRACTION_PTS.error("ELIST_ITERATOR.extract_sublist", ABORT); - if (list->empty()) - EMPTY_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT); - - if (!current || !other_it->current) - DONT_EXTRACT_DELETED.error("ELIST_ITERATOR.extract_sublist", ABORT); -#endif - - ex_current_was_last = other_it->ex_current_was_last = false; - ex_current_was_cycle_pt = false; - other_it->ex_current_was_cycle_pt = false; - - temp_it.mark_cycle_pt(); - do { // walk sublist - if (temp_it.cycled_list()) { // can't find end pt - BAD_SUBLIST.error("ELIST_ITERATOR.extract_sublist", ABORT); - } - - if (temp_it.at_last()) { - list->last = prev; - ex_current_was_last = other_it->ex_current_was_last = true; - } - - if (temp_it.current == cycle_pt) { - ex_current_was_cycle_pt = true; - } - - if (temp_it.current == other_it->cycle_pt) { - other_it->ex_current_was_cycle_pt = true; - } - - temp_it.forward(); - } while (temp_it.prev != other_it->current); - - // circularise sublist - other_it->current->next = current; - end_of_new_list = other_it->current; - - // sublist = whole list - if (prev == other_it->current) { - list->last = nullptr; - prev = current = next = 
nullptr; - other_it->prev = other_it->current = other_it->next = nullptr; - } else { - prev->next = other_it->next; - current = other_it->current = nullptr; - next = other_it->next; - other_it->prev = prev; - } - return end_of_new_list; -} - -} // namespace tesseract diff --git a/src/ccutil/elst.h b/src/ccutil/elst.h index 040ce2a488..b49298a714 100644 --- a/src/ccutil/elst.h +++ b/src/ccutil/elst.h @@ -19,16 +19,14 @@ #ifndef ELST_H #define ELST_H -#include "list.h" #include "lsterr.h" #include "serialis.h" +#include #include namespace tesseract { -class ELIST_ITERATOR; - /********************************************************************** This module implements list classes and iterators. The following list types and iterators are provided: @@ -68,744 +66,1061 @@ list class - though macros can generate these. It also prevents heterogeneous lists. **********************************************************************/ -/********************************************************************** - * CLASS - ELIST_LINK - * - * Generic link class for singly linked lists with - *embedded links - * - * Note: No destructor - elements are assumed to be destroyed EITHER after - * they have been extracted from a list OR by the ELIST destructor which - * walks the list. - **********************************************************************/ - -class ELIST_LINK { - friend class ELIST_ITERATOR; - friend class ELIST; - - ELIST_LINK *next; - -public: - ELIST_LINK() { - next = nullptr; - } - // constructor - - // The special copy constructor is used by lots of classes. - ELIST_LINK(const ELIST_LINK &) { - next = nullptr; - } - - // The special assignment operator is used by lots of classes. 
- void operator=(const ELIST_LINK &) { - next = nullptr; - } -}; - /********************************************************************** * CLASS - ELIST * * Generic list class for singly linked lists with embedded links **********************************************************************/ -class TESS_API ELIST { - friend class ELIST_ITERATOR; - - ELIST_LINK *last = nullptr; // End of list - //(Points to head) - ELIST_LINK *First() { // return first - return last ? last->next : nullptr; - } - +template +class IntrusiveForwardList { public: - // destroy all links - void internal_clear(void (*zapper)(void *)); - - bool empty() const { - return !last; - } - - bool singleton() const { - return last ? (last == last->next) : false; - } - - void shallow_copy( // dangerous!! - ELIST *from_list) { // beware destructors!! - last = from_list->last; - } - - // ptr to copier functn - void internal_deep_copy(ELIST_LINK *(*copier)(ELIST_LINK *), - const ELIST *list); // list being copied - - void assign_to_sublist( // to this list - ELIST_ITERATOR *start_it, // from list start - ELIST_ITERATOR *end_it); // from list end - - // # elements in list - int32_t length() const { - int32_t count = 0; - if (last != nullptr) { - count = 1; - for (auto it = last->next; it != last; it = it->next) { - count++; - } + /********************************************************************** + * CLASS - ELIST_LINK + * + * Generic link class for singly linked lists with + *embedded links + * + * Note: No destructor - elements are assumed to be destroyed EITHER after + * they have been extracted from a list OR by the IntrusiveForwardList destructor which + * walks the list. 
+ **********************************************************************/ + + class Link { + friend class Iterator; + friend class IntrusiveForwardList; + + T *next; + + public: + Link() { + next = nullptr; } - return count; - } - - void sort( // sort elements - int comparator( // comparison routine - const void *, const void *)); + // constructor - // Assuming list has been sorted already, insert new_link to - // keep the list sorted according to the same comparison function. - // Comparison function is the same as used by sort, i.e. uses double - // indirection. Time is O(1) to add to beginning or end. - // Time is linear to add pre-sorted items to an empty list. - // If unique is set to true and comparator() returns 0 (an entry with the - // same information as the one contained in new_link is already in the - // list) - new_link is not added to the list and the function returns the - // pointer to the identical entry that already exists in the list - // (otherwise the function returns new_link). - ELIST_LINK *add_sorted_and_find(int comparator(const void *, const void *), bool unique, - ELIST_LINK *new_link); - - // Same as above, but returns true if the new entry was inserted, false - // if the identical entry already existed in the list. 
- bool add_sorted(int comparator(const void *, const void *), bool unique, ELIST_LINK *new_link) { - return (add_sorted_and_find(comparator, unique, new_link) == new_link); - } -}; - -/*********************************************************************** - * CLASS - ELIST_ITERATOR - * - * Generic iterator class for singly linked lists with - *embedded links - **********************************************************************/ - -class TESS_API ELIST_ITERATOR { - friend void ELIST::assign_to_sublist(ELIST_ITERATOR *, ELIST_ITERATOR *); - - ELIST *list; // List being iterated - ELIST_LINK *prev; // prev element - ELIST_LINK *current; // current element - ELIST_LINK *next; // next element - ELIST_LINK *cycle_pt; // point we are cycling the list to. - bool ex_current_was_last; // current extracted was end of list - bool ex_current_was_cycle_pt; // current extracted was cycle point - bool started_cycling; // Have we moved off the start? - - ELIST_LINK *extract_sublist( // from this current... - ELIST_ITERATOR *other_it); // to other current - -public: - ELIST_ITERATOR() { // constructor - list = nullptr; - } // unassigned list + // The special copy constructor is used by lots of classes. + Link(const Link &) { + next = nullptr; + } - explicit ELIST_ITERATOR(ELIST *list_to_iterate); + // The special assignment operator is used by lots of classes. 
+ void operator=(const Link &) { + next = nullptr; + } + }; + using LINK = Link; // compat + + /*********************************************************************** + * CLASS - ELIST_ITERATOR + * + * Generic iterator class for singly linked lists with + *embedded links + **********************************************************************/ + + class Iterator { + friend void IntrusiveForwardList::assign_to_sublist(Iterator *, Iterator *); + + IntrusiveForwardList *list; // List being iterated + T *prev; // prev element + T *current; // current element + T *next; // next element + T *cycle_pt; // point we are cycling the list to. + bool ex_current_was_last; // current extracted was end of list + bool ex_current_was_cycle_pt; // current extracted was cycle point + bool started_cycling; // Have we moved off the start? + /*********************************************************************** + * Iterator::extract_sublist() + * + * This is a private member, used only by IntrusiveForwardList::assign_to_sublist. + * Given another iterator for the same list, extract the links from THIS to + * OTHER inclusive, link them into a new circular list, and return a + * pointer to the last element. + * (Can't inline this function because it contains a loop) + **********************************************************************/ + T *extract_sublist( // from this current... 
+ Iterator *other_it) { // to other current +#ifndef NDEBUG + constexpr ERRCODE BAD_EXTRACTION_PTS("Can't extract sublist from points on different lists"); + constexpr ERRCODE DONT_EXTRACT_DELETED("Can't extract a sublist marked by deleted points"); +#endif + constexpr ERRCODE BAD_SUBLIST("Can't find sublist end point in original list"); - void set_to_list( // change list - ELIST *list_to_iterate); + Iterator temp_it = *this; + T *end_of_new_list; - void add_after_then_move( // add after current & - ELIST_LINK *new_link); // move to new +#ifndef NDEBUG + if (!other_it) + BAD_PARAMETER.error("ELIST_ITERATOR::extract_sublist", ABORT, "other_it nullptr"); + if (!list) + NO_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT); + if (list != other_it->list) + BAD_EXTRACTION_PTS.error("ELIST_ITERATOR.extract_sublist", ABORT); + if (list->empty()) + EMPTY_LIST.error("ELIST_ITERATOR::extract_sublist", ABORT); + + if (!current || !other_it->current) + DONT_EXTRACT_DELETED.error("ELIST_ITERATOR.extract_sublist", ABORT); +#endif - void add_after_stay_put( // add after current & - ELIST_LINK *new_link); // stay at current + ex_current_was_last = other_it->ex_current_was_last = false; + ex_current_was_cycle_pt = false; + other_it->ex_current_was_cycle_pt = false; - void add_before_then_move( // add before current & - ELIST_LINK *new_link); // move to new + temp_it.mark_cycle_pt(); + do { // walk sublist + if (temp_it.cycled_list()) { // can't find end pt + BAD_SUBLIST.error("Iterator.extract_sublist", ABORT); + } - void add_before_stay_put( // add before current & - ELIST_LINK *new_link); // stay at current + if (temp_it.at_last()) { + list->last = prev; + ex_current_was_last = other_it->ex_current_was_last = true; + } - void add_list_after( // add a list & - ELIST *list_to_add); // stay at current + if (temp_it.current == cycle_pt) { + ex_current_was_cycle_pt = true; + } - void add_list_before( // add a list & - ELIST *list_to_add); // move to it 1st item + if (temp_it.current 
== other_it->cycle_pt) { + other_it->ex_current_was_cycle_pt = true; + } - ELIST_LINK *data() { // get current data -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::data", ABORT); + temp_it.forward(); + } while (temp_it.prev != other_it->current); + + // circularise sublist + other_it->current->next = current; + end_of_new_list = other_it->current; + + // sublist = whole list + if (prev == other_it->current) { + list->last = nullptr; + prev = current = next = nullptr; + other_it->prev = other_it->current = other_it->next = nullptr; + } else { + prev->next = other_it->next; + current = other_it->current = nullptr; + next = other_it->next; + other_it->prev = prev; + } + return end_of_new_list; + } // to other current + + public: + Iterator() { // constructor + list = nullptr; + } // unassigned list + /*********************************************************************** + * ELIST_ITERATOR::ELIST_ITERATOR + * + * CONSTRUCTOR - set iterator to specified list; + **********************************************************************/ + Iterator(IntrusiveForwardList *list_to_iterate) { + set_to_list(list_to_iterate); } - if (!current) { - NULL_DATA.error("ELIST_ITERATOR::data", ABORT); + /*********************************************************************** + * ELIST_ITERATOR::set_to_list + * + * (Re-)initialise the iterator to point to the start of the list_to_iterate + * over. + **********************************************************************/ + void set_to_list( // change list + IntrusiveForwardList *list_to_iterate) { +#ifndef NDEBUG + if (!list_to_iterate) { + BAD_PARAMETER.error("ELIST_ITERATOR::set_to_list", ABORT, "list_to_iterate is nullptr"); + } +#endif + + list = list_to_iterate; + prev = list->last; + current = list->First(); + next = current ? 
current->next : nullptr; + cycle_pt = nullptr; // await explicit set + started_cycling = false; + ex_current_was_last = false; + ex_current_was_cycle_pt = false; } + /*********************************************************************** + * ELIST_ITERATOR::add_after_then_move + * + * Add a new element to the list after the current element and move the + * iterator to the new element. + **********************************************************************/ + void add_after_then_move( // add after current & + T *new_element) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST_ITERATOR::add_after_then_move", ABORT); + } + if (!new_element) { + BAD_PARAMETER.error("ELIST_ITERATOR::add_after_then_move", ABORT, "new_element is nullptr"); + } + if (new_element->next) { + STILL_LINKED.error("ELIST_ITERATOR::add_after_then_move", ABORT); + } #endif - return current; - } - ELIST_LINK *data_relative( // get data + or - ... - int8_t offset); // offset from current + if (list->empty()) { + new_element->next = new_element; + list->last = new_element; + prev = next = new_element; + } else { + new_element->next = next; + + if (current) { // not extracted + current->next = new_element; + prev = current; + if (current == list->last) { + list->last = new_element; + } + } else { // current extracted + prev->next = new_element; + if (ex_current_was_last) { + list->last = new_element; + } + if (ex_current_was_cycle_pt) { + cycle_pt = new_element; + } + } + } + current = new_element; + } // move to new + /*********************************************************************** + * ELIST_ITERATOR::add_after_stay_put + * + * Add a new element to the list after the current element but do not move + * the iterator to the new element. 
+ **********************************************************************/ + void add_after_stay_put( // add after current & + T *new_element) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST_ITERATOR::add_after_stay_put", ABORT); + } + if (!new_element) { + BAD_PARAMETER.error("ELIST_ITERATOR::add_after_stay_put", ABORT, "new_element is nullptr"); + } + if (new_element->next) { + STILL_LINKED.error("ELIST_ITERATOR::add_after_stay_put", ABORT); + } +#endif - ELIST_LINK *forward(); // move to next element + if (list->empty()) { + new_element->next = new_element; + list->last = new_element; + prev = next = new_element; + ex_current_was_last = false; + current = nullptr; + } else { + new_element->next = next; + + if (current) { // not extracted + current->next = new_element; + if (prev == current) { + prev = new_element; + } + if (current == list->last) { + list->last = new_element; + } + } else { // current extracted + prev->next = new_element; + if (ex_current_was_last) { + list->last = new_element; + ex_current_was_last = false; + } + } + next = new_element; + } + } // stay at current + /*********************************************************************** + * ELIST_ITERATOR::add_before_then_move + * + * Add a new element to the list before the current element and move the + * iterator to the new element. 
+ **********************************************************************/ + void add_before_then_move( // add before current & + T *new_element) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST_ITERATOR::add_before_then_move", ABORT); + } + if (!new_element) { + BAD_PARAMETER.error("ELIST_ITERATOR::add_before_then_move", ABORT, "new_element is nullptr"); + } + if (new_element->next) { + STILL_LINKED.error("ELIST_ITERATOR::add_before_then_move", ABORT); + } +#endif - ELIST_LINK *extract(); // remove from list + if (list->empty()) { + new_element->next = new_element; + list->last = new_element; + prev = next = new_element; + } else { + prev->next = new_element; + if (current) { // not extracted + new_element->next = current; + next = current; + } else { // current extracted + new_element->next = next; + if (ex_current_was_last) { + list->last = new_element; + } + if (ex_current_was_cycle_pt) { + cycle_pt = new_element; + } + } + } + current = new_element; + } // move to new + /*********************************************************************** + * ELIST_ITERATOR::add_before_stay_put + * + * Add a new element to the list before the current element but don't move the + * iterator to the new element. 
+ **********************************************************************/ + void add_before_stay_put( // add before current & + T *new_element) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST_ITERATOR::add_before_stay_put", ABORT); + } + if (!new_element) { + BAD_PARAMETER.error("ELIST_ITERATOR::add_before_stay_put", ABORT, "new_element is nullptr"); + } + if (new_element->next) { + STILL_LINKED.error("ELIST_ITERATOR::add_before_stay_put", ABORT); + } +#endif - ELIST_LINK *move_to_first(); // go to start of list + if (list->empty()) { + new_element->next = new_element; + list->last = new_element; + prev = next = new_element; + ex_current_was_last = true; + current = nullptr; + } else { + prev->next = new_element; + if (current) { // not extracted + new_element->next = current; + if (next == current) { + next = new_element; + } + } else { // current extracted + new_element->next = next; + if (ex_current_was_last) { + list->last = new_element; + } + } + prev = new_element; + } + } // stay at current + /*********************************************************************** + * ELIST_ITERATOR::add_list_after + * + * Insert another list to this list after the current element but don't move + *the + * iterator. 
+ **********************************************************************/ + void add_list_after( // add a list & + IntrusiveForwardList *list_to_add) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST_ITERATOR::add_list_after", ABORT); + } + if (!list_to_add) { + BAD_PARAMETER.error("ELIST_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr"); + } +#endif - ELIST_LINK *move_to_last(); // go to end of list + if (!list_to_add->empty()) { + if (list->empty()) { + list->last = list_to_add->last; + prev = list->last; + next = list->First(); + ex_current_was_last = true; + current = nullptr; + } else { + if (current) { // not extracted + current->next = list_to_add->First(); + if (current == list->last) { + list->last = list_to_add->last; + } + list_to_add->last->next = next; + next = current->next; + } else { // current extracted + prev->next = list_to_add->First(); + if (ex_current_was_last) { + list->last = list_to_add->last; + ex_current_was_last = false; + } + list_to_add->last->next = next; + next = prev->next; + } + } + list_to_add->last = nullptr; + } + } // stay at current + /*********************************************************************** + * ELIST_ITERATOR::add_list_before + * + * Insert another list to this list before the current element. Move the + * iterator to the start of the inserted elements + * iterator. 
+ **********************************************************************/ + void add_list_before( // add a list & + IntrusiveForwardList *list_to_add) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST_ITERATOR::add_list_before", ABORT); + } + if (!list_to_add) { + BAD_PARAMETER.error("ELIST_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr"); + } +#endif - void mark_cycle_pt(); // remember current + if (!list_to_add->empty()) { + if (list->empty()) { + list->last = list_to_add->last; + prev = list->last; + current = list->First(); + next = current->next; + ex_current_was_last = false; + } else { + prev->next = list_to_add->First(); + if (current) { // not extracted + list_to_add->last->next = current; + } else { // current extracted + list_to_add->last->next = next; + if (ex_current_was_last) { + list->last = list_to_add->last; + } + if (ex_current_was_cycle_pt) { + cycle_pt = prev->next; + } + } + current = prev->next; + next = current->next; + } + list_to_add->last = nullptr; + } + } // move to it 1st item - bool empty() const { // is list empty? + T *data() { // get current data #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::empty", ABORT); - } + if (!list) { + NO_LIST.error("ELIST_ITERATOR::data", ABORT); + } + if (!current) { + NULL_DATA.error("ELIST_ITERATOR::data", ABORT); + } #endif - return list->empty(); - } - - bool current_extracted() const { // current extracted? - return !current; - } + return current; + } + /*********************************************************************** + * ELIST_ITERATOR::data_relative + * + * Return the data pointer to the element "offset" elements from current. + * "offset" must not be less than -1. + * (This function can't be INLINEd because it contains a loop) + **********************************************************************/ + T *data_relative( // get data + or - ... + int8_t offset) { // offset from current + T *ptr; - bool at_first() const; // Current is first? 
+#ifndef NDEBUG + if (!list) + NO_LIST.error("ELIST_ITERATOR::data_relative", ABORT); + if (list->empty()) + EMPTY_LIST.error("ELIST_ITERATOR::data_relative", ABORT); + if (offset < -1) + BAD_PARAMETER.error("ELIST_ITERATOR::data_relative", ABORT, "offset < -l"); +#endif - bool at_last() const; // Current is last? + if (offset == -1) { + ptr = prev; + } else { + for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) { + ; + } + } - bool cycled_list() const; // Completed a cycle? +#ifndef NDEBUG + if (!ptr) + NULL_DATA.error("ELIST_ITERATOR::data_relative", ABORT); +#endif - void add_to_end( // add at end & - ELIST_LINK *new_link); // don't move + return ptr; + } // offset from current + /*********************************************************************** + * ELIST_ITERATOR::forward + * + * Move the iterator to the next element of the list. + * REMEMBER: ALL LISTS ARE CIRCULAR. + **********************************************************************/ + T *forward() { +#ifndef NDEBUG + if (!list) + NO_LIST.error("ELIST_ITERATOR::forward", ABORT); +#endif + if (list->empty()) { + return nullptr; + } - void exchange( // positions of 2 links - ELIST_ITERATOR *other_it); // other iterator + if (current) { // not removed so + // set previous + prev = current; + started_cycling = true; + // In case next is deleted by another iterator, get next from current. 
+ current = current->next; + } else { + if (ex_current_was_cycle_pt) { + cycle_pt = next; + } + current = next; + } +#ifndef NDEBUG + if (!current) + NULL_DATA.error("ELIST_ITERATOR::forward", ABORT); +#endif + next = current->next; - //# elements in list - int32_t length() const { - return list->length(); - } +#ifndef NDEBUG + if (!next) { + NULL_NEXT.error("ELIST_ITERATOR::forward", ABORT, + "This is: %p Current is: %p", + static_cast(this), + static_cast(current)); + } +#endif + return current; + } // move to next element + + /*********************************************************************** + * ELIST_ITERATOR::extract + * + * Do extraction by removing current from the list, returning it to the + * caller, but NOT updating the iterator. (So that any calling loop can do + * this.) The iterator's current points to nullptr. If the extracted element + * is to be deleted, this is the callers responsibility. + **********************************************************************/ + T *extract() { + T *extracted_link; - void sort( // sort elements - int comparator( // comparison routine - const void *, const void *)); -}; +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST_ITERATOR::extract", ABORT); + } + if (!current) { // list empty or + // element extracted + NULL_CURRENT.error("ELIST_ITERATOR::extract", ABORT); + } +#endif -/*********************************************************************** - * ELIST_ITERATOR::set_to_list - * - * (Re-)initialise the iterator to point to the start of the list_to_iterate - * over. - **********************************************************************/ + if (list->singleton()) { + // Special case where we do need to change the iterator. 
+ prev = next = list->last = nullptr; + } else { + prev->next = next; // remove from list -inline void ELIST_ITERATOR::set_to_list( // change list - ELIST *list_to_iterate) { + ex_current_was_last = (current == list->last); + if (ex_current_was_last) { + list->last = prev; + } + } + // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. + ex_current_was_cycle_pt = (current == cycle_pt); + extracted_link = current; + extracted_link->next = nullptr; // for safety + current = nullptr; + return extracted_link; + } // remove from list + /*********************************************************************** + * ELIST_ITERATOR::move_to_first() + * + * Move current so that it is set to the start of the list. + * Return data just in case anyone wants it. + **********************************************************************/ + T *move_to_first() { #ifndef NDEBUG - if (!list_to_iterate) { - BAD_PARAMETER.error("ELIST_ITERATOR::set_to_list", ABORT, "list_to_iterate is nullptr"); - } + if (!list) { + NO_LIST.error("ELIST_ITERATOR::move_to_first", ABORT); + } #endif - list = list_to_iterate; - prev = list->last; - current = list->First(); - next = current ? current->next : nullptr; - cycle_pt = nullptr; // await explicit set - started_cycling = false; - ex_current_was_last = false; - ex_current_was_cycle_pt = false; -} - -/*********************************************************************** - * ELIST_ITERATOR::ELIST_ITERATOR - * - * CONSTRUCTOR - set iterator to specified list; - **********************************************************************/ - -inline ELIST_ITERATOR::ELIST_ITERATOR(ELIST *list_to_iterate) { - set_to_list(list_to_iterate); -} + current = list->First(); + prev = list->last; + next = current ? 
current->next : nullptr; + return current; + } // go to start of list + /*********************************************************************** + * ELIST_ITERATOR::move_to_last() + * + * Move current so that it is set to the end of the list. + * Return data just in case anyone wants it. + * (This function can't be INLINEd because it contains a loop) + **********************************************************************/ + T *move_to_last() { +#ifndef NDEBUG + if (!list) + NO_LIST.error("ELIST_ITERATOR::move_to_last", ABORT); +#endif -/*********************************************************************** - * ELIST_ITERATOR::add_after_then_move - * - * Add a new element to the list after the current element and move the - * iterator to the new element. - **********************************************************************/ + while (current != list->last) { + forward(); + } -inline void ELIST_ITERATOR::add_after_then_move( // element to add - ELIST_LINK *new_element) { + return current; + } // go to end of list + /*********************************************************************** + * ELIST_ITERATOR::mark_cycle_pt() + * + * Remember the current location so that we can tell whether we've returned + * to this point later. + * + * If the current point is deleted either now, or in the future, the cycle + * point will be set to the next item which is set to current. This could be + * by a forward, add_after_then_move or add_after_then_move. 
+ **********************************************************************/ + void mark_cycle_pt() { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::add_after_then_move", ABORT); - } - if (!new_element) { - BAD_PARAMETER.error("ELIST_ITERATOR::add_after_then_move", ABORT, "new_element is nullptr"); - } - if (new_element->next) { - STILL_LINKED.error("ELIST_ITERATOR::add_after_then_move", ABORT); - } + if (!list) { + NO_LIST.error("ELIST_ITERATOR::mark_cycle_pt", ABORT); + } #endif - if (list->empty()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - } else { - new_element->next = next; - - if (current) { // not extracted - current->next = new_element; - prev = current; - if (current == list->last) { - list->last = new_element; + if (current) { + cycle_pt = current; + } else { + ex_current_was_cycle_pt = true; } - } else { // current extracted - prev->next = new_element; - if (ex_current_was_last) { - list->last = new_element; - } - if (ex_current_was_cycle_pt) { - cycle_pt = new_element; + started_cycling = false; + } // remember current + + bool empty() const { // is list empty? +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST_ITERATOR::empty", ABORT); } +#endif + return list->empty(); } - } - current = new_element; -} -/*********************************************************************** - * ELIST_ITERATOR::add_after_stay_put - * - * Add a new element to the list after the current element but do not move - * the iterator to the new element. - **********************************************************************/ - -inline void ELIST_ITERATOR::add_after_stay_put( // element to add - ELIST_LINK *new_element) { + bool current_extracted() const { // current extracted? + return !current; + } + /*********************************************************************** + * ELIST_ITERATOR::at_first() + * + * Are we at the start of the list? 
+ * + **********************************************************************/ + bool at_first() const { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::add_after_stay_put", ABORT); - } - if (!new_element) { - BAD_PARAMETER.error("ELIST_ITERATOR::add_after_stay_put", ABORT, "new_element is nullptr"); - } - if (new_element->next) { - STILL_LINKED.error("ELIST_ITERATOR::add_after_stay_put", ABORT); - } + if (!list) { + NO_LIST.error("ELIST_ITERATOR::at_first", ABORT); + } #endif - if (list->empty()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = false; - current = nullptr; - } else { - new_element->next = next; - - if (current) { // not extracted - current->next = new_element; - if (prev == current) { - prev = new_element; - } - if (current == list->last) { - list->last = new_element; - } - } else { // current extracted - prev->next = new_element; - if (ex_current_was_last) { - list->last = new_element; - ex_current_was_last = false; + // we're at a deleted + return ((list->empty()) || (current == list->First()) || + ((current == nullptr) && (prev == list->last) && // NON-last pt between + !ex_current_was_last)); // first and last + } // Current is first? + /*********************************************************************** + * ELIST_ITERATOR::at_last() + * + * Are we at the end of the list? + * + **********************************************************************/ + bool at_last() const { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST_ITERATOR::at_last", ABORT); } - } - next = new_element; - } -} - -/*********************************************************************** - * ELIST_ITERATOR::add_before_then_move - * - * Add a new element to the list before the current element and move the - * iterator to the new element. 
- **********************************************************************/ +#endif -inline void ELIST_ITERATOR::add_before_then_move( // element to add - ELIST_LINK *new_element) { + // we're at a deleted + return ((list->empty()) || (current == list->last) || + ((current == nullptr) && (prev == list->last) && // last point between + ex_current_was_last)); // first and last + } // Current is last? + /*********************************************************************** + * ELIST_ITERATOR::cycled_list() + * + * Have we returned to the cycle_pt since it was set? + * + **********************************************************************/ + bool cycled_list() const { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::add_before_then_move", ABORT); - } - if (!new_element) { - BAD_PARAMETER.error("ELIST_ITERATOR::add_before_then_move", ABORT, "new_element is nullptr"); - } - if (new_element->next) { - STILL_LINKED.error("ELIST_ITERATOR::add_before_then_move", ABORT); - } + if (!list) { + NO_LIST.error("ELIST_ITERATOR::cycled_list", ABORT); + } #endif - if (list->empty()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - } else { - prev->next = new_element; - if (current) { // not extracted - new_element->next = current; - next = current; - } else { // current extracted - new_element->next = next; - if (ex_current_was_last) { - list->last = new_element; + return ((list->empty()) || ((current == cycle_pt) && started_cycling)); + } // Completed a cycle? + /*********************************************************************** + * ELIST_ITERATOR::add_to_end + * + * Add a new element to the end of the list without moving the iterator. + * This is provided because a single linked list cannot move to the last as + * the iterator couldn't set its prev pointer. Adding to the end is + * essential for implementing + queues. 
+ **********************************************************************/ + void add_to_end( // add at end & + T *new_element) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST_ITERATOR::add_to_end", ABORT); } - if (ex_current_was_cycle_pt) { - cycle_pt = new_element; + if (!new_element) { + BAD_PARAMETER.error("ELIST_ITERATOR::add_to_end", ABORT, "new_element is nullptr"); } - } - } - current = new_element; -} + if (new_element->next) { + STILL_LINKED.error("ELIST_ITERATOR::add_to_end", ABORT); + } +#endif -/*********************************************************************** - * ELIST_ITERATOR::add_before_stay_put - * - * Add a new element to the list before the current element but don't move the - * iterator to the new element. - **********************************************************************/ + if (this->at_last()) { + this->add_after_stay_put(new_element); + } else { + if (this->at_first()) { + this->add_before_stay_put(new_element); + list->last = new_element; + } else { // Iteratr is elsewhere + new_element->next = list->last->next; + list->last->next = new_element; + list->last = new_element; + } + } + } // don't move + /*********************************************************************** + * ELIST_ITERATOR::exchange() + * + * Given another iterator, whose current element is a different element on + * the same list list OR an element of another list, exchange the two current + * elements. On return, each iterator points to the element which was the + * other iterators current on entry. + * (This function hasn't been in-lined because its a bit big!) 
+ **********************************************************************/ + void exchange( // positions of 2 links + Iterator *other_it) { // other iterator + constexpr ERRCODE DONT_EXCHANGE_DELETED("Can't exchange deleted elements of lists"); + + T *old_current; -inline void ELIST_ITERATOR::add_before_stay_put( // element to add - ELIST_LINK *new_element) { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::add_before_stay_put", ABORT); - } - if (!new_element) { - BAD_PARAMETER.error("ELIST_ITERATOR::add_before_stay_put", ABORT, "new_element is nullptr"); - } - if (new_element->next) { - STILL_LINKED.error("ELIST_ITERATOR::add_before_stay_put", ABORT); - } + if (!list) + NO_LIST.error("ELIST_ITERATOR::exchange", ABORT); + if (!other_it) + BAD_PARAMETER.error("ELIST_ITERATOR::exchange", ABORT, "other_it nullptr"); + if (!(other_it->list)) + NO_LIST.error("ELIST_ITERATOR::exchange", ABORT, "other_it"); #endif - if (list->empty()) { - new_element->next = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = true; - current = nullptr; - } else { - prev->next = new_element; - if (current) { // not extracted - new_element->next = current; - if (next == current) { - next = new_element; - } - } else { // current extracted - new_element->next = next; - if (ex_current_was_last) { - list->last = new_element; + /* Do nothing if either list is empty or if both iterators reference the same + link */ + + if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current)) { + return; } - } - prev = new_element; - } -} -/*********************************************************************** - * ELIST_ITERATOR::add_list_after - * - * Insert another list to this list after the current element but don't move - *the - * iterator. 
- **********************************************************************/ + /* Error if either current element is deleted */ -inline void ELIST_ITERATOR::add_list_after(ELIST *list_to_add) { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::add_list_after", ABORT); - } - if (!list_to_add) { - BAD_PARAMETER.error("ELIST_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr"); - } -#endif + if (!current || !other_it->current) { + DONT_EXCHANGE_DELETED.error("ELIST_ITERATOR.exchange", ABORT); + } - if (!list_to_add->empty()) { - if (list->empty()) { - list->last = list_to_add->last; - prev = list->last; - next = list->First(); - ex_current_was_last = true; - current = nullptr; - } else { - if (current) { // not extracted - current->next = list_to_add->First(); - if (current == list->last) { - list->last = list_to_add->last; - } - list_to_add->last->next = next; - next = current->next; - } else { // current extracted - prev->next = list_to_add->First(); - if (ex_current_was_last) { - list->last = list_to_add->last; - ex_current_was_last = false; + /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements + (other before this); non-doubleton adjacent elements (this before other); + non-adjacent elements. 
*/ + + // adjacent links + if ((next == other_it->current) || (other_it->next == current)) { + // doubleton list + if ((next == other_it->current) && (other_it->next == current)) { + prev = next = current; + other_it->prev = other_it->next = other_it->current; + } else { // non-doubleton with + // adjacent links + // other before this + if (other_it->next == current) { + other_it->prev->next = current; + other_it->current->next = next; + current->next = other_it->current; + other_it->next = other_it->current; + prev = current; + } else { // this before other + prev->next = other_it->current; + current->next = other_it->next; + other_it->current->next = current; + next = current; + other_it->prev = other_it->current; + } } - list_to_add->last->next = next; - next = prev->next; + } else { // no overlap + prev->next = other_it->current; + current->next = other_it->next; + other_it->prev->next = current; + other_it->current->next = next; } - } - list_to_add->last = nullptr; - } -} -/*********************************************************************** - * ELIST_ITERATOR::add_list_before - * - * Insert another list to this list before the current element. Move the - * iterator to the start of the inserted elements - * iterator. - **********************************************************************/ + /* update end of list pointer when necessary (remember that the 2 iterators + may iterate over different lists!) 
*/ -inline void ELIST_ITERATOR::add_list_before(ELIST *list_to_add) { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::add_list_before", ABORT); - } - if (!list_to_add) { - BAD_PARAMETER.error("ELIST_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr"); - } -#endif + if (list->last == current) { + list->last = other_it->current; + } + if (other_it->list->last == other_it->current) { + other_it->list->last = current; + } - if (!list_to_add->empty()) { - if (list->empty()) { - list->last = list_to_add->last; - prev = list->last; - current = list->First(); - next = current->next; - ex_current_was_last = false; - } else { - prev->next = list_to_add->First(); - if (current) { // not extracted - list_to_add->last->next = current; - } else { // current extracted - list_to_add->last->next = next; - if (ex_current_was_last) { - list->last = list_to_add->last; - } - if (ex_current_was_cycle_pt) { - cycle_pt = prev->next; - } + if (current == cycle_pt) { + cycle_pt = other_it->cycle_pt; + } + if (other_it->current == other_it->cycle_pt) { + other_it->cycle_pt = cycle_pt; } - current = prev->next; - next = current->next; - } - list_to_add->last = nullptr; - } -} -/*********************************************************************** - * ELIST_ITERATOR::extract - * - * Do extraction by removing current from the list, returning it to the - * caller, but NOT updating the iterator. (So that any calling loop can do - * this.) The iterator's current points to nullptr. If the extracted element - * is to be deleted, this is the callers responsibility. 
- **********************************************************************/ + /* The actual exchange - in all cases*/ -inline ELIST_LINK *ELIST_ITERATOR::extract() { - ELIST_LINK *extracted_link; + old_current = current; + current = other_it->current; + other_it->current = old_current; + } // other iterator + //# elements in list + int32_t length() const { + return list->length(); + } + /*********************************************************************** + * ELIST_ITERATOR::sort() + * + * Sort the elements of the list, then reposition at the start. + * + **********************************************************************/ + void sort( // sort elements + int comparator( // comparison routine + const T *, const T *)) { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::extract", ABORT); - } - if (!current) { // list empty or - // element extracted - NULL_CURRENT.error("ELIST_ITERATOR::extract", ABORT); - } + if (!list) { + NO_LIST.error("ELIST_ITERATOR::sort", ABORT); + } #endif - if (list->singleton()) { - // Special case where we do need to change the iterator. - prev = next = list->last = nullptr; - } else { - prev->next = next; // remove from list - - ex_current_was_last = (current == list->last); - if (ex_current_was_last) { - list->last = prev; + list->sort(comparator); + move_to_first(); } + }; + using ITERATOR = Iterator; // compat + +private: + T *last = nullptr; // End of list + //(Points to head) + T *First() { // return first + return last ? last->next : nullptr; } - // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. - ex_current_was_cycle_pt = (current == cycle_pt); - extracted_link = current; - extracted_link->next = nullptr; // for safety - current = nullptr; - return extracted_link; -} - -/*********************************************************************** - * ELIST_ITERATOR::move_to_first() - * - * Move current so that it is set to the start of the list. - * Return data just in case anyone wants it. 
- **********************************************************************/ -inline ELIST_LINK *ELIST_ITERATOR::move_to_first() { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::move_to_first", ABORT); +public: + ~IntrusiveForwardList() { + clear(); } -#endif - current = list->First(); - prev = list->last; - next = current ? current->next : nullptr; - return current; -} + /* delete elements */ + void clear() { + internal_clear(); + } -/*********************************************************************** - * ELIST_ITERATOR::mark_cycle_pt() - * - * Remember the current location so that we can tell whether we've returned - * to this point later. - * - * If the current point is deleted either now, or in the future, the cycle - * point will be set to the next item which is set to current. This could be - * by a forward, add_after_then_move or add_after_then_move. - **********************************************************************/ + /* Become a deep copy of src_list */ + template + void deep_copy(const U *src_list, T *(*copier)(const T *)) { + Iterator from_it(const_cast(src_list)); + Iterator to_it(this); -inline void ELIST_ITERATOR::mark_cycle_pt() { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::mark_cycle_pt", ABORT); + for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) + to_it.add_after_then_move((*copier)(from_it.data())); } -#endif - if (current) { - cycle_pt = current; - } else { - ex_current_was_cycle_pt = true; + /*********************************************************************** + * IntrusiveForwardList::internal_clear + * + * Used by the destructor and the "clear" member function of derived list + * classes to destroy all the elements on the list. + * The calling function passes a "zapper" function which can be called to + * delete each element of the list, regardless of its derived type. 
This + * technique permits a generic clear function to destroy elements of + * different derived types correctly, without requiring virtual functions and + * the consequential memory overhead. + **********************************************************************/ + + // destroy all links + void internal_clear() { + T *ptr; + T *next; + + if (!empty()) { + ptr = last->next; // set to first + last->next = nullptr; // break circle + last = nullptr; // set list empty + while (ptr) { + next = ptr->next; + delete ptr; + ptr = next; + } + } } - started_cycling = false; -} -/*********************************************************************** - * ELIST_ITERATOR::at_first() - * - * Are we at the start of the list? - * - **********************************************************************/ + bool empty() const { + return !last; + } -inline bool ELIST_ITERATOR::at_first() const { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::at_first", ABORT); + bool singleton() const { + return last ? (last == last->next) : false; } -#endif - // we're at a deleted - return ((list->empty()) || (current == list->First()) || - ((current == nullptr) && (prev == list->last) && // NON-last pt between - !ex_current_was_last)); // first and last -} + void shallow_copy( // dangerous!! + IntrusiveForwardList *from_list) { // beware destructors!! + last = from_list->last; + } -/*********************************************************************** - * ELIST_ITERATOR::at_last() - * - * Are we at the end of the list? + /*********************************************************************** + * IntrusiveForwardList::assign_to_sublist * + * The list is set to a sublist of another list. "This" list must be empty + * before this function is invoked. The two iterators passed must refer to + * the same list, different from "this" one. The sublist removed is the + * inclusive list from start_it's current position to end_it's current + * position. 
If this range passes over the end of the source list then the + * source list has its end set to the previous element of start_it. The + * extracted sublist is unaffected by the end point of the source list, its + * end point is always the end_it position. **********************************************************************/ + void assign_to_sublist( // to this list + Iterator *start_it, // from list start + Iterator *end_it) { // from list end + constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist"); -inline bool ELIST_ITERATOR::at_last() const { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::at_last", ABORT); - } -#endif - - // we're at a deleted - return ((list->empty()) || (current == list->last) || - ((current == nullptr) && (prev == list->last) && // last point between - ex_current_was_last)); // first and last -} + if (!empty()) { + LIST_NOT_EMPTY.error("IntrusiveForwardList.assign_to_sublist", ABORT); + } -/*********************************************************************** - * ELIST_ITERATOR::cycled_list() - * - * Have we returned to the cycle_pt since it was set? - * - **********************************************************************/ + last = start_it->extract_sublist(end_it); + } // from list end -inline bool ELIST_ITERATOR::cycled_list() const { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::cycled_list", ABORT); + // # elements in list + int32_t length() const { + int32_t count = 0; + if (last != nullptr) { + count = 1; + for (auto it = last->next; it != last; it = it->next) { + count++; + } + } + return count; } -#endif - - return ((list->empty()) || ((current == cycle_pt) && started_cycling)); -} -/*********************************************************************** - * ELIST_ITERATOR::sort() - * - * Sort the elements of the list, then reposition at the start. 
+ /*********************************************************************** + * IntrusiveForwardList::sort * + * Sort elements on list + * NB If you don't like the const declarations in the comparator, coerce yours: + * ( int (*)(const void *, const void *) **********************************************************************/ + void sort( // sort elements + int comparator( // comparison routine + const T *, const T *)) { + // Allocate an array of pointers, one per list element. + auto count = length(); -inline void ELIST_ITERATOR::sort( // sort elements - int comparator( // comparison routine - const void *, const void *)) { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::sort", ABORT); - } -#endif + if (count > 0) { + // ptr array to sort + std::vector base; + base.reserve(count); - list->sort(comparator); - move_to_first(); -} + Iterator it(this); -/*********************************************************************** - * ELIST_ITERATOR::add_to_end - * - * Add a new element to the end of the list without moving the iterator. - * This is provided because a single linked list cannot move to the last as - * the iterator couldn't set its prev pointer. Adding to the end is - * essential for implementing - queues. -**********************************************************************/ + // Extract all elements, putting the pointers in the array. + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + base.push_back(it.extract()); + } -inline void ELIST_ITERATOR::add_to_end( // element to add - ELIST_LINK *new_element) { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST_ITERATOR::add_to_end", ABORT); - } - if (!new_element) { - BAD_PARAMETER.error("ELIST_ITERATOR::add_to_end", ABORT, "new_element is nullptr"); - } - if (new_element->next) { - STILL_LINKED.error("ELIST_ITERATOR::add_to_end", ABORT); + // Sort the pointer array. 
+ std::sort(base.begin(), base.end(), + // all current comparators return -1,0,1, so we handle this correctly for std::sort + [&](auto &&l, auto &&r) {return comparator(l, r) < 0; }); + + // Rebuild the list from the sorted pointers. + for (auto current : base) { + it.add_to_end(current); + } + } } -#endif - if (this->at_last()) { - this->add_after_stay_put(new_element); - } else { - if (this->at_first()) { - this->add_before_stay_put(new_element); - list->last = new_element; - } else { // Iteratr is elsewhere - new_element->next = list->last->next; - list->last->next = new_element; - list->last = new_element; + // Assuming list has been sorted already, insert new_link to + // keep the list sorted according to the same comparison function. + // Comparison function is the same as used by sort, i.e. uses double + // indirection. Time is O(1) to add to beginning or end. + // Time is linear to add pre-sorted items to an empty list. + // If unique is set to true and comparator() returns 0 (an entry with the + // same information as the one contained in new_link is already in the + // list) - new_link is not added to the list and the function returns the + // pointer to the identical entry that already exists in the list + // (otherwise the function returns new_link). + T *add_sorted_and_find(int comparator(const T *, const T *), bool unique, + T *new_link) { + // Check for adding at the end. + if (last == nullptr || comparator(last, new_link) < 0) { + if (last == nullptr) { + new_link->next = new_link; + } else { + new_link->next = last->next; + last->next = new_link; + } + last = new_link; + } else { + // Need to use an iterator. 
+ Iterator it(this); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + auto *link = it.data(); + int compare = comparator(link, new_link); + if (compare > 0) { + break; + } else if (unique && compare == 0) { + return link; + } + } + if (it.cycled_list()) { + it.add_to_end(new_link); + } else { + it.add_before_then_move(new_link); + } } + return new_link; + } + + // Same as above, but returns true if the new entry was inserted, false + // if the identical entry already existed in the list. + bool add_sorted(int comparator(const T *, const T *), bool unique, T *new_link) { + return (add_sorted_and_find(comparator, unique, new_link) == new_link); } -} - -#define ELISTIZEH(CLASSNAME) \ - class CLASSNAME##_LIST : public X_LIST { \ - using X_LIST::X_LIST; \ - }; \ - class CLASSNAME##_IT : public X_ITER { \ - using X_ITER::X_ITER; \ +}; + +template +using ELIST = IntrusiveForwardList; + +// add TESS_API? +// move templated lists to public include dirs? +#define ELISTIZEH(T) \ + class T##_LIST : public IntrusiveForwardList { \ + public: \ + using IntrusiveForwardList::IntrusiveForwardList; \ + }; \ + class T##_IT : public IntrusiveForwardList::Iterator { \ + public: \ + using IntrusiveForwardList::Iterator::Iterator; \ }; } // namespace tesseract diff --git a/src/ccutil/elst2.cpp b/src/ccutil/elst2.cpp deleted file mode 100644 index 64d22fdb65..0000000000 --- a/src/ccutil/elst2.cpp +++ /dev/null @@ -1,476 +0,0 @@ -/********************************************************************** - * File: elst2.cpp (Formerly elist2.c) - * Description: Doubly linked embedded list code not in the include file. - * Author: Phil Cheatle - * - * (C) Copyright 1991, Hewlett-Packard Ltd. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. 
- ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#include "elst2.h" - -#include - -namespace tesseract { - -/*********************************************************************** - * ELIST2::internal_clear - * - * Used by the destructor and the "clear" member function of derived list - * classes to destroy all the elements on the list. - * The calling function passes a "zapper" function which can be called to - * delete each element of the list, regardless of its derived type. This - * technique permits a generic clear function to destroy elements of - * different derived types correctly, without requiring virtual functions and - * the consequential memory overhead. - **********************************************************************/ - -void ELIST2::internal_clear( // destroy all links - void (*zapper)(void *)) { - // ptr to zapper functn - ELIST2_LINK *ptr; - ELIST2_LINK *next; - - if (!empty()) { - ptr = last->next; // set to first - last->next = nullptr; // break circle - last = nullptr; // set list empty - while (ptr) { - next = ptr->next; - zapper(ptr); - ptr = next; - } - } -} - -/*********************************************************************** - * ELIST2::assign_to_sublist - * - * The list is set to a sublist of another list. "This" list must be empty - * before this function is invoked. The two iterators passed must refer to - * the same list, different from "this" one. The sublist removed is the - * inclusive list from start_it's current position to end_it's current - * position. 
If this range passes over the end of the source list then the - * source list has its end set to the previous element of start_it. The - * extracted sublist is unaffected by the end point of the source list, its - * end point is always the end_it position. - **********************************************************************/ - -void ELIST2::assign_to_sublist( // to this list - ELIST2_ITERATOR *start_it, // from list start - ELIST2_ITERATOR *end_it) { // from list end - constexpr ERRCODE LIST_NOT_EMPTY("Destination list must be empty before extracting a sublist"); - - if (!empty()) { - LIST_NOT_EMPTY.error("ELIST2.assign_to_sublist", ABORT); - } - - last = start_it->extract_sublist(end_it); -} - -/*********************************************************************** - * ELIST2::sort - * - * Sort elements on list - * NB If you don't like the const declarations in the comparator, coerce yours: - * (int (*)(const void *, const void *) - **********************************************************************/ - -void ELIST2::sort( // sort elements - int comparator( // comparison routine - const void *, const void *)) { - // Allocate an array of pointers, one per list element. - auto count = length(); - if (count > 0) { - // ptr array to sort - std::vector base; - base.reserve(count); - - ELIST2_ITERATOR it(this); - - // Extract all elements, putting the pointers in the array. - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - base.push_back(it.extract()); - } - - // Sort the pointer array. - qsort(&base[0], count, sizeof(base[0]), comparator); - - // Rebuild the list from the sorted pointers. - for (auto current : base) { - it.add_to_end(current); - } - } -} - -// Assuming list has been sorted already, insert new_link to -// keep the list sorted according to the same comparison function. -// Comparison function is the same as used by sort, i.e. uses double -// indirection. Time is O(1) to add to beginning or end. 
-// Time is linear to add pre-sorted items to an empty list. -void ELIST2::add_sorted(int comparator(const void *, const void *), ELIST2_LINK *new_link) { - // Check for adding at the end. - if (last == nullptr || comparator(&last, &new_link) < 0) { - if (last == nullptr) { - new_link->next = new_link; - new_link->prev = new_link; - } else { - new_link->next = last->next; - new_link->prev = last; - last->next = new_link; - new_link->next->prev = new_link; - } - last = new_link; - } else { - // Need to use an iterator. - ELIST2_ITERATOR it(this); - for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { - ELIST2_LINK *link = it.data(); - if (comparator(&link, &new_link) > 0) { - break; - } - } - if (it.cycled_list()) { - it.add_to_end(new_link); - } else { - it.add_before_then_move(new_link); - } - } -} - -/*********************************************************************** - * MEMBER FUNCTIONS OF CLASS: ELIST2_ITERATOR - * ========================================== - **********************************************************************/ - -/*********************************************************************** - * ELIST2_ITERATOR::forward - * - * Move the iterator to the next element of the list. - * REMEMBER: ALL LISTS ARE CIRCULAR. - **********************************************************************/ - -ELIST2_LINK *ELIST2_ITERATOR::forward() { -#ifndef NDEBUG - if (!list) - NO_LIST.error("ELIST2_ITERATOR::forward", ABORT); -#endif - if (list->empty()) { - return nullptr; - } - - if (current) { // not removed so - // set previous - prev = current; - started_cycling = true; - // In case next is deleted by another iterator, get it from the current. 
- current = current->next; - } else { - if (ex_current_was_cycle_pt) { - cycle_pt = next; - } - current = next; - } - -#ifndef NDEBUG - if (!current) - NULL_DATA.error("ELIST2_ITERATOR::forward", ABORT); -#endif - - next = current->next; - -#ifndef NDEBUG - if (!next) { - NULL_NEXT.error("ELIST2_ITERATOR::forward", ABORT, - "This is: %p Current is: %p", - static_cast(this), - static_cast(current)); - } -#endif - - return current; -} - -/*********************************************************************** - * ELIST2_ITERATOR::backward - * - * Move the iterator to the previous element of the list. - * REMEMBER: ALL LISTS ARE CIRCULAR. - **********************************************************************/ - -ELIST2_LINK *ELIST2_ITERATOR::backward() { -#ifndef NDEBUG - if (!list) - NO_LIST.error("ELIST2_ITERATOR::backward", ABORT); -#endif - if (list->empty()) { - return nullptr; - } - - if (current) { // not removed so - // set previous - next = current; - started_cycling = true; - // In case prev is deleted by another iterator, get it from current. - current = current->prev; - } else { - if (ex_current_was_cycle_pt) { - cycle_pt = prev; - } - current = prev; - } - -#ifndef NDEBUG - if (!current) - NULL_DATA.error("ELIST2_ITERATOR::backward", ABORT); - if (!prev) { - NULL_PREV.error("ELIST2_ITERATOR::backward", ABORT, - "This is: %p Current is: %p", - static_cast(this), - static_cast(current)); - } -#endif - - prev = current->prev; - return current; -} - -/*********************************************************************** - * ELIST2_ITERATOR::data_relative - * - * Return the data pointer to the element "offset" elements from current. - * (This function can't be INLINEd because it contains a loop) - **********************************************************************/ - -ELIST2_LINK *ELIST2_ITERATOR::data_relative( // get data + or - .. 
- int8_t offset) { // offset from current - ELIST2_LINK *ptr; - -#ifndef NDEBUG - if (!list) - NO_LIST.error("ELIST2_ITERATOR::data_relative", ABORT); - if (list->empty()) - EMPTY_LIST.error("ELIST2_ITERATOR::data_relative", ABORT); -#endif - - if (offset < 0) { - for (ptr = current ? current : next; offset++ < 0; ptr = ptr->prev) { - ; - } - } else { - for (ptr = current ? current : prev; offset-- > 0; ptr = ptr->next) { - ; - } - } - -#ifndef NDEBUG - if (!ptr) - NULL_DATA.error("ELIST2_ITERATOR::data_relative", ABORT); -#endif - - return ptr; -} - -/*********************************************************************** - * ELIST2_ITERATOR::exchange() - * - * Given another iterator, whose current element is a different element on - * the same list list OR an element of another list, exchange the two current - * elements. On return, each iterator points to the element which was the - * other iterators current on entry. - * (This function hasn't been in-lined because its a bit big!) - **********************************************************************/ - -void ELIST2_ITERATOR::exchange( // positions of 2 links - ELIST2_ITERATOR *other_it) { // other iterator - constexpr ERRCODE DONT_EXCHANGE_DELETED("Can't exchange deleted elements of lists"); - - ELIST2_LINK *old_current; - -#ifndef NDEBUG - if (!list) - NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT); - if (!other_it) - BAD_PARAMETER.error("ELIST2_ITERATOR::exchange", ABORT, "other_it nullptr"); - if (!(other_it->list)) - NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT, "other_it"); -#endif - - /* Do nothing if either list is empty or if both iterators reference the same -link */ - - if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current)) { - return; - } - - /* Error if either current element is deleted */ - - if (!current || !other_it->current) { - DONT_EXCHANGE_DELETED.error("ELIST2_ITERATOR.exchange", ABORT); - } - - /* Now handle the 4 cases: doubleton list; non-doubleton 
adjacent elements -(other before this); non-doubleton adjacent elements (this before other); -non-adjacent elements. */ - - // adjacent links - if ((next == other_it->current) || (other_it->next == current)) { - // doubleton list - if ((next == other_it->current) && (other_it->next == current)) { - prev = next = current; - other_it->prev = other_it->next = other_it->current; - } else { // non-doubleton with - // adjacent links - // other before this - if (other_it->next == current) { - other_it->prev->next = current; - other_it->current->next = next; - other_it->current->prev = current; - current->next = other_it->current; - current->prev = other_it->prev; - next->prev = other_it->current; - - other_it->next = other_it->current; - prev = current; - } else { // this before other - prev->next = other_it->current; - current->next = other_it->next; - current->prev = other_it->current; - other_it->current->next = current; - other_it->current->prev = prev; - other_it->next->prev = current; - - next = current; - other_it->prev = other_it->current; - } - } - } else { // no overlap - prev->next = other_it->current; - current->next = other_it->next; - current->prev = other_it->prev; - next->prev = other_it->current; - other_it->prev->next = current; - other_it->current->next = next; - other_it->current->prev = prev; - other_it->next->prev = current; - } - - /* update end of list pointer when necessary (remember that the 2 iterators - may iterate over different lists!) 
*/ - - if (list->last == current) { - list->last = other_it->current; - } - if (other_it->list->last == other_it->current) { - other_it->list->last = current; - } - - if (current == cycle_pt) { - cycle_pt = other_it->cycle_pt; - } - if (other_it->current == other_it->cycle_pt) { - other_it->cycle_pt = cycle_pt; - } - - /* The actual exchange - in all cases*/ - - old_current = current; - current = other_it->current; - other_it->current = old_current; -} - -/*********************************************************************** - * ELIST2_ITERATOR::extract_sublist() - * - * This is a private member, used only by ELIST2::assign_to_sublist. - * Given another iterator for the same list, extract the links from THIS to - * OTHER inclusive, link them into a new circular list, and return a - * pointer to the last element. - * (Can't inline this function because it contains a loop) - **********************************************************************/ - -ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( // from this current - ELIST2_ITERATOR *other_it) { // to other current -#ifndef NDEBUG - constexpr ERRCODE BAD_EXTRACTION_PTS("Can't extract sublist from points on different lists"); - constexpr ERRCODE DONT_EXTRACT_DELETED("Can't extract a sublist marked by deleted points"); -#endif - constexpr ERRCODE BAD_SUBLIST("Can't find sublist end point in original list"); - - ELIST2_ITERATOR temp_it = *this; - ELIST2_LINK *end_of_new_list; - -#ifndef NDEBUG - if (!other_it) - BAD_PARAMETER.error("ELIST2_ITERATOR::extract_sublist", ABORT, "other_it nullptr"); - if (!list) - NO_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT); - if (list != other_it->list) - BAD_EXTRACTION_PTS.error("ELIST2_ITERATOR.extract_sublist", ABORT); - if (list->empty()) - EMPTY_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT); - - if (!current || !other_it->current) - DONT_EXTRACT_DELETED.error("ELIST2_ITERATOR.extract_sublist", ABORT); -#endif - - ex_current_was_last = 
other_it->ex_current_was_last = false; - ex_current_was_cycle_pt = false; - other_it->ex_current_was_cycle_pt = false; - - temp_it.mark_cycle_pt(); - do { // walk sublist - if (temp_it.cycled_list()) { // can't find end pt - BAD_SUBLIST.error("ELIST2_ITERATOR.extract_sublist", ABORT); - } - - if (temp_it.at_last()) { - list->last = prev; - ex_current_was_last = other_it->ex_current_was_last = true; - } - - if (temp_it.current == cycle_pt) { - ex_current_was_cycle_pt = true; - } - - if (temp_it.current == other_it->cycle_pt) { - other_it->ex_current_was_cycle_pt = true; - } - - temp_it.forward(); - } - // do INCLUSIVE list - while (temp_it.prev != other_it->current); - - // circularise sublist - other_it->current->next = current; - // circularise sublist - current->prev = other_it->current; - end_of_new_list = other_it->current; - - // sublist = whole list - if (prev == other_it->current) { - list->last = nullptr; - prev = current = next = nullptr; - other_it->prev = other_it->current = other_it->next = nullptr; - } else { - prev->next = other_it->next; - other_it->next->prev = prev; - - current = other_it->current = nullptr; - next = other_it->next; - other_it->prev = prev; - } - return end_of_new_list; -} - -} // namespace tesseract diff --git a/src/ccutil/elst2.h b/src/ccutil/elst2.h index a54738a3cb..bcbabe7d57 100644 --- a/src/ccutil/elst2.h +++ b/src/ccutil/elst2.h @@ -19,16 +19,14 @@ #ifndef ELST2_H #define ELST2_H -#include "list.h" #include "lsterr.h" #include "serialis.h" +#include #include namespace tesseract { -class ELIST2_ITERATOR; - /********************************************************************** DESIGN NOTE =========== @@ -47,787 +45,1133 @@ i) The duplication in source does not affect the run time code size - the ii) The compiler should have a bit less work to do! 
**********************************************************************/ -/********************************************************************** - * CLASS - ELIST2_LINK - * - * Generic link class for doubly linked lists with embedded links - * - * Note: No destructor - elements are assumed to be destroyed EITHER after - * they have been extracted from a list OR by the ELIST2 destructor which - * walks the list. - **********************************************************************/ - -class ELIST2_LINK { - friend class ELIST2_ITERATOR; - friend class ELIST2; - - ELIST2_LINK *prev; - ELIST2_LINK *next; - -public: - ELIST2_LINK() { // constructor - prev = next = nullptr; - } - - ELIST2_LINK(const ELIST2_LINK &) = delete; - - // The assignment operator is required for WERD. - void operator=(const ELIST2_LINK &) { - prev = next = nullptr; - } -}; - /********************************************************************** * CLASS - ELIST2 * * Generic list class for doubly linked lists with embedded links **********************************************************************/ -class TESS_API ELIST2 { - friend class ELIST2_ITERATOR; - - ELIST2_LINK *last = nullptr; // End of list - //(Points to head) - ELIST2_LINK *First() { // return first - return last ? last->next : nullptr; - } - +template +class IntrusiveList { public: - // destroy all links - void internal_clear(void (*zapper)(void *)); - - bool empty() const { // is list empty? - return !last; - } - - bool singleton() const { - return last ? (last == last->next) : false; - } - - void shallow_copy( // dangerous!! - ELIST2 *from_list) { // beware destructors!! 
- last = from_list->last; - } - - // ptr to copier functn - void internal_deep_copy(ELIST2_LINK *(*copier)(ELIST2_LINK *), - const ELIST2 *list); // list being copied - - void assign_to_sublist( // to this list - ELIST2_ITERATOR *start_it, // from list start - ELIST2_ITERATOR *end_it); // from list end - - // # elements in list - int32_t length() const { - int32_t count = 0; - if (last != nullptr) { - count = 1; - for (auto it = last->next; it != last; it = it->next) { - count++; - } + /********************************************************************** + * CLASS - Link + * + * Generic link class for doubly linked lists with embedded links + * + * Note: No destructor - elements are assumed to be destroyed EITHER after + * they have been extracted from a list OR by the ELIST2 destructor which + * walks the list. + **********************************************************************/ + + class Link { + friend class Iterator; + friend class IntrusiveList; + + T *prev; + T *next; + + public: + Link() { // constructor + prev = next = nullptr; } - return count; - } - - void sort( // sort elements - int comparator( // comparison routine - const void *, const void *)); - - // Assuming list has been sorted already, insert new_link to - // keep the list sorted according to the same comparison function. - // Comparison function is the same as used by sort, i.e. uses double - // indirection. Time is O(1) to add to beginning or end. - // Time is linear to add pre-sorted items to an empty list. 
- void add_sorted(int comparator(const void *, const void *), ELIST2_LINK *new_link); -}; - -/*********************************************************************** - * CLASS - ELIST2_ITERATOR - * - * Generic iterator class for doubly linked lists with embedded - *links - **********************************************************************/ -class TESS_API ELIST2_ITERATOR { - friend void ELIST2::assign_to_sublist(ELIST2_ITERATOR *, ELIST2_ITERATOR *); + Link(const Link &) = delete; - ELIST2 *list; // List being iterated - ELIST2_LINK *prev; // prev element - ELIST2_LINK *current; // current element - ELIST2_LINK *next; // next element - ELIST2_LINK *cycle_pt; // point we are cycling the list to. - bool ex_current_was_last; // current extracted was end of list - bool ex_current_was_cycle_pt; // current extracted was cycle point - bool started_cycling; // Have we moved off the start? + // The assignment operator is required for WERD. + void operator=(const Link &) { + prev = next = nullptr; + } + }; + using LINK = Link; // compat + + /*********************************************************************** + * CLASS - ELIST2_ITERATOR + * + * Generic iterator class for doubly linked lists with embedded + *links + **********************************************************************/ + + class Iterator { + friend void IntrusiveList::assign_to_sublist(Iterator *, Iterator *); + + IntrusiveList *list; // List being iterated + T *prev; // prev element + T *current; // current element + T *next; // next element + T *cycle_pt; // point we are cycling the list to. + bool ex_current_was_last; // current extracted was end of list + bool ex_current_was_cycle_pt; // current extracted was cycle point + bool started_cycling; // Have we moved off the start? + /*********************************************************************** + * ELIST2_ITERATOR::extract_sublist() + * + * This is a private member, used only by IntrusiveList::assign_to_sublist. 
+ * Given another iterator for the same list, extract the links from THIS to + * OTHER inclusive, link them into a new circular list, and return a + * pointer to the last element. + * (Can't inline this function because it contains a loop) + **********************************************************************/ + T *extract_sublist( // from this current... + Iterator *other_it) { // to other current +#ifndef NDEBUG + constexpr ERRCODE BAD_EXTRACTION_PTS("Can't extract sublist from points on different lists"); + constexpr ERRCODE DONT_EXTRACT_DELETED("Can't extract a sublist marked by deleted points"); +#endif + constexpr ERRCODE BAD_SUBLIST("Can't find sublist end point in original list"); - ELIST2_LINK *extract_sublist( // from this current... - ELIST2_ITERATOR *other_it); // to other current + Iterator temp_it = *this; + T *end_of_new_list; -public: - ELIST2_ITERATOR( // constructor - ELIST2 *list_to_iterate); +#ifndef NDEBUG + if (!other_it) + BAD_PARAMETER.error("ELIST2_ITERATOR::extract_sublist", ABORT, "other_it nullptr"); + if (!list) + NO_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT); + if (list != other_it->list) + BAD_EXTRACTION_PTS.error("ELIST2_ITERATOR.extract_sublist", ABORT); + if (list->empty()) + EMPTY_LIST.error("ELIST2_ITERATOR::extract_sublist", ABORT); + + if (!current || !other_it->current) + DONT_EXTRACT_DELETED.error("ELIST2_ITERATOR.extract_sublist", ABORT); +#endif - void set_to_list( // change list - ELIST2 *list_to_iterate); + ex_current_was_last = other_it->ex_current_was_last = false; + ex_current_was_cycle_pt = false; + other_it->ex_current_was_cycle_pt = false; - void add_after_then_move( // add after current & - ELIST2_LINK *new_link); // move to new + temp_it.mark_cycle_pt(); + do { // walk sublist + if (temp_it.cycled_list()) { // can't find end pt + BAD_SUBLIST.error("ELIST2_ITERATOR.extract_sublist", ABORT); + } - void add_after_stay_put( // add after current & - ELIST2_LINK *new_link); // stay at current + if 
(temp_it.at_last()) { + list->last = prev; + ex_current_was_last = other_it->ex_current_was_last = true; + } - void add_before_then_move( // add before current & - ELIST2_LINK *new_link); // move to new + if (temp_it.current == cycle_pt) { + ex_current_was_cycle_pt = true; + } - void add_before_stay_put( // add before current & - ELIST2_LINK *new_link); // stay at current + if (temp_it.current == other_it->cycle_pt) { + other_it->ex_current_was_cycle_pt = true; + } - void add_list_after( // add a list & - ELIST2 *list_to_add); // stay at current + temp_it.forward(); + } + // do INCLUSIVE list + while (temp_it.prev != other_it->current); + + // circularise sublist + other_it->current->next = current; + // circularise sublist + current->prev = other_it->current; + end_of_new_list = other_it->current; + + // sublist = whole list + if (prev == other_it->current) { + list->last = nullptr; + prev = current = next = nullptr; + other_it->prev = other_it->current = other_it->next = nullptr; + } else { + prev->next = other_it->next; + other_it->next->prev = prev; + + current = other_it->current = nullptr; + next = other_it->next; + other_it->prev = prev; + } + return end_of_new_list; + } // to other current + + public: + /*********************************************************************** + * ELIST2_ITERATOR::ELIST2_ITERATOR + * + * CONSTRUCTOR - set iterator to specified list; + **********************************************************************/ + Iterator( // constructor + IntrusiveList *list_to_iterate) { + set_to_list(list_to_iterate); + } - void add_list_before( // add a list & - ELIST2 *list_to_add); // move to it 1st item + /*********************************************************************** + * ELIST2_ITERATOR::set_to_list + * + * (Re-)initialise the iterator to point to the start of the list_to_iterate + * over. 
+ **********************************************************************/ - ELIST2_LINK *data() { // get current data + void set_to_list( // change list + IntrusiveList *list_to_iterate) { #ifndef NDEBUG - if (!current) { - NULL_DATA.error("ELIST2_ITERATOR::data", ABORT); - } - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::data", ABORT); - } + if (!list_to_iterate) { + BAD_PARAMETER.error("ELIST2_ITERATOR::set_to_list", ABORT, "list_to_iterate is nullptr"); + } #endif - return current; - } - - ELIST2_LINK *data_relative( // get data + or - ... - int8_t offset); // offset from current - ELIST2_LINK *forward(); // move to next element - - ELIST2_LINK *backward(); // move to prev element + list = list_to_iterate; + prev = list->last; + current = list->First(); + next = current ? current->next : nullptr; + cycle_pt = nullptr; // await explicit set + started_cycling = false; + ex_current_was_last = false; + ex_current_was_cycle_pt = false; + } + /*********************************************************************** + * ELIST2_ITERATOR::add_after_then_move + * + * Add a new element to the list after the current element and move the + * iterator to the new element. 
+ **********************************************************************/ + void add_after_then_move( // add after current & + T *new_element) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::add_after_then_move", ABORT); + } + if (!new_element) { + BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_then_move", ABORT, "new_element is nullptr"); + } + if (new_element->next) { + STILL_LINKED.error("ELIST2_ITERATOR::add_after_then_move", ABORT); + } +#endif - ELIST2_LINK *extract(); // remove from list + if (list->empty()) { + new_element->next = new_element; + new_element->prev = new_element; + list->last = new_element; + prev = next = new_element; + } else { + new_element->next = next; + next->prev = new_element; + + if (current) { // not extracted + new_element->prev = current; + current->next = new_element; + prev = current; + if (current == list->last) { + list->last = new_element; + } + } else { // current extracted + new_element->prev = prev; + prev->next = new_element; + if (ex_current_was_last) { + list->last = new_element; + } + if (ex_current_was_cycle_pt) { + cycle_pt = new_element; + } + } + } + current = new_element; + } // move to new + /*********************************************************************** + * ELIST2_ITERATOR::add_after_stay_put + * + * Add a new element to the list after the current element but do not move + * the iterator to the new element. 
+ **********************************************************************/ + void add_after_stay_put( // add after current & + T *new_element) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::add_after_stay_put", ABORT); + } + if (!new_element) { + BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, "new_element is nullptr"); + } + if (new_element->next) { + STILL_LINKED.error("ELIST2_ITERATOR::add_after_stay_put", ABORT); + } +#endif - // go to start of list - ELIST2_LINK *move_to_first(); + if (list->empty()) { + new_element->next = new_element; + new_element->prev = new_element; + list->last = new_element; + prev = next = new_element; + ex_current_was_last = false; + current = nullptr; + } else { + new_element->next = next; + next->prev = new_element; + + if (current) { // not extracted + new_element->prev = current; + current->next = new_element; + if (prev == current) { + prev = new_element; + } + if (current == list->last) { + list->last = new_element; + } + } else { // current extracted + new_element->prev = prev; + prev->next = new_element; + if (ex_current_was_last) { + list->last = new_element; + ex_current_was_last = false; + } + } + next = new_element; + } + } // stay at current + /*********************************************************************** + * ELIST2_ITERATOR::add_before_then_move + * + * Add a new element to the list before the current element and move the + * iterator to the new element. 
+ **********************************************************************/ + void add_before_then_move( // add before current & + T *new_element) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::add_before_then_move", ABORT); + } + if (!new_element) { + BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_then_move", ABORT, "new_element is nullptr"); + } + if (new_element->next) { + STILL_LINKED.error("ELIST2_ITERATOR::add_before_then_move", ABORT); + } +#endif - ELIST2_LINK *move_to_last(); // go to end of list + if (list->empty()) { + new_element->next = new_element; + new_element->prev = new_element; + list->last = new_element; + prev = next = new_element; + } else { + prev->next = new_element; + new_element->prev = prev; + + if (current) { // not extracted + new_element->next = current; + current->prev = new_element; + next = current; + } else { // current extracted + new_element->next = next; + next->prev = new_element; + if (ex_current_was_last) { + list->last = new_element; + } + if (ex_current_was_cycle_pt) { + cycle_pt = new_element; + } + } + } + current = new_element; + } // move to new + /*********************************************************************** + * ELIST2_ITERATOR::add_before_stay_put + * + * Add a new element to the list before the current element but don't move the + * iterator to the new element. 
+ **********************************************************************/ + void add_before_stay_put( // add before current & + T *new_element) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::add_before_stay_put", ABORT); + } + if (!new_element) { + BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, "new_element is nullptr"); + } + if (new_element->next) { + STILL_LINKED.error("ELIST2_ITERATOR::add_before_stay_put", ABORT); + } +#endif - void mark_cycle_pt(); // remember current + if (list->empty()) { + new_element->next = new_element; + new_element->prev = new_element; + list->last = new_element; + prev = next = new_element; + ex_current_was_last = true; + current = nullptr; + } else { + prev->next = new_element; + new_element->prev = prev; + + if (current) { // not extracted + new_element->next = current; + current->prev = new_element; + if (next == current) { + next = new_element; + } + } else { // current extracted + new_element->next = next; + next->prev = new_element; + if (ex_current_was_last) { + list->last = new_element; + } + } + prev = new_element; + } + } // stay at current + /*********************************************************************** + * ELIST2_ITERATOR::add_list_after + * + * Insert another list to this list after the current element but don't move + *the + * iterator. + **********************************************************************/ + void add_list_after( // add a list & + IntrusiveList *list_to_add) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::add_list_after", ABORT); + } + if (!list_to_add) { + BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr"); + } +#endif - bool empty() const { // is list empty? 
+ if (!list_to_add->empty()) { + if (list->empty()) { + list->last = list_to_add->last; + prev = list->last; + next = list->First(); + ex_current_was_last = true; + current = nullptr; + } else { + if (current) { // not extracted + current->next = list_to_add->First(); + current->next->prev = current; + if (current == list->last) { + list->last = list_to_add->last; + } + list_to_add->last->next = next; + next->prev = list_to_add->last; + next = current->next; + } else { // current extracted + prev->next = list_to_add->First(); + prev->next->prev = prev; + if (ex_current_was_last) { + list->last = list_to_add->last; + ex_current_was_last = false; + } + list_to_add->last->next = next; + next->prev = list_to_add->last; + next = prev->next; + } + } + list_to_add->last = nullptr; + } + } // stay at current + /*********************************************************************** + * ELIST2_ITERATOR::add_list_before + * + * Insert another list to this list before the current element. Move the + * iterator to the start of the inserted elements + * iterator. + **********************************************************************/ + void add_list_before( // add a list & + IntrusiveList *list_to_add) { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::empty", ABORT); - } + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::add_list_before", ABORT); + } + if (!list_to_add) { + BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr"); + } #endif - return list->empty(); - } - bool current_extracted() const { // current extracted? 
- return !current; - } + if (!list_to_add->empty()) { + if (list->empty()) { + list->last = list_to_add->last; + prev = list->last; + current = list->First(); + next = current->next; + ex_current_was_last = false; + } else { + prev->next = list_to_add->First(); + prev->next->prev = prev; + + if (current) { // not extracted + list_to_add->last->next = current; + current->prev = list_to_add->last; + } else { // current extracted + list_to_add->last->next = next; + next->prev = list_to_add->last; + if (ex_current_was_last) { + list->last = list_to_add->last; + } + if (ex_current_was_cycle_pt) { + cycle_pt = prev->next; + } + } + current = prev->next; + next = current->next; + } + list_to_add->last = nullptr; + } + } // move to it 1st item - bool at_first() const; // Current is first? + T *data() { // get current data +#ifndef NDEBUG + if (!current) { + NULL_DATA.error("ELIST2_ITERATOR::data", ABORT); + } + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::data", ABORT); + } +#endif + return current; + } + /*********************************************************************** + * ELIST2_ITERATOR::data_relative + * + * Return the data pointer to the element "offset" elements from current. + * (This function can't be INLINEd because it contains a loop) + **********************************************************************/ + T *data_relative( // get data + or - ... + int8_t offset) { // offset from current + T *ptr; - bool at_last() const; // Current is last? +#ifndef NDEBUG + if (!list) + NO_LIST.error("ELIST2_ITERATOR::data_relative", ABORT); + if (list->empty()) + EMPTY_LIST.error("ELIST2_ITERATOR::data_relative", ABORT); +#endif - bool cycled_list() const; // Completed a cycle? + if (offset < 0) { + for (ptr = current ? current : next; offset++ < 0; ptr = ptr->prev) { + ; + } + } else { + for (ptr = current ? 
current : prev; offset-- > 0; ptr = ptr->next) { + ; + } + } - void add_to_end( // add at end & - ELIST2_LINK *new_link); // don't move +#ifndef NDEBUG + if (!ptr) + NULL_DATA.error("ELIST2_ITERATOR::data_relative", ABORT); +#endif - void exchange( // positions of 2 links - ELIST2_ITERATOR *other_it); // other iterator + return ptr; + } // offset from current + /*********************************************************************** + * ELIST2_ITERATOR::forward + * + * Move the iterator to the next element of the list. + * REMEMBER: ALL LISTS ARE CIRCULAR. + **********************************************************************/ + T *forward() { +#ifndef NDEBUG + if (!list) + NO_LIST.error("ELIST2_ITERATOR::forward", ABORT); +#endif + if (list->empty()) { + return nullptr; + } - //# elements in list - int32_t length() const { - return list->length(); - } + if (current) { // not removed so + // set previous + prev = current; + started_cycling = true; + // In case next is deleted by another iterator, get it from the current. + current = current->next; + } else { + if (ex_current_was_cycle_pt) { + cycle_pt = next; + } + current = next; + } - void sort( // sort elements - int comparator( // comparison routine - const void *, const void *)); +#ifndef NDEBUG + if (!current) + NULL_DATA.error("ELIST2_ITERATOR::forward", ABORT); +#endif -private: - // Don't use the following constructor. - ELIST2_ITERATOR() = delete; -}; + next = current->next; -/*********************************************************************** - * ELIST2_ITERATOR::set_to_list - * - * (Re-)initialise the iterator to point to the start of the list_to_iterate - * over. 
- **********************************************************************/ +#ifndef NDEBUG + if (!next) { + NULL_NEXT.error("ELIST2_ITERATOR::forward", ABORT, + "This is: %p Current is: %p", + static_cast(this), + static_cast(current)); + } +#endif -inline void ELIST2_ITERATOR::set_to_list( // change list - ELIST2 *list_to_iterate) { + return current; + } // move to next element + /*********************************************************************** + * ELIST2_ITERATOR::backward + * + * Move the iterator to the previous element of the list. + * REMEMBER: ALL LISTS ARE CIRCULAR. + **********************************************************************/ + T *backward() { #ifndef NDEBUG - if (!list_to_iterate) { - BAD_PARAMETER.error("ELIST2_ITERATOR::set_to_list", ABORT, "list_to_iterate is nullptr"); - } + if (!list) + NO_LIST.error("ELIST2_ITERATOR::backward", ABORT); #endif + if (list->empty()) { + return nullptr; + } - list = list_to_iterate; - prev = list->last; - current = list->First(); - next = current ? current->next : nullptr; - cycle_pt = nullptr; // await explicit set - started_cycling = false; - ex_current_was_last = false; - ex_current_was_cycle_pt = false; -} - -/*********************************************************************** - * ELIST2_ITERATOR::ELIST2_ITERATOR - * - * CONSTRUCTOR - set iterator to specified list; - **********************************************************************/ + if (current) { // not removed so + // set previous + next = current; + started_cycling = true; + // In case prev is deleted by another iterator, get it from current. 
+ current = current->prev; + } else { + if (ex_current_was_cycle_pt) { + cycle_pt = prev; + } + current = prev; + } -inline ELIST2_ITERATOR::ELIST2_ITERATOR(ELIST2 *list_to_iterate) { - set_to_list(list_to_iterate); -} +#ifndef NDEBUG + if (!current) + NULL_DATA.error("ELIST2_ITERATOR::backward", ABORT); + if (!prev) { + NULL_PREV.error("ELIST2_ITERATOR::backward", ABORT, + "This is: %p Current is: %p", + static_cast(this), + static_cast(current)); + } +#endif -/*********************************************************************** - * ELIST2_ITERATOR::add_after_then_move - * - * Add a new element to the list after the current element and move the - * iterator to the new element. - **********************************************************************/ + prev = current->prev; + return current; + } // move to prev element + /*********************************************************************** + * ELIST2_ITERATOR::extract + * + * Do extraction by removing current from the list, returning it to the + * caller, but NOT updating the iterator. (So that any calling loop can do + * this.) The iterator's current points to nullptr. If the extracted element + * is to be deleted, this is the callers responsibility. 
+ **********************************************************************/ + T *extract() { + T *extracted_link; -inline void ELIST2_ITERATOR::add_after_then_move( // element to add - ELIST2_LINK *new_element) { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::add_after_then_move", ABORT); - } - if (!new_element) { - BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_then_move", ABORT, "new_element is nullptr"); - } - if (new_element->next) { - STILL_LINKED.error("ELIST2_ITERATOR::add_after_then_move", ABORT); - } + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::extract", ABORT); + } + if (!current) { // list empty or + // element extracted + NULL_CURRENT.error("ELIST2_ITERATOR::extract", ABORT); + } #endif - if (list->empty()) { - new_element->next = new_element; - new_element->prev = new_element; - list->last = new_element; - prev = next = new_element; - } else { - new_element->next = next; - next->prev = new_element; - - if (current) { // not extracted - new_element->prev = current; - current->next = new_element; - prev = current; - if (current == list->last) { - list->last = new_element; - } - } else { // current extracted - new_element->prev = prev; - prev->next = new_element; - if (ex_current_was_last) { - list->last = new_element; + if (list->singleton()) { + // Special case where we do need to change the iterator. + prev = next = list->last = nullptr; + } else { + prev->next = next; // remove from list + next->prev = prev; + + if (current == list->last) { + list->last = prev; + ex_current_was_last = true; + } else { + ex_current_was_last = false; + } } - if (ex_current_was_cycle_pt) { - cycle_pt = new_element; + // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. 
+ ex_current_was_cycle_pt = (current == cycle_pt); + extracted_link = current; + extracted_link->next = nullptr; // for safety + extracted_link->prev = nullptr; // for safety + current = nullptr; + return extracted_link; + } // remove from list + /*********************************************************************** + * ELIST2_ITERATOR::move_to_first() + * + * Move current so that it is set to the start of the list. + * Return data just in case anyone wants it. + **********************************************************************/ + // go to start of list + T *move_to_first() { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::move_to_first", ABORT); } - } - } - current = new_element; -} - -/*********************************************************************** - * ELIST2_ITERATOR::add_after_stay_put - * - * Add a new element to the list after the current element but do not move - * the iterator to the new element. - **********************************************************************/ +#endif -inline void ELIST2_ITERATOR::add_after_stay_put( // element to add - ELIST2_LINK *new_element) { + current = list->First(); + prev = list->last; + next = current ? current->next : nullptr; + return current; + } + /*********************************************************************** + * ELIST2_ITERATOR::move_to_last() + * + * Move current so that it is set to the end of the list. + * Return data just in case anyone wants it. 
+ **********************************************************************/ + T *move_to_last() { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::add_after_stay_put", ABORT); - } - if (!new_element) { - BAD_PARAMETER.error("ELIST2_ITERATOR::add_after_stay_put", ABORT, "new_element is nullptr"); - } - if (new_element->next) { - STILL_LINKED.error("ELIST2_ITERATOR::add_after_stay_put", ABORT); - } + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::move_to_last", ABORT); + } #endif - if (list->empty()) { - new_element->next = new_element; - new_element->prev = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = false; - current = nullptr; - } else { - new_element->next = next; - next->prev = new_element; - - if (current) { // not extracted - new_element->prev = current; - current->next = new_element; - if (prev == current) { - prev = new_element; + current = list->last; + prev = current ? current->prev : nullptr; + next = current ? current->next : nullptr; + return current; + } // go to end of list + /*********************************************************************** + * ELIST2_ITERATOR::mark_cycle_pt() + * + * Remember the current location so that we can tell whether we've returned + * to this point later. + * + * If the current point is deleted either now, or in the future, the cycle + * point will be set to the next item which is set to current. This could be + * by a forward, add_after_then_move or add_before_then_move.
+ **********************************************************************/ + void mark_cycle_pt() { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::mark_cycle_pt", ABORT); } - if (current == list->last) { - list->last = new_element; +#endif + + if (current) { + cycle_pt = current; + } else { + ex_current_was_cycle_pt = true; } - } else { // current extracted - new_element->prev = prev; - prev->next = new_element; - if (ex_current_was_last) { - list->last = new_element; - ex_current_was_last = false; + started_cycling = false; + } // remember current + + bool empty() const { // is list empty? +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::empty", ABORT); } +#endif + return list->empty(); } - next = new_element; - } -} - -/*********************************************************************** - * ELIST2_ITERATOR::add_before_then_move - * - * Add a new element to the list before the current element and move the - * iterator to the new element. - **********************************************************************/ -inline void ELIST2_ITERATOR::add_before_then_move( // element to add - ELIST2_LINK *new_element) { + bool current_extracted() const { // current extracted? + return !current; + } + /*********************************************************************** + * ELIST2_ITERATOR::at_first() + * + * Are we at the start of the list? 
+ * + **********************************************************************/ + bool at_first() const { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::add_before_then_move", ABORT); - } - if (!new_element) { - BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_then_move", ABORT, "new_element is nullptr"); - } - if (new_element->next) { - STILL_LINKED.error("ELIST2_ITERATOR::add_before_then_move", ABORT); - } + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::at_first", ABORT); + } #endif - if (list->empty()) { - new_element->next = new_element; - new_element->prev = new_element; - list->last = new_element; - prev = next = new_element; - } else { - prev->next = new_element; - new_element->prev = prev; - - if (current) { // not extracted - new_element->next = current; - current->prev = new_element; - next = current; - } else { // current extracted - new_element->next = next; - next->prev = new_element; - if (ex_current_was_last) { - list->last = new_element; - } - if (ex_current_was_cycle_pt) { - cycle_pt = new_element; + // we're at a deleted + return ((list->empty()) || (current == list->First()) || + ((current == nullptr) && (prev == list->last) && // NON-last pt between + !ex_current_was_last)); // first and last + } // Current is first? + /*********************************************************************** + * ELIST2_ITERATOR::at_last() + * + * Are we at the end of the list? + * + **********************************************************************/ + bool at_last() const { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::at_last", ABORT); } - } - } - current = new_element; -} - -/*********************************************************************** - * ELIST2_ITERATOR::add_before_stay_put - * - * Add a new element to the list before the current element but don't move the - * iterator to the new element. 
- **********************************************************************/ +#endif -inline void ELIST2_ITERATOR::add_before_stay_put( // element to add - ELIST2_LINK *new_element) { + // we're at a deleted + return ((list->empty()) || (current == list->last) || + ((current == nullptr) && (prev == list->last) && // last point between + ex_current_was_last)); // first and last + } // Current is last? + /*********************************************************************** + * ELIST2_ITERATOR::cycled_list() + * + * Have we returned to the cycle_pt since it was set? + * + **********************************************************************/ + bool cycled_list() const { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::add_before_stay_put", ABORT); - } - if (!new_element) { - BAD_PARAMETER.error("ELIST2_ITERATOR::add_before_stay_put", ABORT, "new_element is nullptr"); - } - if (new_element->next) { - STILL_LINKED.error("ELIST2_ITERATOR::add_before_stay_put", ABORT); - } + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::cycled_list", ABORT); + } #endif - if (list->empty()) { - new_element->next = new_element; - new_element->prev = new_element; - list->last = new_element; - prev = next = new_element; - ex_current_was_last = true; - current = nullptr; - } else { - prev->next = new_element; - new_element->prev = prev; - - if (current) { // not extracted - new_element->next = current; - current->prev = new_element; - if (next == current) { - next = new_element; + return ((list->empty()) || ((current == cycle_pt) && started_cycling)); + } // Completed a cycle? + /*********************************************************************** + * ELIST2_ITERATOR::add_to_end + * + * Add a new element to the end of the list without moving the iterator. + * This is provided because a single linked list cannot move to the last as + * the iterator couldn't set its prev pointer. Adding to the end is + * essential for implementing + queues. 
+ **********************************************************************/ + void add_to_end( // add at end & + T *new_element) { +#ifndef NDEBUG + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::add_to_end", ABORT); } - } else { // current extracted - new_element->next = next; - next->prev = new_element; - if (ex_current_was_last) { - list->last = new_element; + if (!new_element) { + BAD_PARAMETER.error("ELIST2_ITERATOR::add_to_end", ABORT, "new_element is nullptr"); } - } - prev = new_element; - } -} + if (new_element->next) { + STILL_LINKED.error("ELIST2_ITERATOR::add_to_end", ABORT); + } +#endif -/*********************************************************************** - * ELIST2_ITERATOR::add_list_after - * - * Insert another list to this list after the current element but don't move - *the - * iterator. - **********************************************************************/ + if (this->at_last()) { + this->add_after_stay_put(new_element); + } else { + if (this->at_first()) { + this->add_before_stay_put(new_element); + list->last = new_element; + } else { // Iteratr is elsewhere + new_element->next = list->last->next; + new_element->prev = list->last; + list->last->next->prev = new_element; + list->last->next = new_element; + list->last = new_element; + } + } + } // don't move + /*********************************************************************** + * ELIST2_ITERATOR::exchange() + * + * Given another iterator, whose current element is a different element on + * the same list list OR an element of another list, exchange the two current + * elements. On return, each iterator points to the element which was the + * other iterators current on entry. + * (This function hasn't been in-lined because its a bit big!) 
+ **********************************************************************/ + void exchange( // positions of 2 links + Iterator *other_it) { // other iterator + constexpr ERRCODE DONT_EXCHANGE_DELETED("Can't exchange deleted elements of lists"); + + T *old_current; -inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::add_list_after", ABORT); - } - if (!list_to_add) { - BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_after", ABORT, "list_to_add is nullptr"); - } + if (!list) + NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT); + if (!other_it) + BAD_PARAMETER.error("ELIST2_ITERATOR::exchange", ABORT, "other_it nullptr"); + if (!(other_it->list)) + NO_LIST.error("ELIST2_ITERATOR::exchange", ABORT, "other_it"); #endif - if (!list_to_add->empty()) { - if (list->empty()) { - list->last = list_to_add->last; - prev = list->last; - next = list->First(); - ex_current_was_last = true; - current = nullptr; - } else { - if (current) { // not extracted - current->next = list_to_add->First(); - current->next->prev = current; - if (current == list->last) { - list->last = list_to_add->last; - } - list_to_add->last->next = next; - next->prev = list_to_add->last; - next = current->next; - } else { // current extracted - prev->next = list_to_add->First(); - prev->next->prev = prev; - if (ex_current_was_last) { - list->last = list_to_add->last; - ex_current_was_last = false; - } - list_to_add->last->next = next; - next->prev = list_to_add->last; - next = prev->next; + /* Do nothing if either list is empty or if both iterators reference the same + link */ + + if ((list->empty()) || (other_it->list->empty()) || (current == other_it->current)) { + return; } - } - list_to_add->last = nullptr; - } -} -/*********************************************************************** - * ELIST2_ITERATOR::add_list_before - * - * Insert another list to this list before the current element. 
Move the - * iterator to the start of the inserted elements - * iterator. - **********************************************************************/ + /* Error if either current element is deleted */ -inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::add_list_before", ABORT); - } - if (!list_to_add) { - BAD_PARAMETER.error("ELIST2_ITERATOR::add_list_before", ABORT, "list_to_add is nullptr"); - } -#endif + if (!current || !other_it->current) { + DONT_EXCHANGE_DELETED.error("ELIST2_ITERATOR.exchange", ABORT); + } - if (!list_to_add->empty()) { - if (list->empty()) { - list->last = list_to_add->last; - prev = list->last; - current = list->First(); - next = current->next; - ex_current_was_last = false; - } else { - prev->next = list_to_add->First(); - prev->next->prev = prev; - - if (current) { // not extracted - list_to_add->last->next = current; - current->prev = list_to_add->last; - } else { // current extracted - list_to_add->last->next = next; - next->prev = list_to_add->last; - if (ex_current_was_last) { - list->last = list_to_add->last; - } - if (ex_current_was_cycle_pt) { - cycle_pt = prev->next; + /* Now handle the 4 cases: doubleton list; non-doubleton adjacent elements + (other before this); non-doubleton adjacent elements (this before other); + non-adjacent elements. 
*/ + + // adjacent links + if ((next == other_it->current) || (other_it->next == current)) { + // doubleton list + if ((next == other_it->current) && (other_it->next == current)) { + prev = next = current; + other_it->prev = other_it->next = other_it->current; + } else { // non-doubleton with + // adjacent links + // other before this + if (other_it->next == current) { + other_it->prev->next = current; + other_it->current->next = next; + other_it->current->prev = current; + current->next = other_it->current; + current->prev = other_it->prev; + next->prev = other_it->current; + + other_it->next = other_it->current; + prev = current; + } else { // this before other + prev->next = other_it->current; + current->next = other_it->next; + current->prev = other_it->current; + other_it->current->next = current; + other_it->current->prev = prev; + other_it->next->prev = current; + + next = current; + other_it->prev = other_it->current; + } } + } else { // no overlap + prev->next = other_it->current; + current->next = other_it->next; + current->prev = other_it->prev; + next->prev = other_it->current; + other_it->prev->next = current; + other_it->current->next = next; + other_it->current->prev = prev; + other_it->next->prev = current; } - current = prev->next; - next = current->next; - } - list_to_add->last = nullptr; - } -} -/*********************************************************************** - * ELIST2_ITERATOR::extract - * - * Do extraction by removing current from the list, returning it to the - * caller, but NOT updating the iterator. (So that any calling loop can do - * this.) The iterator's current points to nullptr. If the extracted element - * is to be deleted, this is the callers responsibility. - **********************************************************************/ + /* update end of list pointer when necessary (remember that the 2 iterators + may iterate over different lists!) 
*/ -inline ELIST2_LINK *ELIST2_ITERATOR::extract() { - ELIST2_LINK *extracted_link; + if (list->last == current) { + list->last = other_it->current; + } + if (other_it->list->last == other_it->current) { + other_it->list->last = current; + } -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::extract", ABORT); - } - if (!current) { // list empty or - // element extracted - NULL_CURRENT.error("ELIST2_ITERATOR::extract", ABORT); - } -#endif + if (current == cycle_pt) { + cycle_pt = other_it->cycle_pt; + } + if (other_it->current == other_it->cycle_pt) { + other_it->cycle_pt = cycle_pt; + } - if (list->singleton()) { - // Special case where we do need to change the iterator. - prev = next = list->last = nullptr; - } else { - prev->next = next; // remove from list - next->prev = prev; + /* The actual exchange - in all cases*/ - if (current == list->last) { - list->last = prev; - ex_current_was_last = true; - } else { - ex_current_was_last = false; - } - } - // Always set ex_current_was_cycle_pt so an add/forward will work in a loop. - ex_current_was_cycle_pt = (current == cycle_pt); - extracted_link = current; - extracted_link->next = nullptr; // for safety - extracted_link->prev = nullptr; // for safety - current = nullptr; - return extracted_link; -} - -/*********************************************************************** - * ELIST2_ITERATOR::move_to_first() - * - * Move current so that it is set to the start of the list. - * Return data just in case anyone wants it. - **********************************************************************/ + old_current = current; + current = other_it->current; + other_it->current = old_current; + } // other iterator -inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() { + //# elements in list + int32_t length() const { + return list->length(); + } + /*********************************************************************** + * ELIST2_ITERATOR::sort() + * + * Sort the elements of the list, then reposition at the start. 
+ * + **********************************************************************/ + void sort( // sort elements + int comparator( // comparison routine + const T *, const T *)) { #ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::move_to_first", ABORT); - } + if (!list) { + NO_LIST.error("ELIST2_ITERATOR::sort", ABORT); + } #endif - current = list->First(); - prev = list->last; - next = current ? current->next : nullptr; - return current; -} + list->sort(comparator); + move_to_first(); + } -/*********************************************************************** - * ELIST2_ITERATOR::move_to_last() - * - * Move current so that it is set to the end of the list. - * Return data just in case anyone wants it. - **********************************************************************/ + private: + // Don't use the following constructor. + Iterator() = delete; + }; + using ITERATOR = Iterator; // compat -inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::move_to_last", ABORT); +private: + T *last = nullptr; // End of list + //(Points to head) + T *First() { // return first + return last ? last->next : nullptr; } -#endif - - current = list->last; - prev = current ? current->prev : nullptr; - next = current ? current->next : nullptr; - return current; -} - -/*********************************************************************** - * ELIST2_ITERATOR::mark_cycle_pt() - * - * Remember the current location so that we can tell whether we've returned - * to this point later. - * - * If the current point is deleted either now, or in the future, the cycle - * point will be set to the next item which is set to current. This could be - * by a forward, add_after_then_move or add_after_then_move. 
- **********************************************************************/ -inline void ELIST2_ITERATOR::mark_cycle_pt() { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::mark_cycle_pt", ABORT); +public: + ~IntrusiveList() { + clear(); } -#endif - if (current) { - cycle_pt = current; - } else { - ex_current_was_cycle_pt = true; + /* delete elements */ + void clear() { + internal_clear(); } - started_cycling = false; -} -/*********************************************************************** - * ELIST2_ITERATOR::at_first() - * - * Are we at the start of the list? - * - **********************************************************************/ + /* Become a deep copy of src_list */ + template + void deep_copy(const U *src_list, T *(*copier)(const T *)) { + Iterator from_it(const_cast(src_list)); + Iterator to_it(this); -inline bool ELIST2_ITERATOR::at_first() const { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::at_first", ABORT); + for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) + to_it.add_after_then_move((*copier)(from_it.data())); } -#endif - // we're at a deleted - return ((list->empty()) || (current == list->First()) || - ((current == nullptr) && (prev == list->last) && // NON-last pt between - !ex_current_was_last)); // first and last -} + /*********************************************************************** + * IntrusiveList::internal_clear + * + * Used by the destructor and the "clear" member function of derived list + * classes to destroy all the elements on the list. + * The calling function passes a "zapper" function which can be called to + * delete each element of the list, regardless of its derived type. This + * technique permits a generic clear function to destroy elements of + * different derived types correctly, without requiring virtual functions and + * the consequential memory overhead. 
+ **********************************************************************/ + + // destroy all links + void internal_clear() { + // ptr to zapper functn + T *ptr; + T *next; + + if (!empty()) { + ptr = last->next; // set to first + last->next = nullptr; // break circle + last = nullptr; // set list empty + while (ptr) { + next = ptr->next; + delete ptr; + ptr = next; + } + } + } -/*********************************************************************** - * ELIST2_ITERATOR::at_last() - * - * Are we at the end of the list? - * - **********************************************************************/ + bool empty() const { // is list empty? + return !last; + } -inline bool ELIST2_ITERATOR::at_last() const { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::at_last", ABORT); + bool singleton() const { + return last ? (last == last->next) : false; } -#endif - // we're at a deleted - return ((list->empty()) || (current == list->last) || - ((current == nullptr) && (prev == list->last) && // last point between - ex_current_was_last)); // first and last -} + void shallow_copy( // dangerous!! + IntrusiveList *from_list) { // beware destructors!! + last = from_list->last; + } -/*********************************************************************** - * ELIST2_ITERATOR::cycled_list() - * - * Have we returned to the cycle_pt since it was set? + /*********************************************************************** + * IntrusiveList::assign_to_sublist * + * The list is set to a sublist of another list. "This" list must be empty + * before this function is invoked. The two iterators passed must refer to + * the same list, different from "this" one. The sublist removed is the + * inclusive list from start_it's current position to end_it's current + * position. If this range passes over the end of the source list then the + * source list has its end set to the previous element of start_it. 
The + * extracted sublist is unaffected by the end point of the source list, its + * end point is always the end_it position. **********************************************************************/ + void assign_to_sublist( // to this list + Iterator *start_it, // from list start + Iterator *end_it); // from list end -inline bool ELIST2_ITERATOR::cycled_list() const { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::cycled_list", ABORT); + // # elements in list + int32_t length() const { + int32_t count = 0; + if (last != nullptr) { + count = 1; + for (auto it = last->next; it != last; it = it->next) { + count++; + } + } + return count; } -#endif - - return ((list->empty()) || ((current == cycle_pt) && started_cycling)); -} - -/*********************************************************************** - * ELIST2_ITERATOR::sort() - * - * Sort the elements of the list, then reposition at the start. + /*********************************************************************** + * IntrusiveList::sort * + * Sort elements on list + * NB If you don't like the const declarations in the comparator, coerce yours: + * (int (*)(const void *, const void *) **********************************************************************/ + void sort( // sort elements + int comparator( // comparison routine + const T *, const T *)) { + // Allocate an array of pointers, one per list element. + auto count = length(); + if (count > 0) { + // ptr array to sort + std::vector base; + base.reserve(count); + + Iterator it(this); + + // Extract all elements, putting the pointers in the array. 
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + base.push_back(it.extract()); + } -inline void ELIST2_ITERATOR::sort( // sort elements - int comparator( // comparison routine - const void *, const void *)) { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::sort", ABORT); - } -#endif - - list->sort(comparator); - move_to_first(); -} - -/*********************************************************************** - * ELIST2_ITERATOR::add_to_end - * - * Add a new element to the end of the list without moving the iterator. - * This is provided because a single linked list cannot move to the last as - * the iterator couldn't set its prev pointer. Adding to the end is - * essential for implementing - queues. -**********************************************************************/ + // Sort the pointer array. + std::sort(base.begin(), base.end(), + // all current comparators return -1,0,1, so we handle this correctly for std::sort + [&](auto &&l, auto &&r) {return comparator(l, r) < 0; }); -inline void ELIST2_ITERATOR::add_to_end( // element to add - ELIST2_LINK *new_element) { -#ifndef NDEBUG - if (!list) { - NO_LIST.error("ELIST2_ITERATOR::add_to_end", ABORT); - } - if (!new_element) { - BAD_PARAMETER.error("ELIST2_ITERATOR::add_to_end", ABORT, "new_element is nullptr"); - } - if (new_element->next) { - STILL_LINKED.error("ELIST2_ITERATOR::add_to_end", ABORT); + // Rebuild the list from the sorted pointers. 
+ for (auto current : base) { + it.add_to_end(current); + } + } } -#endif - if (this->at_last()) { - this->add_after_stay_put(new_element); - } else { - if (this->at_first()) { - this->add_before_stay_put(new_element); - list->last = new_element; - } else { // Iteratr is elsewhere - new_element->next = list->last->next; - new_element->prev = list->last; - list->last->next->prev = new_element; - list->last->next = new_element; - list->last = new_element; + // Assuming list has been sorted already, insert new_link to + // keep the list sorted according to the same comparison function. + // Comparison function is the same as used by sort, i.e. uses double + // indirection. Time is O(1) to add to beginning or end. + // Time is linear to add pre-sorted items to an empty list. + void add_sorted(int comparator(const T *, const T *), T *new_link) { + // Check for adding at the end. + if (last == nullptr || comparator(last, new_link) < 0) { + if (last == nullptr) { + new_link->next = new_link; + new_link->prev = new_link; + } else { + new_link->next = last->next; + new_link->prev = last; + last->next = new_link; + new_link->next->prev = new_link; + } + last = new_link; + } else { + // Need to use an iterator. + Iterator it(this); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + auto link = it.data(); + if (comparator(link, new_link) > 0) { + break; + } + } + if (it.cycled_list()) { + it.add_to_end(new_link); + } else { + it.add_before_then_move(new_link); + } } } -} - -#define ELIST2IZEH(CLASSNAME) \ - class CLASSNAME##_LIST : public X_LIST { \ - using X_LIST::X_LIST; \ - }; \ - struct CLASSNAME##_IT : X_ITER { \ - using X_ITER::X_ITER; \ - CLASSNAME *backward() { \ - return reinterpret_cast(ELIST2_ITERATOR::backward()); \ - } \ +}; + +template +using ELIST2 = IntrusiveList; + +// add TESS_API? +// move templated lists to public include dirs? 
+#define ELIST2IZEH(T) \ + class T##_LIST : public IntrusiveList { \ + public: \ + using IntrusiveList::IntrusiveList; \ + }; \ + class T##_IT : public IntrusiveList::Iterator { \ + public: \ + using base = IntrusiveList::Iterator; \ + using base::base; \ }; } // namespace tesseract diff --git a/src/ccutil/errcode.cpp b/src/ccutil/errcode.cpp index e6b05c213c..a67aba56de 100644 --- a/src/ccutil/errcode.cpp +++ b/src/ccutil/errcode.cpp @@ -86,8 +86,4 @@ void ERRCODE::error( // handle error } } -void ERRCODE::error(const char *caller, TessErrorLogCode action) const { - error(caller, action, nullptr); -} - } // namespace tesseract diff --git a/src/ccutil/errcode.h b/src/ccutil/errcode.h index e23fcbb953..5f4e5713fa 100644 --- a/src/ccutil/errcode.h +++ b/src/ccutil/errcode.h @@ -43,7 +43,9 @@ class TESS_API ERRCODE { // error handler class TessErrorLogCode action, // action to take const char *format, ... // fprintf format ) const __attribute__((format(printf, 4, 5))); - void error(const char *caller, TessErrorLogCode action) const; + void error(const char *caller, TessErrorLogCode action) const { + error(caller, action, nullptr); + } constexpr ERRCODE(const char *string) : message(string) {} // initialize with string }; diff --git a/src/ccutil/helpers.h b/src/ccutil/helpers.h index 212415020b..fa2f38ec83 100644 --- a/src/ccutil/helpers.h +++ b/src/ccutil/helpers.h @@ -24,10 +24,7 @@ #include // for INT_MIN, INT_MAX #include // std::isfinite #include -#include #include // for std::find -#include -#include #include #include @@ -68,22 +65,21 @@ inline const std::vector split(const std::string &s, char c) { return v; } -// A simple linear congruential random number generator. +// A simple linear congruential random number generator, +// using Knuth's constants from: +// http://en.wikipedia.org/wiki/Linear_congruential_generator. class TRand { public: + TRand() = default; // Sets the seed to the given value. 
void set_seed(uint64_t seed) { - e.seed(seed); - } - // Sets the seed using a hash of a string. - void set_seed(const std::string &str) { - std::hash hasher; - set_seed(static_cast(hasher(str))); + seed_ = seed; } // Returns an integer in the range 0 to INT32_MAX. int32_t IntRand() { - return e(); + Iterate(); + return seed_ >> 33; } // Returns a floating point value in the range [-range, range]. double SignedRand(double range) { @@ -95,7 +91,14 @@ class TRand { } private: - std::minstd_rand e; + // Steps the generator to the next value. + void Iterate() { + seed_ *= 6364136223846793005ULL; + seed_ += 1442695040888963407ULL; + } + + // The current value of the seed. + uint64_t seed_{1}; }; // Remove newline (if any) at the end of the string. diff --git a/src/ccutil/list.h b/src/ccutil/list.h deleted file mode 100644 index d9dfe09513..0000000000 --- a/src/ccutil/list.h +++ /dev/null @@ -1,70 +0,0 @@ -/********************************************************************** - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. 
- * - **********************************************************************/ - -#ifndef LIST_ITERATOR_H -#define LIST_ITERATOR_H - -#include - -namespace tesseract { - -template -class X_ITER : public ITERATOR { -public: - X_ITER() = default; - template - X_ITER(U *list) : ITERATOR(list) {} - - CLASSNAME *data() { - return static_cast(ITERATOR::data()); - } - CLASSNAME *data_relative(int8_t offset) { - return static_cast(ITERATOR::data_relative(offset)); - } - CLASSNAME *forward() { - return static_cast(ITERATOR::forward()); - } - CLASSNAME *extract() { - return static_cast(ITERATOR::extract()); - } -}; - -template -class X_LIST : public CONTAINER { -public: - X_LIST() = default; - X_LIST(const X_LIST &) = delete; - X_LIST &operator=(const X_LIST &) = delete; - ~X_LIST() { - clear(); - } - - /* delete elements */ - void clear() { - CONTAINER::internal_clear([](void *link) {delete reinterpret_cast(link);}); - } - - /* Become a deep copy of src_list */ - template - void deep_copy(const U *src_list, CLASSNAME *(*copier)(const CLASSNAME *)) { - X_ITER from_it(const_cast(src_list)); - X_ITER to_it(this); - - for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) - to_it.add_after_then_move((*copier)(from_it.data())); - } -}; - -} // namespace tesseract - -#endif diff --git a/src/ccutil/tprintf.cpp b/src/ccutil/tprintf.cpp index 2739b6cec4..e5814e0f2d 100644 --- a/src/ccutil/tprintf.cpp +++ b/src/ccutil/tprintf.cpp @@ -70,15 +70,6 @@ FILE *get_debugfp() { return debugfp; } -// Trace printf. -void tprintf(const char *format, ...) 
{ - FILE *f = get_debugfp(); - va_list args; // variable args - va_start(args, format); // variable list - vfprintf(f, format, args); - va_end(args); -} - TessErrStream tesserr; } // namespace tesseract diff --git a/src/ccutil/tprintf.h b/src/ccutil/tprintf.h index 574cbbb708..4b984f5ee7 100644 --- a/src/ccutil/tprintf.h +++ b/src/ccutil/tprintf.h @@ -21,26 +21,25 @@ #include "params.h" // for INT_VAR_H #include // for TESS_API +#include +#include // for std::forward namespace tesseract { -#if !defined(__GNUC__) && !defined(__attribute__) -# define __attribute__(attr) // compiler without support for __attribute__ -#endif - // Disable some log messages by setting log_level > 0. extern TESS_API INT_VAR_H(log_level); -// Main logging function. -extern TESS_API void tprintf( // Trace printf - const char *format, ...) // Message - __attribute__((format(printf, 1, 2))); - // Get file for debug output. -FILE *get_debugfp(); +TESS_API FILE *get_debugfp(); -} // namespace tesseract +// Main logging function. Trace printf. +inline void tprintf(const char *format, ...) 
{ + va_list args; + va_start(args, format); + vfprintf(get_debugfp(), format, args); + va_end(args); +} -#undef __attribute__ +} // namespace tesseract #endif // define TESSERACT_CCUTIL_TPRINTF_H diff --git a/src/ccutil/universalambigs.h b/src/ccutil/universalambigs.h index 83b0b2987e..d2e3c3a446 100644 --- a/src/ccutil/universalambigs.h +++ b/src/ccutil/universalambigs.h @@ -22,8 +22,10 @@ namespace tesseract { +#ifndef _MSC_VER #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Woverlength-strings" +#endif inline const char kUniversalAmbigsFile[] = { "v2\n" "'' \" 1\n" @@ -19035,7 +19037,9 @@ inline const char kUniversalAmbigsFile[] = { "iXl in 1\n" #endif }; +#ifndef _MSC_VER #pragma GCC diagnostic pop +#endif inline const int ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile); diff --git a/src/classify/cluster.cpp b/src/classify/cluster.cpp index fd88978023..4143bbbb39 100644 --- a/src/classify/cluster.cpp +++ b/src/classify/cluster.cpp @@ -3238,10 +3238,10 @@ static bool MultipleCharSamples(CLUSTERER *Clusterer, CLUSTER *Cluster, float Ma InitSampleSearch(SearchState, Cluster); while ((Sample = NextSample(&SearchState)) != nullptr) { CharID = Sample->CharID; - if (CharFlags[CharID] == false) { + if (CharFlags[CharID] == 0) { CharFlags[CharID] = true; } else { - if (CharFlags[CharID] == true) { + if (CharFlags[CharID] == 1) { NumIllegalInCluster++; CharFlags[CharID] = ILLEGAL_CHAR; } diff --git a/src/classify/protos.cpp b/src/classify/protos.cpp index 71f1920771..6c793024a9 100644 --- a/src/classify/protos.cpp +++ b/src/classify/protos.cpp @@ -25,7 +25,6 @@ #include "classify.h" #include "intproto.h" #include "params.h" -#include "tprintf.h" #include // for M_PI #include diff --git a/src/classify/trainingsample.h b/src/classify/trainingsample.h index 5e4a943af9..211ab66994 100644 --- a/src/classify/trainingsample.h +++ b/src/classify/trainingsample.h @@ -51,7 +51,7 @@ static const int kSampleScaleSize = 3; static const int kSampleRandomSize = 
kSampleYShiftSize * kSampleScaleSize - 2; // ASSERT_IS_PRIME(kSampleRandomSize) !! -class TESS_API TrainingSample : public ELIST_LINK { +class TESS_API TrainingSample : public ELIST::LINK { public: TrainingSample() : class_id_(INVALID_UNICHAR_ID) diff --git a/src/dict/dawg.h b/src/dict/dawg.h index 408fa2ca96..b87b3880b7 100644 --- a/src/dict/dawg.h +++ b/src/dict/dawg.h @@ -110,7 +110,7 @@ static const char kWildcard[] = "*"; class TESS_API Dawg { public: /// Magic number to determine endianness when reading the Dawg from file. - static const int16_t kDawgMagicNumber = 42; + static constexpr int16_t kDawgMagicNumber = 42; /// A special unichar id that indicates that any appropriate pattern /// (e.g.dictionary word, 0-9 digit, etc) can be inserted instead /// Used for expressing patterns in punctuation and number Dawgs. diff --git a/src/lstm/lstmrecognizer.h b/src/lstm/lstmrecognizer.h index bfefac8f1b..4234cf324e 100644 --- a/src/lstm/lstmrecognizer.h +++ b/src/lstm/lstmrecognizer.h @@ -286,7 +286,7 @@ class TESS_API LSTMRecognizer { protected: // Sets the random seed from the sample_iteration_; void SetRandomSeed() { - int64_t seed = static_cast(sample_iteration_) * 0x10000001; + int64_t seed = sample_iteration_ * 0x10000001LL; randomizer_.set_seed(seed); randomizer_.IntRand(); } diff --git a/src/lstm/network.h b/src/lstm/network.h index feb1206f58..353b110c92 100644 --- a/src/lstm/network.h +++ b/src/lstm/network.h @@ -23,7 +23,6 @@ #include "networkio.h" #include "serialis.h" #include "static_shape.h" -#include "tprintf.h" #include #include diff --git a/src/tesseract.cpp b/src/tesseract.cpp index 2c27d2a04f..8ed2c1d52f 100644 --- a/src/tesseract.cpp +++ b/src/tesseract.cpp @@ -273,32 +273,6 @@ static void PrintHelpMessage(const char *program) { program, program, program); } -static bool SetVariablesFromCLArgs(tesseract::TessBaseAPI &api, int argc, char **argv) { - bool success = true; - char opt1[256], opt2[255]; - for (int i = 0; i < argc; i++) { - if 
(strcmp(argv[i], "-c") == 0 && i + 1 < argc) { - strncpy(opt1, argv[i + 1], 255); - opt1[255] = '\0'; - char *p = strchr(opt1, '='); - if (!p) { - fprintf(stderr, "Missing = in configvar assignment\n"); - success = false; - break; - } - *p = 0; - strncpy(opt2, strchr(argv[i + 1], '=') + 1, sizeof(opt2) - 1); - opt2[254] = 0; - ++i; - - if (!api.SetVariable(opt1, opt2)) { - fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2); - } - } - } - return success; -} - static void PrintLangsList(tesseract::TessBaseAPI &api) { std::vector languages; api.GetAvailableLanguagesAsVector(&languages); @@ -444,7 +418,7 @@ static bool ParseArgs(int argc, char **argv, const char **lang, const char **ima try { auto loglevel = loglevels.at(loglevel_string); log_level = loglevel; - } catch (const std::out_of_range &e) { + } catch (const std::out_of_range &) { // TODO: Allow numeric argument? tprintf("Error, unsupported --loglevel %s\n", loglevel_string.c_str()); return false; @@ -485,7 +459,16 @@ static bool ParseArgs(int argc, char **argv, const char **lang, const char **ima *print_fonts_table = true; #endif // ndef DISABLED_LEGACY_ENGINE } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { - // handled properly after api init + const std::string argument(argv[i + 1]); + const auto equal_pos = argument.find('='); + if (equal_pos == std::string::npos) { + throw std::invalid_argument("Missing '=' in configvar assignment"); + } + // Extract key and value + const std::string key = argument.substr(0, equal_pos); + const std::string value = argument.substr(equal_pos + 1); + vars_vec->push_back(key); + vars_values->push_back(value); ++i; } else if (*image == nullptr) { *image = argv[i]; @@ -665,7 +648,7 @@ static void PreloadRenderers(tesseract::TessBaseAPI &api, * **********************************************************************/ -int main(int argc, char **argv) { +static int main1(int argc, char **argv) { #if defined(__USE_GNU) && defined(HAVE_FEENABLEEXCEPT) // Raise 
SIGFPE. # if defined(__clang__) @@ -736,10 +719,6 @@ int main(int argc, char **argv) { const int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), argc - arg_i, &vars_vec, &vars_values, false); - if (!SetVariablesFromCLArgs(api, argc, argv)) { - return EXIT_FAILURE; - } - // SIMD settings might be overridden by config variable. tesseract::SIMDDetect::Update(); @@ -873,3 +852,14 @@ int main(int argc, char **argv) { return ret_val; } + +int main(int argc, char **argv) { + try { + return main1(argc, argv); + } catch (std::exception &e) { + std::cerr << "exception: " << e.what() << "\n"; + } catch (...) { + std::cerr << "unknown exception\n"; + } + return 1; +} diff --git a/src/textord/bbgrid.h b/src/textord/bbgrid.h index cced30063d..ddaf7222f8 100644 --- a/src/textord/bbgrid.h +++ b/src/textord/bbgrid.h @@ -364,10 +364,7 @@ class GridSearch { // Sort function to sort a BBC by bounding_box().left(). template -int SortByBoxLeft(const void *void1, const void *void2) { - // The void*s are actually doubly indirected, so get rid of one level. - const BBC *p1 = *static_cast(void1); - const BBC *p2 = *static_cast(void2); +int SortByBoxLeft(const BBC *p1, const BBC *p2) { int result = p1->bounding_box().left() - p2->bounding_box().left(); if (result != 0) { return result; @@ -384,10 +381,7 @@ int SortByBoxLeft(const void *void1, const void *void2) { } template -bool StdSortByBoxLeft(const void *void1, const void *void2) { - // The void*s are actually doubly indirected, so get rid of one level. - const BBC *p1 = *static_cast(void1); - const BBC *p2 = *static_cast(void2); +bool StdSortByBoxLeft(const BBC *p1, const BBC *p2) { int result = p1->bounding_box().left() - p2->bounding_box().left(); if (result != 0) { return result < 0; @@ -405,10 +399,7 @@ bool StdSortByBoxLeft(const void *void1, const void *void2) { // Sort function to sort a BBC by bounding_box().right() in right-to-left order. 
template -int SortRightToLeft(const void *void1, const void *void2) { - // The void*s are actually doubly indirected, so get rid of one level. - const BBC *p1 = *static_cast(void1); - const BBC *p2 = *static_cast(void2); +int SortRightToLeft(const BBC *p1, const BBC *p2) { int result = p2->bounding_box().right() - p1->bounding_box().right(); if (result != 0) { return result; @@ -425,10 +416,7 @@ int SortRightToLeft(const void *void1, const void *void2) { } template -bool StdSortRightToLeft(const void *void1, const void *void2) { - // The void*s are actually doubly indirected, so get rid of one level. - const BBC *p1 = *static_cast(void1); - const BBC *p2 = *static_cast(void2); +bool StdSortRightToLeft(const BBC *p1, const BBC *p2) { int result = p2->bounding_box().right() - p1->bounding_box().right(); if (result != 0) { return result < 0; @@ -446,10 +434,7 @@ bool StdSortRightToLeft(const void *void1, const void *void2) { // Sort function to sort a BBC by bounding_box().bottom(). template -int SortByBoxBottom(const void *void1, const void *void2) { - // The void*s are actually doubly indirected, so get rid of one level. 
- const BBC *p1 = *static_cast(void1); - const BBC *p2 = *static_cast(void2); +int SortByBoxBottom(const BBC *p1, const BBC *p2) { int result = p1->bounding_box().bottom() - p2->bounding_box().bottom(); if (result != 0) { return result; diff --git a/src/textord/blkocc.h b/src/textord/blkocc.h index 449cddbb2f..e8d5a1bb41 100644 --- a/src/textord/blkocc.h +++ b/src/textord/blkocc.h @@ -44,7 +44,7 @@ CLASS REGION_OCC ****************************************************************************/ -class REGION_OCC : public ELIST_LINK { +class REGION_OCC : public ELIST::LINK { public: float min_x; // Lowest x in region float max_x; // Highest x in region diff --git a/src/textord/colpartition.h b/src/textord/colpartition.h index 74a6a38ce2..438277542d 100644 --- a/src/textord/colpartition.h +++ b/src/textord/colpartition.h @@ -64,7 +64,7 @@ CLISTIZEH(ColPartition) * to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions * emerges, which represents the columns over a wide y-coordinate range. */ -class TESS_API ColPartition : public ELIST2_LINK { +class TESS_API ColPartition : public ELIST2::LINK { public: // This empty constructor is here only so that the class can be ELISTIZED. // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier @@ -709,9 +709,7 @@ class TESS_API ColPartition : public ELIST2_LINK { bool IsInSameColumnAs(const ColPartition &part) const; // Sort function to sort by bounding box. 
- static int SortByBBox(const void *p1, const void *p2) { - const ColPartition *part1 = *static_cast(p1); - const ColPartition *part2 = *static_cast(p2); + static int SortByBBox(const ColPartition *part1, const ColPartition *part2) { int mid_y1 = part1->bounding_box_.y_middle(); int mid_y2 = part2->bounding_box_.y_middle(); if ((part2->bounding_box_.bottom() <= mid_y1 && diff --git a/src/textord/colpartitionset.h b/src/textord/colpartitionset.h index 60a78297fa..53b04177fb 100644 --- a/src/textord/colpartitionset.h +++ b/src/textord/colpartitionset.h @@ -35,7 +35,7 @@ using PartSetVector = std::vector; // Its main use is in holding a candidate partitioning of the width of the // image into columns, where each member ColPartition is a single column. // ColPartitionSets are used in building the column layout of a page. -class ColPartitionSet : public ELIST_LINK { +class ColPartitionSet : public ELIST::LINK { public: ColPartitionSet() = default; explicit ColPartitionSet(ColPartition_LIST *partitions); diff --git a/src/textord/fpchop.h b/src/textord/fpchop.h index 13f4c10aaa..2e8ec39f77 100644 --- a/src/textord/fpchop.h +++ b/src/textord/fpchop.h @@ -24,7 +24,7 @@ namespace tesseract { -class C_OUTLINE_FRAG : public ELIST_LINK { +class C_OUTLINE_FRAG : public ELIST::LINK { public: C_OUTLINE_FRAG() { // empty constructor steps = nullptr; diff --git a/src/textord/makerow.cpp b/src/textord/makerow.cpp index 837a3e3104..3d89b711b6 100644 --- a/src/textord/makerow.cpp +++ b/src/textord/makerow.cpp @@ -105,13 +105,8 @@ const int kMinLeaderCount = 5; * Sort function to sort rows in y from page top. 
*/ static int row_y_order( // sort function - const void *item1, // items to compare - const void *item2) { - // converted ptr - const TO_ROW *row1 = *reinterpret_cast(item1); - // converted ptr - const TO_ROW *row2 = *reinterpret_cast(item2); - + const TO_ROW *row1, // items to compare + const TO_ROW *row2) { if (row1->parallel_c() > row2->parallel_c()) { return -1; } else if (row1->parallel_c() < row2->parallel_c()) { @@ -2540,13 +2535,8 @@ OVERLAP_STATE most_overlapping_row( // find best row * Sort function to sort blobs in x from page left. */ int blob_x_order( // sort function - const void *item1, // items to compare - const void *item2) { - // converted ptr - const BLOBNBOX *blob1 = *reinterpret_cast(item1); - // converted ptr - const BLOBNBOX *blob2 = *reinterpret_cast(item2); - + const BLOBNBOX *blob1, // items to compare + const BLOBNBOX *blob2) { if (blob1->bounding_box().left() < blob2->bounding_box().left()) { return -1; } else if (blob1->bounding_box().left() > blob2->bounding_box().left()) { diff --git a/src/textord/makerow.h b/src/textord/makerow.h index 91668dfabd..cfee8ec667 100644 --- a/src/textord/makerow.h +++ b/src/textord/makerow.h @@ -243,8 +243,8 @@ OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, // iterator bool testing_blob // test stuff ); int blob_x_order( // sort function - const void *item1, // items to compare - const void *item2); + const BLOBNBOX *item1, // items to compare + const BLOBNBOX *item2); void mark_repeated_chars(TO_ROW *row); diff --git a/src/textord/pitsync1.h b/src/textord/pitsync1.h index 5df7ec4889..d81512d660 100644 --- a/src/textord/pitsync1.h +++ b/src/textord/pitsync1.h @@ -31,7 +31,7 @@ namespace tesseract { class FPSEGPT_LIST; -class FPSEGPT : public ELIST_LINK { +class FPSEGPT : public ELIST::LINK { public: FPSEGPT() = default; FPSEGPT( // constructor diff --git a/src/textord/sortflts.h b/src/textord/sortflts.h index 278d8f9074..8c1755b10b 100644 --- a/src/textord/sortflts.h +++ b/src/textord/sortflts.h 
@@ -23,7 +23,7 @@ namespace tesseract { -class SORTED_FLOAT : public ELIST_LINK { +class SORTED_FLOAT : public ELIST::LINK { friend class SORTED_FLOATS; public: diff --git a/src/textord/tablefind.cpp b/src/textord/tablefind.cpp index 537bec9cb0..46ce4fa433 100644 --- a/src/textord/tablefind.cpp +++ b/src/textord/tablefind.cpp @@ -2099,7 +2099,7 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid *grid, //////// ColSegment code //////// ColSegment::ColSegment() - : ELIST_LINK(), + : ELIST::LINK(), num_table_cells_(0), num_text_cells_(0), type_(COL_UNKNOWN) {} diff --git a/src/textord/tablefind.h b/src/textord/tablefind.h index 2554afdbc1..378b71a255 100644 --- a/src/textord/tablefind.h +++ b/src/textord/tablefind.h @@ -36,7 +36,7 @@ class ColSegment; ELISTIZEH(ColSegment) CLISTIZEH(ColSegment) -class ColSegment : public ELIST_LINK { +class ColSegment : public ELIST::LINK { public: ColSegment(); ~ColSegment() = default; diff --git a/src/textord/tabvector.h b/src/textord/tabvector.h index 2c48d72196..9762009c0b 100644 --- a/src/textord/tabvector.h +++ b/src/textord/tabvector.h @@ -64,7 +64,7 @@ ELISTIZEH(TabConstraint) // on a list of constraints. The list itself is cooperatively owned // by the TabVectors of the constraints on the list and managed // by implicit reference counting via the elements of the list. -class TabConstraint : public ELIST_LINK { +class TabConstraint : public ELIST::LINK { public: // This empty constructor is here only so that the class can be ELISTIZED. // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier @@ -102,7 +102,7 @@ class TabConstraint : public ELIST_LINK { // Class to hold information about a single vector // that represents a tab stop or a rule line. -class TabVector : public ELIST2_LINK { +class TabVector : public ELIST2::LINK { public: // TODO(rays) fix this in elst.h line 1076, where it should use the // copy constructor instead of operator=. 
@@ -286,9 +286,7 @@ class TabVector : public ELIST2_LINK { } // Sort function for E2LIST::sort to sort by sort_key_. - static int SortVectorsByKey(const void *v1, const void *v2) { - const TabVector *tv1 = *static_cast(v1); - const TabVector *tv2 = *static_cast(v2); + static int SortVectorsByKey(const TabVector *tv1, const TabVector *tv2) { return tv1->sort_key_ - tv2->sort_key_; } diff --git a/src/textord/workingpartset.h b/src/textord/workingpartset.h index f02926967a..ee092decdb 100644 --- a/src/textord/workingpartset.h +++ b/src/textord/workingpartset.h @@ -29,7 +29,7 @@ namespace tesseract { // WorkingPartSet holds a working set of ColPartitions during transformation // from the grid-based storage to regions in logical reading order, and is // therefore only used during construction of the regions. -class WorkingPartSet : public ELIST_LINK { +class WorkingPartSet : public ELIST::LINK { public: explicit WorkingPartSet(ColPartition *column) : column_(column), latest_part_(nullptr), part_it_(&part_set_) {} diff --git a/src/training/pango/pango_font_info.cpp b/src/training/pango/pango_font_info.cpp index aefe73417e..610f42cf23 100644 --- a/src/training/pango/pango_font_info.cpp +++ b/src/training/pango/pango_font_info.cpp @@ -230,7 +230,7 @@ bool PangoFontInfo::CoversUTF8Text(const char *utf8_text, int byte_length) const int len = it.get_utf8(tmp); tmp[len] = '\0'; tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it); -#if PANGO_VERSION_CHECK(1, 52, 0) +#if PANGO_VERSION_CHECK(1, 50, 4) g_object_unref(coverage); #else pango_coverage_unref(coverage); @@ -239,7 +239,7 @@ bool PangoFontInfo::CoversUTF8Text(const char *utf8_text, int byte_length) const return false; } } -#if PANGO_VERSION_CHECK(1, 52, 0) +#if PANGO_VERSION_CHECK(1, 50, 4) g_object_unref(coverage); #else pango_coverage_unref(coverage); @@ -311,7 +311,7 @@ int PangoFontInfo::DropUncoveredChars(std::string *utf8_text) const { my_strnmove(out, utf8_char, utf8_len); out += utf8_len; } -#if 
PANGO_VERSION_CHECK(1, 52, 0) +#if PANGO_VERSION_CHECK(1, 50, 4) g_object_unref(coverage); #else pango_coverage_unref(coverage); @@ -615,7 +615,7 @@ int FontUtils::FontScore(const std::unordered_map &ch_map, ch_flags->push_back(covered); } } -#if PANGO_VERSION_CHECK(1, 52, 0) +#if PANGO_VERSION_CHECK(1, 50, 4) g_object_unref(coverage); #else pango_coverage_unref(coverage); diff --git a/src/wordrec/lm_state.h b/src/wordrec/lm_state.h index 14f343e4be..a7ba725753 100644 --- a/src/wordrec/lm_state.h +++ b/src/wordrec/lm_state.h @@ -89,7 +89,7 @@ struct LanguageModelNgramInfo { /// Struct for storing the information about a path in the segmentation graph /// explored by Viterbi search. -struct ViterbiStateEntry : public ELIST_LINK { +struct ViterbiStateEntry : public ELIST::LINK { ViterbiStateEntry(ViterbiStateEntry *pe, BLOB_CHOICE *b, float c, float ol, const LMConsistencyInfo &ci, const AssociateStats &as, LanguageModelFlagsType tcf, LanguageModelDawgInfo *d, LanguageModelNgramInfo *n, @@ -133,9 +133,7 @@ struct ViterbiStateEntry : public ELIST_LINK { } /// Comparator function for sorting ViterbiStateEntry_LISTs in /// non-increasing order of costs. - static int Compare(const void *e1, const void *e2) { - const ViterbiStateEntry *ve1 = *static_cast(e1); - const ViterbiStateEntry *ve2 = *static_cast(e2); + static int Compare(const ViterbiStateEntry *ve1, const ViterbiStateEntry *ve2) { return (ve1->cost < ve2->cost) ? 
-1 : 1; } inline bool Consistent() const { diff --git a/src/wordrec/wordrec.h b/src/wordrec/wordrec.h index 20bdbcf9e4..f57d6fe985 100644 --- a/src/wordrec/wordrec.h +++ b/src/wordrec/wordrec.h @@ -169,7 +169,7 @@ class SegSearchPending { }; /* ccmain/tstruct.cpp *********************************************************/ -class FRAGMENT : public ELIST_LINK { +class FRAGMENT : public ELIST::LINK { public: FRAGMENT() { // constructor } diff --git a/sw.cpp b/sw.cpp index dfa26a27f6..e989e6e8a9 100644 --- a/sw.cpp +++ b/sw.cpp @@ -16,7 +16,6 @@ void build(Solution &s) libtesseract += "TESS_API"_api; libtesseract += "include/.*"_rr; libtesseract += "src/.+/.*"_rr; - libtesseract -= "src/lstm/.*\\.cc"_rr; libtesseract -= "src/training/.*"_rr; libtesseract.Public += "include"_idir; @@ -335,8 +334,7 @@ void build(Solution &s) auto &tw = add_test("tatweel"); tw += "unittest/util/.*"_rr; - tw += "unittest/third_party/.*"_rr; - tw -= "unittest/third_party/googletest/.*"_rr; + tw += "unittest/third_party/utf/.*"_rr; } } diff --git a/test b/test index 2761899921..232ff181c6 160000 --- a/test +++ b/test @@ -1 +1 @@ -Subproject commit 2761899921c08014cf9dbf3b63592237fb9e6ecb +Subproject commit 232ff181c66516116ec0e84c4963f70de15050fd diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt new file mode 100644 index 0000000000..6a91c62fc2 --- /dev/null +++ b/unittest/CMakeLists.txt @@ -0,0 +1,110 @@ +# find_package(GTest REQUIRED) +include(GoogleTest) # Todo install GoogleTests? 
+ +# Set common include directories +set(COMMON_INCLUDE_DIRS + ${CMAKE_CURRENT_BINARY_DIR}/../src/training + ${CMAKE_CURRENT_SOURCE_DIR}/../src/ccutil + ${CMAKE_CURRENT_SOURCE_DIR}/../src/ccstruct + ${CMAKE_CURRENT_SOURCE_DIR}/../src/viewer + ${CMAKE_CURRENT_SOURCE_DIR}/../include + ${CMAKE_CURRENT_SOURCE_DIR}/../src/training/unicharset + ${CMAKE_CURRENT_SOURCE_DIR}/../src/training/common + ${CMAKE_CURRENT_SOURCE_DIR}/third_party/googletest/googlemock/include) + +if (MSVC) + set(TESSBIN_DIR ${EXECUTABLE_OUTPUT_PATH}/$) +else() + set(TESSBIN_DIR ${EXECUTABLE_OUTPUT_PATH}) +endif() + +# Set common compile definitions +set(COMMON_COMPILE_DEFINITIONS + "-DTESTING_DIR=\"${CMAKE_CURRENT_SOURCE_DIR}/../test/testing\"" + "-DTESSDATA_DIR=\"${CMAKE_CURRENT_SOURCE_DIR}/../tessdata\"" + "-DTESSBIN_DIR=\"${TESSBIN_DIR}\"" + "-DTESTDATA_DIR=\"${CMAKE_CURRENT_SOURCE_DIR}/../test/testdata\"" + "-DLANGDATA_DIR=\"${CMAKE_CURRENT_SOURCE_DIR}/../langdata_lstm\"") + +file( + GLOB TEST_SOURCES + RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "*.cc") + +set(COMMON_LINK_LIBS libtesseract GTest::gtest_main common_training + unicharset_training) + +set(TRAINING_TESTS + commandlineflags_test.cc + dawg_test.cc + lstm_recode_test.cc + lstm_squashed_test.cc + lstm_test.cc + lstm_test.cc + normstrngs_test.cc + unichar_test.cc + unicharcompress_test.cc + unicharset_test.cc + validate_grapheme_test.cc + validate_indic_test.cc + validate_khmer_test.cc + validate_myanmar_test.cc + validator_test.cc) + +set(PANGO_TESTS ligature_table_test.cc pango_font_info_test.cc + pango_font_info_test.cc stringrenderer_test.cc) + +set(LEGACY_TESTS + applybox_test.cc + bitvector_test.cc + equationdetect_test.cc + indexmapbidi_test.cc + intfeaturemap_test.cc + mastertrainer_test.cc + osd_test.cc + params_model_test.cc + shapetable_test.cc) + +if(BUILD_TRAINING_TOOLS AND PANGO_FOUND) + list(APPEND COMMON_INCLUDE_DIRS + ${CMAKE_CURRENT_SOURCE_DIR}/../src/training/pango ${PANGO_INCLUDE_DIRS}) + +else() + list(REMOVE_ITEM 
TEST_SOURCES ${PANGO_TESTS}) +endif() + +if(DISABLED_LEGACY_ENGINE) + list(REMOVE_ITEM TEST_SOURCES ${LEGACY_TESTS}) +endif() + +if(NOT BUILD_TRAINING_TOOLS) + list(REMOVE_ITEM TEST_SOURCES ${TRAINING_TESTS}) +endif() + +set(TATWEEL_TEST_EXTRA_SRC util/utf8/unilib.cc util/utf8/unicodetext.cc + third_party/utf/rune.c) + +message(STATUS "Enabled tests: ${TEST_SOURCES}") + +foreach(test_source IN LISTS TEST_SOURCES) + get_filename_component(test_name ${test_source} NAME_WE) + if(${test_source} IN_LIST PANGO_TESTS) + list(APPEND COMMON_LINK_LIBS pango_training ${PANGO_LIBRARIES}) + endif() + if(${test_name} MATCHES "tatweel_test") + list(APPEND test_source ${TATWEEL_TEST_EXTRA_SRC}) + list(APPEND COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/util/utf8) + endif() + add_executable(${test_name} ${test_source}) + if(${test_name} MATCHES "progress_test") + target_link_libraries(${test_name} PRIVATE GTest::gmock) + endif() + target_compile_definitions(${test_name} PRIVATE ${COMMON_COMPILE_DEFINITIONS}) + target_include_directories(${test_name} PRIVATE ${COMMON_INCLUDE_DIRS}) + target_link_libraries(${test_name} PRIVATE ${COMMON_LINK_LIBS}) + add_test(NAME ${test_name} COMMAND ${test_name}) +endforeach() + +# Discover tests gtest_discover_tests(apiexample_test baseapi_test +# baseapi_thread_test) add_test(baseapi_gtests baseapi_test.cc) diff --git a/unittest/README.md b/unittest/README.md index 2d7742993a..113e82831d 100644 --- a/unittest/README.md +++ b/unittest/README.md @@ -82,6 +82,9 @@ To run the tests, do the following in tesseract folder ``` autoreconf -fiv git submodule update --init +git clone https://github.com/egorpugin/tessdata tessdata_unittest --depth 1 +cp tessdata_unittest/fonts/* test/testing/ +mv tessdata_unittest/* ../ export TESSDATA_PREFIX=/prefix/to/path/to/tessdata make check ``` diff --git a/unittest/intsimdmatrix_test.cc b/unittest/intsimdmatrix_test.cc index 95688eed5a..c3e180997d 100644 --- 
a/unittest/intsimdmatrix_test.cc +++ b/unittest/intsimdmatrix_test.cc @@ -22,7 +22,6 @@ #include "include_gunit.h" #include "matrix.h" #include "simddetect.h" -#include "tprintf.h" namespace tesseract { @@ -92,9 +91,9 @@ class IntSimdMatrixTest : public ::testing::Test { } // Compare sum of all results with expected value. #ifdef FAST_FLOAT - EXPECT_FLOAT_EQ(total, 337852.16f); + EXPECT_FLOAT_EQ(total, -423236.53f); #else - EXPECT_FLOAT_EQ(total, 337849.39354684710); + EXPECT_FLOAT_EQ(total, -423243.392011); #endif } diff --git a/unittest/linlsq_test.cc b/unittest/linlsq_test.cc index cac5dba24e..e278c9f999 100644 --- a/unittest/linlsq_test.cc +++ b/unittest/linlsq_test.cc @@ -103,15 +103,15 @@ TEST_F(LLSQTest, Vectors) { // sqrt( sum (!nvec * (x_i - x_avg))^2 / n) TEST_F(LLSQTest, RmsOrthWorksAsIntended) { std::vector pts; - pts.emplace_back(0.56, 0.95); - pts.emplace_back(0.09, 0.09); - pts.emplace_back(0.13, 0.77); - pts.emplace_back(0.16, 0.83); - pts.emplace_back(0.45, 0.79); - VerifyRmsOrth(pts, FCOORD(1, 0)); - VerifyRmsOrth(pts, FCOORD(1, 1)); - VerifyRmsOrth(pts, FCOORD(1, 2)); - VerifyRmsOrth(pts, FCOORD(2, 1)); + pts.emplace_back(0.56f, 0.95f); + pts.emplace_back(0.09f, 0.09f); + pts.emplace_back(0.13f, 0.77f); + pts.emplace_back(0.16f, 0.83f); + pts.emplace_back(0.45f, 0.79f); + VerifyRmsOrth(pts, FCOORD(1.f, 0.f)); + VerifyRmsOrth(pts, FCOORD(1.f, 1.f)); + VerifyRmsOrth(pts, FCOORD(1.f, 2.f)); + VerifyRmsOrth(pts, FCOORD(2.f, 1.f)); } } // namespace tesseract diff --git a/unittest/list_test.cc b/unittest/list_test.cc index 212994e4b0..a35ae27e21 100644 --- a/unittest/list_test.cc +++ b/unittest/list_test.cc @@ -25,19 +25,19 @@ class ListTest : public ::testing::Test { const size_t ListSize = 5; }; -class Clst : public CLIST_LINK { +class Clst { public: Clst(unsigned n) : value(n) {} unsigned value; }; -class Elst : public ELIST_LINK { +class Elst : public ELIST::LINK { public: Elst(unsigned n) : value(n) {} unsigned value; }; -class Elst2 : public 
ELIST2_LINK { +class Elst2 : public ELIST2::LINK { public: Elst2(unsigned n) : value(n) {} unsigned value; @@ -51,7 +51,7 @@ TEST_F(ListTest, TestCLIST) { Clst_CLIST list; EXPECT_TRUE(list.empty()); EXPECT_EQ(list.length(), 0); - auto it = CLIST_ITERATOR(&list); + auto it = Clst_CLIST::ITERATOR(&list); for (unsigned i = 0; i < ListSize; i++) { auto *lst = new Clst(i); it.add_to_end(lst); @@ -82,7 +82,7 @@ TEST_F(ListTest, TestELIST) { Elst_LIST list; EXPECT_TRUE(list.empty()); EXPECT_EQ(list.length(), 0); - auto it = ELIST_ITERATOR(&list); + auto it = ELIST::ITERATOR(&list); for (unsigned i = 0; i < ListSize; i++) { auto *elst = new Elst(i); it.add_to_end(elst); @@ -113,7 +113,7 @@ TEST_F(ListTest, TestELIST2) { Elst2_LIST list; EXPECT_TRUE(list.empty()); EXPECT_EQ(list.length(), 0); - auto it = ELIST2_ITERATOR(&list); + auto it = ELIST2::ITERATOR(&list); for (unsigned i = 0; i < ListSize; i++) { auto *lst = new Elst2(i); it.add_to_end(lst); diff --git a/unittest/lstm_test.h b/unittest/lstm_test.h index d1de2eb7fd..380c3476d7 100644 --- a/unittest/lstm_test.h +++ b/unittest/lstm_test.h @@ -19,7 +19,6 @@ #include "include_gunit.h" #include "helpers.h" -#include "tprintf.h" #include "functions.h" #include "lang_model_helpers.h" diff --git a/unittest/third_party/googletest b/unittest/third_party/googletest index b514bdc898..7d76a231b0 160000 --- a/unittest/third_party/googletest +++ b/unittest/third_party/googletest @@ -1 +1 @@ -Subproject commit b514bdc898e2951020cbdca1304b75f5950d1f59 +Subproject commit 7d76a231b0e29caf86e68d1df858308cd53b2a66 diff --git a/unittest/unicharcompress_test.cc b/unittest/unicharcompress_test.cc index 98f1e3bcc6..e829ca6ac9 100644 --- a/unittest/unicharcompress_test.cc +++ b/unittest/unicharcompress_test.cc @@ -16,7 +16,6 @@ #include "include_gunit.h" #include "log.h" // for LOG #include "serialis.h" -#include "tprintf.h" #include "unicharcompress.h" namespace tesseract {