From f2c25c0c9a31b36d49a497bc2bbeea5085ff5c8c Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 24 Nov 2023 12:49:26 +1100 Subject: [PATCH 001/348] Test can run in Debug build on windows. --- tests/CMakeLists.txt | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index abcc7ede..b597c265 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -43,8 +43,15 @@ include(GoogleTest) gtest_add_tests(TARGET test_executables) if (WIN32) - add_custom_command(TARGET test_executables POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "../bin/Release/gtest.dll" "./Release/" - COMMAND ${CMAKE_COMMAND} -E copy "../bin/Release/gtest_main.dll" "./Release/" - ) + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + add_custom_command(TARGET test_executables POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "../bin/Debug/gtest.dll" "./Debug/" + COMMAND ${CMAKE_COMMAND} -E copy "../bin/Debug/gtest_main.dll" "./Debug/" + ) + elseif(CMAKE_BUILD_TYPE STREQUAL "Release") + add_custom_command(TARGET test_executables POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "../bin/Release/gtest.dll" "./Release/" + COMMAND ${CMAKE_COMMAND} -E copy "../bin/Release/gtest_main.dll" "./Release/" + ) + endif() endif() From 7a8415e5ebb5cb94c2f321a00c7ccc79659e835b Mon Sep 17 00:00:00 2001 From: aous72 Date: Mon, 18 Dec 2023 16:38:51 +1100 Subject: [PATCH 002/348] Minor improvement to CMakeLists.txt --- .gitignore | 1 + CMakeLists.txt | 56 +++++++++++++++------------------------------- ojph_libname.cmake | 37 ++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 38 deletions(-) create mode 100644 ojph_libname.cmake diff --git a/.gitignore b/.gitignore index 627f2909..c101eb7d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .DS_Store mytest/* others/* +lib/* .vscode build.sh \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index dbdeff43..910f9b44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ 
-1,30 +1,14 @@ cmake_minimum_required(VERSION 3.11.0) +## project project (openjph DESCRIPTION "Open source implementation of JPH" LANGUAGES CXX) +set_property(GLOBAL PROPERTY USE_FOLDERS ON) ################################################################################################ # Building OpenJPH ################################################################################################ -############################################################ -# Parse version file -# credit: https://stackoverflow.com/a/47084079 - -file(READ "${CMAKE_CURRENT_SOURCE_DIR}/src/core/common/ojph_version.h" VERFILE) -if (NOT VERFILE) - message(FATAL_ERROR "Failed to parse ojph_version.h!") -endif() - -string(REGEX MATCH "OPENJPH_VERSION_MAJOR ([0-9]*)" _ ${VERFILE}) -set(OPENJPH_VERSION_MAJOR ${CMAKE_MATCH_1}) -string(REGEX MATCH "OPENJPH_VERSION_MINOR ([0-9]*)" _ ${VERFILE}) -set(OPENJPH_VERSION_MINOR ${CMAKE_MATCH_1}) -string(REGEX MATCH "OPENJPH_VERSION_PATCH ([a-z0-9]*)" _ ${VERFILE}) -set(OPENJPH_VERSION_PATCH ${CMAKE_MATCH_1}) - -set(OPENJPH_VERSION "${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}.${OPENJPH_VERSION_PATCH}") -############################################################ - +## options option(OJPH_DISABLE_INTEL_SIMD "Disables the use of SIMD instructions and associated files" OFF) option(OJPH_ENABLE_INTEL_AVX512 "enables the use of AVX512 SIMD instructions and associated files" ON) option(BUILD_SHARED_LIBS "Shared Libraries" ON) @@ -45,6 +29,7 @@ set(CMAKE_CXX_FLAGS_ASAN CACHE STRING "Flags used by the C++ compiler during AddressSanitizer builds." FORCE) + ## build type if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release") message( STATUS "To use AddressSanitizer, use \"cmake .. 
-DCMAKE_BUILD_TYPE=asan\"" ) @@ -85,6 +70,13 @@ endif() set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) include_directories(src/core/common) include_directories(src/apps/common) @@ -150,25 +142,13 @@ else() add_library(openjph ${SOURCES}) endif() +## include library version/name +include(ojph_libname.cmake) + target_include_directories(openjph PUBLIC src/core/common) target_compile_definitions(openjph PUBLIC _FILE_OFFSET_BITS=64) -if (OPENJPH_VERSION) - if (WIN32) - set_target_properties(openjph - PROPERTIES - OUTPUT_NAME "openjph.${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}") - else() - set_target_properties(openjph - PROPERTIES - SOVERSION "${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}" - VERSION "${OPENJPH_VERSION}") - endif() -else() - message(FATAL_ERROR "OPENJPH_VERSION is not set") -endif() - if (MSVC) set_source_files_properties(src/core/codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") set_source_files_properties(src/core/codestream/ojph_codestream_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") @@ -238,7 +218,7 @@ endif() if(OJPH_BUILD_EXECUTABLES) add_executable(ojph_expand ${OJPH_EXPAND}) - add_executable(ojph_compress ${OJPH_COMPRESS}) + add_executable(ojph_compress ${OJPH_COMPRESS}) endif() if (MSVC) @@ -264,11 +244,11 @@ endif() ################################################################################################ if(OJPH_BUILD_EXECUTABLES) - 
install(TARGETS ojph_expand + install(TARGETS ojph_expand DESTINATION bin) - install(TARGETS ojph_compress - DESTINATION bin) + install(TARGETS ojph_compress + DESTINATION bin) endif() include(GNUInstallDirs) diff --git a/ojph_libname.cmake b/ojph_libname.cmake new file mode 100644 index 00000000..8fc999af --- /dev/null +++ b/ojph_libname.cmake @@ -0,0 +1,37 @@ +################################################################################################ +# Generating library name +################################################################################################ + +############################################################ +# Parse version file +# credit: https://stackoverflow.com/a/47084079 + +file(READ "${CMAKE_CURRENT_SOURCE_DIR}/src/core/common/ojph_version.h" VERFILE) +if (NOT VERFILE) + message(FATAL_ERROR "Failed to parse ojph_version.h!") +endif() + +string(REGEX MATCH "OPENJPH_VERSION_MAJOR ([0-9]*)" _ ${VERFILE}) +set(OPENJPH_VERSION_MAJOR ${CMAKE_MATCH_1}) +string(REGEX MATCH "OPENJPH_VERSION_MINOR ([0-9]*)" _ ${VERFILE}) +set(OPENJPH_VERSION_MINOR ${CMAKE_MATCH_1}) +string(REGEX MATCH "OPENJPH_VERSION_PATCH ([a-z0-9]*)" _ ${VERFILE}) +set(OPENJPH_VERSION_PATCH ${CMAKE_MATCH_1}) + +set(OPENJPH_VERSION "${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}.${OPENJPH_VERSION_PATCH}") +############################################################ + +if (OPENJPH_VERSION) + if (WIN32) + set_target_properties(openjph + PROPERTIES + OUTPUT_NAME "openjph.${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}") + else() + set_target_properties(openjph + PROPERTIES + SOVERSION "${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}" + VERSION "${OPENJPH_VERSION}") + endif() +else() + message(FATAL_ERROR "OPENJPH_VERSION is not set") +endif() From 16a82095baee7a9dc247a41fe96f8e72debfdb3d Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 27 Dec 2023 14:10:57 +1100 Subject: [PATCH 003/348] Removed unneeded file --- CMakeLists.txt | 14 +++++++------- 
tests/test.py | 15 --------------- 2 files changed, 7 insertions(+), 22 deletions(-) delete mode 100644 tests/test.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 910f9b44..00bff012 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,13 +70,13 @@ endif() set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) +# set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../lib) +# set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) +# set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) +# set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) +# set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) +# set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) +# set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) include_directories(src/core/common) include_directories(src/apps/common) diff --git a/tests/test.py b/tests/test.py deleted file mode 100644 index fb8c0df9..00000000 --- a/tests/test.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/python3 - -import numpy as np -import cv2 -import matplotlib.pyplot as plt - -# import os -# os.system('') -# subprocess.run - -print('Testing in Python') - -im = cv2.imread("../../ARRI_AlexaDrums_3840x2160p_24_12b_P3_444_00000.ppm", cv2.IMREAD_UNCHANGED ); -hist, bin_edges = np.histogram(im.astype('int32'), bins=range(4096)); -_ = plt.hist(hist, bin_edges); From 
91724a84c482e86396b32f21ac9a40369e9e098e Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 27 Dec 2023 20:24:16 +1100 Subject: [PATCH 004/348] Re-arranging CMake files. --- CMakeLists.txt | 216 +++----------------------- ojph_libname.cmake | 12 +- src/apps/CMakeLists.txt | 51 ++++++ src/apps/ojph_compress/CMakeLists.txt | 38 +++++ src/apps/ojph_expand/CMakeLists.txt | 38 +++++ src/core/CMakeLists.txt | 106 +++++++++++++ tests/CMakeLists.txt | 4 +- tests/mse_pae.cmake | 22 +-- tests/mse_pae.cpp | 4 +- tests/test_executables.cpp | 4 +- 10 files changed, 277 insertions(+), 218 deletions(-) create mode 100644 src/apps/CMakeLists.txt create mode 100644 src/apps/ojph_compress/CMakeLists.txt create mode 100644 src/apps/ojph_expand/CMakeLists.txt create mode 100644 src/core/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 00bff012..3cf131cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,13 @@ option(OJPH_ENABLE_TIFF_SUPPORT "Enables input and output support for TIFF files option(OJPH_BUILD_TESTS "Enables building test code" OFF) option(OJPH_BUILD_EXECUTABLES "Enables building command line executables" ON) +## Setting some of the options if EMSCRIPTEN is the compiler +if(EMSCRIPTEN) + set(OJPH_DISABLE_INTEL_SIMD ON) + set(BUILD_SHARED_LIBS OFF) + set(OJPH_ENABLE_TIFF_SUPPORT OFF) +endif() + # This is related to how the timestamp is set for URL downloaded files. # Set DOWNLOAD_EXTRACT_TIMESTAMP if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.24.0") @@ -29,13 +36,17 @@ set(CMAKE_CXX_FLAGS_ASAN CACHE STRING "Flags used by the C++ compiler during AddressSanitizer builds." FORCE) - ## build type +## build type if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release") message( STATUS "To use AddressSanitizer, use \"cmake .. 
-DCMAKE_BUILD_TYPE=asan\"" ) endif() message(STATUS "Building ${CMAKE_BUILD_TYPE}") +## Library name +include(ojph_libname.cmake) + +## C++ version and flags set(CMAKE_CXX_STANDARD 14) if (MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /D \"_CRT_SECURE_NO_WARNINGS\"") @@ -44,215 +55,38 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -Wall -Wextra -Wconversion -Wunused-parameter") endif() +## The option OJPH_DISABLE_INTEL_SIMD and OJPH_ENABLE_INTEL_AVX512 if (OJPH_DISABLE_INTEL_SIMD) if (MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"OJPH_DISABLE_INTEL_SIMD\"") else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOJPH_DISABLE_INTEL_SIMD") endif() +elseif (OJPH_ENABLE_INTEL_AVX512) + if (MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"OJPH_ENABLE_INTEL_AVX512\"") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOJPH_ENABLE_INTEL_AVX512") + endif() endif() -if (OJPH_ENABLE_INTEL_AVX512) - if (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"OJPH_ENABLE_INTEL_AVX512\"") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOJPH_ENABLE_INTEL_AVX512") - endif() -endif() - +## The option BUILD_SHARED_LIBS if (BUILD_SHARED_LIBS AND MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"OJPH_BUILD_SHARED_LIBRARY\"") endif() -if (OJPH_CODE_COVERAGE AND NOT MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") +## Build library and applications +add_subdirectory(src/core) +if (OJPH_BUILD_EXECUTABLES) + add_subdirectory(src/apps) endif() -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) -# set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../lib) -# set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) -# set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) -# set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) -# set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE 
${CMAKE_BINARY_DIR}/../bin) -# set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) -# set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) - -include_directories(src/core/common) -include_directories(src/apps/common) - -file(GLOB CODESTREAM "src/core/codestream/*.cpp" "src/core/codestream/*.h") -file(GLOB CODESTREAM_SSE "src/core/codestream/*_sse.cpp") -file(GLOB CODESTREAM_SSE2 "src/core/codestream/*_sse2.cpp") -file(GLOB CODESTREAM_AVX "src/core/codestream/*_avx.cpp") -file(GLOB CODESTREAM_AVX2 "src/core/codestream/*_avx2.cpp") -file(GLOB CODESTREAM_WASM "src/core/codestream/*_wasm.cpp") -file(GLOB CODING "src/core/coding/*.cpp" "src/core/coding/*.h") -file(GLOB CODING_SSSE3 "src/core/coding/*_ssse3.cpp") -file(GLOB CODING_WASM "src/core/coding/*_wasm.cpp") -file(GLOB CODING_AVX512 "src/core/coding/*_avx512.cpp") -file(GLOB COMMON "src/core/common/*.h") -file(GLOB OTHERS "src/core/others/*.cpp") -file(GLOB TRANSFORM "src/core/transform/*.cpp" "src/core/transform/*.h") -file(GLOB TRANSFORM_SSE "src/core/transform/*_sse.cpp") -file(GLOB TRANSFORM_SSE2 "src/core/transform/*_sse2.cpp") -file(GLOB TRANSFORM_AVX "src/core/transform/*_avx.cpp") -file(GLOB TRANSFORM_AVX2 "src/core/transform/*_avx2.cpp") -file(GLOB TRANSFORM_WASM "src/core/transform/*_wasm.cpp") - -list(REMOVE_ITEM CODESTREAM ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODESTREAM_WASM}) -list(REMOVE_ITEM CODING ${CODING_SSSE3} ${CODING_WASM} ${CODING_AVX512}) -list(REMOVE_ITEM TRANSFORM ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_WASM}) -list(APPEND SOURCES ${CODESTREAM} ${CODING} ${COMMON} ${OTHERS} ${TRANSFORM}) - -source_group("codestream" FILES ${CODESTREAM}) -source_group("coding" FILES ${CODING}) -source_group("common" FILES ${COMMON}) -source_group("others" FILES ${OTHERS}) -source_group("transform" FILES ${TRANSFORM}) - -configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/src/pkg-config.pc.cmake" 
- "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${PROJECT_NAME}.pc" -) - -if(EMSCRIPTEN) - set(OJPH_DISABLE_INTEL_SIMD ON) - set(BUILD_SHARED_LIBS OFF) - set(OJPH_ENABLE_TIFF_SUPPORT OFF) - add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) - add_library(openjph ${SOURCES}) - add_library(openjphsimd ${SOURCES} ${CODESTREAM_WASM} ${CODING_WASM} ${TRANSFORM_WASM}) - target_include_directories(openjph PUBLIC src/core/common) - target_include_directories(openjphsimd PUBLIC src/core/common) - target_compile_options(openjphsimd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128) - source_group("codestream" FILES ${CODESTREAM_WASM}) - source_group("coding" FILES ${CODING_WASM}) - source_group("transform" FILES ${TRANSFORM_WASM}) -elseif(NOT OJPH_DISABLE_INTEL_SIMD) - add_library(openjph ${SOURCES} ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODING_SSSE3} ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2}) - source_group("codestream" FILES ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2}) - source_group("coding" FILES ${CODING_SSSE3}) - source_group("transform" FILES ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2}) - if (OJPH_ENABLE_INTEL_AVX512) - target_sources(openjph PRIVATE ${CODING_AVX512}) - source_group("coding" FILES ${CODING_AVX512}) - endif() -else() - add_library(openjph ${SOURCES}) -endif() - -## include library version/name -include(ojph_libname.cmake) - -target_include_directories(openjph PUBLIC src/core/common) - -target_compile_definitions(openjph PUBLIC _FILE_OFFSET_BITS=64) - -if (MSVC) - set_source_files_properties(src/core/codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") - set_source_files_properties(src/core/codestream/ojph_codestream_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") - set_source_files_properties(src/core/coding/ojph_block_encoder_avx512.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX512") - 
set_source_files_properties(src/core/transform/ojph_colour_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") - set_source_files_properties(src/core/transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") - set_source_files_properties(src/core/transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") - set_source_files_properties(src/core/transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") -else() - set_source_files_properties(src/core/codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) - set_source_files_properties(src/core/codestream/ojph_codestream_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) - set_source_files_properties(src/core/coding/ojph_block_decoder_ssse3.cpp PROPERTIES COMPILE_FLAGS -mssse3) - set_source_files_properties(src/core/coding/ojph_block_encoder_avx512.cpp PROPERTIES COMPILE_FLAGS -mavx512cd) - set_source_files_properties(src/core/transform/ojph_colour_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) - set_source_files_properties(src/core/transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) - set_source_files_properties(src/core/transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) - set_source_files_properties(src/core/transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) -endif() - -############################################################ -if( OJPH_ENABLE_TIFF_SUPPORT ) - - if( WIN32 ) - - set(TIFF_INCLUDE_DIR "C:\\Program Files\\tiff\\include" CACHE PATH "the directory containing the TIFF headers") - set(TIFF_LIBRARY_DEBUG "C:\\Program Files\\tiff\\lib\\tiffd.lib" CACHE FILEPATH "the path to the TIFF library for debug configurations") - set(TIFF_LIBRARY_RELEASE "C:\\Program Files\\tiff\\lib\\tiff.lib" CACHE FILEPATH "the path to the TIFF library for release configurations") - set(TIFFXX_LIBRARY_DEBUG "C:\\Program Files\\tiff\\lib\\tiffxxd.lib" CACHE FILEPATH "the path to the TIFFXX library for debug configurations") - set(TIFFXX_LIBRARY_RELEASE "C:\\Program 
Files\\tiff\\lib\\tiffxx.lib" CACHE FILEPATH "the path to the TIFFXX library for release configurations") - - message( STATUS "WIN32 detected: Setting CMakeCache TIFF values as follows, use CMake-gui Advanced to modify them" ) - message( STATUS " TIFF_INCLUDE_DIR : \"${TIFF_INCLUDE_DIR}\" " ) - message( STATUS " TIFF_LIBRARY_DEBUG : \"${TIFF_LIBRARY_DEBUG}\" " ) - message( STATUS " TIFF_LIBRARY_RELEASE : \"${TIFF_LIBRARY_RELEASE}\" " ) - message( STATUS " TIFFXX_LIBRARY_DEBUG : \"${TIFFXX_LIBRARY_DEBUG}\" " ) - message( STATUS " TIFFXX_LIBRARY_RELEASE : \"${TIFFXX_LIBRARY_RELEASE}\" " ) - - endif( WIN32 ) - - FIND_PACKAGE( TIFF ) - - if( TIFF_FOUND ) - set(USE_TIFF TRUE CACHE BOOL "Add TIFF support") - include_directories( ${TIFF_INCLUDE_DIR} ) - if (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"OJPH_ENABLE_TIFF_SUPPORT\"") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOJPH_ENABLE_TIFF_SUPPORT") - endif() - #include_directories(${CMAKE_BINARY_DIR}/libtiff) # for tiffconf.h on windows - endif( TIFF_FOUND ) - -endif() -############################################################ - -set(OJPH_EXPAND src/apps/ojph_expand/ojph_expand.cpp src/apps/others/ojph_img_io.cpp) -set(OJPH_COMPRESS src/apps/ojph_compress/ojph_compress.cpp src/apps/others/ojph_img_io.cpp) -set(OJPH_IMG_IO_SSE41 src/apps/others/ojph_img_io_sse41.cpp) -set(OJPH_IMG_IO_AVX2 src/apps/others/ojph_img_io_avx2.cpp) - -if(NOT OJPH_DISABLE_INTEL_SIMD) - list(APPEND OJPH_EXPAND ${OJPH_IMG_IO_SSE41}) - list(APPEND OJPH_EXPAND ${OJPH_IMG_IO_AVX2}) - list(APPEND OJPH_COMPRESS ${OJPH_IMG_IO_SSE41}) - list(APPEND OJPH_COMPRESS ${OJPH_IMG_IO_AVX2}) -endif() - -if(OJPH_BUILD_EXECUTABLES) - add_executable(ojph_expand ${OJPH_EXPAND}) - add_executable(ojph_compress ${OJPH_COMPRESS}) -endif() - -if (MSVC) - set_source_files_properties(src/apps/others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") -else() - set_source_files_properties(src/apps/others/ojph_img_io_sse41.cpp PROPERTIES 
COMPILE_FLAGS -msse4.1) - set_source_files_properties(src/apps/others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) -endif() - -if(OJPH_BUILD_EXECUTABLES) - if( USE_TIFF ) - target_link_libraries(ojph_expand PUBLIC openjph ${TIFF_LIBRARIES}) - target_link_libraries(ojph_compress PUBLIC openjph ${TIFF_LIBRARIES}) - else() - target_link_libraries(ojph_expand PUBLIC openjph) - target_link_libraries(ojph_compress PUBLIC openjph) - endif() -endif() - - ################################################################################################ # Install ################################################################################################ -if(OJPH_BUILD_EXECUTABLES) - install(TARGETS ojph_expand - DESTINATION bin) - - install(TARGETS ojph_compress - DESTINATION bin) -endif() - include(GNUInstallDirs) -install(TARGETS openjph LIBRARY +install(TARGETS openjph LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) install (DIRECTORY src/core/common/ diff --git a/ojph_libname.cmake b/ojph_libname.cmake index 8fc999af..942a62e9 100644 --- a/ojph_libname.cmake +++ b/ojph_libname.cmake @@ -21,17 +21,9 @@ set(OPENJPH_VERSION_PATCH ${CMAKE_MATCH_1}) set(OPENJPH_VERSION "${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}.${OPENJPH_VERSION_PATCH}") ############################################################ +message(STATUS "OpenJPH library version: ${OPENJPH_VERSION}") + if (OPENJPH_VERSION) - if (WIN32) - set_target_properties(openjph - PROPERTIES - OUTPUT_NAME "openjph.${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}") - else() - set_target_properties(openjph - PROPERTIES - SOVERSION "${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}" - VERSION "${OPENJPH_VERSION}") - endif() else() message(FATAL_ERROR "OPENJPH_VERSION is not set") endif() diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt new file mode 100644 index 00000000..b67a1dd5 --- /dev/null +++ b/src/apps/CMakeLists.txt @@ -0,0 +1,51 @@ +# Add tiff library 
+############################################################ +if( OJPH_ENABLE_TIFF_SUPPORT ) + + if( WIN32 ) + + set(TIFF_INCLUDE_DIR "C:\\Program Files\\tiff\\include" CACHE PATH "the directory containing the TIFF headers") + set(TIFF_LIBRARY_DEBUG "C:\\Program Files\\tiff\\lib\\tiffd.lib" CACHE FILEPATH "the path to the TIFF library for debug configurations") + set(TIFF_LIBRARY_RELEASE "C:\\Program Files\\tiff\\lib\\tiff.lib" CACHE FILEPATH "the path to the TIFF library for release configurations") + set(TIFFXX_LIBRARY_DEBUG "C:\\Program Files\\tiff\\lib\\tiffxxd.lib" CACHE FILEPATH "the path to the TIFFXX library for debug configurations") + set(TIFFXX_LIBRARY_RELEASE "C:\\Program Files\\tiff\\lib\\tiffxx.lib" CACHE FILEPATH "the path to the TIFFXX library for release configurations") + + message( STATUS "WIN32 detected: Setting CMakeCache TIFF values as follows, use CMake-gui Advanced to modify them" ) + message( STATUS " TIFF_INCLUDE_DIR : \"${TIFF_INCLUDE_DIR}\" " ) + message( STATUS " TIFF_LIBRARY_DEBUG : \"${TIFF_LIBRARY_DEBUG}\" " ) + message( STATUS " TIFF_LIBRARY_RELEASE : \"${TIFF_LIBRARY_RELEASE}\" " ) + message( STATUS " TIFFXX_LIBRARY_DEBUG : \"${TIFFXX_LIBRARY_DEBUG}\" " ) + message( STATUS " TIFFXX_LIBRARY_RELEASE : \"${TIFFXX_LIBRARY_RELEASE}\" " ) + + endif( WIN32 ) + + FIND_PACKAGE( TIFF ) + + if( TIFF_FOUND ) + set(USE_TIFF TRUE CACHE BOOL "Add TIFF support") + include_directories( ${TIFF_INCLUDE_DIR} ) + if (MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"OJPH_ENABLE_TIFF_SUPPORT\"") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOJPH_ENABLE_TIFF_SUPPORT") + endif() + #include_directories(${CMAKE_BINARY_DIR}/libtiff) # for tiffconf.h on windows + endif( TIFF_FOUND ) + +endif() +############################################################ + +## Change folders +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY 
${CMAKE_BINARY_DIR}/../lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) + +## Build executables +add_subdirectory(ojph_expand) +add_subdirectory(ojph_compress) \ No newline at end of file diff --git a/src/apps/ojph_compress/CMakeLists.txt b/src/apps/ojph_compress/CMakeLists.txt new file mode 100644 index 00000000..24f3112c --- /dev/null +++ b/src/apps/ojph_compress/CMakeLists.txt @@ -0,0 +1,38 @@ +## building ojph_compress +######################### + +include_directories(../common) + +file(GLOB OJPH_COMPRESS "ojph_compress.cpp") +file(GLOB OJPH_IMG_IO "../others/ojph_img_io*.cpp") +file(GLOB OJPH_IMG_IO_H "../common/ojph_img_io.h") + +if(NOT OJPH_DISABLE_INTEL_SIMD) + list(REMOVE_ITEM OJPH_IMG_IO ${OJPH_IMG_IO_SSE41}) + list(REMOVE_ITEM OJPH_IMG_IO ${OJPH_IMG_IO_AVX2}) +endif() + +source_group("main" FILES ${OJPH_COMPRESS}) +source_group("others" FILES ${OJPH_IMG_IO}) +source_group("common" FILES ${OJPH_IMG_IO_H}) + +if(OJPH_BUILD_EXECUTABLES) + add_executable(ojph_compress ${OJPH_COMPRESS} ${OJPH_IMG_IO} ${OJPH_IMG_IO_H}) +endif() + +if (MSVC) + set_source_files_properties(../others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") +else() + set_source_files_properties(../others/ojph_img_io_sse41.cpp PROPERTIES COMPILE_FLAGS -msse4.1) + set_source_files_properties(../others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) +endif() + +if(OJPH_BUILD_EXECUTABLES) + if( USE_TIFF ) + target_link_libraries(ojph_compress PUBLIC openjph ${TIFF_LIBRARIES}) + else() + target_link_libraries(ojph_compress PUBLIC openjph) + endif() +endif() + +install(TARGETS ojph_compress DESTINATION 
bin) \ No newline at end of file diff --git a/src/apps/ojph_expand/CMakeLists.txt b/src/apps/ojph_expand/CMakeLists.txt new file mode 100644 index 00000000..29644dfc --- /dev/null +++ b/src/apps/ojph_expand/CMakeLists.txt @@ -0,0 +1,38 @@ +## building ojph_expand +####################### + +include_directories(../common) + +file(GLOB OJPH_EXPAND "ojph_expand.cpp") +file(GLOB OJPH_IMG_IO "../others/ojph_img_io*.cpp") +file(GLOB OJPH_IMG_IO_H "../common/ojph_img_io.h") + +if(NOT OJPH_DISABLE_INTEL_SIMD) + list(REMOVE_ITEM OJPH_IMG_IO ${OJPH_IMG_IO_SSE41}) + list(REMOVE_ITEM OJPH_IMG_IO ${OJPH_IMG_IO_AVX2}) +endif() + +source_group("main" FILES ${OJPH_EXPAND}) +source_group("others" FILES ${OJPH_IMG_IO}) +source_group("common" FILES ${OJPH_IMG_IO_H}) + +if(OJPH_BUILD_EXECUTABLES) + add_executable(ojph_expand ${OJPH_EXPAND} ${OJPH_IMG_IO} ${OJPH_IMG_IO_H}) +endif() + +if (MSVC) + set_source_files_properties(../others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") +else() + set_source_files_properties(../others/ojph_img_io_sse41.cpp PROPERTIES COMPILE_FLAGS -msse4.1) + set_source_files_properties(../others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) +endif() + +if(OJPH_BUILD_EXECUTABLES) + if( USE_TIFF ) + target_link_libraries(ojph_expand PUBLIC openjph ${TIFF_LIBRARIES}) + else() + target_link_libraries(ojph_expand PUBLIC openjph) + endif() +endif() + +install(TARGETS ojph_expand DESTINATION bin) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt new file mode 100644 index 00000000..dc87c5a1 --- /dev/null +++ b/src/core/CMakeLists.txt @@ -0,0 +1,106 @@ + +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG 
${CMAKE_BINARY_DIR}/../lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) + +include_directories(common) + +file(GLOB CODESTREAM "codestream/*.cpp" "codestream/*.h") +file(GLOB CODESTREAM_SSE "codestream/*_sse.cpp") +file(GLOB CODESTREAM_SSE2 "codestream/*_sse2.cpp") +file(GLOB CODESTREAM_AVX "codestream/*_avx.cpp") +file(GLOB CODESTREAM_AVX2 "codestream/*_avx2.cpp") +file(GLOB CODESTREAM_WASM "codestream/*_wasm.cpp") +file(GLOB CODING "coding/*.cpp" "coding/*.h") +file(GLOB CODING_SSSE3 "coding/*_ssse3.cpp") +file(GLOB CODING_WASM "coding/*_wasm.cpp") +file(GLOB CODING_AVX512 "coding/*_avx512.cpp") +file(GLOB COMMON "common/*.h") +file(GLOB OTHERS "others/*.cpp") +file(GLOB TRANSFORM "transform/*.cpp" "transform/*.h") +file(GLOB TRANSFORM_SSE "transform/*_sse.cpp") +file(GLOB TRANSFORM_SSE2 "transform/*_sse2.cpp") +file(GLOB TRANSFORM_AVX "transform/*_avx.cpp") +file(GLOB TRANSFORM_AVX2 "transform/*_avx2.cpp") +file(GLOB TRANSFORM_WASM "transform/*_wasm.cpp") + +list(REMOVE_ITEM CODESTREAM ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODESTREAM_WASM}) +list(REMOVE_ITEM CODING ${CODING_SSSE3} ${CODING_WASM} ${CODING_AVX512}) +list(REMOVE_ITEM TRANSFORM ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_WASM}) +list(APPEND SOURCES ${CODESTREAM} ${CODING} ${COMMON} ${OTHERS} ${TRANSFORM}) + +source_group("codestream" FILES ${CODESTREAM}) +source_group("coding" FILES ${CODING}) +source_group("common" FILES ${COMMON}) +source_group("others" FILES ${OTHERS}) +source_group("transform" FILES ${TRANSFORM}) + +configure_file( + "../pkg-config.pc.cmake" + "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${PROJECT_NAME}.pc" +) + +if(EMSCRIPTEN) + add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) + add_library(openjph ${SOURCES}) + 
add_library(openjphsimd ${SOURCES} ${CODESTREAM_WASM} ${CODING_WASM} ${TRANSFORM_WASM}) + target_include_directories(openjph PUBLIC common) + target_include_directories(openjphsimd PUBLIC common) + target_compile_options(openjphsimd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128) + source_group("codestream" FILES ${CODESTREAM_WASM}) + source_group("coding" FILES ${CODING_WASM}) + source_group("transform" FILES ${TRANSFORM_WASM}) +elseif(NOT OJPH_DISABLE_INTEL_SIMD) + add_library(openjph ${SOURCES} ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODING_SSSE3} ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2}) + source_group("codestream" FILES ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2}) + source_group("coding" FILES ${CODING_SSSE3}) + source_group("transform" FILES ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2}) + if (OJPH_ENABLE_INTEL_AVX512) + target_sources(openjph PRIVATE ${CODING_AVX512}) + source_group("coding" FILES ${CODING_AVX512}) + endif() +else() + add_library(openjph ${SOURCES}) +endif() + +## include library version/name +target_include_directories(openjph PUBLIC common) +target_compile_definitions(openjph PUBLIC _FILE_OFFSET_BITS=64) + +if (MSVC) + set_source_files_properties(src/core/codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") + set_source_files_properties(src/core/codestream/ojph_codestream_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") + set_source_files_properties(src/core/coding/ojph_block_encoder_avx512.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX512") + set_source_files_properties(src/core/transform/ojph_colour_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") + set_source_files_properties(src/core/transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") + set_source_files_properties(src/core/transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") + 
set_source_files_properties(src/core/transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") +else() + set_source_files_properties(src/core/codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) + set_source_files_properties(src/core/codestream/ojph_codestream_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) + set_source_files_properties(src/core/coding/ojph_block_decoder_ssse3.cpp PROPERTIES COMPILE_FLAGS -mssse3) + set_source_files_properties(src/core/coding/ojph_block_encoder_avx512.cpp PROPERTIES COMPILE_FLAGS -mavx512cd) + set_source_files_properties(src/core/transform/ojph_colour_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) + set_source_files_properties(src/core/transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) + set_source_files_properties(src/core/transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) + set_source_files_properties(src/core/transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) +endif() + +if (WIN32) + set(OJPH_LIB_NAME_STRING "openjph.${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}") + set_target_properties(openjph + PROPERTIES + OUTPUT_NAME "openjph.${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}") +else() + set(OJPH_LIB_NAME_STRING "openjph.${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}") + set_target_properties(openjph + PROPERTIES + SOVERSION "${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}" + VERSION "${OPENJPH_VERSION}") +endif() diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b597c265..fb2db788 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -43,12 +43,12 @@ include(GoogleTest) gtest_add_tests(TARGET test_executables) if (WIN32) - if(CMAKE_BUILD_TYPE STREQUAL "Debug") + if(CMAKE_BUILD_TYPE MATCHES "Debug") add_custom_command(TARGET test_executables POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "../bin/Debug/gtest.dll" "./Debug/" COMMAND ${CMAKE_COMMAND} -E copy "../bin/Debug/gtest_main.dll" "./Debug/" ) - elseif(CMAKE_BUILD_TYPE STREQUAL 
"Release") + elseif(CMAKE_BUILD_TYPE MATCHES "Release") add_custom_command(TARGET test_executables POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "../bin/Release/gtest.dll" "./Release/" COMMAND ${CMAKE_COMMAND} -E copy "../bin/Release/gtest_main.dll" "./Release/" diff --git a/tests/mse_pae.cmake b/tests/mse_pae.cmake index 90cea824..8187ce13 100644 --- a/tests/mse_pae.cmake +++ b/tests/mse_pae.cmake @@ -5,6 +5,8 @@ project (mse_pae DESCRIPTION "A program to find MSE and peak absolute error between two images" LANGUAGES CXX) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) +include_directories(../src/apps/common) +include_directories(../src/core/common) # Configure source files set(mse_pae mse_pae.cpp "../src/apps/others/ojph_img_io.cpp" "../src/core/others/ojph_message.cpp" "../src/core/others/ojph_file.cpp" "../src/core/others/ojph_mem.cpp" "../src/core/others/ojph_arch.cpp") @@ -15,22 +17,20 @@ set(OJPH_IMG_IO_AVX2 "../src/apps/others/ojph_img_io_avx2.cpp") if(NOT OJPH_DISABLE_INTEL_SIMD) list(APPEND mse_pae ${OJPH_IMG_IO_SSE41}) list(APPEND mse_pae ${OJPH_IMG_IO_AVX2}) + # Set compilation flags + if (MSVC) + set_source_files_properties(../src/apps/others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") + else() + set_source_files_properties(../src/apps/others/ojph_img_io_sse41.cpp PROPERTIES COMPILE_FLAGS -msse4.1) + set_source_files_properties(../src/apps/others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) + endif() endif() -# Set compilation flags -if (MSVC) - set_source_files_properties(../src/apps/others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") -else() - set_source_files_properties(../src/apps/others/ojph_img_io_sse41.cpp PROPERTIES COMPILE_FLAGS -msse4.1) - set_source_files_properties(../src/apps/others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) -endif() # Add executable add_executable(mse_pae ${mse_pae}) # Add tiff library if it is available -IF( USE_TIFF ) +if( USE_TIFF ) target_link_libraries 
(mse_pae ${TIFF_LIBRARIES}) -ELSE() - target_link_libraries (mse_pae) -ENDIF() \ No newline at end of file +endif() diff --git a/tests/mse_pae.cpp b/tests/mse_pae.cpp index 9924aea8..84653399 100644 --- a/tests/mse_pae.cpp +++ b/tests/mse_pae.cpp @@ -40,8 +40,8 @@ #include #include #include -#include "../common/ojph_img_io.h" -#include "../common/ojph_mem.h" +#include "ojph_img_io.h" +#include "ojph_mem.h" using namespace ojph; using namespace std; diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 696daadd..7b16b542 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -99,8 +99,8 @@ int execute(const std::string& cmd, std::string& result) #define REF_FILE_DIR ".\\jp2k_test_codestreams\\openjph\\references\\" #define MSE_PAE_PATH ".\\Release\\mse_pae" #define COMPARE_FILES_PATH ".\\Release\\compare_files" - #define EXPAND_EXECUTABLE "..\\..\\bin\\Release\\ojph_expand.exe" - #define COMPRESS_EXECUTABLE "..\\..\\bin\\Release\\ojph_compress.exe" + #define EXPAND_EXECUTABLE "..\\..\\bin\\ojph_expand.exe" + #define COMPRESS_EXECUTABLE "..\\..\\bin\\ojph_compress.exe" #else #define SRC_FILE_DIR "./jp2k_test_codestreams/openjph/" #define OUT_FILE_DIR "./" From c73f25121344c0660551423dcdfea37d3315a529 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 27 Dec 2023 20:34:17 +1100 Subject: [PATCH 005/348] Fixed the earlier error --- src/core/CMakeLists.txt | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index dc87c5a1..64364ec5 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -74,22 +74,22 @@ target_include_directories(openjph PUBLIC common) target_compile_definitions(openjph PUBLIC _FILE_OFFSET_BITS=64) if (MSVC) - set_source_files_properties(src/core/codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") - set_source_files_properties(src/core/codestream/ojph_codestream_avx2.cpp PROPERTIES 
COMPILE_FLAGS "/arch:AVX2") - set_source_files_properties(src/core/coding/ojph_block_encoder_avx512.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX512") - set_source_files_properties(src/core/transform/ojph_colour_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") - set_source_files_properties(src/core/transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") - set_source_files_properties(src/core/transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") - set_source_files_properties(src/core/transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") + set_source_files_properties(codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") + set_source_files_properties(codestream/ojph_codestream_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") + set_source_files_properties(coding/ojph_block_encoder_avx512.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX512") + set_source_files_properties(transform/ojph_colour_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") + set_source_files_properties(transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") + set_source_files_properties(transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") + set_source_files_properties(transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") else() - set_source_files_properties(src/core/codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) - set_source_files_properties(src/core/codestream/ojph_codestream_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) - set_source_files_properties(src/core/coding/ojph_block_decoder_ssse3.cpp PROPERTIES COMPILE_FLAGS -mssse3) - set_source_files_properties(src/core/coding/ojph_block_encoder_avx512.cpp PROPERTIES COMPILE_FLAGS -mavx512cd) - set_source_files_properties(src/core/transform/ojph_colour_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) - set_source_files_properties(src/core/transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) - 
set_source_files_properties(src/core/transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) - set_source_files_properties(src/core/transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) + set_source_files_properties(codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) + set_source_files_properties(codestream/ojph_codestream_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) + set_source_files_properties(coding/ojph_block_decoder_ssse3.cpp PROPERTIES COMPILE_FLAGS -mssse3) + set_source_files_properties(coding/ojph_block_encoder_avx512.cpp PROPERTIES COMPILE_FLAGS -mavx512cd) + set_source_files_properties(transform/ojph_colour_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) + set_source_files_properties(transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) + set_source_files_properties(transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) + set_source_files_properties(transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) endif() if (WIN32) From b06f663607ba981a5e77326c78fe270ffd7019e2 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 30 Dec 2023 12:45:56 +1100 Subject: [PATCH 006/348] Modified CMakeLists.txt to fix installing files. One problem, tests files are also copied when they are build. 
--- CMakeLists.txt | 6 ++++++ src/apps/CMakeLists.txt | 3 --- src/core/CMakeLists.txt | 3 --- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3cf131cc..98b2e292 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,6 +75,12 @@ if (BUILD_SHARED_LIBS AND MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"OJPH_BUILD_SHARED_LIBRARY\"") endif() +## specify output directories +## this will be refined further for Debug and Release builds in included CMakeLists.txt +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../lib) + ## Build library and applications add_subdirectory(src/core) if (OJPH_BUILD_EXECUTABLES) diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index b67a1dd5..9bece1a3 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -36,9 +36,6 @@ endif() ############################################################ ## Change folders -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../lib) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index dc87c5a1..74065b78 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,7 +1,4 @@ -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../lib) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG 
${CMAKE_BINARY_DIR}/../lib) From 0de48e2858594d1eefc068bcf846bec9e5fb950a Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 30 Dec 2023 17:46:29 +1100 Subject: [PATCH 007/348] This improved wasm builds. It also removes all the warnings during wasm builds. --- src/apps/ojph_compress/CMakeLists.txt | 77 +++++++++++-------- src/apps/ojph_expand/CMakeLists.txt | 77 +++++++++++-------- src/apps/others/ojph_img_io.cpp | 2 +- src/core/codestream/ojph_codestream_local.cpp | 4 +- src/core/codestream/ojph_resolution.cpp | 5 +- src/core/codestream/ojph_subband.cpp | 6 +- 6 files changed, 103 insertions(+), 68 deletions(-) diff --git a/src/apps/ojph_compress/CMakeLists.txt b/src/apps/ojph_compress/CMakeLists.txt index 24f3112c..29b4201e 100644 --- a/src/apps/ojph_compress/CMakeLists.txt +++ b/src/apps/ojph_compress/CMakeLists.txt @@ -1,38 +1,55 @@ ## building ojph_compress ######################### -include_directories(../common) - -file(GLOB OJPH_COMPRESS "ojph_compress.cpp") -file(GLOB OJPH_IMG_IO "../others/ojph_img_io*.cpp") -file(GLOB OJPH_IMG_IO_H "../common/ojph_img_io.h") - -if(NOT OJPH_DISABLE_INTEL_SIMD) - list(REMOVE_ITEM OJPH_IMG_IO ${OJPH_IMG_IO_SSE41}) - list(REMOVE_ITEM OJPH_IMG_IO ${OJPH_IMG_IO_AVX2}) -endif() - -source_group("main" FILES ${OJPH_COMPRESS}) -source_group("others" FILES ${OJPH_IMG_IO}) -source_group("common" FILES ${OJPH_IMG_IO_H}) - if(OJPH_BUILD_EXECUTABLES) - add_executable(ojph_compress ${OJPH_COMPRESS} ${OJPH_IMG_IO} ${OJPH_IMG_IO_H}) -endif() -if (MSVC) - set_source_files_properties(../others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") -else() - set_source_files_properties(../others/ojph_img_io_sse41.cpp PROPERTIES COMPILE_FLAGS -msse4.1) - set_source_files_properties(../others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) -endif() - -if(OJPH_BUILD_EXECUTABLES) - if( USE_TIFF ) - target_link_libraries(ojph_compress PUBLIC openjph ${TIFF_LIBRARIES}) + include_directories(../common) + 
include_directories(../../core/common) + + file(GLOB OJPH_COMPRESS "ojph_compress.cpp") + file(GLOB OJPH_IMG_IO "../others/ojph_img_io.cpp") + file(GLOB OJPH_IMG_IO_SSE4 "../others/ojph_img_io_sse41.cpp") + file(GLOB OJPH_IMG_IO_AVX2 "../others/ojph_img_io_avx2.cpp") + file(GLOB OJPH_IMG_IO_H "../common/ojph_img_io.h") + + list(APPEND SOURCES ${OJPH_COMPRESS} ${OJPH_IMG_IO} ${OJPH_IMG_IO_H}) + + source_group("main" FILES ${OJPH_COMPRESS}) + source_group("others" FILES ${OJPH_IMG_IO}) + source_group("common" FILES ${OJPH_IMG_IO_H}) + + if(EMSCRIPTEN) + add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) + add_executable(ojph_compress ${SOURCES}) + add_executable(ojph_compress_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) + target_compile_options(ojph_compress_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) + source_group("others" FILES ${OJPH_IMG_IO_SSE4}) + + target_link_libraries(ojph_compress PRIVATE openjph) + install(TARGETS ojph_compress DESTINATION bin) + target_link_libraries(ojph_compress_simd PRIVATE openjphsimd) + install(TARGETS ojph_compress_simd DESTINATION bin) else() - target_link_libraries(ojph_compress PUBLIC openjph) + if(NOT OJPH_DISABLE_INTEL_SIMD) + add_executable(ojph_compress ${SOURCES} ${OJPH_IMG_IO_SSE4} ${OJPH_IMG_IO_AVX2}) + + if (MSVC) + set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS "/arch:AVX2") + else() + set_source_files_properties(${OJPH_IMG_IO_SSE4} PROPERTIES COMPILE_FLAGS -msse4.1) + set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS -mavx2) + endif() + else() + add_executable(ojph_compress ${SOURCES}) + endif() + + if( USE_TIFF ) + target_link_libraries(ojph_compress PUBLIC openjph ${TIFF_LIBRARIES}) + else() + target_link_libraries(ojph_compress PUBLIC openjph) + endif() + + install(TARGETS ojph_compress DESTINATION bin) endif() -endif() -install(TARGETS ojph_compress DESTINATION bin) \ No newline at end of file +endif() diff --git 
a/src/apps/ojph_expand/CMakeLists.txt b/src/apps/ojph_expand/CMakeLists.txt index 29644dfc..d93e568d 100644 --- a/src/apps/ojph_expand/CMakeLists.txt +++ b/src/apps/ojph_expand/CMakeLists.txt @@ -1,38 +1,55 @@ ## building ojph_expand ####################### -include_directories(../common) - -file(GLOB OJPH_EXPAND "ojph_expand.cpp") -file(GLOB OJPH_IMG_IO "../others/ojph_img_io*.cpp") -file(GLOB OJPH_IMG_IO_H "../common/ojph_img_io.h") - -if(NOT OJPH_DISABLE_INTEL_SIMD) - list(REMOVE_ITEM OJPH_IMG_IO ${OJPH_IMG_IO_SSE41}) - list(REMOVE_ITEM OJPH_IMG_IO ${OJPH_IMG_IO_AVX2}) -endif() - -source_group("main" FILES ${OJPH_EXPAND}) -source_group("others" FILES ${OJPH_IMG_IO}) -source_group("common" FILES ${OJPH_IMG_IO_H}) - if(OJPH_BUILD_EXECUTABLES) - add_executable(ojph_expand ${OJPH_EXPAND} ${OJPH_IMG_IO} ${OJPH_IMG_IO_H}) -endif() -if (MSVC) - set_source_files_properties(../others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") -else() - set_source_files_properties(../others/ojph_img_io_sse41.cpp PROPERTIES COMPILE_FLAGS -msse4.1) - set_source_files_properties(../others/ojph_img_io_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) -endif() - -if(OJPH_BUILD_EXECUTABLES) - if( USE_TIFF ) - target_link_libraries(ojph_expand PUBLIC openjph ${TIFF_LIBRARIES}) + include_directories(../common) + include_directories(../../core/common) + + file(GLOB OJPH_EXPAND "ojph_expand.cpp") + file(GLOB OJPH_IMG_IO "../others/ojph_img_io.cpp") + file(GLOB OJPH_IMG_IO_SSE4 "../others/ojph_img_io_sse41.cpp") + file(GLOB OJPH_IMG_IO_AVX2 "../others/ojph_img_io_avx2.cpp") + file(GLOB OJPH_IMG_IO_H "../common/ojph_img_io.h") + + list(APPEND SOURCES ${OJPH_EXPAND} ${OJPH_IMG_IO} ${OJPH_IMG_IO_H}) + + source_group("main" FILES ${OJPH_EXPAND}) + source_group("others" FILES ${OJPH_IMG_IO}) + source_group("common" FILES ${OJPH_IMG_IO_H}) + + if(EMSCRIPTEN) + add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) + add_executable(ojph_expand ${SOURCES}) + 
add_executable(ojph_expand_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) + target_compile_options(ojph_expand_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) + source_group("others" FILES ${OJPH_IMG_IO_SSE4}) + + target_link_libraries(ojph_expand PRIVATE openjph) + install(TARGETS ojph_expand DESTINATION bin) + target_link_libraries(ojph_expand_simd PRIVATE openjphsimd) + install(TARGETS ojph_expand_simd DESTINATION bin) else() - target_link_libraries(ojph_expand PUBLIC openjph) + if(NOT OJPH_DISABLE_INTEL_SIMD) + add_executable(ojph_expand ${SOURCES} ${OJPH_IMG_IO_SSE4} ${OJPH_IMG_IO_AVX2}) + + if (MSVC) + set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS "/arch:AVX2") + else() + set_source_files_properties(${OJPH_IMG_IO_SSE4} PROPERTIES COMPILE_FLAGS -msse4.1) + set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS -mavx2) + endif() + else() + add_executable(ojph_expand ${SOURCES}) + endif() + + if( USE_TIFF ) + target_link_libraries(ojph_expand PUBLIC openjph ${TIFF_LIBRARIES}) + else() + target_link_libraries(ojph_expand PUBLIC openjph) + endif() + + install(TARGETS ojph_expand DESTINATION bin) endif() -endif() -install(TARGETS ojph_expand DESTINATION bin) +endif() diff --git a/src/apps/others/ojph_img_io.cpp b/src/apps/others/ojph_img_io.cpp index a83e5fc1..88fdcfc5 100644 --- a/src/apps/others/ojph_img_io.cpp +++ b/src/apps/others/ojph_img_io.cpp @@ -1650,7 +1650,7 @@ namespace ojph { be2le(offset_to_data_for_image_element_1); // set to starting point of image data - if (fseek(file_handle, offset_to_image_data_in_bytes, SEEK_SET) != 0) + if (fseek(file_handle, (long)offset_to_image_data_in_bytes, SEEK_SET) != 0) { close(); OJPH_ERROR(0x0300000E7, "Error reading file %s", filename); diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index cc074298..cb1a69b1 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ 
b/src/core/codestream/ojph_codestream_local.cpp @@ -112,7 +112,7 @@ namespace ojph { OJPH_ERROR(0x00030011, "number of tiles cannot exceed 65535"); //allocate tiles - allocator->pre_alloc_obj(num_tiles.area()); + allocator->pre_alloc_obj((size_t)num_tiles.area()); ui32 num_tileparts = 0; point index; @@ -208,7 +208,7 @@ namespace ojph { allocator->post_alloc_obj(precinct_scratch_needed_bytes); //get tiles - tiles = this->allocator->post_alloc_obj(num_tiles.area()); + tiles = this->allocator->post_alloc_obj((size_t)num_tiles.area()); ui32 num_tileparts = 0; point index; diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index bde1b511..e1d78cba 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -160,7 +160,7 @@ namespace ojph { num_precincts.w -= trx0 >> log_PP.w; num_precincts.h = (try1 + (1 << log_PP.h) - 1) >> log_PP.h; num_precincts.h -= try0 >> log_PP.h; - allocator->pre_alloc_obj(num_precincts.area()); + allocator->pre_alloc_obj((size_t)num_precincts.area()); } //allocate lines @@ -262,7 +262,8 @@ namespace ojph { num_precincts.w -= trx0 >> log_PP.w; num_precincts.h = (try1 + (1 << log_PP.h) - 1) >> log_PP.h; num_precincts.h -= try0 >> log_PP.h; - precincts = allocator->post_alloc_obj(num_precincts.area()); + precincts = + allocator->post_alloc_obj((size_t)num_precincts.area()); ui64 num = num_precincts.area(); for (ui64 i = 0; i < num; ++i) precincts[i] = precinct(); diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index b41294e8..fc83bf2b 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -85,7 +85,7 @@ namespace ojph { allocator->pre_alloc_obj(num_blocks.w); //allocate codeblock headers - allocator->pre_alloc_obj(num_blocks.area()); + allocator->pre_alloc_obj((size_t)num_blocks.area()); for (ui32 i = 0; i < num_blocks.w; ++i) codeblock::pre_alloc(codestream, nominal); @@ -152,8 
+152,8 @@ namespace ojph { blocks = allocator->post_alloc_obj(num_blocks.w); //allocate codeblock headers coded_cb_header *cp = coded_cbs = - allocator->post_alloc_obj(num_blocks.area()); - memset(coded_cbs, 0, sizeof(coded_cb_header) * num_blocks.area()); + allocator->post_alloc_obj((size_t)num_blocks.area()); + memset(coded_cbs, 0, sizeof(coded_cb_header) * (size_t)num_blocks.area()); for (int i = (int)num_blocks.area(); i > 0; --i, ++cp) cp->Kmax = K_max; From 3d8f5c82875fba711b5597be6a3df322631c47ed Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 30 Dec 2023 18:23:45 +1100 Subject: [PATCH 008/348] Removing CMAKD_CXX_FLAGS settings, and replacing them with add_definitions and add_compile_options. --- CMakeLists.txt | 24 +++++++++++------------- src/apps/CMakeLists.txt | 7 +------ 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 98b2e292..24a8e8e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,30 +49,28 @@ include(ojph_libname.cmake) ## C++ version and flags set(CMAKE_CXX_STANDARD 14) if (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /D \"_CRT_SECURE_NO_WARNINGS\"") + add_definitions(-D_CRT_SECURE_NO_WARNINGS) endif() if (CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -Wall -Wextra -Wconversion -Wunused-parameter") + add_compile_options( + -fexceptions + -Wall + -Wextra + -Wconversion + -Wunused-parameter + ) endif() ## The option OJPH_DISABLE_INTEL_SIMD and OJPH_ENABLE_INTEL_AVX512 if (OJPH_DISABLE_INTEL_SIMD) - if (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"OJPH_DISABLE_INTEL_SIMD\"") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOJPH_DISABLE_INTEL_SIMD") - endif() + add_definitions(-DOJPH_DISABLE_INTEL_SIMD) elseif (OJPH_ENABLE_INTEL_AVX512) - if (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"OJPH_ENABLE_INTEL_AVX512\"") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOJPH_ENABLE_INTEL_AVX512") - endif() + 
add_definitions(-DOJPH_ENABLE_INTEL_AVX512) endif() ## The option BUILD_SHARED_LIBS if (BUILD_SHARED_LIBS AND MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"OJPH_BUILD_SHARED_LIBRARY\"") + add_definitions(-DOJPH_BUILD_SHARED_LIBRARY) endif() ## specify output directories diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index 9bece1a3..9795bb7a 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -24,12 +24,7 @@ if( OJPH_ENABLE_TIFF_SUPPORT ) if( TIFF_FOUND ) set(USE_TIFF TRUE CACHE BOOL "Add TIFF support") include_directories( ${TIFF_INCLUDE_DIR} ) - if (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D \"OJPH_ENABLE_TIFF_SUPPORT\"") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DOJPH_ENABLE_TIFF_SUPPORT") - endif() - #include_directories(${CMAKE_BINARY_DIR}/libtiff) # for tiffconf.h on windows + add_definitions(-DOJPH_ENABLE_TIFF_SUPPORT) endif( TIFF_FOUND ) endif() From a3103d63c6045e400888a0e6a7a74a856c1e2c67 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Jan 2024 11:30:17 +1100 Subject: [PATCH 009/348] This is to fix MingW building. I also corrected some settings. MingW tests are not working yet. 
--- CMakeLists.txt | 38 ++++++++------- ojph_libname.cmake | 2 +- src/apps/CMakeLists.txt | 4 +- src/core/CMakeLists.txt | 10 ++-- src/core/common/ojph_file.h | 87 +++++++++++----------------------- src/core/common/ojph_version.h | 2 +- 6 files changed, 58 insertions(+), 85 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 24a8e8e5..54e63e3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,10 @@ cmake_minimum_required(VERSION 3.11.0) +## Library name/version +include(ojph_libname.cmake) + ## project -project (openjph DESCRIPTION "Open source implementation of JPH" LANGUAGES CXX) +project (openjph VERSION ${OPENJPH_VERSION} DESCRIPTION "Open source implementation of JPH" LANGUAGES CXX) set_property(GLOBAL PROPERTY USE_FOLDERS ON) ################################################################################################ @@ -43,13 +46,10 @@ if (NOT CMAKE_BUILD_TYPE) endif() message(STATUS "Building ${CMAKE_BUILD_TYPE}") -## Library name -include(ojph_libname.cmake) - ## C++ version and flags set(CMAKE_CXX_STANDARD 14) if (MSVC) - add_definitions(-D_CRT_SECURE_NO_WARNINGS) + add_compile_options(-D_CRT_SECURE_NO_WARNINGS) endif() if (CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") add_compile_options( @@ -63,14 +63,9 @@ endif() ## The option OJPH_DISABLE_INTEL_SIMD and OJPH_ENABLE_INTEL_AVX512 if (OJPH_DISABLE_INTEL_SIMD) - add_definitions(-DOJPH_DISABLE_INTEL_SIMD) + add_compile_options(-DOJPH_DISABLE_INTEL_SIMD) elseif (OJPH_ENABLE_INTEL_AVX512) - add_definitions(-DOJPH_ENABLE_INTEL_AVX512) -endif() - -## The option BUILD_SHARED_LIBS -if (BUILD_SHARED_LIBS AND MSVC) - add_definitions(-DOJPH_BUILD_SHARED_LIBRARY) + add_compile_options(-DOJPH_ENABLE_INTEL_AVX512) endif() ## specify output directories @@ -89,18 +84,29 @@ endif() # Install ################################################################################################ +set(PKG_CONFIG_INCLUDEDIR "\${prefix}/include") +set(PKG_CONFIG_LIBDIR "\${prefix}/lib") +set(PKG_CONFIG_LIBS 
"-lopenjph") + include(GNUInstallDirs) -install(TARGETS openjph LIBRARY - DESTINATION ${CMAKE_INSTALL_LIBDIR}) +install(TARGETS openjph + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) -install (DIRECTORY src/core/common/ +install(DIRECTORY src/core/common/ DESTINATION include/openjph FILES_MATCHING PATTERN "*.h") -install(FILES "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${PROJECT_NAME}.pc" +install(FILES "${CMAKE_BINARY_DIR}/${PROJECT_NAME}.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/src/pkg-config.pc.cmake" + "${CMAKE_BINARY_DIR}/${PROJECT_NAME}.pc" +) + ################################################################################################ # Testing (OJPH_BUILD_TESTS) ################################################################################################ diff --git a/ojph_libname.cmake b/ojph_libname.cmake index 942a62e9..8c9d6ecd 100644 --- a/ojph_libname.cmake +++ b/ojph_libname.cmake @@ -15,7 +15,7 @@ string(REGEX MATCH "OPENJPH_VERSION_MAJOR ([0-9]*)" _ ${VERFILE}) set(OPENJPH_VERSION_MAJOR ${CMAKE_MATCH_1}) string(REGEX MATCH "OPENJPH_VERSION_MINOR ([0-9]*)" _ ${VERFILE}) set(OPENJPH_VERSION_MINOR ${CMAKE_MATCH_1}) -string(REGEX MATCH "OPENJPH_VERSION_PATCH ([a-z0-9]*)" _ ${VERFILE}) +string(REGEX MATCH "OPENJPH_VERSION_PATCH ([0-9]*)" _ ${VERFILE}) set(OPENJPH_VERSION_PATCH ${CMAKE_MATCH_1}) set(OPENJPH_VERSION "${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}.${OPENJPH_VERSION_PATCH}") diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index 9795bb7a..d0779aa3 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -2,7 +2,7 @@ ############################################################ if( OJPH_ENABLE_TIFF_SUPPORT ) - if( WIN32 ) + if( MSVC ) set(TIFF_INCLUDE_DIR "C:\\Program Files\\tiff\\include" CACHE PATH "the directory containing the TIFF headers") set(TIFF_LIBRARY_DEBUG 
"C:\\Program Files\\tiff\\lib\\tiffd.lib" CACHE FILEPATH "the path to the TIFF library for debug configurations") @@ -17,7 +17,7 @@ if( OJPH_ENABLE_TIFF_SUPPORT ) message( STATUS " TIFFXX_LIBRARY_DEBUG : \"${TIFFXX_LIBRARY_DEBUG}\" " ) message( STATUS " TIFFXX_LIBRARY_RELEASE : \"${TIFFXX_LIBRARY_RELEASE}\" " ) - endif( WIN32 ) + endif( MSVC ) FIND_PACKAGE( TIFF ) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index f8b78ee0..e43b6977 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -38,11 +38,6 @@ source_group("common" FILES ${COMMON}) source_group("others" FILES ${OTHERS}) source_group("transform" FILES ${TRANSFORM}) -configure_file( - "../pkg-config.pc.cmake" - "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${PROJECT_NAME}.pc" -) - if(EMSCRIPTEN) add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) add_library(openjph ${SOURCES}) @@ -66,6 +61,11 @@ else() add_library(openjph ${SOURCES}) endif() +## The option BUILD_SHARED_LIBS +if (BUILD_SHARED_LIBS AND WIN32) + target_compile_definitions(openjph PRIVATE OJPH_BUILD_SHARED_LIBRARY) +endif() + ## include library version/name target_include_directories(openjph PUBLIC common) target_compile_definitions(openjph PUBLIC _FILE_OFFSET_BITS=64) diff --git a/src/core/common/ojph_file.h b/src/core/common/ojph_file.h index 7faa6b0f..7d9efce6 100644 --- a/src/core/common/ojph_file.h +++ b/src/core/common/ojph_file.h @@ -71,7 +71,7 @@ namespace ojph { //////////////////////////////////////////////////////////////////////////// - class outfile_base + class OJPH_EXPORT outfile_base { public: @@ -84,24 +84,17 @@ namespace ojph { }; //////////////////////////////////////////////////////////////////////////// - class j2c_outfile : public outfile_base + class OJPH_EXPORT j2c_outfile : public outfile_base { public: - OJPH_EXPORT j2c_outfile() { fh = 0; } - OJPH_EXPORT - ~j2c_outfile() { if (fh) fclose(fh); } + ~j2c_outfile() override { if (fh) fclose(fh); } - OJPH_EXPORT void open(const 
char *filename); - OJPH_EXPORT - virtual size_t write(const void *ptr, size_t size); - OJPH_EXPORT - virtual si64 tell(); - OJPH_EXPORT - virtual void flush(); - OJPH_EXPORT - virtual void close(); + size_t write(const void *ptr, size_t size) override; + si64 tell() override; + void flush() override; + void close() override; private: FILE *fh; @@ -120,15 +113,13 @@ namespace ojph { * * memory data can be accessed using get_data() */ - class mem_outfile : public outfile_base + class OJPH_EXPORT mem_outfile : public outfile_base { public: /** A constructor */ - OJPH_EXPORT mem_outfile(); /** A destructor */ - OJPH_EXPORT - ~mem_outfile(); + ~mem_outfile() override; /** Call this function to open a memory file. * @@ -138,7 +129,6 @@ namespace ojph { * @param initial_size is the initial memory buffer size. * The default value is 2^16. */ - OJPH_EXPORT void open(size_t initial_size = 65536); /** Call this function to write data to the memory file. @@ -149,23 +139,20 @@ namespace ojph { * @param ptr is the address of the new data. * @param size the number of bytes in the new data. */ - OJPH_EXPORT - virtual size_t write(const void *ptr, size_t size); + size_t write(const void *ptr, size_t size) override; /** Call this function to know the file size (i.e., number of bytes used * to store the file). * * @return the file size. */ - OJPH_EXPORT - virtual si64 tell() { return cur_ptr - buf; } + si64 tell() override { return cur_ptr - buf; } /** Call this function to close the file and deallocate memory * * The object can be used again after calling close */ - OJPH_EXPORT - virtual void close(); + void close() override; /** Call this function to access memory file data. * @@ -174,7 +161,6 @@ namespace ojph { * * @return a constant pointer to the data. */ - OJPH_EXPORT const ui8* get_data() { return buf; } /** Call this function to access memory file data (for const objects) @@ -184,7 +170,6 @@ namespace ojph { * * @return a constant pointer to the data. 
*/ - OJPH_EXPORT const ui8* get_data() const { return buf; } private: @@ -195,7 +180,7 @@ namespace ojph { }; //////////////////////////////////////////////////////////////////////////// - class infile_base + class OJPH_EXPORT infile_base { public: enum seek : int { @@ -216,64 +201,46 @@ namespace ojph { }; //////////////////////////////////////////////////////////////////////////// - class j2c_infile : public infile_base + class OJPH_EXPORT j2c_infile : public infile_base { public: - OJPH_EXPORT j2c_infile() { fh = 0; } - OJPH_EXPORT - ~j2c_infile() { if (fh) fclose(fh); } + ~j2c_infile() override { if (fh) fclose(fh); } - OJPH_EXPORT void open(const char *filename); //read reads size bytes, returns the number of bytes read - OJPH_EXPORT - virtual size_t read(void *ptr, size_t size); + size_t read(void *ptr, size_t size) override; //seek returns 0 on success - OJPH_EXPORT - virtual int seek(si64 offset, enum infile_base::seek origin); - OJPH_EXPORT - virtual si64 tell(); - OJPH_EXPORT - virtual bool eof() { return feof(fh) != 0; } - OJPH_EXPORT - virtual void close(); + int seek(si64 offset, enum infile_base::seek origin) override; + si64 tell() override; + bool eof() override { return feof(fh) != 0; } + void close() override; private: FILE *fh; - }; //////////////////////////////////////////////////////////////////////////// - class mem_infile : public infile_base + class OJPH_EXPORT mem_infile : public infile_base { public: - OJPH_EXPORT mem_infile() { close(); } - OJPH_EXPORT - ~mem_infile() { } + ~mem_infile() override { } - OJPH_EXPORT void open(const ui8* data, size_t size); //read reads size bytes, returns the number of bytes read - OJPH_EXPORT - virtual size_t read(void *ptr, size_t size); + size_t read(void *ptr, size_t size) override; //seek returns 0 on success - OJPH_EXPORT - virtual int seek(si64 offset, enum infile_base::seek origin); - OJPH_EXPORT - virtual si64 tell() { return cur_ptr - data; } - OJPH_EXPORT - virtual bool eof() { return cur_ptr >= 
data + size; } - OJPH_EXPORT - virtual void close() { data = cur_ptr = NULL; size = 0; } + int seek(si64 offset, enum infile_base::seek origin) override; + si64 tell() override { return cur_ptr - data; } + bool eof() override { return cur_ptr >= data + size; } + void close() override { data = cur_ptr = NULL; size = 0; } private: const ui8 *data, *cur_ptr; size_t size; - }; diff --git a/src/core/common/ojph_version.h b/src/core/common/ojph_version.h index 09812863..1d537a94 100644 --- a/src/core/common/ojph_version.h +++ b/src/core/common/ojph_version.h @@ -35,4 +35,4 @@ #define OPENJPH_VERSION_MAJOR 0 #define OPENJPH_VERSION_MINOR 10 -#define OPENJPH_VERSION_PATCH beta0 +#define OPENJPH_VERSION_PATCH 0 From a730206367e5a8da1c67b4c8189eae819ae2cdeb Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Jan 2024 11:49:43 +1100 Subject: [PATCH 010/348] This addresses tests on MSys compilation. --- tests/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index fb2db788..08fd2f90 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -42,7 +42,7 @@ target_link_libraries( include(GoogleTest) gtest_add_tests(TARGET test_executables) -if (WIN32) +if (MSVC) if(CMAKE_BUILD_TYPE MATCHES "Debug") add_custom_command(TARGET test_executables POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "../bin/Debug/gtest.dll" "./Debug/" @@ -54,4 +54,9 @@ if (WIN32) COMMAND ${CMAKE_COMMAND} -E copy "../bin/Release/gtest_main.dll" "./Release/" ) endif() +elseif(MSYS) + add_custom_command(TARGET test_executables POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "../bin/msys-gtest.dll" "./" + COMMAND ${CMAKE_COMMAND} -E copy "../bin/msys-gtest_main.dll" "./" + ) endif() From ea6fb2e5ef271e08e434f65d533f41b32be4c7df Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Jan 2024 12:10:37 +1100 Subject: [PATCH 011/348] Test if this works --- .github/workflows/ccp-workflow.yml | 14 +++++++++++--- 1 file changed, 11 
insertions(+), 3 deletions(-) diff --git a/.github/workflows/ccp-workflow.yml b/.github/workflows/ccp-workflow.yml index 8eedc0ab..d691a69b 100644 --- a/.github/workflows/ccp-workflow.yml +++ b/.github/workflows/ccp-workflow.yml @@ -5,11 +5,19 @@ on: push jobs: build: - name: main build for Unix-like - runs-on: ${{ matrix.os }} strategy: matrix: - os: [macos-latest, ubuntu-20.04, ubuntu-latest] + include: [ + { system: MINGW64, runner: windows-2022 }, + { system: MINGW32, runner: windows-2022 }, + { system: UCRT64, runner: windows-2022 }, + { system: CLANG64, runner: windows-2022 }, + { system: CLANG32, runner: windows-2022 }, + { system: MACOS, runner: macos-latest }, + { system: UBUNTU, runner: ubuntu-latest }, + ] + name: ${{ matrix.system }} + runs-on: ${{ matrix.runner }} steps: - uses: actions/checkout@v3 - name: cmake From b20bcb1e00e8096b95ee44c745f5fee42e0ae14b Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Jan 2024 12:19:29 +1100 Subject: [PATCH 012/348] The previous test did not work. This is to correct it. 
--- .github/workflows/ccp-workflow.yml | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ccp-workflow.yml b/.github/workflows/ccp-workflow.yml index d691a69b..9b388901 100644 --- a/.github/workflows/ccp-workflow.yml +++ b/.github/workflows/ccp-workflow.yml @@ -8,15 +8,11 @@ jobs: strategy: matrix: include: [ - { system: MINGW64, runner: windows-2022 }, - { system: MINGW32, runner: windows-2022 }, - { system: UCRT64, runner: windows-2022 }, - { system: CLANG64, runner: windows-2022 }, - { system: CLANG32, runner: windows-2022 }, - { system: MACOS, runner: macos-latest }, - { system: UBUNTU, runner: ubuntu-latest }, + { system: MacOS, runner: macos-latest }, + { system: Ubuntu-20, runner: ubuntu-20.04 }, + { system: Ubuntu-latest, runner: ubuntu-latest }, ] - name: ${{ matrix.system }} + name: ${{ matrix.system }} Build runs-on: ${{ matrix.runner }} steps: - uses: actions/checkout@v3 @@ -28,11 +24,14 @@ jobs: working-directory: build test: - name: tests on Linux and MacOS - runs-on: ${{ matrix.os }} strategy: matrix: - os: [macos-latest, ubuntu-latest] + include: [ + { system: MacOS, runner: macos-latest }, + { system: Ubuntu-latest, runner: ubuntu-latest }, + ] + name: ${{ matrix.system }} Test + runs-on: ${{ matrix.runner }} steps: - uses: actions/checkout@v3 - name: cmake @@ -46,11 +45,13 @@ jobs: working-directory: build test_windows: - name: tests on Windows - runs-on: ${{ matrix.os }} strategy: matrix: - os: [windows-latest] + include: [ + { system: Windows, runner: windows-latest }, + ] + name: ${{ matrix.system }} Test + runs-on: ${{ matrix.runner }} steps: - uses: actions/checkout@v3 - name: cmake From fbaedb302c1c0d0539a86cdb08d2e28c1e860033 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Jan 2024 13:22:54 +1100 Subject: [PATCH 013/348] Improvement to CMakeLists.txt files. Now, in windows, tiff library files are copied to the bin folder. 
--- src/apps/CMakeLists.txt | 101 +++++++++++++++----------- src/apps/ojph_compress/CMakeLists.txt | 90 +++++++++++------------ src/apps/ojph_expand/CMakeLists.txt | 89 +++++++++++------------ 3 files changed, 145 insertions(+), 135 deletions(-) diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index d0779aa3..1af20d7e 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -1,43 +1,58 @@ -# Add tiff library -############################################################ -if( OJPH_ENABLE_TIFF_SUPPORT ) - - if( MSVC ) - - set(TIFF_INCLUDE_DIR "C:\\Program Files\\tiff\\include" CACHE PATH "the directory containing the TIFF headers") - set(TIFF_LIBRARY_DEBUG "C:\\Program Files\\tiff\\lib\\tiffd.lib" CACHE FILEPATH "the path to the TIFF library for debug configurations") - set(TIFF_LIBRARY_RELEASE "C:\\Program Files\\tiff\\lib\\tiff.lib" CACHE FILEPATH "the path to the TIFF library for release configurations") - set(TIFFXX_LIBRARY_DEBUG "C:\\Program Files\\tiff\\lib\\tiffxxd.lib" CACHE FILEPATH "the path to the TIFFXX library for debug configurations") - set(TIFFXX_LIBRARY_RELEASE "C:\\Program Files\\tiff\\lib\\tiffxx.lib" CACHE FILEPATH "the path to the TIFFXX library for release configurations") - - message( STATUS "WIN32 detected: Setting CMakeCache TIFF values as follows, use CMake-gui Advanced to modify them" ) - message( STATUS " TIFF_INCLUDE_DIR : \"${TIFF_INCLUDE_DIR}\" " ) - message( STATUS " TIFF_LIBRARY_DEBUG : \"${TIFF_LIBRARY_DEBUG}\" " ) - message( STATUS " TIFF_LIBRARY_RELEASE : \"${TIFF_LIBRARY_RELEASE}\" " ) - message( STATUS " TIFFXX_LIBRARY_DEBUG : \"${TIFFXX_LIBRARY_DEBUG}\" " ) - message( STATUS " TIFFXX_LIBRARY_RELEASE : \"${TIFFXX_LIBRARY_RELEASE}\" " ) - - endif( MSVC ) - - FIND_PACKAGE( TIFF ) - - if( TIFF_FOUND ) - set(USE_TIFF TRUE CACHE BOOL "Add TIFF support") - include_directories( ${TIFF_INCLUDE_DIR} ) - add_definitions(-DOJPH_ENABLE_TIFF_SUPPORT) - endif( TIFF_FOUND ) - -endif() 
-############################################################ - -## Change folders -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) - -## Build executables -add_subdirectory(ojph_expand) -add_subdirectory(ojph_compress) \ No newline at end of file + +if(OJPH_BUILD_EXECUTABLES) + + # Add tiff library + ############################################################ + if( OJPH_ENABLE_TIFF_SUPPORT ) + + if( MSVC ) + + set(TIFF_PATH "C:\\Program Files\\tiff") + set(TIFF_INCLUDE_DIR "${TIFF_PATH}\\include" CACHE PATH "the directory containing the TIFF headers") + set(TIFF_LIBRARY_DEBUG "${TIFF_PATH}\\lib\\tiffd.lib" CACHE FILEPATH "the path to the TIFF library for debug configurations") + set(TIFF_LIBRARY_RELEASE "${TIFF_PATH}\\lib\\tiff.lib" CACHE FILEPATH "the path to the TIFF library for release configurations") + set(TIFFXX_LIBRARY_DEBUG "${TIFF_PATH}\\lib\\tiffxxd.lib" CACHE FILEPATH "the path to the TIFFXX library for debug configurations") + set(TIFFXX_LIBRARY_RELEASE "${TIFF_PATH}\\lib\\tiffxx.lib" CACHE FILEPATH "the path to the TIFFXX library for release configurations") + + message( STATUS "WIN32 detected: Setting CMakeCache TIFF values as follows, use CMake-gui Advanced to modify them" ) + message( STATUS " TIFF_INCLUDE_DIR : \"${TIFF_INCLUDE_DIR}\" " ) + message( STATUS " TIFF_LIBRARY_DEBUG : \"${TIFF_LIBRARY_DEBUG}\" " ) + message( STATUS " TIFF_LIBRARY_RELEASE : \"${TIFF_LIBRARY_RELEASE}\" " ) + message( STATUS " TIFFXX_LIBRARY_DEBUG : \"${TIFFXX_LIBRARY_DEBUG}\" " ) + message( STATUS " TIFFXX_LIBRARY_RELEASE : \"${TIFFXX_LIBRARY_RELEASE}\" " ) + + endif( MSVC ) + + FIND_PACKAGE( TIFF ) + 
+ if( TIFF_FOUND ) + set(USE_TIFF TRUE CACHE BOOL "Add TIFF support") + include_directories( ${TIFF_INCLUDE_DIR} ) + add_definitions(-DOJPH_ENABLE_TIFF_SUPPORT) + endif( TIFF_FOUND ) + + endif() + ############################################################ + + ## Change folders + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) + + ## Build executables + add_subdirectory(ojph_expand) + add_subdirectory(ojph_compress) + + if (MSVC AND OJPH_ENABLE_TIFF_SUPPORT) + if (CMAKE_BUILD_TYPE MATCHES "Release") + file(COPY "${TIFF_PATH}\\bin\\tiff.dll" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") + file(COPY "${TIFF_PATH}\\bin\\tiffxx.dll" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") + elseif(CMAKE_BUILD_TYPE MATCHES "Debug") + file(COPY "${TIFF_PATH}\\bin\\tiffd.dll" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") + file(COPY "${TIFF_PATH}\\bin\\tiffxxd.dll" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") + endif() + endif() +endif() diff --git a/src/apps/ojph_compress/CMakeLists.txt b/src/apps/ojph_compress/CMakeLists.txt index 29b4201e..3dd3c59a 100644 --- a/src/apps/ojph_compress/CMakeLists.txt +++ b/src/apps/ojph_compress/CMakeLists.txt @@ -1,55 +1,53 @@ ## building ojph_compress ######################### -if(OJPH_BUILD_EXECUTABLES) - - include_directories(../common) - include_directories(../../core/common) - - file(GLOB OJPH_COMPRESS "ojph_compress.cpp") - file(GLOB OJPH_IMG_IO "../others/ojph_img_io.cpp") - file(GLOB OJPH_IMG_IO_SSE4 "../others/ojph_img_io_sse41.cpp") - file(GLOB OJPH_IMG_IO_AVX2 "../others/ojph_img_io_avx2.cpp") - file(GLOB OJPH_IMG_IO_H "../common/ojph_img_io.h") - 
- list(APPEND SOURCES ${OJPH_COMPRESS} ${OJPH_IMG_IO} ${OJPH_IMG_IO_H}) - - source_group("main" FILES ${OJPH_COMPRESS}) - source_group("others" FILES ${OJPH_IMG_IO}) - source_group("common" FILES ${OJPH_IMG_IO_H}) - - if(EMSCRIPTEN) - add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) - add_executable(ojph_compress ${SOURCES}) - add_executable(ojph_compress_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) - target_compile_options(ojph_compress_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) - source_group("others" FILES ${OJPH_IMG_IO_SSE4}) - - target_link_libraries(ojph_compress PRIVATE openjph) - install(TARGETS ojph_compress DESTINATION bin) - target_link_libraries(ojph_compress_simd PRIVATE openjphsimd) - install(TARGETS ojph_compress_simd DESTINATION bin) - else() - if(NOT OJPH_DISABLE_INTEL_SIMD) - add_executable(ojph_compress ${SOURCES} ${OJPH_IMG_IO_SSE4} ${OJPH_IMG_IO_AVX2}) - - if (MSVC) - set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS "/arch:AVX2") - else() - set_source_files_properties(${OJPH_IMG_IO_SSE4} PROPERTIES COMPILE_FLAGS -msse4.1) - set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS -mavx2) - endif() +include_directories(../common) +include_directories(../../core/common) + +file(GLOB OJPH_COMPRESS "ojph_compress.cpp") +file(GLOB OJPH_IMG_IO "../others/ojph_img_io.cpp") +file(GLOB OJPH_IMG_IO_SSE4 "../others/ojph_img_io_sse41.cpp") +file(GLOB OJPH_IMG_IO_AVX2 "../others/ojph_img_io_avx2.cpp") +file(GLOB OJPH_IMG_IO_H "../common/ojph_img_io.h") + +list(APPEND SOURCES ${OJPH_COMPRESS} ${OJPH_IMG_IO} ${OJPH_IMG_IO_H}) + +source_group("main" FILES ${OJPH_COMPRESS}) +source_group("others" FILES ${OJPH_IMG_IO}) +source_group("common" FILES ${OJPH_IMG_IO_H}) + +if(EMSCRIPTEN) + add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) + add_executable(ojph_compress ${SOURCES}) + add_executable(ojph_compress_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) + 
target_compile_options(ojph_compress_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) + source_group("others" FILES ${OJPH_IMG_IO_SSE4}) + + target_link_libraries(ojph_compress PRIVATE openjph) + install(TARGETS ojph_compress DESTINATION bin) + target_link_libraries(ojph_compress_simd PRIVATE openjphsimd) + install(TARGETS ojph_compress_simd DESTINATION bin) +else() + if(NOT OJPH_DISABLE_INTEL_SIMD) + add_executable(ojph_compress ${SOURCES} ${OJPH_IMG_IO_SSE4} ${OJPH_IMG_IO_AVX2}) + + if (MSVC) + set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS "/arch:AVX2") else() - add_executable(ojph_compress ${SOURCES}) - endif() - - if( USE_TIFF ) - target_link_libraries(ojph_compress PUBLIC openjph ${TIFF_LIBRARIES}) - else() - target_link_libraries(ojph_compress PUBLIC openjph) + set_source_files_properties(${OJPH_IMG_IO_SSE4} PROPERTIES COMPILE_FLAGS -msse4.1) + set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS -mavx2) endif() + else() + add_executable(ojph_compress ${SOURCES}) + endif() - install(TARGETS ojph_compress DESTINATION bin) + if( USE_TIFF ) + target_link_libraries(ojph_compress PUBLIC openjph ${TIFF_LIBRARIES}) + else() + target_link_libraries(ojph_compress PUBLIC openjph) endif() + install(TARGETS ojph_compress DESTINATION bin) + endif() + diff --git a/src/apps/ojph_expand/CMakeLists.txt b/src/apps/ojph_expand/CMakeLists.txt index d93e568d..f7b1ed33 100644 --- a/src/apps/ojph_expand/CMakeLists.txt +++ b/src/apps/ojph_expand/CMakeLists.txt @@ -1,55 +1,52 @@ ## building ojph_expand ####################### -if(OJPH_BUILD_EXECUTABLES) - - include_directories(../common) - include_directories(../../core/common) - - file(GLOB OJPH_EXPAND "ojph_expand.cpp") - file(GLOB OJPH_IMG_IO "../others/ojph_img_io.cpp") - file(GLOB OJPH_IMG_IO_SSE4 "../others/ojph_img_io_sse41.cpp") - file(GLOB OJPH_IMG_IO_AVX2 "../others/ojph_img_io_avx2.cpp") - file(GLOB OJPH_IMG_IO_H "../common/ojph_img_io.h") - - list(APPEND SOURCES 
${OJPH_EXPAND} ${OJPH_IMG_IO} ${OJPH_IMG_IO_H}) - - source_group("main" FILES ${OJPH_EXPAND}) - source_group("others" FILES ${OJPH_IMG_IO}) - source_group("common" FILES ${OJPH_IMG_IO_H}) - - if(EMSCRIPTEN) - add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) - add_executable(ojph_expand ${SOURCES}) - add_executable(ojph_expand_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) - target_compile_options(ojph_expand_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) - source_group("others" FILES ${OJPH_IMG_IO_SSE4}) - - target_link_libraries(ojph_expand PRIVATE openjph) - install(TARGETS ojph_expand DESTINATION bin) - target_link_libraries(ojph_expand_simd PRIVATE openjphsimd) - install(TARGETS ojph_expand_simd DESTINATION bin) - else() - if(NOT OJPH_DISABLE_INTEL_SIMD) - add_executable(ojph_expand ${SOURCES} ${OJPH_IMG_IO_SSE4} ${OJPH_IMG_IO_AVX2}) - - if (MSVC) - set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS "/arch:AVX2") - else() - set_source_files_properties(${OJPH_IMG_IO_SSE4} PROPERTIES COMPILE_FLAGS -msse4.1) - set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS -mavx2) - endif() - else() - add_executable(ojph_expand ${SOURCES}) - endif() - - if( USE_TIFF ) - target_link_libraries(ojph_expand PUBLIC openjph ${TIFF_LIBRARIES}) +include_directories(../common) +include_directories(../../core/common) + +file(GLOB OJPH_EXPAND "ojph_expand.cpp") +file(GLOB OJPH_IMG_IO "../others/ojph_img_io.cpp") +file(GLOB OJPH_IMG_IO_SSE4 "../others/ojph_img_io_sse41.cpp") +file(GLOB OJPH_IMG_IO_AVX2 "../others/ojph_img_io_avx2.cpp") +file(GLOB OJPH_IMG_IO_H "../common/ojph_img_io.h") + +list(APPEND SOURCES ${OJPH_EXPAND} ${OJPH_IMG_IO} ${OJPH_IMG_IO_H}) + +source_group("main" FILES ${OJPH_EXPAND}) +source_group("others" FILES ${OJPH_IMG_IO}) +source_group("common" FILES ${OJPH_IMG_IO_H}) + +if(EMSCRIPTEN) + add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) + add_executable(ojph_expand 
${SOURCES}) + add_executable(ojph_expand_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) + target_compile_options(ojph_expand_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) + source_group("others" FILES ${OJPH_IMG_IO_SSE4}) + + target_link_libraries(ojph_expand PRIVATE openjph) + install(TARGETS ojph_expand DESTINATION bin) + target_link_libraries(ojph_expand_simd PRIVATE openjphsimd) + install(TARGETS ojph_expand_simd DESTINATION bin) +else() + if(NOT OJPH_DISABLE_INTEL_SIMD) + add_executable(ojph_expand ${SOURCES} ${OJPH_IMG_IO_SSE4} ${OJPH_IMG_IO_AVX2}) + + if (MSVC) + set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS "/arch:AVX2") else() - target_link_libraries(ojph_expand PUBLIC openjph) + set_source_files_properties(${OJPH_IMG_IO_SSE4} PROPERTIES COMPILE_FLAGS -msse4.1) + set_source_files_properties(${OJPH_IMG_IO_AVX2} PROPERTIES COMPILE_FLAGS -mavx2) endif() + else() + add_executable(ojph_expand ${SOURCES}) + endif() - install(TARGETS ojph_expand DESTINATION bin) + if( USE_TIFF ) + target_link_libraries(ojph_expand PUBLIC openjph ${TIFF_LIBRARIES}) + else() + target_link_libraries(ojph_expand PUBLIC openjph) endif() + install(TARGETS ojph_expand DESTINATION bin) + endif() From 9ebe7bcfe1290591c42aa3ae1bc5ee7ced534e64 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Jan 2024 19:53:50 +1100 Subject: [PATCH 014/348] Fix CodeQL script. 
--- .github/workflows/codeql.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index fe9d084c..c2d527ad 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -38,7 +38,7 @@ jobs: strategy: fail-fast: false matrix: - language: [ 'cpp', 'python' ] + language: [ 'cpp' ] # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ] # Use only 'java' to analyze code written in Java, Kotlin or both # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both From 99cdae102ea79745b077ce83a900a738f8921a8d Mon Sep 17 00:00:00 2001 From: Thomas Klausner Date: Sat, 6 Jan 2024 20:12:46 +0100 Subject: [PATCH 015/348] Add library path to pkg-config file. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 54e63e3c..b4715087 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,7 +86,7 @@ endif() set(PKG_CONFIG_INCLUDEDIR "\${prefix}/include") set(PKG_CONFIG_LIBDIR "\${prefix}/lib") -set(PKG_CONFIG_LIBS "-lopenjph") +set(PKG_CONFIG_LIBS "-L\${prefix}/lib -lopenjph") include(GNUInstallDirs) install(TARGETS openjph From e7cf53e6893a92da3ebe010fe98526540af5bfd5 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sun, 7 Jan 2024 08:52:52 +1100 Subject: [PATCH 016/348] Version Change --- src/core/common/ojph_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/common/ojph_version.h b/src/core/common/ojph_version.h index 1d537a94..44d7453a 100644 --- a/src/core/common/ojph_version.h +++ b/src/core/common/ojph_version.h @@ -35,4 +35,4 @@ #define OPENJPH_VERSION_MAJOR 0 #define OPENJPH_VERSION_MINOR 10 -#define OPENJPH_VERSION_PATCH 0 +#define OPENJPH_VERSION_PATCH 1 From db8c8f02d93e13360085ad544d1d32d2626cf90e Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Mon, 8 Jan 2024 21:43:50 +1100 Subject: [PATCH 017/348] Improving 
PKG-Config configuration, and version bumping. --- CMakeLists.txt | 3 ++- src/core/common/ojph_version.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b4715087..78b0c94f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,7 +86,8 @@ endif() set(PKG_CONFIG_INCLUDEDIR "\${prefix}/include") set(PKG_CONFIG_LIBDIR "\${prefix}/lib") -set(PKG_CONFIG_LIBS "-L\${prefix}/lib -lopenjph") +set(PKG_CONFIG_LIBS "-L\${libdir} -lopenjph") +set(PKG_CONFIG_CFLAGS "-I\${includedir}/openjph") include(GNUInstallDirs) install(TARGETS openjph diff --git a/src/core/common/ojph_version.h b/src/core/common/ojph_version.h index 44d7453a..df3d3c23 100644 --- a/src/core/common/ojph_version.h +++ b/src/core/common/ojph_version.h @@ -35,4 +35,4 @@ #define OPENJPH_VERSION_MAJOR 0 #define OPENJPH_VERSION_MINOR 10 -#define OPENJPH_VERSION_PATCH 1 +#define OPENJPH_VERSION_PATCH 2 From 752d66450d3e426885bf96f73bfe62e3f08bebcb Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Mon, 8 Jan 2024 21:59:41 +1100 Subject: [PATCH 018/348] Further improved PKG-Config --- CMakeLists.txt | 2 +- src/pkg-config.pc.cmake | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 78b0c94f..94afd3fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,7 +87,7 @@ endif() set(PKG_CONFIG_INCLUDEDIR "\${prefix}/include") set(PKG_CONFIG_LIBDIR "\${prefix}/lib") set(PKG_CONFIG_LIBS "-L\${libdir} -lopenjph") -set(PKG_CONFIG_CFLAGS "-I\${includedir}/openjph") +set(PKG_CONFIG_CFLAGS "-I\${includedir}") include(GNUInstallDirs) install(TARGETS openjph diff --git a/src/pkg-config.pc.cmake b/src/pkg-config.pc.cmake index 99bec574..546b2675 100644 --- a/src/pkg-config.pc.cmake +++ b/src/pkg-config.pc.cmake @@ -1,9 +1,9 @@ -Name: ${PROJECT_NAME} -Description: ${PROJECT_DESCRIPTION} -Version: ${PROJECT_VERSION} -Requires: ${PKG_CONFIG_REQUIRES} -prefix=${CMAKE_INSTALL_PREFIX} 
-includedir=${PKG_CONFIG_INCLUDEDIR} -libdir=${PKG_CONFIG_LIBDIR} -Libs: ${PKG_CONFIG_LIBS} -Cflags: ${PKG_CONFIG_CFLAGS} +Name: @PROJECT_NAME@ +Description: @PROJECT_DESCRIPTION@ +Version: @PROJECT_VERSION@ +Requires: @PKG_CONFIG_REQUIRES@ +prefix=@CMAKE_INSTALL_PREFIX@ +includedir=@PKG_CONFIG_INCLUDEDIR@ +libdir=@PKG_CONFIG_LIBDIR@ +Libs: @PKG_CONFIG_LIBS@ +Cflags: @PKG_CONFIG_CFLAGS@ From 1cb338ceb2efc25a91983891a4c6fc2c1f82aa17 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Tue, 9 Jan 2024 11:15:19 +1100 Subject: [PATCH 019/348] Improve CMakeLists.txt to better support PKG-Config, and a version bump. --- CMakeLists.txt | 13 +++++++------ src/apps/CMakeLists.txt | 16 +++++++++------- src/core/CMakeLists.txt | 17 ++++++++++------- src/core/common/ojph_version.h | 2 +- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 94afd3fe..341502fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,11 +84,6 @@ endif() # Install ################################################################################################ -set(PKG_CONFIG_INCLUDEDIR "\${prefix}/include") -set(PKG_CONFIG_LIBDIR "\${prefix}/lib") -set(PKG_CONFIG_LIBS "-L\${libdir} -lopenjph") -set(PKG_CONFIG_CFLAGS "-I\${includedir}") - include(GNUInstallDirs) install(TARGETS openjph ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} @@ -96,16 +91,22 @@ install(TARGETS openjph LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(DIRECTORY src/core/common/ - DESTINATION include/openjph + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openjph FILES_MATCHING PATTERN "*.h") install(FILES "${CMAKE_BINARY_DIR}/${PROJECT_NAME}.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +set(PKG_CONFIG_INCLUDEDIR "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}") +set(PKG_CONFIG_LIBDIR "\${prefix}/${CMAKE_INSTALL_LIBDIR}") +set(PKG_CONFIG_LIBS "-L\${libdir} -lopenjph") +set(PKG_CONFIG_CFLAGS "-I\${includedir}") + configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/src/pkg-config.pc.cmake" 
"${CMAKE_BINARY_DIR}/${PROJECT_NAME}.pc" + @ONLY ) ################################################################################################ diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index 1af20d7e..a4a04df1 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -34,13 +34,15 @@ if(OJPH_BUILD_EXECUTABLES) endif() ############################################################ - ## Change folders - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) + ## change folders but only for Microsoft + if(MSVC) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) + endif() ## Build executables add_subdirectory(ojph_expand) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e43b6977..1f41da8c 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,10 +1,13 @@ -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE 
${CMAKE_BINARY_DIR}/../lib) +## change folders but only for Microsoft +if (MSVC) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) +endif() include_directories(common) @@ -89,7 +92,7 @@ else() set_source_files_properties(transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) endif() -if (WIN32) +if (MSVC) set(OJPH_LIB_NAME_STRING "openjph.${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}") set_target_properties(openjph PROPERTIES diff --git a/src/core/common/ojph_version.h b/src/core/common/ojph_version.h index df3d3c23..e7fcc900 100644 --- a/src/core/common/ojph_version.h +++ b/src/core/common/ojph_version.h @@ -35,4 +35,4 @@ #define OPENJPH_VERSION_MAJOR 0 #define OPENJPH_VERSION_MINOR 10 -#define OPENJPH_VERSION_PATCH 2 +#define OPENJPH_VERSION_PATCH 3 From c675935c792b0d38bf54e3908c475b14d77eb4ae Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Wed, 10 Jan 2024 16:40:11 +1100 Subject: [PATCH 020/348] trivial typo fixes --- src/apps/ojph_compress/ojph_compress.cpp | 4 ++-- src/apps/ojph_expand/ojph_expand.cpp | 16 ++++++++-------- src/apps/others/ojph_img_io.cpp | 6 +++--- src/core/coding/ojph_block_common.cpp | 10 +++++----- src/core/coding/ojph_block_decoder.cpp | 4 ++-- src/core/coding/ojph_block_decoder_ssse3.cpp | 6 +++--- src/core/coding/ojph_block_decoder_wasm.cpp | 4 ++-- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/apps/ojph_compress/ojph_compress.cpp b/src/apps/ojph_compress/ojph_compress.cpp index b800224c..42befaff 100644 --- a/src/apps/ojph_compress/ojph_compress.cpp +++ b/src/apps/ojph_compress/ojph_compress.cpp @@ -560,7 +560,7 @@ int 
main(int argc, char * argv[]) { " a codeblock. In unix-like environment, { and } must be\n" " proceeded by a ""\\""\n" " -precincts {x,y},{x,y},...,{x,y} where {x,y} is the precinct size\n" - " starting from the coarest resolution; the last precinct\n" + " starting from the coarsest resolution; the last precinct\n" " is repeated for all finer resolutions\n" " -tile_offset {x,y} tile offset. \n" " -tile_size {x,y} tile width and height. \n" @@ -900,7 +900,7 @@ int main(int argc, char * argv[]) { cod.set_progression_order(prog_order); if (employ_color_transform != -1) OJPH_ERROR(0x01000086, - "color transform is not meaningless since .raw files are single " + "color transform is meaningless since .raw files are single " "component files"); cod.set_reversible(reversible); if (!reversible && quantization_step != -1.0f) diff --git a/src/apps/ojph_expand/ojph_expand.cpp b/src/apps/ojph_expand/ojph_expand.cpp index dfee3cef..638c0c1d 100644 --- a/src/apps/ojph_expand/ojph_expand.cpp +++ b/src/apps/ojph_expand/ojph_expand.cpp @@ -63,14 +63,14 @@ struct ui32_list_interpreter : public ojph::cli_interpreter::arg_inter_base { if (num_eles) { - if (*next_char != ',') //separate sizes by a comma - throw "sizes in a sizes list must be separated by a comma"; + if (*next_char != ',') //separate res by a comma + throw "resolutions in a list must be separated by a comma"; next_char++; } char *endptr; si32list[num_eles] = (ojph::ui32)strtoul(next_char, &endptr, 10); if (endptr == next_char) - throw "size number is improperly formatted"; + throw "res number is improperly formatted"; next_char = endptr; ++num_eles; } @@ -81,7 +81,7 @@ struct ui32_list_interpreter : public ojph::cli_interpreter::arg_inter_base throw "list elements must separated by a "","""; } else if (*next_char) - throw "there are too many elements in the size list"; + throw "there are too many elements in the resolution list"; } const int max_num_eles; @@ -204,7 +204,7 @@ int main(int argc, char *argv[]) { try { if 
(output_filename == NULL) OJPH_ERROR(0x020000008, - "Please provide and output file using the -o option\n"); + "Please provide an output file using the -o option\n"); ojph::j2c_infile j2c_file; j2c_file.open(input_filename); @@ -233,7 +233,7 @@ int main(int argc, char *argv[]) { if (siz.get_num_components() != 1) OJPH_ERROR(0x020000001, "The file has more than one color component, but .pgm can " - "contain only on color component\n"); + "contain only one color component\n"); ppm.configure(siz.get_recon_width(0), siz.get_recon_height(0), siz.get_num_components(), siz.get_bit_depth(0)); ppm.open(output_filename); @@ -258,7 +258,7 @@ int main(int argc, char *argv[]) { if (!all_same) OJPH_ERROR(0x020000003, "To save an image to ppm, all the components must have the " - "downsampling ratio\n"); + "same downsampling ratio\n"); ppm.configure(siz.get_recon_width(0), siz.get_recon_height(0), siz.get_num_components(), siz.get_bit_depth(0)); ppm.open(output_filename); @@ -280,7 +280,7 @@ int main(int argc, char *argv[]) { if (!all_same) OJPH_ERROR(0x020000008, "To save an image to tif(f), all the components must have the " - "downsampling ratio\n"); + "same downsampling ratio\n"); ojph::ui32 bit_depths[4] = { 0, 0, 0, 0 }; for (ojph::ui32 c = 0; c < siz.get_num_components(); c++) { diff --git a/src/apps/others/ojph_img_io.cpp b/src/apps/others/ojph_img_io.cpp index 88fdcfc5..1c71bb6c 100644 --- a/src/apps/others/ojph_img_io.cpp +++ b/src/apps/others/ojph_img_io.cpp @@ -276,7 +276,7 @@ namespace ojph { { close(); OJPH_ERROR(0x030000005, "wrong file extension, a file with keyword P6 " - "must have a .ppm extension fir file %s", filename); + "must have a .ppm extension for file %s", filename); } // set number of components based on file-type @@ -309,7 +309,7 @@ namespace ojph { temp_buf = malloc(temp_buf_byte_size); if (temp_buf == NULL) { // failed to allocate memory if (t) free(t); // the original buffer is still valid - OJPH_ERROR(0x030000007, "error allocating mmeory"); + 
OJPH_ERROR(0x030000007, "error allocating memory"); } } else @@ -586,7 +586,7 @@ namespace ojph { // Error on known incompatilbe input formats if( tiff_bits_per_sample != 8 && tiff_bits_per_sample != 16 ) { - OJPH_ERROR(0x0300000B3, "\nTIFF IO is currently limited to file limited" + OJPH_ERROR(0x0300000B3, "\nTIFF IO is currently limited" " to files with TIFFTAG_BITSPERSAMPLE=8 and TIFFTAG_BITSPERSAMPLE=16 \n" "input file = %s has TIFFTAG_BITSPERSAMPLE=%d", filename, tiff_bits_per_sample); diff --git a/src/core/coding/ojph_block_common.cpp b/src/core/coding/ojph_block_common.cpp index 6e1b53fd..e6b4de6a 100644 --- a/src/core/coding/ojph_block_common.cpp +++ b/src/core/coding/ojph_block_common.cpp @@ -55,13 +55,13 @@ namespace ojph { * \li \c cwd_len : 3bits -> the codeword length of the VLC codeword; * the VLC cwd is in the LSB of bitstream \n * \li \c u_off : 1bit -> u_offset, which is 1 if u value is not 0 \n - * \li \c rho : 4bits -> signficant samples within a quad \n + * \li \c rho : 4bits -> significant samples within a quad \n * \li \c e_1 : 4bits -> EMB e_1 \n * \li \c e_k : 4bits -> EMB e_k \n * \n * The table index is 10 bits and composed of two parts: \n * The 7 LSBs contain a codeword which might be shorter than 7 bits; - * this word is the next decoable bits in the bitstream. \n + * this word is the next decodable bits in the bitstream. \n * The 3 MSB is the context of for the codeword. \n */ @@ -75,7 +75,7 @@ namespace ojph { //************************************************************************/ /** @defgroup uvlc_decoding_tables_grp VLC decoding tables * @{ - * UVLC decoding tables used to partiallu decode u values from UVLC + * UVLC decoding tables used to partially decode u values from UVLC * codewords. \n * The table index is 8 (or 9) bits and composed of two parts: \n * The 6 LSBs carries the head of the VLC to be decoded. 
Up to 6 bits to @@ -109,7 +109,7 @@ namespace ojph { //Data in the table is arranged in this format (taken from the standard) // c_q is the context for a quad - // rho is the signficance pattern for a quad + // rho is the significance pattern for a quad // u_off indicate if u value is 0 (u_off is 0), or communicated // e_k, e_1 EMB patterns // cwd VLC codeword @@ -132,7 +132,7 @@ namespace ojph { if (debug) memset(vlc_tbl0, 0, sizeof(vlc_tbl0)); //unnecessary // this is to convert table entries into values for decoder look up - // There can be at most 1024 possibilites, not all of them are valid. + // There can be at most 1024 possibilities, not all of them are valid. // for (int i = 0; i < 1024; ++i) { diff --git a/src/core/coding/ojph_block_decoder.cpp b/src/core/coding/ojph_block_decoder.cpp index 8c287990..9a121876 100644 --- a/src/core/coding/ojph_block_decoder.cpp +++ b/src/core/coding/ojph_block_decoder.cpp @@ -64,7 +64,7 @@ namespace ojph { dec_mel_st() : data(NULL), tmp(0), bits(0), size(0), unstuff(false), k(0), num_runs(0), runs(0) {} - // data decoding machinary + // data decoding machinery ui8* data; //! 
Date: Wed, 10 Jan 2024 21:11:47 +1100 Subject: [PATCH 021/348] Update ojph_expand.cpp modified res to resolution --- src/apps/ojph_expand/ojph_expand.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/apps/ojph_expand/ojph_expand.cpp b/src/apps/ojph_expand/ojph_expand.cpp index 638c0c1d..6b836680 100644 --- a/src/apps/ojph_expand/ojph_expand.cpp +++ b/src/apps/ojph_expand/ojph_expand.cpp @@ -70,7 +70,7 @@ struct ui32_list_interpreter : public ojph::cli_interpreter::arg_inter_base char *endptr; si32list[num_eles] = (ojph::ui32)strtoul(next_char, &endptr, 10); if (endptr == next_char) - throw "res number is improperly formatted"; + throw "resolution number is improperly formatted"; next_char = endptr; ++num_eles; } From f453f16d7e41852285cdeb652b3c93b3a83b299e Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Thu, 11 Jan 2024 17:44:06 +1100 Subject: [PATCH 022/348] add getter for TLM marker request --- src/core/codestream/ojph_codestream.cpp | 6 ++++++ src/core/common/ojph_codestream.h | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/core/codestream/ojph_codestream.cpp b/src/core/codestream/ojph_codestream.cpp index 7036085c..1afbb973 100644 --- a/src/core/codestream/ojph_codestream.cpp +++ b/src/core/codestream/ojph_codestream.cpp @@ -114,6 +114,12 @@ namespace ojph { state->request_tlm_marker(needed); } + //////////////////////////////////////////////////////////////////////////// + bool codestream::is_tlm_requested() + { + return state->is_tlm_needed(); + } + //////////////////////////////////////////////////////////////////////////// bool codestream::is_planar() const { diff --git a/src/core/common/ojph_codestream.h b/src/core/common/ojph_codestream.h index 042fe03c..e3bebe1a 100644 --- a/src/core/common/ojph_codestream.h +++ b/src/core/common/ojph_codestream.h @@ -79,7 +79,9 @@ namespace ojph { OJPH_EXPORT void set_tilepart_divisions(bool at_resolutions, bool at_components); OJPH_EXPORT - void request_tlm_marker(bool 
needed); + void request_tlm_marker(bool needed); + OJPH_EXPORT + bool is_tlm_requested(); OJPH_EXPORT void write_headers(outfile_base *file, From f72f8bb5173008d19b8fde8e9173557a926173e3 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Thu, 11 Jan 2024 17:46:36 +1100 Subject: [PATCH 023/348] make comment_exchange string const --- src/core/codestream/ojph_params.cpp | 4 ++-- src/core/common/ojph_params.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 6fe5e567..fa194431 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -368,7 +368,7 @@ namespace ojph { //////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - void comment_exchange::set_string(char* str) + void comment_exchange::set_string(const char* str) { size_t t = strlen(str); if (len > 65531) @@ -380,7 +380,7 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - void comment_exchange::set_data(char* data, ui16 len) + void comment_exchange::set_data(const char* data, ui16 len) { if (len > 65531) OJPH_ERROR(0x000500C2, diff --git a/src/core/common/ojph_params.h b/src/core/common/ojph_params.h index d17e8d2c..2ab85db4 100644 --- a/src/core/common/ojph_params.h +++ b/src/core/common/ojph_params.h @@ -176,12 +176,12 @@ namespace ojph { public: comment_exchange() : data(NULL), len(0), Rcom(0) {} OJPH_EXPORT - void set_string(char* str); + void set_string(const char* str); OJPH_EXPORT - void set_data(char* data, ui16 len); + void set_data(const char* data, ui16 len); private: - char* data; + const char* data; ui16 len; ui16 Rcom; }; From 8cb0d2b9e31e4c8afac093daa352b96dd58cab36 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Fri, 12 Jan 2024 15:42:34 +1100 Subject: [PATCH 024/348] add accessors for tilepart division --- 
src/core/codestream/ojph_codestream.cpp | 14 ++++++++++++++ src/core/codestream/ojph_codestream_local.cpp | 2 +- src/core/codestream/ojph_params_local.h | 4 ++-- src/core/codestream/ojph_tile.cpp | 6 +++--- src/core/common/ojph_codestream.h | 5 +++++ 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/core/codestream/ojph_codestream.cpp b/src/core/codestream/ojph_codestream.cpp index 1afbb973..06f6b567 100644 --- a/src/core/codestream/ojph_codestream.cpp +++ b/src/core/codestream/ojph_codestream.cpp @@ -108,6 +108,20 @@ namespace ojph { state->set_tilepart_divisions(value); } + //////////////////////////////////////////////////////////////////////////// + bool codestream::is_tilepart_division_at_resolutions() + { + ui32 res = state->get_tilepart_div() & OJPH_TILEPART_RESOLUTIONS; + return res ? true : false; + } + + //////////////////////////////////////////////////////////////////////////// + bool codestream::is_tilepart_division_at_components() + { + ui32 comp = state->get_tilepart_div() & OJPH_TILEPART_COMPONENTS; + return comp ? 
true : false; + } + //////////////////////////////////////////////////////////////////////////// void codestream::request_tlm_marker(bool needed) { diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index cb1a69b1..3bee6719 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -68,7 +68,7 @@ namespace ojph { employ_color_transform = false; planar = -1; profile = OJPH_PN_UNDEFINED; - tilepart_div = OJPH_TILEPART_NODIVSIONS; + tilepart_div = OJPH_TILEPART_NO_DIVISIONS; need_tlm = false; cur_comp = 0; diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index 2450b00d..9c169d63 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -95,8 +95,8 @@ namespace ojph { const char OJPH_PN_STRING_IMF[] = "IMF"; //////////////////////////////////////////////////////////////////////////// - enum OJPH_TILEPART_DIVISONS: ui32 { - OJPH_TILEPART_NODIVSIONS = 0x0, // no divisions to tile parts + enum OJPH_TILEPART_DIVISIONS: ui32 { + OJPH_TILEPART_NO_DIVISIONS = 0x0, // no divisions to tile parts OJPH_TILEPART_RESOLUTIONS = 0x1, OJPH_TILEPART_COMPONENTS = 0x2, OJPH_TILEPART_LAYERS = 0x4, // these are meaningless with HTJ2K diff --git a/src/core/codestream/ojph_tile.cpp b/src/core/codestream/ojph_tile.cpp index 9fecd03b..0ad4acd3 100644 --- a/src/core/codestream/ojph_tile.cpp +++ b/src/core/codestream/ojph_tile.cpp @@ -412,7 +412,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void tile::fill_tlm(param_tlm *tlm) { - if (tilepart_div == OJPH_TILEPART_NODIVSIONS) { + if (tilepart_div == OJPH_TILEPART_NO_DIVISIONS) { tlm->set_next_pair(sot.get_tile_index(), this->num_bytes); } else if (tilepart_div == OJPH_TILEPART_RESOLUTIONS) @@ -471,7 +471,7 @@ namespace ojph { max_decompositions = ojph_max(max_decompositions, 
comps[c].get_num_decompositions()); - if (tilepart_div == OJPH_TILEPART_NODIVSIONS) + if (tilepart_div == OJPH_TILEPART_NO_DIVISIONS) { //write tile header if (!sot.write(file, this->num_bytes)) @@ -487,7 +487,7 @@ namespace ojph { //sequence the writing of precincts according to progression order if (prog_order == OJPH_PO_LRCP || prog_order == OJPH_PO_RLCP) { - if (tilepart_div == OJPH_TILEPART_NODIVSIONS) + if (tilepart_div == OJPH_TILEPART_NO_DIVISIONS) { for (ui32 r = 0; r <= max_decompositions; ++r) for (ui32 c = 0; c < num_comps; ++c) diff --git a/src/core/common/ojph_codestream.h b/src/core/common/ojph_codestream.h index e3bebe1a..2d7e74ad 100644 --- a/src/core/common/ojph_codestream.h +++ b/src/core/common/ojph_codestream.h @@ -78,6 +78,11 @@ namespace ojph { void set_profile(const char* s); OJPH_EXPORT void set_tilepart_divisions(bool at_resolutions, bool at_components); + OJPH_EXPORT + bool is_tilepart_division_at_resolutions(); + OJPH_EXPORT + bool is_tilepart_division_at_components(); + OJPH_EXPORT void request_tlm_marker(bool needed); OJPH_EXPORT From 4d50eb7c75e0bc297400b48a3e6ab65bbb2a4ef4 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Mon, 22 Jan 2024 18:50:57 +1100 Subject: [PATCH 025/348] Modifies CMake files, with the objective of not modifying output directories. Now all files are copied after compilation. I also fiddle with the tests to make them more versatile. 
--- CMakeLists.txt | 15 ++-- src/apps/CMakeLists.txt | 100 +++++++++++--------------- src/apps/ojph_compress/CMakeLists.txt | 5 +- src/apps/ojph_expand/CMakeLists.txt | 6 +- src/core/CMakeLists.txt | 27 ++++--- tests/CMakeLists.txt | 33 ++++++--- tests/test_executables.cpp | 8 +-- 7 files changed, 104 insertions(+), 90 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 341502fd..af40c1a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,15 @@ option(OJPH_ENABLE_TIFF_SUPPORT "Enables input and output support for TIFF files option(OJPH_BUILD_TESTS "Enables building test code" OFF) option(OJPH_BUILD_EXECUTABLES "Enables building command line executables" ON) +## specify output directories +set(OJPH_DESTINATION_BINARY ${CMAKE_BINARY_DIR}/../bin) +set(OJPH_DESTINATION_ARCHIVE ${CMAKE_BINARY_DIR}/../lib) + +## specify the top directory for TIFF library (only for Micrsoft) +if (MSVC) + set(TIFF_PATH "C:\\Program Files\\tiff") +endif() + ## Setting some of the options if EMSCRIPTEN is the compiler if(EMSCRIPTEN) set(OJPH_DISABLE_INTEL_SIMD ON) @@ -68,12 +77,6 @@ elseif (OJPH_ENABLE_INTEL_AVX512) add_compile_options(-DOJPH_ENABLE_INTEL_AVX512) endif() -## specify output directories -## this will be refined further for Debug and Release builds in included CMakeLists.txt -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../bin) -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/../lib) - ## Build library and applications add_subdirectory(src/core) if (OJPH_BUILD_EXECUTABLES) diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index a4a04df1..19b9bffa 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -1,60 +1,46 @@ -if(OJPH_BUILD_EXECUTABLES) - - # Add tiff library - ############################################################ - if( OJPH_ENABLE_TIFF_SUPPORT ) - - if( MSVC ) - - set(TIFF_PATH "C:\\Program Files\\tiff") - set(TIFF_INCLUDE_DIR 
"${TIFF_PATH}\\include" CACHE PATH "the directory containing the TIFF headers") - set(TIFF_LIBRARY_DEBUG "${TIFF_PATH}\\lib\\tiffd.lib" CACHE FILEPATH "the path to the TIFF library for debug configurations") - set(TIFF_LIBRARY_RELEASE "${TIFF_PATH}\\lib\\tiff.lib" CACHE FILEPATH "the path to the TIFF library for release configurations") - set(TIFFXX_LIBRARY_DEBUG "${TIFF_PATH}\\lib\\tiffxxd.lib" CACHE FILEPATH "the path to the TIFFXX library for debug configurations") - set(TIFFXX_LIBRARY_RELEASE "${TIFF_PATH}\\lib\\tiffxx.lib" CACHE FILEPATH "the path to the TIFFXX library for release configurations") - - message( STATUS "WIN32 detected: Setting CMakeCache TIFF values as follows, use CMake-gui Advanced to modify them" ) - message( STATUS " TIFF_INCLUDE_DIR : \"${TIFF_INCLUDE_DIR}\" " ) - message( STATUS " TIFF_LIBRARY_DEBUG : \"${TIFF_LIBRARY_DEBUG}\" " ) - message( STATUS " TIFF_LIBRARY_RELEASE : \"${TIFF_LIBRARY_RELEASE}\" " ) - message( STATUS " TIFFXX_LIBRARY_DEBUG : \"${TIFFXX_LIBRARY_DEBUG}\" " ) - message( STATUS " TIFFXX_LIBRARY_RELEASE : \"${TIFFXX_LIBRARY_RELEASE}\" " ) - - endif( MSVC ) - - FIND_PACKAGE( TIFF ) - - if( TIFF_FOUND ) - set(USE_TIFF TRUE CACHE BOOL "Add TIFF support") - include_directories( ${TIFF_INCLUDE_DIR} ) - add_definitions(-DOJPH_ENABLE_TIFF_SUPPORT) - endif( TIFF_FOUND ) - - endif() - ############################################################ - - ## change folders but only for Microsoft - if(MSVC) - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) - endif() - - ## Build executables - add_subdirectory(ojph_expand) - add_subdirectory(ojph_compress) - - if 
(MSVC AND OJPH_ENABLE_TIFF_SUPPORT) - if (CMAKE_BUILD_TYPE MATCHES "Release") - file(COPY "${TIFF_PATH}\\bin\\tiff.dll" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") - file(COPY "${TIFF_PATH}\\bin\\tiffxx.dll" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") - elseif(CMAKE_BUILD_TYPE MATCHES "Debug") - file(COPY "${TIFF_PATH}\\bin\\tiffd.dll" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") - file(COPY "${TIFF_PATH}\\bin\\tiffxxd.dll" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") - endif() +# Add tiff library +############################################################ +if( OJPH_ENABLE_TIFF_SUPPORT ) + + if( MSVC ) + + set(TIFF_INCLUDE_DIR "${TIFF_PATH}\\include" CACHE PATH "the directory containing the TIFF headers") + set(TIFF_LIBRARY_DEBUG "${TIFF_PATH}\\lib\\tiffd.lib" CACHE FILEPATH "the path to the TIFF library for debug configurations") + set(TIFF_LIBRARY_RELEASE "${TIFF_PATH}\\lib\\tiff.lib" CACHE FILEPATH "the path to the TIFF library for release configurations") + set(TIFFXX_LIBRARY_DEBUG "${TIFF_PATH}\\lib\\tiffxxd.lib" CACHE FILEPATH "the path to the TIFFXX library for debug configurations") + set(TIFFXX_LIBRARY_RELEASE "${TIFF_PATH}\\lib\\tiffxx.lib" CACHE FILEPATH "the path to the TIFFXX library for release configurations") + + message( STATUS "WIN32 detected: Setting CMakeCache TIFF values as follows, use CMake-gui Advanced to modify them" ) + message( STATUS " TIFF_INCLUDE_DIR : \"${TIFF_INCLUDE_DIR}\" " ) + message( STATUS " TIFF_LIBRARY_DEBUG : \"${TIFF_LIBRARY_DEBUG}\" " ) + message( STATUS " TIFF_LIBRARY_RELEASE : \"${TIFF_LIBRARY_RELEASE}\" " ) + message( STATUS " TIFFXX_LIBRARY_DEBUG : \"${TIFFXX_LIBRARY_DEBUG}\" " ) + message( STATUS " TIFFXX_LIBRARY_RELEASE : \"${TIFFXX_LIBRARY_RELEASE}\" " ) + + endif( MSVC ) + + FIND_PACKAGE( TIFF ) + + if( TIFF_FOUND ) + set(USE_TIFF TRUE CACHE BOOL "Add TIFF support") + include_directories( ${TIFF_INCLUDE_DIR} ) + add_definitions(-DOJPH_ENABLE_TIFF_SUPPORT) + endif( TIFF_FOUND ) + +endif() 
+############################################################ + +## Build executables +add_subdirectory(ojph_expand) +add_subdirectory(ojph_compress) + +if (MSVC AND OJPH_ENABLE_TIFF_SUPPORT) + if (CMAKE_BUILD_TYPE MATCHES "Release") + file(COPY "${TIFF_PATH}\\bin\\tiff.dll" DESTINATION "${OJPH_DESTINATION_BINARY}") + file(COPY "${TIFF_PATH}\\bin\\tiffxx.dll" DESTINATION "${OJPH_DESTINATION_BINARY}") + elseif(CMAKE_BUILD_TYPE MATCHES "Debug") + file(COPY "${TIFF_PATH}\\bin\\tiffd.dll" DESTINATION "${OJPH_DESTINATION_BINARY}") + file(COPY "${TIFF_PATH}\\bin\\tiffxxd.dll" DESTINATION "${OJPH_DESTINATION_BINARY}") endif() endif() diff --git a/src/apps/ojph_compress/CMakeLists.txt b/src/apps/ojph_compress/CMakeLists.txt index 3dd3c59a..420c303b 100644 --- a/src/apps/ojph_compress/CMakeLists.txt +++ b/src/apps/ojph_compress/CMakeLists.txt @@ -48,6 +48,9 @@ else() endif() install(TARGETS ojph_compress DESTINATION bin) - endif() +## copy target files to the top bin folder +add_custom_command(TARGET ojph_compress POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_BINARY} +) diff --git a/src/apps/ojph_expand/CMakeLists.txt b/src/apps/ojph_expand/CMakeLists.txt index f7b1ed33..5d43239d 100644 --- a/src/apps/ojph_expand/CMakeLists.txt +++ b/src/apps/ojph_expand/CMakeLists.txt @@ -48,5 +48,9 @@ else() endif() install(TARGETS ojph_expand DESTINATION bin) - endif() + +## copy target files to the top bin folder +add_custom_command(TARGET ojph_expand POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_BINARY} +) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 1f41da8c..e4591e34 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,14 +1,4 @@ -## change folders but only for Microsoft -if (MSVC) - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/../lib) - 
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../bin) - set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/../lib) -endif() - include_directories(common) file(GLOB CODESTREAM "codestream/*.cpp" "codestream/*.h") @@ -104,3 +94,20 @@ else() SOVERSION "${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}" VERSION "${OPENJPH_VERSION}") endif() + +## copy target files to the top bin folder +add_custom_command(TARGET openjph POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_BINARY} + COMMAND ${CMAKE_COMMAND} -E make_directory ${OJPH_DESTINATION_ARCHIVE} + COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_ARCHIVE} +) +if (MSVC) + add_custom_command(TARGET openjph POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "$/${OJPH_LIB_NAME_STRING}.exp" ${OJPH_DESTINATION_ARCHIVE} + ) +endif() +if(EMSCRIPTEN) + add_custom_command(TARGET openjphsimd POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_BINARY} + ) +endif() diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 08fd2f90..864409e9 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -43,17 +43,28 @@ include(GoogleTest) gtest_add_tests(TARGET test_executables) if (MSVC) - if(CMAKE_BUILD_TYPE MATCHES "Debug") - add_custom_command(TARGET test_executables POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "../bin/Debug/gtest.dll" "./Debug/" - COMMAND ${CMAKE_COMMAND} -E copy "../bin/Debug/gtest_main.dll" "./Debug/" - ) - elseif(CMAKE_BUILD_TYPE MATCHES "Release") - add_custom_command(TARGET test_executables POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "../bin/Release/gtest.dll" "./Release/" - COMMAND ${CMAKE_COMMAND} -E copy "../bin/Release/gtest_main.dll" "./Release/" - ) - endif() + add_custom_command(TARGET test_executables POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "../bin/\$(Configuration)/gtest.dll" "./" + COMMAND ${CMAKE_COMMAND} -E copy 
"../bin/\$(Configuration)/gtest_main.dll" "./" + COMMAND ${CMAKE_COMMAND} -E copy "$" "./" + COMMAND ${CMAKE_COMMAND} -E copy "$" "./" + COMMAND ${CMAKE_COMMAND} -E copy "$" "./" + ) + add_custom_command(TARGET compare_files POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "./\$(Configuration)/compare_files.exe" "./" + ) + add_custom_command(TARGET mse_pae POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "./\$(Configuration)/mse_pae.exe" "./" + ) + if (MSVC AND OJPH_ENABLE_TIFF_SUPPORT) + if (CMAKE_BUILD_TYPE MATCHES "Release") + file(COPY "${TIFF_PATH}\\bin\\tiff.dll" DESTINATION "./") + file(COPY "${TIFF_PATH}\\bin\\tiffxx.dll" DESTINATION "./") + elseif(CMAKE_BUILD_TYPE MATCHES "Debug") + file(COPY "${TIFF_PATH}\\bin\\tiffd.dll" DESTINATION "./") + file(COPY "${TIFF_PATH}\\bin\\tiffxxd.dll" DESTINATION "./") + endif() + endif() elseif(MSYS) add_custom_command(TARGET test_executables POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "../bin/msys-gtest.dll" "./" diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 7b16b542..908e6ce1 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -97,10 +97,10 @@ int execute(const std::string& cmd, std::string& result) #define SRC_FILE_DIR ".\\jp2k_test_codestreams\\openjph\\" #define OUT_FILE_DIR ".\\" #define REF_FILE_DIR ".\\jp2k_test_codestreams\\openjph\\references\\" - #define MSE_PAE_PATH ".\\Release\\mse_pae" - #define COMPARE_FILES_PATH ".\\Release\\compare_files" - #define EXPAND_EXECUTABLE "..\\..\\bin\\ojph_expand.exe" - #define COMPRESS_EXECUTABLE "..\\..\\bin\\ojph_compress.exe" + #define MSE_PAE_PATH ".\\mse_pae" + #define COMPARE_FILES_PATH ".\\compare_files" + #define EXPAND_EXECUTABLE ".\\ojph_expand.exe" + #define COMPRESS_EXECUTABLE ".\\ojph_compress.exe" #else #define SRC_FILE_DIR "./jp2k_test_codestreams/openjph/" #define OUT_FILE_DIR "./" From 4132afea79fea202359ac9e3a1e4565ac49c8256 Mon Sep 17 00:00:00 2001 From: Brad Hards Date: Sun, 28 Jan 2024 10:51:28 +1100 Subject: 
[PATCH 026/348] include ojph_arch header for OJPH_EXPORT definition --- src/core/common/ojph_codestream.h | 1 + src/core/common/ojph_params.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/core/common/ojph_codestream.h b/src/core/common/ojph_codestream.h index 2d7e74ad..a2698594 100644 --- a/src/core/common/ojph_codestream.h +++ b/src/core/common/ojph_codestream.h @@ -41,6 +41,7 @@ #include +#include "ojph_arch.h" #include "ojph_defs.h" namespace ojph { diff --git a/src/core/common/ojph_params.h b/src/core/common/ojph_params.h index 2ab85db4..76ab4c1b 100644 --- a/src/core/common/ojph_params.h +++ b/src/core/common/ojph_params.h @@ -39,6 +39,7 @@ #ifndef OJPH_PARAMS_H #define OJPH_PARAMS_H +#include "ojph_arch.h" #include "ojph_base.h" namespace ojph { From c0ef77b6b933598bfa54790a0422cdf975076999 Mon Sep 17 00:00:00 2001 From: atzlinux Date: Sun, 28 Jan 2024 17:38:49 +0800 Subject: [PATCH 027/348] Update ojph_codestream_local.cpp, fix typo indivdual --> individual --- src/core/codestream/ojph_codestream_local.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index 3bee6719..df2f18c4 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -799,7 +799,7 @@ namespace ojph { skip_marker(file, "CRG", "CRG has been ignored; CRG is related to" " where the Cb and Cr colour components are co-sited or located" " with respect to the Y' luma component. 
Perhaps, it is better" - " to get the indivdual components and assemble the samples" + " to get the individual components and assemble the samples" " according to your needs", OJPH_MSG_LEVEL::INFO, false); else if (marker_idx == 13) @@ -1195,4 +1195,4 @@ namespace ojph { } } -} \ No newline at end of file +} From 46cc02c306b0c16770907d24eb7ebb053e4e2b64 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Mon, 5 Feb 2024 22:30:44 +1100 Subject: [PATCH 028/348] This fixes the case when we have one high-pass line. There is another bug when two rows are transformed. --- src/core/codestream/ojph_resolution.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index e1d78cba..ff0b081e 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -585,6 +585,9 @@ namespace ojph { } else { + float* sp = lines[0].f32; + for (ui32 i = width; i > 0; --i) + *sp++ *= 2.0f; //push to H irrev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), bands[3].get_line(), width, horz_even); From 803782eee8d74fbac37966cd12af0eb19734e65f Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Mon, 5 Feb 2024 22:32:32 +1100 Subject: [PATCH 029/348] This fixes tile_size calculation. 
--- src/core/codestream/ojph_params_local.h | 2 +- tests/test_executables.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index 9c169d63..bac0c359 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -192,7 +192,7 @@ namespace ojph { void check_validity() { if (XTsiz == 0 && YTsiz == 0) - { XTsiz = Xsiz - XOsiz; YTsiz = Ysiz - YOsiz; } + { XTsiz = Xsiz + XOsiz; YTsiz = Ysiz + YOsiz; } if (Xsiz == 0 || Ysiz == 0 || XTsiz == 0 || YTsiz == 0) OJPH_ERROR(0x00040001, "You cannot set image extent nor tile size to zero"); diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 908e6ce1..50e8f6d4 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -1220,8 +1220,8 @@ TEST(TestExecutables, SimpleEncIrv97TallNarrow) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_tall_narrow1.j2c -image_offset {1,0} -qstep 0.1 TEST(TestExecutables, SimpleEncIrv97TallNarrow1) { - double mse[3] = { 96.7935, 69.6824, 66.7822}; - int pae[3] = { 41, 39, 35}; + double mse[3] = { 100.905762, 76.113037, 72.834717}; + int pae[3] = { 39, 35, 34}; run_ojph_compress("tall_narrow.ppm", "simple_enc_irv97_tall_narrow1", "", "j2c", "-image_offset \"{1,0}\" -qstep 0.1"); From 3c3bb0c6ba5acdd0023f428b82523089f37719d6 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Tue, 6 Feb 2024 08:18:27 +1100 Subject: [PATCH 030/348] This fixes the bug when a resolution has only two rows. 
--- src/core/codestream/ojph_resolution.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index ff0b081e..82371bd7 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -553,8 +553,10 @@ namespace ojph { bands[1].push_line(); child_res->push_line(); } - irrev_vert_wvlt_K(lines + 2, lines + 5, - false, width); + if (cur_line >= 2) + irrev_vert_wvlt_K(lines + 2, lines + 5, false, width); + else + irrev_vert_wvlt_K(lines, lines + 5, false, width); irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), bands[3].get_line(), width, horz_even); bands[2].push_line(); From f4dc9f59a10a1d78aea8db999a97f3a5db4bfe45 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 8 Feb 2024 08:00:45 +1100 Subject: [PATCH 031/348] Version bump. --- src/core/common/ojph_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/common/ojph_version.h b/src/core/common/ojph_version.h index e7fcc900..7b9fc822 100644 --- a/src/core/common/ojph_version.h +++ b/src/core/common/ojph_version.h @@ -35,4 +35,4 @@ #define OPENJPH_VERSION_MAJOR 0 #define OPENJPH_VERSION_MINOR 10 -#define OPENJPH_VERSION_PATCH 3 +#define OPENJPH_VERSION_PATCH 4 From 8f5924d65f6a65dc2e3c4f3083b6c83675138ac2 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 9 Feb 2024 09:10:34 +1100 Subject: [PATCH 032/348] This addresses issue #127 --- src/apps/ojph_compress/CMakeLists.txt | 1 + src/apps/ojph_expand/CMakeLists.txt | 1 + src/core/CMakeLists.txt | 1 + tests/CMakeLists.txt | 1 + 4 files changed, 4 insertions(+) diff --git a/src/apps/ojph_compress/CMakeLists.txt b/src/apps/ojph_compress/CMakeLists.txt index 420c303b..cc9f64bd 100644 --- a/src/apps/ojph_compress/CMakeLists.txt +++ b/src/apps/ojph_compress/CMakeLists.txt @@ -52,5 +52,6 @@ endif() ## copy target files to the top bin folder add_custom_command(TARGET ojph_compress POST_BUILD + 
COMMAND ${CMAKE_COMMAND} -E make_directory ${OJPH_DESTINATION_BINARY} COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_BINARY} ) diff --git a/src/apps/ojph_expand/CMakeLists.txt b/src/apps/ojph_expand/CMakeLists.txt index 5d43239d..e6eab462 100644 --- a/src/apps/ojph_expand/CMakeLists.txt +++ b/src/apps/ojph_expand/CMakeLists.txt @@ -52,5 +52,6 @@ endif() ## copy target files to the top bin folder add_custom_command(TARGET ojph_expand POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${OJPH_DESTINATION_BINARY} COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_BINARY} ) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index e4591e34..5a046cf0 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -97,6 +97,7 @@ endif() ## copy target files to the top bin folder add_custom_command(TARGET openjph POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${OJPH_DESTINATION_BINARY} COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_BINARY} COMMAND ${CMAKE_COMMAND} -E make_directory ${OJPH_DESTINATION_ARCHIVE} COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_ARCHIVE} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 864409e9..dc2a8936 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,6 +4,7 @@ include(FetchContent) FetchContent_Declare( googletest URL https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz + EXCLUDE_FROM_ALL ) # For Windows: Prevent overriding the parent project's compiler/linker settings set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) From 4d1dfe804f50a656093afdc6ef98a906478c5b77 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Tue, 13 Feb 2024 21:27:06 +1100 Subject: [PATCH 033/348] This fixes issue #128. Gtest should not be installed. the .bin folder is removed. version bump. 
--- CMakeLists.txt | 6 +----- bin/.gitignore | 2 -- src/apps/CMakeLists.txt | 10 ---------- src/apps/ojph_compress/CMakeLists.txt | 6 ------ src/apps/ojph_expand/CMakeLists.txt | 6 ------ src/core/CMakeLists.txt | 18 ------------------ src/core/common/ojph_version.h | 2 +- tests/CMakeLists.txt | 13 ++++++++++--- tests/test_executables.cpp | 4 ++-- 9 files changed, 14 insertions(+), 53 deletions(-) delete mode 100644 bin/.gitignore diff --git a/CMakeLists.txt b/CMakeLists.txt index af40c1a2..ed253023 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,11 +19,7 @@ option(OJPH_ENABLE_TIFF_SUPPORT "Enables input and output support for TIFF files option(OJPH_BUILD_TESTS "Enables building test code" OFF) option(OJPH_BUILD_EXECUTABLES "Enables building command line executables" ON) -## specify output directories -set(OJPH_DESTINATION_BINARY ${CMAKE_BINARY_DIR}/../bin) -set(OJPH_DESTINATION_ARCHIVE ${CMAKE_BINARY_DIR}/../lib) - -## specify the top directory for TIFF library (only for Micrsoft) +## specify the top directory for TIFF library (only for Microsoft Visual Studio) if (MSVC) set(TIFF_PATH "C:\\Program Files\\tiff") endif() diff --git a/bin/.gitignore b/bin/.gitignore deleted file mode 100644 index 7d1d8c52..00000000 --- a/bin/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -.DS_Store -* diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index 19b9bffa..acc16701 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -34,13 +34,3 @@ endif() ## Build executables add_subdirectory(ojph_expand) add_subdirectory(ojph_compress) - -if (MSVC AND OJPH_ENABLE_TIFF_SUPPORT) - if (CMAKE_BUILD_TYPE MATCHES "Release") - file(COPY "${TIFF_PATH}\\bin\\tiff.dll" DESTINATION "${OJPH_DESTINATION_BINARY}") - file(COPY "${TIFF_PATH}\\bin\\tiffxx.dll" DESTINATION "${OJPH_DESTINATION_BINARY}") - elseif(CMAKE_BUILD_TYPE MATCHES "Debug") - file(COPY "${TIFF_PATH}\\bin\\tiffd.dll" DESTINATION "${OJPH_DESTINATION_BINARY}") - file(COPY "${TIFF_PATH}\\bin\\tiffxxd.dll" 
DESTINATION "${OJPH_DESTINATION_BINARY}") - endif() -endif() diff --git a/src/apps/ojph_compress/CMakeLists.txt b/src/apps/ojph_compress/CMakeLists.txt index cc9f64bd..bbb77abc 100644 --- a/src/apps/ojph_compress/CMakeLists.txt +++ b/src/apps/ojph_compress/CMakeLists.txt @@ -49,9 +49,3 @@ else() install(TARGETS ojph_compress DESTINATION bin) endif() - -## copy target files to the top bin folder -add_custom_command(TARGET ojph_compress POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${OJPH_DESTINATION_BINARY} - COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_BINARY} -) diff --git a/src/apps/ojph_expand/CMakeLists.txt b/src/apps/ojph_expand/CMakeLists.txt index e6eab462..c0ac185e 100644 --- a/src/apps/ojph_expand/CMakeLists.txt +++ b/src/apps/ojph_expand/CMakeLists.txt @@ -49,9 +49,3 @@ else() install(TARGETS ojph_expand DESTINATION bin) endif() - -## copy target files to the top bin folder -add_custom_command(TARGET ojph_expand POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${OJPH_DESTINATION_BINARY} - COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_BINARY} -) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 5a046cf0..40b9649b 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -94,21 +94,3 @@ else() SOVERSION "${OPENJPH_VERSION_MAJOR}.${OPENJPH_VERSION_MINOR}" VERSION "${OPENJPH_VERSION}") endif() - -## copy target files to the top bin folder -add_custom_command(TARGET openjph POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${OJPH_DESTINATION_BINARY} - COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_BINARY} - COMMAND ${CMAKE_COMMAND} -E make_directory ${OJPH_DESTINATION_ARCHIVE} - COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_ARCHIVE} -) -if (MSVC) - add_custom_command(TARGET openjph POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "$/${OJPH_LIB_NAME_STRING}.exp" ${OJPH_DESTINATION_ARCHIVE} - ) -endif() -if(EMSCRIPTEN) - add_custom_command(TARGET openjphsimd POST_BUILD - 
COMMAND ${CMAKE_COMMAND} -E copy "$" ${OJPH_DESTINATION_BINARY} - ) -endif() diff --git a/src/core/common/ojph_version.h b/src/core/common/ojph_version.h index 7b9fc822..fdf28bc2 100644 --- a/src/core/common/ojph_version.h +++ b/src/core/common/ojph_version.h @@ -35,4 +35,4 @@ #define OPENJPH_VERSION_MAJOR 0 #define OPENJPH_VERSION_MINOR 10 -#define OPENJPH_VERSION_PATCH 4 +#define OPENJPH_VERSION_PATCH 5 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index dc2a8936..03ac41cd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -9,6 +9,7 @@ FetchContent_Declare( # For Windows: Prevent overriding the parent project's compiler/linker settings set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) set(BUILD_GMOCK OFF CACHE BOOL "" FORCE) +option(INSTALL_GTEST "Enable installation of googletest." OFF) FetchContent_MakeAvailable(googletest) # Fetch test files @@ -66,9 +67,15 @@ if (MSVC) file(COPY "${TIFF_PATH}\\bin\\tiffxxd.dll" DESTINATION "./") endif() endif() -elseif(MSYS) +else() add_custom_command(TARGET test_executables POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "../bin/msys-gtest.dll" "./" - COMMAND ${CMAKE_COMMAND} -E copy "../bin/msys-gtest_main.dll" "./" + COMMAND ${CMAKE_COMMAND} -E copy "$" "./" + COMMAND ${CMAKE_COMMAND} -E copy "$" "./" ) + if(MSYS) + add_custom_command(TARGET test_executables POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "../bin/msys-gtest.dll" "./" + COMMAND ${CMAKE_COMMAND} -E copy "../bin/msys-gtest_main.dll" "./" + ) + endif() endif() diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 50e8f6d4..7e6a00cb 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -107,8 +107,8 @@ int execute(const std::string& cmd, std::string& result) #define REF_FILE_DIR "./jp2k_test_codestreams/openjph/references/" #define MSE_PAE_PATH "./mse_pae" #define COMPARE_FILES_PATH "./compare_files" - #define EXPAND_EXECUTABLE "../../bin/ojph_expand" - #define COMPRESS_EXECUTABLE 
"../../bin/ojph_compress" + #define EXPAND_EXECUTABLE "./ojph_expand" + #define COMPRESS_EXECUTABLE "./ojph_compress" #endif #define TOL_DOUBLE 0.01 #define TOL_INTEGER 1 From 61ea3eb1a18c3570efa8a11597251099d7b630bb Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sun, 25 Feb 2024 18:46:08 +1100 Subject: [PATCH 034/348] This removes the hardcoded TIFF_PATH, must use CMAKE_PREFIX_PATH instead. --- CMakeLists.txt | 6 +----- src/apps/CMakeLists.txt | 21 ++++----------------- tests/CMakeLists.txt | 11 ++++------- 3 files changed, 9 insertions(+), 29 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ed253023..c9c6db92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,11 +19,6 @@ option(OJPH_ENABLE_TIFF_SUPPORT "Enables input and output support for TIFF files option(OJPH_BUILD_TESTS "Enables building test code" OFF) option(OJPH_BUILD_EXECUTABLES "Enables building command line executables" ON) -## specify the top directory for TIFF library (only for Microsoft Visual Studio) -if (MSVC) - set(TIFF_PATH "C:\\Program Files\\tiff") -endif() - ## Setting some of the options if EMSCRIPTEN is the compiler if(EMSCRIPTEN) set(OJPH_DISABLE_INTEL_SIMD ON) @@ -52,6 +47,7 @@ endif() message(STATUS "Building ${CMAKE_BUILD_TYPE}") ## C++ version and flags +# C++14 is needed for gtest, otherwise, C++11 is sufficient for the library set(CMAKE_CXX_STANDARD 14) if (MSVC) add_compile_options(-D_CRT_SECURE_NO_WARNINGS) diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index acc16701..b3ef06d5 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -3,29 +3,16 @@ ############################################################ if( OJPH_ENABLE_TIFF_SUPPORT ) - if( MSVC ) - - set(TIFF_INCLUDE_DIR "${TIFF_PATH}\\include" CACHE PATH "the directory containing the TIFF headers") - set(TIFF_LIBRARY_DEBUG "${TIFF_PATH}\\lib\\tiffd.lib" CACHE FILEPATH "the path to the TIFF library for debug configurations") - set(TIFF_LIBRARY_RELEASE 
"${TIFF_PATH}\\lib\\tiff.lib" CACHE FILEPATH "the path to the TIFF library for release configurations") - set(TIFFXX_LIBRARY_DEBUG "${TIFF_PATH}\\lib\\tiffxxd.lib" CACHE FILEPATH "the path to the TIFFXX library for debug configurations") - set(TIFFXX_LIBRARY_RELEASE "${TIFF_PATH}\\lib\\tiffxx.lib" CACHE FILEPATH "the path to the TIFFXX library for release configurations") - - message( STATUS "WIN32 detected: Setting CMakeCache TIFF values as follows, use CMake-gui Advanced to modify them" ) - message( STATUS " TIFF_INCLUDE_DIR : \"${TIFF_INCLUDE_DIR}\" " ) - message( STATUS " TIFF_LIBRARY_DEBUG : \"${TIFF_LIBRARY_DEBUG}\" " ) - message( STATUS " TIFF_LIBRARY_RELEASE : \"${TIFF_LIBRARY_RELEASE}\" " ) - message( STATUS " TIFFXX_LIBRARY_DEBUG : \"${TIFFXX_LIBRARY_DEBUG}\" " ) - message( STATUS " TIFFXX_LIBRARY_RELEASE : \"${TIFFXX_LIBRARY_RELEASE}\" " ) - - endif( MSVC ) - FIND_PACKAGE( TIFF ) if( TIFF_FOUND ) set(USE_TIFF TRUE CACHE BOOL "Add TIFF support") include_directories( ${TIFF_INCLUDE_DIR} ) add_definitions(-DOJPH_ENABLE_TIFF_SUPPORT) + elseif(MSVC) + message(STATUS "TIFF support has been enabled by no path to the TIFF library " + "has been specified; please configure with -DCMAKE_PREFIX_PATH=, " + "or disable TIFF support using -DOJPH_ENABLE_TIFF_SUPPORT=OFF.") endif( TIFF_FOUND ) endif() diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 03ac41cd..48c8f67d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -59,13 +59,10 @@ if (MSVC) COMMAND ${CMAKE_COMMAND} -E copy "./\$(Configuration)/mse_pae.exe" "./" ) if (MSVC AND OJPH_ENABLE_TIFF_SUPPORT) - if (CMAKE_BUILD_TYPE MATCHES "Release") - file(COPY "${TIFF_PATH}\\bin\\tiff.dll" DESTINATION "./") - file(COPY "${TIFF_PATH}\\bin\\tiffxx.dll" DESTINATION "./") - elseif(CMAKE_BUILD_TYPE MATCHES "Debug") - file(COPY "${TIFF_PATH}\\bin\\tiffd.dll" DESTINATION "./") - file(COPY "${TIFF_PATH}\\bin\\tiffxxd.dll" DESTINATION "./") - endif() + file(COPY "${TIFF_INCLUDE_DIR}\\..\\bin\\tiff.dll" 
DESTINATION "./") + file(COPY "${TIFF_INCLUDE_DIR}\\..\\bin\\tiffxx.dll" DESTINATION "./") + file(COPY "${TIFF_INCLUDE_DIR}\\..\\bin\\tiffd.dll" DESTINATION "./") + file(COPY "${TIFF_INCLUDE_DIR}\\..\\bin\\tiffxxd.dll" DESTINATION "./") endif() else() add_custom_command(TARGET test_executables POST_BUILD From 508eb92e32aad7aacd49f4547511d7c41a0795dc Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 29 Mar 2024 07:03:28 +1100 Subject: [PATCH 035/348] Added DFS and ATK, but not fully tested. Need to add COC. Integrate with the rest of the code. --- src/core/codestream/ojph_codestream_local.cpp | 20 +- src/core/codestream/ojph_codestream_local.h | 27 +- src/core/codestream/ojph_params.cpp | 371 ++++++++++++++++-- src/core/codestream/ojph_params_local.h | 206 ++++++++-- src/core/codestream/ojph_subband.cpp | 9 +- src/core/common/ojph_version.h | 4 +- 6 files changed, 554 insertions(+), 83 deletions(-) diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index df2f18c4..c2154fa0 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -81,6 +81,8 @@ namespace ojph { used_qcc_fields = 0; qcc = qcc_store; + used_coc_fields = 0; + coc = coc_store; allocator = new mem_fixed_allocator; elastic_alloc = new mem_elastic_allocator(1048576); //1 megabyte @@ -717,15 +719,15 @@ namespace ojph { { if (msg_level == OJPH_MSG_LEVEL::INFO) { - OJPH_INFO(0x00030001, "%s\n", msg); + OJPH_INFO(0x00030001, "%s", msg); } else if (msg_level == OJPH_MSG_LEVEL::WARN) { - OJPH_WARN(0x00030001, "%s\n", msg); + OJPH_WARN(0x00030001, "%s", msg); } else if (msg_level == OJPH_MSG_LEVEL::ERROR) { - OJPH_ERROR(0x00030001, "%s\n", msg); + OJPH_ERROR(0x00030001, "%s", msg); } else assert(0); @@ -736,8 +738,8 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void codestream::read_headers(infile_base *file) { - ui16 marker_list[17] = { SOC, SIZ, CAP, 
PRF, CPF, COD, COC, QCD, QCC, - RGN, POC, PPM, TLM, PLM, CRG, COM, SOT }; + ui16 marker_list[19] = { SOC, SIZ, CAP, PRF, CPF, COD, COC, QCD, QCC, + RGN, POC, PPM, TLM, PLM, CRG, COM, DFS, ATK, SOT }; find_marker(file, marker_list, 1); //find SOC find_marker(file, marker_list + 1, 1); //find SIZ siz.read(file); @@ -745,7 +747,7 @@ namespace ojph { int received_markers = 0; //check that COD, & QCD received while (true) { - marker_idx = find_marker(file, marker_list + 2, 15); + marker_idx = find_marker(file, marker_list + 2, 17); if (marker_idx == 0) cap.read(file); else if (marker_idx == 1) @@ -805,11 +807,17 @@ namespace ojph { else if (marker_idx == 13) skip_marker(file, "COM", NULL, OJPH_MSG_LEVEL::NO_MSG, false); else if (marker_idx == 14) + dfs.read(file); + else if (marker_idx == 15) + atk.read(file); + else if (marker_idx == 16) break; else OJPH_ERROR(0x00030051, "File ended before finding a tile segment"); } + //qcd.update(&dfs); + if (received_markers != 3) OJPH_ERROR(0x00030052, "markers error, COD and QCD are required"); diff --git a/src/core/codestream/ojph_codestream_local.h b/src/core/codestream/ojph_codestream_local.h index 5e0bbfaf..035b534f 100644 --- a/src/core/codestream/ojph_codestream_local.h +++ b/src/core/codestream/ojph_codestream_local.h @@ -148,20 +148,27 @@ namespace ojph { bool employ_color_transform; int planar; int profile; - ui32 tilepart_div; // tilepart division value - bool need_tlm; // true if tlm markers are needed + ui32 tilepart_div; // tilepart division value + bool need_tlm; // true if tlm markers are needed private: - param_siz siz; - param_cod cod; - param_cap cap; - param_qcd qcd; - param_tlm tlm; + param_siz siz; // image and tile size + param_cod cod; // coding style default + param_cap cap; // extended capabilities + param_qcd qcd; // quantization default + param_tlm tlm; // tile-part lengths - private: // this is to handle qcc + private: // this is to handle qcc and coc int used_qcc_fields; - param_qcc qcc_store[4], 
*qcc; // we allocate 4, - // if not enough, we allocate more + param_qcc *qcc; // quantization component + param_qcc qcc_store[4]; // we allocate 4, we allocate more if needed + int used_coc_fields; + param_coc *coc; // coding style component + param_coc coc_store[4]; // we allocate 4, we allocate more if needed + + private: // these are from Part 2 of the standard + param_dfs dfs; // downsmapling factor styles + param_atk atk; // arbitrary transformation kernels private: mem_fixed_allocator *allocator; diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index fa194431..5243762f 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -417,6 +417,16 @@ namespace ojph { return u; } + ////////////////////////////////////////////////////////////////////////// + static inline + ui64 swap_byte(ui64 t) + { + ui64 u = swap_byte((ui32)(t & 0xFFFFFFFFu)); + u <<= 32; + u |= swap_byte((ui32)(t >> 32)); + return u; + } + ////////////////////////////////////////////////////////////////////////// // // @@ -790,7 +800,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - void param_qcd::set_rev_quant(ui32 bit_depth, + void param_qcd::set_rev_quant(int num_decomps, ui32 bit_depth, bool is_employing_color_transform) { int guard_bits = 1; @@ -815,7 +825,7 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - void param_qcd::set_irrev_quant() + void param_qcd::set_irrev_quant(int num_decomps) { int guard_bits = 1; Sqcd = (ui8)((guard_bits<<5)|0x2);//one guard bit, scalar quantization @@ -859,13 +869,17 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ui32 param_qcd::get_MAGBp() const { //this can be written better, but it is only executed once + + // this assumes a bi-directional wavelet (conventional 
DWT) + ui32 num_decomps = (num_subbands - 1) / 3; + ui32 B = 0; int irrev = Sqcd & 0x1F; if (irrev == 0) //reversible - for (ui32 i = 0; i < 3 * num_decomps + 1; ++i) + for (ui32 i = 0; i < num_subbands; ++i) B = ojph_max(B, (u8_SPqcd[i] >> 3) + get_num_guard_bits() - 1u); else if (irrev == 2) //scalar expounded - for (ui32 i = 0; i < 3 * num_decomps + 1; ++i) + for (ui32 i = 0; i < num_subbands; ++i) { ui32 nb = num_decomps - (i ? (i - 1) / 3 : 0); //decompsition level B = ojph_max(B, (u16_SPqcd[i] >> 11) + get_num_guard_bits() - nb); @@ -877,14 +891,24 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - float param_qcd::irrev_get_delta(ui32 resolution, ui32 subband) const + float param_qcd::irrev_get_delta(const param_dfs* dfs, + ui32 num_decompositions, + ui32 resolution, ui32 subband) const { - assert((resolution == 0 && subband == 0) || - (resolution <= num_decomps && subband > 0 && subband<4)); - assert((Sqcd & 0x1F) == 2); float arr[] = { 1.0f, 2.0f, 2.0f, 4.0f }; + assert((Sqcd & 0x1F) == 2); - ui32 idx = resolution == 0 ? 0 : (resolution - 1) * 3 + subband; + ui32 idx = + dfs->get_subband_idx(num_decompositions, resolution, subband); + if (idx >= num_subbands) { + OJPH_INFO(0x00050101, "Trying to access quantization step size for " + "subband %d when the QCD/QCC marker segment specifies " + "quantization step sizes for %d subbands only. 
To continue " + "decoding, we are using the step size for subband %d, which can " + "produce incorrect results", + idx + 1, num_subbands, num_subbands - 1); + idx = num_subbands - 1; + } int eps = u16_SPqcd[idx] >> 11; float mantissa; mantissa = (float)((u16_SPqcd[idx] & 0x7FF) | 0x800) * arr[subband]; @@ -900,12 +924,22 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - ui32 param_qcd::get_Kmax(ui32 resolution, ui32 subband) const + ui32 param_qcd::get_Kmax(const param_dfs* dfs, ui32 num_decompositions, + ui32 resolution, ui32 subband) const { - assert((resolution == 0 && subband == 0) || - (resolution <= num_decomps && subband > 0 && subband<4)); ui32 num_bits = get_num_guard_bits(); - ui32 idx = resolution == 0 ? 0 : (resolution - 1) * 3 + subband; + ui32 idx = + dfs->get_subband_idx(num_decompositions, resolution, subband); + if (idx >= num_subbands) { + OJPH_INFO(0x00050111, "Trying to access quantization step size for " + "subband %d when the QCD/QCC marker segment specifies " + "quantization step sizes for %d subbands only. 
To continue " + "decoding, we are using the step size for subband %d, which can " + "produce incorrect results", + idx + 1, num_subbands, num_subbands - 1); + idx = num_subbands - 1; + } + int irrev = Sqcd & 0x1F; if (irrev == 0) //reversible; this is (10.22) from the J2K book { @@ -926,7 +960,6 @@ namespace ojph { bool param_qcd::write(outfile_base *file) { int irrev = Sqcd & 0x1F; - ui32 num_subbands = 1 + 3 * num_decomps; //marker size excluding header Lqcd = 3; @@ -976,16 +1009,16 @@ namespace ojph { OJPH_ERROR(0x00050082, "error reading QCD marker"); if ((Sqcd & 0x1F) == 0) { - num_decomps = (Lqcd - 4) / 3; - if (Lqcd != 4 + 3 * num_decomps) + num_subbands = (Lqcd - 3); + if (Lqcd != 3 + num_subbands) OJPH_ERROR(0x00050083, "wrong Lqcd value in QCD marker"); - for (ui32 i = 0; i < 1 + 3 * num_decomps; ++i) + for (ui32 i = 0; i < num_subbands; ++i) if (file->read(&u8_SPqcd[i], 1) != 1) OJPH_ERROR(0x00050084, "error reading QCD marker"); } else if ((Sqcd & 0x1F) == 1) { - num_decomps = 0; + num_subbands = 0; OJPH_ERROR(0x00050089, "Scalar derived quantization is not supported yet in QCD marker"); if (Lqcd != 5) @@ -993,10 +1026,10 @@ namespace ojph { } else if ((Sqcd & 0x1F) == 2) { - num_decomps = (Lqcd - 5) / 6; - if (Lqcd != 5 + 6 * num_decomps) + num_subbands = (Lqcd - 3) / 2; + if (Lqcd != 3 + 2 * num_subbands) OJPH_ERROR(0x00050086, "wrong Lqcd value in QCD marker"); - for (ui32 i = 0; i < 1 + 3 * num_decomps; ++i) + for (ui32 i = 0; i < num_subbands; ++i) { if (file->read(&u16_SPqcd[i], 2) != 2) OJPH_ERROR(0x00050087, "error reading QCD marker"); @@ -1036,20 +1069,19 @@ namespace ojph { } if (file->read(&Sqcd, 1) != 1) OJPH_ERROR(0x000500A4, "error reading QCC marker"); + ui32 offset = num_comps < 257 ? 4 : 5; if ((Sqcd & 0x1F) == 0) { - ui32 offset = num_comps < 257 ? 
5 : 6; - num_decomps = (Lqcd - offset) / 3; - if (Lqcd != offset + 3 * num_decomps) + num_subbands = (Lqcd - offset); + if (Lqcd != offset + num_subbands) OJPH_ERROR(0x000500A5, "wrong Lqcd value in QCC marker"); - for (ui32 i = 0; i < 1 + 3 * num_decomps; ++i) + for (ui32 i = 0; i < num_subbands; ++i) if (file->read(&u8_SPqcd[i], 1) != 1) OJPH_ERROR(0x000500A6, "error reading QCC marker"); } else if ((Sqcd & 0x1F) == 1) { - ui32 offset = num_comps < 257 ? 6 : 7; - num_decomps = 0; + num_subbands = 0; OJPH_ERROR(0x000500AB, "Scalar derived quantization is not supported yet in QCC marker"); if (Lqcd != offset) @@ -1057,11 +1089,10 @@ namespace ojph { } else if ((Sqcd & 0x1F) == 2) { - ui32 offset = num_comps < 257 ? 6 : 7; - num_decomps = (Lqcd - offset) / 6; - if (Lqcd != offset + 6 * num_decomps) + num_subbands = (Lqcd - offset) / 2; + if (Lqcd != offset + 2 * num_subbands) OJPH_ERROR(0x000500A8, "wrong Lqcc value in QCC marker"); - for (ui32 i = 0; i < 1 + 3 * num_decomps; ++i) + for (ui32 i = 0; i < num_subbands; ++i) { if (file->read(&u16_SPqcd[i], 2) != 2) OJPH_ERROR(0x000500A9, "error reading QCC marker"); @@ -1260,6 +1291,280 @@ namespace ojph { return result; } - } + ////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + ////////////////////////////////////////////////////////////////////////// -} + ////////////////////////////////////////////////////////////////////////// + const param_dfs* param_dfs::get_dfs(int index) const + { + const param_dfs* p = this; + while (p && p->Sdfs != index) + p = p->next; + return p; + } + + ////////////////////////////////////////////////////////////////////////// + param_dfs::dfs_dwt_type param_dfs::get_dwt_type(ui32 decomp_level) const + { + assert(decomp_level > 0 && decomp_level <= Ids); + + decomp_level = ojph_min(decomp_level, Ids); + ui8 d = decomp_level - 1; // decomp_level starts from 1 + ui8 idx = d >> 2; // complete bytes + ui8 bits = d & 0x3; // bit within the 
bytes + ui8 val = (Ddfs[idx] >> (6 - 2 * bits)) & 0x3; + return (dfs_dwt_type)val; + } + + ////////////////////////////////////////////////////////////////////////// + int param_dfs::get_subband_idx(ui32 num_decompositions, ui32 resolution, + ui32 subband) const + { + int idx; + if (this != NULL) + { + assert((resolution == 0 && subband == 0) || + (resolution > 0 && resolution <= Ids && + subband > 0 && subband < 4)); + + ui32 ns[4] = { 0, 3, 2, 2 }; + ui32 off[4] = {}; + + idx = 0; + if (resolution > 0) + { + idx = 0; + ui32 i = 1; + for (; i < resolution; ++i) + idx += ns[get_dwt_type(num_decompositions - i + 1)]; + dfs_dwt_type t = get_dwt_type(num_decompositions - i + 1); + idx += subband; + if (t == VERT_DWT && subband == 2) + --idx; + } + } + else + { + assert(subband >= 0 && subband < 4); + idx = resolution ? (resolution - 1) * 3 + subband : 0; + } + + return idx; + } + + ////////////////////////////////////////////////////////////////////////// + bool param_dfs::read(infile_base *file) + { + if (Ldfs != 0) { // this param_dfs is used + param_dfs* p = this; + while (p->next != NULL) + p = p->next; + p->next = new param_dfs; + p = p->next; + return p->read(file); + } + + if (file->read(&Ldfs, 2) != 2) + OJPH_ERROR(0x000500D1, "error reading DFS-Ldfs parameter"); + Ldfs = swap_byte(Ldfs); + if (file->read(&Sdfs, 2) != 2) + OJPH_ERROR(0x000500D2, "error reading DFS-Sdfs parameter"); + Sdfs = swap_byte(Sdfs); + if (Sdfs > 15) + OJPH_ERROR(0x000500D3, "The DFS-Sdfs parameter is %d, which is " + "larger than the permissible 15", Sdfs); + ui8 t, l_Ids = 0; + if (file->read(&l_Ids, 1) != 1) + OJPH_ERROR(0x000500D4, "error reading DFS-Ids parameter"); + constexpr int max_Ddfs = sizeof(Ddfs) * 4; + if (l_Ids > max_Ddfs) + OJPH_INFO(0x000500D5, "The DFS-Ids parameter is %d; while this is " + "valid, the number is unnessarily large -- you do not need more " + "than %d. Please contact me regarding this issue.", + l_Ids, max_Ddfs); + Ids = l_Ids < max_Ddfs ? 
l_Ids : max_Ddfs; + for (int i = 0; i < Ids; i += 4) + if (file->read(&Ddfs[i / 4], 1) != 1) + OJPH_ERROR(0x000500D6, "error reading DFS-Ddfs parameters"); + for (int i = Ids; i < l_Ids; i += 4) + if (file->read(&t, 1) != 1) + OJPH_ERROR(0x000500D7, "error reading DFS-Ddfs parameters"); + return true; + } + + ////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + ////////////////////////////////////////////////////////////////////////// + + ////////////////////////////////////////////////////////////////////////// + const param_atk* param_atk::get_atk(int index) const + { + const param_atk* p = this; + while (p && p->get_index() != index) + p = p->next; + return p; + } + + ////////////////////////////////////////////////////////////////////////// + bool param_atk::read_coefficient(infile_base *file, float &K) + { + int coeff_type = get_coeff_type(); + if (coeff_type == 0) { // 8bit + ui8 v; + if (file->read(&v, 1) != 1) return false; + K = v; + } + else if (coeff_type == 1) { // 16bit + ui16 v; + if (file->read(&v, 2) != 2) return false; + K = swap_byte(v); + } + else if (coeff_type == 2) { // float + if (file->read(&K, 4) != 4) return false; + ui32 t = swap_byte(*(ui32*)&K); + K = *(float*)&t; + } + else if (coeff_type == 3) { // double + double v; + if (file->read(&v, 8) != 8) return false; + ui64 t = swap_byte(*(ui64*)&v); + double u = *(float*)&t; + K = (float)u; + } + else if (coeff_type == 4) { // 128 bit float + ui64 v, v1; + if (file->read(&v, 8) != 8) return false; + if (file->read(&v1, 8) != 8) return false; // not needed + v = swap_byte(v); + + // convert the MSB of 128b float to 32b float + // 32b float has 1 sign bit, 8 exponent (offset 127), 23 mantissa + // 128b float has 1 sign bit, 15 exponent (offset 16383), 112 mantissa + si32 t1 = (si32)((v >> 48) & 0x7FFF); // exponent + t1 -= 16383; + t1 += 127; + t1 = t1 & 0xFF; // removes MSBs if negative + t1 <<= 23; // move bits to their location + ui32 t = 0; 
+ t |= ((ui32)(v >> 32) & 0x80000000); // copy sign bit + t |= t1; // copy exponent + t |= (ui32)((v >> 25) & 0x007FFFFF); // copy 23 mantissa + K = *(float*)&t; + } + return true; + } + + + ////////////////////////////////////////////////////////////////////////// + bool param_atk::read_coefficient(infile_base *file, si16 &K) + { + int coeff_type = get_coeff_type(); + if (coeff_type == 0) { + ui8 v; + if (file->read(&v, 1) != 1) return false; + K = v; + } + else if (coeff_type == 1) { + ui16 v; + if (file->read(&v, 2) != 2) return false; + v = swap_byte(v); + K = v; + } + else + return false; + return true; + } + + ////////////////////////////////////////////////////////////////////////// + bool param_atk::read(infile_base *file) + { + if (Latk != 0) { // this param_atk is used + param_atk *p = this; + while (p->next != NULL) + p = p->next; + p->next = new param_atk; + p = p->next; + return p->read(file); + } + + if (file->read(&Latk, 2) != 2) + OJPH_ERROR(0x000500E1, "error reading ATK-Latk parameter"); + Latk = swap_byte(Latk); + if (file->read(&Satk, 2) != 2) + OJPH_ERROR(0x000500E2, "error reading ATK-Satk parameter"); + Satk = swap_byte(Satk); + if (is_m_init0() == false) // only even-indexed is supported + OJPH_ERROR(0x000500E3, "ATK-Satk parameter sets m_init to 1, " + "requiring odd-indexed subsequence in first reconstruction step, " + "which is not supported yet."); + if (is_whole_sample() == false) // ARB filter not supported + OJPH_ERROR(0x000500E4, "ATK-Satk parameter specified ARB filter, " + "which is not supported yet."); + if (is_reversible() && get_coeff_type() >= 2) // reversible & float + OJPH_ERROR(0x000500E5, "ATK-Satk parameter does not make sense. 
" + "It employs floats with reversible filtering."); + if (is_reversible() == false) + if (read_coefficient(file, Katk) == false) + OJPH_ERROR(0x000500E6, "error reading ATK-Katk parameter"); + if (file->read(&Natk, 1) != 1) + OJPH_ERROR(0x000500E7, "error reading ATK-Natk parameter"); + if (Natk > max_steps) { + if (d != d_store) // was this allocated -- very unlikely + delete[] d; + d = new data[Natk]; + max_steps = Natk; + } + + if (is_reversible()) + { + for (int s = 0; s < Natk; ++s) + { + if (file->read(&d[s].rev.Eatk, 1) != 1) + OJPH_ERROR(0x000500E8, "error reading ATK-Eatk parameter"); + if (file->read(&d[s].rev.Batk, 2) != 2) + OJPH_ERROR(0x000500E9, "error reading ATK-Batk parameter"); + d[s].rev.Batk = (si16)swap_byte((ui16)d[s].rev.Batk); + ui8 LCatk; + if (file->read(&LCatk, 1) != 1) + OJPH_ERROR(0x000500EA, "error reading ATK-LCatk parameter"); + if (LCatk == 0) + OJPH_ERROR(0x000500EB, "Encountered a ATK-LCatk value of zero; " + "something is wrong."); + if (LCatk > 1) + OJPH_ERROR(0x000500EC, "ATK-LCatk value greater than 1; " + "that is, a multitap filter is not supported"); + if (read_coefficient(file, d[s].rev.Aatk) == false) + OJPH_ERROR(0x000500ED, "Error reding ATK-Aatk parameter"); + } + } + else + { + for (int s = 0; s < Natk; ++s) + { + ui8 LCatk; + if (file->read(&LCatk, 1) != 1) + OJPH_ERROR(0x000500EE, "error reading ATK-LCatk parameter"); + if (LCatk == 0) + OJPH_ERROR(0x000500EF, "Encountered a ATK-LCatk value of zero; " + "something is wrong."); + if (LCatk > 1) + OJPH_ERROR(0x000500F0, "ATK-LCatk value greater than 1; " + "that is, a multitap filter is not supported."); + if (read_coefficient(file, d[s].irv.Aatk) == false) + OJPH_ERROR(0x000500F1, "Error reding ATK-Aatk parameter"); + } + } + + return true; + } + } // !local namespace +} // !ojph namespace diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index bac0c359..acfd0347 100644 --- a/src/core/codestream/ojph_params_local.h +++ 
b/src/core/codestream/ojph_params_local.h @@ -96,14 +96,26 @@ namespace ojph { //////////////////////////////////////////////////////////////////////////// enum OJPH_TILEPART_DIVISIONS: ui32 { - OJPH_TILEPART_NO_DIVISIONS = 0x0, // no divisions to tile parts - OJPH_TILEPART_RESOLUTIONS = 0x1, - OJPH_TILEPART_COMPONENTS = 0x2, - OJPH_TILEPART_LAYERS = 0x4, // these are meaningless with HTJ2K + OJPH_TILEPART_NO_DIVISIONS = 0x0, // no divisions to tile parts + OJPH_TILEPART_RESOLUTIONS = 0x1, + OJPH_TILEPART_COMPONENTS = 0x2, + OJPH_TILEPART_LAYERS = 0x4, // these are meaningless with HTJ2K }; namespace local { + //defined here + struct param_siz; + struct param_cod; + struct param_qcd; + struct param_qcc; + struct param_cap; + struct param_sot; + struct param_tlm; + struct param_coc; + struct param_dfs; + struct param_atk; + ////////////////////////////////////////////////////////////////////////// enum JP2K_MARKER : ui16 { @@ -111,6 +123,7 @@ namespace ojph { CAP = 0xFF50, //extended capability SIZ = 0xFF51, //image and tile size (required) COD = 0xFF52, //coding style default (required) + COC = 0xFF53, //coding style component TLM = 0xFF55, //tile-part lengths PRF = 0xFF56, //profile PLM = 0xFF57, //packet length, main header @@ -118,19 +131,20 @@ namespace ojph { CPF = 0xFF59, //corresponding profile values QCD = 0xFF5C, //qunatization default (required) QCC = 0xFF5D, //quantization component + RGN = 0xFF5E, //region of interest + POC = 0xFF5F, //progression order change + PPM = 0xFF60, //packed packet headers, main header + PPT = 0xFF61, //packed packet headers, tile-part header + CRG = 0xFF63, //component registration COM = 0xFF64, //comment + DFS = 0xFF72, //downsampling factor styles + ADS = 0xFF73, //arbitrary decomposition styles + ATK = 0xFF79, //arbitrary transformation kernels SOT = 0xFF90, //start of tile-part SOP = 0xFF91, //start of packet EPH = 0xFF92, //end of packet SOD = 0xFF93, //start of data EOC = 0xFFD9, //end of codestream (required) - - COC = 
0xFF53, //coding style component - RGN = 0xFF5E, //region of interest - POC = 0xFF5F, //progression order change - PPM = 0xFF60, //packed packet headers, main header - PPT = 0xFF61, //packed packet headers, tile-part header - CRG = 0xFF63, //component registration }; ////////////////////////////////////////////////////////////////////////// @@ -442,23 +456,23 @@ namespace ojph { Sqcd = 0; for (int i = 0; i < 97; ++i) u16_SPqcd[i] = 0; - num_decomps = 0; - base_delta = -1.0f; + num_subbands = 0; + base_delta = -1.0f; } void set_delta(float delta) { base_delta = delta; } - void set_rev_quant(ui32 bit_depth, bool is_employing_color_transform); - void set_irrev_quant(); void check_validity(const param_siz& siz, const param_cod& cod) { - num_decomps = cod.get_num_decompositions(); + int num_decomps = cod.get_num_decompositions(); + num_subbands = 1 + 3 * num_decomps; if (cod.is_reversible()) { ui32 bit_depth = 0; for (ui32 i = 0; i < siz.get_num_components(); ++i) bit_depth = ojph_max(bit_depth, siz.get_bit_depth(i)); - set_rev_quant(bit_depth, cod.is_employing_color_transform()); + set_rev_quant(num_decomps, bit_depth, + cod.is_employing_color_transform()); } else { @@ -466,21 +480,28 @@ namespace ojph { ui32 bit_depth = 0; for (ui32 i = 0; i < siz.get_num_components(); ++i) bit_depth = - ojph_max(bit_depth, siz.get_bit_depth(i) + siz.is_signed(i)); + ojph_max(bit_depth, siz.get_bit_depth(i) + siz.is_signed(i)); base_delta = 1.0f / (float)(1 << bit_depth); } - set_irrev_quant(); - } + set_irrev_quant(num_decomps); + } } - ui32 get_num_guard_bits() const; ui32 get_MAGBp() const; - ui32 get_Kmax(ui32 resolution, ui32 subband) const; - float irrev_get_delta(ui32 resolution, ui32 subband) const; + ui32 get_Kmax(const param_dfs* dfs, ui32 num_decompositions, + ui32 resolution, ui32 subband) const; + float irrev_get_delta(const param_dfs* dfs, + ui32 num_decompositions, + ui32 resolution, ui32 subband) const; bool write(outfile_base *file); void read(infile_base *file); + 
protected: + void set_rev_quant(int num_decomps, ui32 bit_depth, + bool is_employing_color_transform); + void set_irrev_quant(int num_decomps); + protected: ui16 Lqcd; ui8 Sqcd; @@ -489,8 +510,9 @@ namespace ojph { ui8 u8_SPqcd[97]; ui16 u16_SPqcd[97]; }; - ui32 num_decomps; - float base_delta; + ui32 num_subbands; // number of subbands + float base_delta; // base quantization step size -- all other + // step sizes are derived from it. }; /////////////////////////////////////////////////////////////////////////// @@ -502,7 +524,6 @@ namespace ojph { /////////////////////////////////////////////////////////////////////////// struct param_qcc : public param_qcd { - //friend ::ojph::param_qcc; public: param_qcc() : param_qcd() { comp_idx = 0; } @@ -627,9 +648,136 @@ namespace ojph { Ttlm_Ptlm_pair* pairs; ui32 num_pairs; ui32 next_pair_index; - }; - } -} + + /////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + /////////////////////////////////////////////////////////////////////////// + struct param_coc : public param_cod + { + + }; + + /////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + /////////////////////////////////////////////////////////////////////////// + struct param_dfs + { + public: + enum dfs_dwt_type : ui8 { + NO_DWT = 0, // no wavelet transform + BIDIR_DWT = 1, // bidirectional DWT (this the conventional DWT) + HORZ_DWT = 2, // horizontal only DWT transform + VERT_DWT = 3, // vertical only DWT transform + }; + + public: // member functions + param_dfs() { memset(this, 0, sizeof(param_dfs)); } + ~param_dfs() { if (next) delete next; } + void init() { memset(this, 0, sizeof(param_dfs)); } + bool read(infile_base *file); + bool exists() const { return Ldfs != 0; } + + // get_dfs return a dfs structure Sdfs == index, or NULL if not found + const param_dfs* get_dfs(int index) const; + // decomp_level is the decomposition level, starting from 1 for highest + 
// resolution to num_decomps for the coarsest resolution + dfs_dwt_type get_dwt_type(ui32 decomp_level) const; + int get_subband_idx(ui32 num_decompositions, ui32 resolution, + ui32 subband) const; + + private: // member variables + ui16 Ldfs; // length of the segment marker + ui16 Sdfs; // index of this DFS marker segment + ui8 Ids; // number of elements in Ddfs, 2 bits per sub-level + ui8 Ddfs[8]; // a string defining number of decomposition sub-levels + // 8 bytes should be enough for 32 levels + param_dfs* next; // used for linking other dfs segments + }; + + /////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + /////////////////////////////////////////////////////////////////////////// + struct param_atk + { + // Limitations: + // Arbitrary filters (ARB) are not supported + // Up to 6 steps are supported -- more than 6 are not supported + // Only one coefficient per step -- first order filter + // Only even-indexed subsequence in first reconstruction step, + // m_init = 0 is supported + + public: // data structures used by this object + struct irv_data { + // si8 Oatk; // only for arbitrary filter + // ui8 LCatk; // number of lifting coefficients in a step + float Aatk; // lifting coefficient + }; + + struct rev_data { + // si8 Oatk; // only for arbitrary filter, offset of filter + ui8 Eatk; // only for reversible, epsilon, the power of 2 + si16 Batk; // only for reversible, beta, the additive residue + // ui8 LCatk; // number of lifting coefficients in a step + si16 Aatk; // lifting coefficient + }; + + union data { + irv_data irv; + rev_data rev; + }; + + public: // member functions + param_atk() { init(); } + ~param_atk() { + if (next) delete next; + if (d != NULL && d != d_store) { + delete[] d; + init(false); + } + } + bool read(infile_base *file); + bool read_coefficient(infile_base *file, float &K); + bool read_coefficient(infile_base *file, si16 &K); + void init(bool clear_all = true) { + if (clear_all) 
+ memset(this, 0, sizeof(param_atk)); + d = d_store; max_steps = sizeof(d_store) / sizeof(data); + } + + ui8 get_index() const { return (ui8)(Satk & 0xFF); } + int get_coeff_type() const { return (Satk >> 8) & 0x7; } + bool is_whole_sample() const { return (Satk & 0x800) != 0; } + bool is_reversible() const { return (Satk & 0x1000) != 0; } + bool is_m_init0() const { return (Satk & 0x2000) == 0; } + bool is_using_ws_extension() const { return (Satk & 0x4000) != 0x4000; } + const param_atk* get_atk(int index) const; + const data* get_step(ui32 s) const { assert(s < Natk); return d + s; } + + private: // member variables + ui16 Latk; // structure length + ui16 Satk; // carries a variety of information + float Katk; // only for irreversible scaling factor K + ui8 Natk; // number of lifting steps + data* d; // pointer to data, initialized to d_store + int max_steps; // maximum number of steps without memory allocation + data d_store[6]; // step coefficient + param_atk* next; // used for chaining if more than one atk segment + // exist in the codestream + }; + } // !local namespace +} // !ojph namespace #endif // !OJPH_PARAMS_LOCAL_H diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index fc83bf2b..eb958bfb 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -124,11 +124,14 @@ namespace ojph { cur_cb_row = 0; cur_line = 0; cur_cb_height = 0; - param_qcd *qcd = codestream->access_qcd(parent->get_comp_num()); - this->K_max = qcd->get_Kmax(this->res_num, band_num); + param_qcd* qcd = codestream->access_qcd(parent->get_comp_num()); + const param_cod* cod = codestream->get_cod(); + int num_decomps = cod->get_num_decompositions(); + this->K_max = qcd->get_Kmax(NULL, num_decomps, this->res_num, band_num); if (!reversible) { - float d = qcd->irrev_get_delta(res_num, subband_num); + float d = + qcd->irrev_get_delta(NULL, num_decomps, res_num, subband_num); d /= (float)(1u << (31 - this->K_max)); 
delta = d; delta_inv = (1.0f/d); diff --git a/src/core/common/ojph_version.h b/src/core/common/ojph_version.h index fdf28bc2..ff62f0aa 100644 --- a/src/core/common/ojph_version.h +++ b/src/core/common/ojph_version.h @@ -34,5 +34,5 @@ //***************************************************************************/ #define OPENJPH_VERSION_MAJOR 0 -#define OPENJPH_VERSION_MINOR 10 -#define OPENJPH_VERSION_PATCH 5 +#define OPENJPH_VERSION_MINOR 11 +#define OPENJPH_VERSION_PATCH 0 From 4648f913599bde67b2c4763ddfd357adc68b1124 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 29 Mar 2024 08:04:15 +1100 Subject: [PATCH 036/348] This fixes an issue with the previous commit --- src/core/codestream/ojph_params.cpp | 60 ++++++++++++++--------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 5243762f..b2b1980e 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -898,8 +898,11 @@ namespace ojph { float arr[] = { 1.0f, 2.0f, 2.0f, 4.0f }; assert((Sqcd & 0x1F) == 2); - ui32 idx = - dfs->get_subband_idx(num_decompositions, resolution, subband); + ui32 idx; + if (dfs != NULL && dfs->exists()) + idx = dfs->get_subband_idx(num_decompositions, resolution, subband); + else + idx = resolution ? (resolution - 1) * 3 + subband : 0; if (idx >= num_subbands) { OJPH_INFO(0x00050101, "Trying to access quantization step size for " "subband %d when the QCD/QCC marker segment specifies " @@ -928,8 +931,11 @@ namespace ojph { ui32 resolution, ui32 subband) const { ui32 num_bits = get_num_guard_bits(); - ui32 idx = - dfs->get_subband_idx(num_decompositions, resolution, subband); + ui32 idx; + if (dfs != NULL && dfs->exists()) + idx = dfs->get_subband_idx(num_decompositions, resolution, subband); + else + idx = resolution ? 
(resolution - 1) * 3 + subband : 0; if (idx >= num_subbands) { OJPH_INFO(0x00050111, "Trying to access quantization step size for " "subband %d when the QCD/QCC marker segment specifies " @@ -1314,10 +1320,10 @@ namespace ojph { assert(decomp_level > 0 && decomp_level <= Ids); decomp_level = ojph_min(decomp_level, Ids); - ui8 d = decomp_level - 1; // decomp_level starts from 1 - ui8 idx = d >> 2; // complete bytes - ui8 bits = d & 0x3; // bit within the bytes - ui8 val = (Ddfs[idx] >> (6 - 2 * bits)) & 0x3; + ui32 d = decomp_level - 1; // decomp_level starts from 1 + ui32 idx = d >> 2; // complete bytes + ui32 bits = d & 0x3; // bit within the bytes + ui32 val = (Ddfs[idx] >> (6 - 2 * bits)) & 0x3; return (dfs_dwt_type)val; } @@ -1325,33 +1331,23 @@ namespace ojph { int param_dfs::get_subband_idx(ui32 num_decompositions, ui32 resolution, ui32 subband) const { - int idx; - if (this != NULL) - { - assert((resolution == 0 && subband == 0) || - (resolution > 0 && resolution <= Ids && - subband > 0 && subband < 4)); + assert((resolution == 0 && subband == 0) || + (resolution > 0 && resolution <= Ids && + subband > 0 && subband < 4)); - ui32 ns[4] = { 0, 3, 2, 2 }; - ui32 off[4] = {}; + ui32 ns[4] = { 0, 3, 2, 2 }; - idx = 0; - if (resolution > 0) - { - idx = 0; - ui32 i = 1; - for (; i < resolution; ++i) - idx += ns[get_dwt_type(num_decompositions - i + 1)]; - dfs_dwt_type t = get_dwt_type(num_decompositions - i + 1); - idx += subband; - if (t == VERT_DWT && subband == 2) - --idx; - } - } - else + int idx = 0; + if (resolution > 0) { - assert(subband >= 0 && subband < 4); - idx = resolution ? 
(resolution - 1) * 3 + subband : 0; + idx = 0; + ui32 i = 1; + for (; i < resolution; ++i) + idx += ns[get_dwt_type(num_decompositions - i + 1)]; + dfs_dwt_type t = get_dwt_type(num_decompositions - i + 1); + idx += subband; + if (t == VERT_DWT && subband == 2) + --idx; } return idx; From 1a5925f44c8a4f43d4205885d5bb67ba36d4fdef Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 29 Mar 2024 08:17:51 +1100 Subject: [PATCH 037/348] More fixes --- src/core/codestream/ojph_params.cpp | 48 +++++++++++++++++------------ 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index b2b1980e..8c2169c3 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -1425,36 +1425,46 @@ namespace ojph { K = swap_byte(v); } else if (coeff_type == 2) { // float - if (file->read(&K, 4) != 4) return false; - ui32 t = swap_byte(*(ui32*)&K); - K = *(float*)&t; + union { + float f; + ui32 i; + } v; + if (file->read(&v.i, 4) != 4) return false; + v.i = swap_byte(v.i); + K = v.f; } else if (coeff_type == 3) { // double - double v; - if (file->read(&v, 8) != 8) return false; - ui64 t = swap_byte(*(ui64*)&v); - double u = *(float*)&t; - K = (float)u; + union { + double d; + ui64 i; + } v; + if (file->read(&v.i, 8) != 8) return false; + v.i = swap_byte(v.i); + K = (float)v.d; } else if (coeff_type == 4) { // 128 bit float ui64 v, v1; if (file->read(&v, 8) != 8) return false; - if (file->read(&v1, 8) != 8) return false; // not needed + if (file->read(&v1, 8) != 8) return false; // v1 not needed v = swap_byte(v); + union { + float f; + ui32 i; + } s; // convert the MSB of 128b float to 32b float // 32b float has 1 sign bit, 8 exponent (offset 127), 23 mantissa // 128b float has 1 sign bit, 15 exponent (offset 16383), 112 mantissa - si32 t1 = (si32)((v >> 48) & 0x7FFF); // exponent - t1 -= 16383; - t1 += 127; - t1 = t1 & 0xFF; // removes MSBs if negative - t1 <<= 23; // move 
bits to their location - ui32 t = 0; - t |= ((ui32)(v >> 32) & 0x80000000); // copy sign bit - t |= t1; // copy exponent - t |= (ui32)((v >> 25) & 0x007FFFFF); // copy 23 mantissa - K = *(float*)&t; + si32 e = (si32)((v >> 48) & 0x7FFF); // exponent + e -= 16383; + e += 127; + e = e & 0xFF; // removes MSBs if negative + e <<= 23; // move bits to their location + s.i = 0; + s.i |= ((ui32)(v >> 32) & 0x80000000); // copy sign bit + s.i |= e; // copy exponent + s.i |= (ui32)((v >> 25) & 0x007FFFFF); // copy 23 mantissa + K = s.f; } return true; } From 2b38785cd1111072f9e7f43a1caa69670bd677a4 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 30 Mar 2024 18:19:02 +1100 Subject: [PATCH 038/348] Implemented COC. Linked ATK to COD/COC. --- src/core/codestream/ojph_codestream_local.cpp | 40 ++-- src/core/codestream/ojph_codestream_local.h | 4 +- src/core/codestream/ojph_params.cpp | 112 +++++++++-- src/core/codestream/ojph_params_local.h | 178 ++++++++++++------ src/core/codestream/ojph_resolution.cpp | 4 +- src/core/codestream/ojph_subband.cpp | 2 +- src/core/codestream/ojph_tile.cpp | 2 +- 7 files changed, 254 insertions(+), 88 deletions(-) diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index c2154fa0..d4d20a38 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -758,7 +758,8 @@ namespace ojph { skip_marker(file, "CPF", NULL, OJPH_MSG_LEVEL::NO_MSG, false); else if (marker_idx == 3) { - cod.read(file); received_markers |= 1; + cod.read(file, param_cod::COD_MAIN); + received_markers |= 1; ojph::param_cod c(&cod); int num_qlayers = c.get_num_layers(); if (num_qlayers != 1) @@ -766,21 +767,32 @@ namespace ojph { "1 quality layer only. 
This codestream has %d quality layers", num_qlayers); } - else if (marker_idx == 4) - skip_marker(file, "COC", "COC is not supported yet", - OJPH_MSG_LEVEL::WARN, false); + else if (marker_idx == 4) + { + ui32 num_comps = siz.get_num_components(); + if (coc == coc_store && + num_comps * sizeof(param_cod) > sizeof(coc_store)) + { + coc = new param_cod[num_comps]; + } + coc[used_coc_fields++].read( + file, param_cod::COC_MAIN, num_comps, &cod); + } else if (marker_idx == 5) - { qcd.read(file); received_markers |= 2; } + { + qcd.read(file); + received_markers |= 2; + } else if (marker_idx == 6) + { + ui32 num_comps = siz.get_num_components(); + if (qcc == qcc_store && + num_comps * sizeof(param_qcc) > sizeof(qcc_store)) { - ui32 num_comps = siz.get_num_components(); - if (qcc == qcc_store && - num_comps * sizeof(param_qcc) > sizeof(qcc_store)) - { - qcc = new param_qcc[num_comps]; - } - qcc[used_qcc_fields++].read(file, num_comps); + qcc = new param_qcc[num_comps]; } + qcc[used_qcc_fields++].read(file, num_comps); + } else if (marker_idx == 7) skip_marker(file, "RGN", "RGN is not supported yet", OJPH_MSG_LEVEL::WARN, false); @@ -816,7 +828,9 @@ namespace ojph { OJPH_ERROR(0x00030051, "File ended before finding a tile segment"); } - //qcd.update(&dfs); + cod.update_atk(&atk); + for (int i = 0; i < used_coc_fields; ++i) + coc[i].update_atk(&atk); if (received_markers != 3) OJPH_ERROR(0x00030052, "markers error, COD and QCD are required"); diff --git a/src/core/codestream/ojph_codestream_local.h b/src/core/codestream/ojph_codestream_local.h index 035b534f..34ffc355 100644 --- a/src/core/codestream/ojph_codestream_local.h +++ b/src/core/codestream/ojph_codestream_local.h @@ -163,8 +163,8 @@ namespace ojph { param_qcc *qcc; // quantization component param_qcc qcc_store[4]; // we allocate 4, we allocate more if needed int used_coc_fields; - param_coc *coc; // coding style component - param_coc coc_store[4]; // we allocate 4, we allocate more if needed + param_cod *coc; // 
coding style component + param_cod coc_store[4]; // we allocate 4, we allocate more if needed private: // these are from Part 2 of the standard param_dfs dfs; // downsmapling factor styles diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 8c2169c3..07446c0f 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -275,7 +275,12 @@ namespace ojph { //////////////////////////////////////////////////////////////////////////// bool param_cod::is_reversible() const { - return state->is_reversible(); + if (state->SPcod.wavelet_trans <= 1) + return state->get_wavelet_kern() == local::param_cod::DWT_REV53; + else { + assert(state->atk != NULL); + return state->atk->is_reversible(); + } } //////////////////////////////////////////////////////////////////////////// @@ -604,8 +609,9 @@ namespace ojph { OJPH_ERROR(0x00050043, "error reading SIZ marker"); Rsiz = swap_byte(Rsiz); if ((Rsiz & 0x4000) == 0) - OJPH_ERROR(0x00050044, "Rsiz bit 14 not set (this is not a JPH file)"); - if (Rsiz & 0xBFFF) + OJPH_ERROR(0x00050044, + "Rsiz bit 14 is not set (this is not a JPH file)"); + if ((Rsiz & 0x8000) != 0 && (Rsiz & 0xF5F) != 0) OJPH_WARN(0x00050001, "Rsiz in SIZ has unimplemented fields"); if (file->read(&Xsiz, 4) != 4) OJPH_ERROR(0x00050045, "error reading SIZ marker"); @@ -652,6 +658,9 @@ namespace ojph { if (file->read(&cptr[c].YRsiz, 1) != 1) OJPH_ERROR(0x00050053, "error reading SIZ marker"); } + + ws_kern_support_needed = (Rsiz & 0x20) != 0; + dfs_support_needed = (Rsiz & 0x80) != 0; } ////////////////////////////////////////////////////////////////////////// @@ -720,6 +729,8 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// bool param_cod::write(outfile_base *file) { + assert(type == COD_MAIN); + //marker size excluding header Lcod = 12; Lcod = (ui16)(Lcod + (Scod & 1 ? 
1 + SPcod.num_decomp : 0)); @@ -758,37 +769,106 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - void param_cod::read(infile_base *file) + void param_cod::read(infile_base *file, param_cod::cod_type type) { + assert(this->type == UNDEFINED); + assert(type == COD_MAIN); + + this->type = type; if (file->read(&Lcod, 2) != 2) - OJPH_ERROR(0x00050071, "error reading COD marker"); + OJPH_ERROR(0x00050071, "error reading COD segment"); Lcod = swap_byte(Lcod); if (file->read(&Scod, 1) != 1) - OJPH_ERROR(0x00050072, "error reading COD marker"); + OJPH_ERROR(0x00050072, "error reading COD segment"); if (file->read(&SGCod.prog_order, 1) != 1) - OJPH_ERROR(0x00050073, "error reading COD marker"); + OJPH_ERROR(0x00050073, "error reading COD segment"); if (file->read(&SGCod.num_layers, 2) != 2) - { OJPH_ERROR(0x00050074, "error reading COD marker"); } + { OJPH_ERROR(0x00050074, "error reading COD segment"); } else SGCod.num_layers = swap_byte(SGCod.num_layers); if (file->read(&SGCod.mc_trans, 1) != 1) - OJPH_ERROR(0x00050075, "error reading COD marker"); + OJPH_ERROR(0x00050075, "error reading COD segment"); if (file->read(&SPcod.num_decomp, 1) != 1) - OJPH_ERROR(0x00050076, "error reading COD marker"); + OJPH_ERROR(0x00050076, "error reading COD segment"); if (file->read(&SPcod.block_width, 1) != 1) - OJPH_ERROR(0x00050077, "error reading COD marker"); + OJPH_ERROR(0x00050077, "error reading COD segment"); if (file->read(&SPcod.block_height, 1) != 1) - OJPH_ERROR(0x00050078, "error reading COD marker"); + OJPH_ERROR(0x00050078, "error reading COD segment"); if (file->read(&SPcod.block_style, 1) != 1) - OJPH_ERROR(0x00050079, "error reading COD marker"); + OJPH_ERROR(0x00050079, "error reading COD segment"); if (file->read(&SPcod.wavelet_trans, 1) != 1) - OJPH_ERROR(0x0005007A, "error reading COD marker"); + OJPH_ERROR(0x0005007A, "error reading COD segment"); if (Scod & 1) for (int i = 0; i <= SPcod.num_decomp; ++i) if 
(file->read(&SPcod.precinct_size[i], 1) != 1) - OJPH_ERROR(0x0005007B, "error reading COD marker"); + OJPH_ERROR(0x0005007B, "error reading COD segment"); if (Lcod != 12 + ((Scod & 1) ? 1 + SPcod.num_decomp : 0)) - OJPH_ERROR(0x0005007C, "error in COD marker length"); + OJPH_ERROR(0x0005007C, "error in COD segment length"); + } + + ////////////////////////////////////////////////////////////////////////// + void param_cod::read(infile_base* file, param_cod::cod_type type, + ui32 num_comps, param_cod *cod) + { + assert(this->type == UNDEFINED); + assert(type == COC_MAIN); + assert(cod != NULL); + + this->type = type; + this->SGCod = cod->SGCod; + this->parent = cod; + if (file->read(&Lcod, 2) != 2) + OJPH_ERROR(0x00050121, "error reading COC segment"); + Lcod = swap_byte(Lcod); + if (num_comps < 257) { + ui8 t; + if (file->read(&t, 1) != 1) + OJPH_ERROR(0x00050122, "error reading COC segment"); + comp_idx = t; + } + else { + if (file->read(&comp_idx, 2) != 2) + OJPH_ERROR(0x00050123, "error reading COC segment"); + comp_idx = swap_byte(comp_idx); + } + if (file->read(&Scod, 1) != 1) + OJPH_ERROR(0x00050124, "error reading COC segment"); + if (Scod & 0xF8) + OJPH_WARN(0x00050011, + "Unsupported options in Scoc field of the COC segment"); + if (file->read(&SPcod.num_decomp, 1) != 1) + OJPH_ERROR(0x00050125, "error reading COC segment"); + if (file->read(&SPcod.block_width, 1) != 1) + OJPH_ERROR(0x00050126, "error reading COC segment"); + if (file->read(&SPcod.block_height, 1) != 1) + OJPH_ERROR(0x00050127, "error reading COC segment"); + if (file->read(&SPcod.block_style, 1) != 1) + OJPH_ERROR(0x00050128, "error reading COC segment"); + if (file->read(&SPcod.wavelet_trans, 1) != 1) + OJPH_ERROR(0x00050129, "error reading COC segment"); + if (Scod & 1) + for (int i = 0; i <= get_num_decompositions(); ++i) + if (file->read(&SPcod.precinct_size[i], 1) != 1) + OJPH_ERROR(0x0005012A, "error reading COC segment"); + ui16 t = 9; + t += num_comps < 257 ? 
0 : 1; + t += (Scod & 1) ? 1 + get_num_decompositions() : 0; + if (Lcod != t) + OJPH_ERROR(0x0005012B, "error in COC segment length"); + } + + ////////////////////////////////////////////////////////////////////////// + void param_cod::update_atk(const param_atk* atk) + { + if (SPcod.wavelet_trans > 1) { + this->atk = atk->get_atk(SPcod.wavelet_trans); + if (this->atk == NULL) + OJPH_ERROR(0x00050131, "A COD/COC segment employs the DWT kernel " + "atk=%d, but a corresponding ATK segment cannot be found", + SPcod.wavelet_trans); + } + else + this->atk = NULL; } ////////////////////////////////////////////////////////////////////////// diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index acfd0347..91447f15 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -112,7 +112,6 @@ namespace ojph { struct param_cap; struct param_sot; struct param_tlm; - struct param_coc; struct param_dfs; struct param_atk; @@ -173,6 +172,7 @@ namespace ojph { cptr = store; old_Csiz = 4; Rsiz = 0x4000; //for jph, bit 14 of Rsiz is 1 + ws_kern_support_needed = dfs_support_needed = false; } ~param_siz() @@ -270,6 +270,8 @@ namespace ojph { ui32 t = ojph_div_ceil(Ysiz, ds) - ojph_div_ceil(YOsiz, ds); return t; } + bool is_ws_kern_support_needed() { return ws_kern_support_needed; } + bool is_dfs_support_needed() { return dfs_support_needed; } private: ui16 Lsiz; @@ -289,6 +291,8 @@ namespace ojph { ui32 skipped_resolutions; int old_Csiz; siz_comp_info store[4]; + bool ws_kern_support_needed; + bool dfs_support_needed; param_siz(const param_siz&) = delete; //prevent copy constructor param_siz& operator=(const param_siz&) = delete; //prevent copy }; @@ -308,10 +312,18 @@ namespace ojph { ui8 block_style; ui8 wavelet_trans; ui8 precinct_size[33]; //num_decomp is in [0,32] - }; - /////////////////////////////////////////////////////////////////////////// - typedef cod_SPcod cod_SPcoc; + size 
get_log_block_dims() const + { return size(block_width + 2, block_height + 2); } + size get_block_dims() const + { size t = get_log_block_dims(); return size(1 << t.w, 1 << t.h); } + size get_log_precinct_size(ui32 res_num) const + { + assert(res_num <= num_decomp); + size ps(precinct_size[res_num] & 0xF, precinct_size[res_num] >> 4); + return ps; + } + }; /////////////////////////////////////////////////////////////////////////// struct cod_SGcod @@ -324,38 +336,65 @@ namespace ojph { /////////////////////////////////////////////////////////////////////////// struct param_cod { + // serves for both COD and COC markers + friend ::ojph::param_cod; + //////////////////////////////////////// enum BLOCK_CODING_STYLES { VERT_CAUSAL_MODE = 0x8, HT_MODE = 0x40 }; - public: + //////////////////////////////////////// + enum cod_type : ui8 { + UNDEFINED = 0, + COD_MAIN = 1, + COC_MAIN = 2, + COD_TILE = 3, + COC_TILE = 4 + }; + //////////////////////////////////////// + enum dwt_type : ui8 { + DWT_IRV97 = 0, + DWT_REV53 = 1, + }; + + public: // COD_MAIN and COC_MAIN common functions + //////////////////////////////////////// param_cod() { memset(this, 0, sizeof(param_cod)); SPcod.block_style = HT_MODE; - SGCod.prog_order = 2; + SGCod.prog_order = OJPH_PO_RPCL; SGCod.num_layers = 1; SGCod.mc_trans = 0; SPcod.num_decomp = 5; SPcod.block_width = 4; //64 SPcod.block_height = 4; //64 - set_reversible(false); } + //////////////////////////////////////// void set_reversible(bool reversible) { - SPcod.wavelet_trans = reversible ? 1 : 0; + assert(type == UNDEFINED || type == COD_MAIN); + type = COD_MAIN; + SPcod.wavelet_trans = reversible ? 
DWT_REV53 : DWT_IRV97; } + //////////////////////////////////////// void employ_color_transform(ui8 val) { assert(val == 0 || val == 1); + assert(type == UNDEFINED || type == COD_MAIN); + type = COD_MAIN; SGCod.mc_trans = val; } + //////////////////////////////////////// void check_validity(const param_siz& siz) { + assert(type == UNDEFINED || type == COD_MAIN); + type = COD_MAIN; + //check that colour transform and match number of components and // downsampling int num_comps = siz.get_num_components(); @@ -393,50 +432,97 @@ namespace ojph { } } + //////////////////////////////////////// ui8 get_num_decompositions() const - { return SPcod.num_decomp; } - size get_block_dims() const { - return size(1 << (SPcod.block_width + 2), - 1 << (SPcod.block_height + 2)); + if (type == COD_MAIN) + return SPcod.num_decomp; + else if (type == COC_MAIN) + { + if (is_dfs_defined()) + return parent->get_num_decompositions(); + else + return SPcod.num_decomp; + } + else { + assert(0); + return 0; // just in case + } } - bool is_reversible() const - { return (SPcod.wavelet_trans == 1); } + + //////////////////////////////////////// + size get_block_dims() const + { return SPcod.get_block_dims(); } + + //////////////////////////////////////// + size get_log_block_dims() const + { return SPcod.get_log_block_dims(); } + + //////////////////////////////////////// + ui8 get_wavelet_kern() const + { return SPcod.wavelet_trans; } + + //////////////////////////////////////// bool is_employing_color_transform() const { return (SGCod.mc_trans == 1); } - size get_log_block_dims() const - { return size(SPcod.block_width + 2, SPcod.block_height + 2); } + + //////////////////////////////////////// size get_precinct_size(ui32 res_num) const { size t = get_log_precinct_size(res_num); - t.w = 1 << t.w; - t.h = 1 << t.h; - return t; + return size(1 << t.w, 1 << t.h); } + + //////////////////////////////////////// size get_log_precinct_size(ui32 res_num) const - { - assert(res_num <= SPcod.num_decomp); 
- size ps(15, 15); + { if (Scod & 1) - { - ps.w = SPcod.precinct_size[res_num] & 0xF; - ps.h = SPcod.precinct_size[res_num] >> 4; - } - return ps; + return SPcod.get_log_precinct_size(res_num); + else + return size(15, 15); } + + //////////////////////////////////////// bool packets_may_use_sop() const { return (Scod & 2) == 2; } + + //////////////////////////////////////// bool packets_use_eph() const { return (Scod & 4) == 4; } + //////////////////////////////////////// bool write(outfile_base *file); - void read(infile_base *file); - private: - ui16 Lcod; - ui8 Scod; - cod_SGcod SGCod; - cod_SPcod SPcod; + //////////////////////////////////////// + void read(infile_base *file, cod_type type); + + //////////////////////////////////////// + void read(infile_base* file, cod_type type, ui32 num_comps, + param_cod* cod); + + //////////////////////////////////////// + void update_atk(const param_atk* atk); + + public: // COC_MAIN only functions + //////////////////////////////////////// + bool is_dfs_defined() const + { return (SPcod.num_decomp & 0x80) != 0; } + + //////////////////////////////////////// + ui16 get_dfs_index() const // cannot be more than 15 + { return SPcod.num_decomp & 0xF; } + + private: // Common variables + cod_type type; // The type of this cod structure + ui16 Lcod; // serves as Lcod and Scod + ui8 Scod; // serves as Scod and Scoc + cod_SGcod SGCod; // Used in COD and copied to COC + cod_SPcod SPcod; // serves as SPcod and SPcoc + + private: // COC only variables + param_cod* parent; // parent COD structure + ui16 comp_idx; // component index of this COC structure + const param_atk* atk; // useful when SPcod.wavelet_trans > 1 }; /////////////////////////////////////////////////////////////////////////// @@ -452,11 +538,7 @@ namespace ojph { public: param_qcd() { - Lqcd = 0; - Sqcd = 0; - for (int i = 0; i < 97; ++i) - u16_SPqcd[i] = 0; - num_subbands = 0; + memset(this, 0, sizeof(param_qcd)); base_delta = -1.0f; } @@ -466,7 +548,7 @@ namespace 
ojph { { int num_decomps = cod.get_num_decompositions(); num_subbands = 1 + 3 * num_decomps; - if (cod.is_reversible()) + if (cod.get_wavelet_kern() == param_cod::DWT_REV53) { ui32 bit_depth = 0; for (ui32 i = 0; i < siz.get_num_components(); ++i) @@ -474,7 +556,7 @@ namespace ojph { set_rev_quant(num_decomps, bit_depth, cod.is_employing_color_transform()); } - else + else if (cod.get_wavelet_kern() == param_cod::DWT_IRV97) { if (base_delta == -1.0f) { ui32 bit_depth = 0; @@ -485,6 +567,8 @@ namespace ojph { } set_irrev_quant(num_decomps); } + else + assert(0); } ui32 get_num_guard_bits() const; ui32 get_MAGBp() const; @@ -554,7 +638,7 @@ namespace ojph { void check_validity(const param_cod& cod, const param_qcd& qcd) { - if (cod.is_reversible()) + if (cod.get_wavelet_kern() == param_cod::DWT_REV53) Ccap[0] &= 0xFFDF; else Ccap[0] |= 0x0020; @@ -650,18 +734,6 @@ namespace ojph { ui32 next_pair_index; }; - /////////////////////////////////////////////////////////////////////////// - // - // - // - // - // - /////////////////////////////////////////////////////////////////////////// - struct param_coc : public param_cod - { - - }; - /////////////////////////////////////////////////////////////////////////// // // diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 82371bd7..0cc7e3b9 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -166,7 +166,7 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - bool reversible = cdp->is_reversible(); + bool reversible = (cdp->get_wavelet_kern() == param_cod::DWT_REV53); ui32 num_lines = reversible ? 4 : 6; allocator->pre_alloc_obj(num_lines); @@ -321,7 +321,7 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - this->reversible = cdp->is_reversible(); + this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; this->num_lines = this->reversible ? 
4 : 6; lines = allocator->post_alloc_obj(num_lines); diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index eb958bfb..ba6c5b96 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -112,7 +112,7 @@ namespace ojph { this->parent = res; const param_cod* cdp = codestream->get_cod(); - this->reversible = cdp->is_reversible(); + this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; size log_cb = cdp->get_log_block_dims(); log_PP = cdp->get_log_precinct_size(res_num); diff --git a/src/core/codestream/ojph_tile.cpp b/src/core/codestream/ojph_tile.cpp index 0ad4acd3..38bcd686 100644 --- a/src/core/codestream/ojph_tile.cpp +++ b/src/core/codestream/ojph_tile.cpp @@ -214,7 +214,7 @@ namespace ojph { //allocate lines const param_cod* cdp = codestream->get_cod(); - this->reversible = cdp->is_reversible(); + this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; this->employ_color_transform = cdp->is_employing_color_transform(); if (this->employ_color_transform) { From 0363e40896ed45f8ce826d50053ae2fe17e448d5 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 30 Mar 2024 18:24:20 +1100 Subject: [PATCH 039/348] Warning fix. --- src/core/codestream/ojph_params.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 07446c0f..5a76f24c 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -850,7 +850,7 @@ namespace ojph { for (int i = 0; i <= get_num_decompositions(); ++i) if (file->read(&SPcod.precinct_size[i], 1) != 1) OJPH_ERROR(0x0005012A, "error reading COC segment"); - ui16 t = 9; + ui32 t = 9; t += num_comps < 257 ? 0 : 1; t += (Scod & 1) ? 
1 + get_num_decompositions() : 0; if (Lcod != t) From 02f6967ecbb9472227f0154ef93b6ce65e3266a5 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sun, 31 Mar 2024 13:38:21 +1100 Subject: [PATCH 040/348] More changes to param_atk. --- src/apps/ojph_compress/ojph_compress.cpp | 98 +++++++++---------- src/core/codestream/ojph_codestream_local.cpp | 15 ++- src/core/codestream/ojph_codestream_local.h | 5 +- src/core/codestream/ojph_params.cpp | 58 ++++++++--- src/core/codestream/ojph_params_local.h | 16 ++- src/core/codestream/ojph_resolution.cpp | 4 +- src/core/codestream/ojph_subband.cpp | 2 +- src/core/codestream/ojph_tile.cpp | 2 +- 8 files changed, 127 insertions(+), 73 deletions(-) diff --git a/src/apps/ojph_compress/ojph_compress.cpp b/src/apps/ojph_compress/ojph_compress.cpp index 42befaff..0ee86f7f 100644 --- a/src/apps/ojph_compress/ojph_compress.cpp +++ b/src/apps/ojph_compress/ojph_compress.cpp @@ -917,55 +917,55 @@ int main(int argc, char * argv[]) { } else if (is_matching(".dpx", v)) { - dpx.open(input_filename); - ojph::param_siz siz = codestream.access_siz(); - siz.set_image_extent(ojph::point(image_offset.x + dpx.get_size().w, - image_offset.y + dpx.get_size().h)); - ojph::ui32 num_comps = dpx.get_num_components(); - siz.set_num_components(num_comps); - //if (num_bit_depths > 0) - // dpx.set_bit_depth(num_bit_depths, bit_depth); - for (ojph::ui32 c = 0; c < num_comps; ++c) - siz.set_component(c, dpx.get_comp_subsampling(c), - dpx.get_bit_depth(c), dpx.get_is_signed(c)); - siz.set_image_offset(image_offset); - siz.set_tile_size(tile_size); - siz.set_tile_offset(tile_offset); - - ojph::param_cod cod = codestream.access_cod(); - cod.set_num_decomposition(num_decompositions); - cod.set_block_dims(block_size.w, block_size.h); - if (num_precincts != -1) - cod.set_precinct_size(num_precincts, precinct_size); - cod.set_progression_order(prog_order); - if (employ_color_transform == -1 && num_comps >= 3) - cod.set_color_transform(true); - else - 
cod.set_color_transform(employ_color_transform == 1); - cod.set_reversible(reversible); - if (!reversible && quantization_step != -1) - codestream.access_qcd().set_irrev_quant(quantization_step); - codestream.set_planar(false); - if (profile_string[0] != '\0') - codestream.set_profile(profile_string); - codestream.set_tilepart_divisions(tileparts_at_resolutions, - tileparts_at_components); - codestream.request_tlm_marker(tlm_marker); - - if (dims.w != 0 || dims.h != 0) - OJPH_WARN(0x01000071, - "-dims option is not needed and was not used\n"); - if (num_components != 0) - OJPH_WARN(0x01000072, - "-num_comps is not needed and was not used\n"); - if (is_signed[0] != -1) - OJPH_WARN(0x01000073, - "-signed is not needed and was not used\n"); - if (comp_downsampling[0].x != 0 || comp_downsampling[0].y != 0) - OJPH_WARN(0x01000075, - "-downsamp is not needed and was not used\n"); - - base = &dpx; + dpx.open(input_filename); + ojph::param_siz siz = codestream.access_siz(); + siz.set_image_extent(ojph::point(image_offset.x + dpx.get_size().w, + image_offset.y + dpx.get_size().h)); + ojph::ui32 num_comps = dpx.get_num_components(); + siz.set_num_components(num_comps); + //if (num_bit_depths > 0) + // dpx.set_bit_depth(num_bit_depths, bit_depth); + for (ojph::ui32 c = 0; c < num_comps; ++c) + siz.set_component(c, dpx.get_comp_subsampling(c), + dpx.get_bit_depth(c), dpx.get_is_signed(c)); + siz.set_image_offset(image_offset); + siz.set_tile_size(tile_size); + siz.set_tile_offset(tile_offset); + + ojph::param_cod cod = codestream.access_cod(); + cod.set_num_decomposition(num_decompositions); + cod.set_block_dims(block_size.w, block_size.h); + if (num_precincts != -1) + cod.set_precinct_size(num_precincts, precinct_size); + cod.set_progression_order(prog_order); + if (employ_color_transform == -1 && num_comps >= 3) + cod.set_color_transform(true); + else + cod.set_color_transform(employ_color_transform == 1); + cod.set_reversible(reversible); + if (!reversible && 
quantization_step != -1) + codestream.access_qcd().set_irrev_quant(quantization_step); + codestream.set_planar(false); + if (profile_string[0] != '\0') + codestream.set_profile(profile_string); + codestream.set_tilepart_divisions(tileparts_at_resolutions, + tileparts_at_components); + codestream.request_tlm_marker(tlm_marker); + + if (dims.w != 0 || dims.h != 0) + OJPH_WARN(0x01000071, + "-dims option is not needed and was not used\n"); + if (num_components != 0) + OJPH_WARN(0x01000072, + "-num_comps is not needed and was not used\n"); + if (is_signed[0] != -1) + OJPH_WARN(0x01000073, + "-signed is not needed and was not used\n"); + if (comp_downsampling[0].x != 0 || comp_downsampling[0].y != 0) + OJPH_WARN(0x01000075, + "-downsamp is not needed and was not used\n"); + + base = &dpx; } else #if defined( OJPH_ENABLE_TIFF_SUPPORT) diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index d4d20a38..737daffb 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -84,6 +84,12 @@ namespace ojph { used_coc_fields = 0; coc = coc_store; + atk = atk_store; + atk[0].init_irv97(); + atk[0].link(atk_store + 1); + atk[1].init_rev53(); + atk[1].link(atk_store + 2); + allocator = new mem_fixed_allocator; elastic_alloc = new mem_elastic_allocator(1048576); //1 megabyte @@ -557,7 +563,8 @@ namespace ojph { { //finalize siz.check_validity(); - cod.check_validity(siz); + cod.check_validity(siz); + cod.update_atk(atk); qcd.check_validity(siz, cod); cap.check_validity(cod, qcd); if (profile == OJPH_PN_IMF) @@ -821,16 +828,16 @@ namespace ojph { else if (marker_idx == 14) dfs.read(file); else if (marker_idx == 15) - atk.read(file); + atk[2].read(file); else if (marker_idx == 16) break; else OJPH_ERROR(0x00030051, "File ended before finding a tile segment"); } - cod.update_atk(&atk); + cod.update_atk(atk); for (int i = 0; i < used_coc_fields; ++i) - coc[i].update_atk(&atk); + 
coc[i].update_atk(atk); if (received_markers != 3) OJPH_ERROR(0x00030052, "markers error, COD and QCD are required"); diff --git a/src/core/codestream/ojph_codestream_local.h b/src/core/codestream/ojph_codestream_local.h index 34ffc355..5bfa09d4 100644 --- a/src/core/codestream/ojph_codestream_local.h +++ b/src/core/codestream/ojph_codestream_local.h @@ -168,7 +168,10 @@ namespace ojph { private: // these are from Part 2 of the standard param_dfs dfs; // downsmapling factor styles - param_atk atk; // arbitrary transformation kernels + param_atk* atk; // a pointer to atk + param_atk atk_store[3];// 0 and 1 are for DWT from Part 1, 2 onward are + // for arbitrary transformation kernels + private: mem_fixed_allocator *allocator; diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 5a76f24c..1735c819 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -279,7 +279,7 @@ namespace ojph { return state->get_wavelet_kern() == local::param_cod::DWT_REV53; else { assert(state->atk != NULL); - return state->atk->is_reversible(); + return state->access_atk()->is_reversible(); } } @@ -1578,6 +1578,7 @@ namespace ojph { while (p->next != NULL) p = p->next; p->next = new param_atk; + p->alloced_next = true; p = p->next; return p->read(file); } @@ -1598,11 +1599,14 @@ namespace ojph { if (is_reversible() && get_coeff_type() >= 2) // reversible & float OJPH_ERROR(0x000500E5, "ATK-Satk parameter does not make sense. " "It employs floats with reversible filtering."); + if (is_using_ws_extension() == false) // only sym. 
ext is supported + OJPH_ERROR(0x000500E6, "ATK-Satk parameter requires constant " + "boundary extension, which is not supported yet."); if (is_reversible() == false) if (read_coefficient(file, Katk) == false) - OJPH_ERROR(0x000500E6, "error reading ATK-Katk parameter"); + OJPH_ERROR(0x000500E7, "error reading ATK-Katk parameter"); if (file->read(&Natk, 1) != 1) - OJPH_ERROR(0x000500E7, "error reading ATK-Natk parameter"); + OJPH_ERROR(0x000500E8, "error reading ATK-Natk parameter"); if (Natk > max_steps) { if (d != d_store) // was this allocated -- very unlikely delete[] d; @@ -1615,21 +1619,21 @@ namespace ojph { for (int s = 0; s < Natk; ++s) { if (file->read(&d[s].rev.Eatk, 1) != 1) - OJPH_ERROR(0x000500E8, "error reading ATK-Eatk parameter"); + OJPH_ERROR(0x000500E9, "error reading ATK-Eatk parameter"); if (file->read(&d[s].rev.Batk, 2) != 2) - OJPH_ERROR(0x000500E9, "error reading ATK-Batk parameter"); + OJPH_ERROR(0x000500EA, "error reading ATK-Batk parameter"); d[s].rev.Batk = (si16)swap_byte((ui16)d[s].rev.Batk); ui8 LCatk; if (file->read(&LCatk, 1) != 1) - OJPH_ERROR(0x000500EA, "error reading ATK-LCatk parameter"); + OJPH_ERROR(0x000500EB, "error reading ATK-LCatk parameter"); if (LCatk == 0) - OJPH_ERROR(0x000500EB, "Encountered a ATK-LCatk value of zero; " + OJPH_ERROR(0x000500EC, "Encountered a ATK-LCatk value of zero; " "something is wrong."); if (LCatk > 1) - OJPH_ERROR(0x000500EC, "ATK-LCatk value greater than 1; " + OJPH_ERROR(0x000500ED, "ATK-LCatk value greater than 1; " "that is, a multitap filter is not supported"); if (read_coefficient(file, d[s].rev.Aatk) == false) - OJPH_ERROR(0x000500ED, "Error reding ATK-Aatk parameter"); + OJPH_ERROR(0x000500EE, "Error reding ATK-Aatk parameter"); } } else @@ -1638,19 +1642,47 @@ namespace ojph { { ui8 LCatk; if (file->read(&LCatk, 1) != 1) - OJPH_ERROR(0x000500EE, "error reading ATK-LCatk parameter"); + OJPH_ERROR(0x000500EF, "error reading ATK-LCatk parameter"); if (LCatk == 0) - OJPH_ERROR(0x000500EF, 
"Encountered a ATK-LCatk value of zero; " + OJPH_ERROR(0x000500F0, "Encountered a ATK-LCatk value of zero; " "something is wrong."); if (LCatk > 1) - OJPH_ERROR(0x000500F0, "ATK-LCatk value greater than 1; " + OJPH_ERROR(0x000500F1, "ATK-LCatk value greater than 1; " "that is, a multitap filter is not supported."); if (read_coefficient(file, d[s].irv.Aatk) == false) - OJPH_ERROR(0x000500F1, "Error reding ATK-Aatk parameter"); + OJPH_ERROR(0x000500F2, "Error reding ATK-Aatk parameter"); } } return true; } + + ////////////////////////////////////////////////////////////////////////// + void param_atk::init_irv97() + { + Satk = 0x4a00; // illegal because ATK = 0 + Katk = (float)1.230174104914001; + Natk = 4; + Latk = 5 + Natk + sizeof(float) * (1 + Natk); // (A-4) in T.801 + d[0].irv.Aatk = (float)-1.586134342059924; + d[1].irv.Aatk = (float)-0.052980118572961; + d[2].irv.Aatk = (float)0.882911075530934; + d[3].irv.Aatk = (float)0.443506852043971; + } + + ////////////////////////////////////////////////////////////////////////// + void param_atk::init_rev53() + { + Satk = 0x5801; // illegal because ATK = 1 + Natk = 2; + Latk = 5 + 2 * Natk + sizeof(ui8) * (Natk + Natk); // (A-4) in T.801 + d[0].rev.Aatk = -1; + d[0].rev.Batk = 0; + d[0].rev.Eatk = 1; + d[1].rev.Aatk = 1; + d[1].rev.Batk = 2; + d[1].rev.Eatk = 2; + } + } // !local namespace } // !ojph namespace diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index 91447f15..59425da3 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -503,6 +503,9 @@ namespace ojph { //////////////////////////////////////// void update_atk(const param_atk* atk); + //////////////////////////////////////// + const param_atk* access_atk() const { return atk; } + public: // COC_MAIN only functions //////////////////////////////////////// bool is_dfs_defined() const @@ -814,7 +817,10 @@ namespace ojph { public: // member functions param_atk() { 
init(); } ~param_atk() { - if (next) delete next; + if (next && alloced_next) { + delete next; + next = NULL; + } if (d != NULL && d != d_store) { delete[] d; init(false); @@ -828,13 +834,17 @@ namespace ojph { memset(this, 0, sizeof(param_atk)); d = d_store; max_steps = sizeof(d_store) / sizeof(data); } + void init_irv97(); + void init_rev53(); + void link(param_atk* next) + { assert(this->next == NULL); this->next = next; alloced_next = false; } ui8 get_index() const { return (ui8)(Satk & 0xFF); } int get_coeff_type() const { return (Satk >> 8) & 0x7; } bool is_whole_sample() const { return (Satk & 0x800) != 0; } bool is_reversible() const { return (Satk & 0x1000) != 0; } bool is_m_init0() const { return (Satk & 0x2000) == 0; } - bool is_using_ws_extension() const { return (Satk & 0x4000) != 0x4000; } + bool is_using_ws_extension() const { return (Satk & 0x4000) != 0; } const param_atk* get_atk(int index) const; const data* get_step(ui32 s) const { assert(s < Natk); return d + s; } @@ -848,6 +858,8 @@ namespace ojph { data d_store[6]; // step coefficient param_atk* next; // used for chaining if more than one atk segment // exist in the codestream + bool alloced_next; // true if next was allocated, not just set to an + // existing object }; } // !local namespace } // !ojph namespace diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 0cc7e3b9..03d1278d 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -166,7 +166,7 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - bool reversible = (cdp->get_wavelet_kern() == param_cod::DWT_REV53); + bool reversible = cdp->access_atk()->is_reversible(); ui32 num_lines = reversible ? 
4 : 6; allocator->pre_alloc_obj(num_lines); @@ -321,7 +321,7 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; + this->reversible = cdp->access_atk()->is_reversible(); this->num_lines = this->reversible ? 4 : 6; lines = allocator->post_alloc_obj(num_lines); diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index ba6c5b96..c65a2ebb 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -112,7 +112,7 @@ namespace ojph { this->parent = res; const param_cod* cdp = codestream->get_cod(); - this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; + this->reversible = cdp->access_atk()->is_reversible(); size log_cb = cdp->get_log_block_dims(); log_PP = cdp->get_log_precinct_size(res_num); diff --git a/src/core/codestream/ojph_tile.cpp b/src/core/codestream/ojph_tile.cpp index 38bcd686..b7cb52cd 100644 --- a/src/core/codestream/ojph_tile.cpp +++ b/src/core/codestream/ojph_tile.cpp @@ -214,7 +214,7 @@ namespace ojph { //allocate lines const param_cod* cdp = codestream->get_cod(); - this->reversible = cdp->get_wavelet_kern() == param_cod::DWT_REV53; + this->reversible = cdp->access_atk()->is_reversible(); this->employ_color_transform = cdp->is_employing_color_transform(); if (this->employ_color_transform) { From 32f17b5d4d9a95f202b52061c899d2b25fa926a6 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sun, 31 Mar 2024 13:55:11 +1100 Subject: [PATCH 041/348] A bug fix. 
--- src/core/codestream/ojph_params.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 1735c819..67c8fad3 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -860,15 +860,11 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void param_cod::update_atk(const param_atk* atk) { - if (SPcod.wavelet_trans > 1) { - this->atk = atk->get_atk(SPcod.wavelet_trans); - if (this->atk == NULL) - OJPH_ERROR(0x00050131, "A COD/COC segment employs the DWT kernel " - "atk=%d, but a corresponding ATK segment cannot be found", - SPcod.wavelet_trans); - } - else - this->atk = NULL; + this->atk = atk->get_atk(SPcod.wavelet_trans); + if (this->atk == NULL) + OJPH_ERROR(0x00050131, "A COD/COC segment employs the DWT kernel " + "atk=%d, but a corresponding ATK segment cannot be found", + SPcod.wavelet_trans); } ////////////////////////////////////////////////////////////////////////// @@ -1663,7 +1659,8 @@ namespace ojph { Satk = 0x4a00; // illegal because ATK = 0 Katk = (float)1.230174104914001; Natk = 4; - Latk = 5 + Natk + sizeof(float) * (1 + Natk); // (A-4) in T.801 + // next is (A-4) in T.801 second line + Latk = (ui16)(5 + Natk + sizeof(float) * (1 + Natk)); d[0].irv.Aatk = (float)-1.586134342059924; d[1].irv.Aatk = (float)-0.052980118572961; d[2].irv.Aatk = (float)0.882911075530934; @@ -1675,7 +1672,8 @@ namespace ojph { { Satk = 0x5801; // illegal because ATK = 1 Natk = 2; - Latk = 5 + 2 * Natk + sizeof(ui8) * (Natk + Natk); // (A-4) in T.801 + // next is (A-4) in T.801 fourth line + Latk = (ui16)(5 + 2 * Natk + sizeof(ui8) * (Natk + Natk)); d[0].rev.Aatk = -1; d[0].rev.Batk = 0; d[0].rev.Eatk = 1; From abe4ccf67b05eb1356261fed35965300cde13c47 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sun, 31 Mar 2024 14:06:58 +1100 Subject: [PATCH 042/348] Remove warnings. 
--- src/core/codestream/ojph_params.cpp | 12 ++++++------ src/core/codestream/ojph_params_local.h | 15 +++++++++------ src/core/codestream/ojph_subband.cpp | 2 +- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 67c8fad3..affa222a 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -876,7 +876,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - void param_qcd::set_rev_quant(int num_decomps, ui32 bit_depth, + void param_qcd::set_rev_quant(ui32 num_decomps, ui32 bit_depth, bool is_employing_color_transform) { int guard_bits = 1; @@ -901,7 +901,7 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - void param_qcd::set_irrev_quant(int num_decomps) + void param_qcd::set_irrev_quant(ui32 num_decomps) { int guard_bits = 1; Sqcd = (ui8)((guard_bits<<5)|0x2);//one guard bit, scalar quantization @@ -1404,8 +1404,8 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - int param_dfs::get_subband_idx(ui32 num_decompositions, ui32 resolution, - ui32 subband) const + ui32 param_dfs::get_subband_idx(ui32 num_decompositions, ui32 resolution, + ui32 subband) const { assert((resolution == 0 && subband == 0) || (resolution > 0 && resolution <= Ids && @@ -1413,7 +1413,7 @@ namespace ojph { ui32 ns[4] = { 0, 3, 2, 2 }; - int idx = 0; + ui32 idx = 0; if (resolution > 0) { idx = 0; @@ -1538,7 +1538,7 @@ namespace ojph { e <<= 23; // move bits to their location s.i = 0; s.i |= ((ui32)(v >> 32) & 0x80000000); // copy sign bit - s.i |= e; // copy exponent + s.i |= (ui32)e; // copy exponent s.i |= (ui32)((v >> 25) & 0x007FFFFF); // copy 23 mantissa K = s.f; } diff --git a/src/core/codestream/ojph_params_local.h 
b/src/core/codestream/ojph_params_local.h index 59425da3..e8e43f8f 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -541,7 +541,10 @@ namespace ojph { public: param_qcd() { - memset(this, 0, sizeof(param_qcd)); + Lqcd = 0; + Sqcd = 0; + memset(u16_SPqcd, 0, sizeof(u16_SPqcd)); + num_subbands = 0; base_delta = -1.0f; } @@ -549,7 +552,7 @@ namespace ojph { void check_validity(const param_siz& siz, const param_cod& cod) { - int num_decomps = cod.get_num_decompositions(); + ui32 num_decomps = cod.get_num_decompositions(); num_subbands = 1 + 3 * num_decomps; if (cod.get_wavelet_kern() == param_cod::DWT_REV53) { @@ -585,9 +588,9 @@ namespace ojph { void read(infile_base *file); protected: - void set_rev_quant(int num_decomps, ui32 bit_depth, + void set_rev_quant(ui32 num_decomps, ui32 bit_depth, bool is_employing_color_transform); - void set_irrev_quant(int num_decomps); + void set_irrev_quant(ui32 num_decomps); protected: ui16 Lqcd; @@ -766,8 +769,8 @@ namespace ojph { // decomp_level is the decomposition level, starting from 1 for highest // resolution to num_decomps for the coarsest resolution dfs_dwt_type get_dwt_type(ui32 decomp_level) const; - int get_subband_idx(ui32 num_decompositions, ui32 resolution, - ui32 subband) const; + ui32 get_subband_idx(ui32 num_decompositions, ui32 resolution, + ui32 subband) const; private: // member variables ui16 Ldfs; // length of the segment marker diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index c65a2ebb..6348e98b 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -126,7 +126,7 @@ namespace ojph { cur_cb_height = 0; param_qcd* qcd = codestream->access_qcd(parent->get_comp_num()); const param_cod* cod = codestream->get_cod(); - int num_decomps = cod->get_num_decompositions(); + ui32 num_decomps = cod->get_num_decompositions(); this->K_max = qcd->get_Kmax(NULL, num_decomps, this->res_num, 
band_num); if (!reversible) { From fc377de1de6eabca5b193b6da8b36c5f189a2800 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sun, 31 Mar 2024 14:19:09 +1100 Subject: [PATCH 043/348] Warning/bug fix. --- src/core/codestream/ojph_params.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index affa222a..3795d4b8 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -1551,15 +1551,14 @@ namespace ojph { { int coeff_type = get_coeff_type(); if (coeff_type == 0) { - ui8 v; + si8 v; if (file->read(&v, 1) != 1) return false; K = v; } else if (coeff_type == 1) { - ui16 v; + si16 v; if (file->read(&v, 2) != 2) return false; - v = swap_byte(v); - K = v; + K = (si16)swap_byte((ui16)v); } else return false; From 1c08cf3b7a3842611a18d633c76b6c5932f1b600 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Tue, 2 Apr 2024 21:07:54 +1100 Subject: [PATCH 044/348] Added COC. ATK is used for wavelet transform. Modified wavelet synthesis (no acceleration yet). Wavelet analysis is broken. Close to enabling DFS in synthesis. 
--- src/core/codestream/ojph_codestream_local.h | 15 +- src/core/codestream/ojph_params.cpp | 10 +- src/core/codestream/ojph_params_local.h | 44 +- src/core/codestream/ojph_resolution.cpp | 811 +++++++++++--------- src/core/codestream/ojph_resolution.h | 15 +- src/core/codestream/ojph_subband.cpp | 9 +- src/core/codestream/ojph_subband.h | 2 +- src/core/codestream/ojph_tile.cpp | 2 +- src/core/codestream/ojph_tile_comp.cpp | 7 +- src/core/codestream/ojph_tile_comp.h | 3 +- src/core/common/ojph_mem.h | 10 + src/core/transform/ojph_transform.cpp | 235 ++++++ src/core/transform/ojph_transform.h | 42 + src/core/transform/ojph_transform_local.h | 38 + 14 files changed, 839 insertions(+), 404 deletions(-) diff --git a/src/core/codestream/ojph_codestream_local.h b/src/core/codestream/ojph_codestream_local.h index 5bfa09d4..8e77eb17 100644 --- a/src/core/codestream/ojph_codestream_local.h +++ b/src/core/codestream/ojph_codestream_local.h @@ -82,8 +82,19 @@ namespace ojph { { return &siz; } ojph::param_cod access_cod() //return externally wrapped cod { return ojph::param_cod(&cod); } - const param_cod* get_cod() //return internal code + const param_cod* get_cod() //return internal code { return &cod; } + const param_cod* get_cod(ui32 comp_num) //return internal code + { + if (used_coc_fields == 0) + return &cod; + else { + for (int i = 0; i < used_coc_fields; ++i) + if (coc[i].get_comp_num() == comp_num) + return coc + i; + return &cod; + } + } param_qcd* access_qcd(ui32 comp_num) { if (used_qcc_fields > 0) @@ -92,6 +103,8 @@ namespace ojph { return qcc + v; return &qcd; } + const param_dfs* access_dfs() + { if (dfs.exists()) return &dfs; else return NULL; } mem_fixed_allocator* get_allocator() { return allocator; } mem_elastic_allocator* get_elastic_alloc() { return elastic_alloc; } outfile_base* get_file() { return outfile; } diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 3795d4b8..ef652651 100644 --- 
a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -824,12 +824,12 @@ namespace ojph { ui8 t; if (file->read(&t, 1) != 1) OJPH_ERROR(0x00050122, "error reading COC segment"); - comp_idx = t; + comp_num = t; } else { - if (file->read(&comp_idx, 2) != 2) + if (file->read(&comp_num, 2) != 2) OJPH_ERROR(0x00050123, "error reading COC segment"); - comp_idx = swap_byte(comp_idx); + comp_num = swap_byte(comp_num); } if (file->read(&Scod, 1) != 1) OJPH_ERROR(0x00050124, "error reading COC segment"); @@ -1393,8 +1393,6 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// param_dfs::dfs_dwt_type param_dfs::get_dwt_type(ui32 decomp_level) const { - assert(decomp_level > 0 && decomp_level <= Ids); - decomp_level = ojph_min(decomp_level, Ids); ui32 d = decomp_level - 1; // decomp_level starts from 1 ui32 idx = d >> 2; // complete bytes @@ -1605,7 +1603,7 @@ namespace ojph { if (Natk > max_steps) { if (d != d_store) // was this allocated -- very unlikely delete[] d; - d = new data[Natk]; + d = new lifting_step[Natk]; max_steps = Natk; } diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index e8e43f8f..c08d750e 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -515,6 +515,10 @@ namespace ojph { ui16 get_dfs_index() const // cannot be more than 15 { return SPcod.num_decomp & 0xF; } + //////////////////////////////////////// + ui32 get_comp_num() + { assert(type == COC_MAIN); return comp_num; } + private: // Common variables cod_type type; // The type of this cod structure ui16 Lcod; // serves as Lcod and Scod @@ -524,7 +528,7 @@ namespace ojph { private: // COC only variables param_cod* parent; // parent COD structure - ui16 comp_idx; // component index of this COC structure + ui16 comp_num; // component index of this COC structure const param_atk* atk; // useful when SPcod.wavelet_trans > 1 }; @@ -788,16 +792,9 
@@ namespace ojph { // // /////////////////////////////////////////////////////////////////////////// - struct param_atk - { - // Limitations: - // Arbitrary filters (ARB) are not supported - // Up to 6 steps are supported -- more than 6 are not supported - // Only one coefficient per step -- first order filter - // Only even-indexed subsequence in first reconstruction step, - // m_init = 0 is supported + // data structures used by param_atk - public: // data structures used by this object + union lifting_step { struct irv_data { // si8 Oatk; // only for arbitrary filter // ui8 LCatk; // number of lifting coefficients in a step @@ -812,10 +809,18 @@ namespace ojph { si16 Aatk; // lifting coefficient }; - union data { - irv_data irv; - rev_data rev; - }; + irv_data irv; + rev_data rev; + }; + + struct param_atk + { + // Limitations: + // Arbitrary filters (ARB) are not supported + // Up to 6 steps are supported -- more than 6 are not supported + // Only one coefficient per step -- first order filter + // Only even-indexed subsequence in first reconstruction step, + // m_init = 0 is supported public: // member functions param_atk() { init(); } @@ -835,7 +840,7 @@ namespace ojph { void init(bool clear_all = true) { if (clear_all) memset(this, 0, sizeof(param_atk)); - d = d_store; max_steps = sizeof(d_store) / sizeof(data); + d = d_store; max_steps = sizeof(d_store) / sizeof(lifting_step); } void init_irv97(); void init_rev53(); @@ -849,16 +854,19 @@ namespace ojph { bool is_m_init0() const { return (Satk & 0x2000) == 0; } bool is_using_ws_extension() const { return (Satk & 0x4000) != 0; } const param_atk* get_atk(int index) const; - const data* get_step(ui32 s) const { assert(s < Natk); return d + s; } + const lifting_step* get_step(ui32 s) const + { assert(s < Natk); return d + s; } + const ui32 get_num_steps() const { return Natk; } + const float get_K() const { return Katk; } private: // member variables ui16 Latk; // structure length ui16 Satk; // carries a 
variety of information float Katk; // only for irreversible scaling factor K ui8 Natk; // number of lifting steps - data* d; // pointer to data, initialized to d_store + lifting_step* d; // pointer to data, initialized to d_store int max_steps; // maximum number of steps without memory allocation - data d_store[6]; // step coefficient + lifting_step d_store[6]; // lifting step coefficient param_atk* next; // used for chaining if more than one atk segment // exist in the codestream bool alloced_next; // true if next was allocated, not just set to an diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 03d1278d..105c57de 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -98,14 +98,39 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void resolution::pre_alloc(codestream* codestream, const rect& res_rect, - const rect& recon_res_rect, ui32 res_num) + const rect& recon_res_rect, + ui32 comp_num, ui32 res_num) { mem_fixed_allocator* allocator = codestream->get_allocator(); - const param_cod* cdp = codestream->get_cod(); - ui32 t = codestream->get_cod()->get_num_decompositions() + const param_cod* cdp = codestream->get_cod(comp_num); + ui32 t = cdp->get_num_decompositions() - codestream->get_skipped_res_for_recon(); bool skipped_res_for_recon = res_num > t; + const param_atk* atk = cdp->access_atk(); + param_dfs::dfs_dwt_type downsampling_style = param_dfs::BIDIR_DWT; + if (cdp->is_dfs_defined()) { + const param_dfs* dfs = codestream->access_dfs(); + if (dfs == NULL) { + OJPH_ERROR(0x00070001, "There is a problem with codestream " + "marker segments. 
COD/COC specifies the use of a DFS marker " + "but there are no DFS markers within the main codestream " + "headers"); + } + else { + ui16 dfs_idx = cdp->get_dfs_index(); + dfs = dfs->get_dfs(dfs_idx); + if (dfs == NULL) { + OJPH_ERROR(0x00070002, "There is a problem with codestream " + "marker segments. COD/COC specifies the use of a DFS marker " + "with index %d, but there are no such marker within the " + "main codestream headers", dfs_idx); + } + ui32 num_decomps = cdp->get_num_decompositions(); + downsampling_style = dfs->get_dwt_type(num_decomps - res_num + 1); + } + } + //create next resolution if (res_num > 0) { @@ -122,7 +147,8 @@ namespace ojph { next_res_rect.siz.h = try1 - try0; resolution::pre_alloc(codestream, next_res_rect, - skipped_res_for_recon ? recon_res_rect : next_res_rect, res_num - 1); + skipped_res_for_recon ? recon_res_rect : next_res_rect, + comp_num, res_num - 1); } //allocate subbands @@ -145,11 +171,11 @@ namespace ojph { band_rect.org.y = tby0; band_rect.siz.w = tbx1 - tbx0; band_rect.siz.h = tby1 - tby0; - subband::pre_alloc(codestream, band_rect, res_num); + subband::pre_alloc(codestream, band_rect, comp_num, res_num); } } else - subband::pre_alloc(codestream, res_rect, res_num); + subband::pre_alloc(codestream, res_rect, comp_num, res_num); //prealloc precincts size log_PP = cdp->get_log_precinct_size(res_num); @@ -166,13 +192,15 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - bool reversible = cdp->access_atk()->is_reversible(); - ui32 num_lines = reversible ? 
4 : 6; - allocator->pre_alloc_obj(num_lines); + ui32 num_steps = atk->get_num_steps(); + allocator->pre_alloc_obj(num_steps + 2); + allocator->pre_alloc_obj(num_steps + 2); ui32 width = res_rect.siz.w + 1; - for (ui32 i = 0; i < num_lines; ++i) + for (ui32 i = 0; i < num_steps; ++i) allocator->pre_alloc_data(width, 1); + allocator->pre_alloc_data(width, 1); + allocator->pre_alloc_data(width, 1); } } @@ -187,12 +215,12 @@ namespace ojph { { mem_fixed_allocator* allocator = codestream->get_allocator(); elastic = codestream->get_elastic_alloc(); - ui32 t, num_decomps = codestream->get_cod()->get_num_decompositions(); + const param_cod* cdp = codestream->get_cod(comp_num); + ui32 t, num_decomps = cdp->get_num_decompositions(); t = num_decomps - codestream->get_skipped_res_for_recon(); skipped_res_for_recon = res_num > t; t = num_decomps - codestream->get_skipped_res_for_read(); skipped_res_for_read = res_num > t; - const param_cod* cdp = codestream->get_cod(); this->comp_downsamp = comp_downsamp; this->parent_comp = parent_tile_comp; @@ -201,6 +229,31 @@ namespace ojph { this->comp_num = comp_num; this->res_num = res_num; this->num_bytes = 0; + this->atk = cdp->access_atk(); + this->downsampling_style = param_dfs::BIDIR_DWT; + if (cdp->is_dfs_defined()) { + const param_dfs* dfs = codestream->access_dfs(); + if (dfs == NULL) { + OJPH_ERROR(0x00070011, "There is a problem with codestream " + "marker segments. COD/COC specifies the use of a DFS marker " + "but there are no DFS markers within the main codestream " + "headers"); + } + else { + ui16 dfs_idx = cdp->get_dfs_index(); + dfs = dfs->get_dfs(dfs_idx); + if (dfs == NULL) { + OJPH_ERROR(0x00070012, "There is a problem with codestream " + "marker segments. 
COD/COC specifies the use of a DFS marker " + "with index %d, but there are no such marker within the " + "main codestream headers", dfs_idx); + } + ui32 num_decomps = cdp->get_num_decompositions(); + this->downsampling_style = + dfs->get_dwt_type(num_decomps - res_num + 1); + } + } + //finalize next resolution if (res_num > 0) { @@ -321,13 +374,33 @@ namespace ojph { //allocate lines if (skipped_res_for_recon == false) { - this->reversible = cdp->access_atk()->is_reversible(); - this->num_lines = this->reversible ? 4 : 6; - lines = allocator->post_alloc_obj(num_lines); - + this->atk = cdp->access_atk(); + this->reversible = atk->is_reversible(); + this->num_steps = atk->get_num_steps(); + // create line buffers and lifting_bufs + lines = allocator->post_alloc_obj(num_steps + 2); + ssp = allocator->post_alloc_obj(num_steps + 2); + sig = ssp + num_steps; + aug = ssp + num_steps + 1; + + // initiate lifting_bufs + for (ui32 i = 0; i < num_steps; ++i) { + new (ssp + i) lifting_buf; + ssp[i].line = lines + i; + }; + new (sig) lifting_buf; + sig->line = lines + num_steps; + new (aug) lifting_buf; + aug->line = lines + num_steps + 1; + + // initiate storage of line_buf ui32 width = res_rect.siz.w + 1; - for (ui32 i = 0; i < num_lines; ++i) - lines[i].wrap(allocator->post_alloc_data(width, 1), width, 1); + for (ui32 i = 0; i < num_steps; ++i) + ssp[i].line->wrap( + allocator->post_alloc_data(width, 1), width, 1); + sig->line->wrap(allocator->post_alloc_data(width, 1), width, 1); + aug->line->wrap(allocator->post_alloc_data(width, 1), width, 1); + cur_line = 0; vert_even = (res_rect.org.y & 1) == 0; horz_even = (res_rect.org.x & 1) == 0; @@ -340,271 +413,271 @@ namespace ojph { if (res_num == 0) { assert(num_bands == 1 && child_res == NULL); - bands[0].exchange_buf(lines + 0);//line at location 0 + bands[0].exchange_buf(ssp[0].line);//line at location 0 bands[0].push_line(); return; } - ui32 width = res_rect.siz.w; - if (width == 0) - return; - if (reversible) - { - 
//vertical transform - assert(num_lines >= 4); - if (vert_even) - { - rev_vert_wvlt_fwd_predict(lines, - cur_line > 1 ? lines + 2 : lines, - lines + 1, width); - rev_vert_wvlt_fwd_update(lines + 1, - cur_line > 2 ? lines + 3 : lines + 1, - lines + 2, width); - - // push to horizontal transform lines[2](L) and lines[1] (H) - if (cur_line >= 1) - { - rev_horz_wvlt_fwd_tx(lines + 1, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - } - if (cur_line >= 2) - { - rev_horz_wvlt_fwd_tx(lines + 2, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - } - - if (cur_line >= res_rect.siz.h - 1) - { //finished, so we need to process any lines left - if (cur_line) - { - if (vert_even) - { - rev_vert_wvlt_fwd_update(lines + 1, lines + 1, - lines, width); - //push lines[0] to L - rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - else - { - rev_vert_wvlt_fwd_predict(lines + 1, lines + 1, - lines, width); - rev_vert_wvlt_fwd_update(lines, - cur_line > 1 ? 
lines + 2 : lines, - lines + 1, width); - - // push to horizontal transform lines[1](L) and line[0] (H) - //line[0] to H - rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - //line[1] to L - rev_horz_wvlt_fwd_tx(lines + 1, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - } - else - { //only one line - if (vert_even) - { - //push to L - rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - else - { - si32* sp = lines[0].i32; - for (ui32 i = width; i > 0; --i) - *sp++ <<= 1; - //push to H - rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - } - } - } - - rotate_buffers(lines, lines + 1, lines + 2, lines + 3); - - ++cur_line; - vert_even = !vert_even; - } - else - { - //vertical transform - assert(num_lines >= 6); - if (vert_even) - { - irrev_vert_wvlt_step(lines + 0, - cur_line > 1 ? lines + 2 : lines, - lines + 1, 0, width); - irrev_vert_wvlt_step(lines + 1, - cur_line > 2 ? lines + 3 : lines + 1, - lines + 2, 1, width); - irrev_vert_wvlt_step(lines + 2, - cur_line > 3 ? lines + 4 : lines + 2, - lines + 3, 2, width); - irrev_vert_wvlt_step(lines + 3, - cur_line > 4 ? 
lines + 5 : lines + 3, - lines + 4, 3, width); - - // push to horizontal transform lines[4](L) and lines[3] (H) - if (cur_line >= 3) - { - irrev_vert_wvlt_K(lines + 3, lines + 5, - false, width); - irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - } - if (cur_line >= 4) - { - irrev_vert_wvlt_K(lines + 4, lines + 5, - true, width); - irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - } - - if (cur_line >= res_rect.siz.h - 1) - { //finished, so we need to process any left line - if (cur_line) - { - if (vert_even) - { - irrev_vert_wvlt_step(lines + 1, lines + 1, - lines, 1, width); - irrev_vert_wvlt_step(lines, - cur_line > 1 ? lines + 2 : lines, - lines + 1, 2, width); - irrev_vert_wvlt_step(lines + 1, - cur_line > 2 ? lines + 3 : lines + 1, - lines + 2, 3, width); - irrev_vert_wvlt_step(lines + 1, lines + 1, - lines, 3, width); - //push lines[2] to L, lines[1] to H, and lines[0] to L - if (cur_line >= 2) - { - irrev_vert_wvlt_K(lines + 2, lines + 5, - true, width); - irrev_horz_wvlt_fwd_tx(lines + 5, - child_res->get_line(), bands[1].get_line(), - width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - irrev_vert_wvlt_K(lines + 1, lines + 5, - false, width); - irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - irrev_vert_wvlt_K(lines, lines + 5, - true, width); - irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - else - { - irrev_vert_wvlt_step(lines + 1, lines + 1, - lines, 0, width); - irrev_vert_wvlt_step(lines, - cur_line > 1 ? lines + 2 : lines, - lines + 1, 1, width); - irrev_vert_wvlt_step(lines + 1, - cur_line > 2 ? 
lines + 3 : lines + 1, - lines + 2, 2, width); - irrev_vert_wvlt_step(lines + 2, - cur_line > 3 ? lines + 4 : lines + 2, - lines + 3, 3, width); - - irrev_vert_wvlt_step(lines + 1, lines + 1, - lines, 2, width); - irrev_vert_wvlt_step(lines, - cur_line > 1 ? lines + 2 : lines, - lines + 1, 3, width); - - //push lines[3] L, lines[2] H, lines[1] L, and lines[0] H - if (cur_line >= 3) - { - irrev_vert_wvlt_K(lines + 3, lines + 5, - true, width); - irrev_horz_wvlt_fwd_tx(lines + 5, - child_res->get_line(), bands[1].get_line(), - width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - if (cur_line >= 2) - irrev_vert_wvlt_K(lines + 2, lines + 5, false, width); - else - irrev_vert_wvlt_K(lines, lines + 5, false, width); - irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - irrev_vert_wvlt_K(lines + 1, lines + 5, - true, width); - irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - irrev_vert_wvlt_K(lines, lines + 5, - false, width); - irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - } - } - else - { //only one line - if (vert_even) - { - //push to L - irrev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - bands[1].get_line(), width, horz_even); - bands[1].push_line(); - child_res->push_line(); - } - else - { - float* sp = lines[0].f32; - for (ui32 i = width; i > 0; --i) - *sp++ *= 2.0f; - //push to H - irrev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - bands[3].get_line(), width, horz_even); - bands[2].push_line(); - bands[3].push_line(); - } - } - } - - rotate_buffers(lines, lines + 1, lines + 2, lines + 3, lines + 4, - lines + 5); - - ++cur_line; - vert_even = !vert_even; - } + //ui32 width = res_rect.siz.w; + //if (width == 0) + // return; + //if (reversible) + 
//{ + // //vertical transform + // assert(num_lines >= 4); + // if (vert_even) + // { + // rev_vert_wvlt_fwd_predict(lines, + // cur_line > 1 ? lines + 2 : lines, + // lines + 1, width); + // rev_vert_wvlt_fwd_update(lines + 1, + // cur_line > 2 ? lines + 3 : lines + 1, + // lines + 2, width); + + // // push to horizontal transform lines[2](L) and lines[1] (H) + // if (cur_line >= 1) + // { + // rev_horz_wvlt_fwd_tx(lines + 1, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // } + // if (cur_line >= 2) + // { + // rev_horz_wvlt_fwd_tx(lines + 2, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // } + + // if (cur_line >= res_rect.siz.h - 1) + // { //finished, so we need to process any lines left + // if (cur_line) + // { + // if (vert_even) + // { + // rev_vert_wvlt_fwd_update(lines + 1, lines + 1, + // lines, width); + // //push lines[0] to L + // rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // else + // { + // rev_vert_wvlt_fwd_predict(lines + 1, lines + 1, + // lines, width); + // rev_vert_wvlt_fwd_update(lines, + // cur_line > 1 ? 
lines + 2 : lines, + // lines + 1, width); + + // // push to horizontal transform lines[1](L) and line[0] (H) + // //line[0] to H + // rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // //line[1] to L + // rev_horz_wvlt_fwd_tx(lines + 1, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // } + // else + // { //only one line + // if (vert_even) + // { + // //push to L + // rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // else + // { + // si32* sp = lines[0].i32; + // for (ui32 i = width; i > 0; --i) + // *sp++ <<= 1; + // //push to H + // rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // } + // } + // } + + // rotate_buffers(lines, lines + 1, lines + 2, lines + 3); + + // ++cur_line; + // vert_even = !vert_even; + //} + //else + //{ + // //vertical transform + // assert(num_lines >= 6); + // if (vert_even) + // { + // irrev_vert_wvlt_step(lines + 0, + // cur_line > 1 ? lines + 2 : lines, + // lines + 1, 0, width); + // irrev_vert_wvlt_step(lines + 1, + // cur_line > 2 ? lines + 3 : lines + 1, + // lines + 2, 1, width); + // irrev_vert_wvlt_step(lines + 2, + // cur_line > 3 ? lines + 4 : lines + 2, + // lines + 3, 2, width); + // irrev_vert_wvlt_step(lines + 3, + // cur_line > 4 ? 
lines + 5 : lines + 3, + // lines + 4, 3, width); + + // // push to horizontal transform lines[4](L) and lines[3] (H) + // if (cur_line >= 3) + // { + // irrev_vert_wvlt_K(lines + 3, lines + 5, + // false, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // } + // if (cur_line >= 4) + // { + // irrev_vert_wvlt_K(lines + 4, lines + 5, + // true, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // } + + // if (cur_line >= res_rect.siz.h - 1) + // { //finished, so we need to process any left line + // if (cur_line) + // { + // if (vert_even) + // { + // irrev_vert_wvlt_step(lines + 1, lines + 1, + // lines, 1, width); + // irrev_vert_wvlt_step(lines, + // cur_line > 1 ? lines + 2 : lines, + // lines + 1, 2, width); + // irrev_vert_wvlt_step(lines + 1, + // cur_line > 2 ? 
lines + 3 : lines + 1, + // lines + 2, 3, width); + // irrev_vert_wvlt_step(lines + 1, lines + 1, + // lines, 3, width); + // //push lines[2] to L, lines[1] to H, and lines[0] to L + // if (cur_line >= 2) + // { + // irrev_vert_wvlt_K(lines + 2, lines + 5, + // true, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, + // child_res->get_line(), bands[1].get_line(), + // width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // irrev_vert_wvlt_K(lines + 1, lines + 5, + // false, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // irrev_vert_wvlt_K(lines, lines + 5, + // true, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // else + // { + // irrev_vert_wvlt_step(lines + 1, lines + 1, + // lines, 0, width); + // irrev_vert_wvlt_step(lines, + // cur_line > 1 ? lines + 2 : lines, + // lines + 1, 1, width); + // irrev_vert_wvlt_step(lines + 1, + // cur_line > 2 ? lines + 3 : lines + 1, + // lines + 2, 2, width); + // irrev_vert_wvlt_step(lines + 2, + // cur_line > 3 ? lines + 4 : lines + 2, + // lines + 3, 3, width); + + // irrev_vert_wvlt_step(lines + 1, lines + 1, + // lines, 2, width); + // irrev_vert_wvlt_step(lines, + // cur_line > 1 ? 
lines + 2 : lines, + // lines + 1, 3, width); + + // //push lines[3] L, lines[2] H, lines[1] L, and lines[0] H + // if (cur_line >= 3) + // { + // irrev_vert_wvlt_K(lines + 3, lines + 5, + // true, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, + // child_res->get_line(), bands[1].get_line(), + // width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // if (cur_line >= 2) + // irrev_vert_wvlt_K(lines + 2, lines + 5, false, width); + // else + // irrev_vert_wvlt_K(lines, lines + 5, false, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // irrev_vert_wvlt_K(lines + 1, lines + 5, + // true, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // irrev_vert_wvlt_K(lines, lines + 5, + // false, width); + // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // } + // } + // else + // { //only one line + // if (vert_even) + // { + // //push to L + // irrev_horz_wvlt_fwd_tx(lines, child_res->get_line(), + // bands[1].get_line(), width, horz_even); + // bands[1].push_line(); + // child_res->push_line(); + // } + // else + // { + // float* sp = lines[0].f32; + // for (ui32 i = width; i > 0; --i) + // *sp++ *= 2.0f; + // //push to H + // irrev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), + // bands[3].get_line(), width, horz_even); + // bands[2].push_line(); + // bands[3].push_line(); + // } + // } + // } + + // rotate_buffers(lines, lines + 1, lines + 2, lines + 3, lines + 4, + // lines + 5); + + // ++cur_line; + // vert_even = !vert_even; + //} } ////////////////////////////////////////////////////////////////////////// @@ -621,147 +694,159 @@ namespace ojph { ui32 width = res_rect.siz.w; if (width == 0) 
- return lines; + return NULL; if (reversible) { - assert(num_lines >= 4); if (res_rect.siz.h > 1) { - do + if (sig->active) { + sig->active = false; + return sig->line; + }; + for (;;) { //horizontal transform if (cur_line < res_rect.siz.h) { - if (vert_even) - rev_horz_wvlt_bwd_tx(lines, + if (vert_even) { // even + rev_horz_syn(atk, aug->line, child_res->pull_line(), bands[1].pull_line(), width, horz_even); - else - rev_horz_wvlt_bwd_tx(lines, + aug->active = true; + vert_even = !vert_even; + ++cur_line; + continue; + } + else { + rev_horz_syn(atk, sig->line, bands[2].pull_line(), bands[3].pull_line(), width, horz_even); + sig->active = true; + vert_even = !vert_even; + ++cur_line; + } } //vertical transform - if (!vert_even) + for (ui32 i = 0; i < num_steps; ++i) { - rev_vert_wvlt_bwd_update( - cur_line > 1 ? lines + 2 : lines, - cur_line < res_rect.siz.h ? lines : lines + 2, - lines + 1, width); - rev_vert_wvlt_bwd_predict( - cur_line > 2 ? lines + 3 : lines + 1, - cur_line < res_rect.siz.h + 1 ? lines + 1 : lines + 3, - lines + 2, width); + if (aug->active && + (sig->active == true || ssp[i].active == true)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? 
ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(num_steps - i - 1); + rev_vert_syn_step(s, dp, sp1, sp2, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - vert_even = !vert_even; - rotate_buffers(lines, lines + 1, lines + 2, lines + 3); - ++cur_line; - } while (cur_line < 3); - memcpy(lines[0].i32, lines[3].i32, res_rect.siz.w * sizeof(si32)); - return lines; + if (aug->active) { + aug->active = false; + return aug->line; + } + if (sig->active) { + sig->active = false; + return sig->line; + }; + } } - else if (res_rect.siz.h == 1) + else { if (vert_even) - { - rev_horz_wvlt_bwd_tx(lines, child_res->pull_line(), + rev_horz_syn(atk, aug->line, child_res->pull_line(), bands[1].pull_line(), width, horz_even); - } else { - rev_horz_wvlt_bwd_tx(lines, bands[2].pull_line(), + rev_horz_syn(atk, aug->line, bands[2].pull_line(), bands[3].pull_line(), width, horz_even); - if (width) - { - si32* sp = lines[0].i32; - for (ui32 i = width; i > 0; --i) - *sp++ >>= 1; - } + si32* sp = aug->line->i32; + for (ui32 i = width; i > 0; --i) + *sp++ >>= 1; } - return lines; + return aug->line; } - else - return lines; } else { - assert(num_lines >= 6); if (res_rect.siz.h > 1) { - do + if (sig->active) { + sig->active = false; + return sig->line; + }; + for (;;) { //horizontal transform if (cur_line < res_rect.siz.h) { - if (vert_even) - { - irrev_horz_wvlt_bwd_tx(lines, + if (vert_even) { // even + irv_horz_syn(atk, aug->line, child_res->pull_line(), bands[1].pull_line(), width, horz_even); - irrev_vert_wvlt_K(lines, lines, false, width); + aug->active = true; + vert_even = !vert_even; + ++cur_line; + + const float K = atk->get_K(); + irv_vert_syn_K(K, aug->line, width); + + continue; } - else - { - irrev_horz_wvlt_bwd_tx(lines, + else { + irv_horz_syn(atk, sig->line, bands[2].pull_line(), bands[3].pull_line(), width, horz_even); - irrev_vert_wvlt_K(lines, lines, true, width); + sig->active = true; + vert_even = !vert_even; + ++cur_line; + + 
const float K_inv = 1.0f / atk->get_K(); + irv_vert_syn_K(K_inv, sig->line, width); } } //vertical transform - if (!vert_even) + for (ui32 i = 0; i < num_steps; ++i) { - irrev_vert_wvlt_step( - cur_line > 1 ? lines + 2 : lines, - cur_line < res_rect.siz.h ? lines : lines + 2, - lines + 1, 7, width); - irrev_vert_wvlt_step( - cur_line > 2 ? lines + 3 : lines + 1, - cur_line < res_rect.siz.h + 1 ? lines + 1 : lines + 3, - lines + 2, 6, width); - irrev_vert_wvlt_step( - cur_line > 3 ? lines + 4 : lines + 2, - cur_line < res_rect.siz.h + 2 ? lines + 2 : lines + 4, - lines + 3, 5, width); - irrev_vert_wvlt_step( - cur_line > 4 ? lines + 5 : lines + 3, - cur_line < res_rect.siz.h + 3 ? lines + 3 : lines + 5, - lines + 4, 4, width); + if (aug->active && + (sig->active == true || ssp[i].active == true)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(num_steps - i - 1); + irv_vert_syn_step(s, dp, sp1, sp2, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - vert_even = !vert_even; - rotate_buffers(lines, lines + 1, lines + 2, lines + 3, lines + 4, - lines + 5); - ++cur_line; - } while (cur_line < 5); - memcpy(lines[0].f32, lines[5].f32, res_rect.siz.w * sizeof(float)); - return lines; + if (aug->active) { + aug->active = false; + return aug->line; + } + if (sig->active) { + sig->active = false; + return sig->line; + }; + } } - else if (res_rect.siz.h == 1) + else { if (vert_even) - { - irrev_horz_wvlt_bwd_tx(lines, child_res->pull_line(), + irv_horz_syn(atk, aug->line, child_res->pull_line(), bands[1].pull_line(), width, horz_even); - } else { - irrev_horz_wvlt_bwd_tx(lines, bands[2].pull_line(), + irv_horz_syn(atk, aug->line, bands[2].pull_line(), bands[3].pull_line(), width, horz_even); - if (width) - { - float* sp = lines[0].f32; - for (ui32 i = width; i > 0; --i) - *sp++ *= 0.5f; - } + float *sp = 
aug->line->f32; + for (ui32 i = width; i > 0; --i) + *sp++ *= 0.5f; } - return lines; + return aug->line; } - else - return lines; } } diff --git a/src/core/codestream/ojph_resolution.h b/src/core/codestream/ojph_resolution.h index e110811b..7a7d43d5 100644 --- a/src/core/codestream/ojph_resolution.h +++ b/src/core/codestream/ojph_resolution.h @@ -64,14 +64,15 @@ namespace ojph { public: static void pre_alloc(codestream *codestream, const rect& res_rect, - const rect& recon_res_rect, ui32 res_num); + const rect& recon_res_rect, + ui32 comp_num, ui32 res_num); void finalize_alloc(codestream *codestream, const rect& res_rect, const rect& recon_res_rect, ui32 comp_num, ui32 res_num, point comp_downsamp, tile_comp *parent_tile_comp, resolution *parent_res); - line_buf* get_line() { return lines + 0; } + line_buf* get_line() { return ssp[0].line; } void push_line(); line_buf* pull_line(); rect get_rect() { return res_rect; } @@ -90,14 +91,16 @@ namespace ojph { private: bool reversible, skipped_res_for_read, skipped_res_for_recon; - ui32 num_lines; + ui32 num_steps; ui32 num_bands, res_num; ui32 comp_num; ui32 num_bytes; // number of bytes in this resolution // used for tilepart length point comp_downsamp; - rect res_rect; - line_buf *lines; + rect res_rect; // resolution rectangle + line_buf* lines; // used to store lines + lifting_buf *ssp; // step state pointer + lifting_buf *aug, *sig; subband *bands; tile_comp *parent_comp; resolution *parent_res, *child_res; @@ -109,6 +112,8 @@ namespace ojph { int tag_tree_size; ui32 level_index[20]; //more than enough point cur_precinct_loc; //used for progressing spatial modes (2, 3, 4) + const param_atk* atk; + param_dfs::dfs_dwt_type downsampling_style; //wavelet machinery ui32 cur_line; bool vert_even, horz_even; diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index 6348e98b..dbef3b75 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -55,7 
+55,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void subband::pre_alloc(codestream *codestream, const rect &band_rect, - ui32 res_num) + ui32 comp_num, ui32 res_num) { mem_fixed_allocator* allocator = codestream->get_allocator(); @@ -63,7 +63,7 @@ namespace ojph { if (empty) return; - const param_cod* cdp = codestream->get_cod(); + const param_cod* cdp = codestream->get_cod(comp_num); size log_cb = cdp->get_log_block_dims(); size log_PP = cdp->get_log_precinct_size(res_num); @@ -111,7 +111,7 @@ namespace ojph { this->band_rect = band_rect; this->parent = res; - const param_cod* cdp = codestream->get_cod(); + const param_cod* cdp = codestream->get_cod(parent->get_comp_num()); this->reversible = cdp->access_atk()->is_reversible(); size log_cb = cdp->get_log_block_dims(); log_PP = cdp->get_log_precinct_size(res_num); @@ -125,8 +125,7 @@ namespace ojph { cur_line = 0; cur_cb_height = 0; param_qcd* qcd = codestream->access_qcd(parent->get_comp_num()); - const param_cod* cod = codestream->get_cod(); - ui32 num_decomps = cod->get_num_decompositions(); + ui32 num_decomps = cdp->get_num_decompositions(); this->K_max = qcd->get_Kmax(NULL, num_decomps, this->res_num, band_num); if (!reversible) { diff --git a/src/core/codestream/ojph_subband.h b/src/core/codestream/ojph_subband.h index 3bcc6edb..9928c5ef 100644 --- a/src/core/codestream/ojph_subband.h +++ b/src/core/codestream/ojph_subband.h @@ -64,7 +64,7 @@ namespace ojph { friend struct precinct; public: static void pre_alloc(codestream *codestream, const rect& band_rect, - ui32 res_num); + ui32 comp_num, ui32 res_num); void finalize_alloc(codestream *codestream, const rect& band_rect, resolution* res, ui32 res_num, ui32 subband_num); diff --git a/src/core/codestream/ojph_tile.cpp b/src/core/codestream/ojph_tile.cpp index b7cb52cd..48f8bb56 100644 --- a/src/core/codestream/ojph_tile.cpp +++ b/src/core/codestream/ojph_tile.cpp @@ -116,7 +116,7 @@ namespace ojph { 
recon_comp_rect.siz.w = recon_tcx1 - recon_tcx0; recon_comp_rect.siz.h = recon_tcy1 - recon_tcy0; - tile_comp::pre_alloc(codestream, comp_rect, recon_comp_rect); + tile_comp::pre_alloc(codestream, i, comp_rect, recon_comp_rect); width = ojph_max(width, recon_comp_rect.siz.w); } diff --git a/src/core/codestream/ojph_tile_comp.cpp b/src/core/codestream/ojph_tile_comp.cpp index a2124e8b..69ed0bcb 100644 --- a/src/core/codestream/ojph_tile_comp.cpp +++ b/src/core/codestream/ojph_tile_comp.cpp @@ -51,7 +51,8 @@ namespace ojph { { ////////////////////////////////////////////////////////////////////////// - void tile_comp::pre_alloc(codestream *codestream, const rect& comp_rect, + void tile_comp::pre_alloc(codestream *codestream, ui32 comp_num, + const rect& comp_rect, const rect& recon_comp_rect) { mem_fixed_allocator* allocator = codestream->get_allocator(); @@ -60,7 +61,7 @@ namespace ojph { ui32 num_decomps = codestream->access_cod().get_num_decompositions(); allocator->pre_alloc_obj(1); - resolution::pre_alloc(codestream, comp_rect, recon_comp_rect, + resolution::pre_alloc(codestream, comp_rect, recon_comp_rect, comp_num, num_decomps); } @@ -72,7 +73,7 @@ namespace ojph { mem_fixed_allocator* allocator = codestream->get_allocator(); //allocate a resolution - num_decomps = codestream->get_cod()->get_num_decompositions(); + num_decomps = codestream->get_cod(comp_num)->get_num_decompositions(); comp_downsamp = codestream->get_siz()->get_downsampling(comp_num); this->comp_rect = comp_rect; diff --git a/src/core/codestream/ojph_tile_comp.h b/src/core/codestream/ojph_tile_comp.h index d7304d96..def39e55 100644 --- a/src/core/codestream/ojph_tile_comp.h +++ b/src/core/codestream/ojph_tile_comp.h @@ -62,7 +62,8 @@ namespace ojph { class tile_comp { public: - static void pre_alloc(codestream *codestream, const rect& comp_rect, + static void pre_alloc(codestream *codestream, ui32 comp_num, + const rect& comp_rect, const rect& recon_comp_rect); void finalize_alloc(codestream 
*codestream, tile *parent, ui32 comp_num, const rect& comp_rect, diff --git a/src/core/common/ojph_mem.h b/src/core/common/ojph_mem.h index 712727c0..d7497cdb 100644 --- a/src/core/common/ojph_mem.h +++ b/src/core/common/ojph_mem.h @@ -134,6 +134,8 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////////// struct line_buf { + line_buf() : size(0), pre_size(0), i32(0) {} + template void pre_alloc(mem_fixed_allocator *p, size_t num_ele, ui32 pre_size) { @@ -157,6 +159,14 @@ namespace ojph { }; }; + ///////////////////////////////////////////////////////////////////////////// + struct lifting_buf + { + lifting_buf() { line = NULL; active = false; } + line_buf *line; + bool active; + }; + ///////////////////////////////////////////////////////////////////////////// struct coded_lists { diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index b6919032..46231d63 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -41,6 +41,8 @@ #include "ojph_mem.h" #include "ojph_transform.h" #include "ojph_transform_local.h" +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" namespace ojph { struct line_buf; @@ -81,6 +83,24 @@ namespace ojph { (line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even) = NULL; + + + + + ///////////////////////////////////////////////////////////////////////// + void (*rev_vert_syn_step) + (const lifting_step* s, line_buf* aug, const line_buf* sig, + line_buf* other, ui32 repeat) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*rev_horz_syn) + (const param_atk* atk, line_buf* dst, line_buf* lsrc, + line_buf* hsrc, ui32 width, bool even) = NULL; + + + + + ///////////////////////////////////////////////////////////////////////// // Irreversible functions ///////////////////////////////////////////////////////////////////////// @@ -105,6 +125,27 @@ 
namespace ojph { (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even) = NULL; + + + + + ///////////////////////////////////////////////////////////////////////// + void (*irv_vert_syn_step) + (const lifting_step* s, line_buf* aug, const line_buf* sig, + line_buf* other, ui32 repeat) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*irv_vert_syn_K)(const float K, line_buf* aug, ui32 repeat) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*irv_horz_syn) + (const param_atk* atk, line_buf* dst, line_buf* lsrc, + line_buf* hsrc, ui32 width, bool even) = NULL; + + + + + //////////////////////////////////////////////////////////////////////////// static bool wavelet_transform_functions_initialized = false; @@ -122,11 +163,19 @@ namespace ojph { rev_vert_wvlt_bwd_predict = gen_rev_vert_wvlt_bwd_predict; rev_vert_wvlt_bwd_update = gen_rev_vert_wvlt_bwd_update; rev_horz_wvlt_bwd_tx = gen_rev_horz_wvlt_bwd_tx; + + rev_vert_syn_step = gen_rev_vert_syn_step; + rev_horz_syn = gen_rev_horz_syn; + irrev_vert_wvlt_step = gen_irrev_vert_wvlt_step; irrev_vert_wvlt_K = gen_irrev_vert_wvlt_K; irrev_horz_wvlt_fwd_tx = gen_irrev_horz_wvlt_fwd_tx; irrev_horz_wvlt_bwd_tx = gen_irrev_horz_wvlt_bwd_tx; + irv_vert_syn_step = gen_irv_vert_syn_step; + irv_vert_syn_K = gen_irv_vert_syn_K; + irv_horz_syn = gen_irv_horz_syn; + #ifndef OJPH_DISABLE_INTEL_SIMD int level = get_cpu_ext_level(); @@ -326,6 +375,96 @@ namespace ojph { } + + + + ////////////////////////////////////////////////////////////////////////// + void gen_rev_vert_syn_step(const lifting_step* s, line_buf* aug, + const line_buf* sig, line_buf* other, + ui32 repeat) + { + si32 a = s->rev.Aatk; + si32 b = s->rev.Batk; + ui32 e = s->rev.Eatk; + + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + if (a >= 0) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b + a * (*src1++ + *src2++)) >> e; + else + for 
(ui32 i = repeat; i > 0; --i) + *dst++ += (b - a * (*src1++ + *src2++)) >> e; + } + + ////////////////////////////////////////////////////////////////////////// + void gen_rev_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, + line_buf *hsrc, ui32 width, bool even) + { + if (width > 1) + { + bool ev = even; + si32* oth = hsrc->i32, * aug = lsrc->i32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) + { + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + si32 a = s->rev.Aatk; + si32 b = s->rev.Batk; + ui32 e = s->rev.Eatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const si32* sp = oth + (ev ? 0 : 1); + si32* dp = aug; + if (a >= 0) + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b + a * (sp[-1] + sp[0])) >> e; + else + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp += (b - a * (sp[-1] + sp[0])) >> e; + + // swap buffers + si32* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; + } + + // combine both lsrc and hsrc into dst + si32* sph = hsrc->i32; + si32* spl = lsrc->i32; + si32* dp = dst->i32; + ui32 w = width; + if (!even) + { + *dp++ = *sph++; --w; + } + for (; w > 1; w -= 2) + { + *dp++ = *spl++; *dp++ = *sph++; + } + if (w) + { + *dp++ = *spl++; --w; + } + } + else { + if (even) + dst->i32[0] = lsrc->i32[0]; + else + dst->i32[0] = hsrc->i32[0] >> 1; + } + } + + + + + ////////////////////////////////////////////////////////////////////////// void gen_irrev_vert_wvlt_step(const line_buf* line_src1, const line_buf* line_src2, @@ -499,6 +638,102 @@ namespace ojph { } } + + + + + ////////////////////////////////////////////////////////////////////////// + void gen_irv_vert_syn_step(const lifting_step* s, line_buf* aug, + const line_buf* sig, line_buf* 
other, + ui32 repeat) + { + float a = s->irv.Aatk; + + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + for (ui32 i = repeat; i > 0; --i) + *dst++ -= a * (*src1++ + *src2++); + } + + ////////////////////////////////////////////////////////////////////////// + void gen_irv_vert_syn_K(const float K, line_buf* aug, ui32 repeat) + { + float* dst = aug->f32; + for (ui32 i = repeat; i > 0; --i) + *dst++ *= K; + } + + ////////////////////////////////////////////////////////////////////////// + void gen_irv_horz_syn(const param_atk* atk, line_buf* dst, line_buf* lsrc, + line_buf* hsrc, ui32 width, bool even) + { + if (width > 1) + { + bool ev = even; + float* oth = hsrc->f32, * aug = lsrc->f32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass + + { + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + + dp = aug; + for (ui32 i = aug_width; i > 0; --i) + *dp++ *= K; + + dp = oth; + for (ui32 i = oth_width; i > 0; --i) + *dp++ *= K_inv; + } + + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) + { + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + float a = s->irv.Aatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const float* sp = oth + (ev ? 
0 : 1); + float* dp = aug; + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= a * (sp[-1] + sp[0]); + + // swap buffers + float* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; + } + + // combine both lsrc and hsrc into dst + float* sph = hsrc->f32; + float* spl = lsrc->f32; + float* dp = dst->f32; + ui32 w = width; + if (!even) + { *dp++ = *sph++; --w; } + for (; w > 1; w -= 2) + { *dp++ = *spl++; *dp++ = *sph++; } + if (w) + { *dp++ = *spl++; --w; } + } + else { + if (even) + dst->f32[0] = lsrc->f32[0]; + else + dst->f32[0] = hsrc->f32[0] * 0.5f; + } + } + + + + + #endif // !OJPH_ENABLE_WASM_SIMD } diff --git a/src/core/transform/ojph_transform.h b/src/core/transform/ojph_transform.h index 002235d3..77ede96f 100644 --- a/src/core/transform/ojph_transform.h +++ b/src/core/transform/ojph_transform.h @@ -44,6 +44,8 @@ namespace ojph { struct line_buf; namespace local { + union lifting_step; + struct param_atk; ////////////////////////////////////////////////////////////////////////// void init_wavelet_transform_functions(); @@ -80,6 +82,24 @@ namespace ojph { extern void (*rev_horz_wvlt_bwd_tx) (line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even); + + + + + ///////////////////////////////////////////////////////////////////////// + extern void (*rev_vert_syn_step) + (const lifting_step* s, line_buf* aug, const line_buf* sig, + line_buf* other, ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + extern void (*rev_horz_syn) + (const param_atk* atk, line_buf* dst, line_buf* lsrc, + line_buf* hsrc, ui32 width, bool even); + + + + + ///////////////////////////////////////////////////////////////////////// // Irreversible functions ///////////////////////////////////////////////////////////////////////// @@ -102,6 +122,28 @@ namespace ojph { extern void (*irrev_horz_wvlt_bwd_tx) (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even); + 
+ + + + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_vert_syn_step) + (const lifting_step* s, line_buf* aug, const line_buf* sig, + line_buf* other, ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_vert_syn_K) + (const float K, line_buf* aug, ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_horz_syn) + (const param_atk* atk, line_buf* dst, line_buf* lsrc, + line_buf* hsrc, ui32 width, bool even); + + + + + } } diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index 2bf041c8..42cec378 100644 --- a/src/core/transform/ojph_transform_local.h +++ b/src/core/transform/ojph_transform_local.h @@ -44,6 +44,7 @@ namespace ojph { struct line_buf; namespace local { + struct param_atk; ////////////////////////////////////////////////////////////////////////// struct LIFTING_FACTORS @@ -93,6 +94,23 @@ namespace ojph { void gen_rev_horz_wvlt_bwd_tx(line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even); + + + + + ///////////////////////////////////////////////////////////////////////// + void gen_rev_vert_syn_step(const lifting_step* s, line_buf* aug, + const line_buf* sig, line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void gen_rev_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, + line_buf *hsrc, ui32 width, bool even); + + + + + ////////////////////////////////////////////////////////////////////////// // Irreversible functions ////////////////////////////////////////////////////////////////////////// @@ -113,6 +131,26 @@ namespace ojph { void gen_irrev_horz_wvlt_bwd_tx(line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even); + + + + + ///////////////////////////////////////////////////////////////////////// + void gen_irv_vert_syn_step(const 
lifting_step* s, line_buf* aug, + const line_buf* sig, line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void gen_irv_vert_syn_K(const float K, line_buf* aug, ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void gen_irv_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, + line_buf *hsrc, ui32 width, bool even); + + + + + ////////////////////////////////////////////////////////////////////////// // // From 2c74db3ab52487b2d74c63e58bce5c89f0547e02 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Tue, 2 Apr 2024 21:16:57 +1100 Subject: [PATCH 045/348] Syntax error fix. --- src/core/codestream/ojph_params_local.h | 4 +-- src/core/codestream/ojph_resolution.cpp | 43 +------------------------ 2 files changed, 3 insertions(+), 44 deletions(-) diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index c08d750e..43c1181d 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -856,8 +856,8 @@ namespace ojph { const param_atk* get_atk(int index) const; const lifting_step* get_step(ui32 s) const { assert(s < Natk); return d + s; } - const ui32 get_num_steps() const { return Natk; } - const float get_K() const { return Katk; } + ui32 get_num_steps() const { return Natk; } + float get_K() const { return Katk; } private: // member variables ui16 Latk; // structure length diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 105c57de..7f226445 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -38,6 +38,7 @@ #include #include +#include #include "ojph_mem.h" #include "ojph_params.h" @@ -54,48 +55,6 @@ namespace ojph { namespace local { - - ////////////////////////////////////////////////////////////////////////// - static void rotate_buffers(line_buf* line1, line_buf* line2, - 
line_buf* line3, line_buf* line4) - { - assert(line1->size == line2->size && - line1->pre_size == line2->pre_size && - line1->size == line3->size && - line1->pre_size == line3->pre_size && - line1->size == line4->size && - line1->pre_size == line4->pre_size); - si32* p = line4->i32; - line4->i32 = line3->i32; - line3->i32 = line2->i32; - line2->i32 = line1->i32; - line1->i32 = p; - } - - ////////////////////////////////////////////////////////////////////////// - static void rotate_buffers(line_buf* line1, line_buf* line2, - line_buf* line3, line_buf* line4, - line_buf* line5, line_buf* line6) - { - assert(line1->size == line2->size && - line1->pre_size == line2->pre_size && - line1->size == line3->size && - line1->pre_size == line3->pre_size && - line1->size == line4->size && - line1->pre_size == line4->pre_size && - line1->size == line5->size && - line1->pre_size == line5->pre_size && - line1->size == line6->size && - line1->pre_size == line6->pre_size); - si32* p = line6->i32; - line6->i32 = line5->i32; - line5->i32 = line4->i32; - line4->i32 = line3->i32; - line3->i32 = line2->i32; - line2->i32 = line1->i32; - line1->i32 = p; - } - ////////////////////////////////////////////////////////////////////////// void resolution::pre_alloc(codestream* codestream, const rect& res_rect, const rect& recon_res_rect, From be39386e13e426e5868fce6563f2520d6b4cd10a Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 4 Apr 2024 06:49:08 +1100 Subject: [PATCH 046/348] reversible analysis is working; irreversible not. More testing is needed. 
--- src/core/codestream/ojph_resolution.cpp | 430 ++++++++-------------- src/core/codestream/ojph_resolution.h | 2 +- src/core/transform/ojph_transform.cpp | 242 ++++++++++-- src/core/transform/ojph_transform.h | 41 ++- src/core/transform/ojph_transform_local.h | 39 +- 5 files changed, 442 insertions(+), 312 deletions(-) diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 105c57de..f28cfd5d 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -407,277 +407,175 @@ namespace ojph { } } + ////////////////////////////////////////////////////////////////////////// + line_buf* resolution::get_line() + { + if (vert_even) + { + ++cur_line; + sig->active = true; + return sig->line; + } + else + { + ++cur_line; + aug->active = true; + return aug->line; + } + } + ////////////////////////////////////////////////////////////////////////// void resolution::push_line() { if (res_num == 0) { assert(num_bands == 1 && child_res == NULL); - bands[0].exchange_buf(ssp[0].line);//line at location 0 + bands[0].exchange_buf(vert_even ? sig->line : aug->line); bands[0].push_line(); return; } - //ui32 width = res_rect.siz.w; - //if (width == 0) - // return; - //if (reversible) - //{ - // //vertical transform - // assert(num_lines >= 4); - // if (vert_even) - // { - // rev_vert_wvlt_fwd_predict(lines, - // cur_line > 1 ? lines + 2 : lines, - // lines + 1, width); - // rev_vert_wvlt_fwd_update(lines + 1, - // cur_line > 2 ? 
lines + 3 : lines + 1, - // lines + 2, width); - - // // push to horizontal transform lines[2](L) and lines[1] (H) - // if (cur_line >= 1) - // { - // rev_horz_wvlt_fwd_tx(lines + 1, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // } - // if (cur_line >= 2) - // { - // rev_horz_wvlt_fwd_tx(lines + 2, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // } - - // if (cur_line >= res_rect.siz.h - 1) - // { //finished, so we need to process any lines left - // if (cur_line) - // { - // if (vert_even) - // { - // rev_vert_wvlt_fwd_update(lines + 1, lines + 1, - // lines, width); - // //push lines[0] to L - // rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // else - // { - // rev_vert_wvlt_fwd_predict(lines + 1, lines + 1, - // lines, width); - // rev_vert_wvlt_fwd_update(lines, - // cur_line > 1 ? 
lines + 2 : lines, - // lines + 1, width); - - // // push to horizontal transform lines[1](L) and line[0] (H) - // //line[0] to H - // rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // //line[1] to L - // rev_horz_wvlt_fwd_tx(lines + 1, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // } - // else - // { //only one line - // if (vert_even) - // { - // //push to L - // rev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // else - // { - // si32* sp = lines[0].i32; - // for (ui32 i = width; i > 0; --i) - // *sp++ <<= 1; - // //push to H - // rev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // } - // } - // } - - // rotate_buffers(lines, lines + 1, lines + 2, lines + 3); - - // ++cur_line; - // vert_even = !vert_even; - //} - //else - //{ - // //vertical transform - // assert(num_lines >= 6); - // if (vert_even) - // { - // irrev_vert_wvlt_step(lines + 0, - // cur_line > 1 ? lines + 2 : lines, - // lines + 1, 0, width); - // irrev_vert_wvlt_step(lines + 1, - // cur_line > 2 ? lines + 3 : lines + 1, - // lines + 2, 1, width); - // irrev_vert_wvlt_step(lines + 2, - // cur_line > 3 ? lines + 4 : lines + 2, - // lines + 3, 2, width); - // irrev_vert_wvlt_step(lines + 3, - // cur_line > 4 ? 
lines + 5 : lines + 3, - // lines + 4, 3, width); - - // // push to horizontal transform lines[4](L) and lines[3] (H) - // if (cur_line >= 3) - // { - // irrev_vert_wvlt_K(lines + 3, lines + 5, - // false, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // } - // if (cur_line >= 4) - // { - // irrev_vert_wvlt_K(lines + 4, lines + 5, - // true, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // } - - // if (cur_line >= res_rect.siz.h - 1) - // { //finished, so we need to process any left line - // if (cur_line) - // { - // if (vert_even) - // { - // irrev_vert_wvlt_step(lines + 1, lines + 1, - // lines, 1, width); - // irrev_vert_wvlt_step(lines, - // cur_line > 1 ? lines + 2 : lines, - // lines + 1, 2, width); - // irrev_vert_wvlt_step(lines + 1, - // cur_line > 2 ? 
lines + 3 : lines + 1, - // lines + 2, 3, width); - // irrev_vert_wvlt_step(lines + 1, lines + 1, - // lines, 3, width); - // //push lines[2] to L, lines[1] to H, and lines[0] to L - // if (cur_line >= 2) - // { - // irrev_vert_wvlt_K(lines + 2, lines + 5, - // true, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, - // child_res->get_line(), bands[1].get_line(), - // width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // irrev_vert_wvlt_K(lines + 1, lines + 5, - // false, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // irrev_vert_wvlt_K(lines, lines + 5, - // true, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // else - // { - // irrev_vert_wvlt_step(lines + 1, lines + 1, - // lines, 0, width); - // irrev_vert_wvlt_step(lines, - // cur_line > 1 ? lines + 2 : lines, - // lines + 1, 1, width); - // irrev_vert_wvlt_step(lines + 1, - // cur_line > 2 ? lines + 3 : lines + 1, - // lines + 2, 2, width); - // irrev_vert_wvlt_step(lines + 2, - // cur_line > 3 ? lines + 4 : lines + 2, - // lines + 3, 3, width); - - // irrev_vert_wvlt_step(lines + 1, lines + 1, - // lines, 2, width); - // irrev_vert_wvlt_step(lines, - // cur_line > 1 ? 
lines + 2 : lines, - // lines + 1, 3, width); - - // //push lines[3] L, lines[2] H, lines[1] L, and lines[0] H - // if (cur_line >= 3) - // { - // irrev_vert_wvlt_K(lines + 3, lines + 5, - // true, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, - // child_res->get_line(), bands[1].get_line(), - // width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // if (cur_line >= 2) - // irrev_vert_wvlt_K(lines + 2, lines + 5, false, width); - // else - // irrev_vert_wvlt_K(lines, lines + 5, false, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // irrev_vert_wvlt_K(lines + 1, lines + 5, - // true, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // irrev_vert_wvlt_K(lines, lines + 5, - // false, width); - // irrev_horz_wvlt_fwd_tx(lines + 5, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // } - // } - // else - // { //only one line - // if (vert_even) - // { - // //push to L - // irrev_horz_wvlt_fwd_tx(lines, child_res->get_line(), - // bands[1].get_line(), width, horz_even); - // bands[1].push_line(); - // child_res->push_line(); - // } - // else - // { - // float* sp = lines[0].f32; - // for (ui32 i = width; i > 0; --i) - // *sp++ *= 2.0f; - // //push to H - // irrev_horz_wvlt_fwd_tx(lines, bands[2].get_line(), - // bands[3].get_line(), width, horz_even); - // bands[2].push_line(); - // bands[3].push_line(); - // } - // } - // } - - // rotate_buffers(lines, lines + 1, lines + 2, lines + 3, lines + 4, - // lines + 5); - - // ++cur_line; - // vert_even = !vert_even; - //} + ui32 width = res_rect.siz.w; + if (width == 0) + return; + if (reversible) + { + if (res_rect.siz.h > 1) + { + if (!vert_even && cur_line < res_rect.siz.h) { + 
vert_even = !vert_even; + return; + } + + bool finished; + do + { + //vertical transform + for (ui32 i = 0; i < num_steps; ++i) + { + if (aug->active && (sig->active || ssp[i].active)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(i); + rev_vert_ana_step(s, sp1, sp2, dp, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; + } + + finished = true; + if (aug->active) { + rev_horz_ana(atk, bands[2].get_line(), + bands[3].get_line(), aug->line, width, horz_even); + bands[2].push_line(); + bands[3].push_line(); + aug->active = false; + finished = false; + } + if (sig->active) { + rev_horz_ana(atk, child_res->get_line(), + bands[1].get_line(), sig->line, width, horz_even); + bands[1].push_line(); + child_res->push_line(); + sig->active = false; + finished = false; + }; + vert_even = !vert_even; + } while (cur_line >= res_rect.siz.h && !finished); + } + else + { + if (vert_even) { + rev_horz_ana(atk, child_res->get_line(), + bands[1].get_line(), sig->line, width, horz_even); + bands[1].push_line(); + child_res->push_line(); + } + else + { + si32* sp = aug->line->i32; + for (ui32 i = width; i > 0; --i) + *sp++ <<= 1; + rev_horz_ana(atk, bands[2].get_line(), + bands[3].get_line(), aug->line, width, horz_even); + bands[2].push_line(); + bands[3].push_line(); + } + } + } + else + { + if (res_rect.siz.h > 1) + { + if (!vert_even && cur_line < res_rect.siz.h) { + vert_even = !vert_even; + return; + } + + bool finished; + do + { + //vertical transform + for (ui32 i = 0; i < num_steps; ++i) + { + if (aug->active && (sig->active || ssp[i].active)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? 
ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(i); + irv_vert_ana_step(s, sp1, sp2, dp, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; + } + + finished = true; + if (aug->active) { + const float K = atk->get_K(); + irv_vert_times_K(K, aug->line, width); + + irv_horz_ana(atk, bands[2].get_line(), + bands[3].get_line(), aug->line, width, horz_even); + bands[2].push_line(); + bands[3].push_line(); + aug->active = false; + finished = false; + } + if (sig->active) { + const float K_inv = 1.0f / atk->get_K(); + irv_vert_times_K(K_inv, sig->line, width); + + irv_horz_ana(atk, child_res->get_line(), + bands[1].get_line(), sig->line, width, horz_even); + bands[1].push_line(); + child_res->push_line(); + sig->active = false; + finished = false; + }; + vert_even = !vert_even; + } while (cur_line >= res_rect.siz.h && !finished); + } + else + { + if (vert_even) { + irv_horz_ana(atk, child_res->get_line(), + bands[1].get_line(), sig->line, width, horz_even); + bands[1].push_line(); + child_res->push_line(); + } + else + { + float* sp = aug->line->f32; + for (ui32 i = width; i > 0; --i) + *sp++ *= 2.0f; + irv_horz_ana(atk, bands[2].get_line(), + bands[3].get_line(), aug->line, width, horz_even); + bands[2].push_line(); + bands[3].push_line(); + } + } + } } ////////////////////////////////////////////////////////////////////////// @@ -730,8 +628,7 @@ namespace ojph { //vertical transform for (ui32 i = 0; i < num_steps; ++i) { - if (aug->active && - (sig->active == true || ssp[i].active == true)) + if (aug->active && (sig->active || ssp[i].active)) { line_buf* dp = aug->line; line_buf* sp1 = sig->active ? 
sig->line : ssp[i].line; @@ -790,7 +687,7 @@ namespace ojph { ++cur_line; const float K = atk->get_K(); - irv_vert_syn_K(K, aug->line, width); + irv_vert_times_K(K, aug->line, width); continue; } @@ -803,15 +700,14 @@ namespace ojph { ++cur_line; const float K_inv = 1.0f / atk->get_K(); - irv_vert_syn_K(K_inv, sig->line, width); + irv_vert_times_K(K_inv, sig->line, width); } } //vertical transform for (ui32 i = 0; i < num_steps; ++i) { - if (aug->active && - (sig->active == true || ssp[i].active == true)) + if (aug->active && (sig->active || ssp[i].active)) { line_buf* dp = aug->line; line_buf* sp1 = sig->active ? sig->line : ssp[i].line; diff --git a/src/core/codestream/ojph_resolution.h b/src/core/codestream/ojph_resolution.h index 7a7d43d5..36ae5d00 100644 --- a/src/core/codestream/ojph_resolution.h +++ b/src/core/codestream/ojph_resolution.h @@ -72,7 +72,7 @@ namespace ojph { tile_comp *parent_tile_comp, resolution *parent_res); - line_buf* get_line() { return ssp[0].line; } + line_buf* get_line(); void push_line(); line_buf* pull_line(); rect get_rect() { return res_rect; } diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 46231d63..4f7f8cc1 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -87,15 +87,25 @@ namespace ojph { + ///////////////////////////////////////////////////////////////////////// + void (*rev_vert_ana_step) + (const lifting_step* s, const line_buf* sig, const line_buf* other, + const line_buf* aug, ui32 repeat) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*rev_horz_ana) + (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, + const line_buf* src, ui32 width, bool even) = NULL; + ///////////////////////////////////////////////////////////////////////// void (*rev_vert_syn_step) - (const lifting_step* s, line_buf* aug, const line_buf* sig, - line_buf* other, ui32 repeat) = NULL; + (const 
lifting_step* s, const line_buf* aug, const line_buf* sig, + const line_buf* other, ui32 repeat) = NULL; ///////////////////////////////////////////////////////////////////////// void (*rev_horz_syn) - (const param_atk* atk, line_buf* dst, line_buf* lsrc, - line_buf* hsrc, ui32 width, bool even) = NULL; + (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, + const line_buf* hsrc, ui32 width, bool even) = NULL; @@ -130,17 +140,28 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// - void (*irv_vert_syn_step) - (const lifting_step* s, line_buf* aug, const line_buf* sig, - line_buf* other, ui32 repeat) = NULL; + void (*irv_vert_ana_step) + (const lifting_step* s, const line_buf* sig, const line_buf* other, + const line_buf* aug, ui32 repeat) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*irv_horz_ana) + (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, + const line_buf* src, ui32 width, bool even) = NULL; ///////////////////////////////////////////////////////////////////////// - void (*irv_vert_syn_K)(const float K, line_buf* aug, ui32 repeat) = NULL; + void (*irv_vert_syn_step) + (const lifting_step* s, const line_buf* aug, const line_buf* sig, + const line_buf* other, ui32 repeat) = NULL; ///////////////////////////////////////////////////////////////////////// void (*irv_horz_syn) - (const param_atk* atk, line_buf* dst, line_buf* lsrc, - line_buf* hsrc, ui32 width, bool even) = NULL; + (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, + const line_buf* hsrc, ui32 width, bool even) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*irv_vert_times_K) + (float K, const line_buf* aug, ui32 repeat) = NULL; @@ -164,6 +185,8 @@ namespace ojph { rev_vert_wvlt_bwd_update = gen_rev_vert_wvlt_bwd_update; rev_horz_wvlt_bwd_tx = gen_rev_horz_wvlt_bwd_tx; + rev_vert_ana_step = gen_rev_vert_ana_step; 
+ rev_horz_ana = gen_rev_horz_ana; rev_vert_syn_step = gen_rev_vert_syn_step; rev_horz_syn = gen_rev_horz_syn; @@ -172,9 +195,11 @@ namespace ojph { irrev_horz_wvlt_fwd_tx = gen_irrev_horz_wvlt_fwd_tx; irrev_horz_wvlt_bwd_tx = gen_irrev_horz_wvlt_bwd_tx; + irv_vert_ana_step = gen_irv_vert_ana_step; + irv_horz_ana = gen_irv_horz_ana; irv_vert_syn_step = gen_irv_vert_syn_step; - irv_vert_syn_K = gen_irv_vert_syn_K; irv_horz_syn = gen_irv_horz_syn; + irv_vert_times_K = gen_irv_vert_times_K; #ifndef OJPH_DISABLE_INTEL_SIMD int level = get_cpu_ext_level(); @@ -378,9 +403,92 @@ namespace ojph { + ///////////////////////////////////////////////////////////////////////// + void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat) + { + si32 a = s->rev.Aatk; + si32 b = s->rev.Batk; + ui32 e = s->rev.Eatk; + + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + if (a >= 0) + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b + a * (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b - a * (*src1++ + *src2++)) >> e; + } + + ///////////////////////////////////////////////////////////////////////// + void gen_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) + { + if (width > 1) + { + // combine both lsrc and hsrc into dst + si32* dph = hdst->i32; + si32* dpl = ldst->i32; + si32* sp = src->i32; + ui32 w = width; + if (!even) + { + *dph++ = *sp++; --w; + } + for (; w > 1; w -= 2) + { + *dpl++ = *sp++; *dph++ = *sp++; + } + if (w) + { + *dpl++ = *sp++; --w; + } + + si32* hp = hdst->i32, * lp = ldst->i32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + // first lifting step + const lifting_step* s = atk->get_step(j); + si32 a = s->rev.Aatk; + si32 b = s->rev.Batk; + ui32 e = s->rev.Eatk; + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const si32* sp = lp + (even ? 1 : 0); + si32* dp = hp; + if (a >= 0) + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += (b + a * (sp[-1] + sp[0])) >> e; + else + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp -= (b - a * (sp[-1] + sp[0])) >> e; + + // swap buffers + si32* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; + } + } + else { + if (even) + ldst->i32[0] = src->i32[0]; + else + hdst->i32[0] = src->i32[0] << 1; + } + } + ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_syn_step(const lifting_step* s, line_buf* aug, - const line_buf* sig, line_buf* other, + void gen_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, ui32 repeat) { si32 a = s->rev.Aatk; @@ -398,8 +506,9 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - void gen_rev_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even) + void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { @@ -643,8 +752,8 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_step(const lifting_step* s, line_buf* aug, - const line_buf* sig, line_buf* other, + void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, ui32 repeat) { float a = s->irv.Aatk; @@ -652,20 +761,100 @@ namespace ojph { float* dst = aug->f32; 
const float* src1 = sig->f32, * src2 = other->f32; for (ui32 i = repeat; i > 0; --i) - *dst++ -= a * (*src1++ + *src2++); + *dst++ += a * (*src1++ + *src2++); } + + ///////////////////////////////////////////////////////////////////////// + void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) + { + if (width > 1) + { + // split src into ldst and hdst + float* dph = hdst->f32; + float* dpl = ldst->f32; + float* sp = src->f32; + ui32 w = width; + if (!even) + { + *dph++ = *sp++; --w; + } + for (; w > 1; w -= 2) + { + *dpl++ = *sp++; *dph++ = *sp++; + } + if (w) + { + *dpl++ = *sp++; --w; + } + + float* hp = hdst->f32, * lp = ldst->f32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + // first lifting step + const lifting_step* s = atk->get_step(j); + float a = s->irv.Aatk; + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const float* sp = lp + (even ? 
1 : 0); + float* dp = hp; + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += a * (sp[-1] + sp[0]); + + // swap buffers + float* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; + } + + { + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + + dp = lp; + for (ui32 i = l_width; i > 0; --i) + *dp++ *= K_inv; + dp = hp; + for (ui32 i = h_width; i > 0; --i) + *dp++ *= K; + } + } + else { + if (even) + ldst->f32[0] = src->f32[0]; + else + hdst->f32[0] = src->f32[0] * 2.0f; + } + + + } + ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_K(const float K, line_buf* aug, ui32 repeat) + void gen_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat) { + float a = s->irv.Aatk; + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; for (ui32 i = repeat; i > 0; --i) - *dst++ *= K; + *dst++ -= a * (*src1++ + *src2++); } ////////////////////////////////////////////////////////////////////////// - void gen_irv_horz_syn(const param_atk* atk, line_buf* dst, line_buf* lsrc, - line_buf* hsrc, ui32 width, bool even) + void gen_irv_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { @@ -691,7 +880,6 @@ namespace ojph { ui32 num_steps = atk->get_num_steps(); for (ui32 j = num_steps; j > 0; --j) { - // first lifting step const lifting_step* s = atk->get_step(j - 1); float a = s->irv.Aatk; @@ -730,7 +918,13 @@ namespace ojph { } } - + ////////////////////////////////////////////////////////////////////////// + void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + float* dst = aug->f32; + for (ui32 i = repeat; i > 0; --i) + *dst++ *= K; + } diff --git a/src/core/transform/ojph_transform.h b/src/core/transform/ojph_transform.h index 77ede96f..b31df0ef 100644 --- 
a/src/core/transform/ojph_transform.h +++ b/src/core/transform/ojph_transform.h @@ -85,16 +85,25 @@ namespace ojph { + ///////////////////////////////////////////////////////////////////////// + extern void (*rev_vert_ana_step) + (const lifting_step* s, const line_buf* sig, const line_buf* other, + const line_buf* aug, ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + extern void (*rev_horz_ana) + (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, + const line_buf* src, ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// extern void (*rev_vert_syn_step) - (const lifting_step* s, line_buf* aug, const line_buf* sig, - line_buf* other, ui32 repeat); + (const lifting_step* s, const line_buf* aug, const line_buf* sig, + const line_buf* other, ui32 repeat); ///////////////////////////////////////////////////////////////////////// extern void (*rev_horz_syn) - (const param_atk* atk, line_buf* dst, line_buf* lsrc, - line_buf* hsrc, ui32 width, bool even); + (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, + const line_buf* hsrc, ui32 width, bool even); @@ -126,20 +135,30 @@ namespace ojph { + ///////////////////////////////////////////////////////////////////////// - extern void (*irv_vert_syn_step) - (const lifting_step* s, line_buf* aug, const line_buf* sig, - line_buf* other, ui32 repeat); + extern void (*irv_vert_ana_step) + (const lifting_step* s, const line_buf* sig, const line_buf* other, + const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// - extern void (*irv_vert_syn_K) - (const float K, line_buf* aug, ui32 repeat); + extern void (*irv_horz_ana) + (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, + const line_buf* src, ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_vert_syn_step) + (const 
lifting_step* s, const line_buf* aug, const line_buf* sig, + const line_buf* other, ui32 repeat); ///////////////////////////////////////////////////////////////////////// extern void (*irv_horz_syn) - (const param_atk* atk, line_buf* dst, line_buf* lsrc, - line_buf* hsrc, ui32 width, bool even); + (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, + const line_buf* hsrc, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_vert_times_K) + (float K, const line_buf* aug, ui32 repeat); diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index 42cec378..c484d279 100644 --- a/src/core/transform/ojph_transform_local.h +++ b/src/core/transform/ojph_transform_local.h @@ -99,13 +99,24 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// - void gen_rev_vert_syn_step(const lifting_step* s, line_buf* aug, - const line_buf* sig, line_buf* other, + void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// - void gen_rev_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even); + void gen_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void gen_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even); @@ -134,18 +145,28 @@ namespace ojph { + 
///////////////////////////////////////////////////////////////////////// + void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_step(const lifting_step* s, line_buf* aug, - const line_buf* sig, line_buf* other, + void gen_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, ui32 repeat); ///////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_K(const float K, line_buf* aug, ui32 repeat); + void gen_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, + ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void gen_irv_horz_syn(const param_atk *atk, line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even); + void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); From df12e7fbadce7476c27580101f618400727503e2 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 4 Apr 2024 10:00:23 +1100 Subject: [PATCH 047/348] README.md update --- README.md | 134 ++------------------- docs/compiling.md | 118 ++++++++++++++++++ docs/docker.md | 16 +++ docs/{DoxygenStyle.md => doxygen_style.md} | 0 docs/status.md | 9 ++ docs/usage_examples.md | 19 +++ docs/web_demos.md | 5 + 7 files changed, 177 insertions(+), 124 deletions(-) create mode 100644 docs/compiling.md create mode 100644 docs/docker.md rename docs/{DoxygenStyle.md => doxygen_style.md} (100%) create mode 100644 docs/status.md create mode 100644 docs/usage_examples.md create mode 100644 docs/web_demos.md diff --git 
a/README.md b/README.md index b07f568c..da4d2d61 100644 --- a/README.md +++ b/README.md @@ -4,131 +4,17 @@ Open source implementation of High-throughput JPEG2000 (HTJ2K), also known as JPH, JPEG2000 Part 15, ISO/IEC 15444-15, and ITU-T T.814. Here, we are interested in implementing the HTJ2K only, supporting features that are defined in JPEG2000 Part 1 (for example, for wavelet transform, only reversible 5/3 and irreversible 9/7 are supported). -The interested reader is referred to the [short HTJ2K white paper](http://ds.jpeg.org/whitepapers/jpeg-htj2k-whitepaper.pdf), or the [extended HTJ2K white paper](https://htj2k.com/wp-content/uploads/white-paper.pdf) for more details on HTJ2K. [This](https://kakadusoftware.com/wp-content/uploads/2019/09/icip2019.pdf) paper explores the attainable performance on CPU, and [this](https://kakadusoftware.com/wp-content/uploads/2019/09/ICIP2019_GPU.pdf) paper for decoding on a GPU. - -# Status # - -The code is written in C++; the color and wavelet transform steps can employ SIMD instructions on Intel platforms. It conceivable that at some point in the future, SIMD instructions are employed to improve performance of the block (de)coder, and/or for platforms other than Intel. As it stands, on Intel Skylake i7-6700, encoding 4K 4:4:4 HDR images losslessly takes around 0.5s, and decoding takes around 0.34s; for lossy compression, performance depends on the quantisation step size (qstep), but for a high-quality image at a bitrate of around 3 bits/pixel, encoding takes around 0.27s and decoding takes 0.22s. - -As it stands, the OpenJPH library needs documentation. The provided encoder ojph\_compress only generates HTJ2K codestreams, with the extension j2c; the generated files lack the .jph header. Adding the .jph header is of little urgency, as the codestream contains all needed information to properly decode an image. The .jph header will be added at a future point in time. 
The provided decoder ojph\_expand decodes .jph files, by ignoring the .jph header if it is present. - -The provided command line tools ojph\_compress and ojph\_expand accepts and generated .pgm, .ppm, and .yuv. See the usage examples below. - -# Web-based Demos # - -The associate site [openjph.org](https://openjph.org) serves as a blog. It currently host the [javascript](https://openjph.org/javascript/demo.html) demo of the decoder; the webpage demonstrates that the library can be compiled to javascript, and can run inside a web-browser. Any browser supporting webassembly can be used to view this webpage; examples include Firefox, Chrome, Safari, and Edge, on a desktop, mobile, or tablet. - -Another project of interest is the [openjphjs](https://github.com/chafey/openjphjs) project, developed by [Chris](https://github.com/chafey). You can see [there](https://chafey.github.io/openjphjs/test/browser/index.html) a nice online demonstration of javascript-based HTJ2K encoding/decoding, with a wealth of features and user-selectable options. - -# Compiling # - -The code employs the *cmake* tool to generate a variety of build environments. A visual studio code container is included for building using -the visual studio code remote containers add in (highly recommended) - -## For Linux ## - - cd build - cmake -DCMAKE_BUILD_TYPE=Release ../ - make - -The generated library and executables will be in the bin folder. - -## For Windows ## - - cd build - cmake ../ -G "Visual Studio 14 2015 Win64" - -cmake support other visual studio versions. This command generates a solution in the build folder, which can be build using visual studio. - -## For macOS ## - -You can use the "For Linux" approach above. Alternatively, you can use the Xcode project in src/apps/apps.xcodeproj, which I use. Another approach is to use cmake to generate an xcode project, in the build folder, using - - cd build - cmake ../ -G Xcode - make - -The generated library and executables will be in the bin folder. 
- -# Compiling to javascript/wasm # - -The library can now be compiled to javascript/wasm. For this purpose, a small wrapper file (ojph_wrapper.cpp) has been written to interface between javascript and C++; the wrapper currently supports decoding only. A small demo page demonstrating the script can be accessed [here](https://openjph.org/javascript/demo.html). - -Compilation needs the [emscripten](https://emscripten.org/) tools. One way of using these tools is to install them on your machine, and activate them using - - source emsdk_env.sh - -before compilation. Alternatively, if you are a docker user, the you can launch a docker session using script provided at ```subprojects/js/emscripten-docker.sh```; this script will download a third-party docker image that has the emscripten tools integrated in it -- Thanks to [Chris](https://github.com/chafey) for the suggesting and providing these tools. - -The javascript decoder can be compiled using - - cd subprojects/js/build - emcmake cmake .. - emmake make - -The compilation creates libopenjph.js and libopenjph.wasm in subprojects/js/html folder. That html folder also has the demo webpage index.html and a compressed image test.j2c which the script in index.html decodes. To run the demo webpage on your machine, you need a webserver running on the machine -- Due to security reasons, javascript engines running in a browser cannot access local files on the machine. You can use the ```emrun``` command, provided with the emscripten -tools, by issuing the command - - emrun index.html - -from inside the html folder; the default port is 6931. -Alternatively, a simple python webserver can be run using - - python -m http.server 8000 - -also from inside the html folder. Here, 8000 is the port number at which the webserver will be listening. The webpage can then be accessed by open localhost:8000 in you browser. 
Any browser supporting webassembly can be used to view this webpage; examples include Firefox, Chrome, Safari, and Edge, on a desktop, mobile, or tablet. - -# Visual Studio Code Remote Containers # - -Visual Studio Code Remote Containers are now available with OpenJPH. These scripts/configuration files are provided by [Chris](https://github.com/chafey) -- Thank you Chris, and I must say I am not familiar with them. -The scripts, in the ```.devcontainer``` folder, will build a docker image that can be used with visual studio code as a development environment. - -# Compiling for ARM and other platforms # - -To compile for platforms where x86_64 SIMD instructions are not supported, such as on ARM, we need to disable SIMD instructions; this can be achieved using - - cd build - cmake -DCMAKE_BUILD_TYPE=Release -DOJPH_DISABLE_INTEL_SIMD=ON ../ - make - -As I do not have an ARM board, I tested this using QEMU for aarch64 architecture, targeting a Cortex-A57 CPU. The code worked without issues, but because the ARM platform is emulated, the whole process was slow. 
- -# Compiling and Running in Docker # - -## Step 1 - clone repository -`https://github.com/aous72/OpenJPH.git` - -## Step 2 - build docker image -`cd OpenJPH` -`docker build --rm -f Dockerfile -t openjph:latest .` - -## Step 3 - run docker image - -### in isolated container -`docker run -it --rm openjph:latest` - -### mapping /usr/src/openjph/build directory in the container to local windows c:\temp -`docker run -it --rm -v C:\\temp:/usr/src/openjph/build openjph:latest` - -# Usage Example # - -Here are some usage examples: - - ojph_compress -i input_file.ppm -o output_file.j2c -num_decomps 5 -block_size {64,64} -precincts {128,128},{256,256} -prog_order CPRL -colour_trans true -qstep 0.05 - ojph_compress -i input_file.yuv -o output_file.j2c -num_decomps 5 -reversible true -dims {3840,2160} -num_comps 3 -signed false -bit_depth 10 -downsamp {1,1},{2,2} - - ojph_expand -i input_file.j2c -o output_file.ppm - ojph_expand -i input_file.j2c -o output_file.yuv - -**Notes**: - -* Issuing ojph\_compress or ojph\_expand without arguments prints a short usage statement. -* In reversible compression, quantization is not supported. -* On Linux and MacOS, but NOT Windows, { and } need to be escaped; i.e, we need to write \\\{ and \\\}. So, -block\_size {64,64} must be written as -block\_size \\\{64,64\\\}. -* When the source is a .yuv file, use -downsamp {1,1} for 4:4:4 sources. For 4:2:2 downsampling, specify -downsamp {1,1},{2,1}, and for 4:2:0 subsampling specify -downsamp {1,1},{2,2}. The source must have already been downsampled (i.e., OpenJPH does not downsample the source before compression, but can compress downsampled sources). -* In Kakadu, pairs of data in command line arguments represent columns,rows. Here, a pair represents x,y information. 
+The interested reader is referred to the [short HTJ2K white paper](http://ds.jpeg.org/whitepapers/jpeg-htj2k-whitepaper.pdf), or the [extended HTJ2K white paper](https://htj2k.com/wp-content/uploads/white-paper.pdf) for more details on HTJ2K. [This](https://kakadusoftware.com/wp-content/uploads/icip2019.pdf) paper explores the attainable performance on CPU, while [this](https://kakadusoftware.com/wp-content/uploads/ICIP2019_GPU.pdf) and [this](https://webapps.unsworks.library.unsw.edu.au/fapi/datastream/unsworks:75139/bin990339e4-8805-4456-ae30-223d85f9b1c1) explore performance on the GPU. # The standard # -The standard is available free of charge from [ITU website](https://www.itu.int/rec/T-REC-T.814/en). It can also be purchased from the [ISO website](https://www.iso.org/standard/76621.html). +The standard is available free of charge from [ITU website](https://www.itu.int/rec/T-REC-T.814/en). It can also be purchased from the [ISO website](https://www.iso.org/standard/76621.html). + +# Table of Contents # +* [Status](/docs/status.md) +* [Compiling](./docs/compiling.md) +* [Compiling and Running in Docker](./docs/docker.md) +* [Usage Example](./docs/usage_examples.md) +* [Web-based Demos](./docs/web_demos.md) +* [Doxygen Documentation Style](./docs/doxygen_style.md) \ No newline at end of file diff --git a/docs/compiling.md b/docs/compiling.md new file mode 100644 index 00000000..d4058aaf --- /dev/null +++ b/docs/compiling.md @@ -0,0 +1,118 @@ +# Compiling # + +The code employs the *cmake* tool to generate a variety of build environments. A visual studio code container is included for building using +the visual studio code remote containers add in (highly recommended) + +## For Linux ## + +You may need to install libtiff; then, + + cd build + cmake -DCMAKE_BUILD_TYPE=Release ../ + make + sudo make install + +## For Windows ## + +Compilation depends on libtiff.
A pre-compiled library with all the library features for Windows is not available; I am using [this](https://github.com/aous72/OpenJPH/files/14060335/tiff.zip), but I think I have only the basic library. + + cd build + cmake .. -G "Visual Studio 17 2022 Win64" -DCMAKE_PREFIX_PATH= + +`cmake` supports other visual studio versions. This command generates a solution in the build folder, which can be built using visual studio. + +To compile from the command line, use + + cmake --build . --config Release + +To install either use + + cmake --install . --prefix + +to install the library to your desired folder, or, if you want to install to C:\Program Files, you need a PowerShell/CMD running as administrator, and + + cmake --install . + + +## For macOS ## + +You can use the "For Linux" approach above. Alternatively, you can use the Xcode project in src/apps/apps.xcodeproj, which I use. Another approach is to use cmake to generate an xcode project, in the build folder, using + + cd build + cmake ../ -G Xcode + make + sudo make install + +I have not tested this in a long time, but you get the picture. + +## Building Tests ## + +When you invoke `cmake` add `-DOJPH_BUILD_TESTS=ON`, then, for Windows + + cd tests + ctest -C Release + +For other platforms + + cd tests + ctest + +The test setup is a bit finicky, and may sometimes fail for silly reasons. + +# Compiling to Node.js # + +The library can be compiled to run with Node.js. Compilation needs the [emscripten](https://emscripten.org/) tools. One way of using these tools is to install them on your machine, and activate them using, assuming you are running on a platform other than Windows, + + source emsdk_env.sh + +before compilation. Then, + emcmake cmake .. + emmake make + +Compilation will generate two versions of the library and executables, one with WebAssembly SIMD instructions and one without. + + +# Compiling to javascript/wasm # + +The library can now be compiled to javascript/wasm.
For this purpose, a small wrapper file (ojph_wrapper.cpp) has been written to interface between javascript and C++; the wrapper currently supports decoding only. A small demo page demonstrating the script can be accessed [here](https://openjph.org/javascript/demo.html). + +Compilation needs the [emscripten](https://emscripten.org/) tools. One way of using these tools is to install them on your machine, and activate them using + + source emsdk_env.sh + +before compilation. Alternatively, if you are a docker user, then you can launch a docker session using the script provided at ```subprojects/js/emscripten-docker.sh```; this script will download a third-party docker image that has the emscripten tools integrated in it -- Thanks to [Chris](https://github.com/chafey) for suggesting and providing these tools. + +The javascript decoder can be compiled using + + cd subprojects/js/build + emcmake cmake .. + emmake make + +The compilation creates libopenjph.js and libopenjph.wasm in subprojects/js/html folder; it also creates libopenjphsimd.js and libopenjphsimd.wasm. That html folder also has the demo webpage index.html and a compressed image test.j2c which the script in index.html decodes. The index.html detects if the browser supports WebAssembly SIMD instructions, and loads the correct library accordingly. + +To run the demo webpage on your machine, you need a webserver running on the machine -- Due to security reasons, javascript engines running in a browser cannot access local files on the machine. You can use the ```emrun``` command, provided with the emscripten +tools, by issuing the command + + emrun index.html + +from inside the html folder; the default port is 6931. +Alternatively, a simple python webserver can be run using + + python -m http.server 8000 + +also from inside the html folder. Here, 8000 is the port number at which the webserver will be listening. The webpage can then be accessed by opening localhost:8000 in your browser.
Any browser supporting webassembly can be used to view this webpage; examples include Firefox, Chrome, Safari, and Edge, on a desktop, mobile, or tablet. + +# Visual Studio Code Remote Containers # + +Visual Studio Code Remote Containers are now available with OpenJPH. These scripts/configuration files are provided by [Chris](https://github.com/chafey) -- Thank you Chris, and I must say I am not familiar with them. +The scripts, in the ```.devcontainer``` folder, will build a docker image that can be used with visual studio code as a development environment. + +# Compiling for ARM and other platforms # + +To compile for platforms where x86_64 SIMD instructions are not supported, such as on ARM, we need to disable SIMD instructions; this can be achieved using + + cd build + cmake -DCMAKE_BUILD_TYPE=Release \ -DOJPH_DISABLE_INTEL_SIMD=ON ../ + make + +You may need to install libtiff. As I do not have an ARM board, I tested this using QEMU for aarch64 architecture, targeting a Cortex-A57 CPU. The code worked without issues, but because the ARM platform is emulated, the whole process was slow. 
\ No newline at end of file diff --git a/docs/docker.md b/docs/docker.md new file mode 100644 index 00000000..7e03bfb1 --- /dev/null +++ b/docs/docker.md @@ -0,0 +1,16 @@ +# Compiling and Running in Docker # + +## Step 1 - clone repository +`git clone https://github.com/aous72/OpenJPH.git` + +## Step 2 - build docker image +`cd OpenJPH` +`docker build --rm -f Dockerfile -t openjph:latest .` + +## Step 3 - run docker image + +### in isolated container +`docker run -it --rm openjph:latest` + +### mapping /usr/src/openjph/build directory in the container to local windows c:\temp +`docker run -it --rm -v C:\\temp:/usr/src/openjph/build openjph:latest` diff --git a/docs/DoxygenStyle.md b/docs/doxygen_style.md similarity index 100% rename from docs/DoxygenStyle.md rename to docs/doxygen_style.md diff --git a/docs/status.md b/docs/status.md new file mode 100644 index 00000000..ee665584 --- /dev/null +++ b/docs/status.md @@ -0,0 +1,9 @@ +# Status # + +The code is written in C++; the color and wavelet transform steps can employ SIMD instructions on Intel platforms. SIMD instructions are also available for the block decoder (SSE3) and for the block encoder (AVX512). Other parts of the library may include SIMD in the future, for Intel and ARM; existing implementations can also be improved as there are still decent performance improvements on the table. SIMD instructions are also employed for WebAssembly (Emscripten-based), which is now widely supported in most browsers. + +The encoder supports lossless and quantization-based lossy encoding. There is currently no implementation for rate-control-based encoding. + +As it stands, the OpenJPH library needs documentation. The provided encoder ojph\_compress only generates HTJ2K codestreams, with the extension j2c; the generated files lack the .jph header. Adding the .jph header is of little urgency, as the codestream contains all needed information to properly decode an image. The .jph header will be added at a future point in time.
The provided decoder ojph\_expand decodes .jph files, by ignoring the .jph header if it is present. + +The provided command line tools ojph\_compress and ojph\_expand accept and generate .pgm, .ppm, .yuv, .raw, and .dpx. See the [usage examples](./usage_examples.md). \ No newline at end of file diff --git a/docs/usage_examples.md b/docs/usage_examples.md new file mode 100644 index 00000000..fc71bc28 --- /dev/null +++ b/docs/usage_examples.md @@ -0,0 +1,19 @@ +# Usage Example # + +Here are some usage examples: + + ojph_compress -i input_file.ppm -o output_file.j2c -num_decomps 5 -block_size {64,64} -precincts {128,128},{256,256} -prog_order CPRL -colour_trans true -qstep 0.05 + + ojph_compress -i input_file.yuv -o output_file.j2c -num_decomps 5 -reversible true -dims {3840,2160} -num_comps 3 -signed false -bit_depth 10 -downsamp {1,1},{2,2} + + ojph_expand -i input_file.j2c -o output_file.ppm + ojph_expand -i input_file.j2c -o output_file.yuv + +**Notes**: + +* Issuing ojph\_compress or ojph\_expand without arguments prints a short usage statement. +* In reversible compression, quantization is not supported. +* On Linux and MacOS, but NOT Windows, { and } need to be escaped; i.e., we need to write \\\{ and \\\}. So, -block\_size {64,64} must be written as -block\_size \\\{64,64\\\}. +* When the source is a .yuv file, use -downsamp {1,1} for 4:4:4 sources. For 4:2:2 downsampling, specify -downsamp {1,1},{2,1}, and for 4:2:0 subsampling specify -downsamp {1,1},{2,2}. The source must have already been downsampled (i.e., OpenJPH does not downsample the source before compression, but can compress downsampled sources). +* In Kakadu, pairs of data in command line arguments represent columns,rows. Here, a pair represents x,y information. + diff --git a/docs/web_demos.md b/docs/web_demos.md new file mode 100644 index 00000000..f46590b9 --- /dev/null +++ b/docs/web_demos.md @@ -0,0 +1,5 @@ +# Web-based Demos # + +The associated site [openjph.org](https://openjph.org) serves as a blog.
It currently host the [javascript](https://openjph.org/javascript/demo.html) demo of the decoder; the webpage demonstrates that the library can be compiled to javascript, and can run inside a web-browser. Any browser supporting webassembly can be used to view this webpage; examples include Firefox, Chrome, Safari, and Edge, on a desktop, mobile, or tablet. + +Another project of interest is the [openjphjs](https://github.com/chafey/openjphjs) project, developed by [Chris](https://github.com/chafey). You can see [there](https://chafey.github.io/openjphjs/test/browser/index.html) a nice online demonstration of javascript-based HTJ2K encoding/decoding, with a wealth of features and user-selectable options. From 12c3bf57624704daf3493e95eac40ae2327c3137 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 4 Apr 2024 16:04:52 +1100 Subject: [PATCH 048/348] Fixed 97 analysis. --- src/core/transform/ojph_transform.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 4f7f8cc1..028ac013 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -752,8 +752,8 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, + void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, ui32 repeat) { float a = s->irv.Aatk; From 4dc10b6abb0e0379ef24e5d79a2e3d598a2fe2b3 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 5 Apr 2024 06:54:32 +1100 Subject: [PATCH 049/348] A bug fix. Still buggy. 
--- src/core/codestream/ojph_codeblock.cpp | 12 +++-- src/core/codestream/ojph_resolution.cpp | 23 ++++---- src/core/codestream/ojph_resolution.h | 1 + tests/test_executables.cpp | 70 ++++++++++++++++++++++++- tests/test_helpers/ht_cmdlines.txt | 48 +++++++++-------- 5 files changed, 116 insertions(+), 38 deletions(-) diff --git a/src/core/codestream/ojph_codeblock.cpp b/src/core/codestream/ojph_codeblock.cpp index a95cbef5..25bdc2ae 100644 --- a/src/core/codestream/ojph_codeblock.cpp +++ b/src/core/codestream/ojph_codeblock.cpp @@ -150,12 +150,14 @@ namespace ojph { cb_size.w, cb_size.h, stride, stripe_causal); if (result == false) - { - if (resilient == true) - zero_block = true; - else - OJPH_ERROR(0x000300A1, "Error decoding a codeblock\n"); + { + if (resilient == true) { + OJPH_INFO(0x000300A1, "Error decoding a codeblock\n"); + zero_block = true; } + else + OJPH_ERROR(0x000300A1, "Error decoding a codeblock\n"); + } } else zero_block = true; diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index ff148400..c4507707 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -361,6 +361,7 @@ namespace ojph { aug->line->wrap(allocator->post_alloc_data(width, 1), width, 1); cur_line = 0; + rows_to_produce = res_rect.siz.h; vert_even = (res_rect.org.y & 1) == 0; horz_even = (res_rect.org.x & 1) == 0; } @@ -406,7 +407,6 @@ namespace ojph { return; } - bool finished; do { //vertical transform @@ -423,14 +423,13 @@ namespace ojph { lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - finished = true; if (aug->active) { rev_horz_ana(atk, bands[2].get_line(), bands[3].get_line(), aug->line, width, horz_even); bands[2].push_line(); bands[3].push_line(); aug->active = false; - finished = false; + --rows_to_produce; } if (sig->active) { rev_horz_ana(atk, child_res->get_line(), @@ -438,14 +437,15 @@ namespace ojph { bands[1].push_line(); child_res->push_line(); sig->active 
= false; - finished = false; + --rows_to_produce; }; vert_even = !vert_even; - } while (cur_line >= res_rect.siz.h && !finished); + } while (cur_line >= res_rect.siz.h && rows_to_produce > 0); } else { if (vert_even) { + // horizontal transform rev_horz_ana(atk, child_res->get_line(), bands[1].get_line(), sig->line, width, horz_even); bands[1].push_line(); @@ -453,9 +453,11 @@ namespace ojph { } else { + // vertical transform si32* sp = aug->line->i32; for (ui32 i = width; i > 0; --i) *sp++ <<= 1; + // horizontal transform rev_horz_ana(atk, bands[2].get_line(), bands[3].get_line(), aug->line, width, horz_even); bands[2].push_line(); @@ -472,7 +474,6 @@ namespace ojph { return; } - bool finished; do { //vertical transform @@ -489,7 +490,6 @@ namespace ojph { lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - finished = true; if (aug->active) { const float K = atk->get_K(); irv_vert_times_K(K, aug->line, width); @@ -499,7 +499,7 @@ namespace ojph { bands[2].push_line(); bands[3].push_line(); aug->active = false; - finished = false; + --rows_to_produce; } if (sig->active) { const float K_inv = 1.0f / atk->get_K(); @@ -510,14 +510,15 @@ namespace ojph { bands[1].push_line(); child_res->push_line(); sig->active = false; - finished = false; + --rows_to_produce; }; vert_even = !vert_even; - } while (cur_line >= res_rect.siz.h && !finished); + } while (cur_line >= res_rect.siz.h && rows_to_produce > 0); } else { if (vert_even) { + // horizontal transform irv_horz_ana(atk, child_res->get_line(), bands[1].get_line(), sig->line, width, horz_even); bands[1].push_line(); @@ -525,9 +526,11 @@ namespace ojph { } else { + // vertical transform float* sp = aug->line->f32; for (ui32 i = width; i > 0; --i) *sp++ *= 2.0f; + // horizontal transform irv_horz_ana(atk, bands[2].get_line(), bands[3].get_line(), aug->line, width, horz_even); bands[2].push_line(); diff --git a/src/core/codestream/ojph_resolution.h b/src/core/codestream/ojph_resolution.h index 
36ae5d00..72e0b91a 100644 --- a/src/core/codestream/ojph_resolution.h +++ b/src/core/codestream/ojph_resolution.h @@ -116,6 +116,7 @@ namespace ojph { param_dfs::dfs_dwt_type downsampling_style; //wavelet machinery ui32 cur_line; + ui32 rows_to_produce; bool vert_even, horz_even; mem_elastic_allocator *elastic; }; diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 7e6a00cb..4c3a12b9 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -1015,6 +1015,40 @@ TEST(TestExecutables, SimpleEncIrv9732x128) { "Malamute.ppm", "", 3, mse, pae); } +/////////////////////////////////////////////////////////////////////////////// +// Test ojph_compress with codeblocks when the irv97 wavelet is used. +// We test by comparing MSE and PAE of decoded images. +// The compressed file is obtained using these command-line options: +// -o simple_enc_irv97_64x64_tiles_33x33_d5.j2c -qstep 0.01 -tile_size {33,33} +// -num_decomps 5 +TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D5) { + double mse[3] = { 46.2004, 43.622, 56.7452}; + int pae[3] = { 48, 46, 52}; + run_ojph_compress("Malamute.ppm", + "simple_enc_irv97_64x64_tiles_33x33_d5", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 5"); + run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d5", "j2c", "ppm"); + run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d5", "ppm", + "Malamute.ppm", "", 3, mse, pae); +} + +/////////////////////////////////////////////////////////////////////////////// +// Test ojph_compress with codeblocks when the irv97 wavelet is used. +// We test by comparing MSE and PAE of decoded images. 
+// The compressed file is obtained using these command-line options: +// -o simple_enc_irv97_64x64_tiles_33x33_d6.j2c -qstep 0.01 -tile_size {33,33} +// -num_decomps 6 +TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D6) { + double mse[3] = { 46.2004, 43.622, 56.7452}; + int pae[3] = { 48, 46, 52}; + run_ojph_compress("Malamute.ppm", + "simple_enc_irv97_64x64_tiles_33x33_d6", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 6"); + run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d6", "j2c", "ppm"); + run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d6", "ppm", + "Malamute.ppm", "", 3, mse, pae); +} + /////////////////////////////////////////////////////////////////////////////// // Test ojph_compress with codeblocks when the irv97 wavelet is used. // We test by comparing MSE and PAE of decoded images. @@ -1159,6 +1193,40 @@ TEST(TestExecutables, SimpleEncRev534x1024) { "Malamute.ppm", "", 3, mse, pae); } +/////////////////////////////////////////////////////////////////////////////// +// Test ojph_compress with codeblocks when the rev53 wavelet is used. +// We test by comparing MSE and PAE of decoded images. +// The compressed file is obtained using these command-line options: +// -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size +// {32,32} -num_decomps 5 +TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D5) { + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; + run_ojph_compress("Malamute.ppm", + "simple_enc_rev53_64x64_tiles_33x33_d5", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 5"); + run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d5", "j2c", "ppm"); + run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d5", "ppm", + "Malamute.ppm", "", 3, mse, pae); +} + +/////////////////////////////////////////////////////////////////////////////// +// Test ojph_compress with codeblocks when the rev53 wavelet is used. +// We test by comparing MSE and PAE of decoded images. 
+// The compressed file is obtained using these command-line options: +// -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size +// {32,32} -num_decomps 6 +TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D6) { + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; + run_ojph_compress("Malamute.ppm", + "simple_enc_rev53_64x64_tiles_33x33_d6", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 6"); + run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d6", "j2c", "ppm"); + run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d6", "ppm", + "Malamute.ppm", "", 3, mse, pae); +} + /////////////////////////////////////////////////////////////////////////////// // Test ojph_compress with codeblocks when the irv97 wavelet is used. // We test by comparing MSE and PAE of decoded images. @@ -1220,7 +1288,7 @@ TEST(TestExecutables, SimpleEncIrv97TallNarrow) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_tall_narrow1.j2c -image_offset {1,0} -qstep 0.1 TEST(TestExecutables, SimpleEncIrv97TallNarrow1) { - double mse[3] = { 100.905762, 76.113037, 72.834717}; + double mse[3] = { 100.906, 76.113, 72.8347}; int pae[3] = { 39, 35, 34}; run_ojph_compress("tall_narrow.ppm", "simple_enc_irv97_tall_narrow1", "", "j2c", diff --git a/tests/test_helpers/ht_cmdlines.txt b/tests/test_helpers/ht_cmdlines.txt index 55b8e865..c8590611 100644 --- a/tests/test_helpers/ht_cmdlines.txt +++ b/tests/test_helpers/ht_cmdlines.txt @@ -57,28 +57,32 @@ add_test(NAME simple_dec_rev53_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_D # Encoding ############################################################# -add_test(NAME simple_enc_irv97_64x64 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64.j2c -qstep 0.1" "-i simple_enc_irv97_64x64.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") 
-add_test(NAME simple_enc_irv97_32x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_32x32.j2c -qstep 0.01 -block_size \{32,32\}" "-i simple_enc_irv97_32x32.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_32x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_16x16 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_16x16.j2c -qstep 0.01 -block_size \{16,16\}" "-i simple_enc_irv97_16x16.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_16x16.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_4x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_4x4.j2c -qstep 0.01 -block_size \{4,4\}" "-i simple_enc_irv97_4x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_4x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_1024x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_1024x4.j2c -qstep 0.01 -block_size \{4,1024\}" "-i simple_enc_irv97_1024x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_1024x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_4x1024 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_4x1024.j2c -qstep 0.01 -block_size \{1024,4\}" "-i simple_enc_irv97_4x1024.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_4x1024.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_512x8 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_512x8.j2c -qstep 0.01 -block_size \{8,512\}" "-i simple_enc_irv97_512x8.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_512x8.j2c -o 
test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_8x512 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_8x512.j2c -qstep 0.01 -block_size \{512,8\}" "-i simple_enc_irv97_8x512.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_8x512.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_256x16 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_256x16.j2c -qstep 0.01 -block_size \{16,256\}" "-i simple_enc_irv97_256x16.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_256x16.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_16x256 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_16x256.j2c -qstep 0.01 -block_size \{256,16\}" "-i simple_enc_irv97_16x256.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_16x256.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_128x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_128x32.j2c -qstep 0.01 -block_size \{32,128\}" "-i simple_enc_irv97_128x32.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_128x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_32x128 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_32x128.j2c -qstep 0.01 -block_size \{128,32\}" "-i simple_enc_irv97_32x128.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_32x128.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64_16bit.j2c -qstep 0.01" "-i 
simple_enc_irv97_64x64_16bit.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64_16bit.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_irv97_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.pgm -o simple_enc_irv97_64x64_16bit_gray.j2c -qstep 0.01" "-i simple_enc_irv97_64x64_16bit_gray.j2c -o test1.pgm -precise -quiet" "-i simple_enc_irv97_64x64_16bit_gray.j2c -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") -add_test(NAME simple_enc_rev53_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_16bit.j2c -reversible true" "-i simple_enc_rev53_64x64_16bit.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64_16bit.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_rev53_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.pgm -o simple_enc_rev53_64x64_16bit_gray.j2c -reversible true" "-i simple_enc_rev53_64x64_16bit_gray.j2c -o test1.pgm -precise -quiet" "-i simple_enc_rev53_64x64_16bit_gray.j2c -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") - -add_test(NAME simple_enc_rev53_64x64 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64.j2c -reversible true" "-i simple_enc_rev53_64x64.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_rev53_32x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_32x32.j2c -reversible true -block_size \{32,32\}" "-i simple_enc_rev53_32x32.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_32x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_rev53_4x4 COMMAND 
${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_4x4.j2c -reversible true -block_size \{4,4\}" "-i simple_enc_rev53_4x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_4x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_rev53_1024x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_1024x4.j2c -reversible true -block_size \{4,1024\}" "-i simple_enc_rev53_1024x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_1024x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_rev53_4x1024 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_4x1024.j2c -reversible true -block_size \{1024,4\}" "-i simple_enc_rev53_4x1024.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_4x1024.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_64x64 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64.j2c -qstep 0.1" "-i simple_enc_irv97_64x64.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_32x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_32x32.j2c -qstep 0.01 -block_size \{32,32\}" "-i simple_enc_irv97_32x32.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_32x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_16x16 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_16x16.j2c -qstep 0.01 -block_size \{16,16\}" "-i simple_enc_irv97_16x16.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_16x16.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" 
"test2.ppm") +add_test(NAME simple_enc_irv97_4x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_4x4.j2c -qstep 0.01 -block_size \{4,4\}" "-i simple_enc_irv97_4x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_4x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_1024x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_1024x4.j2c -qstep 0.01 -block_size \{4,1024\}" "-i simple_enc_irv97_1024x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_1024x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_4x1024 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_4x1024.j2c -qstep 0.01 -block_size \{1024,4\}" "-i simple_enc_irv97_4x1024.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_4x1024.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_512x8 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_512x8.j2c -qstep 0.01 -block_size \{8,512\}" "-i simple_enc_irv97_512x8.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_512x8.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_8x512 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_8x512.j2c -qstep 0.01 -block_size \{512,8\}" "-i simple_enc_irv97_8x512.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_8x512.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_256x16 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_256x16.j2c -qstep 0.01 -block_size \{16,256\}" "-i simple_enc_irv97_256x16.j2c -o test1.ppm -precise -quiet" "-i 
simple_enc_irv97_256x16.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_16x256 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_16x256.j2c -qstep 0.01 -block_size \{256,16\}" "-i simple_enc_irv97_16x256.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_16x256.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_128x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_128x32.j2c -qstep 0.01 -block_size \{32,128\}" "-i simple_enc_irv97_128x32.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_128x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_32x128 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_32x128.j2c -qstep 0.01 -block_size \{128,32\}" "-i simple_enc_irv97_32x128.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_32x128.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_64x64_tiles_33x33_d5 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64_tiles_33x33_d5.j2c -qstep 0.01 -tile_size \{33,33\} -num_decomps 5" "-i simple_enc_irv97_64x64_tiles_33x33_d5.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64_tiles_33x33_d5.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_64x64_tiles_33x33_d6 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64_tiles_33x33_d6.j2c -qstep 0.01 -tile_size \{33,33\} -num_decomps 6" "-i simple_enc_irv97_64x64_tiles_33x33_d6.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64_tiles_33x33_d6.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME 
simple_enc_irv97_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.ppm -o simple_enc_irv97_64x64_16bit.j2c -qstep 0.01" "-i simple_enc_irv97_64x64_16bit.j2c -o test1.ppm -precise -quiet" "-i simple_enc_irv97_64x64_16bit.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_irv97_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -enc "-i ${images_folder}/mm.pgm -o simple_enc_irv97_64x64_16bit_gray.j2c -qstep 0.01" "-i simple_enc_irv97_64x64_16bit_gray.j2c -o test1.pgm -precise -quiet" "-i simple_enc_irv97_64x64_16bit_gray.j2c -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") +add_test(NAME simple_enc_rev53_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_16bit.j2c -reversible true" "-i simple_enc_rev53_64x64_16bit.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64_16bit.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.pgm -o simple_enc_rev53_64x64_16bit_gray.j2c -reversible true" "-i simple_enc_rev53_64x64_16bit_gray.j2c -o test1.pgm -precise -quiet" "-i simple_enc_rev53_64x64_16bit_gray.j2c -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") + +add_test(NAME simple_enc_rev53_64x64 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64.j2c -reversible true" "-i simple_enc_rev53_64x64.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_32x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_32x32.j2c -reversible true -block_size \{32,32\}" "-i simple_enc_rev53_32x32.j2c -o test1.ppm -precise -quiet" "-i 
simple_enc_rev53_32x32.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_4x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_4x4.j2c -reversible true -block_size \{4,4\}" "-i simple_enc_rev53_4x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_4x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_1024x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_1024x4.j2c -reversible true -block_size \{4,1024\}" "-i simple_enc_rev53_1024x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_1024x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_4x1024 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_4x1024.j2c -reversible true -block_size \{1024,4\}" "-i simple_enc_rev53_4x1024.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_4x1024.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_64x64_tiles_33x33_d5 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size \{32,32\} -num_decomps 5" "-i simple_enc_rev53_64x64_tiles_33x33.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_64x64_tiles_33x33_d6 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size \{32,32\} -num_decomps 6" "-i simple_enc_rev53_64x64_tiles_33x33.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_enc_irv97_64x64_yuv COMMAND 
${CMAKE_CURRENT_SOURCE_DIR}/com_decom_yuv.sh -enc "-i ${images_folder}/foreman_420.yuv -o simple_enc_irv97_64x64_yuv.j2c -qstep 0.1 -dims \{352,288\} -num_comps 3 -downsamp \{1,1\},\{2,2\},\{2,2\} -bit_depth 8,8,8 -signed false,false,false" "-i simple_enc_irv97_64x64_yuv.j2c -o test1y.rawl,test1u.rawl,test1v.rawl -precise -quiet" "-i simple_enc_irv97_64x64_yuv.j2c -o test2.yuv" "${images_folder}/foreman_420.yuv:352x288x8x420" "test1.yuv:352x288x8x420" "test2.yuv:352x288x8x420") add_test(NAME simple_enc_rev53_64x64_yuv COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom_yuv.sh -renc "-i ${images_folder}/foreman_420.yuv -o simple_enc_rev53_64x64_yuv.j2c -reversible true -qstep 0.1 -dims \{352,288\} -num_comps 3 -downsamp \{1,1\},\{2,2\},\{2,2\} -bit_depth 8,8,8 -signed false,false,false" "-i simple_enc_rev53_64x64_yuv.j2c -o test1y.rawl,test1u.rawl,test1v.rawl -precise -quiet" "-i simple_enc_rev53_64x64_yuv.j2c -o test2.yuv" "${images_folder}/foreman_420.yuv:352x288x8x420" "test1.yuv:352x288x8x420" "test2.yuv:352x288x8x420") From 9846f01b5d40a38116cd871754678a110d9d837b Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 5 Apr 2024 14:07:32 +1100 Subject: [PATCH 050/348] Small touchup for error messages --- src/core/codestream/ojph_codeblock.cpp | 4 ++-- src/core/coding/ojph_block_decoder.cpp | 12 ++++++------ src/core/coding/ojph_block_decoder_ssse3.cpp | 12 ++++++------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/core/codestream/ojph_codeblock.cpp b/src/core/codestream/ojph_codeblock.cpp index 25bdc2ae..9a63ca19 100644 --- a/src/core/codestream/ojph_codeblock.cpp +++ b/src/core/codestream/ojph_codeblock.cpp @@ -152,11 +152,11 @@ namespace ojph { if (result == false) { if (resilient == true) { - OJPH_INFO(0x000300A1, "Error decoding a codeblock\n"); + OJPH_INFO(0x000300A1, "Error decoding a codeblock"); zero_block = true; } else - OJPH_ERROR(0x000300A1, "Error decoding a codeblock\n"); + OJPH_ERROR(0x000300A1, "Error decoding a codeblock"); } } 
else diff --git a/src/core/coding/ojph_block_decoder.cpp b/src/core/coding/ojph_block_decoder.cpp index 9a121876..5be5430a 100644 --- a/src/core/coding/ojph_block_decoder.cpp +++ b/src/core/coding/ojph_block_decoder.cpp @@ -753,14 +753,14 @@ namespace ojph { { OJPH_WARN(0x00010001, "A malformed codeblock that has more than " "one coding pass, but zero length for " - "2nd and potential 3rd pass.\n"); + "2nd and potential 3rd pass"); num_passes = 1; } if (num_passes > 3) { OJPH_WARN(0x00010002, "We do not support more than 3 coding passes; " - "This codeblocks has %d passes.\n", + "This codeblocks has %d passes", num_passes); return false; } @@ -772,7 +772,7 @@ namespace ojph { insufficient_precision = true; OJPH_WARN(0x00010003, "32 bits are not enough to decode this " "codeblock. This message will not be " - "displayed again.\n"); + "displayed again"); } return false; } @@ -783,7 +783,7 @@ namespace ojph { OJPH_WARN(0x00010004, "Not enough precision to decode the cleanup " "pass. The code can be modified to support " "this case. This message will not be " - "displayed again.\n"); + "displayed again"); } return false; // 32 bits are not enough to decode this } @@ -796,7 +796,7 @@ namespace ojph { OJPH_WARN(0x00010005, "Not enough precision to decode the SgnProp " "nor MagRef passes; both will be skipped. 
" "This message will not be displayed " - "again.\n"); + "again"); } } } @@ -806,7 +806,7 @@ namespace ojph { if (lengths1 < 2) { - OJPH_WARN(0x00010006, "Wrong codeblock length.\n"); + OJPH_WARN(0x00010006, "Wrong codeblock length"); return false; } diff --git a/src/core/coding/ojph_block_decoder_ssse3.cpp b/src/core/coding/ojph_block_decoder_ssse3.cpp index a8f89138..99ae38cb 100644 --- a/src/core/coding/ojph_block_decoder_ssse3.cpp +++ b/src/core/coding/ojph_block_decoder_ssse3.cpp @@ -1033,14 +1033,14 @@ namespace ojph { { OJPH_WARN(0x00010001, "A malformed codeblock that has more than " "one coding pass, but zero length for " - "2nd and potential 3rd pass.\n"); + "2nd and potential 3rd pass"); num_passes = 1; } if (num_passes > 3) { OJPH_WARN(0x00010002, "We do not support more than 3 coding passes; " - "This codeblocks has %d passes.\n", + "This codeblocks has %d passes", num_passes); return false; } @@ -1052,7 +1052,7 @@ namespace ojph { insufficient_precision = true; OJPH_WARN(0x00010003, "32 bits are not enough to decode this " "codeblock. This message will not be " - "displayed again.\n"); + "displayed again"); } return false; } @@ -1063,7 +1063,7 @@ namespace ojph { OJPH_WARN(0x00010004, "Not enough precision to decode the cleanup " "pass. The code can be modified to support " "this case. This message will not be " - "displayed again.\n"); + "displayed again"); } return false; // 32 bits are not enough to decode this } @@ -1076,7 +1076,7 @@ namespace ojph { OJPH_WARN(0x00010005, "Not enough precision to decode the SgnProp " "nor MagRef passes; both will be skipped. 
" "This message will not be displayed " - "again.\n"); + "again"); } } } @@ -1086,7 +1086,7 @@ namespace ojph { if (lengths1 < 2) { - OJPH_WARN(0x00010006, "Wrong codeblock length.\n"); + OJPH_WARN(0x00010006, "Wrong codeblock length"); return false; } From 86b139d62f6246ca686801a7da43d198b82e02db Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Apr 2024 09:13:39 +1100 Subject: [PATCH 051/348] This is a very important bug fix -- Empty subbands/precincts. --- src/core/codestream/ojph_precinct.cpp | 24 ++++++++++++++++++++++++ src/core/codestream/ojph_precinct.h | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/core/codestream/ojph_precinct.cpp b/src/core/codestream/ojph_precinct.cpp index 71b61de8..f8d69fbe 100644 --- a/src/core/codestream/ojph_precinct.cpp +++ b/src/core/codestream/ojph_precinct.cpp @@ -332,6 +332,30 @@ namespace ojph { if (may_use_sop) bb_skip_sop(&bb); + if (num_bands == 3) + { + if (bands[1].empty && bands[2].empty && bands[3].empty) + { + ui32 bit = 0; + bb_read_bit(&bb, bit); + bb_terminate(&bb, uses_eph); + assert(bit == 0); + return; + } + } + else + { + if (bands[0].empty) + { + ui32 bit = 0; + bb_read_bit(&bb, bit); + bb_terminate(&bb, uses_eph); + assert(bit == 0); + return; + } + } + + int sst = num_bands == 3 ? 1 : 0; int send = num_bands == 3 ? 
4 : 1; bool empty_packet = true; diff --git a/src/core/codestream/ojph_precinct.h b/src/core/codestream/ojph_precinct.h index 4641ed68..d8e880a9 100644 --- a/src/core/codestream/ojph_precinct.h +++ b/src/core/codestream/ojph_precinct.h @@ -69,7 +69,7 @@ namespace ojph { ui32& data_left, infile_base *file, bool skipped); ui8 *scratch; - point img_point; //the precinct projected to full resolution + point img_point; //the precinct projected to full resolution rect cb_idxs[4]; //indices of codeblocks subband *bands; //the subbands coded_lists* coded; From 55993264b15ee2efba172d40d6e626e5c6f2ff06 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Apr 2024 14:33:56 +1100 Subject: [PATCH 052/348] A small improvement. --- src/core/codestream/ojph_precinct.cpp | 27 ++++++------------------- src/core/codestream/ojph_resolution.cpp | 2 ++ src/core/codestream/ojph_subband.h | 2 ++ 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/src/core/codestream/ojph_precinct.cpp b/src/core/codestream/ojph_precinct.cpp index f8d69fbe..b7e25aa0 100644 --- a/src/core/codestream/ojph_precinct.cpp +++ b/src/core/codestream/ojph_precinct.cpp @@ -332,30 +332,15 @@ namespace ojph { if (may_use_sop) bb_skip_sop(&bb); - if (num_bands == 3) + if (bands[0].empty && bands[1].empty && bands[2].empty && bands[3].empty) { - if (bands[1].empty && bands[2].empty && bands[3].empty) - { - ui32 bit = 0; - bb_read_bit(&bb, bit); - bb_terminate(&bb, uses_eph); - assert(bit == 0); - return; - } - } - else - { - if (bands[0].empty) - { - ui32 bit = 0; - bb_read_bit(&bb, bit); - bb_terminate(&bb, uses_eph); - assert(bit == 0); - return; - } + ui32 bit = 0; + bb_read_bit(&bb, bit); + bb_terminate(&bb, uses_eph); + assert(bit == 0); + return; } - int sst = num_bands == 3 ? 1 : 0; int send = num_bands == 3 ? 
4 : 1; bool empty_packet = true; diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index c4507707..a0413b76 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -241,6 +241,8 @@ namespace ojph { ui32 trx1 = res_rect.org.x + res_rect.siz.w; ui32 try1 = res_rect.org.y + res_rect.siz.h; bands = allocator->post_alloc_obj(4); + for (int i = 0; i < 4; ++i) + new (bands + i) subband; if (res_num > 0) { this->num_bands = 3; diff --git a/src/core/codestream/ojph_subband.h b/src/core/codestream/ojph_subband.h index 9928c5ef..34cc7396 100644 --- a/src/core/codestream/ojph_subband.h +++ b/src/core/codestream/ojph_subband.h @@ -63,6 +63,8 @@ namespace ojph { { friend struct precinct; public: + subband() { memset(this, 0, sizeof(subband)); empty = true; } + static void pre_alloc(codestream *codestream, const rect& band_rect, ui32 comp_num, ui32 res_num); void finalize_alloc(codestream *codestream, const rect& band_rect, From 0e0d41ddd4d3770df81fd3e71f1091af4d7ae9bb Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 6 Apr 2024 15:42:00 +1100 Subject: [PATCH 053/348] Corrected Tests. --- src/core/codestream/ojph_precinct.cpp | 11 +- src/core/codestream/ojph_subband.h | 20 +- tests/test_executables.cpp | 744 +++++++++++++------------- tests/test_helpers/ht_cmdlines.txt | 4 +- 4 files changed, 399 insertions(+), 380 deletions(-) diff --git a/src/core/codestream/ojph_precinct.cpp b/src/core/codestream/ojph_precinct.cpp index b7e25aa0..c20c8589 100644 --- a/src/core/codestream/ojph_precinct.cpp +++ b/src/core/codestream/ojph_precinct.cpp @@ -341,11 +341,12 @@ namespace ojph { return; } - int sst = num_bands == 3 ? 1 : 0; - int send = num_bands == 3 ? 
4 : 1; bool empty_packet = true; - for (int s = sst; s < send; ++s) + for (int s = 0; s < 4; ++s) { + if (bands[s].empty) + continue; + if (cb_idxs[s].siz.w == 0 || cb_idxs[s].siz.h == 0) continue; @@ -505,8 +506,10 @@ namespace ojph { } bb_terminate(&bb, uses_eph); //read codeblock data - for (int s = sst; s < send; ++s) + for (int s = 0; s < 4; ++s) { + if (bands[s].empty) + continue; ui32 band_width = bands[s].num_blocks.w; ui32 width = cb_idxs[s].siz.w; ui32 height = cb_idxs[s].siz.h; diff --git a/src/core/codestream/ojph_subband.h b/src/core/codestream/ojph_subband.h index 34cc7396..5dd145e6 100644 --- a/src/core/codestream/ojph_subband.h +++ b/src/core/codestream/ojph_subband.h @@ -63,7 +63,22 @@ namespace ojph { { friend struct precinct; public: - subband() { memset(this, 0, sizeof(subband)); empty = true; } + subband() { + res_num = band_num = 0; + reversible = false; + empty = true; // <---- true + lines = NULL; + parent = NULL; + blocks = NULL; + xcb_prime = ycb_prime = 0; + cur_cb_row = 0; + cur_line = 0; + cur_cb_height = 0; + delta = delta_inv = 0.0f; + K_max = 0; + coded_cbs = NULL; + elastic = NULL; + } static void pre_alloc(codestream *codestream, const rect& band_rect, ui32 comp_num, ui32 res_num); @@ -80,9 +95,10 @@ namespace ojph { line_buf* pull_line(); private: + bool empty; // true if the subband has no pixels or + // the subband is NOT USED ui32 res_num, band_num; bool reversible; - bool empty; rect band_rect; line_buf *lines; resolution* parent; diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 4c3a12b9..f42174f6 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -44,7 +44,7 @@ // STATIC ojph_popen //////////////////////////////////////////////////////////////////////////////// static inline -FILE *ojph_popen(const char *command, const char *modes) +FILE* ojph_popen(const char* command, const char* modes) { #ifdef OJPH_COMPILER_MSVC return _popen(command, modes); @@ -57,7 +57,7 @@ FILE 
*ojph_popen(const char *command, const char *modes) // STATIC ojph_pclose //////////////////////////////////////////////////////////////////////////////// static inline -int ojph_pclose(FILE *stream) +int ojph_pclose(FILE* stream) { #ifdef OJPH_COMPILER_MSVC return _pclose(stream); @@ -69,16 +69,16 @@ int ojph_pclose(FILE *stream) //////////////////////////////////////////////////////////////////////////////// // STATIC execute //////////////////////////////////////////////////////////////////////////////// -static -int execute(const std::string& cmd, std::string& result) +static +int execute(const std::string& cmd, std::string& result) { std::array buffer; result.clear(); FILE* pipe = ojph_popen(cmd.c_str(), "r"); - if (!pipe) + if (!pipe) throw std::runtime_error("ojph_popen() failed!"); - + while (!feof(pipe)) if (fgets(buffer.data(), 128, pipe) != nullptr) result += buffer.data(); @@ -94,21 +94,21 @@ int execute(const std::string& cmd, std::string& result) //////////////////////////////////////////////////////////////////////////////// #ifdef OJPH_OS_WINDOWS - #define SRC_FILE_DIR ".\\jp2k_test_codestreams\\openjph\\" - #define OUT_FILE_DIR ".\\" - #define REF_FILE_DIR ".\\jp2k_test_codestreams\\openjph\\references\\" - #define MSE_PAE_PATH ".\\mse_pae" - #define COMPARE_FILES_PATH ".\\compare_files" - #define EXPAND_EXECUTABLE ".\\ojph_expand.exe" - #define COMPRESS_EXECUTABLE ".\\ojph_compress.exe" +#define SRC_FILE_DIR ".\\jp2k_test_codestreams\\openjph\\" +#define OUT_FILE_DIR ".\\" +#define REF_FILE_DIR ".\\jp2k_test_codestreams\\openjph\\references\\" +#define MSE_PAE_PATH ".\\mse_pae" +#define COMPARE_FILES_PATH ".\\compare_files" +#define EXPAND_EXECUTABLE ".\\ojph_expand.exe" +#define COMPRESS_EXECUTABLE ".\\ojph_compress.exe" #else - #define SRC_FILE_DIR "./jp2k_test_codestreams/openjph/" - #define OUT_FILE_DIR "./" - #define REF_FILE_DIR "./jp2k_test_codestreams/openjph/references/" - #define MSE_PAE_PATH "./mse_pae" - #define COMPARE_FILES_PATH 
"./compare_files" - #define EXPAND_EXECUTABLE "./ojph_expand" - #define COMPRESS_EXECUTABLE "./ojph_compress" +#define SRC_FILE_DIR "./jp2k_test_codestreams/openjph/" +#define OUT_FILE_DIR "./" +#define REF_FILE_DIR "./jp2k_test_codestreams/openjph/references/" +#define MSE_PAE_PATH "./mse_pae" +#define COMPARE_FILES_PATH "./compare_files" +#define EXPAND_EXECUTABLE "./ojph_expand" +#define COMPRESS_EXECUTABLE "./ojph_compress" #endif #define TOL_DOUBLE 0.01 #define TOL_INTEGER 1 @@ -116,22 +116,22 @@ int execute(const std::string& cmd, std::string& result) //////////////////////////////////////////////////////////////////////////////// // run_ojph_compress //////////////////////////////////////////////////////////////////////////////// -void run_ojph_compress(const std::string& ref_filename, - const std::string& base_filename, - const std::string& extended_base_fname, - const std::string& out_ext, - const std::string& extra_options) +void run_ojph_compress(const std::string& ref_filename, + const std::string& base_filename, + const std::string& extended_base_fname, + const std::string& out_ext, + const std::string& extra_options) { try { std::string result, command; - command = std::string(COMPRESS_EXECUTABLE) + command = std::string(COMPRESS_EXECUTABLE) + " -i " + REF_FILE_DIR + ref_filename - + " -o " + OUT_FILE_DIR + base_filename + extended_base_fname + + + " -o " + OUT_FILE_DIR + base_filename + extended_base_fname + "." 
+ out_ext + " " + extra_options; std::cerr << command << std::endl; EXPECT_EQ(execute(command, result), 0); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); } } @@ -139,18 +139,18 @@ void run_ojph_compress(const std::string& ref_filename, //////////////////////////////////////////////////////////////////////////////// // run_ojph_expand //////////////////////////////////////////////////////////////////////////////// -void run_ojph_expand(const std::string& base_filename, - const std::string& src_ext, - const std::string& out_ext) +void run_ojph_expand(const std::string& base_filename, + const std::string& src_ext, + const std::string& out_ext) { try { std::string result, command; - command = std::string(EXPAND_EXECUTABLE) + command = std::string(EXPAND_EXECUTABLE) + " -i " + SRC_FILE_DIR + base_filename + "." + src_ext + " -o " + OUT_FILE_DIR + base_filename + "." + out_ext; EXPECT_EQ(execute(command, result), 0); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); } } @@ -158,34 +158,34 @@ void run_ojph_expand(const std::string& base_filename, //////////////////////////////////////////////////////////////////////////////// // run_ojph_compress //////////////////////////////////////////////////////////////////////////////// -void run_ojph_compress_expand(const std::string& base_filename, - const std::string& out_ext, - const std::string& decode_ext) +void run_ojph_compress_expand(const std::string& base_filename, + const std::string& out_ext, + const std::string& decode_ext) { try { std::string result, command; - command = std::string(EXPAND_EXECUTABLE) + command = std::string(EXPAND_EXECUTABLE) + " -i " + OUT_FILE_DIR + base_filename + "." + out_ext + " -o " + OUT_FILE_DIR + base_filename + "." 
+ decode_ext; EXPECT_EQ(execute(command, result), 0); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); - } + } } //////////////////////////////////////////////////////////////////////////////// // run_mse_pae //////////////////////////////////////////////////////////////////////////////// -void run_mse_pae(const std::string& base_filename, - const std::string& out_ext, - const std::string& ref_filename, - const std::string& yuv_specs, - int num_components, double* mse, int* pae) +void run_mse_pae(const std::string& base_filename, + const std::string& out_ext, + const std::string& ref_filename, + const std::string& yuv_specs, + int num_components, double* mse, int* pae) { try { std::string result, command; - command = std::string(MSE_PAE_PATH) + command = std::string(MSE_PAE_PATH) + " " + OUT_FILE_DIR + base_filename + "." + out_ext + yuv_specs + " " + REF_FILE_DIR + ref_filename + yuv_specs; EXPECT_EQ(execute(command, result), 0); @@ -214,7 +214,7 @@ void run_mse_pae(const std::string& base_filename, ++pos; } } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); } } @@ -222,20 +222,20 @@ void run_mse_pae(const std::string& base_filename, //////////////////////////////////////////////////////////////////////////////// // compare_files //////////////////////////////////////////////////////////////////////////////// -void compare_files(const std::string& base_filename, - const std::string& extended_base_fname, - const std::string& ext) +void compare_files(const std::string& base_filename, + const std::string& extended_base_fname, + const std::string& ext) { try { std::string result, command; - command = std::string(COMPARE_FILES_PATH) + command = std::string(COMPARE_FILES_PATH) + " " + OUT_FILE_DIR + base_filename + extended_base_fname + "." + ext + " " + SRC_FILE_DIR + base_filename + "." 
+ ext; EXPECT_EQ(execute(command, result), 0); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); - } + } } //////////////////////////////////////////////////////////////////////////////// @@ -249,7 +249,7 @@ TEST(TestExecutables, OpenJPHCompressNoArguments) { std::string result; EXPECT_EQ(execute(COMPRESS_EXECUTABLE, result), 1); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); } } @@ -261,7 +261,7 @@ TEST(TestExecutables, OpenJPHExpandNoArguments) { std::string result; EXPECT_EQ(execute(EXPAND_EXECUTABLE, result), 1); } - catch(const std::runtime_error& error) { + catch (const std::runtime_error& error) { FAIL() << error.what(); } } @@ -275,11 +275,11 @@ TEST(TestExecutables, OpenJPHExpandNoArguments) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x64) { - double mse[3] = { 39.2812, 36.3819, 47.642}; - int pae[3] = { 74, 77, 73}; + double mse[3] = { 39.2812, 36.3819, 47.642 }; + int pae[3] = { 74, 77, 73 }; run_ojph_expand("simple_dec_irv97_64x64", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -287,11 +287,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_32x32.jph -precise -quiet -rate 1 Cblk={32,32} -full TEST(TestExecutables, SimpleDecIrv9732x32) { - double mse[3] = { 18.6979, 17.1208, 22.7539}; - int pae[3] = { 51, 48, 46}; + double mse[3] = { 18.6979, 17.1208, 22.7539 }; + int pae[3] = { 51, 48, 46 }; run_ojph_expand("simple_dec_irv97_32x32", "jph", "ppm"); run_mse_pae("simple_dec_irv97_32x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -299,11 +299,11 @@ TEST(TestExecutables, SimpleDecIrv9732x32) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_16x16.jph -precise -quiet -rate 1 Cblk={16,16} -full TEST(TestExecutables, SimpleDecIrv9716x16) { - double mse[3] = { 20.1706, 18.5427, 24.6146}; - int pae[3] = { 53, 51, 47}; + double mse[3] = { 20.1706, 18.5427, 24.6146 }; + int pae[3] = { 53, 51, 47 }; run_ojph_expand("simple_dec_irv97_16x16", "jph", "ppm"); run_mse_pae("simple_dec_irv97_16x16", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -311,11 +311,11 @@ TEST(TestExecutables, SimpleDecIrv9716x16) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_4x4.jph -precise -quiet -rate 1 Cblk={4,4} -full TEST(TestExecutables, SimpleDecIrv974x4) { - double mse[3] = { 40.8623, 37.9308, 49.7276}; - int pae[3] = { 75, 77, 80}; + double mse[3] = { 40.8623, 37.9308, 49.7276 }; + int pae[3] = { 75, 77, 80 }; run_ojph_expand("simple_dec_irv97_4x4", "jph", "ppm"); run_mse_pae("simple_dec_irv97_4x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -323,11 +323,11 @@ TEST(TestExecutables, SimpleDecIrv974x4) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_1024x4.jph -precise -quiet -rate 1 Cblk={1024,4} -full TEST(TestExecutables, SimpleDecIrv971024x4) { - double mse[3] = { 19.8275, 18.2511, 24.2832}; - int pae[3] = { 53, 52, 50}; + double mse[3] = { 19.8275, 18.2511, 24.2832 }; + int pae[3] = { 53, 52, 50 }; run_ojph_expand("simple_dec_irv97_1024x4", "jph", "ppm"); run_mse_pae("simple_dec_irv97_1024x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ 
-335,11 +335,11 @@ TEST(TestExecutables, SimpleDecIrv971024x4) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_4x1024.jph -precise -quiet -rate 1 Cblk={4,1024} -full TEST(TestExecutables, SimpleDecIrv974x1024) { - double mse[3] = { 19.9635, 18.4063, 24.1719}; - int pae[3] = { 51, 48, 51}; + double mse[3] = { 19.9635, 18.4063, 24.1719 }; + int pae[3] = { 51, 48, 51 }; run_ojph_expand("simple_dec_irv97_4x1024", "jph", "ppm"); run_mse_pae("simple_dec_irv97_4x1024", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -347,11 +347,11 @@ TEST(TestExecutables, SimpleDecIrv974x1024) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_512x8.jph -precise -quiet -rate 1 Cblk={512,8} -full TEST(TestExecutables, SimpleDecIrv97512x8) { - double mse[3] = { 18.7929, 17.2026, 22.9922}; - int pae[3] = { 53, 52, 50}; + double mse[3] = { 18.7929, 17.2026, 22.9922 }; + int pae[3] = { 53, 52, 50 }; run_ojph_expand("simple_dec_irv97_512x8", "jph", "ppm"); run_mse_pae("simple_dec_irv97_512x8", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -359,11 +359,11 @@ TEST(TestExecutables, SimpleDecIrv97512x8) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_8x512.jph -precise -quiet -rate 1 Cblk={8,512} -full TEST(TestExecutables, SimpleDecIrv978x512) { - double mse[3] = { 19.3661, 17.8067, 23.4574}; - int pae[3] = { 51, 48, 52}; + double mse[3] = { 19.3661, 17.8067, 23.4574 }; + int pae[3] = { 51, 48, 52 }; run_ojph_expand("simple_dec_irv97_8x512", "jph", "ppm"); run_mse_pae("simple_dec_irv97_8x512", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -371,11 +371,11 @@ TEST(TestExecutables, SimpleDecIrv978x512) { // 
Command-line options used to obtain this file is: // -o simple_dec_irv97_256x16.jph -precise -quiet -rate 1 Cblk={256,16} -full TEST(TestExecutables, SimpleDecIrv97256x16) { - double mse[3] = { 18.6355, 17.0963, 22.6076}; - int pae[3] = { 54, 51, 48}; + double mse[3] = { 18.6355, 17.0963, 22.6076 }; + int pae[3] = { 54, 51, 48 }; run_ojph_expand("simple_dec_irv97_256x16", "jph", "ppm"); run_mse_pae("simple_dec_irv97_256x16", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -383,11 +383,11 @@ TEST(TestExecutables, SimpleDecIrv97256x16) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_16x256.jph -precise -quiet -rate 1 Cblk={16,256} -full TEST(TestExecutables, SimpleDecIrv9716x256) { - double mse[3] = { 18.5933, 17.0208, 22.5709}; - int pae[3] = { 51, 48, 47}; + double mse[3] = { 18.5933, 17.0208, 22.5709 }; + int pae[3] = { 51, 48, 47 }; run_ojph_expand("simple_dec_irv97_16x256", "jph", "ppm"); run_mse_pae("simple_dec_irv97_16x256", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -395,11 +395,11 @@ TEST(TestExecutables, SimpleDecIrv9716x256) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_128x32.jph -precise -quiet -rate 1 Cblk={128,32} -full TEST(TestExecutables, SimpleDecIrv97128x32) { - double mse[3] = { 18.4443, 16.9133, 22.4193}; - int pae[3] = { 52, 50, 46}; + double mse[3] = { 18.4443, 16.9133, 22.4193 }; + int pae[3] = { 52, 50, 46 }; run_ojph_expand("simple_dec_irv97_128x32", "jph", "ppm"); run_mse_pae("simple_dec_irv97_128x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -407,11 +407,11 @@ TEST(TestExecutables, SimpleDecIrv97128x32) { // Command-line options used to obtain this file is: // -o 
simple_dec_irv97_32x128.jph -precise -quiet -rate 1 Cblk={32,128} -full TEST(TestExecutables, SimpleDecIrv9732x128) { - double mse[3] = { 18.4874, 16.9379, 22.4855}; - int pae[3] = { 51, 48, 45}; + double mse[3] = { 18.4874, 16.9379, 22.4855 }; + int pae[3] = { 51, 48, 45 }; run_ojph_expand("simple_dec_irv97_32x128", "jph", "ppm"); run_mse_pae("simple_dec_irv97_32x128", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -419,11 +419,11 @@ TEST(TestExecutables, SimpleDecIrv9732x128) { // Command-line options used to obtain this file is: // -o simple_dec_rev53_64x64.jph -precise -quiet Creversible=yes -full TEST(TestExecutables, SimpleDecRev5364x64) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_64x64", "jph", "ppm"); run_mse_pae("simple_dec_rev53_64x64", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -432,11 +432,11 @@ TEST(TestExecutables, SimpleDecRev5364x64) { // -o simple_dec_rev53_32x32.jph -precise -quiet Creversible=yes Cblk={32,32} // -full TEST(TestExecutables, SimpleDecRev5332x32) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_32x32", "jph", "ppm"); run_mse_pae("simple_dec_rev53_32x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -445,11 +445,11 @@ TEST(TestExecutables, SimpleDecRev5332x32) { // -o simple_dec_rev53_4x4.jph -precise -quiet Creversible=yes Cblk={4,4} // -full TEST(TestExecutables, SimpleDecRev534x4) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; 
run_ojph_expand("simple_dec_rev53_4x4", "jph", "ppm"); run_mse_pae("simple_dec_rev53_4x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -458,11 +458,11 @@ TEST(TestExecutables, SimpleDecRev534x4) { // -o simple_dec_rev53_1024x4.jph -precise -quiet Creversible=yes // Cblk={1024,4} -full TEST(TestExecutables, SimpleDecRev531024x4) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_1024x4", "jph", "ppm"); run_mse_pae("simple_dec_rev53_1024x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -471,11 +471,11 @@ TEST(TestExecutables, SimpleDecRev531024x4) { // -o simple_dec_rev53_4x1024.jph -precise -quiet Creversible=yes // Cblk={4,1024} -full TEST(TestExecutables, SimpleDecRev534x1024) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_4x1024", "jph", "ppm"); run_mse_pae("simple_dec_rev53_4x1024", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -486,11 +486,11 @@ TEST(TestExecutables, SimpleDecRev534x1024) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} -full TEST(TestExecutables, SimpleDecIrv9764x64Yuv) { - double mse[3] = { 20.2778, 6.27912, 4.15937}; - int pae[3] = { 52, 22, 31}; + double mse[3] = { 20.2778, 6.27912, 4.15937 }; + int pae[3] = { 52, 22, 31 }; run_ojph_expand("simple_dec_irv97_64x64_yuv", "jph", "yuv"); run_mse_pae("simple_dec_irv97_64x64_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -501,11 +501,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64Yuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} -full TEST(TestExecutables, SimpleDecRev5364x64Yuv) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_64x64_yuv", "jph", "yuv"); run_mse_pae("simple_dec_rev53_64x64_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -516,11 +516,11 @@ TEST(TestExecutables, SimpleDecRev5364x64Yuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} Stiles={33,257} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesYuv) { - double mse[3] = { 34.4972, 10.1112, 7.96331}; - int pae[3] = { 67, 30, 39}; + double mse[3] = { 34.4972, 10.1112, 7.96331 }; + int pae[3] = { 67, 30, 39 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_yuv", "jph", "yuv"); run_mse_pae("simple_dec_irv97_64x64_tiles_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -531,11 +531,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesYuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} Stiles={33,257} -full TEST(TestExecutables, SimpleDecRev5364x64TilesYuv) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_64x64_tiles_yuv", "jph", "yuv"); run_mse_pae("simple_dec_rev53_64x64_tiles_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -545,11 +545,11 @@ TEST(TestExecutables, SimpleDecRev5364x64TilesYuv) { // Clevels=5 Corder=LRCP Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP) { - double mse[3] = { 71.8149, 68.7115, 89.4001}; - int pae[3] = { 78, 78, 83}; + double mse[3] = { 71.8149, 68.7115, 89.4001 }; + int pae[3] = { 78, 78, 83 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -559,11 +559,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP) { // Clevels=5 Corder=RLCP Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP) { - double mse[3] = { 71.8149, 68.7115, 89.4001}; - int pae[3] = { 78, 78, 83}; + double mse[3] = { 71.8149, 68.7115, 89.4001 }; + int pae[3] = { 78, 78, 83 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -573,11 +573,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP) { // Clevels=5 Corder=RPCL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL) { - double mse[3] = { 71.8149, 68.7115, 89.4001}; - int pae[3] = { 78, 78, 83}; + double mse[3] = { 71.8149, 68.7115, 89.4001 }; + int pae[3] = { 78, 78, 83 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -587,11 +587,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL) { // Clevels=5 Corder=PCRL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL) { - double mse[3] = { 71.8149, 68.7115, 89.4001}; - int pae[3] = { 78, 78, 83}; + double mse[3] = { 71.8149, 68.7115, 89.4001 }; + int pae[3] = { 78, 78, 83 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -601,11 +601,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL) { // Clevels=5 Corder=CPRL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL) { - double mse[3] = { 71.8149, 68.7115, 89.4001}; - int pae[3] = { 78, 78, 83}; + double mse[3] = { 71.8149, 68.7115, 89.4001 }; + int pae[3] = { 78, 78, 83 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -615,11 +615,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL) { // Clevels=5 Corder=LRCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33) { - double mse[3] = { 56.2139, 51.4121, 69.0107}; - int pae[3] = { 80, 81, 98}; + double mse[3] = { 56.2139, 51.4121, 69.0107 }; + int pae[3] = { 80, 81, 98 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -629,11 +629,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33) { // Clevels=5 Corder=RLCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33) { - double mse[3] = { 56.2139, 51.4121, 69.0107}; - int pae[3] = { 80, 81, 98}; + double mse[3] = { 56.2139, 51.4121, 69.0107 }; + int pae[3] = { 80, 81, 98 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -643,11 +643,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33) { // Clevels=5 Corder=RPCL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107}; - int pae[3] = { 80, 81, 98}; + double mse[3] = { 56.2139, 51.4121, 69.0107 }; + int pae[3] = { 80, 81, 98 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -657,11 +657,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33) { // Clevels=5 Corder=PCRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107}; - int pae[3] = { 80, 81, 98}; + double mse[3] = { 56.2139, 51.4121, 69.0107 }; + int pae[3] = { 80, 81, 98 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -671,11 +671,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33) { // Clevels=5 Corder=CPRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107}; - int pae[3] = { 80, 81, 98}; + double mse[3] = { 56.2139, 51.4121, 69.0107 }; + int pae[3] = { 80, 81, 98 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -685,11 +685,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33) { // Clevels=5 Corder=LRCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33x33) { - double mse[3] = { 210.283, 210.214, 257.276}; - int pae[3] = { 165, 161, 166}; + double mse[3] = { 210.283, 210.214, 257.276 }; + int pae[3] = { 165, 161, 166 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -699,11 +699,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33x33) { // Clevels=5 Corder=RLCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33x33) { - double mse[3] = { 210.283, 210.214, 257.276}; - int pae[3] = { 165, 161, 166}; + double mse[3] = { 210.283, 210.214, 257.276 }; + int pae[3] = { 165, 161, 166 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -713,11 +713,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33x33) { // Clevels=5 Corder=RPCL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33x33) { - double mse[3] = { 210.283, 210.214, 257.276}; - int pae[3] = { 165, 161, 166}; + double mse[3] = { 210.283, 210.214, 257.276 }; + int pae[3] = { 165, 161, 166 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -727,11 +727,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33x33) { // Clevels=5 Corder=PCRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33x33) { - double mse[3] = { 210.283, 210.214, 257.276}; - int pae[3] = { 165, 161, 166}; + double mse[3] = { 210.283, 210.214, 257.276 }; + int pae[3] = { 165, 161, 166 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -741,11 +741,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33x33) { // Clevels=5 Corder=CPRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33x33) { - double mse[3] = { 210.283, 210.214, 257.276}; - int pae[3] = { 165, 161, 166}; + double mse[3] = { 210.283, 210.214, 257.276 }; + int pae[3] = { 165, 161, 166 }; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -754,11 +754,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33x33) { // -o simple_dec_rev53_64x64_gray_tiles.jph -precise -quiet Creversible=yes // Clevels=5 Stiles={33,257} -full TEST(TestExecutables, SimpleDecRev5364x64GrayTiles) { - double mse[1] = { 0}; - int pae[1] = { 0}; + double mse[1] = { 0 }; + int pae[1] = { 0 }; run_ojph_expand("simple_dec_rev53_64x64_gray_tiles", "jph", "pgm"); run_mse_pae("simple_dec_rev53_64x64_gray_tiles", "pgm", "monarch.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -767,11 +767,11 @@ TEST(TestExecutables, SimpleDecRev5364x64GrayTiles) { // -o simple_dec_irv97_64x64_gray_tiles.jph -precise -quiet -rate 0.5 // Clevels=5 Stiles={33,257} -full TEST(TestExecutables, SimpleDecIrv9764x64GrayTiles) { - double mse[1] = { 18.9601}; - int pae[1] = { 56}; + double mse[1] = { 18.9601 }; + int pae[1] = { 56 }; run_ojph_expand("simple_dec_irv97_64x64_gray_tiles", "jph", "pgm"); run_mse_pae("simple_dec_irv97_64x64_gray_tiles", "pgm", "monarch.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -779,11 +779,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64GrayTiles) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64_16bit.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x6416bit) { - double mse[3] = { 60507.2, 36672.5, 64809.8}; - int pae[3] = { 2547, 1974, 1922}; + double mse[3] = { 60507.2, 36672.5, 64809.8 }; + int pae[3] = { 2547, 1974, 1922 }; run_ojph_expand("simple_dec_irv97_64x64_16bit", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_16bit", "ppm", "mm.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -791,11 +791,11 @@ TEST(TestExecutables, 
SimpleDecIrv9764x6416bit) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64_16bit_gray.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x6416bitGray) { - double mse[1] = { 19382.9}; - int pae[1] = { 1618}; + double mse[1] = { 19382.9 }; + int pae[1] = { 1618 }; run_ojph_expand("simple_dec_irv97_64x64_16bit_gray", "jph", "pgm"); run_mse_pae("simple_dec_irv97_64x64_16bit_gray", "pgm", "mm.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -803,11 +803,11 @@ TEST(TestExecutables, SimpleDecIrv9764x6416bitGray) { // Command-line options used to obtain this file is: // -o simple_dec_rev53_64x64_16bit.jph -precise -quiet Creversible=yes -full TEST(TestExecutables, SimpleDecRev5364x6416bit) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_expand("simple_dec_rev53_64x64_16bit", "jph", "ppm"); run_mse_pae("simple_dec_rev53_64x64_16bit", "ppm", "mm.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -816,11 +816,11 @@ TEST(TestExecutables, SimpleDecRev5364x6416bit) { // -o simple_dec_rev53_64x64_16bit_gray.jph -precise -quiet Creversible=yes // -full TEST(TestExecutables, SimpleDecRev5364x6416bitGray) { - double mse[1] = { 0}; - int pae[1] = { 0}; + double mse[1] = { 0 }; + int pae[1] = { 0 }; run_ojph_expand("simple_dec_rev53_64x64_16bit_gray", "jph", "pgm"); run_mse_pae("simple_dec_rev53_64x64_16bit_gray", "pgm", "mm.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -829,14 +829,14 @@ TEST(TestExecutables, SimpleDecRev5364x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_64x64.j2c -qstep 0.1 TEST(TestExecutables, SimpleEncIrv9764x64) { - double 
mse[3] = { 46.2004, 43.622, 56.7452}; - int pae[3] = { 48, 46, 52}; + double mse[3] = { 46.2004, 43.622, 56.7452 }; + int pae[3] = { 48, 46, 52 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64", "", "j2c", - "-qstep 0.1"); + "simple_enc_irv97_64x64", "", "j2c", + "-qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_64x64", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -845,14 +845,14 @@ TEST(TestExecutables, SimpleEncIrv9764x64) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_32x32.j2c -qstep 0.01 -block_size {32,32} TEST(TestExecutables, SimpleEncIrv9732x32) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_32x32", "", "j2c", - "-qstep 0.01 -block_size \"{32,32}\""); + "simple_enc_irv97_32x32", "", "j2c", + "-qstep 0.01 -block_size \"{32,32}\""); run_ojph_compress_expand("simple_enc_irv97_32x32", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_32x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -861,14 +861,14 @@ TEST(TestExecutables, SimpleEncIrv9732x32) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_16x16.j2c -qstep 0.01 -block_size {16,16} TEST(TestExecutables, SimpleEncIrv9716x16) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_16x16", "", "j2c", - "-qstep 0.01 -block_size \"{16,16}\""); + "simple_enc_irv97_16x16", "", "j2c", + "-qstep 0.01 -block_size 
\"{16,16}\""); run_ojph_compress_expand("simple_enc_irv97_16x16", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_16x16", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -877,14 +877,14 @@ TEST(TestExecutables, SimpleEncIrv9716x16) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_4x4.j2c -qstep 0.01 -block_size {4,4} TEST(TestExecutables, SimpleEncIrv974x4) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_4x4", "", "j2c", - "-qstep 0.01 -block_size \"{4,4}\""); + "simple_enc_irv97_4x4", "", "j2c", + "-qstep 0.01 -block_size \"{4,4}\""); run_ojph_compress_expand("simple_enc_irv97_4x4", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_4x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -893,14 +893,14 @@ TEST(TestExecutables, SimpleEncIrv974x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_1024x4.j2c -qstep 0.01 -block_size {4,1024} TEST(TestExecutables, SimpleEncIrv971024x4) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_1024x4", "", "j2c", - "-qstep 0.01 -block_size \"{4,1024}\""); + "simple_enc_irv97_1024x4", "", "j2c", + "-qstep 0.01 -block_size \"{4,1024}\""); run_ojph_compress_expand("simple_enc_irv97_1024x4", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_1024x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -909,14 
+909,14 @@ TEST(TestExecutables, SimpleEncIrv971024x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_4x1024.j2c -qstep 0.01 -block_size {1024,4} TEST(TestExecutables, SimpleEncIrv974x1024) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_4x1024", "", "j2c", - "-qstep 0.01 -block_size \"{1024,4}\""); + "simple_enc_irv97_4x1024", "", "j2c", + "-qstep 0.01 -block_size \"{1024,4}\""); run_ojph_compress_expand("simple_enc_irv97_4x1024", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_4x1024", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -925,14 +925,14 @@ TEST(TestExecutables, SimpleEncIrv974x1024) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_512x8.j2c -qstep 0.01 -block_size {8,512} TEST(TestExecutables, SimpleEncIrv97512x8) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_512x8", "", "j2c", - "-qstep 0.01 -block_size \"{8,512}\""); + "simple_enc_irv97_512x8", "", "j2c", + "-qstep 0.01 -block_size \"{8,512}\""); run_ojph_compress_expand("simple_enc_irv97_512x8", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_512x8", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -941,14 +941,14 @@ TEST(TestExecutables, SimpleEncIrv97512x8) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_8x512.j2c -qstep 0.01 -block_size {512,8} TEST(TestExecutables, SimpleEncIrv978x512) { - double mse[3] = { 1.78779, 
1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_8x512", "", "j2c", - "-qstep 0.01 -block_size \"{512,8}\""); + "simple_enc_irv97_8x512", "", "j2c", + "-qstep 0.01 -block_size \"{512,8}\""); run_ojph_compress_expand("simple_enc_irv97_8x512", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_8x512", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -957,14 +957,14 @@ TEST(TestExecutables, SimpleEncIrv978x512) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_256x16.j2c -qstep 0.01 -block_size {16,256} TEST(TestExecutables, SimpleEncIrv97256x16) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_256x16", "", "j2c", - "-qstep 0.01 -block_size \"{16,256}\""); + "simple_enc_irv97_256x16", "", "j2c", + "-qstep 0.01 -block_size \"{16,256}\""); run_ojph_compress_expand("simple_enc_irv97_256x16", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_256x16", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -973,14 +973,14 @@ TEST(TestExecutables, SimpleEncIrv97256x16) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_16x256.j2c -qstep 0.01 -block_size {256,16} TEST(TestExecutables, SimpleEncIrv9716x256) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_16x256", "", "j2c", - "-qstep 0.01 -block_size \"{256,16}\""); + "simple_enc_irv97_16x256", 
"", "j2c", + "-qstep 0.01 -block_size \"{256,16}\""); run_ojph_compress_expand("simple_enc_irv97_16x256", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_16x256", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -989,14 +989,14 @@ TEST(TestExecutables, SimpleEncIrv9716x256) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_128x32.j2c -qstep 0.01 -block_size {32,128} TEST(TestExecutables, SimpleEncIrv97128x32) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_128x32", "", "j2c", - "-qstep 0.01 -block_size \"{32,128}\""); + "simple_enc_irv97_128x32", "", "j2c", + "-qstep 0.01 -block_size \"{32,128}\""); run_ojph_compress_expand("simple_enc_irv97_128x32", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_128x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1005,14 +1005,14 @@ TEST(TestExecutables, SimpleEncIrv97128x32) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_32x128.j2c -qstep 0.01 -block_size {128,32} TEST(TestExecutables, SimpleEncIrv9732x128) { - double mse[3] = { 1.78779, 1.26001, 2.38395}; - int pae[3] = { 7, 6, 9}; + double mse[3] = { 1.78779, 1.26001, 2.38395 }; + int pae[3] = { 7, 6, 9 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_32x128", "", "j2c", - "-qstep 0.01 -block_size \"{128,32}\""); + "simple_enc_irv97_32x128", "", "j2c", + "-qstep 0.01 -block_size \"{128,32}\""); run_ojph_compress_expand("simple_enc_irv97_32x128", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_32x128", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -1022,14 +1022,14 @@ TEST(TestExecutables, SimpleEncIrv9732x128) { // -o simple_enc_irv97_64x64_tiles_33x33_d5.j2c -qstep 0.01 -tile_size {33,33} // -num_decomps 5 TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D5) { - double mse[3] = { 46.2004, 43.622, 56.7452}; - int pae[3] = { 48, 46, 52}; + double mse[3] = { 1.88906, 1.30757, 2.5347 }; + int pae[3] = { 9, 6, 10 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64_tiles_33x33_d5", "", "j2c", - "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 5"); + "simple_enc_irv97_64x64_tiles_33x33_d5", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 5"); run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d5", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d5", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1039,14 +1039,14 @@ TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D5) { // -o simple_enc_irv97_64x64_tiles_33x33_d6.j2c -qstep 0.01 -tile_size {33,33} // -num_decomps 6 TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D6) { - double mse[3] = { 46.2004, 43.622, 56.7452}; - int pae[3] = { 48, 46, 52}; + double mse[3] = { 1.88751, 1.30673, 2.53378 }; + int pae[3] = { 8, 6, 10 }; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64_tiles_33x33_d6", "", "j2c", - "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 6"); + "simple_enc_irv97_64x64_tiles_33x33_d6", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 6"); run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d6", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d6", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1055,14 +1055,14 @@ TEST(TestExecutables, 
SimpleEncIrv9764x64Tiles33x33D6) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_64x64_16bit.j2c -qstep 0.01 TEST(TestExecutables, SimpleEncIrv9764x6416bit) { - double mse[3] = { 51727.3, 32596.4, 45897.8}; - int pae[3] = { 1512, 1481, 1778}; + double mse[3] = { 51727.3, 32596.4, 45897.8 }; + int pae[3] = { 1512, 1481, 1778 }; run_ojph_compress("mm.ppm", - "simple_enc_irv97_64x64_16bit", "", "j2c", - "-qstep 0.01"); + "simple_enc_irv97_64x64_16bit", "", "j2c", + "-qstep 0.01"); run_ojph_compress_expand("simple_enc_irv97_64x64_16bit", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_16bit", "ppm", - "mm.ppm", "", 3, mse, pae); + "mm.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1071,14 +1071,14 @@ TEST(TestExecutables, SimpleEncIrv9764x6416bit) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_64x64_16bit_gray.j2c -qstep 0.01 TEST(TestExecutables, SimpleEncIrv9764x6416bitGray) { - double mse[1] = { 25150.6}; - int pae[1] = { 1081}; + double mse[1] = { 25150.6 }; + int pae[1] = { 1081 }; run_ojph_compress("mm.pgm", - "simple_enc_irv97_64x64_16bit_gray", "", "j2c", - "-qstep 0.01"); + "simple_enc_irv97_64x64_16bit_gray", "", "j2c", + "-qstep 0.01"); run_ojph_compress_expand("simple_enc_irv97_64x64_16bit_gray", "j2c", "pgm"); run_mse_pae("simple_enc_irv97_64x64_16bit_gray", "pgm", - "mm.pgm", "", 1, mse, pae); + "mm.pgm", "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1087,14 +1087,14 @@ TEST(TestExecutables, SimpleEncIrv9764x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x6416bit) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("mm.ppm", - 
"simple_enc_rev53_64x64_16bit", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64_16bit", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_64x64_16bit", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_16bit", "ppm", - "mm.ppm", "", 3, mse, pae); + "mm.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1103,14 +1103,14 @@ TEST(TestExecutables, SimpleEncRev5364x6416bit) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit_gray.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x6416bitGray) { - double mse[1] = { 0}; - int pae[1] = { 0}; + double mse[1] = { 0 }; + int pae[1] = { 0 }; run_ojph_compress("mm.pgm", - "simple_enc_rev53_64x64_16bit_gray", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64_16bit_gray", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_64x64_16bit_gray", "j2c", "pgm"); run_mse_pae("simple_enc_rev53_64x64_16bit_gray", "pgm", - "mm.pgm", "", 1, mse, pae); + "mm.pgm", "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1119,14 +1119,14 @@ TEST(TestExecutables, SimpleEncRev5364x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x64) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_64x64", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1135,14 
+1135,14 @@ TEST(TestExecutables, SimpleEncRev5364x64) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_32x32.j2c -reversible true -block_size {32,32} TEST(TestExecutables, SimpleEncRev5332x32) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_32x32", "", "j2c", - "-reversible true -block_size \"{32,32}\""); + "simple_enc_rev53_32x32", "", "j2c", + "-reversible true -block_size \"{32,32}\""); run_ojph_compress_expand("simple_enc_rev53_32x32", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_32x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1151,14 +1151,14 @@ TEST(TestExecutables, SimpleEncRev5332x32) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_4x4.j2c -reversible true -block_size {4,4} TEST(TestExecutables, SimpleEncRev534x4) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_4x4", "", "j2c", - "-reversible true -block_size \"{4,4}\""); + "simple_enc_rev53_4x4", "", "j2c", + "-reversible true -block_size \"{4,4}\""); run_ojph_compress_expand("simple_enc_rev53_4x4", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_4x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1167,14 +1167,14 @@ TEST(TestExecutables, SimpleEncRev534x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_1024x4.j2c -reversible true -block_size {4,1024} TEST(TestExecutables, SimpleEncRev531024x4) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int 
pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_1024x4", "", "j2c", - "-reversible true -block_size \"{4,1024}\""); + "simple_enc_rev53_1024x4", "", "j2c", + "-reversible true -block_size \"{4,1024}\""); run_ojph_compress_expand("simple_enc_rev53_1024x4", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_1024x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1183,48 +1183,48 @@ TEST(TestExecutables, SimpleEncRev531024x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_4x1024.j2c -reversible true -block_size {1024,4} TEST(TestExecutables, SimpleEncRev534x1024) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_4x1024", "", "j2c", - "-reversible true -block_size \"{1024,4}\""); + "simple_enc_rev53_4x1024", "", "j2c", + "-reversible true -block_size \"{1024,4}\""); run_ojph_compress_expand("simple_enc_rev53_4x1024", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_4x1024", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// // Test ojph_compress with codeblocks when the rev53 wavelet is used. // We test by comparing MSE and PAE of decoded images. 
// The compressed file is obtained using these command-line options: -// -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size +// -o simple_enc_rev53_64x64_tiles_33x33_d5.j2c -reversible true -tile_size // {32,32} -num_decomps 5 TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D5) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64_tiles_33x33_d5", "", "j2c", - "-reversible true -tile_size \"{32,32}\" -num_decomps 5"); + "simple_enc_rev53_64x64_tiles_33x33_d5", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 5"); run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d5", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d5", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// // Test ojph_compress with codeblocks when the rev53 wavelet is used. // We test by comparing MSE and PAE of decoded images. 
// The compressed file is obtained using these command-line options: -// -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size +// -o simple_enc_rev53_64x64_tiles_33x33_d6.j2c -reversible true -tile_size // {32,32} -num_decomps 6 TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D6) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64_tiles_33x33_d6", "", "j2c", - "-reversible true -tile_size \"{32,32}\" -num_decomps 6"); + "simple_enc_rev53_64x64_tiles_33x33_d6", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 6"); run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d6", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d6", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1234,16 +1234,16 @@ TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D6) { // -o simple_enc_irv97_64x64_yuv.j2c -qstep 0.1 -dims {352,288} -num_comps 3 // -downsamp {1,1},{2,2},{2,2} -bit_depth 8,8,8 -signed false,false,false TEST(TestExecutables, SimpleEncIrv9764x64Yuv) { - double mse[3] = { 30.3548, 7.69602, 5.22246}; - int pae[3] = { 49, 27, 26}; + double mse[3] = { 30.3548, 7.69602, 5.22246 }; + int pae[3] = { 49, 27, 26 }; run_ojph_compress("foreman_420.yuv", - "simple_enc_irv97_64x64_yuv", "", "j2c", - "-qstep 0.1 -dims \"{352,288}\" -num_comps 3 -downsamp" - " \"{1,1}\",\"{2,2}\",\"{2,2}\" -bit_depth 8,8,8" - " -signed false,false,false"); + "simple_enc_irv97_64x64_yuv", "", "j2c", + "-qstep 0.1 -dims \"{352,288}\" -num_comps 3 -downsamp" + " \"{1,1}\",\"{2,2}\",\"{2,2}\" -bit_depth 8,8,8" + " -signed false,false,false"); run_ojph_compress_expand("simple_enc_irv97_64x64_yuv", "j2c", "yuv"); run_mse_pae("simple_enc_irv97_64x64_yuv", "yuv", - "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); + 
"foreman_420.yuv", ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1254,16 +1254,16 @@ TEST(TestExecutables, SimpleEncIrv9764x64Yuv) { // {352,288} -num_comps 3 -downsamp {1,1},{2,2},{2,2} -bit_depth 8,8,8 -signed // false,false,false TEST(TestExecutables, SimpleEncRev5364x64Yuv) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("foreman_420.yuv", - "simple_enc_rev53_64x64_yuv", "", "j2c", - "-reversible true -qstep 0.1 -dims \"{352,288}\"" - " -num_comps 3 -downsamp \"{1,1}\",\"{2,2}\",\"{2,2}\"" - " -bit_depth 8,8,8 -signed false,false,false"); + "simple_enc_rev53_64x64_yuv", "", "j2c", + "-reversible true -qstep 0.1 -dims \"{352,288}\"" + " -num_comps 3 -downsamp \"{1,1}\",\"{2,2}\",\"{2,2}\"" + " -bit_depth 8,8,8 -signed false,false,false"); run_ojph_compress_expand("simple_enc_rev53_64x64_yuv", "j2c", "yuv"); run_mse_pae("simple_enc_rev53_64x64_yuv", "yuv", - "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); + "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1272,14 +1272,14 @@ TEST(TestExecutables, SimpleEncRev5364x64Yuv) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_tall_narrow.j2c -qstep 0.1 TEST(TestExecutables, SimpleEncIrv97TallNarrow) { - double mse[3] = { 112.097, 79.2214, 71.1367}; - int pae[3] = { 56, 41, 32}; + double mse[3] = { 112.097, 79.2214, 71.1367 }; + int pae[3] = { 56, 41, 32 }; run_ojph_compress("tall_narrow.ppm", - "simple_enc_irv97_tall_narrow", "", "j2c", - "-qstep 0.1"); + "simple_enc_irv97_tall_narrow", "", "j2c", + "-qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_tall_narrow", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_tall_narrow", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -1288,14 +1288,14 @@ TEST(TestExecutables, SimpleEncIrv97TallNarrow) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_tall_narrow1.j2c -image_offset {1,0} -qstep 0.1 TEST(TestExecutables, SimpleEncIrv97TallNarrow1) { - double mse[3] = { 100.906, 76.113, 72.8347}; - int pae[3] = { 39, 35, 34}; + double mse[3] = { 100.906, 76.113, 72.8347 }; + int pae[3] = { 39, 35, 34 }; run_ojph_compress("tall_narrow.ppm", - "simple_enc_irv97_tall_narrow1", "", "j2c", - "-image_offset \"{1,0}\" -qstep 0.1"); + "simple_enc_irv97_tall_narrow1", "", "j2c", + "-image_offset \"{1,0}\" -qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_tall_narrow1", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_tall_narrow1", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1304,14 +1304,14 @@ TEST(TestExecutables, SimpleEncIrv97TallNarrow1) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_tall_narrow.j2c -reversible true TEST(TestExecutables, SimpleEncRev53TallNarrow) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("tall_narrow.ppm", - "simple_enc_rev53_tall_narrow", "", "j2c", - "-reversible true"); + "simple_enc_rev53_tall_narrow", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_tall_narrow", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_tall_narrow", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1320,14 +1320,14 @@ TEST(TestExecutables, SimpleEncRev53TallNarrow) { // The compressed file is obtained using these command-line options: // -o 
simple_enc_rev53_tall_narrow1.j2c -image_offset {1,0} -reversible true TEST(TestExecutables, SimpleEncRev53TallNarrow1) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("tall_narrow.ppm", - "simple_enc_rev53_tall_narrow1", "", "j2c", - "-image_offset \"{1,0}\" -reversible true"); + "simple_enc_rev53_tall_narrow1", "", "j2c", + "-image_offset \"{1,0}\" -reversible true"); run_ojph_compress_expand("simple_enc_rev53_tall_narrow1", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_tall_narrow1", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1336,14 +1336,14 @@ TEST(TestExecutables, SimpleEncRev53TallNarrow1) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_le_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitLeNuke11) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_le_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_le_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_le_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_le_nuke11", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1352,14 +1352,14 @@ TEST(TestExecutables, DpxEnc1280x72010bitLeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_be_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitBeNuke11) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; 
run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_be_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_be_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_be_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_be_nuke11", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1368,14 +1368,14 @@ TEST(TestExecutables, DpxEnc1280x72010bitBeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_16bit_le_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitLeNuke11) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_le_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_le_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_le_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_16bit_le_nuke11", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1384,14 +1384,14 @@ TEST(TestExecutables, DpxEnc1280x72016bitLeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_16bit_be_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitBeNuke11) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_be_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_be_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_be_nuke11", "j2c", "ppm"); 
run_mse_pae("dpx_enc_1280x720_16bit_be_nuke11", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1400,14 +1400,14 @@ TEST(TestExecutables, DpxEnc1280x72016bitBeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_resolve18.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitResolve18) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_resolve18", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_resolve18", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_resolve18", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_resolve18", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1416,20 +1416,20 @@ TEST(TestExecutables, DpxEnc1280x72010bitResolve18) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_16bit_resolve18.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitResolve18) { - double mse[3] = { 0, 0, 0}; - int pae[3] = { 0, 0, 0}; + double mse[3] = { 0, 0, 0 }; + int pae[3] = { 0, 0, 0 }; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_resolve18", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_resolve18", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_resolve18", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_16bit_resolve18", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } //////////////////////////////////////////////////////////////////////////////// // main 
//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { +int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } diff --git a/tests/test_helpers/ht_cmdlines.txt b/tests/test_helpers/ht_cmdlines.txt index c8590611..a8c0987d 100644 --- a/tests/test_helpers/ht_cmdlines.txt +++ b/tests/test_helpers/ht_cmdlines.txt @@ -81,8 +81,8 @@ add_test(NAME simple_enc_rev53_32x32 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_de add_test(NAME simple_enc_rev53_4x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_4x4.j2c -reversible true -block_size \{4,4\}" "-i simple_enc_rev53_4x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_4x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_enc_rev53_1024x4 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_1024x4.j2c -reversible true -block_size \{4,1024\}" "-i simple_enc_rev53_1024x4.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_1024x4.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_enc_rev53_4x1024 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_4x1024.j2c -reversible true -block_size \{1024,4\}" "-i simple_enc_rev53_4x1024.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_4x1024.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME simple_enc_rev53_64x64_tiles_33x33_d5 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size \{32,32\} -num_decomps 5" "-i simple_enc_rev53_64x64_tiles_33x33.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") -add_test(NAME 
simple_enc_rev53_64x64_tiles_33x33_d6 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33.j2c -reversible true -tile_size \{32,32\} -num_decomps 6" "-i simple_enc_rev53_64x64_tiles_33x33.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_64x64_tiles_33x33_d5 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33_d5.j2c -reversible true -tile_size \{32,32\} -num_decomps 5" "-i simple_enc_rev53_64x64_tiles_33x33_d5.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64_tiles_33x33_d5.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") +add_test(NAME simple_enc_rev53_64x64_tiles_33x33_d6 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -renc "-i ${images_folder}/mm.ppm -o simple_enc_rev53_64x64_tiles_33x33_d6.j2c -reversible true -tile_size \{32,32\} -num_decomps 6" "-i simple_enc_rev53_64x64_tiles_33x33_d6.j2c -o test1.ppm -precise -quiet" "-i simple_enc_rev53_64x64_tiles_33x33_d6.j2c -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_enc_irv97_64x64_yuv COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom_yuv.sh -enc "-i ${images_folder}/foreman_420.yuv -o simple_enc_irv97_64x64_yuv.j2c -qstep 0.1 -dims \{352,288\} -num_comps 3 -downsamp \{1,1\},\{2,2\},\{2,2\} -bit_depth 8,8,8 -signed false,false,false" "-i simple_enc_irv97_64x64_yuv.j2c -o test1y.rawl,test1u.rawl,test1v.rawl -precise -quiet" "-i simple_enc_irv97_64x64_yuv.j2c -o test2.yuv" "${images_folder}/foreman_420.yuv:352x288x8x420" "test1.yuv:352x288x8x420" "test2.yuv:352x288x8x420") add_test(NAME simple_enc_rev53_64x64_yuv COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom_yuv.sh -renc "-i ${images_folder}/foreman_420.yuv -o simple_enc_rev53_64x64_yuv.j2c -reversible true -qstep 0.1 -dims \{352,288\} -num_comps 3 
-downsamp \{1,1\},\{2,2\},\{2,2\} -bit_depth 8,8,8 -signed false,false,false" "-i simple_enc_rev53_64x64_yuv.j2c -o test1y.rawl,test1u.rawl,test1v.rawl -precise -quiet" "-i simple_enc_rev53_64x64_yuv.j2c -o test2.yuv" "${images_folder}/foreman_420.yuv:352x288x8x420" "test1.yuv:352x288x8x420" "test2.yuv:352x288x8x420") From 6e9cfdc60d7d8da0ae1ef2e7cdf707623f1ef136 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Mon, 8 Apr 2024 23:50:35 +1000 Subject: [PATCH 054/348] All changes needed for DFS and ATK are done. Still some bugs. --- src/core/codestream/ojph_codestream_local.cpp | 31 +- src/core/codestream/ojph_codestream_local.h | 15 +- src/core/codestream/ojph_params.cpp | 75 ++- src/core/codestream/ojph_params_local.h | 53 +- src/core/codestream/ojph_precinct.cpp | 14 +- src/core/codestream/ojph_precinct.h | 3 +- src/core/codestream/ojph_resolution.cpp | 569 +++++++++++------- src/core/codestream/ojph_resolution.h | 14 +- src/core/codestream/ojph_subband.cpp | 38 +- src/core/codestream/ojph_subband.h | 3 +- src/core/codestream/ojph_tile.cpp | 22 +- src/core/codestream/ojph_tile.h | 5 +- src/core/codestream/ojph_tile_comp.cpp | 3 +- src/core/transform/ojph_transform.cpp | 53 +- 14 files changed, 566 insertions(+), 332 deletions(-) diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index 737daffb..5f72d3e8 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -186,8 +186,6 @@ namespace ojph { for (ui32 r = 0; r <= num_decomps; ++r) { size log_PP = cod.get_log_precinct_size(r); - log_PP.w -= (r ? 1 : 0); - log_PP.h -= (r ? 1 : 0); ratio.w = ojph_max(ratio.w, log_PP.w - ojph_min(log_cb.w, log_PP.w)); ratio.h = ojph_max(ratio.h, log_PP.h - ojph_min(log_cb.h, log_PP.h)); } @@ -200,7 +198,7 @@ namespace ojph { // We need 4 such tables. These tables store // 1. missing msbs and 2. their flags, // 3. number of layers and 4. 
their flags - precinct_scratch_needed_bytes = + precinct_scratch_needed_bytes = 4 * ((max_ratio * max_ratio * 4 + 2) / 3); allocator->pre_alloc_obj(precinct_scratch_needed_bytes); @@ -220,7 +218,7 @@ namespace ojph { ui32 num_tileparts = 0; point index; - rect tile_rect, recon_tile_rect; + rect tile_rect; ojph::param_siz sz = access_siz(); ui32 ds = 1 << skipped_res_for_recon; for (index.y = 0; index.y < num_tiles.h; ++index.y) @@ -233,12 +231,6 @@ namespace ojph { tile_rect.siz.h = ojph_min(y1, sz.get_image_extent().y) - tile_rect.org.y; - recon_tile_rect.org.y = ojph_max(ojph_div_ceil(y0, ds), - ojph_div_ceil(sz.get_image_offset().y, ds)); - recon_tile_rect.siz.h = ojph_min(ojph_div_ceil(y1, ds), - ojph_div_ceil(sz.get_image_extent().y, ds)) - - recon_tile_rect.org.y; - ui32 offset = 0; for (index.x = 0; index.x < num_tiles.w; ++index.x) { @@ -250,17 +242,9 @@ namespace ojph { tile_rect.siz.w = ojph_min(x1, sz.get_image_extent().x) - tile_rect.org.x; - recon_tile_rect.org.x = ojph_max(ojph_div_ceil(x0, ds), - ojph_div_ceil(sz.get_image_offset().x, ds)); - recon_tile_rect.siz.w = ojph_min(ojph_div_ceil(x1, ds), - ojph_div_ceil(sz.get_image_extent().x, ds)) - - recon_tile_rect.org.x; - ui32 tps = 0; // number of tileparts for this tile ui32 idx = index.y * num_tiles.w + index.x; - tiles[idx].finalize_alloc(this, tile_rect, recon_tile_rect, - idx, offset, tps); - offset += recon_tile_rect.siz.w; + tiles[idx].finalize_alloc(this, tile_rect, idx, offset, tps); num_tileparts += tps; } } @@ -836,8 +820,15 @@ namespace ojph { } cod.update_atk(atk); - for (int i = 0; i < used_coc_fields; ++i) + for (int i = 0; i < used_coc_fields; ++i) + { + if (i == 0) cod.link_cod(coc); + else coc[i - 1].link_cod(coc + i); coc[i].update_atk(atk); + } + siz.link(&cod); + if (dfs.exists()) + siz.link(&dfs); if (received_markers != 3) OJPH_ERROR(0x00030052, "markers error, COD and QCD are required"); diff --git a/src/core/codestream/ojph_codestream_local.h 
b/src/core/codestream/ojph_codestream_local.h index 8e77eb17..8ca8c717 100644 --- a/src/core/codestream/ojph_codestream_local.h +++ b/src/core/codestream/ojph_codestream_local.h @@ -82,19 +82,10 @@ namespace ojph { { return &siz; } ojph::param_cod access_cod() //return externally wrapped cod { return ojph::param_cod(&cod); } - const param_cod* get_cod() //return internal code + const param_cod* get_cod() //return internal cod { return &cod; } - const param_cod* get_cod(ui32 comp_num) //return internal code - { - if (used_coc_fields == 0) - return &cod; - else { - for (int i = 0; i < used_coc_fields; ++i) - if (coc[i].get_comp_num() == comp_num) - return coc + i; - return &cod; - } - } + const param_cod* get_cod(ui32 comp_num) //return internal cod + { return cod.get_cod(comp_num); } param_qcd* access_qcd(ui32 comp_num) { if (used_qcc_fields > 0) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index ef652651..268135c4 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -663,6 +663,35 @@ namespace ojph { dfs_support_needed = (Rsiz & 0x80) != 0; } + ////////////////////////////////////////////////////////////////////////// + point param_siz::get_recon_downsampling(ui32 comp_num) const + { + assert(comp_num < get_num_components()); + + point factor(1u << skipped_resolutions, 1u << skipped_resolutions); + const param_cod* cdp = cod->get_cod(comp_num); + if (dfs && cdp && cdp->is_dfs_defined()) { + const param_dfs* d = dfs->get_dfs(cdp->get_dfs_index()); + factor = d->get_res_downsamp(skipped_resolutions); + } + factor.x *= (ui32)cptr[comp_num].XRsiz; + factor.y *= (ui32)cptr[comp_num].YRsiz; + return factor; + } + + ////////////////////////////////////////////////////////////////////////// + point param_siz::get_recon_size(ui32 comp_num) const + { + assert(comp_num < get_num_components()); + + point factor = get_recon_downsampling(comp_num); + point r; + r.x = ojph_div_ceil(Xsiz, factor.x) 
- ojph_div_ceil(XOsiz, factor.x); + r.y = ojph_div_ceil(Ysiz, factor.y) - ojph_div_ceil(YOsiz, factor.y); + return r; + } + + ////////////////////////////////////////////////////////////////////////// // // @@ -1406,10 +1435,9 @@ namespace ojph { ui32 subband) const { assert((resolution == 0 && subband == 0) || - (resolution > 0 && resolution <= Ids && - subband > 0 && subband < 4)); + (resolution > 0 && subband > 0 && subband < 4)); - ui32 ns[4] = { 0, 3, 2, 2 }; + ui32 ns[4] = { 0, 3, 1, 1 }; ui32 idx = 0; if (resolution > 0) @@ -1427,6 +1455,27 @@ namespace ojph { return idx; } + ////////////////////////////////////////////////////////////////////////// + point param_dfs::get_res_downsamp(ui32 skipped_resolutions) const + { + point factor(1, 1); + ui32 decomp_level = 1; + while (skipped_resolutions > 0) + { + param_dfs::dfs_dwt_type type = get_dwt_type(decomp_level); + if (type == BIDIR_DWT) + { factor.x *= 2; factor.y *= 2; } + else if (type == HORZ_DWT) + factor.x *= 2; + else if (type == VERT_DWT) + factor.y *= 2; + + ++decomp_level; + --skipped_resolutions; + } + return factor; + } + ////////////////////////////////////////////////////////////////////////// bool param_dfs::read(infile_base *file) { @@ -1658,10 +1707,10 @@ namespace ojph { Natk = 4; // next is (A-4) in T.801 second line Latk = (ui16)(5 + Natk + sizeof(float) * (1 + Natk)); - d[0].irv.Aatk = (float)-1.586134342059924; - d[1].irv.Aatk = (float)-0.052980118572961; - d[2].irv.Aatk = (float)0.882911075530934; - d[3].irv.Aatk = (float)0.443506852043971; + d[0].irv.Aatk = (float)0.443506852043971; + d[1].irv.Aatk = (float)0.882911075530934; + d[2].irv.Aatk = (float)-0.052980118572961; + d[3].irv.Aatk = (float)-1.586134342059924; } ////////////////////////////////////////////////////////////////////////// @@ -1671,12 +1720,12 @@ namespace ojph { Natk = 2; // next is (A-4) in T.801 fourth line Latk = (ui16)(5 + 2 * Natk + sizeof(ui8) * (Natk + Natk)); - d[0].rev.Aatk = -1; - d[0].rev.Batk = 0; - 
d[0].rev.Eatk = 1; - d[1].rev.Aatk = 1; - d[1].rev.Batk = 2; - d[1].rev.Eatk = 2; + d[0].rev.Aatk = 1; + d[0].rev.Batk = 2; + d[0].rev.Eatk = 2; + d[1].rev.Aatk = -1; + d[1].rev.Batk = 0; + d[1].rev.Eatk = 1; } } // !local namespace diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index 43c1181d..1ee508dc 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -172,7 +172,6 @@ namespace ojph { cptr = store; old_Csiz = 4; Rsiz = 0x4000; //for jph, bit 14 of Rsiz is 1 - ws_kern_support_needed = dfs_support_needed = false; } ~param_siz() @@ -238,10 +237,15 @@ namespace ojph { bool write(outfile_base *file); void read(infile_base *file); + void link(const param_cod* cod) + { this->cod = cod; } + + void link(const param_dfs* dfs) + { this->dfs = dfs; } + void set_skipped_resolutions(ui32 skipped_resolutions) - { - this->skipped_resolutions = skipped_resolutions; - } + { this->skipped_resolutions = skipped_resolutions; } + ui32 get_width(ui32 comp_num) const { assert(comp_num < get_num_components()); @@ -256,20 +260,14 @@ namespace ojph { ui32 t = ojph_div_ceil(Ysiz, ds) - ojph_div_ceil(YOsiz, ds); return t; } + + point get_recon_downsampling(ui32 comp_num) const; + point get_recon_size(ui32 comp_num) const; ui32 get_recon_width(ui32 comp_num) const - { - assert(comp_num < get_num_components()); - ui32 ds = (ui32)cptr[comp_num].XRsiz * (1u << skipped_resolutions); - ui32 t = ojph_div_ceil(Xsiz, ds) - ojph_div_ceil(XOsiz, ds); - return t; - } + { return get_recon_size(comp_num).x; } ui32 get_recon_height(ui32 comp_num) const - { - assert(comp_num < get_num_components()); - ui32 ds = (ui32)cptr[comp_num].YRsiz * (1u << skipped_resolutions); - ui32 t = ojph_div_ceil(Ysiz, ds) - ojph_div_ceil(YOsiz, ds); - return t; - } + { return get_recon_size(comp_num).y; } + bool is_ws_kern_support_needed() { return ws_kern_support_needed; } bool is_dfs_support_needed() { return 
dfs_support_needed; } @@ -293,6 +291,8 @@ namespace ojph { siz_comp_info store[4]; bool ws_kern_support_needed; bool dfs_support_needed; + const param_cod* cod; + const param_dfs* dfs; param_siz(const param_siz&) = delete; //prevent copy constructor param_siz& operator=(const param_siz&) = delete; //prevent copy }; @@ -370,6 +370,7 @@ namespace ojph { SPcod.num_decomp = 5; SPcod.block_width = 4; //64 SPcod.block_height = 4; //64 + next = NULL; } //////////////////////////////////////// @@ -503,6 +504,22 @@ namespace ojph { //////////////////////////////////////// void update_atk(const param_atk* atk); + //////////////////////////////////////// + void link_cod(const param_cod* cod) + { this->next = cod; } + + //////////////////////////////////////// + const param_cod* get_cod(ui32 comp_num) const + { + const param_cod* result = this->next; + while (result != NULL && result->get_comp_num() != comp_num) + result = result->next; + if (result) + return result; + else + return this; + } + //////////////////////////////////////// const param_atk* access_atk() const { return atk; } @@ -516,7 +533,7 @@ namespace ojph { { return SPcod.num_decomp & 0xF; } //////////////////////////////////////// - ui32 get_comp_num() + ui32 get_comp_num() const { assert(type == COC_MAIN); return comp_num; } private: // Common variables @@ -525,6 +542,7 @@ namespace ojph { ui8 Scod; // serves as Scod and Scoc cod_SGcod SGCod; // Used in COD and copied to COC cod_SPcod SPcod; // serves as SPcod and SPcoc + const param_cod* next;// to link cod parameters private: // COC only variables param_cod* parent; // parent COD structure @@ -775,6 +793,7 @@ namespace ojph { dfs_dwt_type get_dwt_type(ui32 decomp_level) const; ui32 get_subband_idx(ui32 num_decompositions, ui32 resolution, ui32 subband) const; + point get_res_downsamp(ui32 skipped_resolutions) const; private: // member variables ui16 Ldfs; // length of the segment marker diff --git a/src/core/codestream/ojph_precinct.cpp 
b/src/core/codestream/ojph_precinct.cpp index c20c8589..813e33b8 100644 --- a/src/core/codestream/ojph_precinct.cpp +++ b/src/core/codestream/ojph_precinct.cpp @@ -98,11 +98,12 @@ namespace ojph { coded_lists *cur_coded_list = NULL; ui32 cb_bytes = 0; //cb_bytes; ui32 ph_bytes = 0; //precinct header size - int sst = num_bands == 3 ? 1 : 0; - int send = num_bands == 3 ? 4 : 1; int num_skipped_subbands = 0; - for (int s = sst; s < send; ++s) + for (int s = 0; s < 4; ++s) { + if (bands[s].empty) + continue; + if (cb_idxs[s].siz.w == 0 || cb_idxs[s].siz.h == 0) continue; @@ -288,10 +289,11 @@ namespace ojph { } //write codeblocks - int sst = num_bands == 3 ? 1 : 0; - int send = num_bands == 3 ? 4 : 1; - for (int s = sst; s < send; ++s) + for (int s = 0; s < 4; ++s) { + if (bands[s].empty) + continue; + ui32 band_width = bands[s].num_blocks.w; ui32 width = cb_idxs[s].siz.w; ui32 height = cb_idxs[s].siz.h; diff --git a/src/core/codestream/ojph_precinct.h b/src/core/codestream/ojph_precinct.h index d8e880a9..47ec4736 100644 --- a/src/core/codestream/ojph_precinct.h +++ b/src/core/codestream/ojph_precinct.h @@ -59,7 +59,7 @@ namespace ojph { { precinct() { scratch = NULL; bands = NULL; coded = NULL; - num_bands = 0; may_use_sop = uses_eph = false; + may_use_sop = uses_eph = false; } ui32 prepare_precinct(int tag_tree_size, ui32* lev_idx, mem_elastic_allocator *elastic); @@ -73,7 +73,6 @@ namespace ojph { rect cb_idxs[4]; //indices of codeblocks subband *bands; //the subbands coded_lists* coded; - ui32 num_bands; bool may_use_sop, uses_eph; }; diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index a0413b76..14743249 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -67,7 +67,7 @@ namespace ojph { bool skipped_res_for_recon = res_num > t; const param_atk* atk = cdp->access_atk(); - param_dfs::dfs_dwt_type downsampling_style = param_dfs::BIDIR_DWT; + param_dfs::dfs_dwt_type ds = 
param_dfs::BIDIR_DWT; if (cdp->is_dfs_defined()) { const param_dfs* dfs = codestream->access_dfs(); if (dfs == NULL) { @@ -86,31 +86,22 @@ namespace ojph { "main codestream headers", dfs_idx); } ui32 num_decomps = cdp->get_num_decompositions(); - downsampling_style = dfs->get_dwt_type(num_decomps - res_num + 1); + ds = dfs->get_dwt_type(num_decomps - res_num + 1); } } - //create next resolution + ui32 transform_flags = 0; if (res_num > 0) { - //allocate a resolution - allocator->pre_alloc_obj(1); - ui32 trx0 = ojph_div_ceil(res_rect.org.x, 2); - ui32 try0 = ojph_div_ceil(res_rect.org.y, 2); - ui32 trx1 = ojph_div_ceil(res_rect.org.x + res_rect.siz.w, 2); - ui32 try1 = ojph_div_ceil(res_rect.org.y + res_rect.siz.h, 2); - rect next_res_rect; - next_res_rect.org.x = trx0; - next_res_rect.org.y = try0; - next_res_rect.siz.w = trx1 - trx0; - next_res_rect.siz.h = try1 - try0; - - resolution::pre_alloc(codestream, next_res_rect, - skipped_res_for_recon ? recon_res_rect : next_res_rect, - comp_num, res_num - 1); + if (ds == param_dfs::BIDIR_DWT) + transform_flags = HORZ_TRX | VERT_TRX; + else if (ds == param_dfs::HORZ_DWT) + transform_flags = HORZ_TRX; + else if (ds == param_dfs::VERT_DWT) + transform_flags = VERT_TRX; } - //allocate subbands + //allocate resolution/subbands ui32 trx0 = res_rect.org.x; ui32 try0 = res_rect.org.y; ui32 trx1 = res_rect.org.x + res_rect.siz.w; @@ -118,23 +109,83 @@ namespace ojph { allocator->pre_alloc_obj(4); if (res_num > 0) { - for (ui32 i = 1; i < 4; ++i) + if (ds == param_dfs::BIDIR_DWT) { - ui32 tbx0 = (trx0 - (i & 1) + 1) >> 1; - ui32 tbx1 = (trx1 - (i & 1) + 1) >> 1; - ui32 tby0 = (try0 - (i >> 1) + 1) >> 1; - ui32 tby1 = (try1 - (i >> 1) + 1) >> 1; - - rect band_rect; - band_rect.org.x = tbx0; - band_rect.org.y = tby0; - band_rect.siz.w = tbx1 - tbx0; - band_rect.siz.h = tby1 - tby0; - subband::pre_alloc(codestream, band_rect, comp_num, res_num); + for (ui32 i = 0; i < 4; ++i) + { + ui32 tbx0 = (trx0 - (i & 1) + 1) >> 1; + ui32 tbx1 
= (trx1 - (i & 1) + 1) >> 1; + ui32 tby0 = (try0 - (i >> 1) + 1) >> 1; + ui32 tby1 = (try1 - (i >> 1) + 1) >> 1; + + rect re; + re.org.x = tbx0; + re.org.y = tby0; + re.siz.w = tbx1 - tbx0; + re.siz.h = tby1 - tby0; + if (i == 0) { + allocator->pre_alloc_obj(1); + resolution::pre_alloc(codestream, re, + skipped_res_for_recon ? recon_res_rect : re, + comp_num, res_num - 1); + } + else + subband::pre_alloc(codestream, re, comp_num, res_num, + transform_flags); + } + } + else if (ds == param_dfs::VERT_DWT) + { + ui32 tby0, tby1; + rect re = res_rect; + tby0 = (try0 + 1) >> 1; + tby1 = (try1 + 1) >> 1; + re.org.y = tby0; + re.siz.h = tby1 - tby0; + allocator->pre_alloc_obj(1); + resolution::pre_alloc(codestream, re, + skipped_res_for_recon ? recon_res_rect : re, + comp_num, res_num - 1); + + tby0 = try0 >> 1; + tby1 = try1 >> 1; + re.org.y = tby0; + re.siz.h = tby1 - tby0; + subband::pre_alloc(codestream, re, comp_num, res_num, + transform_flags); + } + else if (ds == param_dfs::HORZ_DWT) + { + ui32 tbx0, tbx1; + rect re = res_rect; + tbx0 = (trx0 + 1) >> 1; + tbx1 = (trx1 + 1) >> 1; + re.org.x = tbx0; + re.siz.w = tbx1 - tbx0; + allocator->pre_alloc_obj(1); + resolution::pre_alloc(codestream, re, + skipped_res_for_recon ? recon_res_rect : re, + comp_num, res_num - 1); + + tbx0 = trx0 >> 1; + tbx1 = trx1 >> 1; + re.org.x = tbx0; + re.siz.w = tbx1 - tbx0; + subband::pre_alloc(codestream, re, comp_num, res_num, + transform_flags); + } + else + { + assert(ds == param_dfs::NO_DWT); + allocator->pre_alloc_obj(1); + resolution::pre_alloc(codestream, res_rect, + skipped_res_for_recon ? 
recon_res_rect : res_rect, + comp_num, res_num - 1); } } else - subband::pre_alloc(codestream, res_rect, comp_num, res_num); + subband::pre_alloc(codestream, res_rect, comp_num, res_num, + transform_flags); //prealloc precincts size log_PP = cdp->get_log_precinct_size(res_num); @@ -168,7 +219,7 @@ namespace ojph { const rect& res_rect, const rect& recon_res_rect, ui32 comp_num, ui32 res_num, - point comp_downsamp, + point comp_downsamp, point res_downsamp, tile_comp* parent_tile_comp, resolution* parent_res) { @@ -189,7 +240,7 @@ namespace ojph { this->res_num = res_num; this->num_bytes = 0; this->atk = cdp->access_atk(); - this->downsampling_style = param_dfs::BIDIR_DWT; + param_dfs::dfs_dwt_type ds = param_dfs::BIDIR_DWT; if (cdp->is_dfs_defined()) { const param_dfs* dfs = codestream->access_dfs(); if (dfs == NULL) { @@ -208,34 +259,22 @@ namespace ojph { "main codestream headers", dfs_idx); } ui32 num_decomps = cdp->get_num_decompositions(); - this->downsampling_style = - dfs->get_dwt_type(num_decomps - res_num + 1); + ds = dfs->get_dwt_type(num_decomps - res_num + 1); } } - //finalize next resolution + transform_flags = 0; if (res_num > 0) { - //allocate a resolution - child_res = allocator->post_alloc_obj(1); - ui32 trx0 = ojph_div_ceil(res_rect.org.x, 2); - ui32 try0 = ojph_div_ceil(res_rect.org.y, 2); - ui32 trx1 = ojph_div_ceil(res_rect.org.x + res_rect.siz.w, 2); - ui32 try1 = ojph_div_ceil(res_rect.org.y + res_rect.siz.h, 2); - rect next_res_rect; - next_res_rect.org.x = trx0; - next_res_rect.org.y = try0; - next_res_rect.siz.w = trx1 - trx0; - next_res_rect.siz.h = try1 - try0; - - child_res->finalize_alloc(codestream, next_res_rect, - skipped_res_for_recon ? 
recon_res_rect : next_res_rect, comp_num, - res_num - 1, comp_downsamp, parent_tile_comp, this); + if (ds == param_dfs::BIDIR_DWT) + transform_flags = HORZ_TRX | VERT_TRX; + else if (ds == param_dfs::HORZ_DWT) + transform_flags = HORZ_TRX; + else if (ds == param_dfs::VERT_DWT) + transform_flags = VERT_TRX; } - else - child_res = NULL; - //allocate subbands + //allocate resolution/subbands ui32 trx0 = res_rect.org.x; ui32 try0 = res_rect.org.y; ui32 trx1 = res_rect.org.x + res_rect.siz.w; @@ -245,24 +284,94 @@ namespace ojph { new (bands + i) subband; if (res_num > 0) { - this->num_bands = 3; - for (ui32 i = 1; i < 4; ++i) + if (ds == param_dfs::BIDIR_DWT) + { + for (ui32 i = 0; i < 4; ++i) + { + ui32 tbx0 = (trx0 - (i & 1) + 1) >> 1; + ui32 tbx1 = (trx1 - (i & 1) + 1) >> 1; + ui32 tby0 = (try0 - (i >> 1) + 1) >> 1; + ui32 tby1 = (try1 - (i >> 1) + 1) >> 1; + + rect re; + re.org.x = tbx0; + re.org.y = tby0; + re.siz.w = tbx1 - tbx0; + re.siz.h = tby1 - tby0; + if (i == 0) { + point next_res_downsamp; + next_res_downsamp.x = res_downsamp.x * 2; + next_res_downsamp.y = res_downsamp.y * 2; + + child_res = allocator->post_alloc_obj(1); + child_res->finalize_alloc(codestream, re, + skipped_res_for_recon ? recon_res_rect : re, comp_num, + res_num - 1, comp_downsamp, next_res_downsamp, + parent_tile_comp, this); + } + else + bands[i].finalize_alloc(codestream, re, this, res_num, i); + } + } + else if (ds == param_dfs::VERT_DWT) + { + ui32 tby0, tby1; + rect re = res_rect; + tby0 = (try0 + 1) >> 1; + tby1 = (try1 + 1) >> 1; + re.org.y = tby0; + re.siz.h = tby1 - tby0; + + point next_res_downsamp; + next_res_downsamp.x = res_downsamp.x; + next_res_downsamp.y = res_downsamp.y * 2; + child_res = allocator->post_alloc_obj(1); + child_res->finalize_alloc(codestream, re, + skipped_res_for_recon ? 
recon_res_rect : re, comp_num, + res_num - 1, comp_downsamp, next_res_downsamp, + parent_tile_comp, this); + + tby0 = try0 >> 1; + tby1 = try1 >> 1; + re.org.y = tby0; + re.siz.h = tby1 - tby0; + bands[2].finalize_alloc(codestream, re, this, res_num, 2); + } + else if (ds == param_dfs::HORZ_DWT) + { + ui32 tbx0, tbx1; + rect re = res_rect; + tbx0 = (trx0 + 1) >> 1; + tbx1 = (trx1 + 1) >> 1; + re.org.x = tbx0; + re.siz.w = tbx1 - tbx0; + + point next_res_downsamp; + next_res_downsamp.x = res_downsamp.x * 2; + next_res_downsamp.y = res_downsamp.y; + child_res = allocator->post_alloc_obj(1); + child_res->finalize_alloc(codestream, re, + skipped_res_for_recon ? recon_res_rect : re, comp_num, + res_num - 1, comp_downsamp, next_res_downsamp, + parent_tile_comp, this); + + tbx0 = trx0 >> 1; + tbx1 = trx1 >> 1; + re.org.x = tbx0; + re.siz.w = tbx1 - tbx0; + bands[1].finalize_alloc(codestream, re, this, res_num, 1); + } + else { - ui32 tbx0 = (trx0 - (i & 1) + 1) >> 1; - ui32 tbx1 = (trx1 - (i & 1) + 1) >> 1; - ui32 tby0 = (try0 - (i >> 1) + 1) >> 1; - ui32 tby1 = (try1 - (i >> 1) + 1) >> 1; - - rect band_rect; - band_rect.org.x = tbx0; - band_rect.org.y = tby0; - band_rect.siz.w = tbx1 - tbx0; - band_rect.siz.h = tby1 - tby0; - bands[i].finalize_alloc(codestream, band_rect, this, res_num, i); + assert(ds == param_dfs::NO_DWT); + child_res = allocator->post_alloc_obj(1); + child_res->finalize_alloc(codestream, res_rect, + skipped_res_for_recon ? 
recon_res_rect : res_rect, comp_num, + res_num - 1, comp_downsamp, res_downsamp, parent_tile_comp, this); } } else { - this->num_bands = 1; + child_res = NULL; bands[0].finalize_alloc(codestream, res_rect, this, res_num, 0); } @@ -287,11 +396,7 @@ namespace ojph { ui32 x_lower_bound = (trx0 >> log_PP.w) << log_PP.w; ui32 y_lower_bound = (try0 >> log_PP.h) << log_PP.h; - point proj_factor; - proj_factor.x = comp_downsamp.x * (1 << (num_decomps - res_num)); - proj_factor.y = comp_downsamp.y * (1 << (num_decomps - res_num)); precinct* pp = precincts; - point tile_top_left = parent_tile_comp->get_tile()->get_tile_rect().org; for (ui32 y = 0; y < num_precincts.h; ++y) { @@ -299,11 +404,10 @@ namespace ojph { for (ui32 x = 0; x < num_precincts.w; ++x, ++pp) { ui32 ppx0 = x_lower_bound + (x << log_PP.w); - point t(proj_factor.x * ppx0, proj_factor.y * ppy0); + point t(res_downsamp.x * ppx0, res_downsamp.y * ppy0); t.x = t.x > tile_top_left.x ? t.x : tile_top_left.x; t.y = t.y > tile_top_left.y ? t.y : tile_top_left.y; pp->img_point = t; - pp->num_bands = num_bands; pp->bands = bands; pp->may_use_sop = cdp->packets_may_use_sop(); pp->uses_eph = cdp->packets_use_eph(); @@ -311,15 +415,15 @@ namespace ojph { pp->coded = NULL; } } - if (num_bands == 1) - bands[0].get_cb_indices(num_precincts, precincts); - else - for (int i = 1; i < 4; ++i) + for (int i = 0; i < 4; ++i) + if (bands[i].exists()) bands[i].get_cb_indices(num_precincts, precincts); + // determine how to divide scratch into multiple levels of + // tag trees size log_cb = cdp->get_log_block_dims(); - log_PP.w -= (res_num ? 1 : 0); - log_PP.h -= (res_num ? 1 : 0); + log_PP.w -= (transform_flags & HORZ_TRX) ? 1 : 0; + log_PP.h -= (transform_flags & VERT_TRX) ? 
1 : 0; size ratio; ratio.w = log_PP.w - ojph_min(log_cb.w, log_PP.w); ratio.h = log_PP.h - ojph_min(log_cb.h, log_PP.h); @@ -391,7 +495,9 @@ namespace ojph { { if (res_num == 0) { - assert(num_bands == 1 && child_res == NULL); + assert(child_res == NULL); + assert(bands[0].exists() && !bands[1].exists() + && !bands[2].exists() && !bands[3].exists()); bands[0].exchange_buf(vert_even ? sig->line : aug->line); bands[0].push_line(); return; @@ -419,7 +525,7 @@ namespace ojph { line_buf* dp = aug->line; line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; - const lifting_step* s = atk->get_step(i); + const lifting_step* s = atk->get_step(num_steps - i - 1); rev_vert_ana_step(s, sp1, sp2, dp, width); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; @@ -486,7 +592,7 @@ namespace ojph { line_buf* dp = aug->line; line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; - const lifting_step* s = atk->get_step(i); + const lifting_step* s = atk->get_step(num_steps - i - 1); irv_vert_ana_step(s, sp1, sp2, dp, width); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; @@ -547,7 +653,9 @@ namespace ojph { { if (res_num == 0) { - assert(num_bands == 1 && child_res == NULL); + assert(child_res == NULL); + assert(bands[0].exists() && !bands[1].exists() + && !bands[2].exists() && !bands[3].exists()); return bands[0].pull_line(); } @@ -557,154 +665,211 @@ namespace ojph { ui32 width = res_rect.siz.w; if (width == 0) return NULL; - if (reversible) + + if (transform_flags & VERT_TRX) { - if (res_rect.siz.h > 1) + if (reversible) { - if (sig->active) { - sig->active = false; - return sig->line; - }; - for (;;) + if (res_rect.siz.h > 1) { - //horizontal transform - if (cur_line < res_rect.siz.h) + if (sig->active) { + sig->active = false; + return sig->line; + }; + for (;;) { - if (vert_even) { // even - rev_horz_syn(atk, aug->line, - 
child_res->pull_line(), bands[1].pull_line(), - width, horz_even); - aug->active = true; - vert_even = !vert_even; - ++cur_line; - continue; - } - else { - rev_horz_syn(atk, sig->line, - bands[2].pull_line(), bands[3].pull_line(), - width, horz_even); - sig->active = true; - vert_even = !vert_even; - ++cur_line; + //horizontal transform + if (cur_line < res_rect.siz.h) + { + if (vert_even) { // even + if (transform_flags & HORZ_TRX) + rev_horz_syn(atk, aug->line, child_res->pull_line(), + bands[1].pull_line(), width, horz_even); + else + memcpy(aug->line->i32, child_res->pull_line()->i32, + width * sizeof(si32)); + aug->active = true; + vert_even = !vert_even; + ++cur_line; + continue; + } + else { + if (transform_flags & HORZ_TRX) + rev_horz_syn(atk, sig->line, bands[2].pull_line(), + bands[3].pull_line(), width, horz_even); + else + memcpy(sig->line->i32, bands[2].pull_line()->i32, + width * sizeof(si32)); + sig->active = true; + vert_even = !vert_even; + ++cur_line; + } } - } - //vertical transform - for (ui32 i = 0; i < num_steps; ++i) - { - if (aug->active && (sig->active || ssp[i].active)) + //vertical transform + for (ui32 i = 0; i < num_steps; ++i) { - line_buf* dp = aug->line; - line_buf* sp1 = sig->active ? sig->line : ssp[i].line; - line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; - const lifting_step* s = atk->get_step(num_steps - i - 1); - rev_vert_syn_step(s, dp, sp1, sp2, width); + if (aug->active && (sig->active || ssp[i].active)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? 
ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(i); + rev_vert_syn_step(s, dp, sp1, sp2, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; - } - if (aug->active) { - aug->active = false; - return aug->line; + if (aug->active) { + aug->active = false; + return aug->line; + } + if (sig->active) { + sig->active = false; + return sig->line; + }; } - if (sig->active) { - sig->active = false; - return sig->line; - }; } - } - else - { - if (vert_even) - rev_horz_syn(atk, aug->line, child_res->pull_line(), - bands[1].pull_line(), width, horz_even); else { - rev_horz_syn(atk, aug->line, bands[2].pull_line(), - bands[3].pull_line(), width, horz_even); - si32* sp = aug->line->i32; - for (ui32 i = width; i > 0; --i) - *sp++ >>= 1; + if (vert_even) { + if (transform_flags & HORZ_TRX) + rev_horz_syn(atk, aug->line, child_res->pull_line(), + bands[1].pull_line(), width, horz_even); + else + memcpy(aug->line->i32, child_res->pull_line()->i32, + width * sizeof(si32)); + } + else + { + if (transform_flags & HORZ_TRX) + rev_horz_syn(atk, aug->line, bands[2].pull_line(), + bands[3].pull_line(), width, horz_even); + else + memcpy(aug->line->i32, bands[2].pull_line()->i32, + width * sizeof(si32)); + si32* sp = aug->line->i32; + for (ui32 i = width; i > 0; --i) + *sp++ >>= 1; + } + return aug->line; } - return aug->line; } - } - else - { - if (res_rect.siz.h > 1) + else { - if (sig->active) { - sig->active = false; - return sig->line; - }; - for (;;) + if (res_rect.siz.h > 1) { - //horizontal transform - if (cur_line < res_rect.siz.h) + if (sig->active) { + sig->active = false; + return sig->line; + }; + for (;;) { - if (vert_even) { // even - irv_horz_syn(atk, aug->line, - child_res->pull_line(), bands[1].pull_line(), - width, horz_even); - aug->active = true; - vert_even = !vert_even; - ++cur_line; - - const float K = atk->get_K(); - irv_vert_times_K(K, aug->line, width); - - 
continue; - } - else { - irv_horz_syn(atk, sig->line, - bands[2].pull_line(), bands[3].pull_line(), - width, horz_even); - sig->active = true; - vert_even = !vert_even; - ++cur_line; - - const float K_inv = 1.0f / atk->get_K(); - irv_vert_times_K(K_inv, sig->line, width); + //horizontal transform + if (cur_line < res_rect.siz.h) + { + if (vert_even) { // even + if (transform_flags & HORZ_TRX) + irv_horz_syn(atk, aug->line, child_res->pull_line(), + bands[1].pull_line(), width, horz_even); + else + memcpy(aug->line->f32, child_res->pull_line()->f32, + width * sizeof(float)); + aug->active = true; + vert_even = !vert_even; + ++cur_line; + + const float K = atk->get_K(); + irv_vert_times_K(K, aug->line, width); + + continue; + } + else { + if (transform_flags & HORZ_TRX) + irv_horz_syn(atk, sig->line, bands[2].pull_line(), + bands[3].pull_line(), width, horz_even); + else + memcpy(sig->line->f32, bands[2].pull_line()->f32, + width * sizeof(float)); + sig->active = true; + vert_even = !vert_even; + ++cur_line; + + const float K_inv = 1.0f / atk->get_K(); + irv_vert_times_K(K_inv, sig->line, width); + } } - } - //vertical transform - for (ui32 i = 0; i < num_steps; ++i) - { - if (aug->active && (sig->active || ssp[i].active)) + //vertical transform + for (ui32 i = 0; i < num_steps; ++i) { - line_buf* dp = aug->line; - line_buf* sp1 = sig->active ? sig->line : ssp[i].line; - line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; - const lifting_step* s = atk->get_step(num_steps - i - 1); - irv_vert_syn_step(s, dp, sp1, sp2, width); + if (aug->active && (sig->active || ssp[i].active)) + { + line_buf* dp = aug->line; + line_buf* sp1 = sig->active ? sig->line : ssp[i].line; + line_buf* sp2 = ssp[i].active ? 
ssp[i].line : sig->line; + const lifting_step* s = atk->get_step(i); + irv_vert_syn_step(s, dp, sp1, sp2, width); + } + lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } - lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; - } - if (aug->active) { - aug->active = false; - return aug->line; + if (aug->active) { + aug->active = false; + return aug->line; + } + if (sig->active) { + sig->active = false; + return sig->line; + }; } - if (sig->active) { - sig->active = false; - return sig->line; - }; } + else + { + if (vert_even) { + if (transform_flags & HORZ_TRX) + irv_horz_syn(atk, aug->line, child_res->pull_line(), + bands[1].pull_line(), width, horz_even); + else + memcpy(aug->line->f32, child_res->pull_line()->f32, + width * sizeof(float)); + } + else + { + if (transform_flags & HORZ_TRX) + irv_horz_syn(atk, aug->line, bands[2].pull_line(), + bands[3].pull_line(), width, horz_even); + else + memcpy(aug->line->f32, bands[2].pull_line()->f32, + width * sizeof(float)); + float* sp = aug->line->f32; + for (ui32 i = width; i > 0; --i) + *sp++ *= 0.5f; + } + return aug->line; + } + } + } + else + { + if (reversible) + { + if (transform_flags & HORZ_TRX) + rev_horz_syn(atk, aug->line, child_res->pull_line(), + bands[1].pull_line(), width, horz_even); + else + memcpy(aug->line->i32, child_res->pull_line()->i32, + width * sizeof(si32)); + return aug->line; } else { - if (vert_even) + if (transform_flags & HORZ_TRX) irv_horz_syn(atk, aug->line, child_res->pull_line(), bands[1].pull_line(), width, horz_even); else - { - irv_horz_syn(atk, aug->line, bands[2].pull_line(), - bands[3].pull_line(), width, horz_even); - float *sp = aug->line->f32; - for (ui32 i = width; i > 0; --i) - *sp++ *= 0.5f; - } + memcpy(aug->line->f32, child_res->pull_line()->f32, + width * sizeof(float)); return aug->line; } } diff --git a/src/core/codestream/ojph_resolution.h b/src/core/codestream/ojph_resolution.h index 72e0b91a..635a4ced 100644 --- 
a/src/core/codestream/ojph_resolution.h +++ b/src/core/codestream/ojph_resolution.h @@ -61,6 +61,10 @@ namespace ojph { class resolution { public: + enum : ui32 { + HORZ_TRX = 0x01, // horizontal transform + VERT_TRX = 0x02, // vertical transform + }; public: static void pre_alloc(codestream *codestream, const rect& res_rect, @@ -68,8 +72,8 @@ namespace ojph { ui32 comp_num, ui32 res_num); void finalize_alloc(codestream *codestream, const rect& res_rect, const rect& recon_res_rect, ui32 comp_num, - ui32 res_num, point comp_downsamp, - tile_comp *parent_tile_comp, + ui32 res_num, point comp_downsamp, + point res_downsamp, tile_comp *parent_tile_comp, resolution *parent_res); line_buf* get_line(); @@ -77,6 +81,8 @@ namespace ojph { line_buf* pull_line(); rect get_rect() { return res_rect; } ui32 get_comp_num() { return comp_num; } + bool has_horz_transform() { return (transform_flags & HORZ_TRX) != 0; } + bool has_vert_transform() { return (transform_flags & VERT_TRX) != 0; } ui32 prepare_precinct(); void write_precincts(outfile_base *file); @@ -92,7 +98,7 @@ namespace ojph { private: bool reversible, skipped_res_for_read, skipped_res_for_recon; ui32 num_steps; - ui32 num_bands, res_num; + ui32 res_num; ui32 comp_num; ui32 num_bytes; // number of bytes in this resolution // used for tilepart length @@ -113,7 +119,7 @@ namespace ojph { ui32 level_index[20]; //more than enough point cur_precinct_loc; //used for progressing spatial modes (2, 3, 4) const param_atk* atk; - param_dfs::dfs_dwt_type downsampling_style; + ui32 transform_flags; //wavelet machinery ui32 cur_line; ui32 rows_to_produce; diff --git a/src/core/codestream/ojph_subband.cpp b/src/core/codestream/ojph_subband.cpp index dbef3b75..cf007fc9 100644 --- a/src/core/codestream/ojph_subband.cpp +++ b/src/core/codestream/ojph_subband.cpp @@ -55,7 +55,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void subband::pre_alloc(codestream *codestream, const rect 
&band_rect, - ui32 comp_num, ui32 res_num) + ui32 comp_num, ui32 res_num, ui32 transform_flags) { mem_fixed_allocator* allocator = codestream->get_allocator(); @@ -67,8 +67,11 @@ namespace ojph { size log_cb = cdp->get_log_block_dims(); size log_PP = cdp->get_log_precinct_size(res_num); - ui32 xcb_prime = ojph_min(log_cb.w, log_PP.w - (res_num?1:0)); - ui32 ycb_prime = ojph_min(log_cb.h, log_PP.h - (res_num?1:0)); + ui32 x_off = ((transform_flags & resolution::HORZ_TRX) ? 1 : 0); + ui32 y_off = ((transform_flags & resolution::VERT_TRX) ? 1 : 0); + + ui32 xcb_prime = ojph_min(log_cb.w, log_PP.w - x_off); + ui32 ycb_prime = ojph_min(log_cb.h, log_PP.h - y_off); size nominal(1 << xcb_prime, 1 << ycb_prime); @@ -116,21 +119,30 @@ namespace ojph { size log_cb = cdp->get_log_block_dims(); log_PP = cdp->get_log_precinct_size(res_num); - xcb_prime = ojph_min(log_cb.w, log_PP.w - (res_num?1:0)); - ycb_prime = ojph_min(log_cb.h, log_PP.h - (res_num?1:0)); + ui32 x_off = ((parent->has_horz_transform()) ? 1 : 0); + ui32 y_off = ((parent->has_vert_transform()) ? 
1 : 0); + + xcb_prime = ojph_min(log_cb.w, log_PP.w - x_off); + ycb_prime = ojph_min(log_cb.h, log_PP.h - y_off); size nominal(1 << xcb_prime, 1 << ycb_prime); cur_cb_row = 0; cur_line = 0; cur_cb_height = 0; + const param_dfs* dfs = NULL; + if (cdp->is_dfs_defined()) { + dfs = codestream->access_dfs(); + if (dfs != NULL) + dfs = dfs->get_dfs(cdp->get_dfs_index()); + } param_qcd* qcd = codestream->access_qcd(parent->get_comp_num()); ui32 num_decomps = cdp->get_num_decompositions(); - this->K_max = qcd->get_Kmax(NULL, num_decomps, this->res_num, band_num); + this->K_max = qcd->get_Kmax(dfs, num_decomps, this->res_num, band_num); if (!reversible) { float d = - qcd->irrev_get_delta(NULL, num_decomps, res_num, subband_num); + qcd->irrev_get_delta(dfs, num_decomps, res_num, subband_num); d /= (float)(1u << (31 - this->K_max)); delta = d; delta_inv = (1.0f/d); @@ -199,14 +211,16 @@ namespace ojph { ui32 pc_lft = (res_rect.org.x >> log_PP.w) << log_PP.w; ui32 pc_top = (res_rect.org.y >> log_PP.h) << log_PP.h; - ui32 pcx0, pcx1, pcy0, pcy1, shift = (band_num != 0 ? 1 : 0); + ui32 pcx0, pcx1, pcy0, pcy1; + ui32 x_shift = parent->has_horz_transform() ? 1 : 0; + ui32 y_shift = parent->has_vert_transform() ? 
1 : 0; ui32 yb, xb, coly = 0, colx = 0; for (ui32 y = 0; y < num_precincts.h; ++y) { pcy0 = ojph_max(try0, pc_top + (y << log_PP.h)); pcy1 = ojph_min(try1, pc_top + ((y + 1) << log_PP.h)); - pcy0 = (pcy0 - (band_num >> 1) + (1<> shift; - pcy1 = (pcy1 - (band_num >> 1) + (1<> shift; + pcy0 = (pcy0 - (band_num >> 1) + (1 << y_shift) - 1) >> y_shift; + pcy1 = (pcy1 - (band_num >> 1) + (1 << y_shift) - 1) >> y_shift; precinct *p = precincts + y * num_precincts.w; yb = ((pcy1 + (1<> ycb_prime); @@ -217,8 +231,8 @@ namespace ojph { { pcx0 = ojph_max(trx0, pc_lft + (x << log_PP.w)); pcx1 = ojph_min(trx1, pc_lft + ((x + 1) << log_PP.w)); - pcx0 = (pcx0 - (band_num & 1) + (1<> shift; - pcx1 = (pcx1 - (band_num & 1) + (1<> shift; + pcx0 = (pcx0 - (band_num & 1) + (1 << x_shift) - 1) >> x_shift; + pcx1 = (pcx1 - (band_num & 1) + (1 << x_shift) - 1) >> x_shift; rect *bp = p->cb_idxs + band_num; xb = ((pcx1 + (1<> xcb_prime); diff --git a/src/core/codestream/ojph_subband.h b/src/core/codestream/ojph_subband.h index 5dd145e6..8cadae07 100644 --- a/src/core/codestream/ojph_subband.h +++ b/src/core/codestream/ojph_subband.h @@ -81,7 +81,7 @@ namespace ojph { } static void pre_alloc(codestream *codestream, const rect& band_rect, - ui32 comp_num, ui32 res_num); + ui32 comp_num, ui32 res_num, ui32 transform_flags); void finalize_alloc(codestream *codestream, const rect& band_rect, resolution* res, ui32 res_num, ui32 subband_num); @@ -91,6 +91,7 @@ namespace ojph { void get_cb_indices(const size& num_precincts, precinct *precincts); float get_delta() { return delta; } + bool exists() { return !empty; } line_buf* pull_line(); diff --git a/src/core/codestream/ojph_tile.cpp b/src/core/codestream/ojph_tile.cpp index 48f8bb56..3be907d4 100644 --- a/src/core/codestream/ojph_tile.cpp +++ b/src/core/codestream/ojph_tile.cpp @@ -131,8 +131,8 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void tile::finalize_alloc(codestream *codestream, const 
rect& tile_rect, - const rect& recon_tile_rect, ui32 tile_idx, - ui32 offset, ui32 &num_tileparts) + ui32 tile_idx, ui32& offset, + ui32 &num_tileparts) { //this->parent = codestream; mem_fixed_allocator* allocator = codestream->get_allocator(); @@ -167,33 +167,29 @@ namespace ojph { this->resilient = codestream->is_resilient(); this->tile_rect = tile_rect; - this->recon_tile_rect = recon_tile_rect; ui32 tx0 = tile_rect.org.x; ui32 ty0 = tile_rect.org.y; ui32 tx1 = tile_rect.org.x + tile_rect.siz.w; ui32 ty1 = tile_rect.org.y + tile_rect.siz.h; - ui32 recon_tx0 = recon_tile_rect.org.x; - ui32 recon_ty0 = recon_tile_rect.org.y; - ui32 recon_tx1 = recon_tile_rect.org.x + recon_tile_rect.siz.w; - ui32 recon_ty1 = recon_tile_rect.org.y + recon_tile_rect.siz.h; ui32 width = 0; for (ui32 i = 0; i < num_comps; ++i) { point downsamp = szp->get_downsampling(i); + point recon_downsamp = szp->get_recon_downsampling(i); ui32 tcx0 = ojph_div_ceil(tx0, downsamp.x); ui32 tcy0 = ojph_div_ceil(ty0, downsamp.y); ui32 tcx1 = ojph_div_ceil(tx1, downsamp.x); ui32 tcy1 = ojph_div_ceil(ty1, downsamp.y); - ui32 recon_tcx0 = ojph_div_ceil(recon_tx0, downsamp.x); - ui32 recon_tcy0 = ojph_div_ceil(recon_ty0, downsamp.y); - ui32 recon_tcx1 = ojph_div_ceil(recon_tx1, downsamp.x); - ui32 recon_tcy1 = ojph_div_ceil(recon_ty1, downsamp.y); + ui32 recon_tcx0 = ojph_div_ceil(tx0, recon_downsamp.x); + ui32 recon_tcy0 = ojph_div_ceil(ty0, recon_downsamp.y); + ui32 recon_tcx1 = ojph_div_ceil(tx1, recon_downsamp.x); + ui32 recon_tcy1 = ojph_div_ceil(ty1, recon_downsamp.y); line_offsets[i] = - recon_tcx0 - ojph_div_ceil(recon_tx0 - offset, downsamp.x); + recon_tcx0 - ojph_div_ceil(tx0 - offset, recon_downsamp.x); comp_rects[i].org.x = tcx0; comp_rects[i].org.y = tcy0; comp_rects[i].siz.w = tcx1 - tcx0; @@ -212,6 +208,8 @@ namespace ojph { cur_line[i] = 0; } + offset += tile_rect.siz.w; + //allocate lines const param_cod* cdp = codestream->get_cod(); this->reversible = cdp->access_atk()->is_reversible(); 
diff --git a/src/core/codestream/ojph_tile.h b/src/core/codestream/ojph_tile.h index b00c8181..056c7c94 100644 --- a/src/core/codestream/ojph_tile.h +++ b/src/core/codestream/ojph_tile.h @@ -63,8 +63,7 @@ namespace ojph { static void pre_alloc(codestream *codestream, const rect& tile_rect, const rect& recon_tile_rect, ui32 &num_tileparts); void finalize_alloc(codestream *codestream, const rect& tile_rect, - const rect& recon_tile_rect, ui32 tile_idx, - ui32 offset, ui32 &num_tileparts); + ui32 tile_idx, ui32& offset, ui32 &num_tileparts); bool push(line_buf *line, ui32 comp_num); void prepare_for_flush(); @@ -77,7 +76,7 @@ namespace ojph { private: //codestream *parent; - rect tile_rect, recon_tile_rect; + rect tile_rect; ui32 num_comps; tile_comp *comps; ui32 num_lines; diff --git a/src/core/codestream/ojph_tile_comp.cpp b/src/core/codestream/ojph_tile_comp.cpp index 69ed0bcb..83d1b624 100644 --- a/src/core/codestream/ojph_tile_comp.cpp +++ b/src/core/codestream/ojph_tile_comp.cpp @@ -83,7 +83,8 @@ namespace ojph { this->num_bytes = 0; res = allocator->post_alloc_obj(1); res->finalize_alloc(codestream, comp_rect, recon_comp_rect, comp_num, - num_decomps, comp_downsamp, this, NULL); + num_decomps, comp_downsamp, comp_downsamp, this, + NULL); } ////////////////////////////////////////////////////////////////////////// diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 028ac013..b031860e 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -408,9 +408,9 @@ namespace ojph { const line_buf* other, const line_buf* aug, ui32 repeat) { - si32 a = s->rev.Aatk; - si32 b = s->rev.Batk; - ui32 e = s->rev.Eatk; + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; si32* dst = aug->i32; const si32* src1 = sig->i32, * src2 = other->i32; @@ -419,7 +419,7 @@ namespace ojph { *dst++ += (b + a * (*src1++ + *src2++)) >> e; else for (ui32 i = repeat; i > 0; 
--i) - *dst++ -= (b - a * (*src1++ + *src2++)) >> e; + *dst++ -= (- b - a * (*src1++ + *src2++)) >> e; } ///////////////////////////////////////////////////////////////////////// @@ -451,13 +451,13 @@ namespace ojph { ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass ui32 num_steps = atk->get_num_steps(); - for (ui32 j = 0; j < num_steps; ++j) + for (ui32 j = num_steps; j > 0; --j) { // first lifting step - const lifting_step* s = atk->get_step(j); - si32 a = s->rev.Aatk; - si32 b = s->rev.Batk; - ui32 e = s->rev.Eatk; + const lifting_step* s = atk->get_step(j - 1); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; // extension lp[-1] = lp[0]; @@ -470,7 +470,7 @@ namespace ojph { *dp += (b + a * (sp[-1] + sp[0])) >> e; else for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp -= (b - a * (sp[-1] + sp[0])) >> e; + *dp -= (- b - a * (sp[-1] + sp[0])) >> e; // swap buffers si32* t = lp; lp = hp; hp = t; @@ -491,9 +491,9 @@ namespace ojph { const line_buf* sig, const line_buf* other, ui32 repeat) { - si32 a = s->rev.Aatk; - si32 b = s->rev.Batk; - ui32 e = s->rev.Eatk; + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; si32* dst = aug->i32; const si32* src1 = sig->i32, * src2 = other->i32; @@ -502,7 +502,7 @@ namespace ojph { *dst++ -= (b + a * (*src1++ + *src2++)) >> e; else for (ui32 i = repeat; i > 0; --i) - *dst++ += (b - a * (*src1++ + *src2++)) >> e; + *dst++ += (- b - a * (*src1++ + *src2++)) >> e; } ////////////////////////////////////////////////////////////////////////// @@ -517,13 +517,12 @@ namespace ojph { ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass ui32 oth_width = (width + (even ? 
0 : 1)) >> 1; // high pass ui32 num_steps = atk->get_num_steps(); - for (ui32 j = num_steps; j > 0; --j) + for (ui32 j = 0; j < num_steps; ++j) { - // first lifting step - const lifting_step* s = atk->get_step(j - 1); - si32 a = s->rev.Aatk; - si32 b = s->rev.Batk; - ui32 e = s->rev.Eatk; + const lifting_step* s = atk->get_step(j); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; // extension oth[-1] = oth[0]; @@ -536,7 +535,7 @@ namespace ojph { *dp -= (b + a * (sp[-1] + sp[0])) >> e; else for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp += (b - a * (sp[-1] + sp[0])) >> e; + *dp += (- b - a * (sp[-1] + sp[0])) >> e; // swap buffers si32* t = aug; aug = oth; oth = t; @@ -793,11 +792,11 @@ namespace ojph { ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass ui32 num_steps = atk->get_num_steps(); - for (ui32 j = 0; j < num_steps; ++j) + for (ui32 j = num_steps; j > 0; --j) { // first lifting step - const lifting_step* s = atk->get_step(j); - float a = s->irv.Aatk; + const lifting_step* s = atk->get_step(j - 1); + const float a = s->irv.Aatk; // extension lp[-1] = lp[0]; @@ -878,10 +877,10 @@ namespace ojph { } ui32 num_steps = atk->get_num_steps(); - for (ui32 j = num_steps; j > 0; --j) + for (ui32 j = 0; j < num_steps; ++j) { - const lifting_step* s = atk->get_step(j - 1); - float a = s->irv.Aatk; + const lifting_step* s = atk->get_step(j); + const float a = s->irv.Aatk; // extension oth[-1] = oth[0]; From c87d3e402b262687f29c898526c035f0f57f0024 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Tue, 9 Apr 2024 00:01:28 +1000 Subject: [PATCH 055/348] Small bug fix for previous commit. 
--- src/core/codestream/ojph_codestream_local.cpp | 3 +-- src/core/codestream/ojph_params_local.h | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/codestream/ojph_codestream_local.cpp b/src/core/codestream/ojph_codestream_local.cpp index 5f72d3e8..e9f56d04 100644 --- a/src/core/codestream/ojph_codestream_local.cpp +++ b/src/core/codestream/ojph_codestream_local.cpp @@ -220,7 +220,6 @@ namespace ojph { point index; rect tile_rect; ojph::param_siz sz = access_siz(); - ui32 ds = 1 << skipped_res_for_recon; for (index.y = 0; index.y < num_tiles.h; ++index.y) { ui32 y0 = sz.get_tile_offset().y @@ -546,7 +545,7 @@ namespace ojph { ui32 num_comments) { //finalize - siz.check_validity(); + siz.check_validity(cod); cod.check_validity(siz); cod.update_atk(atk); qcd.check_validity(siz, cod); diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index 1ee508dc..f4f2c9f4 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -202,8 +202,10 @@ namespace ojph { cptr[comp_num].YRsiz = (ui8)downsampling.y; } - void check_validity() + void check_validity(const param_cod& cod) { + this->cod = &cod; + if (XTsiz == 0 && YTsiz == 0) { XTsiz = Xsiz + XOsiz; YTsiz = Ysiz + YOsiz; } if (Xsiz == 0 || Ysiz == 0 || XTsiz == 0 || YTsiz == 0) From b1c71574406078c24faa38e4a9c2c71a9ed8f1b3 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Tue, 9 Apr 2024 14:23:32 +1000 Subject: [PATCH 056/348] A small bug fix --- src/core/codestream/ojph_params_local.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/core/codestream/ojph_params_local.h b/src/core/codestream/ojph_params_local.h index f4f2c9f4..1958b8e8 100644 --- a/src/core/codestream/ojph_params_local.h +++ b/src/core/codestream/ojph_params_local.h @@ -487,11 +487,21 @@ namespace ojph { //////////////////////////////////////// bool packets_may_use_sop() const - { return (Scod & 2) == 2; } 
+ { + if (parent) + return (parent->Scod & 2) == 2; + else + return (Scod & 2) == 2; + } //////////////////////////////////////// bool packets_use_eph() const - { return (Scod & 4) == 4; } + { + if (parent) + return (parent->Scod & 4) == 4; + else + return (Scod & 4) == 4; + } //////////////////////////////////////// bool write(outfile_base *file); @@ -507,8 +517,8 @@ namespace ojph { void update_atk(const param_atk* atk); //////////////////////////////////////// - void link_cod(const param_cod* cod) - { this->next = cod; } + void link_cod(const param_cod* coc) + { this->next = coc; } //////////////////////////////////////// const param_cod* get_cod(ui32 comp_num) const @@ -544,7 +554,7 @@ namespace ojph { ui8 Scod; // serves as Scod and Scoc cod_SGcod SGCod; // Used in COD and copied to COC cod_SPcod SPcod; // serves as SPcod and SPcoc - const param_cod* next;// to link cod parameters + const param_cod* next;// to chain coc parameters to cod private: // COC only variables param_cod* parent; // parent COD structure From a18e7fb47ae972ebcdcb3627b25da3f72f4b9268 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 10 Apr 2024 22:56:02 +1000 Subject: [PATCH 057/348] Working on SIMD. SSE and AVX is largely done, except the core horizontal transform. 
--- src/core/transform/ojph_colour_sse.cpp | 2 +- src/core/transform/ojph_colour_sse2.cpp | 2 +- src/core/transform/ojph_transform.cpp | 490 +++------------------ src/core/transform/ojph_transform.h | 58 --- src/core/transform/ojph_transform_avx.cpp | 460 +++++++++---------- src/core/transform/ojph_transform_local.h | 334 +++++++------- src/core/transform/ojph_transform_sse.cpp | 421 +++++++++--------- src/core/transform/ojph_transform_sse2.cpp | 2 +- 8 files changed, 650 insertions(+), 1119 deletions(-) diff --git a/src/core/transform/ojph_colour_sse.cpp b/src/core/transform/ojph_colour_sse.cpp index 89cc86c2..edd1eaf2 100644 --- a/src/core/transform/ojph_colour_sse.cpp +++ b/src/core/transform/ojph_colour_sse.cpp @@ -42,7 +42,7 @@ #include "ojph_colour.h" #include "ojph_colour_local.h" -#include +#include namespace ojph { namespace local { diff --git a/src/core/transform/ojph_colour_sse2.cpp b/src/core/transform/ojph_colour_sse2.cpp index 4bb56f29..4a3cb145 100644 --- a/src/core/transform/ojph_colour_sse2.cpp +++ b/src/core/transform/ojph_colour_sse2.cpp @@ -41,7 +41,7 @@ #include "ojph_arch.h" #include "ojph_colour.h" -#include +#include namespace ojph { namespace local { diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index b031860e..eba4f006 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -53,40 +53,6 @@ namespace ojph { // Reversible functions ///////////////////////////////////////////////////////////////////////// - ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_wvlt_fwd_predict) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat) = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_wvlt_fwd_update) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat) = NULL; - - 
///////////////////////////////////////////////////////////////////////// - void (*rev_horz_wvlt_fwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even) - = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_wvlt_bwd_predict) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat) = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_wvlt_bwd_update) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat) = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*rev_horz_wvlt_bwd_tx) - (line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even) - = NULL; - - - - - ///////////////////////////////////////////////////////////////////////// void (*rev_vert_ana_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, @@ -106,39 +72,11 @@ namespace ojph { void (*rev_horz_syn) (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even) = NULL; - - - - ///////////////////////////////////////////////////////////////////////// // Irreversible functions ///////////////////////////////////////////////////////////////////////// - ///////////////////////////////////////////////////////////////////////// - void (*irrev_vert_wvlt_step) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - int step_num, ui32 repeat) = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*irrev_vert_wvlt_K) - (const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis, - ui32 repeat) = NULL; - - ///////////////////////////////////////////////////////////////////////// - void (*irrev_horz_wvlt_fwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even) - = NULL; - - ///////////////////////////////////////////////////////////////////////// - void 
(*irrev_horz_wvlt_bwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even) - = NULL; - - - - - ///////////////////////////////////////////////////////////////////////// void (*irv_vert_ana_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, @@ -163,10 +101,6 @@ namespace ojph { void (*irv_vert_times_K) (float K, const line_buf* aug, ui32 repeat) = NULL; - - - - //////////////////////////////////////////////////////////////////////////// static bool wavelet_transform_functions_initialized = false; @@ -178,23 +112,11 @@ namespace ojph { #if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN) - rev_vert_wvlt_fwd_predict = gen_rev_vert_wvlt_fwd_predict; - rev_vert_wvlt_fwd_update = gen_rev_vert_wvlt_fwd_update; - rev_horz_wvlt_fwd_tx = gen_rev_horz_wvlt_fwd_tx; - rev_vert_wvlt_bwd_predict = gen_rev_vert_wvlt_bwd_predict; - rev_vert_wvlt_bwd_update = gen_rev_vert_wvlt_bwd_update; - rev_horz_wvlt_bwd_tx = gen_rev_horz_wvlt_bwd_tx; - rev_vert_ana_step = gen_rev_vert_ana_step; rev_horz_ana = gen_rev_horz_ana; rev_vert_syn_step = gen_rev_vert_syn_step; rev_horz_syn = gen_rev_horz_syn; - irrev_vert_wvlt_step = gen_irrev_vert_wvlt_step; - irrev_vert_wvlt_K = gen_irrev_vert_wvlt_K; - irrev_horz_wvlt_fwd_tx = gen_irrev_horz_wvlt_fwd_tx; - irrev_horz_wvlt_bwd_tx = gen_irrev_horz_wvlt_bwd_tx; - irv_vert_ana_step = gen_irv_vert_ana_step; irv_horz_ana = gen_irv_horz_ana; irv_vert_syn_step = gen_irv_vert_syn_step; @@ -206,203 +128,74 @@ namespace ojph { if (level >= X86_CPU_EXT_LEVEL_SSE) { - irrev_vert_wvlt_step = sse_irrev_vert_wvlt_step; - irrev_vert_wvlt_K = sse_irrev_vert_wvlt_K; - irrev_horz_wvlt_fwd_tx = sse_irrev_horz_wvlt_fwd_tx; - irrev_horz_wvlt_bwd_tx = sse_irrev_horz_wvlt_bwd_tx; + irv_vert_ana_step = sse_irv_vert_ana_step; + irv_horz_ana = sse_irv_horz_ana; + irv_vert_syn_step = sse_irv_vert_syn_step; + irv_horz_syn = sse_irv_horz_syn; + irv_vert_times_K = sse_irv_vert_times_K; } - if (level >= 
X86_CPU_EXT_LEVEL_SSE2) - { - rev_vert_wvlt_fwd_predict = sse2_rev_vert_wvlt_fwd_predict; - rev_vert_wvlt_fwd_update = sse2_rev_vert_wvlt_fwd_update; - rev_horz_wvlt_fwd_tx = sse2_rev_horz_wvlt_fwd_tx; - rev_vert_wvlt_bwd_predict = sse2_rev_vert_wvlt_bwd_predict; - rev_vert_wvlt_bwd_update = sse2_rev_vert_wvlt_bwd_update; - rev_horz_wvlt_bwd_tx = sse2_rev_horz_wvlt_bwd_tx; - } + //if (level >= X86_CPU_EXT_LEVEL_SSE2) + //{ + // rev_vert_ana_step = sse2_rev_vert_ana_step; + // rev_horz_ana = sse2_rev_horz_ana; + // rev_vert_syn_step = sse2_rev_vert_syn_step; + // rev_horz_syn = sse2_rev_horz_syn; + //} if (level >= X86_CPU_EXT_LEVEL_AVX) { - irrev_vert_wvlt_step = avx_irrev_vert_wvlt_step; - irrev_vert_wvlt_K = avx_irrev_vert_wvlt_K; - irrev_horz_wvlt_fwd_tx = avx_irrev_horz_wvlt_fwd_tx; - irrev_horz_wvlt_bwd_tx = avx_irrev_horz_wvlt_bwd_tx; + irv_vert_ana_step = avx_irv_vert_ana_step; + irv_horz_ana = avx_irv_horz_ana; + irv_vert_syn_step = avx_irv_vert_syn_step; + irv_horz_syn = avx_irv_horz_syn; + irv_vert_times_K = avx_irv_vert_times_K; } - if (level >= X86_CPU_EXT_LEVEL_AVX2) - { - rev_vert_wvlt_fwd_predict = avx2_rev_vert_wvlt_fwd_predict; - rev_vert_wvlt_fwd_update = avx2_rev_vert_wvlt_fwd_update; - rev_horz_wvlt_fwd_tx = avx2_rev_horz_wvlt_fwd_tx; - rev_vert_wvlt_bwd_predict = avx2_rev_vert_wvlt_bwd_predict; - rev_vert_wvlt_bwd_update = avx2_rev_vert_wvlt_bwd_update; - rev_horz_wvlt_bwd_tx = avx2_rev_horz_wvlt_bwd_tx; - } + //if (level >= X86_CPU_EXT_LEVEL_AVX2) + //{ + // rev_vert_ana_step = avx2_rev_vert_ana_step; + // rev_horz_ana = avx2_rev_horz_ana; + // rev_vert_syn_step = avx2_rev_vert_syn_step; + // rev_horz_syn = avx2_rev_horz_syn; + //} + + //if (level >= X86_CPU_EXT_LEVEL_AVX512) + //{ + // rev_vert_ana_step = avx512_rev_vert_ana_step; + // rev_horz_ana = avx512_rev_horz_ana; + // rev_vert_syn_step = avx512_rev_vert_syn_step; + // rev_horz_syn = avx512_rev_horz_syn; + + // irv_vert_ana_step = avx512_irv_vert_ana_step; + // irv_horz_ana = 
avx512_irv_horz_ana; + // irv_vert_syn_step = avx512_irv_vert_syn_step; + // irv_horz_syn = avx512_irv_horz_syn; + // irv_vert_times_K = avx512_irv_vert_times_K; + //} + #endif // !OJPH_DISABLE_INTEL_SIMD #else // OJPH_ENABLE_WASM_SIMD - rev_vert_wvlt_fwd_predict = wasm_rev_vert_wvlt_fwd_predict; - rev_vert_wvlt_fwd_update = wasm_rev_vert_wvlt_fwd_update; - rev_horz_wvlt_fwd_tx = wasm_rev_horz_wvlt_fwd_tx; - rev_vert_wvlt_bwd_predict = wasm_rev_vert_wvlt_bwd_predict; - rev_vert_wvlt_bwd_update = wasm_rev_vert_wvlt_bwd_update; - rev_horz_wvlt_bwd_tx = wasm_rev_horz_wvlt_bwd_tx; - irrev_vert_wvlt_step = wasm_irrev_vert_wvlt_step; - irrev_vert_wvlt_K = wasm_irrev_vert_wvlt_K; - irrev_horz_wvlt_fwd_tx = wasm_irrev_horz_wvlt_fwd_tx; - irrev_horz_wvlt_bwd_tx = wasm_irrev_horz_wvlt_bwd_tx; + rev_vert_ana_step = wasm_rev_vert_ana_step; + rev_horz_ana = wasm_rev_horz_ana; + rev_vert_syn_step = wasm_rev_vert_syn_step; + rev_horz_syn = wasm_rev_horz_syn; + + irv_vert_ana_step = wasm_irv_vert_ana_step; + irv_horz_ana = wasm_irv_horz_ana; + irv_vert_syn_step = wasm_irv_vert_syn_step; + irv_horz_syn = wasm_irv_horz_syn; + irv_vert_times_K = wasm_irv_vert_times_K; #endif // !OJPH_ENABLE_WASM_SIMD wavelet_transform_functions_initialized = true; } ////////////////////////////////////////////////////////////////////////// - const float LIFTING_FACTORS::steps[8] = - { - -1.586134342059924f, -0.052980118572961f, +0.882911075530934f, - +0.443506852043971f, - +1.586134342059924f, +0.052980118572961f, -0.882911075530934f, - -0.443506852043971f - }; - const float LIFTING_FACTORS::K = 1.230174104914001f; - const float LIFTING_FACTORS::K_inv = (float)(1.0 / 1.230174104914001); - - ////////////////////////////////////////////////////////////////////////// #if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN) - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_fwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - 
line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - for (ui32 i = repeat; i > 0; --i) - *dst++ -= (*src1++ + *src2++) >> 1; - } - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_fwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - for (ui32 i = repeat; i > 0; --i) - *dst++ += (*src1++ + *src2++ + 2) >> 2; - } - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, bool even) - { - if (width > 1) - { - si32 *src = line_src->i32; - si32 *ldst = line_ldst->i32, *hdst = line_hdst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - // extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const si32* sp = src + (even ? 1 : 0); - si32 *dph = hdst; - for (ui32 i = H_width; i > 0; --i, sp+=2) - *dph++ = sp[0] - ((sp[-1] + sp[1]) >> 1); - - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - sp = src + (even ? 0 : 1); - const si32* sph = hdst + (even ? 
0 : 1); - si32 *dpl = ldst; - for (ui32 i = L_width; i > 0; --i, sp+=2, sph++) - *dpl++ = *sp + ((2 + sph[-1] + sph[0]) >> 2); - } - else - { - if (even) - line_ldst->i32[0] = line_src->i32[0]; - else - line_hdst->i32[0] = line_src->i32[0] << 1; - } - } - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_bwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - for (ui32 i = repeat; i > 0; --i) - *dst++ += (*src1++ + *src2++) >> 1; - } - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_bwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - for (ui32 i = repeat; i > 0; --i) - *dst++ -= (2 + *src1++ + *src2++) >> 2; - } - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, bool even) - { - if (width > 1) - { - si32 *lsrc = line_lsrc->i32, *hsrc = line_hsrc->i32; - si32 *dst = line_dst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - // extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - const si32 *sph = hsrc + (even ? 0 : 1); - si32 *spl = lsrc; - for (ui32 i = L_width; i > 0; --i, sph++, spl++) - *spl -= ((2 + sph[-1] + sph[0]) >> 2); - - // extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width - 1]; - // inverse predict and combine - si32 *dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - for (ui32 i = L_width + (even ? 
0 : 1); i > 0; --i, spl++, sph++) - { - *dp++ = *spl; - *dp++ = *sph + ((spl[0] + spl[1]) >> 1); - } - } - else - { - if (even) - line_dst->i32[0] = line_lsrc->i32[0]; - else - line_dst->i32[0] = line_hsrc->i32[0] >> 1; - } - } - - - - - ///////////////////////////////////////////////////////////////////////// void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -569,187 +362,6 @@ namespace ojph { } } - - - - - ////////////////////////////////////////////////////////////////////////// - void gen_irrev_vert_wvlt_step(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, - int step_num, ui32 repeat) - { - float *dst = line_dst->f32; - const float *src1 = line_src1->f32, *src2 = line_src2->f32; - float factor = LIFTING_FACTORS::steps[step_num]; - for (ui32 i = repeat; i > 0; --i) - *dst++ += factor * (*src1++ + *src2++); - } - - ///////////////////////////////////////////////////////////////////////// - void gen_irrev_vert_wvlt_K(const line_buf* line_src, - line_buf* line_dst, - bool L_analysis_or_H_synthesis, ui32 repeat) - { - float *dst = line_dst->f32; - const float *src = line_src->f32; - float factor = LIFTING_FACTORS::K_inv; - factor = L_analysis_or_H_synthesis ? factor : LIFTING_FACTORS::K; - for (ui32 i = repeat; i > 0; --i) - *dst++ = *src++ * factor; - } - - - ///////////////////////////////////////////////////////////////////////// - void gen_irrev_horz_wvlt_fwd_tx(line_buf* line_src, - line_buf *line_ldst, - line_buf *line_hdst, - ui32 width, bool even) - { - if (width > 1) - { - float *src = line_src->f32; - float *ldst = line_ldst->f32, *hdst = line_hdst->f32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - //extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - float factor = LIFTING_FACTORS::steps[0]; - const float* sp = src + (even ? 
1 : 0); - float *dph = hdst; - for (ui32 i = H_width; i > 0; --i, sp+=2) - *dph++ = sp[0] + factor * (sp[-1] + sp[1]); - - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = LIFTING_FACTORS::steps[1]; - sp = src + (even ? 0 : 1); - const float* sph = hdst + (even ? 0 : 1); - float *dpl = ldst; - for (ui32 i = L_width; i > 0; --i, sp+=2, sph++) - *dpl++ = sp[0] + factor * (sph[-1] + sph[0]); - - //extension - ldst[-1] = ldst[0]; - ldst[L_width] = ldst[L_width-1]; - //predict - factor = LIFTING_FACTORS::steps[2]; - const float* spl = ldst + (even ? 1 : 0); - dph = hdst; - for (ui32 i = H_width; i > 0; --i, spl++) - *dph++ += factor * (spl[-1] + spl[0]); - - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = LIFTING_FACTORS::steps[3]; - sph = hdst + (even ? 0 : 1); - dpl = ldst; - for (ui32 i = L_width; i > 0; --i, sph++) - *dpl++ += factor * (sph[-1] + sph[0]); - - //multipliers - float *dp = ldst; - for (ui32 i = L_width; i > 0; --i, dp++) - *dp *= LIFTING_FACTORS::K_inv; - dp = hdst; - for (ui32 i = H_width; i > 0; --i, dp++) - *dp *= LIFTING_FACTORS::K; - } - else - { - if (even) - line_ldst->f32[0] = line_src->f32[0]; - else - line_hdst->f32[0] = line_src->f32[0] + line_src->f32[0]; - } - } - - ///////////////////////////////////////////////////////////////////////// - void gen_irrev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, - bool even) - { - if (width > 1) - { - float *lsrc = line_lsrc->f32, *hsrc = line_hsrc->f32; - float *dst = line_dst->f32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 
0 : 1)) >> 1; - - //multipliers - float *dp = lsrc; - for (ui32 i = L_width; i > 0; --i, dp++) - *dp *= LIFTING_FACTORS::K; - dp = hsrc; - for (ui32 i = H_width; i > 0; --i, dp++) - *dp *= LIFTING_FACTORS::K_inv; - - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - float factor = LIFTING_FACTORS::steps[7]; - const float *sph = hsrc + (even ? 0 : 1); - float *dpl = lsrc; - for (ui32 i = L_width; i > 0; --i, dpl++, sph++) - *dpl += factor * (sph[-1] + sph[0]); - - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict - factor = LIFTING_FACTORS::steps[6]; - const float *spl = lsrc + (even ? 0 : -1); - float *dph = hsrc; - for (ui32 i = H_width; i > 0; --i, dph++, spl++) - *dph += factor * (spl[0] + spl[1]); - - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = LIFTING_FACTORS::steps[5]; - sph = hsrc + (even ? 0 : 1); - dpl = lsrc; - for (ui32 i = L_width; i > 0; --i, dpl++, sph++) - *dpl += factor * (sph[-1] + sph[0]); - - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict and combine - factor = LIFTING_FACTORS::steps[4]; - dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 
0 : -1); - sph = hsrc; - for (ui32 i = L_width+(even?0:1); i > 0; --i, spl++, sph++) - { - *dp++ = *spl; - *dp++ = *sph + factor * (spl[0] + spl[1]); - } - } - else - { - if (even) - line_dst->f32[0] = line_lsrc->f32[0]; - else - line_dst->f32[0] = line_hsrc->f32[0] * 0.5f; - } - } - - - - - ////////////////////////////////////////////////////////////////////////// void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -833,8 +445,6 @@ namespace ojph { else hdst->f32[0] = src->f32[0] * 2.0f; } - - } ////////////////////////////////////////////////////////////////////////// @@ -925,8 +535,6 @@ namespace ojph { *dst++ *= K; } - - #endif // !OJPH_ENABLE_WASM_SIMD } diff --git a/src/core/transform/ojph_transform.h b/src/core/transform/ojph_transform.h index b31df0ef..1aae8b82 100644 --- a/src/core/transform/ojph_transform.h +++ b/src/core/transform/ojph_transform.h @@ -54,37 +54,6 @@ namespace ojph { // Reversible functions ///////////////////////////////////////////////////////////////////////// - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_wvlt_fwd_predict) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_wvlt_fwd_update) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_horz_wvlt_fwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even); - - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_wvlt_bwd_predict) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_wvlt_bwd_update) - (const line_buf* src1, 
const line_buf* src2, line_buf *dst, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_horz_wvlt_bwd_tx) - (line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even); - - - - ///////////////////////////////////////////////////////////////////////// extern void (*rev_vert_ana_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, @@ -107,35 +76,10 @@ namespace ojph { - - ///////////////////////////////////////////////////////////////////////// // Irreversible functions ///////////////////////////////////////////////////////////////////////// - ///////////////////////////////////////////////////////////////////////// - extern void (*irrev_vert_wvlt_step) - (const line_buf* src1, const line_buf* src2, line_buf *dst, - int step_num, ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*irrev_vert_wvlt_K) - (const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - extern void (*irrev_horz_wvlt_fwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even); - - ///////////////////////////////////////////////////////////////////////// - extern void (*irrev_horz_wvlt_bwd_tx) - (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even); - - - - - - ///////////////////////////////////////////////////////////////////////// extern void (*irv_vert_ana_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, @@ -161,8 +105,6 @@ namespace ojph { (float K, const line_buf* aug, ui32 repeat); - - } } diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 725d7ce8..743ceee6 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -36,6 +36,7 @@ 
//***************************************************************************/ #include +#include #include "ojph_defs.h" #include "ojph_arch.h" @@ -43,22 +44,23 @@ #include "ojph_transform.h" #include "ojph_transform_local.h" -#include +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" namespace ojph { namespace local { ////////////////////////////////////////////////////////////////////////// - void avx_irrev_vert_wvlt_step(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, int step_num, - ui32 repeat) + void avx_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat) { - float *dst = line_dst->f32; - const float *src1 = line_src1->f32, *src2 = line_src2->f32; - - __m256 factor = _mm256_set1_ps(LIFTING_FACTORS::steps[step_num]); - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src1+=8, src2+=8) + __m256 factor = _mm256_set1_ps(s->irv.Aatk); + + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + repeat = (repeat + 7) >> 3; + for (ui32 i = repeat; i > 0; --i, dst += 8, src1 += 8, src2 += 8) { __m256 s1 = _mm256_load_ps(src1); __m256 s2 = _mm256_load_ps(src2); @@ -69,261 +71,261 @@ namespace ojph { } ///////////////////////////////////////////////////////////////////////// - void avx_irrev_vert_wvlt_K(const line_buf* line_src, line_buf* line_dst, - bool L_analysis_or_H_synthesis, ui32 repeat) - { - float *dst = line_dst->f32; - const float *src = line_src->f32; - - float f = LIFTING_FACTORS::K_inv; - f = L_analysis_or_H_synthesis ? 
f : LIFTING_FACTORS::K; - __m256 factor = _mm256_set1_ps(f); - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src+=8) - { - __m256 s = _mm256_load_ps(src); - _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); - } - } - - - ///////////////////////////////////////////////////////////////////////// - void avx_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, - bool even) + void avx_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - float *src = line_src->f32; - float *ldst = line_ldst->f32, *hdst = line_hdst->f32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + // split src into ldst and hdst + if (even) + { + float* dph = hdst->f32; + float* dpl = ldst->f32; + float* sp = src->f32; - //extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const float* sp = src + (even ? 
1 : 0); - float *dph = hdst; - __m256 factor = _mm256_set1_ps(LIFTING_FACTORS::steps[0]); - for (ui32 i = (H_width + 3) >> 2; i > 0; --i) - { //this is doing twice the work it needs to do - //it can be definitely written better - __m256 s1 = _mm256_loadu_ps(sp - 1); - __m256 s2 = _mm256_loadu_ps(sp + 1); - __m256 d = _mm256_loadu_ps(sp); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - __m256 d1 = _mm256_add_ps(d, s1); - sp += 8; - __m128 t1 = _mm256_extractf128_ps(d1, 0); - __m128 t2 = _mm256_extractf128_ps(d1, 1); - __m128 t = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(2, 0, 2, 0)); - _mm_store_ps(dph, t); - dph += 4; + for (int i = width; i > 0; i -= 16, sp += 16, dpl += 8, dph += 8) + { + __m256 a = _mm256_load_ps(sp); + __m256 b = _mm256_load_ps(sp + 8); + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); + _mm256_store_ps(dpl, e); + _mm256_store_ps(dph, f); + } } - - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - __m128 factor128 = _mm_set1_ps(LIFTING_FACTORS::steps[1]); - sp = src + (even ? 0 : 1); - const float* sph = hdst + (even ? 0 : 1); - float *dpl = ldst; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4) + else { - __m256 d1 = _mm256_loadu_ps(sp); //is there an advantage here? 
- __m128 t1 = _mm256_extractf128_ps(d1, 0); - __m128 t2 = _mm256_extractf128_ps(d1, 1); - __m128 d = _mm_shuffle_ps(t1, t2, _MM_SHUFFLE(2, 0, 2, 0)); + float* dph = hdst->f32; + float* dpl = ldst->f32; + float* sp = src->f32; - __m128 s1 = _mm_loadu_ps(sph - 1); - __m128 s2 = _mm_loadu_ps(sph); - s1 = _mm_mul_ps(factor128, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dpl, d); + for (int i = width; i > 0; i -= 16, sp += 16, dpl += 8, dph += 8) + { + __m256 a = _mm256_load_ps(sp); + __m256 b = _mm256_load_ps(sp + 8); + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); + _mm256_store_ps(dpl, f); + _mm256_store_ps(dph, e); + } } - //extension - ldst[-1] = ldst[0]; - ldst[L_width] = ldst[L_width-1]; - //predict - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[2]); - const float* spl = ldst + (even ? 1 : 0); - dph = hdst; - for (ui32 i = (H_width + 7) >> 3; i > 0; --i, spl+=8, dph+=8) + // the actual horizontal transform + float* hp = hdst->f32, * lp = ldst->f32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - __m256 s1 = _mm256_loadu_ps(spl - 1); - __m256 s2 = _mm256_loadu_ps(spl); - __m256 d = _mm256_loadu_ps(dph); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s1); - _mm256_store_ps(dph, d); - } + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const float a = s->irv.Aatk; - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[3]); - sph = hdst + (even ? 
0 : 1); - dpl = ldst; - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, sph+=8, dpl+=8) - { - __m256 s1 = _mm256_loadu_ps(sph - 1); - __m256 s2 = _mm256_loadu_ps(sph); - __m256 d = _mm256_loadu_ps(dpl); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s1); - _mm256_store_ps(dpl, d); - } + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const float* sp = lp + (even ? 1 : 0); + float* dp = hp; + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += a * (sp[-1] + sp[0]); - //multipliers - float *dp = ldst; - factor = _mm256_set1_ps(LIFTING_FACTORS::K_inv); - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, dp+=8) - { - __m256 d = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, d)); + // swap buffers + float* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } - dp = hdst; - factor = _mm256_set1_ps(LIFTING_FACTORS::K); - for (ui32 i = (H_width + 7) >> 3; i > 0; --i, dp+=8) - { - __m256 d = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, d)); + + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + __m256 factor; + + factor = _mm256_set1_ps(K_inv); + dp = lp; + for (ui32 i = (l_width + 7) >> 3; i > 0; --i, dp += 8) + { + __m256 s = _mm256_load_ps(dp); + _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); + } + + factor = _mm256_set1_ps(K); + dp = hp; + for (ui32 i = (h_width + 7) >> 3; i > 0; --i, dp += 8) + { + __m256 s = _mm256_load_ps(dp); + _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); + } } } - else - { + else { if (even) - line_ldst->f32[0] = line_src->f32[0]; + ldst->f32[0] = src->f32[0]; else - line_hdst->f32[0] = line_src->f32[0] + line_src->f32[0]; + hdst->f32[0] = src->f32[0] * 2.0f; } } + + ////////////////////////////////////////////////////////////////////////// + void avx_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 
repeat) + { + __m256 factor = _mm256_set1_ps(s->irv.Aatk); - ///////////////////////////////////////////////////////////////////////// - void avx_irrev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, - bool even) + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + repeat = (repeat + 7) >> 3; + for (ui32 i = repeat; i > 0; --i, dst += 8, src1 += 8, src2 += 8) + { + __m256 s1 = _mm256_load_ps(src1); + __m256 s2 = _mm256_load_ps(src2); + __m256 d = _mm256_load_ps(dst); + d = _mm256_sub_ps(d, _mm256_mul_ps(factor, _mm256_add_ps(s1, s2))); + _mm256_store_ps(dst, d); + } + } + + ////////////////////////////////////////////////////////////////////////// + void avx_irv_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { - float *lsrc = line_lsrc->f32, *hsrc = line_hsrc->f32; - float *dst = line_dst->f32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + bool ev = even; + float* oth = hsrc->f32, * aug = lsrc->f32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 
0 : 1)) >> 1; // high pass - //multipliers - float *dp = lsrc; - __m256 factor = _mm256_set1_ps(LIFTING_FACTORS::K); - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, dp+=8) - { - __m256 d = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, d)); - } - dp = hsrc; - factor = _mm256_set1_ps(LIFTING_FACTORS::K_inv); - for (ui32 i = (H_width + 7) >> 3; i > 0; --i, dp+=8) - { - __m256 d = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, d)); - } + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + __m256 factor; - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[7]); - const float *sph = hsrc + (even ? 0 : 1); - float *dpl = lsrc; - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, sph+=8, dpl+=8) - { - __m256 s1 = _mm256_loadu_ps(sph - 1); - __m256 s2 = _mm256_loadu_ps(sph); - __m256 d = _mm256_loadu_ps(dpl); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s1); - _mm256_store_ps(dpl, d); + factor = _mm256_set1_ps(K); + dp = aug; + for (ui32 i = (aug_width + 7) >> 3; i > 0; --i, dp += 8) + { + __m256 s = _mm256_load_ps(dp); + _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); + } + + factor = _mm256_set1_ps(K_inv); + dp = oth; + for (ui32 i = (oth_width + 7) >> 3; i > 0; --i, dp += 8) + { + __m256 s = _mm256_load_ps(dp); + _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); + } } - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[6]); - const float *spl = lsrc + (even ? 
0 : -1); - float *dph = hsrc; - for (ui32 i = (H_width + 7) >> 3; i > 0; --i, dph+=8, spl+=8) + // the actual horizontal transform + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) { - __m256 s1 = _mm256_loadu_ps(spl); - __m256 s2 = _mm256_loadu_ps(spl + 1); - __m256 d = _mm256_loadu_ps(dph); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s1); - _mm256_store_ps(dph, d); + const lifting_step* s = atk->get_step(j); + const float a = s->irv.Aatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const float* sp = oth + (ev ? 0 : 1); + float* dp = aug; + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= a * (sp[-1] + sp[0]); + + // swap buffers + float* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; } - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[5]); - sph = hsrc + (even ? 
0 : 1); - dpl = lsrc; - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, dpl+=8, sph+=8) + // combine both lsrc and hsrc into dst + if (even) { - __m256 s1 = _mm256_loadu_ps(sph - 1); - __m256 s2 = _mm256_loadu_ps(sph); - __m256 d = _mm256_loadu_ps(dpl); - s1 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s1); - _mm256_store_ps(dpl, d); + float* sph = hsrc->f32; + float* spl = lsrc->f32; + float* dp = dst->f32; + int i = width; + for ( ; i >= 8; i -= 16, dp += 16, spl += 8, sph += 8) + { + __m256 a = _mm256_load_ps(spl); + __m256 b = _mm256_load_ps(sph); + __m256 c = _mm256_unpacklo_ps(a, b); + __m256 d = _mm256_unpackhi_ps(a, b); + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); + _mm256_store_ps(dp, e); + _mm256_store_ps(dp + 8, f); + } + for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + { + __m128 a = _mm_load_ps(spl); + __m128 b = _mm_load_ps(sph); + __m128 c = _mm_unpacklo_ps(a, b); + __m128 d = _mm_unpackhi_ps(a, b); + _mm_store_ps(dp, c); + _mm_store_ps(dp + 4, d); + } } - - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict and combine - factor = _mm256_set1_ps(LIFTING_FACTORS::steps[4]); - dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 
0 : 1); - for (ui32 i = (width + 7) >> 3; i > 0; --i, spl+=8, sph+=8) + else { - __m256 s1 = _mm256_loadu_ps(spl); - __m256 s2 = _mm256_loadu_ps(spl + 1); - __m256 d = _mm256_load_ps(sph); - s2 = _mm256_mul_ps(factor, _mm256_add_ps(s1, s2)); - d = _mm256_add_ps(d, s2); - - __m128 a0 = _mm256_extractf128_ps(s1, 0); - __m128 a1 = _mm256_extractf128_ps(s1, 1); - __m128 a2 = _mm256_extractf128_ps(d, 0); - __m128 a3 = _mm256_extractf128_ps(d, 1); - _mm_storeu_ps(dp, _mm_unpacklo_ps(a0, a2)); dp += 4; - _mm_storeu_ps(dp, _mm_unpackhi_ps(a0, a2)); dp += 4; - _mm_storeu_ps(dp, _mm_unpacklo_ps(a1, a3)); dp += 4; - _mm_storeu_ps(dp, _mm_unpackhi_ps(a1, a3)); dp += 4; - -// s2 = _mm256_unpackhi_ps(s1, d); -// s1 = _mm256_unpacklo_ps(s1, d); -// d = _mm256_permute2f128_ps(s1, s2, (2 << 4) | 0); -// _mm256_storeu_ps(dp, d); -// d = _mm256_permute2f128_ps(s1, s2, (3 << 4) | 1); -// _mm256_storeu_ps(dp + 1, d); + float* sph = hsrc->f32; + float* spl = lsrc->f32; + float* dp = dst->f32; + int i = width; + for (; i >= 8; i -= 16, dp += 16, spl += 8, sph += 8) + { // i>=8 because we can exceed the aligned buffer by up to 7 + __m256 a = _mm256_load_ps(spl); + __m256 b = _mm256_load_ps(sph); + __m256 c = _mm256_unpacklo_ps(b, a); + __m256 d = _mm256_unpackhi_ps(b, a); + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); + _mm256_store_ps(dp, e); + _mm256_store_ps(dp + 8, f); + } + for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + { + __m128 a = _mm_load_ps(spl); + __m128 b = _mm_load_ps(sph); + __m128 c = _mm_unpacklo_ps(b, a); + __m128 d = _mm_unpackhi_ps(b, a); + _mm_store_ps(dp, c); + _mm_store_ps(dp + 4, d); + } } } - else - { + else { if (even) - line_dst->f32[0] = line_lsrc->f32[0]; + dst->f32[0] = lsrc->f32[0]; else - line_dst->f32[0] = line_hsrc->f32[0] * 0.5f; + dst->f32[0] = hsrc->f32[0] * 0.5f; } } - } -} + + ////////////////////////////////////////////////////////////////////////// + void 
avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + __m256 factor = _mm256_set1_ps(K); + float* dst = aug->f32; + repeat = (repeat + 7) >> 3; + for (ui32 i = repeat; i > 0; --i, dst += 8 ) + { + __m256 s = _mm256_load_ps(dst); + _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); + } + } + + + } // !local +} // !ojph diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index c484d279..816e9e8b 100644 --- a/src/core/transform/ojph_transform_local.h +++ b/src/core/transform/ojph_transform_local.h @@ -46,14 +46,6 @@ namespace ojph { namespace local { struct param_atk; - ////////////////////////////////////////////////////////////////////////// - struct LIFTING_FACTORS - { - static const float steps[8]; - static const float K; - static const float K_inv; - }; - ////////////////////////////////////////////////////////////////////////// // // @@ -66,38 +58,6 @@ namespace ojph { // Reversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_fwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_fwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_bwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_wvlt_bwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - 
////////////////////////////////////////////////////////////////////////// - void gen_rev_horz_wvlt_bwd_tx(line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even); - - - - - ///////////////////////////////////////////////////////////////////////// void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -118,33 +78,10 @@ namespace ojph { const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even); - - - - ////////////////////////////////////////////////////////////////////////// // Irreversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void gen_irrev_vert_wvlt_step(const line_buf* src1, const line_buf* src2, - line_buf *dst, int step_num, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void gen_irrev_vert_wvlt_K(const line_buf *src, line_buf *dst, - bool L_analysis_or_H_synthesis, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void gen_irrev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); - - ////////////////////////////////////////////////////////////////////////// - void gen_irrev_horz_wvlt_bwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); - - - - ///////////////////////////////////////////////////////////////////////// void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -168,10 +105,6 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); - - - - ////////////////////////////////////////////////////////////////////////// // // @@ -184,21 +117,28 @@ namespace ojph { // Irreversible functions 
////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void sse_irrev_vert_wvlt_step(const line_buf* src1, const line_buf* src2, - line_buf *dst, int step_num, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void sse_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void sse_irrev_vert_wvlt_K(const line_buf *src, line_buf *dst, - bool L_analysis_or_H_synthesis, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void sse_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void sse_irrev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void sse_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void sse_irrev_horz_wvlt_bwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void sse_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ////////////////////////////////////////////////////////////////////////// // @@ -212,33 +152,25 @@ namespace ojph { // Reversible functions 
////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_fwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_fwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_bwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_bwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_horz_wvlt_bwd_tx(line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_horz_syn(const param_atk* atk, const line_buf* dst, + 
const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even); ////////////////////////////////////////////////////////////////////////// @@ -253,21 +185,28 @@ namespace ojph { // Irreversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void avx_irrev_vert_wvlt_step(const line_buf* src1, const line_buf* src2, - line_buf *dst, int step_num, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void avx_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void avx_irrev_vert_wvlt_K(const line_buf *src, line_buf *dst, - bool L_analysis_or_H_synthesis, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void avx_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void avx_irrev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void avx_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void avx_irrev_horz_wvlt_bwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void avx_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void 
avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ////////////////////////////////////////////////////////////////////////// // @@ -281,33 +220,85 @@ namespace ojph { // Reversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_fwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_fwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_horz_wvlt_fwd_tx(line_buf* src, line_buf *ldst, - line_buf *hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even); ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_bwd_predict(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + // + // + // AVX512 Functions + // + // + 
////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_bwd_update(const line_buf* src1, - const line_buf* src2, - line_buf *dst, ui32 repeat); + // Irreversible functions + ////////////////////////////////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, + ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); + + + ////////////////////////////////////////////////////////////////////////// + // Reversible functions ////////////////////////////////////////////////////////////////////////// - void avx2_rev_horz_wvlt_bwd_tx(line_buf* dst, line_buf *lsrc, - line_buf *hsrc, ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void avx512_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void 
avx512_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void avx512_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); + + ///////////////////////////////////////////////////////////////////////// + void avx512_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even); ////////////////////////////////////////////////////////////////////////// // @@ -321,57 +312,52 @@ namespace ojph { // Reversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_fwd_predict(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_fwd_update(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat); - - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void wasm_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void wasm_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const 
line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_bwd_update(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void wasm_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, bool even); + ///////////////////////////////////////////////////////////////////////// + void wasm_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even); ////////////////////////////////////////////////////////////////////////// // Irreversible functions ////////////////////////////////////////////////////////////////////////// - ////////////////////////////////////////////////////////////////////////// - void wasm_irrev_vert_wvlt_step(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, int step_num, - ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void wasm_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, - bool L_analysis_or_H_synthesis, ui32 repeat); + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); - ////////////////////////////////////////////////////////////////////////// - void 
wasm_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, - bool even); + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat); - ////////////////////////////////////////////////////////////////////////// - void wasm_irrev_horz_wvlt_bwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, - bool even); + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, + ui32 width, bool even); + + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); } } diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index c299bc8d..281ff4a6 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -36,6 +36,7 @@ //***************************************************************************/ #include +#include #include "ojph_defs.h" #include "ojph_arch.h" @@ -43,273 +44,265 @@ #include "ojph_transform.h" #include "ojph_transform_local.h" -#include +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" namespace ojph { namespace local { ////////////////////////////////////////////////////////////////////////// - void sse_irrev_vert_wvlt_step(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, - int step_num, ui32 repeat) + void sse_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat) { - float *dst = line_dst->f32; - const float *src1 = line_src1->f32, *src2 = line_src2->f32; + __m128 factor = _mm_set1_ps(s->irv.Aatk); - __m128 factor = 
_mm_set1_ps(LIFTING_FACTORS::steps[step_num]); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + repeat = (repeat + 3) >> 2; + for (ui32 i = repeat; i > 0; --i, dst += 4, src1 += 4, src2 += 4) { __m128 s1 = _mm_load_ps(src1); __m128 s2 = _mm_load_ps(src2); - __m128 d = _mm_load_ps(dst); + __m128 d = _mm_load_ps(dst); d = _mm_add_ps(d, _mm_mul_ps(factor, _mm_add_ps(s1, s2))); _mm_store_ps(dst, d); } } ///////////////////////////////////////////////////////////////////////// - void sse_irrev_vert_wvlt_K(const line_buf* line_src, line_buf* line_dst, - bool L_analysis_or_H_synthesis, ui32 repeat) - { - float *dst = line_dst->f32; - const float *src = line_src->f32; - - float f = LIFTING_FACTORS::K_inv; - f = L_analysis_or_H_synthesis ? f : LIFTING_FACTORS::K; - __m128 factor = _mm_set1_ps(f); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src+=4) - { - __m128 s = _mm_load_ps(src); - _mm_store_ps(dst, _mm_mul_ps(factor, s)); - } - } - - ///////////////////////////////////////////////////////////////////////// - void sse_irrev_horz_wvlt_fwd_tx(line_buf* line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, - bool even) + void sse_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - float *src = line_src->f32; - float *ldst = line_ldst->f32, *hdst = line_hdst->f32; + // split src into ldst and hdst + if (even) + { + float* dph = hdst->f32; + float* dpl = ldst->f32; + float* sp = src->f32; - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + for (int i = width; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + { + __m128 a = _mm_load_ps(sp); + __m128 b = _mm_load_ps(sp + 4); - //extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const float* sp = src + (even ? 
1 : 0); - float *dph = hdst; - __m128 factor = _mm_set1_ps(LIFTING_FACTORS::steps[0]); - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4) - { //this is doing twice the work it needs to do - //it can be definitely written better - __m128 s1 = _mm_loadu_ps(sp - 1); - __m128 s2 = _mm_loadu_ps(sp + 1); - __m128 d = _mm_loadu_ps(sp); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - __m128 d1 = _mm_add_ps(d, s1); - sp += 4; - s1 = _mm_loadu_ps(sp - 1); - s2 = _mm_loadu_ps(sp + 1); - d = _mm_loadu_ps(sp); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - __m128 d2 = _mm_add_ps(d, s1); - sp += 4; - d = _mm_shuffle_ps(d1, d2, _MM_SHUFFLE(2, 0, 2, 0)); - _mm_store_ps(dph, d); - } + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = _mm_set1_ps(LIFTING_FACTORS::steps[1]); - sp = src + (even ? 0 : 1); - const float* sph = hdst + (even ? 0 : 1); - float *dpl = ldst; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4) - { - __m128 s1 = _mm_loadu_ps(sph - 1); - __m128 s2 = _mm_loadu_ps(sph); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - __m128 d1 = _mm_loadu_ps(sp); - __m128 d2 = _mm_loadu_ps(sp + 4); - __m128 d = _mm_shuffle_ps(d1, d2, _MM_SHUFFLE(2, 0, 2, 0)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dpl, d); + _mm_store_ps(dpl, c); + _mm_store_ps(dph, d); + } } - - //extension - ldst[-1] = ldst[0]; - ldst[L_width] = ldst[L_width-1]; - //predict - factor = _mm_set1_ps(LIFTING_FACTORS::steps[2]); - const float* spl = ldst + (even ? 
1 : 0); - dph = hdst; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, spl+=4, dph+=4) + else { - __m128 s1 = _mm_loadu_ps(spl - 1); - __m128 s2 = _mm_loadu_ps(spl); - __m128 d = _mm_loadu_ps(dph); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dph, d); - } + float* dph = hdst->f32; + float* dpl = ldst->f32; + float* sp = src->f32; - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = _mm_set1_ps(LIFTING_FACTORS::steps[3]); - sph = hdst + (even ? 0 : 1); - dpl = ldst; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sph+=4, dpl+=4) - { - __m128 s1 = _mm_loadu_ps(sph - 1); - __m128 s2 = _mm_loadu_ps(sph); - __m128 d = _mm_loadu_ps(dpl); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dpl, d); + for (int i = width; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + { + __m128 a = _mm_load_ps(sp); + __m128 b = _mm_load_ps(sp + 4); + + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); + + _mm_store_ps(dpl, d); + _mm_store_ps(dph, c); + } } - //multipliers - float *dp = ldst; - factor = _mm_set1_ps(LIFTING_FACTORS::K_inv); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dp+=4) + // the actual horizontal transform + float* hp = hdst->f32, * lp = ldst->f32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - __m128 d = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, d)); + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const float a = s->irv.Aatk; + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const float* sp = lp + (even ? 
1 : 0); + float* dp = hp; + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += a * (sp[-1] + sp[0]); + + // swap buffers + float* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } - dp = hdst; - factor = _mm_set1_ps(LIFTING_FACTORS::K); - for (int i = (H_width + 3) >> 2; i > 0; --i, dp+=4) - { - __m128 d = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, d)); + + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + __m128 factor; + + factor = _mm_set1_ps(K_inv); + dp = lp; + for (ui32 i = (l_width + 3) >> 2; i > 0; --i, dp += 4) + { + __m128 s = _mm_load_ps(dp); + _mm_store_ps(dp, _mm_mul_ps(factor, s)); + } + + factor = _mm_set1_ps(K); + dp = hp; + for (ui32 i = (h_width + 3) >> 2; i > 0; --i, dp += 4) + { + __m128 s = _mm_load_ps(dp); + _mm_store_ps(dp, _mm_mul_ps(factor, s)); + } } } - else - { + else { if (even) - line_ldst->f32[0] = line_src->f32[0]; + ldst->f32[0] = src->f32[0]; else - line_hdst->f32[0] = line_src->f32[0] + line_src->f32[0]; + hdst->f32[0] = src->f32[0] * 2.0f; } } + + ////////////////////////////////////////////////////////////////////////// + void sse_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, + const line_buf* sig, const line_buf* other, + ui32 repeat) + { + __m128 factor = _mm_set1_ps(s->irv.Aatk); - ///////////////////////////////////////////////////////////////////////// - void sse_irrev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, - bool even) + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + repeat = (repeat + 3) >> 2; + for (ui32 i = repeat; i > 0; --i, dst += 4, src1 += 4, src2 += 4) + { + __m128 s1 = _mm_load_ps(src1); + __m128 s2 = _mm_load_ps(src2); + __m128 d = _mm_load_ps(dst); + d = _mm_sub_ps(d, _mm_mul_ps(factor, _mm_add_ps(s1, s2))); + _mm_store_ps(dst, d); + } + } + + 
////////////////////////////////////////////////////////////////////////// + void sse_irv_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { - float *lsrc = line_lsrc->f32, *hsrc = line_hsrc->f32; - float *dst = line_dst->f32; + bool ev = even; + float* oth = hsrc->f32, * aug = lsrc->f32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + float* dp; + __m128 factor; - //multipliers - float *dp = lsrc; - __m128 factor = _mm_set1_ps(LIFTING_FACTORS::K); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dp+=4) - { - __m128 d = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, d)); - } - dp = hsrc; - factor = _mm_set1_ps(LIFTING_FACTORS::K_inv); - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dp+=4) - { - __m128 d = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, d)); - } + factor = _mm_set1_ps(K); + dp = aug; + for (ui32 i = (aug_width + 3) >> 2; i > 0; --i, dp += 4) + { + __m128 s = _mm_load_ps(dp); + _mm_store_ps(dp, _mm_mul_ps(factor, s)); + } - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = _mm_set1_ps(LIFTING_FACTORS::steps[7]); - const float *sph = hsrc + (even ? 
0 : 1); - float *dpl = lsrc; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dpl+=4, sph+=4) - { - __m128 s1 = _mm_loadu_ps(sph - 1); - __m128 s2 = _mm_loadu_ps(sph); - __m128 d = _mm_loadu_ps(dpl); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dpl, d); + factor = _mm_set1_ps(K_inv); + dp = oth; + for (ui32 i = (oth_width + 3) >> 2; i > 0; --i, dp += 4) + { + __m128 s = _mm_load_ps(dp); + _mm_store_ps(dp, _mm_mul_ps(factor, s)); + } } - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict - factor = _mm_set1_ps(LIFTING_FACTORS::steps[6]); - const float *spl = lsrc + (even ? 0 : -1); - float *dph = hsrc; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4, spl+=4) + // the actual horizontal transform + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) { - __m128 s1 = _mm_loadu_ps(spl); - __m128 s2 = _mm_loadu_ps(spl + 1); - __m128 d = _mm_loadu_ps(dph); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dph, d); + const lifting_step* s = atk->get_step(j); + const float a = s->irv.Aatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const float* sp = oth + (ev ? 0 : 1); + float* dp = aug; + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= a * (sp[-1] + sp[0]); + + // swap buffers + float* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; } - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = _mm_set1_ps(LIFTING_FACTORS::steps[5]); - sph = hsrc + (even ? 
0 : 1); - dpl = lsrc; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dpl+=4, sph+=4) + // combine both lsrc and hsrc into dst + if (even) { - __m128 s1 = _mm_loadu_ps(sph - 1); - __m128 s2 = _mm_loadu_ps(sph); - __m128 d = _mm_loadu_ps(dpl); - s1 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s1); - _mm_store_ps(dpl, d); + float* sph = hsrc->f32; + float* spl = lsrc->f32; + float* dp = dst->f32; + int i = width; + for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + { + __m128 a = _mm_load_ps(spl); + __m128 b = _mm_load_ps(sph); + __m128 c = _mm_unpacklo_ps(a, b); + __m128 d = _mm_unpackhi_ps(a, b); + _mm_store_ps(dp, c); + _mm_store_ps(dp + 4, d); + } } - - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict and combine - factor = _mm_set1_ps(LIFTING_FACTORS::steps[4]); - dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 0 : 1); - for (ui32 i = (width + 3) >> 2; i > 0; --i, spl+=4, sph+=4, dp+=8) + else { - __m128 s1 = _mm_loadu_ps(spl); - __m128 s2 = _mm_loadu_ps(spl + 1); - __m128 d = _mm_load_ps(sph); - s2 = _mm_mul_ps(factor, _mm_add_ps(s1, s2)); - d = _mm_add_ps(d, s2); - _mm_storeu_ps(dp, _mm_unpacklo_ps(s1, d)); - _mm_storeu_ps(dp + 4, _mm_unpackhi_ps(s1, d)); + float* sph = hsrc->f32; + float* spl = lsrc->f32; + float* dp = dst->f32; + int i = width; + for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + { + __m128 a = _mm_load_ps(spl); + __m128 b = _mm_load_ps(sph); + __m128 c = _mm_unpacklo_ps(b, a); + __m128 d = _mm_unpackhi_ps(b, a); + _mm_store_ps(dp, c); + _mm_store_ps(dp + 4, d); + } } } - else - { + else { if (even) - line_dst->f32[0] = line_lsrc->f32[0]; + dst->f32[0] = lsrc->f32[0]; else - line_dst->f32[0] = line_hsrc->f32[0] * 0.5f; + dst->f32[0] = hsrc->f32[0] * 0.5f; } } - } -} + + ////////////////////////////////////////////////////////////////////////// + void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + 
__m128 factor = _mm_set1_ps(K); + float* dst = aug->f32; + repeat = (repeat + 3) >> 2; + for (ui32 i = repeat; i > 0; --i, dst += 4) + { + __m128 s = _mm_load_ps(dst); + _mm_store_ps(dst, _mm_mul_ps(factor, s)); + } + } + + } // !local +} // !ojph diff --git a/src/core/transform/ojph_transform_sse2.cpp b/src/core/transform/ojph_transform_sse2.cpp index a607441a..5f3de49d 100644 --- a/src/core/transform/ojph_transform_sse2.cpp +++ b/src/core/transform/ojph_transform_sse2.cpp @@ -43,7 +43,7 @@ #include "ojph_transform.h" #include "ojph_transform_local.h" -#include +#include namespace ojph { namespace local { From fe24e552cbec80c1fe0b990134fa7f87bda97579 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 11 Apr 2024 10:34:50 +1000 Subject: [PATCH 058/348] Editorial + compilation fix + a potential bug fix --- src/core/transform/ojph_transform_avx.cpp | 58 ++++++++++++++++------- src/core/transform/ojph_transform_sse.cpp | 42 ++++++++-------- 2 files changed, 62 insertions(+), 38 deletions(-) diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 743ceee6..81fc6c43 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -59,8 +59,8 @@ namespace ojph { float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; - repeat = (repeat + 7) >> 3; - for (ui32 i = repeat; i > 0; --i, dst += 8, src1 += 8, src2 += 8) + int i = (int)repeat; + for ( ; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) { __m256 s1 = _mm256_load_ps(src1); __m256 s2 = _mm256_load_ps(src2); @@ -83,8 +83,8 @@ namespace ojph { float* dph = hdst->f32; float* dpl = ldst->f32; float* sp = src->f32; - - for (int i = width; i > 0; i -= 16, sp += 16, dpl += 8, dph += 8) + int i = (int)width; + for ( ; i > 8; i -= 16, sp += 16, dpl += 8, dph += 8) { __m256 a = _mm256_load_ps(sp); __m256 b = _mm256_load_ps(sp + 8); @@ -95,14 +95,23 @@ namespace ojph { _mm256_store_ps(dpl, e); _mm256_store_ps(dph, f); } 
+ for (; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + { + __m128 a = _mm_load_ps(sp); + __m128 b = _mm_load_ps(sp + 4); + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); + _mm_store_ps(dpl, c); + _mm_store_ps(dph, d); + } } else { float* dph = hdst->f32; float* dpl = ldst->f32; float* sp = src->f32; - - for (int i = width; i > 0; i -= 16, sp += 16, dpl += 8, dph += 8) + int i = (int)width; + for ( ; i > 8; i -= 16, sp += 16, dpl += 8, dph += 8) { __m256 a = _mm256_load_ps(sp); __m256 b = _mm256_load_ps(sp + 8); @@ -113,6 +122,15 @@ namespace ojph { _mm256_store_ps(dpl, f); _mm256_store_ps(dph, e); } + for (; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + { + __m128 a = _mm_load_ps(sp); + __m128 b = _mm_load_ps(sp + 4); + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); + _mm_store_ps(dpl, d); + _mm_store_ps(dph, c); + } } // the actual horizontal transform @@ -149,7 +167,8 @@ namespace ojph { factor = _mm256_set1_ps(K_inv); dp = lp; - for (ui32 i = (l_width + 7) >> 3; i > 0; --i, dp += 8) + int i = (int)l_width; + for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); @@ -157,7 +176,8 @@ namespace ojph { factor = _mm256_set1_ps(K); dp = hp; - for (ui32 i = (h_width + 7) >> 3; i > 0; --i, dp += 8) + int i = (int)h_width; + for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); @@ -181,8 +201,8 @@ namespace ojph { float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; - repeat = (repeat + 7) >> 3; - for (ui32 i = repeat; i > 0; --i, dst += 8, src1 += 8, src2 += 8) + int i = (int)repeat; + for ( ; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) { __m256 s1 = _mm256_load_ps(src1); __m256 s2 = _mm256_load_ps(src2); @@ -212,7 +232,8 @@ namespace ojph { factor = _mm256_set1_ps(K); dp = aug; - for (ui32 i = 
(aug_width + 7) >> 3; i > 0; --i, dp += 8) + int i = (int)aug_width; + for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); @@ -220,7 +241,8 @@ namespace ojph { factor = _mm256_set1_ps(K_inv); dp = oth; - for (ui32 i = (oth_width + 7) >> 3; i > 0; --i, dp += 8) + int i = (int)oth_width; + for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); @@ -255,8 +277,8 @@ namespace ojph { float* sph = hsrc->f32; float* spl = lsrc->f32; float* dp = dst->f32; - int i = width; - for ( ; i >= 8; i -= 16, dp += 16, spl += 8, sph += 8) + int i = (int)width; + for ( ; i > 8; i -= 16, dp += 16, spl += 8, sph += 8) { __m256 a = _mm256_load_ps(spl); __m256 b = _mm256_load_ps(sph); @@ -282,8 +304,8 @@ namespace ojph { float* sph = hsrc->f32; float* spl = lsrc->f32; float* dp = dst->f32; - int i = width; - for (; i >= 8; i -= 16, dp += 16, spl += 8, sph += 8) + int i = (int)width; + for (; i > 8; i -= 16, dp += 16, spl += 8, sph += 8) { // i>=8 because we can exceed the aligned buffer by up to 7 __m256 a = _mm256_load_ps(spl); __m256 b = _mm256_load_ps(sph); @@ -318,8 +340,8 @@ namespace ojph { { __m256 factor = _mm256_set1_ps(K); float* dst = aug->f32; - repeat = (repeat + 7) >> 3; - for (ui32 i = repeat; i > 0; --i, dst += 8 ) + int i = (int)repeat; + for ( ; i > 0; i -= 8, dst += 8 ) { __m256 s = _mm256_load_ps(dst); _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index 281ff4a6..3a4d39c8 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -59,8 +59,8 @@ namespace ojph { float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; - repeat = (repeat + 3) >> 2; - for (ui32 i = repeat; i > 0; --i, dst += 4, src1 += 4, src2 += 4) + int i = (int)repeat; + for ( ; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) { 
__m128 s1 = _mm_load_ps(src1); __m128 s2 = _mm_load_ps(src2); @@ -84,14 +84,13 @@ namespace ojph { float* dpl = ldst->f32; float* sp = src->f32; - for (int i = width; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + int i = (int)width; + for ( ; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) { __m128 a = _mm_load_ps(sp); __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, c); _mm_store_ps(dph, d); } @@ -102,14 +101,13 @@ namespace ojph { float* dpl = ldst->f32; float* sp = src->f32; - for (int i = width; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) + int i = (int)width; + for ( ; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) { __m128 a = _mm_load_ps(sp); __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, d); _mm_store_ps(dph, c); } @@ -149,7 +147,8 @@ namespace ojph { factor = _mm_set1_ps(K_inv); dp = lp; - for (ui32 i = (l_width + 3) >> 2; i > 0; --i, dp += 4) + int i = (int)l_width; + for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); _mm_store_ps(dp, _mm_mul_ps(factor, s)); @@ -157,7 +156,8 @@ namespace ojph { factor = _mm_set1_ps(K); dp = hp; - for (ui32 i = (h_width + 3) >> 2; i > 0; --i, dp += 4) + int i = (int)h_width; + for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); _mm_store_ps(dp, _mm_mul_ps(factor, s)); @@ -181,8 +181,8 @@ namespace ojph { float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; - repeat = (repeat + 3) >> 2; - for (ui32 i = repeat; i > 0; --i, dst += 4, src1 += 4, src2 += 4) + int i = (int)repeat; + for ( ; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) { __m128 s1 = _mm_load_ps(src1); __m128 s2 = _mm_load_ps(src2); @@ -212,7 +212,8 @@ namespace ojph { factor = _mm_set1_ps(K); dp = aug; - for (ui32 i = (aug_width + 3) >> 2; i > 0; --i, dp += 4) + int i = (int)aug_width; + 
for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); _mm_store_ps(dp, _mm_mul_ps(factor, s)); @@ -220,7 +221,8 @@ namespace ojph { factor = _mm_set1_ps(K_inv); dp = oth; - for (ui32 i = (oth_width + 3) >> 2; i > 0; --i, dp += 4) + int i = (int)oth_width; + for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); _mm_store_ps(dp, _mm_mul_ps(factor, s)); @@ -255,8 +257,8 @@ namespace ojph { float* sph = hsrc->f32; float* spl = lsrc->f32; float* dp = dst->f32; - int i = width; - for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + int i = (int)width; + for ( ; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) { __m128 a = _mm_load_ps(spl); __m128 b = _mm_load_ps(sph); @@ -271,8 +273,8 @@ namespace ojph { float* sph = hsrc->f32; float* spl = lsrc->f32; float* dp = dst->f32; - int i = width; - for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) + int i = (int)width; + for ( ; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) { __m128 a = _mm_load_ps(spl); __m128 b = _mm_load_ps(sph); @@ -296,8 +298,8 @@ namespace ojph { { __m128 factor = _mm_set1_ps(K); float* dst = aug->f32; - repeat = (repeat + 3) >> 2; - for (ui32 i = repeat; i > 0; --i, dst += 4) + int i = (int)repeat; + for ( ; i > 0; i -= 4, dst += 4) { __m128 s = _mm_load_ps(dst); _mm_store_ps(dst, _mm_mul_ps(factor, s)); From 5e4b627771abd338caecdf9d3088401633b118e7 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 11 Apr 2024 10:39:32 +1000 Subject: [PATCH 059/348] Syntax fix. 
--- src/core/transform/ojph_transform_avx.cpp | 10 ++++++---- src/core/transform/ojph_transform_sse.cpp | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 81fc6c43..66e3ec81 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -163,11 +163,12 @@ namespace ojph { float K = atk->get_K(); float K_inv = 1.0f / K; float* dp; + int i; __m256 factor; factor = _mm256_set1_ps(K_inv); dp = lp; - int i = (int)l_width; + i = (int)l_width; for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); @@ -176,7 +177,7 @@ namespace ojph { factor = _mm256_set1_ps(K); dp = hp; - int i = (int)h_width; + i = (int)h_width; for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); @@ -228,11 +229,12 @@ namespace ojph { float K = atk->get_K(); float K_inv = 1.0f / K; float* dp; + int i; __m256 factor; factor = _mm256_set1_ps(K); dp = aug; - int i = (int)aug_width; + i = (int)aug_width; for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); @@ -241,7 +243,7 @@ namespace ojph { factor = _mm256_set1_ps(K_inv); dp = oth; - int i = (int)oth_width; + i = (int)oth_width; for ( ; i > 0; i -= 8, dp += 8) { __m256 s = _mm256_load_ps(dp); diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index 3a4d39c8..39776717 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -143,11 +143,12 @@ namespace ojph { float K = atk->get_K(); float K_inv = 1.0f / K; float* dp; + int i; __m128 factor; factor = _mm_set1_ps(K_inv); dp = lp; - int i = (int)l_width; + i = (int)l_width; for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); @@ -156,7 +157,7 @@ namespace ojph { factor = _mm_set1_ps(K); dp = hp; - int i = (int)h_width; + i = (int)h_width; for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); @@ -208,11 
+209,12 @@ namespace ojph { float K = atk->get_K(); float K_inv = 1.0f / K; float* dp; + int i; __m128 factor; factor = _mm_set1_ps(K); dp = aug; - int i = (int)aug_width; + i = (int)aug_width; for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); @@ -221,7 +223,7 @@ namespace ojph { factor = _mm_set1_ps(K_inv); dp = oth; - int i = (int)oth_width; + i = (int)oth_width; for ( ; i > 0; i -= 4, dp += 4) { __m128 s = _mm_load_ps(dp); From 4b72faa72d1a4192115f3f77006c1eee5b036c7d Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 11 Apr 2024 12:52:21 +1000 Subject: [PATCH 060/348] A bug fix. --- src/core/codestream/ojph_resolution.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 14743249..3b25009f 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -496,8 +496,6 @@ namespace ojph { if (res_num == 0) { assert(child_res == NULL); - assert(bands[0].exists() && !bands[1].exists() - && !bands[2].exists() && !bands[3].exists()); bands[0].exchange_buf(vert_even ? 
sig->line : aug->line); bands[0].push_line(); return; From 1e9bc418b707d5dba6717d4ad92caee54967e5bf Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 11 Apr 2024 13:03:50 +1000 Subject: [PATCH 061/348] A bug fix --- src/core/codestream/ojph_resolution.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 3b25009f..6d6c500f 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -652,8 +652,6 @@ namespace ojph { if (res_num == 0) { assert(child_res == NULL); - assert(bands[0].exists() && !bands[1].exists() - && !bands[2].exists() && !bands[3].exists()); return bands[0].pull_line(); } From 7faf4576a7940b286917865e1002ae0201665ec9 Mon Sep 17 00:00:00 2001 From: aous72 Date: Thu, 11 Apr 2024 15:50:58 +1000 Subject: [PATCH 062/348] completed sse and avx. --- src/core/transform/ojph_transform_avx.cpp | 56 ++++++++++++++++++++--- src/core/transform/ojph_transform_sse.cpp | 56 ++++++++++++++++++++--- 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 66e3ec81..8499bf19 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -148,10 +148,32 @@ namespace ojph { lp[-1] = lp[0]; lp[l_width] = lp[l_width - 1]; // lifting step - const float* sp = lp + (even ? 
1 : 0); + const float* sp = lp; float* dp = hp; - for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp += a * (sp[-1] + sp[0]); + int i = (int)h_width; + __m256 f = _mm256_set1_ps(a); + if (even) + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256 m = _mm256_load_ps(sp); + __m256 n = _mm256_loadu_ps(sp + 1); + __m256 p = _mm256_load_ps(dp); + p = _mm256_add_ps(p, _mm256_mul_ps(f, _mm256_add_ps(m, n))); + _mm256_store_ps(dp, p); + } + } + else + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256 m = _mm256_load_ps(sp); + __m256 n = _mm256_loadu_ps(sp - 1); + __m256 p = _mm256_load_ps(dp); + p = _mm256_add_ps(p, _mm256_mul_ps(f, _mm256_add_ps(m, n))); + _mm256_store_ps(dp, p); + } + } // swap buffers float* t = lp; lp = hp; hp = t; @@ -262,10 +284,32 @@ namespace ojph { oth[-1] = oth[0]; oth[oth_width] = oth[oth_width - 1]; // lifting step - const float* sp = oth + (ev ? 0 : 1); + const float* sp = oth; float* dp = aug; - for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp -= a * (sp[-1] + sp[0]); + int i = (int)aug_width; + __m256 f = _mm256_set1_ps(a); + if (ev) + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256 m = _mm256_load_ps(sp); + __m256 n = _mm256_loadu_ps(sp - 1); + __m256 p = _mm256_load_ps(dp); + p = _mm256_sub_ps(p, _mm256_mul_ps(f, _mm256_add_ps(m, n))); + _mm256_store_ps(dp, p); + } + } + else + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256 m = _mm256_load_ps(sp); + __m256 n = _mm256_loadu_ps(sp + 1); + __m256 p = _mm256_load_ps(dp); + p = _mm256_sub_ps(p, _mm256_mul_ps(f, _mm256_add_ps(m, n))); + _mm256_store_ps(dp, p); + } + } // swap buffers float* t = aug; aug = oth; oth = t; diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index 39776717..69907841 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -128,10 +128,32 @@ namespace ojph { lp[-1] = lp[0]; lp[l_width] = lp[l_width - 1]; // lifting step - const float* 
sp = lp + (even ? 1 : 0); + const float* sp = lp; float* dp = hp; - for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp += a * (sp[-1] + sp[0]); + int i = (int)h_width; + __m128 f = _mm_set1_ps(a); + if (even) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128 m = _mm_load_ps(sp); + __m128 n = _mm_loadu_ps(sp + 1); + __m128 p = _mm_load_ps(dp); + p = _mm_add_ps(p, _mm_mul_ps(f, _mm_add_ps(m, n))); + _mm_store_ps(dp, p); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128 m = _mm_load_ps(sp); + __m128 n = _mm_loadu_ps(sp - 1); + __m128 p = _mm_load_ps(dp); + p = _mm_add_ps(p, _mm_mul_ps(f, _mm_add_ps(m, n))); + _mm_store_ps(dp, p); + } + } // swap buffers float* t = lp; lp = hp; hp = t; @@ -242,10 +264,32 @@ namespace ojph { oth[-1] = oth[0]; oth[oth_width] = oth[oth_width - 1]; // lifting step - const float* sp = oth + (ev ? 0 : 1); + const float* sp = oth; float* dp = aug; - for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp -= a * (sp[-1] + sp[0]); + int i = (int)aug_width; + __m128 f = _mm_set1_ps(a); + if (ev) + { + for ( ; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128 m = _mm_load_ps(sp); + __m128 n = _mm_loadu_ps(sp - 1); + __m128 p = _mm_load_ps(dp); + p = _mm_sub_ps(p, _mm_mul_ps(f, _mm_add_ps(m, n))); + _mm_store_ps(dp, p); + } + } + else + { + for ( ; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128 m = _mm_load_ps(sp); + __m128 n = _mm_loadu_ps(sp + 1); + __m128 p = _mm_load_ps(dp); + p = _mm_sub_ps(p, _mm_mul_ps(f, _mm_add_ps(m, n))); + _mm_store_ps(dp, p); + } + } // swap buffers float* t = aug; aug = oth; oth = t; From 2a7ff07f00f2313ec3c7b2956817e13c2ee92958 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 12 Apr 2024 13:32:25 +1000 Subject: [PATCH 063/348] Corrected code for reversible path. Simplified DWT code. 
--- src/core/codestream/ojph_params.cpp | 2 +- src/core/codestream/ojph_resolution.cpp | 8 +- src/core/transform/ojph_transform.cpp | 168 ++++++++++----------- src/core/transform/ojph_transform.h | 29 +--- src/core/transform/ojph_transform_avx.cpp | 59 +++----- src/core/transform/ojph_transform_local.h | 172 ++++++++-------------- src/core/transform/ojph_transform_sse.cpp | 58 +++----- 7 files changed, 193 insertions(+), 303 deletions(-) diff --git a/src/core/codestream/ojph_params.cpp b/src/core/codestream/ojph_params.cpp index 268135c4..b6ada178 100644 --- a/src/core/codestream/ojph_params.cpp +++ b/src/core/codestream/ojph_params.cpp @@ -1724,7 +1724,7 @@ namespace ojph { d[0].rev.Batk = 2; d[0].rev.Eatk = 2; d[1].rev.Aatk = -1; - d[1].rev.Batk = 0; + d[1].rev.Batk = 1; d[1].rev.Eatk = 1; } diff --git a/src/core/codestream/ojph_resolution.cpp b/src/core/codestream/ojph_resolution.cpp index 6d6c500f..b82a810a 100644 --- a/src/core/codestream/ojph_resolution.cpp +++ b/src/core/codestream/ojph_resolution.cpp @@ -524,7 +524,7 @@ namespace ojph { line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; const lifting_step* s = atk->get_step(num_steps - i - 1); - rev_vert_ana_step(s, sp1, sp2, dp, width); + rev_vert_step(s, sp1, sp2, dp, width, false); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } @@ -591,7 +591,7 @@ namespace ojph { line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; const lifting_step* s = atk->get_step(num_steps - i - 1); - irv_vert_ana_step(s, sp1, sp2, dp, width); + irv_vert_step(s, sp1, sp2, dp, width, false); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } @@ -711,7 +711,7 @@ namespace ojph { line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? 
ssp[i].line : sig->line; const lifting_step* s = atk->get_step(i); - rev_vert_syn_step(s, dp, sp1, sp2, width); + rev_vert_step(s, sp1, sp2, dp, width, true); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } @@ -805,7 +805,7 @@ namespace ojph { line_buf* sp1 = sig->active ? sig->line : ssp[i].line; line_buf* sp2 = ssp[i].active ? ssp[i].line : sig->line; const lifting_step* s = atk->get_step(i); - irv_vert_syn_step(s, dp, sp1, sp2, width); + irv_vert_step(s, sp1, sp2, dp, width, true); } lifting_buf t = *aug; *aug = ssp[i]; ssp[i] = *sig; *sig = t; } diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index eba4f006..2a219bca 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -54,20 +54,15 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_ana_step) + void (*rev_vert_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, - const line_buf* aug, ui32 repeat) = NULL; + const line_buf* aug, ui32 repeat, bool synthesis) = NULL; ///////////////////////////////////////////////////////////////////////// void (*rev_horz_ana) (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even) = NULL; - ///////////////////////////////////////////////////////////////////////// - void (*rev_vert_syn_step) - (const lifting_step* s, const line_buf* aug, const line_buf* sig, - const line_buf* other, ui32 repeat) = NULL; - ///////////////////////////////////////////////////////////////////////// void (*rev_horz_syn) (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, @@ -78,29 +73,24 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void 
(*irv_vert_ana_step) + void (*irv_vert_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, - const line_buf* aug, ui32 repeat) = NULL; + const line_buf* aug, ui32 repeat, bool synthesis) = NULL; + + ///////////////////////////////////////////////////////////////////////// + void (*irv_vert_times_K) + (float K, const line_buf* aug, ui32 repeat) = NULL; ///////////////////////////////////////////////////////////////////////// void (*irv_horz_ana) (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even) = NULL; - ///////////////////////////////////////////////////////////////////////// - void (*irv_vert_syn_step) - (const lifting_step* s, const line_buf* aug, const line_buf* sig, - const line_buf* other, ui32 repeat) = NULL; - ///////////////////////////////////////////////////////////////////////// void (*irv_horz_syn) (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even) = NULL; - ///////////////////////////////////////////////////////////////////////// - void (*irv_vert_times_K) - (float K, const line_buf* aug, ui32 repeat) = NULL; - //////////////////////////////////////////////////////////////////////////// static bool wavelet_transform_functions_initialized = false; @@ -112,27 +102,24 @@ namespace ojph { #if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN) - rev_vert_ana_step = gen_rev_vert_ana_step; + rev_vert_step = gen_rev_vert_step; rev_horz_ana = gen_rev_horz_ana; - rev_vert_syn_step = gen_rev_vert_syn_step; rev_horz_syn = gen_rev_horz_syn; - irv_vert_ana_step = gen_irv_vert_ana_step; - irv_horz_ana = gen_irv_horz_ana; - irv_vert_syn_step = gen_irv_vert_syn_step; - irv_horz_syn = gen_irv_horz_syn; + irv_vert_step = gen_irv_vert_step; irv_vert_times_K = gen_irv_vert_times_K; + irv_horz_ana = gen_irv_horz_ana; + irv_horz_syn = gen_irv_horz_syn; #ifndef OJPH_DISABLE_INTEL_SIMD int level = get_cpu_ext_level(); if 
(level >= X86_CPU_EXT_LEVEL_SSE) { - irv_vert_ana_step = sse_irv_vert_ana_step; + irv_vert_step = sse_irv_vert_step; + irv_vert_times_K = sse_irv_vert_times_K; irv_horz_ana = sse_irv_horz_ana; - irv_vert_syn_step = sse_irv_vert_syn_step; irv_horz_syn = sse_irv_horz_syn; - irv_vert_times_K = sse_irv_vert_times_K; } //if (level >= X86_CPU_EXT_LEVEL_SSE2) @@ -145,11 +132,10 @@ namespace ojph { if (level >= X86_CPU_EXT_LEVEL_AVX) { - irv_vert_ana_step = avx_irv_vert_ana_step; + irv_vert_step = avx_irv_vert_step; + irv_vert_times_K = avx_irv_vert_times_K; irv_horz_ana = avx_irv_horz_ana; - irv_vert_syn_step = avx_irv_vert_syn_step; irv_horz_syn = avx_irv_horz_syn; - irv_vert_times_K = avx_irv_vert_times_K; } //if (level >= X86_CPU_EXT_LEVEL_AVX2) @@ -197,9 +183,9 @@ namespace ojph { #if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN) ///////////////////////////////////////////////////////////////////////// - void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat) + void gen_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; @@ -207,12 +193,35 @@ namespace ojph { si32* dst = aug->i32; const si32* src1 = sig->i32, * src2 = other->i32; - if (a >= 0) - for (ui32 i = repeat; i > 0; --i) - *dst++ += (b + a * (*src1++ + *src2++)) >> e; - else - for (ui32 i = repeat; i > 0; --i) - *dst++ -= (- b - a * (*src1++ + *src2++)) >> e; + // The general definition of the wavelet in Part 2 is slightly + // different to part 2, although they are mathematically equivalent + // here, we identify the simpler form from Part 1 and employ them + if (a == 1 && b == 2 && e == 2) + { // normal update + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b + (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b + (*src1++ + 
*src2++)) >> e; + } + else if (a == -1 && b == 1 && e == 1) + { // normal predict + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ += (*src1++ + *src2++) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (*src1++ + *src2++) >> e; + } + else { // general case + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b + a * (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b + a * (*src1++ + *src2++)) >> e; + } } ///////////////////////////////////////////////////////////////////////// @@ -258,12 +267,15 @@ namespace ojph { // lifting step const si32* sp = lp + (even ? 1 : 0); si32* dp = hp; - if (a >= 0) + if (a == 1 && b == 2 && e == 2) // normal update for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp += (b + a * (sp[-1] + sp[0])) >> e; - else + *dp += (b + (sp[-1] + sp[0])) >> e; + else if (a == -1 && b == 1 && e == 1) // normal predict + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp -= (sp[-1] + sp[0]) >> e; + else // general case for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp -= (- b - a * (sp[-1] + sp[0])) >> e; + *dp += (b + a * (sp[-1] + sp[0])) >> e; // swap buffers si32* t = lp; lp = hp; hp = t; @@ -279,25 +291,6 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void gen_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat) - { - const si32 a = s->rev.Aatk; - const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; - - si32* dst = aug->i32; - const si32* src1 = sig->i32, * src2 = other->i32; - if (a >= 0) - for (ui32 i = repeat; i > 0; --i) - *dst++ -= (b + a * (*src1++ + *src2++)) >> e; - else - for (ui32 i = repeat; i > 0; --i) - *dst++ += (- b - a * (*src1++ + *src2++)) >> e; - } - ////////////////////////////////////////////////////////////////////////// void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const 
line_buf* hsrc, @@ -323,12 +316,15 @@ namespace ojph { // lifting step const si32* sp = oth + (ev ? 0 : 1); si32* dp = aug; - if (a >= 0) + if (a == 1 && b == 2 && e == 2) // normal update for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp -= (b + a * (sp[-1] + sp[0])) >> e; - else + *dp -= (b + (sp[-1] + sp[0])) >> e; + else if (a == -1 && b == 1 && e == 1) // normal predict + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp += (sp[-1] + sp[0]) >> e; + else // general case for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp += (- b - a * (sp[-1] + sp[0])) >> e; + *dp -= (b + a * (sp[-1] + sp[0])) >> e; // swap buffers si32* t = aug; aug = oth; oth = t; @@ -363,18 +359,29 @@ namespace ojph { } ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat) + void gen_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { float a = s->irv.Aatk; + if (synthesis) + a = -a; + float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; for (ui32 i = repeat; i > 0; --i) *dst++ += a * (*src1++ + *src2++); } - + + ////////////////////////////////////////////////////////////////////////// + void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + float* dst = aug->f32; + for (ui32 i = repeat; i > 0; --i) + *dst++ *= K; + } + ///////////////////////////////////////////////////////////////////////// void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, @@ -447,19 +454,6 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat) - { - float a = s->irv.Aatk; - - float* dst = aug->f32; 
- const float* src1 = sig->f32, * src2 = other->f32; - for (ui32 i = repeat; i > 0; --i) - *dst++ -= a * (*src1++ + *src2++); - } - ////////////////////////////////////////////////////////////////////////// void gen_irv_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -527,14 +521,6 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) - { - float* dst = aug->f32; - for (ui32 i = repeat; i > 0; --i) - *dst++ *= K; - } - #endif // !OJPH_ENABLE_WASM_SIMD } diff --git a/src/core/transform/ojph_transform.h b/src/core/transform/ojph_transform.h index 1aae8b82..0e59632e 100644 --- a/src/core/transform/ojph_transform.h +++ b/src/core/transform/ojph_transform.h @@ -55,56 +55,43 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_ana_step) + extern void (*rev_vert_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, - const line_buf* aug, ui32 repeat); + const line_buf* aug, ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// extern void (*rev_horz_ana) (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - extern void (*rev_vert_syn_step) - (const lifting_step* s, const line_buf* aug, const line_buf* sig, - const line_buf* other, ui32 repeat); - ///////////////////////////////////////////////////////////////////////// extern void (*rev_horz_syn) (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even); - - ///////////////////////////////////////////////////////////////////////// // Irreversible functions 
///////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - extern void (*irv_vert_ana_step) + extern void (*irv_vert_step) (const lifting_step* s, const line_buf* sig, const line_buf* other, - const line_buf* aug, ui32 repeat); + const line_buf* aug, ui32 repeat, bool synthesis); + + ///////////////////////////////////////////////////////////////////////// + extern void (*irv_vert_times_K) + (float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// extern void (*irv_horz_ana) (const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - extern void (*irv_vert_syn_step) - (const lifting_step* s, const line_buf* aug, const line_buf* sig, - const line_buf* other, ui32 repeat); - ///////////////////////////////////////////////////////////////////////// extern void (*irv_horz_syn) (const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - extern void (*irv_vert_times_K) - (float K, const line_buf* aug, ui32 repeat); - - } } diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 8499bf19..74f361ad 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -51,11 +51,15 @@ namespace ojph { namespace local { ////////////////////////////////////////////////////////////////////////// - void avx_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat) + void avx_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { - __m256 factor 
= _mm256_set1_ps(s->irv.Aatk); + float a = s->irv.Aatk; + if (synthesis) + a = -a; + + __m256 factor = _mm256_set1_ps(a); float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; @@ -70,6 +74,19 @@ namespace ojph { } } + ////////////////////////////////////////////////////////////////////////// + void avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + __m256 factor = _mm256_set1_ps(K); + float* dst = aug->f32; + int i = (int)repeat; + for (; i > 0; i -= 8, dst += 8) + { + __m256 s = _mm256_load_ps(dst); + _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); + } + } + ///////////////////////////////////////////////////////////////////////// void avx_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, @@ -215,26 +232,6 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void avx_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat) - { - __m256 factor = _mm256_set1_ps(s->irv.Aatk); - - float* dst = aug->f32; - const float* src1 = sig->f32, * src2 = other->f32; - int i = (int)repeat; - for ( ; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) - { - __m256 s1 = _mm256_load_ps(src1); - __m256 s2 = _mm256_load_ps(src2); - __m256 d = _mm256_load_ps(dst); - d = _mm256_sub_ps(d, _mm256_mul_ps(factor, _mm256_add_ps(s1, s2))); - _mm256_store_ps(dst, d); - } - } - ////////////////////////////////////////////////////////////////////////// void avx_irv_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -381,19 +378,5 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) - { - __m256 factor = _mm256_set1_ps(K); - float* dst = aug->f32; - int i = (int)repeat; - for ( ; i > 0; i -= 8, dst += 8 ) - { - __m256 s = 
_mm256_load_ps(dst); - _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); - } - } - - } // !local } // !ojph diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index 816e9e8b..fe7d1f27 100644 --- a/src/core/transform/ojph_transform_local.h +++ b/src/core/transform/ojph_transform_local.h @@ -55,56 +55,46 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - // Reversible functions + // Irreversible functions ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void gen_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void gen_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// - void gen_rev_horz_ana(const param_atk* atk, const line_buf* ldst, - const line_buf* hdst, const line_buf* src, - ui32 width, bool even); + void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// - void gen_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); + void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst, - const line_buf* lsrc, const line_buf* hsrc, + void gen_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even); 
////////////////////////////////////////////////////////////////////////// - // Irreversible functions + // Reversible functions ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void gen_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void gen_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// - void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + void gen_rev_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void gen_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - void gen_irv_horz_syn(const param_atk *atk, const line_buf* dst, - const line_buf *lsrc, const line_buf *hsrc, + void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); - ////////////////////////////////////////////////////////////////////////// // // @@ -118,28 +108,23 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void sse_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void sse_irv_vert_step(const lifting_step* s, const line_buf* sig, + 
const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); + + ///////////////////////////////////////////////////////////////////////// + void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// void sse_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void sse_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - ///////////////////////////////////////////////////////////////////////// void sse_irv_horz_syn(const param_atk *atk, const line_buf* dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); - ////////////////////////////////////////////////////////////////////////// // // @@ -153,20 +138,15 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void sse2_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// void sse2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - 
///////////////////////////////////////////////////////////////////////// void sse2_rev_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -186,28 +166,23 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void avx_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void avx_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); + + ///////////////////////////////////////////////////////////////////////// + void avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// void avx_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void avx_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - ///////////////////////////////////////////////////////////////////////// void avx_irv_horz_syn(const param_atk *atk, const line_buf* dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); - ////////////////////////////////////////////////////////////////////////// // // @@ -221,20 +196,15 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void 
avx2_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// void avx2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - ///////////////////////////////////////////////////////////////////////// void avx2_rev_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -253,48 +223,38 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void avx512_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void avx512_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// void avx512_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void avx512_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - ///////////////////////////////////////////////////////////////////////// void avx512_irv_horz_syn(const param_atk *atk, const line_buf* dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, 
bool even); - ///////////////////////////////////////////////////////////////////////// - void avx512_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); - ////////////////////////////////////////////////////////////////////////// // Reversible functions ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void avx512_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void avx512_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// void avx512_rev_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); - ///////////////////////////////////////////////////////////////////////// - void avx512_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); - ///////////////////////////////////////////////////////////////////////// void avx512_rev_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -309,55 +269,45 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - // Reversible functions + // Irreversible functions ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void wasm_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); 
///////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_ana(const param_atk* atk, const line_buf* ldst, - const line_buf* hdst, const line_buf* src, - ui32 width, bool even); + void wasm_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); ///////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat); + void wasm_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_syn(const param_atk* atk, const line_buf* dst, - const line_buf* lsrc, const line_buf* hsrc, + void wasm_irv_horz_syn(const param_atk *atk, const line_buf* dst, + const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even); ////////////////////////////////////////////////////////////////////////// - // Irreversible functions + // Reversible functions ////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////// - void wasm_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat); + void wasm_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis); ///////////////////////////////////////////////////////////////////////// - void wasm_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + void wasm_rev_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, ui32 width, bool even); ///////////////////////////////////////////////////////////////////////// - void wasm_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - 
ui32 repeat); - - ///////////////////////////////////////////////////////////////////////// - void wasm_irv_horz_syn(const param_atk *atk, const line_buf* dst, - const line_buf *lsrc, const line_buf *hsrc, + void wasm_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, ui32 width, bool even); - - ///////////////////////////////////////////////////////////////////////// - void wasm_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat); } } diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index 69907841..b61ea5e9 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -51,11 +51,15 @@ namespace ojph { namespace local { ////////////////////////////////////////////////////////////////////////// - void sse_irv_vert_ana_step(const lifting_step* s, const line_buf* sig, - const line_buf* other, const line_buf* aug, - ui32 repeat) + void sse_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { - __m128 factor = _mm_set1_ps(s->irv.Aatk); + float a = s->irv.Aatk; + if (synthesis) + a = -a; + + __m128 factor = _mm_set1_ps(a); float* dst = aug->f32; const float* src1 = sig->f32, * src2 = other->f32; @@ -70,6 +74,19 @@ namespace ojph { } } + ////////////////////////////////////////////////////////////////////////// + void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + __m128 factor = _mm_set1_ps(K); + float* dst = aug->f32; + int i = (int)repeat; + for (; i > 0; i -= 4, dst += 4) + { + __m128 s = _mm_load_ps(dst); + _mm_store_ps(dst, _mm_mul_ps(factor, s)); + } + } + ///////////////////////////////////////////////////////////////////////// void sse_irv_horz_ana(const param_atk* atk, const line_buf* ldst, const line_buf* hdst, const line_buf* src, @@ -195,26 +212,6 @@ namespace ojph { } } - 
////////////////////////////////////////////////////////////////////////// - void sse_irv_vert_syn_step(const lifting_step* s, const line_buf* aug, - const line_buf* sig, const line_buf* other, - ui32 repeat) - { - __m128 factor = _mm_set1_ps(s->irv.Aatk); - - float* dst = aug->f32; - const float* src1 = sig->f32, * src2 = other->f32; - int i = (int)repeat; - for ( ; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) - { - __m128 s1 = _mm_load_ps(src1); - __m128 s2 = _mm_load_ps(src2); - __m128 d = _mm_load_ps(dst); - d = _mm_sub_ps(d, _mm_mul_ps(factor, _mm_add_ps(s1, s2))); - _mm_store_ps(dst, d); - } - } - ////////////////////////////////////////////////////////////////////////// void sse_irv_horz_syn(const param_atk* atk, const line_buf* dst, const line_buf* lsrc, const line_buf* hsrc, @@ -339,18 +336,5 @@ namespace ojph { } } - ////////////////////////////////////////////////////////////////////////// - void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) - { - __m128 factor = _mm_set1_ps(K); - float* dst = aug->f32; - int i = (int)repeat; - for ( ; i > 0; i -= 4, dst += 4) - { - __m128 s = _mm_load_ps(dst); - _mm_store_ps(dst, _mm_mul_ps(factor, s)); - } - } - } // !local } // !ojph From 03ef77acbcc04da174b03d9312987b45b4c92e8c Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 12 Apr 2024 21:31:14 +1000 Subject: [PATCH 064/348] Completed sse, sse2, avx, avx2. Still wasm and avx512. 
--- src/core/transform/ojph_transform.cpp | 89 +-- src/core/transform/ojph_transform_avx.cpp | 168 +----- src/core/transform/ojph_transform_avx2.cpp | 617 +++++++++++++++------ src/core/transform/ojph_transform_local.h | 165 ++++++ src/core/transform/ojph_transform_sse.cpp | 135 +---- src/core/transform/ojph_transform_sse2.cpp | 554 ++++++++++++------ 6 files changed, 1086 insertions(+), 642 deletions(-) diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 2a219bca..95ab686c 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -112,6 +112,7 @@ namespace ojph { irv_horz_syn = gen_irv_horz_syn; #ifndef OJPH_DISABLE_INTEL_SIMD + int level = get_cpu_ext_level(); if (level >= X86_CPU_EXT_LEVEL_SSE) @@ -122,13 +123,12 @@ namespace ojph { irv_horz_syn = sse_irv_horz_syn; } - //if (level >= X86_CPU_EXT_LEVEL_SSE2) - //{ - // rev_vert_ana_step = sse2_rev_vert_ana_step; - // rev_horz_ana = sse2_rev_horz_ana; - // rev_vert_syn_step = sse2_rev_vert_syn_step; - // rev_horz_syn = sse2_rev_horz_syn; - //} + if (level >= X86_CPU_EXT_LEVEL_SSE2) + { + rev_vert_step = sse2_rev_vert_step; + rev_horz_ana = sse2_rev_horz_ana; + rev_horz_syn = sse2_rev_horz_syn; + } if (level >= X86_CPU_EXT_LEVEL_AVX) { @@ -138,26 +138,23 @@ namespace ojph { irv_horz_syn = avx_irv_horz_syn; } - //if (level >= X86_CPU_EXT_LEVEL_AVX2) - //{ - // rev_vert_ana_step = avx2_rev_vert_ana_step; - // rev_horz_ana = avx2_rev_horz_ana; - // rev_vert_syn_step = avx2_rev_vert_syn_step; - // rev_horz_syn = avx2_rev_horz_syn; - //} + if (level >= X86_CPU_EXT_LEVEL_AVX2) + { + rev_vert_step = avx2_rev_vert_step; + rev_horz_ana = avx2_rev_horz_ana; + rev_horz_syn = avx2_rev_horz_syn; + } //if (level >= X86_CPU_EXT_LEVEL_AVX512) //{ - // rev_vert_ana_step = avx512_rev_vert_ana_step; + // rev_vert_step = avx512_rev_vert_ana_step; // rev_horz_ana = avx512_rev_horz_ana; - // rev_vert_syn_step = avx512_rev_vert_syn_step; // rev_horz_syn 
= avx512_rev_horz_syn; - // irv_vert_ana_step = avx512_irv_vert_ana_step; - // irv_horz_ana = avx512_irv_horz_ana; + // irv_vert_step = avx512_irv_vert_step; + // irv_vert_times_K = avx512_irv_vert_times_K; // irv_vert_syn_step = avx512_irv_vert_syn_step; // irv_horz_syn = avx512_irv_horz_syn; - // irv_vert_times_K = avx512_irv_vert_times_K; //} #endif // !OJPH_DISABLE_INTEL_SIMD @@ -196,17 +193,17 @@ namespace ojph { // The general definition of the wavelet in Part 2 is slightly // different to part 2, although they are mathematically equivalent // here, we identify the simpler form from Part 1 and employ them - if (a == 1 && b == 2 && e == 2) - { // normal update + if (a == 1) + { // 5/3 update and any case with a == 1 if (synthesis) for (ui32 i = repeat; i > 0; --i) - *dst++ -= (b + (*src1++ + *src2++)) >> e; + *dst++ -= (b + *src1++ + *src2++) >> e; else for (ui32 i = repeat; i > 0; --i) - *dst++ += (b + (*src1++ + *src2++)) >> e; + *dst++ += (b + *src1++ + *src2++) >> e; } else if (a == -1 && b == 1 && e == 1) - { // normal predict + { // 5/3 predict if (synthesis) for (ui32 i = repeat; i > 0; --i) *dst++ += (*src1++ + *src2++) >> e; @@ -214,6 +211,15 @@ namespace ojph { for (ui32 i = repeat; i > 0; --i) *dst++ -= (*src1++ + *src2++) >> e; } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b - (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b - (*src1++ + *src2++)) >> e; + } else { // general case if (synthesis) for (ui32 i = repeat; i > 0; --i) @@ -267,15 +273,26 @@ namespace ojph { // lifting step const si32* sp = lp + (even ? 
1 : 0); si32* dp = hp; - if (a == 1 && b == 2 && e == 2) // normal update + if (a == 1) + { // 5/3 update and any case with a == 1 for (ui32 i = h_width; i > 0; --i, sp++, dp++) *dp += (b + (sp[-1] + sp[0])) >> e; - else if (a == -1 && b == 1 && e == 1) // normal predict + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict for (ui32 i = h_width; i > 0; --i, sp++, dp++) *dp -= (sp[-1] + sp[0]) >> e; - else // general case + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += (b - (sp[-1] + sp[0])) >> e; + } + else { + // general case for (ui32 i = h_width; i > 0; --i, sp++, dp++) *dp += (b + a * (sp[-1] + sp[0])) >> e; + } // swap buffers si32* t = lp; lp = hp; hp = t; @@ -316,15 +333,26 @@ namespace ojph { // lifting step const si32* sp = oth + (ev ? 0 : 1); si32* dp = aug; - if (a == 1 && b == 2 && e == 2) // normal update + if (a == 1) + { // 5/3 update and any case with a == 1 for (ui32 i = aug_width; i > 0; --i, sp++, dp++) *dp -= (b + (sp[-1] + sp[0])) >> e; - else if (a == -1 && b == 1 && e == 1) // normal predict + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict for (ui32 i = aug_width; i > 0; --i, sp++, dp++) *dp += (sp[-1] + sp[0]) >> e; - else // general case + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b - (sp[-1] + sp[0])) >> e; + } + else { + // general case for (ui32 i = aug_width; i > 0; --i, sp++, dp++) *dp -= (b + a * (sp[-1] + sp[0])) >> e; + } // swap buffers si32* t = aug; aug = oth; oth = t; @@ -413,7 +441,6 @@ namespace ojph { ui32 num_steps = atk->get_num_steps(); for (ui32 j = num_steps; j > 0; --j) { - // first lifting step const lifting_step* s = atk->get_step(j - 1); const float a = s->irv.Aatk; diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index 74f361ad..e7933ff1 100644 --- 
a/src/core/transform/ojph_transform_avx.cpp +++ b/src/core/transform/ojph_transform_avx.cpp @@ -41,15 +41,26 @@ #include "ojph_defs.h" #include "ojph_arch.h" #include "ojph_mem.h" -#include "ojph_transform.h" -#include "ojph_transform_local.h" - #include "ojph_params.h" #include "../codestream/ojph_params_local.h" +#include "ojph_transform.h" +#include "ojph_transform_local.h" + namespace ojph { namespace local { + ////////////////////////////////////////////////////////////////////////// + static inline void avx_multiply_const(float* p, float f, int width) + { + __m256 factor = _mm256_set1_ps(f); + for (; width > 0; width -= 8, p += 8) + { + __m256 s = _mm256_load_ps(p); + _mm256_store_ps(p, _mm256_mul_ps(factor, s)); + } + } + ////////////////////////////////////////////////////////////////////////// void avx_irv_vert_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -95,59 +106,12 @@ namespace ojph { if (width > 1) { // split src into ldst and hdst - if (even) { - float* dph = hdst->f32; float* dpl = ldst->f32; - float* sp = src->f32; - int i = (int)width; - for ( ; i > 8; i -= 16, sp += 16, dpl += 8, dph += 8) - { - __m256 a = _mm256_load_ps(sp); - __m256 b = _mm256_load_ps(sp + 8); - __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); - __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); - __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); - __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); - _mm256_store_ps(dpl, e); - _mm256_store_ps(dph, f); - } - for (; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) - { - __m128 a = _mm_load_ps(sp); - __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, c); - _mm_store_ps(dph, d); - } - } - else - { float* dph = hdst->f32; - float* dpl = ldst->f32; float* sp = src->f32; - int i = (int)width; - for ( ; i > 8; i -= 16, sp += 16, dpl += 
8, dph += 8) - { - __m256 a = _mm256_load_ps(sp); - __m256 b = _mm256_load_ps(sp + 8); - __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); - __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); - __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); - __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); - _mm256_store_ps(dpl, f); - _mm256_store_ps(dph, e); - } - for (; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) - { - __m128 a = _mm_load_ps(sp); - __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, d); - _mm_store_ps(dph, c); - } + int w = (int)width; + AVX_DEINTERLEAVE(dpl, dph, sp, w, even); } // the actual horizontal transform @@ -157,7 +121,6 @@ namespace ojph { ui32 num_steps = atk->get_num_steps(); for (ui32 j = num_steps; j > 0; --j) { - // first lifting step const lifting_step* s = atk->get_step(j - 1); const float a = s->irv.Aatk; @@ -201,27 +164,8 @@ namespace ojph { { // multiply by K or 1/K float K = atk->get_K(); float K_inv = 1.0f / K; - float* dp; - int i; - __m256 factor; - - factor = _mm256_set1_ps(K_inv); - dp = lp; - i = (int)l_width; - for ( ; i > 0; i -= 8, dp += 8) - { - __m256 s = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); - } - - factor = _mm256_set1_ps(K); - dp = hp; - i = (int)h_width; - for ( ; i > 0; i -= 8, dp += 8) - { - __m256 s = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); - } + avx_multiply_const(lp, K_inv, (int)l_width); + avx_multiply_const(hp, K, (int)h_width); } } else { @@ -247,27 +191,8 @@ namespace ojph { { // multiply by K or 1/K float K = atk->get_K(); float K_inv = 1.0f / K; - float* dp; - int i; - __m256 factor; - - factor = _mm256_set1_ps(K); - dp = aug; - i = (int)aug_width; - for ( ; i > 0; i -= 8, dp += 8) - { - __m256 s = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); - } - - factor = 
_mm256_set1_ps(K_inv); - dp = oth; - i = (int)oth_width; - for ( ; i > 0; i -= 8, dp += 8) - { - __m256 s = _mm256_load_ps(dp); - _mm256_store_ps(dp, _mm256_mul_ps(factor, s)); - } + avx_multiply_const(aug, K, (int)aug_width); + avx_multiply_const(oth, K_inv, (int)oth_width); } // the actual horizontal transform @@ -315,59 +240,12 @@ namespace ojph { } // combine both lsrc and hsrc into dst - if (even) { - float* sph = hsrc->f32; - float* spl = lsrc->f32; float* dp = dst->f32; - int i = (int)width; - for ( ; i > 8; i -= 16, dp += 16, spl += 8, sph += 8) - { - __m256 a = _mm256_load_ps(spl); - __m256 b = _mm256_load_ps(sph); - __m256 c = _mm256_unpacklo_ps(a, b); - __m256 d = _mm256_unpackhi_ps(a, b); - __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); - __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); - _mm256_store_ps(dp, e); - _mm256_store_ps(dp + 8, f); - } - for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) - { - __m128 a = _mm_load_ps(spl); - __m128 b = _mm_load_ps(sph); - __m128 c = _mm_unpacklo_ps(a, b); - __m128 d = _mm_unpackhi_ps(a, b); - _mm_store_ps(dp, c); - _mm_store_ps(dp + 4, d); - } - } - else - { - float* sph = hsrc->f32; float* spl = lsrc->f32; - float* dp = dst->f32; - int i = (int)width; - for (; i > 8; i -= 16, dp += 16, spl += 8, sph += 8) - { // i>=8 because we can exceed the aligned buffer by up to 7 - __m256 a = _mm256_load_ps(spl); - __m256 b = _mm256_load_ps(sph); - __m256 c = _mm256_unpacklo_ps(b, a); - __m256 d = _mm256_unpackhi_ps(b, a); - __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); - __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); - _mm256_store_ps(dp, e); - _mm256_store_ps(dp + 8, f); - } - for (; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) - { - __m128 a = _mm_load_ps(spl); - __m128 b = _mm_load_ps(sph); - __m128 c = _mm_unpacklo_ps(b, a); - __m128 d = _mm_unpackhi_ps(b, a); - _mm_store_ps(dp, c); - _mm_store_ps(dp + 4, d); - } + float* sph = hsrc->f32; + int w = (int)width; + 
AVX_INTERLEAVE(dp, spl, sph, w, even); } } else { diff --git a/src/core/transform/ojph_transform_avx2.cpp b/src/core/transform/ojph_transform_avx2.cpp index 915e246c..a7b16ddb 100644 --- a/src/core/transform/ojph_transform_avx2.cpp +++ b/src/core/transform/ojph_transform_avx2.cpp @@ -40,6 +40,9 @@ #include "ojph_defs.h" #include "ojph_arch.h" #include "ojph_mem.h" +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" + #include "ojph_transform.h" #include "ojph_transform_local.h" @@ -48,218 +51,470 @@ namespace ojph { namespace local { - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_fwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m256i va = _mm256_set1_epi32(a); + __m256i vb = _mm256_set1_epi32(b); - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src1+=8, src2+=8) - { - __m256i s1 = _mm256_load_si256((__m256i*)src1); - __m256i s2 = _mm256_load_si256((__m256i*)src2); - __m256i d = _mm256_load_si256((__m256i*)dst); - s1 = _mm256_srai_epi32(_mm256_add_epi32(s1, s2), 1); - d = _mm256_sub_epi32(d, s1); - _mm256_store_si256((__m256i*)dst, d); + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + // The general definition of the wavelet in Part 2 is slightly + // different to part 2, although they are mathematically equivalent + // here, we identify the simpler form from Part 1 and employ them + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 8, dst 
+= 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + else + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } } - } - - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_fwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - __m256i offset = _mm256_set1_epi32(2); - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src1+=8, src2+=8) - { - __m256i s1 = _mm256_load_si256((__m256i*)src1); - s1 = _mm256_add_epi32(s1, offset); - __m256i s2 = _mm256_load_si256((__m256i*)src2); - s2 = _mm256_add_epi32(s2, s1); - __m256i d = _mm256_load_si256((__m256i*)dst); - d = _mm256_add_epi32(d, _mm256_srai_epi32(s2, 2)); - _mm256_store_si256((__m256i*)dst, d); + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + else + for (; i > 0; 
i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + else + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + } + else { // general case + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } + else + for (; i > 0; i -= 8, dst += 8, src1 += 8, src2 += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)src1); + __m256i s2 = _mm256_load_si256((__m256i*)src2); + __m256i d = _mm256_load_si256((__m256i*)dst); + 
__m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dst, d); + } } } - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_horz_wvlt_fwd_tx(line_buf* line_src, line_buf *line_ldst, - line_buf *line_hdst,ui32 width, bool even) + ///////////////////////////////////////////////////////////////////////// + void avx2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - si32 *src = line_src->i32; - si32 *ldst = line_ldst->i32, *hdst = line_hdst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - // extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const si32* sp = src + (even ? 1 : 0); - si32 *dph = hdst; - const __m256i mask = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7); - for (ui32 i = (H_width + 7) >> 3; i > 0; --i, dph+=8) - { //this is doing twice the work it needs to do - //it can be definitely written better - __m256i s1 = _mm256_loadu_si256((__m256i*)(sp-1)); - __m256i s2 = _mm256_loadu_si256((__m256i*)(sp+1)); - __m256i d = _mm256_loadu_si256((__m256i*)sp); - s1 = _mm256_srai_epi32(_mm256_add_epi32(s1, s2), 1); - __m256i d1 = _mm256_sub_epi32(d, s1); - sp += 8; - s1 = _mm256_loadu_si256((__m256i*)(sp-1)); - s2 = _mm256_loadu_si256((__m256i*)(sp+1)); - d = _mm256_loadu_si256((__m256i*)sp); - s1 = _mm256_srai_epi32(_mm256_add_epi32(s1, s2), 1); - __m256i d2 = _mm256_sub_epi32(d, s1); - sp += 8; - d1 = _mm256_permutevar8x32_epi32(d1, mask); - d2 = _mm256_permutevar8x32_epi32(d2, mask); - d = _mm256_permute2x128_si256(d1, d2, (2 << 4) | 0); - _mm256_store_si256((__m256i*)dph, d); + // combine both lsrc and hsrc into dst + { + float* dpl = ldst->f32; + float* dph = hdst->f32; + float* 
sp = src->f32; + int w = (int)width; + AVX_DEINTERLEAVE(dpl, dph, sp, w, even); } - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - sp = src + (even ? 0 : 1); - const si32* sph = hdst + (even ? 0 : 1); - si32 *dpl = ldst; - __m256i offset = _mm256_set1_epi32(2); - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, sp+=16, sph+=8, dpl+=8) + si32* hp = hdst->i32, * lp = ldst->i32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - __m256i s1 = _mm256_loadu_si256((__m256i*)(sph-1)); - s1 = _mm256_add_epi32(s1, offset); - __m256i s2 = _mm256_loadu_si256((__m256i*)sph); - s2 = _mm256_add_epi32(s2, s1); - __m256i d1 = _mm256_loadu_si256((__m256i*)sp); - __m256i d2 = _mm256_loadu_si256((__m256i*)sp + 1); - d1 = _mm256_permutevar8x32_epi32(d1, mask); - d2 = _mm256_permutevar8x32_epi32(d2, mask); - __m256i d = _mm256_permute2x128_si256(d1, d2, (2 << 4) | 0); - d = _mm256_add_epi32(d, _mm256_srai_epi32(s2, 2)); - _mm256_store_si256((__m256i*)dpl, d); + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m256i va = _mm256_set1_epi32(a); + __m256i vb = _mm256_set1_epi32(b); + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const si32* sp = lp; + si32* dp = hp; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)h_width; + if (even) + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else + 
{ + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = 
_mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else { + // general case + int i = (int)h_width; + if (even) + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + + // swap buffers + si32* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } } - else - { + else { if (even) - line_ldst->i32[0] = line_src->i32[0]; + ldst->i32[0] = src->i32[0]; else - line_hdst->i32[0] = line_src->i32[0] << 1; + hdst->i32[0] = src->i32[0] << 1; } } - - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_bwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src1+=8, src2+=8) - { - __m256i s1 = _mm256_load_si256((__m256i*)src1); - __m256i s2 = _mm256_load_si256((__m256i*)src2); - __m256i d = _mm256_load_si256((__m256i*)dst); - s1 = _mm256_srai_epi32(_mm256_add_epi32(s1, s2), 1); - d = _mm256_add_epi32(d, s1); - _mm256_store_si256((__m256i*)dst, d); 
- } - } - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_vert_wvlt_bwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - __m256i offset = _mm256_set1_epi32(2); - for (ui32 i = (repeat + 7) >> 3; i > 0; --i, dst+=8, src1+=8, src2+=8) - { - __m256i s1 = _mm256_load_si256((__m256i*)src1); - s1 = _mm256_add_epi32(s1, offset); - __m256i s2 = _mm256_load_si256((__m256i*)src2); - s2 = _mm256_add_epi32(s2, s1); - __m256i d = _mm256_load_si256((__m256i*)dst); - d = _mm256_sub_epi32(d, _mm256_srai_epi32(s2, 2)); - _mm256_store_si256((__m256i*)dst, d); - } - } - - ////////////////////////////////////////////////////////////////////////// - void avx2_rev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, bool even) + void avx2_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { - si32 *lsrc = line_lsrc->i32, *hsrc = line_hsrc->i32; - si32 *dst = line_dst->i32; + bool ev = even; + si32* oth = hsrc->i32, * aug = lsrc->i32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + const lifting_step* s = atk->get_step(j); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m256i va = _mm256_set1_epi32(a); + __m256i vb = _mm256_set1_epi32(b); - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 
0 : 1)) >> 1; + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const si32* sp = oth; + si32* dp = aug; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)aug_width; + if (ev) + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else + { + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_add_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i w = _mm256_srai_epi32(t, e); + d = _mm256_add_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 8, sp += 8, dp += 8) + 
{ + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i v = _mm256_sub_epi32(vb, t); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } + else { + // general case + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp - 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + else + for (; i > 0; i -= 8, sp += 8, dp += 8) + { + __m256i s1 = _mm256_load_si256((__m256i*)sp); + __m256i s2 = _mm256_loadu_si256((__m256i*)(sp + 1)); + __m256i d = _mm256_load_si256((__m256i*)dp); + __m256i t = _mm256_add_epi32(s1, s2); + __m256i u = _mm256_mullo_epi32(va, t); + __m256i v = _mm256_add_epi32(vb, u); + __m256i w = _mm256_srai_epi32(v, e); + d = _mm256_sub_epi32(d, w); + _mm256_store_si256((__m256i*)dp, d); + } + } - // extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - const si32 *sph = hsrc + (even ? 
0 : 1); - si32 *spl = lsrc; - __m256i offset = _mm256_set1_epi32(2); - for (ui32 i = (L_width + 7) >> 3; i > 0; --i, sph+=8, spl+=8) - { - __m256i s1 = _mm256_loadu_si256((__m256i*)(sph-1)); - s1 = _mm256_add_epi32(s1, offset); - __m256i s2 = _mm256_loadu_si256((__m256i*)sph); - s2 = _mm256_add_epi32(s2, s1); - __m256i d = _mm256_load_si256((__m256i*)spl); - d = _mm256_sub_epi32(d, _mm256_srai_epi32(s2, 2)); - _mm256_store_si256((__m256i*)spl, d); + // swap buffers + si32* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; } - // extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width - 1]; - // inverse predict and combine - si32 *dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 0 : 1); - for (ui32 i = (width + 7) >> 3; i > 0; --i, sph+=8, spl+=8, dp+=16) + // combine both lsrc and hsrc into dst { - __m256i s1 = _mm256_loadu_si256((__m256i*)spl); - __m256i s2 = _mm256_loadu_si256((__m256i*)(spl+1)); - __m256i d = _mm256_load_si256((__m256i*)sph); - s2 = _mm256_srai_epi32(_mm256_add_epi32(s1, s2), 1); - d = _mm256_add_epi32(d, s2); - s2 = _mm256_unpackhi_epi32(s1, d); - s1 = _mm256_unpacklo_epi32(s1, d); - d = _mm256_permute2x128_si256(s1, s2, (2 << 4) | 0); - _mm256_storeu_si256((__m256i*)dp, d); - d = _mm256_permute2x128_si256(s1, s2, (3 << 4) | 1); - _mm256_storeu_si256((__m256i*)dp + 1, d); + float* dp = dst->f32; + float* spl = lsrc->f32; + float* sph = hsrc->f32; + int w = (int)width; + AVX_INTERLEAVE(dp, spl, sph, w, even); } } - else - { + else { if (even) - line_dst->i32[0] = line_lsrc->i32[0]; + dst->i32[0] = lsrc->i32[0]; else - line_dst->i32[0] = line_hsrc->i32[0] >> 1; + dst->i32[0] = hsrc->i32[0] >> 1; } } - } -} + + + + } // !local +} // !ojph diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index fe7d1f27..3ba9e6d0 100644 --- a/src/core/transform/ojph_transform_local.h +++ 
b/src/core/transform/ojph_transform_local.h @@ -45,6 +45,7 @@ namespace ojph { struct line_buf; namespace local { struct param_atk; + union lifting_step; ////////////////////////////////////////////////////////////////////////// // @@ -103,6 +104,60 @@ namespace ojph { // ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + // Supporting macros + ////////////////////////////////////////////////////////////////////////// + + ////////////////////////////////////////////////////////////////////////// + #define SSE_DEINTERLEAVE(dpl, dph, sp, width, even) \ + { \ + if (even) \ + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) \ + { \ + __m128 a = _mm_load_ps(sp); \ + __m128 b = _mm_load_ps(sp + 4); \ + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm_store_ps(dpl, c); \ + _mm_store_ps(dph, d); \ + } \ + else \ + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) \ + { \ + __m128 a = _mm_load_ps(sp); \ + __m128 b = _mm_load_ps(sp + 4); \ + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm_store_ps(dpl, d); \ + _mm_store_ps(dph, c); \ + } \ + } + + ////////////////////////////////////////////////////////////////////////// + #define SSE_INTERLEAVE(dp, spl, sph, width, even) \ + { \ + if (even) \ + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ + { \ + __m128 a = _mm_load_ps(spl); \ + __m128 b = _mm_load_ps(sph); \ + __m128 c = _mm_unpacklo_ps(a, b); \ + __m128 d = _mm_unpackhi_ps(a, b); \ + _mm_store_ps(dp, c); \ + _mm_store_ps(dp + 4, d); \ + } \ + else \ + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ + { \ + __m128 a = _mm_load_ps(spl); \ + __m128 b = _mm_load_ps(sph); \ + __m128 c = _mm_unpacklo_ps(b, a); \ + __m128 d = _mm_unpackhi_ps(b, a); \ + _mm_store_ps(dp, c); \ 
+ _mm_store_ps(dp + 4, d); \ + } \ + } + ////////////////////////////////////////////////////////////////////////// // Irreversible functions ////////////////////////////////////////////////////////////////////////// @@ -161,6 +216,116 @@ namespace ojph { // ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + // Supporting macros + ////////////////////////////////////////////////////////////////////////// + + ////////////////////////////////////////////////////////////////////////// + // We split multiples of 16 followed by multiples of 8, because + // we assume byte_alignment == 32 + #define AVX_DEINTERLEAVE(dpl, dph, sp, width, even) \ + { \ + if (even) \ + { \ + for (; width > 8; width -= 16, sp += 16, dpl += 8, dph += 8) \ + { \ + __m256 a = _mm256_load_ps(sp); \ + __m256 b = _mm256_load_ps(sp + 8); \ + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); \ + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); \ + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm256_store_ps(dpl, e); \ + _mm256_store_ps(dph, f); \ + } \ + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) \ + { \ + __m128 a = _mm_load_ps(sp); \ + __m128 b = _mm_load_ps(sp + 4); \ + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm_store_ps(dpl, c); \ + _mm_store_ps(dph, d); \ + } \ + } \ + else \ + { \ + for (; width > 8; width -= 16, sp += 16, dpl += 8, dph += 8) \ + { \ + __m256 a = _mm256_load_ps(sp); \ + __m256 b = _mm256_load_ps(sp + 8); \ + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); \ + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); \ + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm256_store_ps(dpl, f); 
\ + _mm256_store_ps(dph, e); \ + } \ + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) \ + { \ + __m128 a = _mm_load_ps(sp); \ + __m128 b = _mm_load_ps(sp + 4); \ + __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ + __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ + _mm_store_ps(dpl, d); \ + _mm_store_ps(dph, c); \ + } \ + } \ + } + + ////////////////////////////////////////////////////////////////////////// + // We split multiples of 16 followed by multiples of 8, because + // we assume byte_alignment == 32 + #define AVX_INTERLEAVE(dp, spl, sph, width, even) \ + { \ + if (even) \ + { \ + for (; width > 8; width -= 16, dp += 16, spl += 8, sph += 8) \ + { \ + __m256 a = _mm256_load_ps(spl); \ + __m256 b = _mm256_load_ps(sph); \ + __m256 c = _mm256_unpacklo_ps(a, b); \ + __m256 d = _mm256_unpackhi_ps(a, b); \ + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); \ + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); \ + _mm256_store_ps(dp, e); \ + _mm256_store_ps(dp + 8, f); \ + } \ + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ + { \ + __m128 a = _mm_load_ps(spl); \ + __m128 b = _mm_load_ps(sph); \ + __m128 c = _mm_unpacklo_ps(a, b); \ + __m128 d = _mm_unpackhi_ps(a, b); \ + _mm_store_ps(dp, c); \ + _mm_store_ps(dp + 4, d); \ + } \ + } \ + else \ + { \ + for (; width > 8; width -= 16, dp += 16, spl += 8, sph += 8) \ + { \ + __m256 a = _mm256_load_ps(spl); \ + __m256 b = _mm256_load_ps(sph); \ + __m256 c = _mm256_unpacklo_ps(b, a); \ + __m256 d = _mm256_unpackhi_ps(b, a); \ + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); \ + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); \ + _mm256_store_ps(dp, e); \ + _mm256_store_ps(dp + 8, f); \ + } \ + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ + { \ + __m128 a = _mm_load_ps(spl); \ + __m128 b = _mm_load_ps(sph); \ + __m128 c = _mm_unpacklo_ps(b, a); \ + __m128 d = _mm_unpackhi_ps(b, a); \ + _mm_store_ps(dp, c); \ + _mm_store_ps(dp 
+ 4, d); \ + } \ + } \ + } + ////////////////////////////////////////////////////////////////////////// // Irreversible functions ////////////////////////////////////////////////////////////////////////// diff --git a/src/core/transform/ojph_transform_sse.cpp b/src/core/transform/ojph_transform_sse.cpp index b61ea5e9..897a1939 100644 --- a/src/core/transform/ojph_transform_sse.cpp +++ b/src/core/transform/ojph_transform_sse.cpp @@ -41,15 +41,26 @@ #include "ojph_defs.h" #include "ojph_arch.h" #include "ojph_mem.h" -#include "ojph_transform.h" -#include "ojph_transform_local.h" - #include "ojph_params.h" #include "../codestream/ojph_params_local.h" +#include "ojph_transform.h" +#include "ojph_transform_local.h" + namespace ojph { namespace local { + ////////////////////////////////////////////////////////////////////////// + static inline void sse_multiply_const(float* p, float f, int width) + { + __m128 factor = _mm_set1_ps(f); + for (; width > 0; width -= 4, p += 4) + { + __m128 s = _mm_load_ps(p); + _mm_store_ps(p, _mm_mul_ps(factor, s)); + } + } + ////////////////////////////////////////////////////////////////////////// void sse_irv_vert_step(const lifting_step* s, const line_buf* sig, const line_buf* other, const line_buf* aug, @@ -77,14 +88,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void sse_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) { - __m128 factor = _mm_set1_ps(K); - float* dst = aug->f32; - int i = (int)repeat; - for (; i > 0; i -= 4, dst += 4) - { - __m128 s = _mm_load_ps(dst); - _mm_store_ps(dst, _mm_mul_ps(factor, s)); - } + sse_multiply_const(aug->f32, K, (int)repeat); } ///////////////////////////////////////////////////////////////////////// @@ -95,39 +99,12 @@ namespace ojph { if (width > 1) { // split src into ldst and hdst - if (even) { - float* dph = hdst->f32; float* dpl = ldst->f32; - float* sp = src->f32; - - int i = (int)width; - for ( ; i > 0; i -= 8, sp += 8, dpl += 4, 
dph += 4) - { - __m128 a = _mm_load_ps(sp); - __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, c); - _mm_store_ps(dph, d); - } - } - else - { float* dph = hdst->f32; - float* dpl = ldst->f32; float* sp = src->f32; - - int i = (int)width; - for ( ; i > 0; i -= 8, sp += 8, dpl += 4, dph += 4) - { - __m128 a = _mm_load_ps(sp); - __m128 b = _mm_load_ps(sp + 4); - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); - _mm_store_ps(dpl, d); - _mm_store_ps(dph, c); - } + int w = (int)width; + SSE_DEINTERLEAVE(dpl, dph, sp, w, even); } // the actual horizontal transform @@ -137,7 +114,6 @@ namespace ojph { ui32 num_steps = atk->get_num_steps(); for (ui32 j = num_steps; j > 0; --j) { - // first lifting step const lifting_step* s = atk->get_step(j - 1); const float a = s->irv.Aatk; @@ -181,27 +157,8 @@ namespace ojph { { // multiply by K or 1/K float K = atk->get_K(); float K_inv = 1.0f / K; - float* dp; - int i; - __m128 factor; - - factor = _mm_set1_ps(K_inv); - dp = lp; - i = (int)l_width; - for ( ; i > 0; i -= 4, dp += 4) - { - __m128 s = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, s)); - } - - factor = _mm_set1_ps(K); - dp = hp; - i = (int)h_width; - for ( ; i > 0; i -= 4, dp += 4) - { - __m128 s = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, s)); - } + sse_multiply_const(lp, K_inv, (int)l_width); + sse_multiply_const(hp, K, (int)h_width); } } else { @@ -227,27 +184,8 @@ namespace ojph { { // multiply by K or 1/K float K = atk->get_K(); float K_inv = 1.0f / K; - float* dp; - int i; - __m128 factor; - - factor = _mm_set1_ps(K); - dp = aug; - i = (int)aug_width; - for ( ; i > 0; i -= 4, dp += 4) - { - __m128 s = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, s)); - } - - factor = _mm_set1_ps(K_inv); - dp = oth; - i = (int)oth_width; - for ( ; i > 0; i -= 4, 
dp += 4) - { - __m128 s = _mm_load_ps(dp); - _mm_store_ps(dp, _mm_mul_ps(factor, s)); - } + sse_multiply_const(aug, K, (int)aug_width); + sse_multiply_const(oth, K_inv, (int)oth_width); } // the actual horizontal transform @@ -295,37 +233,12 @@ namespace ojph { } // combine both lsrc and hsrc into dst - if (even) { - float* sph = hsrc->f32; - float* spl = lsrc->f32; float* dp = dst->f32; - int i = (int)width; - for ( ; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) - { - __m128 a = _mm_load_ps(spl); - __m128 b = _mm_load_ps(sph); - __m128 c = _mm_unpacklo_ps(a, b); - __m128 d = _mm_unpackhi_ps(a, b); - _mm_store_ps(dp, c); - _mm_store_ps(dp + 4, d); - } - } - else - { - float* sph = hsrc->f32; float* spl = lsrc->f32; - float* dp = dst->f32; - int i = (int)width; - for ( ; i > 0; i -= 8, dp += 8, spl += 4, sph += 4) - { - __m128 a = _mm_load_ps(spl); - __m128 b = _mm_load_ps(sph); - __m128 c = _mm_unpacklo_ps(b, a); - __m128 d = _mm_unpackhi_ps(b, a); - _mm_store_ps(dp, c); - _mm_store_ps(dp + 4, d); - } + float* sph = hsrc->f32; + int w = (int)width; + SSE_INTERLEAVE(dp, spl, sph, w, even); } } else { diff --git a/src/core/transform/ojph_transform_sse2.cpp b/src/core/transform/ojph_transform_sse2.cpp index 5f3de49d..4939a219 100644 --- a/src/core/transform/ojph_transform_sse2.cpp +++ b/src/core/transform/ojph_transform_sse2.cpp @@ -40,6 +40,9 @@ #include "ojph_defs.h" #include "ojph_arch.h" #include "ojph_mem.h" +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" + #include "ojph_transform.h" #include "ojph_transform_local.h" @@ -48,211 +51,414 @@ namespace ojph { namespace local { - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_fwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* 
other, const line_buf* aug, + ui32 repeat, bool synthesis) { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m128i va = _mm_set1_epi32(a); + __m128i vb = _mm_set1_epi32(b); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - __m128i s1 = _mm_load_si128((__m128i*)src1); - __m128i s2 = _mm_load_si128((__m128i*)src2); - __m128i d = _mm_load_si128((__m128i*)dst); - s1 = _mm_srai_epi32(_mm_add_epi32(s1, s2), 1); - d = _mm_sub_epi32(d, s1); - _mm_store_si128((__m128i*)dst, d); + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + // The general definition of the wavelet in Part 2 is slightly + // different to Part 1, although they are mathematically equivalent + // here, we identify the simpler form from Part 1 and employ them + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } } - } - - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_fwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 
*dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - __m128i offset = _mm_set1_epi32(2); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - __m128i s1 = _mm_load_si128((__m128i*)src1); - s1 = _mm_add_epi32(s1, offset); - __m128i s2 = _mm_load_si128((__m128i*)src2); - s2 = _mm_add_epi32(s2, s1); - __m128i d = _mm_load_si128((__m128i*)dst); - d = _mm_add_epi32(d, _mm_srai_epi32(s2, 2)); - _mm_store_si128((__m128i*)dst, d); + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)src1); + __m128i s2 = _mm_load_si128((__m128i*)src2); + __m128i d = _mm_load_si128((__m128i*)dst); + __m128i t = 
_mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dst, d); + } + } + else { // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies 32bit x 32bit, + // keeping the LSBs + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b + a * (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b + a * (*src1++ + *src2++)) >> e; } } - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, bool even) + ///////////////////////////////////////////////////////////////////////// + void sse2_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - si32 *src = line_src->i32; - si32 *ldst = line_ldst->i32, *hdst = line_hdst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - // extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const si32* sp = src + (even ? 
1 : 0); - si32 *dph = hdst; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4) - { //this is doing twice the work it needs to do - //it can be definitely written better - __m128i s1 = _mm_loadu_si128((__m128i*)(sp-1)); - __m128i s2 = _mm_loadu_si128((__m128i*)(sp+1)); - __m128i d = _mm_loadu_si128((__m128i*)sp); - s1 = _mm_srai_epi32(_mm_add_epi32(s1, s2), 1); - __m128i d1 = _mm_sub_epi32(d, s1); - sp += 4; - s1 = _mm_loadu_si128((__m128i*)(sp-1)); - s2 = _mm_loadu_si128((__m128i*)(sp+1)); - d = _mm_loadu_si128((__m128i*)sp); - s1 = _mm_srai_epi32(_mm_add_epi32(s1, s2), 1); - __m128i d2 = _mm_sub_epi32(d, s1); - sp += 4; - d = _mm_castps_si128(_mm_shuffle_ps( - _mm_castsi128_ps(d1), _mm_castsi128_ps(d2), 0x88)); - _mm_store_si128((__m128i*)dph, d); + // split src into ldst and hdst + { + float* dpl = ldst->f32; + float* dph = hdst->f32; + float* sp = src->f32; + int w = (int)width; + SSE_DEINTERLEAVE(dpl, dph, sp, w, even); } - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - sp = src + (even ? 0 : 1); - const si32* sph = hdst + (even ? 0 : 1); - si32 *dpl = ldst; - __m128i offset = _mm_set1_epi32(2); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4) + si32* hp = hdst->i32, * lp = ldst->i32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - __m128i s1 = _mm_loadu_si128((__m128i*)(sph-1)); - s1 = _mm_add_epi32(s1, offset); - __m128i s2 = _mm_loadu_si128((__m128i*)sph); - s2 = _mm_add_epi32(s2, s1); - __m128i d1 = _mm_loadu_si128((__m128i*)sp); - __m128i d2 = _mm_loadu_si128((__m128i*)sp + 1); - __m128i d = _mm_castps_si128(_mm_shuffle_ps( - _mm_castsi128_ps(d1), _mm_castsi128_ps(d2), 0x88)); - d = _mm_add_epi32(d, _mm_srai_epi32(s2, 2)); - _mm_store_si128((__m128i*)dpl, d); + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m128i va = _mm_set1_epi32(a); + __m128i vb = _mm_set1_epi32(b); + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const si32* sp = lp; + si32* dp = hp; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)h_width; + if (even) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp + 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp + 1)); + 
__m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp + 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else { + // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies + // 32bit x 32bit, keeping the LSBs + if (even) + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += (b + a * (sp[0] + sp[1])) >> e; + else + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += (b + a * (sp[-1] + sp[0])) >> e; + } + + // swap buffers + si32* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } } - else - { + else { if (even) - line_ldst->i32[0] = line_src->i32[0]; + ldst->i32[0] = src->i32[0]; else - 
line_hdst->i32[0] = line_src->i32[0] << 1; - } - } - - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_bwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - __m128i s1 = _mm_load_si128((__m128i*)src1); - __m128i s2 = _mm_load_si128((__m128i*)src2); - __m128i d = _mm_load_si128((__m128i*)dst); - s1 = _mm_srai_epi32(_mm_add_epi32(s1, s2), 1); - d = _mm_add_epi32(d, s1); - _mm_store_si128((__m128i*)dst, d); + hdst->i32[0] = src->i32[0] << 1; } } - - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_vert_wvlt_bwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - __m128i offset = _mm_set1_epi32(2); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - __m128i s1 = _mm_load_si128((__m128i*)src1); - s1 = _mm_add_epi32(s1, offset); - __m128i s2 = _mm_load_si128((__m128i*)src2); - s2 = _mm_add_epi32(s2, s1); - __m128i d = _mm_load_si128((__m128i*)dst); - d = _mm_sub_epi32(d, _mm_srai_epi32(s2, 2)); - _mm_store_si128((__m128i*)dst, d); - } - } - ////////////////////////////////////////////////////////////////////////// - void sse2_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, bool even) + void sse2_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { - si32 *lsrc = line_lsrc->i32, *hsrc = line_hsrc->i32; - si32 *dst = line_dst->i32; + bool ev = even; + si32* oth = hsrc->i32, * aug = lsrc->i32; + ui32 aug_width = (width + (even ? 
1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + const lifting_step* s = atk->get_step(j); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + __m128i va = _mm_set1_epi32(a); + __m128i vb = _mm_set1_epi32(b); - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const si32* sp = oth; + si32* dp = aug; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)aug_width; + if (ev) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp + 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_add_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = 
_mm_loadu_si128((__m128i*)(sp + 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i w = _mm_srai_epi32(t, e); + d = _mm_add_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp - 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + __m128i s1 = _mm_load_si128((__m128i*)sp); + __m128i s2 = _mm_loadu_si128((__m128i*)(sp + 1)); + __m128i d = _mm_load_si128((__m128i*)dp); + __m128i t = _mm_add_epi32(s1, s2); + __m128i v = _mm_sub_epi32(vb, t); + __m128i w = _mm_srai_epi32(v, e); + d = _mm_sub_epi32(d, w); + _mm_store_si128((__m128i*)dp, d); + } + } + else { + // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies + // 32bit x 32bit, keeping the LSBs + if (ev) + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b + a * (sp[-1] + sp[0])) >> e; + else + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b + a * (sp[0] + sp[1])) >> e; + } - // extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - const si32 *sph = hsrc + (even ? 
0 : 1); - si32 *spl = lsrc; - __m128i offset = _mm_set1_epi32(2); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sph+=4, spl+=4) - { - __m128i s1 = _mm_loadu_si128((__m128i*)(sph-1)); - s1 = _mm_add_epi32(s1, offset); - __m128i s2 = _mm_loadu_si128((__m128i*)sph); - s2 = _mm_add_epi32(s2, s1); - __m128i d = _mm_load_si128((__m128i*)spl); - d = _mm_sub_epi32(d, _mm_srai_epi32(s2, 2)); - _mm_store_si128((__m128i*)spl, d); + // swap buffers + si32* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; } - // extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width - 1]; - // inverse predict and combine - si32 *dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 0 : 1); - for (ui32 i = (width + 3) >> 2; i > 0; --i, sph+=4, spl+=4, dp+=8) + // combine both lsrc and hsrc into dst { - __m128i s1 = _mm_loadu_si128((__m128i*)spl); - __m128i s2 = _mm_loadu_si128((__m128i*)(spl+1)); - __m128i d = _mm_load_si128((__m128i*)sph); - s2 = _mm_srai_epi32(_mm_add_epi32(s1, s2), 1); - d = _mm_add_epi32(d, s2); - _mm_storeu_si128((__m128i*)dp, _mm_unpacklo_epi32(s1, d)); - _mm_storeu_si128((__m128i*)dp + 1, _mm_unpackhi_epi32(s1, d)); + float* dp = dst->f32; + float* spl = lsrc->f32; + float* sph = hsrc->f32; + int w = (int)width; + SSE_INTERLEAVE(dp, spl, sph, w, even); } } - else - { + else { if (even) - line_dst->i32[0] = line_lsrc->i32[0]; + dst->i32[0] = lsrc->i32[0]; else - line_dst->i32[0] = line_hsrc->i32[0] >> 1; + dst->i32[0] = hsrc->i32[0] >> 1; } } - } -} + + } // !local +} // !ojph From d1f505f2869c600c31532a0ae48aacb377336296 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 12 Apr 2024 21:42:34 +1000 Subject: [PATCH 065/348] Addresses compilation warnings. 
--- src/core/transform/ojph_transform_avx2.cpp | 6 +++--- src/core/transform/ojph_transform_sse2.cpp | 9 +++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/core/transform/ojph_transform_avx2.cpp b/src/core/transform/ojph_transform_avx2.cpp index a7b16ddb..243fe87f 100644 --- a/src/core/transform/ojph_transform_avx2.cpp +++ b/src/core/transform/ojph_transform_avx2.cpp @@ -58,7 +58,7 @@ namespace ojph { { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; + const si32 e = s->rev.Eatk; __m256i va = _mm256_set1_epi32(a); __m256i vb = _mm256_set1_epi32(b); @@ -206,7 +206,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j - 1); const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; + const si32 e = s->rev.Eatk; __m256i va = _mm256_set1_epi32(a); __m256i vb = _mm256_set1_epi32(b); @@ -364,7 +364,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j); const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; + const si32 e = s->rev.Eatk; __m256i va = _mm256_set1_epi32(a); __m256i vb = _mm256_set1_epi32(b); diff --git a/src/core/transform/ojph_transform_sse2.cpp b/src/core/transform/ojph_transform_sse2.cpp index 4939a219..8328842a 100644 --- a/src/core/transform/ojph_transform_sse2.cpp +++ b/src/core/transform/ojph_transform_sse2.cpp @@ -58,8 +58,7 @@ namespace ojph { { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; - __m128i va = _mm_set1_epi32(a); + const si32 e = s->rev.Eatk; __m128i vb = _mm_set1_epi32(b); si32* dst = aug->i32; @@ -188,8 +187,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j - 1); const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; - __m128i va = _mm_set1_epi32(a); + const si32 e = s->rev.Eatk; __m128i vb = _mm_set1_epi32(b); // extension @@ -328,8 +326,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j); const si32 a = 
s->rev.Aatk; const si32 b = s->rev.Batk; - const ui32 e = s->rev.Eatk; - __m128i va = _mm_set1_epi32(a); + const si32 e = s->rev.Eatk; __m128i vb = _mm_set1_epi32(b); // extension From 1c4a14ce94a3fcd2073318eb86027106033a396b Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 09:45:20 +1000 Subject: [PATCH 066/348] avx512 dwt implemented --- src/core/CMakeLists.txt | 9 +- src/core/common/ojph_arch.h | 6 +- src/core/transform/ojph_transform.cpp | 24 +- src/core/transform/ojph_transform_avx.cpp | 9 +- src/core/transform/ojph_transform_avx2.cpp | 2 - src/core/transform/ojph_transform_avx512.cpp | 830 +++++++++++++++++++ src/core/transform/ojph_transform_local.h | 48 +- 7 files changed, 855 insertions(+), 73 deletions(-) create mode 100644 src/core/transform/ojph_transform_avx512.cpp diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 40b9649b..19123a2e 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -18,11 +18,12 @@ file(GLOB TRANSFORM_SSE "transform/*_sse.cpp") file(GLOB TRANSFORM_SSE2 "transform/*_sse2.cpp") file(GLOB TRANSFORM_AVX "transform/*_avx.cpp") file(GLOB TRANSFORM_AVX2 "transform/*_avx2.cpp") +file(GLOB TRANSFORM_AVX512 "transform/*_avx512.cpp") file(GLOB TRANSFORM_WASM "transform/*_wasm.cpp") list(REMOVE_ITEM CODESTREAM ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODESTREAM_WASM}) list(REMOVE_ITEM CODING ${CODING_SSSE3} ${CODING_WASM} ${CODING_AVX512}) -list(REMOVE_ITEM TRANSFORM ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_WASM}) +list(REMOVE_ITEM TRANSFORM ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_AVX512} ${TRANSFORM_WASM}) list(APPEND SOURCES ${CODESTREAM} ${CODING} ${COMMON} ${OTHERS} ${TRANSFORM}) source_group("codestream" FILES ${CODESTREAM}) @@ -42,10 +43,10 @@ if(EMSCRIPTEN) source_group("coding" FILES ${CODING_WASM}) source_group("transform" FILES ${TRANSFORM_WASM}) elseif(NOT 
OJPH_DISABLE_INTEL_SIMD) - add_library(openjph ${SOURCES} ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODING_SSSE3} ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2}) + add_library(openjph ${SOURCES} ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2} ${CODING_SSSE3} ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_AVX512}) source_group("codestream" FILES ${CODESTREAM_SSE} ${CODESTREAM_SSE2} ${CODESTREAM_AVX} ${CODESTREAM_AVX2}) source_group("coding" FILES ${CODING_SSSE3}) - source_group("transform" FILES ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2}) + source_group("transform" FILES ${TRANSFORM_SSE} ${TRANSFORM_SSE2} ${TRANSFORM_AVX} ${TRANSFORM_AVX2} ${TRANSFORM_AVX512}) if (OJPH_ENABLE_INTEL_AVX512) target_sources(openjph PRIVATE ${CODING_AVX512}) source_group("coding" FILES ${CODING_AVX512}) @@ -71,6 +72,7 @@ if (MSVC) set_source_files_properties(transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") set_source_files_properties(transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX") set_source_files_properties(transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX2") + set_source_files_properties(transform/ojph_transform_avx512.cpp PROPERTIES COMPILE_FLAGS "/arch:AVX512") else() set_source_files_properties(codestream/ojph_codestream_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) set_source_files_properties(codestream/ojph_codestream_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) @@ -80,6 +82,7 @@ else() set_source_files_properties(transform/ojph_colour_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) set_source_files_properties(transform/ojph_transform_avx.cpp PROPERTIES COMPILE_FLAGS -mavx) set_source_files_properties(transform/ojph_transform_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2) + set_source_files_properties(transform/ojph_transform_avx512.cpp PROPERTIES COMPILE_FLAGS -mavx512f) endif() if (MSVC) diff 
--git a/src/core/common/ojph_arch.h b/src/core/common/ojph_arch.h index 62b630bb..fa9d077d 100644 --- a/src/core/common/ojph_arch.h +++ b/src/core/common/ojph_arch.h @@ -194,11 +194,7 @@ namespace ojph { //////////////////////////////////////////////////////////////////////////// // constants //////////////////////////////////////////////////////////////////////////// -#ifdef OJPH_ENABLE_INTEL_AVX512 - const ui32 byte_alignment = 64; //64 bytes == 512 bits -#else - const ui32 byte_alignment = 32; //32 bytes == 256 bits -#endif + const ui32 byte_alignment = 64; // 64 bytes == 512 bits const ui32 log_byte_alignment = 31 - count_leading_zeros(byte_alignment); const ui32 object_alignment = 8; diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 95ab686c..83eed644 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -145,17 +145,19 @@ namespace ojph { rev_horz_syn = avx2_rev_horz_syn; } - //if (level >= X86_CPU_EXT_LEVEL_AVX512) - //{ - // rev_vert_step = avx512_rev_vert_ana_step; - // rev_horz_ana = avx512_rev_horz_ana; - // rev_horz_syn = avx512_rev_horz_syn; - - // irv_vert_step = avx512_irv_vert_step; - // irv_vert_times_K = avx512_irv_vert_times_K; - // irv_vert_syn_step = avx512_irv_vert_syn_step; - // irv_horz_syn = avx512_irv_horz_syn; - //} +#ifdef OJPH_ENABLE_INTEL_AVX512 + if (level >= X86_CPU_EXT_LEVEL_AVX512) + { + rev_vert_step = avx512_rev_vert_step; + rev_horz_ana = avx512_rev_horz_ana; + rev_horz_syn = avx512_rev_horz_syn; + + irv_vert_step = avx512_irv_vert_step; + irv_vert_times_K = avx512_irv_vert_times_K; + irv_horz_ana = avx512_irv_horz_ana; + irv_horz_syn = avx512_irv_horz_syn; + } +#endif // !OJPH_ENABLE_INTEL_AVX512 #endif // !OJPH_DISABLE_INTEL_SIMD diff --git a/src/core/transform/ojph_transform_avx.cpp b/src/core/transform/ojph_transform_avx.cpp index e7933ff1..08566624 100644 --- a/src/core/transform/ojph_transform_avx.cpp +++ 
b/src/core/transform/ojph_transform_avx.cpp @@ -88,14 +88,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// void avx_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) { - __m256 factor = _mm256_set1_ps(K); - float* dst = aug->f32; - int i = (int)repeat; - for (; i > 0; i -= 8, dst += 8) - { - __m256 s = _mm256_load_ps(dst); - _mm256_store_ps(dst, _mm256_mul_ps(factor, s)); - } + avx_multiply_const(aug->f32, K, (int)repeat); } ///////////////////////////////////////////////////////////////////////// diff --git a/src/core/transform/ojph_transform_avx2.cpp b/src/core/transform/ojph_transform_avx2.cpp index 243fe87f..847cd4c4 100644 --- a/src/core/transform/ojph_transform_avx2.cpp +++ b/src/core/transform/ojph_transform_avx2.cpp @@ -514,7 +514,5 @@ namespace ojph { } } - - } // !local } // !ojph diff --git a/src/core/transform/ojph_transform_avx512.cpp b/src/core/transform/ojph_transform_avx512.cpp new file mode 100644 index 00000000..efb7655a --- /dev/null +++ b/src/core/transform/ojph_transform_avx512.cpp @@ -0,0 +1,830 @@ +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. +// +// Copyright (c) 2019, Aous Naman +// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2019, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. +// File: ojph_transform_avx512.cpp +// Author: Aous Naman +// Date: 28 August 2019 +//***************************************************************************/ + +#include <cstdio> + +#include "ojph_defs.h" +#include "ojph_arch.h" +#include "ojph_mem.h" +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" + +#include "ojph_transform.h" +#include "ojph_transform_local.h" + +#include <immintrin.h> + +namespace ojph { + namespace local { + + ////////////////////////////////////////////////////////////////////////// + // We split multiples of 32 followed by multiples of 16, because + // we assume byte_alignment == 64 + static void avx512_deinterleave(float* dpl, float* dph, float* sp, + int width, bool even) + { + __m512i idx1 = _mm512_set_epi32( + 0x1E, 0x1C, 0x1A, 0x18, 0x16, 0x14, 0x12, 0x10, + 0x0E, 0x0C, 0x0A, 0x08, 0x06, 0x04, 0x02, 0x00 + ); + __m512i idx2 = _mm512_set_epi32( + 0x1F, 0x1D, 0x1B, 0x19, 0x17, 0x15, 0x13, 0x11, + 0x0F, 0x0D, 0x0B, 0x09, 0x07, 0x05, 0x03, 0x01 + ); + if (even) + { + for (; width > 16; width -= 32, sp += 32, 
dpl += 16, dph += 16) + { + __m512 a = _mm512_load_ps(sp); + __m512 b = _mm512_load_ps(sp + 16); + __m512 c = _mm512_permutex2var_ps(a, idx1, b); + __m512 d = _mm512_permutex2var_ps(a, idx2, b); + _mm512_store_ps(dpl, c); + _mm512_store_ps(dph, d); + } + for (; width > 0; width -= 16, sp += 16, dpl += 8, dph += 8) + { + __m256 a = _mm256_load_ps(sp); + __m256 b = _mm256_load_ps(sp + 8); + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); + _mm256_store_ps(dpl, e); + _mm256_store_ps(dph, f); + } + } + else + { + for (; width > 16; width -= 32, sp += 32, dpl += 16, dph += 16) + { + __m512 a = _mm512_load_ps(sp); + __m512 b = _mm512_load_ps(sp + 16); + __m512 c = _mm512_permutex2var_ps(a, idx2, b); + __m512 d = _mm512_permutex2var_ps(a, idx1, b); + _mm512_store_ps(dpl, c); + _mm512_store_ps(dph, d); + } + for (; width > 0; width -= 16, sp += 16, dpl += 8, dph += 8) + { + __m256 a = _mm256_load_ps(sp); + __m256 b = _mm256_load_ps(sp + 8); + __m256 c = _mm256_permute2f128_ps(a, b, (2 << 4) | (0)); + __m256 d = _mm256_permute2f128_ps(a, b, (3 << 4) | (1)); + __m256 e = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(2, 0, 2, 0)); + __m256 f = _mm256_shuffle_ps(c, d, _MM_SHUFFLE(3, 1, 3, 1)); + _mm256_store_ps(dpl, f); + _mm256_store_ps(dph, e); + } + } + } + + ////////////////////////////////////////////////////////////////////////// + // We split multiples of 32 followed by multiples of 16, because + // we assume byte_alignment == 64 + static void avx512_interleave(float* dp, float* spl, float* sph, + int width, bool even) + { + __m512i idx1 = _mm512_set_epi32( + 0x17, 0x7, 0x16, 0x6, 0x15, 0x5, 0x14, 0x4, + 0x13, 0x3, 0x12, 0x2, 0x11, 0x1, 0x10, 0x0 + ); + __m512i idx2 = _mm512_set_epi32( + 0x1F, 0xF, 0x1E, 0xE, 0x1D, 0xD, 0x1C, 0xC, + 0x1B, 0xB, 0x1A, 0xA, 0x19, 0x9, 0x18, 0x8 + ); + if 
(even) + { + for (; width > 16; width -= 32, dp += 32, spl += 16, sph += 16) + { + __m512 a = _mm512_load_ps(spl); + __m512 b = _mm512_load_ps(sph); + __m512 c = _mm512_permutex2var_ps(a, idx1, b); + __m512 d = _mm512_permutex2var_ps(a, idx2, b); + _mm512_store_ps(dp, c); + _mm512_store_ps(dp + 16, d); + } + for (; width > 0; width -= 16, dp += 16, spl += 8, sph += 8) + { + __m256 a = _mm256_load_ps(spl); + __m256 b = _mm256_load_ps(sph); + __m256 c = _mm256_unpacklo_ps(a, b); + __m256 d = _mm256_unpackhi_ps(a, b); + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); + _mm256_store_ps(dp, e); + _mm256_store_ps(dp + 8, f); + } + } + else + { + for (; width > 16; width -= 32, dp += 32, spl += 16, sph += 16) + { + __m512 a = _mm512_load_ps(spl); + __m512 b = _mm512_load_ps(sph); + __m512 c = _mm512_permutex2var_ps(b, idx1, a); + __m512 d = _mm512_permutex2var_ps(b, idx2, a); + _mm512_store_ps(dp, c); + _mm512_store_ps(dp + 16, d); + } + for (; width > 0; width -= 16, dp += 16, spl += 8, sph += 8) + { + __m256 a = _mm256_load_ps(spl); + __m256 b = _mm256_load_ps(sph); + __m256 c = _mm256_unpacklo_ps(b, a); + __m256 d = _mm256_unpackhi_ps(b, a); + __m256 e = _mm256_permute2f128_ps(c, d, (2 << 4) | (0)); + __m256 f = _mm256_permute2f128_ps(c, d, (3 << 4) | (1)); + _mm256_store_ps(dp, e); + _mm256_store_ps(dp + 8, f); + } + } + } + + ////////////////////////////////////////////////////////////////////////// + static inline void avx512_multiply_const(float* p, float f, int width) + { + __m512 factor = _mm512_set1_ps(f); + for (; width > 0; width -= 16, p += 16) + { + __m512 s = _mm512_load_ps(p); + _mm512_store_ps(p, _mm512_mul_ps(factor, s)); + } + } + + ////////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) + { + float a = s->irv.Aatk; + if 
(synthesis) + a = -a; + + __m512 factor = _mm512_set1_ps(a); + + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + int i = (int)repeat; + for ( ; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512 s1 = _mm512_load_ps(src1); + __m512 s2 = _mm512_load_ps(src2); + __m512 d = _mm512_load_ps(dst); + d = _mm512_add_ps(d, _mm512_mul_ps(factor, _mm512_add_ps(s1, s2))); + _mm512_store_ps(dst, d); + } + } + + ////////////////////////////////////////////////////////////////////////// + void avx512_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + avx512_multiply_const(aug->f32, K, (int)repeat); + } + + ///////////////////////////////////////////////////////////////////////// + void avx512_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) + { + if (width > 1) + { + // split src into ldst and hdst + { + float* dpl = ldst->f32; + float* dph = hdst->f32; + float* sp = src->f32; + int w = (int)width; + AVX_DEINTERLEAVE(dpl, dph, sp, w, even); + } + + // the actual horizontal transform + float* hp = hdst->f32, * lp = ldst->f32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) + { + const lifting_step* s = atk->get_step(j - 1); + const float a = s->irv.Aatk; + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const float* sp = lp; + float* dp = hp; + int i = (int)h_width; + __m512 f = _mm512_set1_ps(a); + if (even) + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512 m = _mm512_load_ps(sp); + __m512 n = _mm512_loadu_ps(sp + 1); + __m512 p = _mm512_load_ps(dp); + p = _mm512_add_ps(p, _mm512_mul_ps(f, _mm512_add_ps(m, n))); + _mm512_store_ps(dp, p); + } + } + else + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512 m = _mm512_load_ps(sp); + __m512 n = _mm512_loadu_ps(sp - 1); + __m512 p = _mm512_load_ps(dp); + p = _mm512_add_ps(p, _mm512_mul_ps(f, _mm512_add_ps(m, n))); + _mm512_store_ps(dp, p); + } + } + + // swap buffers + float* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; + } + + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + avx512_multiply_const(lp, K_inv, (int)l_width); + avx512_multiply_const(hp, K, (int)h_width); + } + } + else { + if (even) + ldst->f32[0] = src->f32[0]; + else + hdst->f32[0] = src->f32[0] * 2.0f; + } + } + + ////////////////////////////////////////////////////////////////////////// + void avx512_irv_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) + { + if (width > 1) + { + bool ev = even; + float* oth = hsrc->f32, * aug = lsrc->f32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 
0 : 1)) >> 1; // high pass + + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + avx512_multiply_const(aug, K, (int)aug_width); + avx512_multiply_const(oth, K_inv, (int)oth_width); + } + + // the actual horizontal transform + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + const lifting_step* s = atk->get_step(j); + const float a = s->irv.Aatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const float* sp = oth; + float* dp = aug; + int i = (int)aug_width; + __m512 f = _mm512_set1_ps(a); + if (ev) + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512 m = _mm512_load_ps(sp); + __m512 n = _mm512_loadu_ps(sp - 1); + __m512 p = _mm512_load_ps(dp); + p = _mm512_sub_ps(p, _mm512_mul_ps(f, _mm512_add_ps(m, n))); + _mm512_store_ps(dp, p); + } + } + else + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512 m = _mm512_load_ps(sp); + __m512 n = _mm512_loadu_ps(sp + 1); + __m512 p = _mm512_load_ps(dp); + p = _mm512_sub_ps(p, _mm512_mul_ps(f, _mm512_add_ps(m, n))); + _mm512_store_ps(dp, p); + } + } + + // swap buffers + float* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; + } + + // combine both lsrc and hsrc into dst + avx512_interleave(dst->f32, lsrc->f32, hsrc->f32, (int)width, even); + } + else { + if (even) + dst->f32[0] = lsrc->f32[0]; + else + dst->f32[0] = hsrc->f32[0] * 0.5f; + } + } + + + ///////////////////////////////////////////////////////////////////////// + void avx512_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) + { + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const si32 e = s->rev.Eatk; + __m512i va = _mm512_set1_epi32(a); + __m512i vb = _mm512_set1_epi32(b); + + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + // The general definition of the 
wavelet in Part 2 is slightly + // different to part 2, although they are mathematically equivalent + // here, we identify the simpler form from Part 1 and employ them + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + else + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + else + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict 
+ int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + else + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + } + else { // general case + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + else + for (; i > 0; i -= 16, dst += 16, src1 += 16, src2 += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)src1); + __m512i s2 = _mm512_load_si512((__m512i*)src2); + __m512i d = _mm512_load_si512((__m512i*)dst); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dst, d); + } + } + } + + ///////////////////////////////////////////////////////////////////////// + void avx512_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* 
hdst, const line_buf* src, + ui32 width, bool even) + { + if (width > 1) + { + // combine both lsrc and hsrc into dst + { + float* dpl = ldst->f32; + float* dph = hdst->f32; + float* sp = src->f32; + int w = (int)width; + AVX_DEINTERLEAVE(dpl, dph, sp, w, even); + } + + si32* hp = hdst->i32, * lp = ldst->i32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) + { + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const si32 e = s->rev.Eatk; + __m512i va = _mm512_set1_epi32(a); + __m512i vb = _mm512_set1_epi32(b); + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const si32* sp = lp; + si32* dp = hp; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)h_width; + if (even) + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = 
_mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else { + // general case + int i = (int)h_width; + if (even) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + 
else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + + // swap buffers + si32* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; + } + } + else { + if (even) + ldst->i32[0] = src->i32[0]; + else + hdst->i32[0] = src->i32[0] << 1; + } + } + + ////////////////////////////////////////////////////////////////////////// + void avx512_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) + { + if (width > 1) + { + bool ev = even; + si32* oth = hsrc->i32, * aug = lsrc->i32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + const lifting_step* s = atk->get_step(j); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const si32 e = s->rev.Eatk; + __m512i va = _mm512_set1_epi32(a); + __m512i vb = _mm512_set1_epi32(b); + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const si32* sp = oth; + si32* dp = aug; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)aug_width; + if (ev) + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else + { + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_add_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = 
_mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i w = _mm512_srai_epi32(t, e); + d = _mm512_add_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i v = _mm512_sub_epi32(vb, t); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + } + else { + // general case + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp - 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + _mm512_store_si512((__m512i*)dp, d); + } + else + for (; i > 0; i -= 16, sp += 16, dp += 16) + { + __m512i s1 = _mm512_load_si512((__m512i*)sp); + __m512i s2 = _mm512_loadu_si512((__m512i*)(sp + 1)); + __m512i d = _mm512_load_si512((__m512i*)dp); + __m512i t = _mm512_add_epi32(s1, s2); + __m512i u = _mm512_mullo_epi32(va, t); + __m512i v = _mm512_add_epi32(vb, u); + __m512i w = _mm512_srai_epi32(v, e); + d = _mm512_sub_epi32(d, w); + 
_mm512_store_si512((__m512i*)dp, d); + } + } + + // swap buffers + si32* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; + } + + // combine both lsrc and hsrc into dst + avx512_interleave(dst->f32, lsrc->f32, hsrc->f32, (int)width, even); + } + else { + if (even) + dst->i32[0] = lsrc->i32[0]; + else + dst->i32[0] = hsrc->i32[0] >> 1; + } + } + + } // !local +} // !ojph diff --git a/src/core/transform/ojph_transform_local.h b/src/core/transform/ojph_transform_local.h index 3ba9e6d0..ec2a2e12 100644 --- a/src/core/transform/ojph_transform_local.h +++ b/src/core/transform/ojph_transform_local.h @@ -221,13 +221,11 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// - // We split multiples of 16 followed by multiples of 8, because - // we assume byte_alignment == 32 #define AVX_DEINTERLEAVE(dpl, dph, sp, width, even) \ { \ if (even) \ { \ - for (; width > 8; width -= 16, sp += 16, dpl += 8, dph += 8) \ + for (; width > 0; width -= 16, sp += 16, dpl += 8, dph += 8) \ { \ __m256 a = _mm256_load_ps(sp); \ __m256 b = _mm256_load_ps(sp + 8); \ @@ -238,19 +236,10 @@ namespace ojph { _mm256_store_ps(dpl, e); \ _mm256_store_ps(dph, f); \ } \ - for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) \ - { \ - __m128 a = _mm_load_ps(sp); \ - __m128 b = _mm_load_ps(sp + 4); \ - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ - _mm_store_ps(dpl, c); \ - _mm_store_ps(dph, d); \ - } \ } \ else \ { \ - for (; width > 8; width -= 16, sp += 16, dpl += 8, dph += 8) \ + for (; width > 0; width -= 16, sp += 16, dpl += 8, dph += 8) \ { \ __m256 a = _mm256_load_ps(sp); \ __m256 b = _mm256_load_ps(sp + 8); \ @@ -261,26 +250,15 @@ namespace ojph { _mm256_store_ps(dpl, f); \ _mm256_store_ps(dph, e); \ } \ - for (; width > 0; width -= 8, sp += 8, dpl 
+= 4, dph += 4) \ - { \ - __m128 a = _mm_load_ps(sp); \ - __m128 b = _mm_load_ps(sp + 4); \ - __m128 c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); \ - __m128 d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); \ - _mm_store_ps(dpl, d); \ - _mm_store_ps(dph, c); \ - } \ } \ } ////////////////////////////////////////////////////////////////////////// - // We split multiples of 16 followed by multiples of 8, because - // we assume byte_alignment == 32 #define AVX_INTERLEAVE(dp, spl, sph, width, even) \ { \ if (even) \ { \ - for (; width > 8; width -= 16, dp += 16, spl += 8, sph += 8) \ + for (; width > 0; width -= 16, dp += 16, spl += 8, sph += 8) \ { \ __m256 a = _mm256_load_ps(spl); \ __m256 b = _mm256_load_ps(sph); \ @@ -291,19 +269,10 @@ namespace ojph { _mm256_store_ps(dp, e); \ _mm256_store_ps(dp + 8, f); \ } \ - for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ - { \ - __m128 a = _mm_load_ps(spl); \ - __m128 b = _mm_load_ps(sph); \ - __m128 c = _mm_unpacklo_ps(a, b); \ - __m128 d = _mm_unpackhi_ps(a, b); \ - _mm_store_ps(dp, c); \ - _mm_store_ps(dp + 4, d); \ - } \ } \ else \ { \ - for (; width > 8; width -= 16, dp += 16, spl += 8, sph += 8) \ + for (; width > 0; width -= 16, dp += 16, spl += 8, sph += 8) \ { \ __m256 a = _mm256_load_ps(spl); \ __m256 b = _mm256_load_ps(sph); \ @@ -314,15 +283,6 @@ namespace ojph { _mm256_store_ps(dp, e); \ _mm256_store_ps(dp + 8, f); \ } \ - for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) \ - { \ - __m128 a = _mm_load_ps(spl); \ - __m128 b = _mm_load_ps(sph); \ - __m128 c = _mm_unpacklo_ps(b, a); \ - __m128 d = _mm_unpackhi_ps(b, a); \ - _mm_store_ps(dp, c); \ - _mm_store_ps(dp + 4, d); \ - } \ } \ } From 30b32cc67f61f3aa63d9fb99b3d87cbd04c72bfa Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 09:51:18 +1000 Subject: [PATCH 067/348] Fix compilation, and a missing optimization. 
--- src/core/transform/ojph_transform_avx512.cpp | 22 +++++--------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/core/transform/ojph_transform_avx512.cpp b/src/core/transform/ojph_transform_avx512.cpp index efb7655a..02edca60 100644 --- a/src/core/transform/ojph_transform_avx512.cpp +++ b/src/core/transform/ojph_transform_avx512.cpp @@ -224,13 +224,7 @@ namespace ojph { if (width > 1) { // split src into ldst and hdst - { - float* dpl = ldst->f32; - float* dph = hdst->f32; - float* sp = src->f32; - int w = (int)width; - AVX_DEINTERLEAVE(dpl, dph, sp, w, even); - } + avx512_deinterleave(ldst->f32, hdst->f32, src->f32, (int)width, even); // the actual horizontal transform float* hp = hdst->f32, * lp = ldst->f32; @@ -376,7 +370,7 @@ namespace ojph { { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const si32 e = s->rev.Eatk; + const ui32 e = s->rev.Eatk; __m512i va = _mm512_set1_epi32(a); __m512i vb = _mm512_set1_epi32(b); @@ -506,13 +500,7 @@ namespace ojph { if (width > 1) { // combine both lsrc and hsrc into dst - { - float* dpl = ldst->f32; - float* dph = hdst->f32; - float* sp = src->f32; - int w = (int)width; - AVX_DEINTERLEAVE(dpl, dph, sp, w, even); - } + avx512_deinterleave(ldst->f32, hdst->f32, src->f32, (int)width, even); si32* hp = hdst->i32, * lp = ldst->i32; ui32 l_width = (width + (even ? 
1 : 0)) >> 1; // low pass @@ -524,7 +512,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j - 1); const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const si32 e = s->rev.Eatk; + const ui32 e = s->rev.Eatk; __m512i va = _mm512_set1_epi32(a); __m512i vb = _mm512_set1_epi32(b); @@ -682,7 +670,7 @@ namespace ojph { const lifting_step* s = atk->get_step(j); const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; - const si32 e = s->rev.Eatk; + const ui32 e = s->rev.Eatk; __m512i va = _mm512_set1_epi32(a); __m512i vb = _mm512_set1_epi32(b); From f28a90fce49edf94d44865add3299650058ebe6d Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 17:57:45 +1000 Subject: [PATCH 068/348] Wasm completed -- not tested yet. --- src/apps/ojph_compress/CMakeLists.txt | 2 +- src/apps/ojph_expand/CMakeLists.txt | 2 +- src/core/CMakeLists.txt | 2 +- src/core/transform/ojph_transform.cpp | 18 +- src/core/transform/ojph_transform_avx512.cpp | 10 +- src/core/transform/ojph_transform_wasm.cpp | 957 +++++++++++-------- tests/CMakeLists.txt | 16 +- 7 files changed, 596 insertions(+), 411 deletions(-) diff --git a/src/apps/ojph_compress/CMakeLists.txt b/src/apps/ojph_compress/CMakeLists.txt index bbb77abc..dadcca9b 100644 --- a/src/apps/ojph_compress/CMakeLists.txt +++ b/src/apps/ojph_compress/CMakeLists.txt @@ -17,7 +17,7 @@ source_group("others" FILES ${OJPH_IMG_IO}) source_group("common" FILES ${OJPH_IMG_IO_H}) if(EMSCRIPTEN) - add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) + add_compile_options(-std=c++11 -O3 -fexceptions) add_executable(ojph_compress ${SOURCES}) add_executable(ojph_compress_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) target_compile_options(ojph_compress_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) diff --git a/src/apps/ojph_expand/CMakeLists.txt b/src/apps/ojph_expand/CMakeLists.txt index c0ac185e..d4b65523 100644 --- a/src/apps/ojph_expand/CMakeLists.txt +++ b/src/apps/ojph_expand/CMakeLists.txt @@ -17,7 +17,7 
@@ source_group("others" FILES ${OJPH_IMG_IO}) source_group("common" FILES ${OJPH_IMG_IO_H}) if(EMSCRIPTEN) - add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) + add_compile_options(-std=c++11 -O3 -fexceptions) add_executable(ojph_expand ${SOURCES}) add_executable(ojph_expand_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) target_compile_options(ojph_expand_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 19123a2e..40fffa48 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -33,7 +33,7 @@ source_group("others" FILES ${OTHERS}) source_group("transform" FILES ${TRANSFORM}) if(EMSCRIPTEN) - add_compile_options(-std=c++11 -O3 -fexceptions -DOJPH_DISABLE_INTEL_SIMD) + add_compile_options(-std=c++11 -O3 -fexceptions) add_library(openjph ${SOURCES}) add_library(openjphsimd ${SOURCES} ${CODESTREAM_WASM} ${CODING_WASM} ${TRANSFORM_WASM}) target_include_directories(openjph PUBLIC common) diff --git a/src/core/transform/ojph_transform.cpp b/src/core/transform/ojph_transform.cpp index 83eed644..0dc5f95c 100644 --- a/src/core/transform/ojph_transform.cpp +++ b/src/core/transform/ojph_transform.cpp @@ -162,16 +162,14 @@ namespace ojph { #endif // !OJPH_DISABLE_INTEL_SIMD #else // OJPH_ENABLE_WASM_SIMD - rev_vert_ana_step = wasm_rev_vert_ana_step; - rev_horz_ana = wasm_rev_horz_ana; - rev_vert_syn_step = wasm_rev_vert_syn_step; - rev_horz_syn = wasm_rev_horz_syn; - - irv_vert_ana_step = wasm_irv_vert_ana_step; - irv_horz_ana = wasm_irv_horz_ana; - irv_vert_syn_step = wasm_irv_vert_syn_step; - irv_horz_syn = wasm_irv_horz_syn; - irv_vert_times_K = wasm_irv_vert_times_K; + rev_vert_step = wasm_rev_vert_step; + rev_horz_ana = wasm_rev_horz_ana; + rev_horz_syn = wasm_rev_horz_syn; + + irv_vert_step = wasm_irv_vert_step; + irv_vert_times_K = wasm_irv_vert_times_K; + irv_horz_ana = wasm_irv_horz_ana; + irv_horz_syn = wasm_irv_horz_syn; #endif // !OJPH_ENABLE_WASM_SIMD 
wavelet_transform_functions_initialized = true; diff --git a/src/core/transform/ojph_transform_avx512.cpp b/src/core/transform/ojph_transform_avx512.cpp index 02edca60..504aa870 100644 --- a/src/core/transform/ojph_transform_avx512.cpp +++ b/src/core/transform/ojph_transform_avx512.cpp @@ -2,9 +2,9 @@ // This software is released under the 2-Clause BSD license, included // below. // -// Copyright (c) 2019, Aous Naman -// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia -// Copyright (c) 2019, The University of New South Wales, Australia +// Copyright (c) 2019-2024, Aous Naman +// Copyright (c) 2019-2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2019-2024, The University of New South Wales, Australia // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -30,9 +30,9 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************/ // This file is part of the OpenJPH software implementation. 
-// File: ojph_transform_avx2.cpp +// File: ojph_transform_avx512.cpp // Author: Aous Naman -// Date: 28 August 2019 +// Date: 13 April 2024 //***************************************************************************/ #include diff --git a/src/core/transform/ojph_transform_wasm.cpp b/src/core/transform/ojph_transform_wasm.cpp index 8f48e352..7b9ffb10 100644 --- a/src/core/transform/ojph_transform_wasm.cpp +++ b/src/core/transform/ojph_transform_wasm.cpp @@ -41,6 +41,9 @@ #include "ojph_defs.h" #include "ojph_arch.h" #include "ojph_mem.h" +#include "ojph_params.h" +#include "../codestream/ojph_params_local.h" + #include "ojph_transform.h" #include "ojph_transform_local.h" @@ -48,473 +51,645 @@ namespace ojph { namespace local { ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_fwd_predict(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) - { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - v128_t s1 = wasm_v128_load(src1); - v128_t s2 = wasm_v128_load(src2); - v128_t d = wasm_v128_load(dst); - s1 = wasm_i32x4_shr(wasm_i32x4_add(s1, s2), 1); - d = wasm_i32x4_sub(d, s1); - wasm_v128_store(dst, d); - } - } - - ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_fwd_update(const line_buf* line_src1, - const line_buf* line_src2, - line_buf *line_dst, ui32 repeat) + void wasm_deinterleave(float* dpl, float* dph, float* sp, + int width, bool even) { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - v128_t offset = wasm_i32x4_splat(2); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) - { - v128_t s1 = wasm_v128_load(src1); - s1 = wasm_i32x4_add(s1, offset); - v128_t s2 = wasm_v128_load(src2); - s2 = wasm_i32x4_add(s2, s1); - v128_t d = 
wasm_v128_load(dst); - d = wasm_i32x4_add(d, wasm_i32x4_shr(s2, 2)); - wasm_v128_store(dst, d); - } + if (even) + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) + { + v128_t a = wasm_v128_load(sp); + v128_t b = wasm_v128_load(sp + 4); + v128_t c = wasm_i32x4_shuffle(a, b, 0, 2, 4 + 0, 4 + 2); + v128_t d = wasm_i32x4_shuffle(a, b, 1, 3, 4 + 1, 4 + 3); + // v128_t c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + // v128_t d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); + wasm_v128_store(dpl, c); + wasm_v128_store(dph, d); + } + else + for (; width > 0; width -= 8, sp += 8, dpl += 4, dph += 4) + { + v128_t a = wasm_v128_load(sp); + v128_t b = wasm_v128_load(sp + 4); + v128_t c = wasm_i32x4_shuffle(a, b, 0, 2, 4 + 0, 4 + 2); + v128_t d = wasm_i32x4_shuffle(a, b, 1, 3, 4 + 1, 4 + 3); + // v128_t c = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)); + // v128_t d = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)); + wasm_v128_store(dpl, d); + wasm_v128_store(dph, c); + } } ////////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, bool even) + void wasm_interleave(float* dp, float* spl, float* sph, + int width, bool even) { - if (width > 1) - { - si32 *src = line_src->i32; - si32 *ldst = line_ldst->i32, *hdst = line_hdst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - // extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const si32* sp = src + (even ? 
1 : 0); - si32 *dph = hdst; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4) - { //this is doing twice the work it needs to do - //it can be definitely written better - v128_t s1 = wasm_v128_load(sp - 1); - v128_t s2 = wasm_v128_load(sp + 1); - v128_t d = wasm_v128_load(sp); - s1 = wasm_i32x4_shr(wasm_i32x4_add(s1, s2), 1); - v128_t d1 = wasm_i32x4_sub(d, s1); - sp += 4; - s1 = wasm_v128_load(sp - 1); - s2 = wasm_v128_load(sp + 1); - d = wasm_v128_load(sp); - s1 = wasm_i32x4_shr(wasm_i32x4_add(s1, s2), 1); - v128_t d2 = wasm_i32x4_sub(d, s1); - sp += 4; - d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6); - wasm_v128_store(dph, d); - } - - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - sp = src + (even ? 0 : 1); - const si32* sph = hdst + (even ? 0 : 1); - si32 *dpl = ldst; - v128_t offset = wasm_i32x4_splat(2); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4) + if (even) + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) { - v128_t s1 = wasm_v128_load(sph - 1); - s1 = wasm_i32x4_add(s1, offset); - v128_t s2 = wasm_v128_load(sph); - s2 = wasm_i32x4_add(s2, s1); - v128_t d1 = wasm_v128_load(sp); - v128_t d2 = wasm_v128_load(sp + 4); - v128_t d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6); - d = wasm_i32x4_add(d, wasm_i32x4_shr(s2, 2)); - wasm_v128_store(dpl, d); + v128_t a = wasm_v128_load(spl); + v128_t b = wasm_v128_load(sph); + v128_t c = wasm_i32x4_shuffle(a, b, 0, 4 + 0, 1, 4 + 1); + v128_t d = wasm_i32x4_shuffle(a, b, 2, 4 + 2, 3, 4 + 3); + // v128_t c = _mm_unpacklo_ps(a, b); + // v128_t d = _mm_unpackhi_ps(a, b); + wasm_v128_store(dp, c); + wasm_v128_store(dp + 4, d); } - } else - { - if (even) - line_ldst->i32[0] = line_src->i32[0]; - else - line_hdst->i32[0] = line_src->i32[0] << 1; - } + for (; width > 0; width -= 8, dp += 8, spl += 4, sph += 4) + { + v128_t a = wasm_v128_load(spl); + v128_t b = wasm_v128_load(sph); + v128_t c = wasm_i32x4_shuffle(b, a, 0, 4 + 0, 1, 4 + 1); + v128_t d = 
wasm_i32x4_shuffle(b, a, 2, 4 + 2, 3, 4 + 3); + // v128_t c = _mm_unpacklo_ps(b, a); + // v128_t d = _mm_unpackhi_ps(b, a); + wasm_v128_store(dp, c); + wasm_v128_store(dp + 4, d); + } } ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat) + static inline void wasm_multiply_const(float* p, float f, int width) { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) + v128_t factor = wasm_f32x4_splat(f); + for (; width > 0; width -= 4, p += 4) { - v128_t s1 = wasm_v128_load(src1); - v128_t s2 = wasm_v128_load(src2); - v128_t d = wasm_v128_load(dst); - s1 = wasm_i32x4_shr(wasm_i32x4_add(s1, s2), 1); - d = wasm_i32x4_add(d, s1); - wasm_v128_store(dst, d); + v128_t s = wasm_v128_load(p); + wasm_v128_store(p, wasm_f32x4_mul(factor, s)); } } ////////////////////////////////////////////////////////////////////////// - void wasm_rev_vert_wvlt_bwd_update(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, ui32 repeat) + void wasm_irv_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { - si32 *dst = line_dst->i32; - const si32 *src1 = line_src1->i32, *src2 = line_src2->i32; - - v128_t offset = wasm_i32x4_splat(2); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) + float a = s->irv.Aatk; + if (synthesis) + a = -a; + + v128_t factor = wasm_f32x4_splat(a); + + float* dst = aug->f32; + const float* src1 = sig->f32, * src2 = other->f32; + int i = (int)repeat; + for ( ; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) { v128_t s1 = wasm_v128_load(src1); - s1 = wasm_i32x4_add(s1, offset); v128_t s2 = wasm_v128_load(src2); - s2 = wasm_i32x4_add(s2, s1); - v128_t d = wasm_v128_load(dst); - d = 
wasm_i32x4_sub(d, wasm_i32x4_shr(s2, 2)); + v128_t d = wasm_v128_load(dst); + d = wasm_f32x4_add(d, wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2))); wasm_v128_store(dst, d); } } ////////////////////////////////////////////////////////////////////////// - void wasm_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, bool even) + void wasm_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat) + { + wasm_multiply_const(aug->f32, K, (int)repeat); + } + + ///////////////////////////////////////////////////////////////////////// + void wasm_irv_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - si32 *lsrc = line_lsrc->i32, *hsrc = line_hsrc->i32; - si32 *dst = line_dst->i32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; + // split src into ldst and hdst + wasm_deinterleave(ldst->f32, hdst->f32, src->f32, (int)width, even); - // extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - const si32 *sph = hsrc + (even ? 0 : 1); - si32 *spl = lsrc; - v128_t offset = wasm_i32x4_splat(2); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sph+=4, spl+=4) + // the actual horizontal transform + float* hp = hdst->f32, * lp = ldst->f32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - v128_t s1 = wasm_v128_load(sph - 1); - s1 = wasm_i32x4_add(s1, offset); - v128_t s2 = wasm_v128_load(sph); - s2 = wasm_i32x4_add(s2, s1); - v128_t d = wasm_v128_load(spl); - d = wasm_i32x4_sub(d, wasm_i32x4_shr(s2, 2)); - wasm_v128_store(spl, d); + const lifting_step* s = atk->get_step(j - 1); + const float a = s->irv.Aatk; + + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const float* sp = lp; + float* dp = hp; + int i = (int)h_width; + v128_t f = wasm_f32x4_splat(a); + if (even) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t m = wasm_v128_load(sp); + v128_t n = wasm_v128_load(sp + 1); + v128_t p = wasm_v128_load(dp); + p = wasm_f32x4_add(p, wasm_f32x4_mul(f, wasm_f32x4_add(m, n))); + wasm_v128_store(dp, p); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t m = wasm_v128_load(sp); + v128_t n = wasm_v128_load(sp - 1); + v128_t p = wasm_v128_load(dp); + p = wasm_f32x4_add(p, wasm_f32x4_mul(f, wasm_f32x4_add(m, n))); + wasm_v128_store(dp, p); + } + } + + // swap buffers + float* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } - // extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width - 1]; - // inverse predict and combine - si32 *dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 
0 : 1); - for (ui32 i = (width + 3) >> 2; i > 0; --i, sph+=4, spl+=4, dp+=8) - { - v128_t s1 = wasm_v128_load(spl); - v128_t s2 = wasm_v128_load(spl + 1); - v128_t d = wasm_v128_load(sph); - s2 = wasm_i32x4_shr(wasm_i32x4_add(s1, s2), 1); - d = wasm_i32x4_add(d, s2); - wasm_v128_store(dp, wasm_i32x4_shuffle(s1, d, 0, 4, 1, 5)); - wasm_v128_store(dp + 4, wasm_i32x4_shuffle(s1, d, 2, 6, 3, 7)); + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + wasm_multiply_const(lp, K_inv, (int)l_width); + wasm_multiply_const(hp, K, (int)h_width); } } - else - { + else { if (even) - line_dst->i32[0] = line_lsrc->i32[0]; + ldst->f32[0] = src->f32[0]; else - line_dst->i32[0] = line_hsrc->i32[0] >> 1; + hdst->f32[0] = src->f32[0] * 2.0f; } } ////////////////////////////////////////////////////////////////////////// - void wasm_irrev_vert_wvlt_step(const line_buf *line_src1, - const line_buf *line_src2, - line_buf *line_dst, int step_num, - ui32 repeat) + void wasm_irv_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { - float *dst = line_dst->f32; - const float *src1 = line_src1->f32, *src2 = line_src2->f32; - - v128_t factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[step_num]); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src1+=4, src2+=4) + if (width > 1) { - v128_t s1 = wasm_v128_load(src1); - v128_t s2 = wasm_v128_load(src2); - v128_t d = wasm_v128_load(dst); - d = wasm_f32x4_add(d, wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2))); - wasm_v128_store(dst, d); + bool ev = even; + float* oth = hsrc->f32, * aug = lsrc->f32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 
0 : 1)) >> 1; // high pass + + { // multiply by K or 1/K + float K = atk->get_K(); + float K_inv = 1.0f / K; + wasm_multiply_const(aug, K, (int)aug_width); + wasm_multiply_const(oth, K_inv, (int)oth_width); + } + + // the actual horizontal transform + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) + { + const lifting_step* s = atk->get_step(j); + const float a = s->irv.Aatk; + + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const float* sp = oth; + float* dp = aug; + int i = (int)aug_width; + v128_t f = wasm_f32x4_splat(a); + if (ev) + { + for ( ; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t m = wasm_v128_load(sp); + v128_t n = wasm_v128_load(sp - 1); + v128_t p = wasm_v128_load(dp); + p = wasm_f32x4_sub(p, wasm_f32x4_mul(f, wasm_f32x4_add(m, n))); + wasm_v128_store(dp, p); + } + } + else + { + for ( ; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t m = wasm_v128_load(sp); + v128_t n = wasm_v128_load(sp + 1); + v128_t p = wasm_v128_load(dp); + p = wasm_f32x4_sub(p, wasm_f32x4_mul(f, wasm_f32x4_add(m, n))); + wasm_v128_store(dp, p); + } + } + + // swap buffers + float* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; + } + + // combine both lsrc and hsrc into dst + wasm_interleave(dst->f32, lsrc->f32, hsrc->f32, (int)width, even); + } + else { + if (even) + dst->f32[0] = lsrc->f32[0]; + else + dst->f32[0] = hsrc->f32[0] * 0.5f; } } ///////////////////////////////////////////////////////////////////////// - void wasm_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, - bool L_analysis_or_H_synthesis, ui32 repeat) + void wasm_rev_vert_step(const lifting_step* s, const line_buf* sig, + const line_buf* other, const line_buf* aug, + ui32 repeat, bool synthesis) { - float *dst = line_dst->f32; - const float *src = line_src->f32; + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + v128_t vb = 
wasm_i32x4_splat(b); - float f = LIFTING_FACTORS::K_inv; - f = L_analysis_or_H_synthesis ? f : LIFTING_FACTORS::K; - v128_t factor = wasm_f32x4_splat(f); - for (ui32 i = (repeat + 3) >> 2; i > 0; --i, dst+=4, src+=4) - { - v128_t s = wasm_v128_load(src); - wasm_v128_store(dst, wasm_f32x4_mul(factor, s)); + si32* dst = aug->i32; + const si32* src1 = sig->i32, * src2 = other->i32; + // The general definition of the wavelet in Part 2 is slightly + // different to part 2, although they are mathematically equivalent + // here, we identify the simpler form from Part 1 and employ them + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dst, d); + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = 
wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dst, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)repeat; + if (synthesis) + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dst, d); + } + else + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dst, d); + } + } + else { // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies 32bit x 32bit, + // keeping the LSBs + if (synthesis) + for (ui32 i = repeat; i > 0; --i) + *dst++ -= (b + a * (*src1++ + *src2++)) >> e; + else + for (ui32 i = repeat; i > 0; --i) + *dst++ += (b + a * (*src1++ + *src2++)) >> e; } } ///////////////////////////////////////////////////////////////////////// - void wasm_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, - line_buf *line_hdst, ui32 width, - bool even) + void wasm_rev_horz_ana(const param_atk* atk, const line_buf* ldst, + const line_buf* hdst, const line_buf* src, + ui32 width, bool even) { if (width > 1) { - float *src = line_src->f32; - float *ldst = line_ldst->f32, *hdst = line_hdst->f32; - - const ui32 L_width = (width + (even ? 
1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 0 : 1)) >> 1; - - //extension - src[-1] = src[1]; - src[width] = src[width-2]; - // predict - const float* sp = src + (even ? 1 : 0); - float *dph = hdst; - v128_t factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[0]); - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4) - { //this is doing twice the work it needs to do - //it can be definitely written better - v128_t s1 = wasm_v128_load(sp - 1); - v128_t s2 = wasm_v128_load(sp + 1); - v128_t d = wasm_v128_load(sp); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - v128_t d1 = wasm_f32x4_add(d, s1); - sp += 4; - s1 = wasm_v128_load(sp - 1); - s2 = wasm_v128_load(sp + 1); - d = wasm_v128_load(sp); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - v128_t d2 = wasm_f32x4_add(d, s1); - sp += 4; - d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6); - wasm_v128_store(dph, d); - } + // combine both lsrc and hsrc into dst + wasm_deinterleave(ldst->f32, hdst->f32, src->f32, (int)width, even); - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[1]); - sp = src + (even ? 0 : 1); - const float* sph = hdst + (even ? 0 : 1); - float *dpl = ldst; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sp+=8, sph+=4, dpl+=4) + si32* hp = hdst->i32, * lp = ldst->i32; + ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 h_width = (width + (even ? 
0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = num_steps; j > 0; --j) { - v128_t s1 = wasm_v128_load(sph - 1); - v128_t s2 = wasm_v128_load(sph); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - v128_t d1 = wasm_v128_load(sp); - v128_t d2 = wasm_v128_load(sp + 4); - v128_t d = wasm_i32x4_shuffle(d1, d2, 0, 2, 4, 6); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dpl, d); - } + // first lifting step + const lifting_step* s = atk->get_step(j - 1); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + v128_t vb = wasm_i32x4_splat(b); - //extension - ldst[-1] = ldst[0]; - ldst[L_width] = ldst[L_width-1]; - //predict - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[2]); - const float* spl = ldst + (even ? 1 : 0); - dph = hdst; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, spl+=4, dph+=4) - { - v128_t s1 = wasm_v128_load(spl - 1); - v128_t s2 = wasm_v128_load(spl); - v128_t d = wasm_v128_load(dph); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dph, d); - } + // extension + lp[-1] = lp[0]; + lp[l_width] = lp[l_width - 1]; + // lifting step + const si32* sp = lp; + si32* dp = hp; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)h_width; + if (even) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = 
wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)h_width; + if (even) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + else { + // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies + // 32bit x 32bit, keeping the LSBs + if (even) + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += (b + a * (sp[0] + sp[1])) >> e; + else + for (ui32 i = h_width; i > 0; --i, sp++, dp++) + *dp += 
(b + a * (sp[-1] + sp[0])) >> e; + } - // extension - hdst[-1] = hdst[0]; - hdst[H_width] = hdst[H_width-1]; - // update - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[3]); - sph = hdst + (even ? 0 : 1); - dpl = ldst; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, sph+=4, dpl+=4) - { - v128_t s1 = wasm_v128_load(sph - 1); - v128_t s2 = wasm_v128_load(sph); - v128_t d = wasm_v128_load(dpl); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dpl, d); - } - - //multipliers - float *dp = ldst; - factor = wasm_f32x4_splat(LIFTING_FACTORS::K_inv); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dp+=4) - { - v128_t d = wasm_v128_load(dp); - wasm_v128_store(dp, wasm_f32x4_mul(factor, d)); - } - dp = hdst; - factor = wasm_f32x4_splat(LIFTING_FACTORS::K); - for (int i = (H_width + 3) >> 2; i > 0; --i, dp+=4) - { - v128_t d = wasm_v128_load(dp); - wasm_v128_store(dp, wasm_f32x4_mul(factor, d)); + // swap buffers + si32* t = lp; lp = hp; hp = t; + even = !even; + ui32 w = l_width; l_width = h_width; h_width = w; } } - else - { + else { if (even) - line_ldst->f32[0] = line_src->f32[0]; + ldst->i32[0] = src->i32[0]; else - line_hdst->f32[0] = line_src->f32[0] + line_src->f32[0]; + hdst->i32[0] = src->i32[0] << 1; } } - - ///////////////////////////////////////////////////////////////////////// - void wasm_irrev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, - line_buf *line_hsrc, ui32 width, - bool even) + + ////////////////////////////////////////////////////////////////////////// + void wasm_rev_horz_syn(const param_atk* atk, const line_buf* dst, + const line_buf* lsrc, const line_buf* hsrc, + ui32 width, bool even) { if (width > 1) { - float *lsrc = line_lsrc->f32, *hsrc = line_hsrc->f32; - float *dst = line_dst->f32; - - const ui32 L_width = (width + (even ? 1 : 0)) >> 1; - const ui32 H_width = (width + (even ? 
0 : 1)) >> 1; - - //multipliers - float *dp = lsrc; - v128_t factor = wasm_f32x4_splat(LIFTING_FACTORS::K); - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dp+=4) - { - v128_t d = wasm_v128_load(dp); - wasm_v128_store(dp, wasm_f32x4_mul(factor, d)); - } - dp = hsrc; - factor = wasm_f32x4_splat(LIFTING_FACTORS::K_inv); - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dp+=4) - { - v128_t d = wasm_v128_load(dp); - wasm_v128_store(dp, wasm_f32x4_mul(factor, d)); - } - - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[7]); - const float *sph = hsrc + (even ? 0 : 1); - float *dpl = lsrc; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dpl+=4, sph+=4) + bool ev = even; + si32* oth = hsrc->i32, * aug = lsrc->i32; + ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass + ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass + ui32 num_steps = atk->get_num_steps(); + for (ui32 j = 0; j < num_steps; ++j) { - v128_t s1 = wasm_v128_load(sph - 1); - v128_t s2 = wasm_v128_load(sph); - v128_t d = wasm_v128_load(dpl); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dpl, d); - } + const lifting_step* s = atk->get_step(j); + const si32 a = s->rev.Aatk; + const si32 b = s->rev.Batk; + const ui32 e = s->rev.Eatk; + v128_t vb = wasm_i32x4_splat(b); - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[6]); - const float *spl = lsrc + (even ? 
0 : -1); - float *dph = hsrc; - for (ui32 i = (H_width + 3) >> 2; i > 0; --i, dph+=4, spl+=4) - { - v128_t s1 = wasm_v128_load(spl); - v128_t s2 = wasm_v128_load(spl + 1); - v128_t d = wasm_v128_load(dph); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dph, d); - } + // extension + oth[-1] = oth[0]; + oth[oth_width] = oth[oth_width - 1]; + // lifting step + const si32* sp = oth; + si32* dp = aug; + if (a == 1) + { // 5/3 update and any case with a == 1 + int i = (int)aug_width; + if (ev) + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + else + { + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_add(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + } + else if (a == -1 && b == 1 && e == 1) + { // 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t w = wasm_i32x4_shr(t, e); + d = wasm_i32x4_add(d, w); + 
wasm_v128_store((v128_t*)dp, d); + } + } + else if (a == -1) + { // any case with a == -1, which is not 5/3 predict + int i = (int)aug_width; + if (ev) + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + else + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t v = wasm_i32x4_sub(vb, t); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } + } + else { + // general case + // 32bit multiplication is not supported in sse2; we need sse4.1, + // where we can use _mm_mullo_epi32, which multiplies + // 32bit x 32bit, keeping the LSBs + if (ev) + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b + a * (sp[-1] + sp[0])) >> e; + else + for (ui32 i = aug_width; i > 0; --i, sp++, dp++) + *dp -= (b + a * (sp[0] + sp[1])) >> e; + } - //extension - hsrc[-1] = hsrc[0]; - hsrc[H_width] = hsrc[H_width-1]; - //inverse update - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[5]); - sph = hsrc + (even ? 
0 : 1); - dpl = lsrc; - for (ui32 i = (L_width + 3) >> 2; i > 0; --i, dpl+=4, sph+=4) - { - v128_t s1 = wasm_v128_load(sph - 1); - v128_t s2 = wasm_v128_load(sph); - v128_t d = wasm_v128_load(dpl); - s1 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s1); - wasm_v128_store(dpl, d); + // swap buffers + si32* t = aug; aug = oth; oth = t; + ev = !ev; + ui32 w = aug_width; aug_width = oth_width; oth_width = w; } - //extension - lsrc[-1] = lsrc[0]; - lsrc[L_width] = lsrc[L_width-1]; - //inverse perdict and combine - factor = wasm_f32x4_splat(LIFTING_FACTORS::steps[4]); - dp = dst + (even ? 0 : -1); - spl = lsrc + (even ? 0 : -1); - sph = hsrc; - ui32 width = L_width + (even ? 0 : 1); - for (ui32 i = (width + 3) >> 2; i > 0; --i, spl+=4, sph+=4, dp+=8) - { - v128_t s1 = wasm_v128_load(spl); - v128_t s2 = wasm_v128_load(spl + 1); - v128_t d = wasm_v128_load(sph); - s2 = wasm_f32x4_mul(factor, wasm_f32x4_add(s1, s2)); - d = wasm_f32x4_add(d, s2); - wasm_v128_store(dp, wasm_i32x4_shuffle(s1, d, 0, 4, 1, 5)); - wasm_v128_store(dp + 4, wasm_i32x4_shuffle(s1, d, 2, 6, 3, 7)); - } + // combine both lsrc and hsrc into dst + wasm_interleave(dst->f32, lsrc->f32, hsrc->f32, (int)width, even); } - else - { + else { if (even) - line_dst->f32[0] = line_lsrc->f32[0]; + dst->i32[0] = lsrc->i32[0]; else - line_dst->f32[0] = line_hsrc->f32[0] * 0.5f; + dst->i32[0] = hsrc->i32[0] >> 1; } } - - } -} + + } // !local +} // !ojph diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 48c8f67d..000409ff 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -69,10 +69,22 @@ else() COMMAND ${CMAKE_COMMAND} -E copy "$" "./" COMMAND ${CMAKE_COMMAND} -E copy "$" "./" ) + if(EMSCRIPTEN) + add_custom_command(TARGET test_executables POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "$" "./" + COMMAND ${CMAKE_COMMAND} -E copy "$" "./" + ) + add_custom_command(TARGET test_executables POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "$/ojph_expand.wasm" "./" + COMMAND 
${CMAKE_COMMAND} -E copy "$/ojph_compress.wasm" "./" + COMMAND ${CMAKE_COMMAND} -E copy "$/ojph_expand_simd.wasm" "./" + COMMAND ${CMAKE_COMMAND} -E copy "$/ojph_compress_simd.wasm" "./" + ) + endif(EMSCRIPTEN) if(MSYS) add_custom_command(TARGET test_executables POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy "../bin/msys-gtest.dll" "./" COMMAND ${CMAKE_COMMAND} -E copy "../bin/msys-gtest_main.dll" "./" ) - endif() -endif() + endif(MSYS) +endif(MSVC) From 21bc405c991dbcad3aae7876f157245ca41cbe3d Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 20:08:27 +1000 Subject: [PATCH 069/348] Wasm simd is buggy. --- src/apps/ojph_compress/CMakeLists.txt | 1 + src/apps/ojph_expand/CMakeLists.txt | 1 + src/core/transform/ojph_transform_wasm.cpp | 110 ++++++++++++++++----- 3 files changed, 86 insertions(+), 26 deletions(-) diff --git a/src/apps/ojph_compress/CMakeLists.txt b/src/apps/ojph_compress/CMakeLists.txt index dadcca9b..27723789 100644 --- a/src/apps/ojph_compress/CMakeLists.txt +++ b/src/apps/ojph_compress/CMakeLists.txt @@ -18,6 +18,7 @@ source_group("common" FILES ${OJPH_IMG_IO_H}) if(EMSCRIPTEN) add_compile_options(-std=c++11 -O3 -fexceptions) + add_link_options(-sWASM=1 -sASSERTIONS=1 -sALLOW_MEMORY_GROWTH=1 -sNODERAWFS=1 -sENVIRONMENT=node -sEXIT_RUNTIME=1 -sEXCEPTION_CATCHING_ALLOWED=['fake']) add_executable(ojph_compress ${SOURCES}) add_executable(ojph_compress_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) target_compile_options(ojph_compress_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) diff --git a/src/apps/ojph_expand/CMakeLists.txt b/src/apps/ojph_expand/CMakeLists.txt index d4b65523..ac650c38 100644 --- a/src/apps/ojph_expand/CMakeLists.txt +++ b/src/apps/ojph_expand/CMakeLists.txt @@ -18,6 +18,7 @@ source_group("common" FILES ${OJPH_IMG_IO_H}) if(EMSCRIPTEN) add_compile_options(-std=c++11 -O3 -fexceptions) + add_link_options(-sWASM=1 -sASSERTIONS=1 -sALLOW_MEMORY_GROWTH=1 -sNODERAWFS=1 -sENVIRONMENT=node -sEXIT_RUNTIME=1 
-sEXCEPTION_CATCHING_ALLOWED=['fake']) add_executable(ojph_expand ${SOURCES}) add_executable(ojph_expand_simd ${SOURCES} ${OJPH_IMG_IO_SSE4}) target_compile_options(ojph_expand_simd PRIVATE -DOJPH_ENABLE_WASM_SIMD -msimd128 -msse4.1) diff --git a/src/core/transform/ojph_transform_wasm.cpp b/src/core/transform/ojph_transform_wasm.cpp index 7b9ffb10..83cee30c 100644 --- a/src/core/transform/ojph_transform_wasm.cpp +++ b/src/core/transform/ojph_transform_wasm.cpp @@ -305,6 +305,7 @@ namespace ojph { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; const ui32 e = s->rev.Eatk; + v128_t va = wasm_i32x4_splat(a); v128_t vb = wasm_i32x4_splat(b); si32* dst = aug->i32; @@ -394,16 +395,35 @@ namespace ojph { wasm_v128_store((v128_t*)dst, d); } } - else { // general case - // 32bit multiplication is not supported in sse2; we need sse4.1, - // where we can use _mm_mullo_epi32, which multiplies 32bit x 32bit, - // keeping the LSBs + else + { // general case + int i = (int)repeat; if (synthesis) - for (ui32 i = repeat; i > 0; --i) - *dst++ -= (b + a * (*src1++ + *src2++)) >> e; + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dst, d); + } else - for (ui32 i = repeat; i > 0; --i) - *dst++ += (b + a * (*src1++ + *src2++)) >> e; + for (; i > 0; i -= 4, dst += 4, src1 += 4, src2 += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)src1); + v128_t s2 = wasm_v128_load((v128_t*)src2); + v128_t d = wasm_v128_load((v128_t*)dst); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dst, d); + } } } @@ 
-428,6 +448,7 @@ namespace ojph { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; const ui32 e = s->rev.Eatk; + v128_t va = wasm_i32x4_splat(a); v128_t vb = wasm_i32x4_splat(b); // extension @@ -522,17 +543,35 @@ namespace ojph { wasm_v128_store((v128_t*)dp, d); } } - else { - // general case - // 32bit multiplication is not supported in sse2; we need sse4.1, - // where we can use _mm_mullo_epi32, which multiplies - // 32bit x 32bit, keeping the LSBs + else + { // general case + int i = (int)h_width; if (even) - for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp += (b + a * (sp[0] + sp[1])) >> e; + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } else - for (ui32 i = h_width; i > 0; --i, sp++, dp++) - *dp += (b + a * (sp[-1] + sp[0])) >> e; + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_add(d, w); + wasm_v128_store((v128_t*)dp, d); + } } // swap buffers @@ -567,6 +606,7 @@ namespace ojph { const si32 a = s->rev.Aatk; const si32 b = s->rev.Batk; const ui32 e = s->rev.Eatk; + v128_t va = wasm_i32x4_splat(a); v128_t vb = wasm_i32x4_splat(b); // extension @@ -661,17 +701,35 @@ namespace ojph { wasm_v128_store((v128_t*)dp, d); } } - else { - // general case - // 32bit multiplication is not supported in sse2; we need sse4.1, - // where we can use _mm_mullo_epi32, which multiplies - // 32bit x 32bit, keeping the LSBs + else + { // general 
case + int i = (int)aug_width; if (ev) - for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp -= (b + a * (sp[-1] + sp[0])) >> e; + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } else - for (ui32 i = aug_width; i > 0; --i, sp++, dp++) - *dp -= (b + a * (sp[0] + sp[1])) >> e; + for (; i > 0; i -= 4, sp += 4, dp += 4) + { + v128_t s1 = wasm_v128_load((v128_t*)sp); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t d = wasm_v128_load((v128_t*)dp); + v128_t t = wasm_i32x4_add(s1, s2); + v128_t u = wasm_i32x4_mul(va, t); + v128_t v = wasm_i32x4_add(vb, u); + v128_t w = wasm_i32x4_shr(v, e); + d = wasm_i32x4_sub(d, w); + wasm_v128_store((v128_t*)dp, d); + } } // swap buffers From e40fa17ccbd44e49251a880813a1b513fe184d6b Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 20:12:30 +1000 Subject: [PATCH 070/348] A small bug fix. 
--- src/core/transform/ojph_transform_wasm.cpp | 4 ++-- tests/test_executables.cpp | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/transform/ojph_transform_wasm.cpp b/src/core/transform/ojph_transform_wasm.cpp index 83cee30c..bd652dfa 100644 --- a/src/core/transform/ojph_transform_wasm.cpp +++ b/src/core/transform/ojph_transform_wasm.cpp @@ -550,7 +550,7 @@ namespace ojph { for (; i > 0; i -= 4, sp += 4, dp += 4) { v128_t s1 = wasm_v128_load((v128_t*)sp); - v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); + v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); v128_t d = wasm_v128_load((v128_t*)dp); v128_t t = wasm_i32x4_add(s1, s2); v128_t u = wasm_i32x4_mul(va, t); @@ -563,7 +563,7 @@ namespace ojph { for (; i > 0; i -= 4, sp += 4, dp += 4) { v128_t s1 = wasm_v128_load((v128_t*)sp); - v128_t s2 = wasm_v128_load((v128_t*)(sp + 1)); + v128_t s2 = wasm_v128_load((v128_t*)(sp - 1)); v128_t d = wasm_v128_load((v128_t*)dp); v128_t t = wasm_i32x4_add(s1, s2); v128_t u = wasm_i32x4_mul(va, t); diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index f42174f6..99b4f8c0 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -128,7 +128,6 @@ void run_ojph_compress(const std::string& ref_filename, + " -i " + REF_FILE_DIR + ref_filename + " -o " + OUT_FILE_DIR + base_filename + extended_base_fname + "." + out_ext + " " + extra_options; - std::cerr << command << std::endl; EXPECT_EQ(execute(command, result), 0); } catch (const std::runtime_error& error) { From a92f9216bd81e482d62e8995be405cd32d3b8c77 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 21:52:16 +1000 Subject: [PATCH 071/348] Added one test. 
--- tests/test_executables.cpp | 646 +++++++++++++++-------------- tests/test_helpers/ht_cmdlines.txt | 1 + 2 files changed, 332 insertions(+), 315 deletions(-) diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 99b4f8c0..8660f9d1 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -274,11 +274,11 @@ TEST(TestExecutables, OpenJPHExpandNoArguments) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x64) { - double mse[3] = { 39.2812, 36.3819, 47.642 }; - int pae[3] = { 74, 77, 73 }; + double mse[3] = { 39.2812, 36.3819, 47.642}; + int pae[3] = { 74, 77, 73}; run_ojph_expand("simple_dec_irv97_64x64", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -286,11 +286,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_32x32.jph -precise -quiet -rate 1 Cblk={32,32} -full TEST(TestExecutables, SimpleDecIrv9732x32) { - double mse[3] = { 18.6979, 17.1208, 22.7539 }; - int pae[3] = { 51, 48, 46 }; + double mse[3] = { 18.6979, 17.1208, 22.7539}; + int pae[3] = { 51, 48, 46}; run_ojph_expand("simple_dec_irv97_32x32", "jph", "ppm"); run_mse_pae("simple_dec_irv97_32x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -298,11 +298,11 @@ TEST(TestExecutables, SimpleDecIrv9732x32) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_16x16.jph -precise -quiet -rate 1 Cblk={16,16} -full TEST(TestExecutables, SimpleDecIrv9716x16) { - double mse[3] = { 20.1706, 18.5427, 24.6146 }; - int pae[3] = { 53, 51, 47 }; + double mse[3] = { 20.1706, 18.5427, 24.6146}; + int pae[3] = { 53, 51, 47}; 
run_ojph_expand("simple_dec_irv97_16x16", "jph", "ppm"); run_mse_pae("simple_dec_irv97_16x16", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -310,11 +310,11 @@ TEST(TestExecutables, SimpleDecIrv9716x16) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_4x4.jph -precise -quiet -rate 1 Cblk={4,4} -full TEST(TestExecutables, SimpleDecIrv974x4) { - double mse[3] = { 40.8623, 37.9308, 49.7276 }; - int pae[3] = { 75, 77, 80 }; + double mse[3] = { 40.8623, 37.9308, 49.7276}; + int pae[3] = { 75, 77, 80}; run_ojph_expand("simple_dec_irv97_4x4", "jph", "ppm"); run_mse_pae("simple_dec_irv97_4x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -322,11 +322,11 @@ TEST(TestExecutables, SimpleDecIrv974x4) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_1024x4.jph -precise -quiet -rate 1 Cblk={1024,4} -full TEST(TestExecutables, SimpleDecIrv971024x4) { - double mse[3] = { 19.8275, 18.2511, 24.2832 }; - int pae[3] = { 53, 52, 50 }; + double mse[3] = { 19.8275, 18.2511, 24.2832}; + int pae[3] = { 53, 52, 50}; run_ojph_expand("simple_dec_irv97_1024x4", "jph", "ppm"); run_mse_pae("simple_dec_irv97_1024x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -334,11 +334,11 @@ TEST(TestExecutables, SimpleDecIrv971024x4) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_4x1024.jph -precise -quiet -rate 1 Cblk={4,1024} -full TEST(TestExecutables, SimpleDecIrv974x1024) { - double mse[3] = { 19.9635, 18.4063, 24.1719 }; - int pae[3] = { 51, 48, 51 }; + double mse[3] = { 19.9635, 18.4063, 24.1719}; + int pae[3] = { 51, 48, 51}; run_ojph_expand("simple_dec_irv97_4x1024", "jph", "ppm"); 
run_mse_pae("simple_dec_irv97_4x1024", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -346,11 +346,11 @@ TEST(TestExecutables, SimpleDecIrv974x1024) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_512x8.jph -precise -quiet -rate 1 Cblk={512,8} -full TEST(TestExecutables, SimpleDecIrv97512x8) { - double mse[3] = { 18.7929, 17.2026, 22.9922 }; - int pae[3] = { 53, 52, 50 }; + double mse[3] = { 18.7929, 17.2026, 22.9922}; + int pae[3] = { 53, 52, 50}; run_ojph_expand("simple_dec_irv97_512x8", "jph", "ppm"); run_mse_pae("simple_dec_irv97_512x8", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -358,11 +358,11 @@ TEST(TestExecutables, SimpleDecIrv97512x8) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_8x512.jph -precise -quiet -rate 1 Cblk={8,512} -full TEST(TestExecutables, SimpleDecIrv978x512) { - double mse[3] = { 19.3661, 17.8067, 23.4574 }; - int pae[3] = { 51, 48, 52 }; + double mse[3] = { 19.3661, 17.8067, 23.4574}; + int pae[3] = { 51, 48, 52}; run_ojph_expand("simple_dec_irv97_8x512", "jph", "ppm"); run_mse_pae("simple_dec_irv97_8x512", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -370,11 +370,11 @@ TEST(TestExecutables, SimpleDecIrv978x512) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_256x16.jph -precise -quiet -rate 1 Cblk={256,16} -full TEST(TestExecutables, SimpleDecIrv97256x16) { - double mse[3] = { 18.6355, 17.0963, 22.6076 }; - int pae[3] = { 54, 51, 48 }; + double mse[3] = { 18.6355, 17.0963, 22.6076}; + int pae[3] = { 54, 51, 48}; run_ojph_expand("simple_dec_irv97_256x16", "jph", "ppm"); run_mse_pae("simple_dec_irv97_256x16", "ppm", "Malamute.ppm", - "", 3, mse, 
pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -382,11 +382,11 @@ TEST(TestExecutables, SimpleDecIrv97256x16) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_16x256.jph -precise -quiet -rate 1 Cblk={16,256} -full TEST(TestExecutables, SimpleDecIrv9716x256) { - double mse[3] = { 18.5933, 17.0208, 22.5709 }; - int pae[3] = { 51, 48, 47 }; + double mse[3] = { 18.5933, 17.0208, 22.5709}; + int pae[3] = { 51, 48, 47}; run_ojph_expand("simple_dec_irv97_16x256", "jph", "ppm"); run_mse_pae("simple_dec_irv97_16x256", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -394,11 +394,11 @@ TEST(TestExecutables, SimpleDecIrv9716x256) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_128x32.jph -precise -quiet -rate 1 Cblk={128,32} -full TEST(TestExecutables, SimpleDecIrv97128x32) { - double mse[3] = { 18.4443, 16.9133, 22.4193 }; - int pae[3] = { 52, 50, 46 }; + double mse[3] = { 18.4443, 16.9133, 22.4193}; + int pae[3] = { 52, 50, 46}; run_ojph_expand("simple_dec_irv97_128x32", "jph", "ppm"); run_mse_pae("simple_dec_irv97_128x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -406,11 +406,11 @@ TEST(TestExecutables, SimpleDecIrv97128x32) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_32x128.jph -precise -quiet -rate 1 Cblk={32,128} -full TEST(TestExecutables, SimpleDecIrv9732x128) { - double mse[3] = { 18.4874, 16.9379, 22.4855 }; - int pae[3] = { 51, 48, 45 }; + double mse[3] = { 18.4874, 16.9379, 22.4855}; + int pae[3] = { 51, 48, 45}; run_ojph_expand("simple_dec_irv97_32x128", "jph", "ppm"); run_mse_pae("simple_dec_irv97_32x128", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -418,11 +418,11 @@ TEST(TestExecutables, SimpleDecIrv9732x128) { // Command-line options used to obtain this file is: // -o simple_dec_rev53_64x64.jph -precise -quiet Creversible=yes -full TEST(TestExecutables, SimpleDecRev5364x64) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_64x64", "jph", "ppm"); run_mse_pae("simple_dec_rev53_64x64", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -431,11 +431,11 @@ TEST(TestExecutables, SimpleDecRev5364x64) { // -o simple_dec_rev53_32x32.jph -precise -quiet Creversible=yes Cblk={32,32} // -full TEST(TestExecutables, SimpleDecRev5332x32) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_32x32", "jph", "ppm"); run_mse_pae("simple_dec_rev53_32x32", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -444,11 +444,11 @@ TEST(TestExecutables, SimpleDecRev5332x32) { // -o simple_dec_rev53_4x4.jph -precise -quiet Creversible=yes Cblk={4,4} // -full TEST(TestExecutables, SimpleDecRev534x4) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_4x4", "jph", "ppm"); run_mse_pae("simple_dec_rev53_4x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -457,11 +457,11 @@ TEST(TestExecutables, SimpleDecRev534x4) { // -o simple_dec_rev53_1024x4.jph -precise -quiet Creversible=yes // Cblk={1024,4} -full TEST(TestExecutables, SimpleDecRev531024x4) { - double mse[3] = { 0, 0, 
0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_1024x4", "jph", "ppm"); run_mse_pae("simple_dec_rev53_1024x4", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -470,11 +470,11 @@ TEST(TestExecutables, SimpleDecRev531024x4) { // -o simple_dec_rev53_4x1024.jph -precise -quiet Creversible=yes // Cblk={4,1024} -full TEST(TestExecutables, SimpleDecRev534x1024) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_4x1024", "jph", "ppm"); run_mse_pae("simple_dec_rev53_4x1024", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -485,11 +485,11 @@ TEST(TestExecutables, SimpleDecRev534x1024) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} -full TEST(TestExecutables, SimpleDecIrv9764x64Yuv) { - double mse[3] = { 20.2778, 6.27912, 4.15937 }; - int pae[3] = { 52, 22, 31 }; + double mse[3] = { 20.2778, 6.27912, 4.15937}; + int pae[3] = { 52, 22, 31}; run_ojph_expand("simple_dec_irv97_64x64_yuv", "jph", "yuv"); run_mse_pae("simple_dec_irv97_64x64_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -500,11 +500,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64Yuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} -full TEST(TestExecutables, SimpleDecRev5364x64Yuv) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_64x64_yuv", "jph", "yuv"); run_mse_pae("simple_dec_rev53_64x64_yuv", "yuv", 
"foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -515,11 +515,11 @@ TEST(TestExecutables, SimpleDecRev5364x64Yuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} Stiles={33,257} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesYuv) { - double mse[3] = { 34.4972, 10.1112, 7.96331 }; - int pae[3] = { 67, 30, 39 }; + double mse[3] = { 34.4972, 10.1112, 7.96331}; + int pae[3] = { 67, 30, 39}; run_ojph_expand("simple_dec_irv97_64x64_tiles_yuv", "jph", "yuv"); run_mse_pae("simple_dec_irv97_64x64_tiles_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -530,11 +530,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesYuv) { // Sdims={288,352},{144,176},{144,176} Ssampling={1,1},{2,2},{2,2} // Nprecision={8} Nsigned={no} Stiles={33,257} -full TEST(TestExecutables, SimpleDecRev5364x64TilesYuv) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_64x64_tiles_yuv", "jph", "yuv"); run_mse_pae("simple_dec_rev53_64x64_tiles_yuv", "yuv", "foreman_420.yuv", - ":352x288x8x420", 3, mse, pae); + ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -544,11 +544,11 @@ TEST(TestExecutables, SimpleDecRev5364x64TilesYuv) { // Clevels=5 Corder=LRCP Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP) { - double mse[3] = { 71.8149, 68.7115, 89.4001 }; - int pae[3] = { 78, 78, 83 }; + double mse[3] = { 71.8149, 68.7115, 89.4001}; + int pae[3] = { 78, 78, 83}; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP", "jph", "ppm"); 
run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -558,11 +558,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP) { // Clevels=5 Corder=RLCP Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP) { - double mse[3] = { 71.8149, 68.7115, 89.4001 }; - int pae[3] = { 78, 78, 83 }; + double mse[3] = { 71.8149, 68.7115, 89.4001}; + int pae[3] = { 78, 78, 83}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -572,11 +572,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP) { // Clevels=5 Corder=RPCL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL) { - double mse[3] = { 71.8149, 68.7115, 89.4001 }; - int pae[3] = { 78, 78, 83 }; + double mse[3] = { 71.8149, 68.7115, 89.4001}; + int pae[3] = { 78, 78, 83}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -586,11 +586,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL) { // Clevels=5 Corder=PCRL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL) { - double mse[3] = { 71.8149, 68.7115, 89.4001 }; - int pae[3] = { 78, 78, 83 }; + double mse[3] = { 71.8149, 68.7115, 89.4001}; + int pae[3] = { 78, 78, 83}; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL", "ppm", 
"Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -600,11 +600,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL) { // Clevels=5 Corder=CPRL Cprecincts={2,256} Sorigin={374,1717} // Stile_origin={374,1717} -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL) { - double mse[3] = { 71.8149, 68.7115, 89.4001 }; - int pae[3] = { 78, 78, 83 }; + double mse[3] = { 71.8149, 68.7115, 89.4001}; + int pae[3] = { 78, 78, 83}; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -614,11 +614,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL) { // Clevels=5 Corder=LRCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33) { - double mse[3] = { 56.2139, 51.4121, 69.0107 }; - int pae[3] = { 80, 81, 98 }; + double mse[3] = { 56.2139, 51.4121, 69.0107}; + int pae[3] = { 80, 81, 98}; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -628,11 +628,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33) { // Clevels=5 Corder=RLCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33) { - double mse[3] = { 56.2139, 51.4121, 69.0107 }; - int pae[3] = { 80, 81, 98 }; + double mse[3] = { 56.2139, 51.4121, 69.0107}; + int pae[3] = { 80, 81, 98}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -642,11 +642,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33) { // Clevels=5 Corder=RPCL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107 }; - int pae[3] = { 80, 81, 98 }; + double mse[3] = { 56.2139, 51.4121, 69.0107}; + int pae[3] = { 80, 81, 98}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -656,11 +656,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33) { // Clevels=5 Corder=PCRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107 }; - int pae[3] = { 80, 81, 98 }; + double mse[3] = { 56.2139, 51.4121, 69.0107}; + int pae[3] = { 80, 81, 98}; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -670,11 +670,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33) { // Clevels=5 Corder=CPRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,257} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33) { - double mse[3] = { 56.2139, 51.4121, 69.0107 }; - int pae[3] = { 80, 81, 98 }; + double mse[3] = { 56.2139, 51.4121, 69.0107}; + int pae[3] = { 80, 81, 98}; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -684,11 +684,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33) { // Clevels=5 Corder=LRCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33x33) { - double mse[3] = { 210.283, 210.214, 257.276 }; - int pae[3] = { 165, 161, 166 }; + double mse[3] = { 210.283, 210.214, 257.276}; + int pae[3] = { 165, 161, 166}; run_ojph_expand("simple_dec_irv97_64x64_tiles_LRCP33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_LRCP33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -698,11 +698,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesLRCP33x33) { // Clevels=5 Corder=RLCP Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33x33) { - double mse[3] = { 210.283, 210.214, 257.276 }; - int pae[3] = { 165, 161, 166 }; + double mse[3] = { 210.283, 210.214, 257.276}; + int pae[3] = { 165, 161, 166}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RLCP33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RLCP33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -712,11 +712,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRLCP33x33) { // Clevels=5 Corder=RPCL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33x33) { - double mse[3] = { 210.283, 210.214, 257.276 }; - int pae[3] = { 165, 161, 166 }; + double mse[3] = { 210.283, 210.214, 257.276}; + int pae[3] = { 165, 161, 166}; run_ojph_expand("simple_dec_irv97_64x64_tiles_RPCL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_RPCL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -726,11 +726,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesRPCL33x33) { // Clevels=5 Corder=PCRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33x33) { - double mse[3] = { 210.283, 210.214, 257.276 }; - int pae[3] = { 165, 161, 166 }; + double mse[3] = { 210.283, 210.214, 257.276}; + int pae[3] = { 165, 161, 166}; run_ojph_expand("simple_dec_irv97_64x64_tiles_PCRL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_PCRL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -740,11 +740,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesPCRL33x33) { // Clevels=5 Corder=CPRL Sorigin={5,33} Stile_origin={5,10} Stiles={33,33} // -full TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33x33) { - double mse[3] = { 210.283, 210.214, 257.276 }; - int pae[3] = { 165, 161, 166 }; + double mse[3] = { 210.283, 210.214, 257.276}; + int pae[3] = { 165, 161, 166}; run_ojph_expand("simple_dec_irv97_64x64_tiles_CPRL33x33", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_tiles_CPRL33x33", "ppm", "Malamute.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -753,11 +753,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64TilesCPRL33x33) { // -o simple_dec_rev53_64x64_gray_tiles.jph -precise -quiet Creversible=yes // Clevels=5 Stiles={33,257} -full TEST(TestExecutables, SimpleDecRev5364x64GrayTiles) { - double mse[1] = { 0 }; - int pae[1] = { 0 }; + double mse[1] = { 0}; + int pae[1] = { 0}; run_ojph_expand("simple_dec_rev53_64x64_gray_tiles", "jph", "pgm"); run_mse_pae("simple_dec_rev53_64x64_gray_tiles", "pgm", "monarch.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// 
@@ -766,11 +766,11 @@ TEST(TestExecutables, SimpleDecRev5364x64GrayTiles) { // -o simple_dec_irv97_64x64_gray_tiles.jph -precise -quiet -rate 0.5 // Clevels=5 Stiles={33,257} -full TEST(TestExecutables, SimpleDecIrv9764x64GrayTiles) { - double mse[1] = { 18.9601 }; - int pae[1] = { 56 }; + double mse[1] = { 18.9601}; + int pae[1] = { 56}; run_ojph_expand("simple_dec_irv97_64x64_gray_tiles", "jph", "pgm"); run_mse_pae("simple_dec_irv97_64x64_gray_tiles", "pgm", "monarch.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -778,11 +778,11 @@ TEST(TestExecutables, SimpleDecIrv9764x64GrayTiles) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64_16bit.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x6416bit) { - double mse[3] = { 60507.2, 36672.5, 64809.8 }; - int pae[3] = { 2547, 1974, 1922 }; + double mse[3] = { 60507.2, 36672.5, 64809.8}; + int pae[3] = { 2547, 1974, 1922}; run_ojph_expand("simple_dec_irv97_64x64_16bit", "jph", "ppm"); run_mse_pae("simple_dec_irv97_64x64_16bit", "ppm", "mm.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -790,11 +790,11 @@ TEST(TestExecutables, SimpleDecIrv9764x6416bit) { // Command-line options used to obtain this file is: // -o simple_dec_irv97_64x64_16bit_gray.jph -precise -quiet -rate 0.5 -full TEST(TestExecutables, SimpleDecIrv9764x6416bitGray) { - double mse[1] = { 19382.9 }; - int pae[1] = { 1618 }; + double mse[1] = { 19382.9}; + int pae[1] = { 1618}; run_ojph_expand("simple_dec_irv97_64x64_16bit_gray", "jph", "pgm"); run_mse_pae("simple_dec_irv97_64x64_16bit_gray", "pgm", "mm.pgm", - "", 1, mse, pae); + "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -802,11 +802,11 @@ TEST(TestExecutables, SimpleDecIrv9764x6416bitGray) { // Command-line options used to 
obtain this file is: // -o simple_dec_rev53_64x64_16bit.jph -precise -quiet Creversible=yes -full TEST(TestExecutables, SimpleDecRev5364x6416bit) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_expand("simple_dec_rev53_64x64_16bit", "jph", "ppm"); run_mse_pae("simple_dec_rev53_64x64_16bit", "ppm", "mm.ppm", - "", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -815,11 +815,27 @@ TEST(TestExecutables, SimpleDecRev5364x6416bit) { // -o simple_dec_rev53_64x64_16bit_gray.jph -precise -quiet Creversible=yes // -full TEST(TestExecutables, SimpleDecRev5364x6416bitGray) { - double mse[1] = { 0 }; - int pae[1] = { 0 }; + double mse[1] = { 0}; + int pae[1] = { 0}; run_ojph_expand("simple_dec_rev53_64x64_16bit_gray", "jph", "pgm"); run_mse_pae("simple_dec_rev53_64x64_16bit_gray", "pgm", "mm.pgm", - "", 1, mse, pae); + "", 1, mse, pae); +} + +/////////////////////////////////////////////////////////////////////////////// +// Test ojph_expand with codeblocks when the rev53 wavelet is used. 
+// Command-line options used to obtain this file is: +// -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 +// "Cmodes=HT|CAUSAL" -rate 2 -o simple_dec_irv53_bhvhb_low_latency.jph Catk=2 +// Kkernels:I2=I5X3 Cprecincts="{16,8192},{8,8192},{4,8192}" Cblk="{8,256}" +// Cdecomp="B(-:-:-),H(-),V(-),H(-),B(-:-:-)" Qstep=0.0001 -precise -no_weights +// -tolerance 0 +TEST(TestExecutables, SimpleDecIrv53BhvhbLowLatency) { + double mse[3] = { 5.52392, 4.01405, 6.8166}; + int pae[3] = { 16, 17, 23}; + run_ojph_expand("simple_dec_irv53_bhvhb_low_latency", "jph", "ppm"); + run_mse_pae("simple_dec_irv53_bhvhb_low_latency", "ppm", "Malamute.ppm", + ":I2=I5X3 Cprecincts=", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -828,14 +844,14 @@ TEST(TestExecutables, SimpleDecRev5364x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_64x64.j2c -qstep 0.1 TEST(TestExecutables, SimpleEncIrv9764x64) { - double mse[3] = { 46.2004, 43.622, 56.7452 }; - int pae[3] = { 48, 46, 52 }; + double mse[3] = { 46.2004, 43.622, 56.7452}; + int pae[3] = { 48, 46, 52}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64", "", "j2c", - "-qstep 0.1"); + "simple_enc_irv97_64x64", "", "j2c", + "-qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_64x64", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -844,14 +860,14 @@ TEST(TestExecutables, SimpleEncIrv9764x64) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_32x32.j2c -qstep 0.01 -block_size {32,32} TEST(TestExecutables, SimpleEncIrv9732x32) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; 
run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_32x32", "", "j2c", - "-qstep 0.01 -block_size \"{32,32}\""); + "simple_enc_irv97_32x32", "", "j2c", + "-qstep 0.01 -block_size \"{32,32}\""); run_ojph_compress_expand("simple_enc_irv97_32x32", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_32x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -860,14 +876,14 @@ TEST(TestExecutables, SimpleEncIrv9732x32) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_16x16.j2c -qstep 0.01 -block_size {16,16} TEST(TestExecutables, SimpleEncIrv9716x16) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_16x16", "", "j2c", - "-qstep 0.01 -block_size \"{16,16}\""); + "simple_enc_irv97_16x16", "", "j2c", + "-qstep 0.01 -block_size \"{16,16}\""); run_ojph_compress_expand("simple_enc_irv97_16x16", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_16x16", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -876,14 +892,14 @@ TEST(TestExecutables, SimpleEncIrv9716x16) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_4x4.j2c -qstep 0.01 -block_size {4,4} TEST(TestExecutables, SimpleEncIrv974x4) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_4x4", "", "j2c", - "-qstep 0.01 -block_size \"{4,4}\""); + "simple_enc_irv97_4x4", "", "j2c", + "-qstep 0.01 -block_size \"{4,4}\""); run_ojph_compress_expand("simple_enc_irv97_4x4", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_4x4", 
"ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -892,14 +908,14 @@ TEST(TestExecutables, SimpleEncIrv974x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_1024x4.j2c -qstep 0.01 -block_size {4,1024} TEST(TestExecutables, SimpleEncIrv971024x4) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_1024x4", "", "j2c", - "-qstep 0.01 -block_size \"{4,1024}\""); + "simple_enc_irv97_1024x4", "", "j2c", + "-qstep 0.01 -block_size \"{4,1024}\""); run_ojph_compress_expand("simple_enc_irv97_1024x4", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_1024x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -908,14 +924,14 @@ TEST(TestExecutables, SimpleEncIrv971024x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_4x1024.j2c -qstep 0.01 -block_size {1024,4} TEST(TestExecutables, SimpleEncIrv974x1024) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_4x1024", "", "j2c", - "-qstep 0.01 -block_size \"{1024,4}\""); + "simple_enc_irv97_4x1024", "", "j2c", + "-qstep 0.01 -block_size \"{1024,4}\""); run_ojph_compress_expand("simple_enc_irv97_4x1024", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_4x1024", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -924,14 +940,14 @@ TEST(TestExecutables, SimpleEncIrv974x1024) { // The compressed file is obtained 
using these command-line options: // -o simple_enc_irv97_512x8.j2c -qstep 0.01 -block_size {8,512} TEST(TestExecutables, SimpleEncIrv97512x8) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_512x8", "", "j2c", - "-qstep 0.01 -block_size \"{8,512}\""); + "simple_enc_irv97_512x8", "", "j2c", + "-qstep 0.01 -block_size \"{8,512}\""); run_ojph_compress_expand("simple_enc_irv97_512x8", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_512x8", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -940,14 +956,14 @@ TEST(TestExecutables, SimpleEncIrv97512x8) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_8x512.j2c -qstep 0.01 -block_size {512,8} TEST(TestExecutables, SimpleEncIrv978x512) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_8x512", "", "j2c", - "-qstep 0.01 -block_size \"{512,8}\""); + "simple_enc_irv97_8x512", "", "j2c", + "-qstep 0.01 -block_size \"{512,8}\""); run_ojph_compress_expand("simple_enc_irv97_8x512", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_8x512", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -956,14 +972,14 @@ TEST(TestExecutables, SimpleEncIrv978x512) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_256x16.j2c -qstep 0.01 -block_size {16,256} TEST(TestExecutables, SimpleEncIrv97256x16) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int 
pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_256x16", "", "j2c", - "-qstep 0.01 -block_size \"{16,256}\""); + "simple_enc_irv97_256x16", "", "j2c", + "-qstep 0.01 -block_size \"{16,256}\""); run_ojph_compress_expand("simple_enc_irv97_256x16", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_256x16", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -972,14 +988,14 @@ TEST(TestExecutables, SimpleEncIrv97256x16) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_16x256.j2c -qstep 0.01 -block_size {256,16} TEST(TestExecutables, SimpleEncIrv9716x256) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_16x256", "", "j2c", - "-qstep 0.01 -block_size \"{256,16}\""); + "simple_enc_irv97_16x256", "", "j2c", + "-qstep 0.01 -block_size \"{256,16}\""); run_ojph_compress_expand("simple_enc_irv97_16x256", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_16x256", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -988,14 +1004,14 @@ TEST(TestExecutables, SimpleEncIrv9716x256) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_128x32.j2c -qstep 0.01 -block_size {32,128} TEST(TestExecutables, SimpleEncIrv97128x32) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_128x32", "", "j2c", - "-qstep 0.01 -block_size \"{32,128}\""); + "simple_enc_irv97_128x32", "", "j2c", + "-qstep 0.01 -block_size \"{32,128}\""); 
run_ojph_compress_expand("simple_enc_irv97_128x32", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_128x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1004,14 +1020,14 @@ TEST(TestExecutables, SimpleEncIrv97128x32) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_32x128.j2c -qstep 0.01 -block_size {128,32} TEST(TestExecutables, SimpleEncIrv9732x128) { - double mse[3] = { 1.78779, 1.26001, 2.38395 }; - int pae[3] = { 7, 6, 9 }; + double mse[3] = { 1.78779, 1.26001, 2.38395}; + int pae[3] = { 7, 6, 9}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_32x128", "", "j2c", - "-qstep 0.01 -block_size \"{128,32}\""); + "simple_enc_irv97_32x128", "", "j2c", + "-qstep 0.01 -block_size \"{128,32}\""); run_ojph_compress_expand("simple_enc_irv97_32x128", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_32x128", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1021,14 +1037,14 @@ TEST(TestExecutables, SimpleEncIrv9732x128) { // -o simple_enc_irv97_64x64_tiles_33x33_d5.j2c -qstep 0.01 -tile_size {33,33} // -num_decomps 5 TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D5) { - double mse[3] = { 1.88906, 1.30757, 2.5347 }; - int pae[3] = { 9, 6, 10 }; + double mse[3] = { 1.88906, 1.30757, 2.5347}; + int pae[3] = { 9, 6, 10}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64_tiles_33x33_d5", "", "j2c", - "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 5"); + "simple_enc_irv97_64x64_tiles_33x33_d5", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 5"); run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d5", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d5", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -1038,14 +1054,14 @@ TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D5) { // -o simple_enc_irv97_64x64_tiles_33x33_d6.j2c -qstep 0.01 -tile_size {33,33} // -num_decomps 6 TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D6) { - double mse[3] = { 1.88751, 1.30673, 2.53378 }; - int pae[3] = { 8, 6, 10 }; + double mse[3] = { 1.88751, 1.30673, 2.53378}; + int pae[3] = { 8, 6, 10}; run_ojph_compress("Malamute.ppm", - "simple_enc_irv97_64x64_tiles_33x33_d6", "", "j2c", - "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 6"); + "simple_enc_irv97_64x64_tiles_33x33_d6", "", "j2c", + "-qstep 0.01 -tile_size \"{33,33}\" -num_decomps 6"); run_ojph_compress_expand("simple_enc_irv97_64x64_tiles_33x33_d6", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_tiles_33x33_d6", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1054,14 +1070,14 @@ TEST(TestExecutables, SimpleEncIrv9764x64Tiles33x33D6) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_64x64_16bit.j2c -qstep 0.01 TEST(TestExecutables, SimpleEncIrv9764x6416bit) { - double mse[3] = { 51727.3, 32596.4, 45897.8 }; - int pae[3] = { 1512, 1481, 1778 }; + double mse[3] = { 51727.3, 32596.4, 45897.8}; + int pae[3] = { 1512, 1481, 1778}; run_ojph_compress("mm.ppm", - "simple_enc_irv97_64x64_16bit", "", "j2c", - "-qstep 0.01"); + "simple_enc_irv97_64x64_16bit", "", "j2c", + "-qstep 0.01"); run_ojph_compress_expand("simple_enc_irv97_64x64_16bit", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_64x64_16bit", "ppm", - "mm.ppm", "", 3, mse, pae); + "mm.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1070,14 +1086,14 @@ TEST(TestExecutables, SimpleEncIrv9764x6416bit) { // The compressed file is obtained using these command-line 
options: // -o simple_enc_irv97_64x64_16bit_gray.j2c -qstep 0.01 TEST(TestExecutables, SimpleEncIrv9764x6416bitGray) { - double mse[1] = { 25150.6 }; - int pae[1] = { 1081 }; + double mse[1] = { 25150.6}; + int pae[1] = { 1081}; run_ojph_compress("mm.pgm", - "simple_enc_irv97_64x64_16bit_gray", "", "j2c", - "-qstep 0.01"); + "simple_enc_irv97_64x64_16bit_gray", "", "j2c", + "-qstep 0.01"); run_ojph_compress_expand("simple_enc_irv97_64x64_16bit_gray", "j2c", "pgm"); run_mse_pae("simple_enc_irv97_64x64_16bit_gray", "pgm", - "mm.pgm", "", 1, mse, pae); + "mm.pgm", "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1086,14 +1102,14 @@ TEST(TestExecutables, SimpleEncIrv9764x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x6416bit) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("mm.ppm", - "simple_enc_rev53_64x64_16bit", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64_16bit", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_64x64_16bit", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_16bit", "ppm", - "mm.ppm", "", 3, mse, pae); + "mm.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1102,14 +1118,14 @@ TEST(TestExecutables, SimpleEncRev5364x6416bit) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit_gray.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x6416bitGray) { - double mse[1] = { 0 }; - int pae[1] = { 0 }; + double mse[1] = { 0}; + int pae[1] = { 0}; run_ojph_compress("mm.pgm", - "simple_enc_rev53_64x64_16bit_gray", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64_16bit_gray", "", "j2c", + "-reversible true"); 
run_ojph_compress_expand("simple_enc_rev53_64x64_16bit_gray", "j2c", "pgm"); run_mse_pae("simple_enc_rev53_64x64_16bit_gray", "pgm", - "mm.pgm", "", 1, mse, pae); + "mm.pgm", "", 1, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1118,14 +1134,14 @@ TEST(TestExecutables, SimpleEncRev5364x6416bitGray) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_64x64_16bit.j2c -reversible true TEST(TestExecutables, SimpleEncRev5364x64) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64", "", "j2c", - "-reversible true"); + "simple_enc_rev53_64x64", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_64x64", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1134,14 +1150,14 @@ TEST(TestExecutables, SimpleEncRev5364x64) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_32x32.j2c -reversible true -block_size {32,32} TEST(TestExecutables, SimpleEncRev5332x32) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_32x32", "", "j2c", - "-reversible true -block_size \"{32,32}\""); + "simple_enc_rev53_32x32", "", "j2c", + "-reversible true -block_size \"{32,32}\""); run_ojph_compress_expand("simple_enc_rev53_32x32", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_32x32", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1150,14 +1166,14 @@ TEST(TestExecutables, SimpleEncRev5332x32) { // The compressed file is 
obtained using these command-line options: // -o simple_enc_rev53_4x4.j2c -reversible true -block_size {4,4} TEST(TestExecutables, SimpleEncRev534x4) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_4x4", "", "j2c", - "-reversible true -block_size \"{4,4}\""); + "simple_enc_rev53_4x4", "", "j2c", + "-reversible true -block_size \"{4,4}\""); run_ojph_compress_expand("simple_enc_rev53_4x4", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_4x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1166,14 +1182,14 @@ TEST(TestExecutables, SimpleEncRev534x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_1024x4.j2c -reversible true -block_size {4,1024} TEST(TestExecutables, SimpleEncRev531024x4) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_1024x4", "", "j2c", - "-reversible true -block_size \"{4,1024}\""); + "simple_enc_rev53_1024x4", "", "j2c", + "-reversible true -block_size \"{4,1024}\""); run_ojph_compress_expand("simple_enc_rev53_1024x4", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_1024x4", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1182,14 +1198,14 @@ TEST(TestExecutables, SimpleEncRev531024x4) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_4x1024.j2c -reversible true -block_size {1024,4} TEST(TestExecutables, SimpleEncRev534x1024) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - 
"simple_enc_rev53_4x1024", "", "j2c", - "-reversible true -block_size \"{1024,4}\""); + "simple_enc_rev53_4x1024", "", "j2c", + "-reversible true -block_size \"{1024,4}\""); run_ojph_compress_expand("simple_enc_rev53_4x1024", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_4x1024", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1199,14 +1215,14 @@ TEST(TestExecutables, SimpleEncRev534x1024) { // -o simple_enc_rev53_64x64_tiles_33x33_d5.j2c -reversible true -tile_size // {32,32} -num_decomps 5 TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D5) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64_tiles_33x33_d5", "", "j2c", - "-reversible true -tile_size \"{32,32}\" -num_decomps 5"); + "simple_enc_rev53_64x64_tiles_33x33_d5", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 5"); run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d5", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d5", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1216,14 +1232,14 @@ TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D5) { // -o simple_enc_rev53_64x64_tiles_33x33_d6.j2c -reversible true -tile_size // {32,32} -num_decomps 6 TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D6) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("Malamute.ppm", - "simple_enc_rev53_64x64_tiles_33x33_d6", "", "j2c", - "-reversible true -tile_size \"{32,32}\" -num_decomps 6"); + "simple_enc_rev53_64x64_tiles_33x33_d6", "", "j2c", + "-reversible true -tile_size \"{32,32}\" -num_decomps 6"); 
run_ojph_compress_expand("simple_enc_rev53_64x64_tiles_33x33_d6", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_64x64_tiles_33x33_d6", "ppm", - "Malamute.ppm", "", 3, mse, pae); + "Malamute.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1233,16 +1249,16 @@ TEST(TestExecutables, SimpleEncRev5364x64Tiles33x33D6) { // -o simple_enc_irv97_64x64_yuv.j2c -qstep 0.1 -dims {352,288} -num_comps 3 // -downsamp {1,1},{2,2},{2,2} -bit_depth 8,8,8 -signed false,false,false TEST(TestExecutables, SimpleEncIrv9764x64Yuv) { - double mse[3] = { 30.3548, 7.69602, 5.22246 }; - int pae[3] = { 49, 27, 26 }; + double mse[3] = { 30.3548, 7.69602, 5.22246}; + int pae[3] = { 49, 27, 26}; run_ojph_compress("foreman_420.yuv", - "simple_enc_irv97_64x64_yuv", "", "j2c", - "-qstep 0.1 -dims \"{352,288}\" -num_comps 3 -downsamp" - " \"{1,1}\",\"{2,2}\",\"{2,2}\" -bit_depth 8,8,8" - " -signed false,false,false"); + "simple_enc_irv97_64x64_yuv", "", "j2c", + "-qstep 0.1 -dims \"{352,288}\" -num_comps 3 -downsamp" + " \"{1,1}\",\"{2,2}\",\"{2,2}\" -bit_depth 8,8,8" + " -signed false,false,false"); run_ojph_compress_expand("simple_enc_irv97_64x64_yuv", "j2c", "yuv"); run_mse_pae("simple_enc_irv97_64x64_yuv", "yuv", - "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); + "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1253,16 +1269,16 @@ TEST(TestExecutables, SimpleEncIrv9764x64Yuv) { // {352,288} -num_comps 3 -downsamp {1,1},{2,2},{2,2} -bit_depth 8,8,8 -signed // false,false,false TEST(TestExecutables, SimpleEncRev5364x64Yuv) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("foreman_420.yuv", - "simple_enc_rev53_64x64_yuv", "", "j2c", - "-reversible true -qstep 0.1 -dims \"{352,288}\"" - " -num_comps 3 -downsamp \"{1,1}\",\"{2,2}\",\"{2,2}\"" - " -bit_depth 8,8,8 
-signed false,false,false"); + "simple_enc_rev53_64x64_yuv", "", "j2c", + "-reversible true -qstep 0.1 -dims \"{352,288}\"" + " -num_comps 3 -downsamp \"{1,1}\",\"{2,2}\",\"{2,2}\"" + " -bit_depth 8,8,8 -signed false,false,false"); run_ojph_compress_expand("simple_enc_rev53_64x64_yuv", "j2c", "yuv"); run_mse_pae("simple_enc_rev53_64x64_yuv", "yuv", - "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); + "foreman_420.yuv", ":352x288x8x420", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1271,14 +1287,14 @@ TEST(TestExecutables, SimpleEncRev5364x64Yuv) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_tall_narrow.j2c -qstep 0.1 TEST(TestExecutables, SimpleEncIrv97TallNarrow) { - double mse[3] = { 112.097, 79.2214, 71.1367 }; - int pae[3] = { 56, 41, 32 }; + double mse[3] = { 112.097, 79.2214, 71.1367}; + int pae[3] = { 56, 41, 32}; run_ojph_compress("tall_narrow.ppm", - "simple_enc_irv97_tall_narrow", "", "j2c", - "-qstep 0.1"); + "simple_enc_irv97_tall_narrow", "", "j2c", + "-qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_tall_narrow", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_tall_narrow", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1287,14 +1303,14 @@ TEST(TestExecutables, SimpleEncIrv97TallNarrow) { // The compressed file is obtained using these command-line options: // -o simple_enc_irv97_tall_narrow1.j2c -image_offset {1,0} -qstep 0.1 TEST(TestExecutables, SimpleEncIrv97TallNarrow1) { - double mse[3] = { 100.906, 76.113, 72.8347 }; - int pae[3] = { 39, 35, 34 }; + double mse[3] = { 100.906, 76.113, 72.8347}; + int pae[3] = { 39, 35, 34}; run_ojph_compress("tall_narrow.ppm", - "simple_enc_irv97_tall_narrow1", "", "j2c", - "-image_offset \"{1,0}\" -qstep 0.1"); + "simple_enc_irv97_tall_narrow1", "", "j2c", + "-image_offset \"{1,0}\" 
-qstep 0.1"); run_ojph_compress_expand("simple_enc_irv97_tall_narrow1", "j2c", "ppm"); run_mse_pae("simple_enc_irv97_tall_narrow1", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1303,14 +1319,14 @@ TEST(TestExecutables, SimpleEncIrv97TallNarrow1) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_tall_narrow.j2c -reversible true TEST(TestExecutables, SimpleEncRev53TallNarrow) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("tall_narrow.ppm", - "simple_enc_rev53_tall_narrow", "", "j2c", - "-reversible true"); + "simple_enc_rev53_tall_narrow", "", "j2c", + "-reversible true"); run_ojph_compress_expand("simple_enc_rev53_tall_narrow", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_tall_narrow", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1319,14 +1335,14 @@ TEST(TestExecutables, SimpleEncRev53TallNarrow) { // The compressed file is obtained using these command-line options: // -o simple_enc_rev53_tall_narrow1.j2c -image_offset {1,0} -reversible true TEST(TestExecutables, SimpleEncRev53TallNarrow1) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("tall_narrow.ppm", - "simple_enc_rev53_tall_narrow1", "", "j2c", - "-image_offset \"{1,0}\" -reversible true"); + "simple_enc_rev53_tall_narrow1", "", "j2c", + "-image_offset \"{1,0}\" -reversible true"); run_ojph_compress_expand("simple_enc_rev53_tall_narrow1", "j2c", "ppm"); run_mse_pae("simple_enc_rev53_tall_narrow1", "ppm", - "tall_narrow.ppm", "", 3, mse, pae); + "tall_narrow.ppm", "", 3, mse, pae); } 
/////////////////////////////////////////////////////////////////////////////// @@ -1335,14 +1351,14 @@ TEST(TestExecutables, SimpleEncRev53TallNarrow1) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_le_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitLeNuke11) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_le_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_le_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_le_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_le_nuke11", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1351,14 +1367,14 @@ TEST(TestExecutables, DpxEnc1280x72010bitLeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_be_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitBeNuke11) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_be_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_be_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_be_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_be_nuke11", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1367,14 +1383,14 @@ TEST(TestExecutables, DpxEnc1280x72010bitBeNuke11) { // The compressed file is obtained using these command-line options: // -o 
dpx_enc_1280x720_16bit_le_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitLeNuke11) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_le_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_le_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_le_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_16bit_le_nuke11", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1383,14 +1399,14 @@ TEST(TestExecutables, DpxEnc1280x72016bitLeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_16bit_be_nuke11.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitBeNuke11) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_be_nuke11", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_be_nuke11", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_be_nuke11", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_16bit_be_nuke11", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1399,14 +1415,14 @@ TEST(TestExecutables, DpxEnc1280x72016bitBeNuke11) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_10bit_resolve18.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72010bitResolve18) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; 
run_ojph_compress("dpx_1280x720_10bit.ppm", - "dpx_enc_1280x720_10bit_resolve18", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_10bit_resolve18", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_10bit_resolve18", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_10bit_resolve18", "ppm", - "dpx_1280x720_10bit.ppm", "", 3, mse, pae); + "dpx_1280x720_10bit.ppm", "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// @@ -1415,14 +1431,14 @@ TEST(TestExecutables, DpxEnc1280x72010bitResolve18) { // The compressed file is obtained using these command-line options: // -o dpx_enc_1280x720_16bit_resolve18.j2c -reversible true TEST(TestExecutables, DpxEnc1280x72016bitResolve18) { - double mse[3] = { 0, 0, 0 }; - int pae[3] = { 0, 0, 0 }; + double mse[3] = { 0, 0, 0}; + int pae[3] = { 0, 0, 0}; run_ojph_compress("dpx_1280x720_16bit.ppm", - "dpx_enc_1280x720_16bit_resolve18", "", "j2c", - "-reversible true"); + "dpx_enc_1280x720_16bit_resolve18", "", "j2c", + "-reversible true"); run_ojph_compress_expand("dpx_enc_1280x720_16bit_resolve18", "j2c", "ppm"); run_mse_pae("dpx_enc_1280x720_16bit_resolve18", "ppm", - "dpx_1280x720_16bit.ppm", "", 3, mse, pae); + "dpx_1280x720_16bit.ppm", "", 3, mse, pae); } //////////////////////////////////////////////////////////////////////////////// diff --git a/tests/test_helpers/ht_cmdlines.txt b/tests/test_helpers/ht_cmdlines.txt index a8c0987d..0542a2d6 100644 --- a/tests/test_helpers/ht_cmdlines.txt +++ b/tests/test_helpers/ht_cmdlines.txt @@ -52,6 +52,7 @@ add_test(NAME simple_dec_irv97_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_D add_test(NAME simple_dec_rev53_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -rdec "-i ${images_folder}/mm.ppm -o simple_dec_rev53_64x64_16bit.jph -precise -quiet Creversible=yes -full" "-i simple_dec_rev53_64x64_16bit.jph -o test1.ppm -precise -quiet" "-i simple_dec_rev53_64x64_16bit.jph -o test2.ppm" 
"${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_dec_rev53_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -rdec "-i ${images_folder}/mm.pgm -o simple_dec_rev53_64x64_16bit_gray.jph -precise -quiet Creversible=yes -full" "-i simple_dec_rev53_64x64_16bit_gray.jph -o test1.pgm -precise -quiet" "-i simple_dec_rev53_64x64_16bit_gray.jph -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") +add_test(NAME simple_dec_irv53_bhvhb_low_latency COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -dec "-i ${images_folder}/mm.ppm -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 "Cmodes=HT|CAUSAL" -rate 2 -o simple_dec_irv53_bhvhb_low_latency.jph Catk=2 Kkernels:I2=I5X3 Cprecincts="{16,8192},{8,8192},{4,8192}" Cblk="{8,256}" Cdecomp="B(-:-:-),H(-),V(-),H(-),B(-:-:-)" Qstep=0.0001 -precise -no_weights -tolerance 0" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test1.ppm -precise -quiet" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test2.ppm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") ############################################################# # Encoding From 9345152e05e654b795b389ecfa0a3045efa45a5b Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Sat, 13 Apr 2024 22:18:15 +1000 Subject: [PATCH 072/348] Fixing tests. --- tests/test_executables.cpp | 10 +++++----- tests/test_helpers/convert_mse_pae_to_tests.cpp | 7 +++++-- tests/test_helpers/ht_cmdlines.txt | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/test_executables.cpp b/tests/test_executables.cpp index 8660f9d1..9f77f75e 100644 --- a/tests/test_executables.cpp +++ b/tests/test_executables.cpp @@ -825,17 +825,17 @@ TEST(TestExecutables, SimpleDecRev5364x6416bitGray) { /////////////////////////////////////////////////////////////////////////////// // Test ojph_expand with codeblocks when the rev53 wavelet is used. 
// Command-line options used to obtain this file is: -// -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 -// "Cmodes=HT|CAUSAL" -rate 2 -o simple_dec_irv53_bhvhb_low_latency.jph Catk=2 -// Kkernels:I2=I5X3 Cprecincts="{16,8192},{8,8192},{4,8192}" Cblk="{8,256}" -// Cdecomp="B(-:-:-),H(-),V(-),H(-),B(-:-:-)" Qstep=0.0001 -precise -no_weights +// -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 +// Cmodes=HT|CAUSAL -rate 2 Catk=2 Kkernels:I2=I5X3 +// Cprecincts={16,8192},{8,8192},{4,8192} Cblk={8,256} +// Cdecomp=B(-:-:-),H(-),V(-),H(-),B(-:-:-) Qstep=0.0001 -precise -no_weights // -tolerance 0 TEST(TestExecutables, SimpleDecIrv53BhvhbLowLatency) { double mse[3] = { 5.52392, 4.01405, 6.8166}; int pae[3] = { 16, 17, 23}; run_ojph_expand("simple_dec_irv53_bhvhb_low_latency", "jph", "ppm"); run_mse_pae("simple_dec_irv53_bhvhb_low_latency", "ppm", "Malamute.ppm", - ":I2=I5X3 Cprecincts=", 3, mse, pae); + "", 3, mse, pae); } /////////////////////////////////////////////////////////////////////////////// diff --git a/tests/test_helpers/convert_mse_pae_to_tests.cpp b/tests/test_helpers/convert_mse_pae_to_tests.cpp index 25bf084c..630b6230 100644 --- a/tests/test_helpers/convert_mse_pae_to_tests.cpp +++ b/tests/test_helpers/convert_mse_pae_to_tests.cpp @@ -200,8 +200,11 @@ void process_cmdlines(std::ifstream& file, start_pos = line.find(":"); if (start_pos != std::string::npos) { - size_t end_pos = line.find("\"", start_pos); - yuv_specs = line.substr(start_pos, end_pos - start_pos); + if (std::isdigit(line.at(start_pos + 1))) + { + size_t end_pos = line.find("\"", start_pos); + yuv_specs = line.substr(start_pos, end_pos - start_pos); + } } break; } diff --git a/tests/test_helpers/ht_cmdlines.txt b/tests/test_helpers/ht_cmdlines.txt index 0542a2d6..3b94c887 100644 --- a/tests/test_helpers/ht_cmdlines.txt +++ b/tests/test_helpers/ht_cmdlines.txt @@ -52,7 +52,7 @@ add_test(NAME simple_dec_irv97_64x64_16bit_gray COMMAND 
${CMAKE_CURRENT_SOURCE_D add_test(NAME simple_dec_rev53_64x64_16bit COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -rdec "-i ${images_folder}/mm.ppm -o simple_dec_rev53_64x64_16bit.jph -precise -quiet Creversible=yes -full" "-i simple_dec_rev53_64x64_16bit.jph -o test1.ppm -precise -quiet" "-i simple_dec_rev53_64x64_16bit.jph -o test2.ppm" "${images_folder}/mm.ppm" "test1.ppm" "test2.ppm") add_test(NAME simple_dec_rev53_64x64_16bit_gray COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -rdec "-i ${images_folder}/mm.pgm -o simple_dec_rev53_64x64_16bit_gray.jph -precise -quiet Creversible=yes -full" "-i simple_dec_rev53_64x64_16bit_gray.jph -o test1.pgm -precise -quiet" "-i simple_dec_rev53_64x64_16bit_gray.jph -o test2.pgm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") -add_test(NAME simple_dec_irv53_bhvhb_low_latency COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -dec "-i ${images_folder}/mm.ppm -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 "Cmodes=HT|CAUSAL" -rate 2 -o simple_dec_irv53_bhvhb_low_latency.jph Catk=2 Kkernels:I2=I5X3 Cprecincts="{16,8192},{8,8192},{4,8192}" Cblk="{8,256}" Cdecomp="B(-:-:-),H(-),V(-),H(-),B(-:-:-)" Qstep=0.0001 -precise -no_weights -tolerance 0" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test1.ppm -precise -quiet" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test2.ppm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") +add_test(NAME simple_dec_irv53_bhvhb_low_latency COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/com_decom.sh -dec "-i ${images_folder}/mm.ppm -o simple_dec_irv53_bhvhb_low_latency.jph -quiet Corder=PCRL Clevels=5 Cmodes=HT|CAUSAL -rate 2 Catk=2 Kkernels:I2=I5X3 Cprecincts=\{16,8192\},\{8,8192\},\{4,8192\} Cblk=\{8,256\} Cdecomp=B(-:-:-),H(-),V(-),H(-),B(-:-:-) Qstep=0.0001 -precise -no_weights -tolerance 0" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test1.ppm -precise -quiet" "-i simple_dec_irv53_bhvhb_low_latency.jph -o test2.ppm" "${images_folder}/mm.pgm" "test1.pgm" "test2.pgm") 
############################################################# # Encoding From bd95dfb286892f2a412cd6e1f3b2af63e2aab972 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 17 Apr 2024 10:35:50 +1000 Subject: [PATCH 073/348] First commit of ojph_stream_expand --- CMakeLists.txt | 2 + src/apps/CMakeLists.txt | 1 + src/apps/common/ojph_socket.h | 129 ++++++++++++++ src/apps/ojph_expand/ojph_expand.cpp | 22 +-- src/apps/ojph_stream_expand/CMakeLists.txt | 26 +++ .../ojph_stream_expand/ojph_stream_expand.cpp | 100 +++++++++++ src/apps/others/ojph_socket.cpp | 160 ++++++++++++++++++ src/core/common/ojph_version.h | 2 +- src/core/others/ojph_message.cpp | 9 + 9 files changed, 439 insertions(+), 12 deletions(-) create mode 100644 src/apps/common/ojph_socket.h create mode 100644 src/apps/ojph_stream_expand/CMakeLists.txt create mode 100644 src/apps/ojph_stream_expand/ojph_stream_expand.cpp create mode 100644 src/apps/others/ojph_socket.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c9c6db92..7f509f0d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,12 +18,14 @@ option(BUILD_SHARED_LIBS "Shared Libraries" ON) option(OJPH_ENABLE_TIFF_SUPPORT "Enables input and output support for TIFF files" ON) option(OJPH_BUILD_TESTS "Enables building test code" OFF) option(OJPH_BUILD_EXECUTABLES "Enables building command line executables" ON) +option(OJPH_BUILD_STREAM_EXPAND "Enables building ojph_stream_expand executable" ON) ## Setting some of the options if EMSCRIPTEN is the compiler if(EMSCRIPTEN) set(OJPH_DISABLE_INTEL_SIMD ON) set(BUILD_SHARED_LIBS OFF) set(OJPH_ENABLE_TIFF_SUPPORT OFF) + set(OJPH_BUILD_STREAM_EXPAND OFF) endif() # This is related to how the timestamp is set for URL downloaded files. 
diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt index b3ef06d5..e4f14a3f 100644 --- a/src/apps/CMakeLists.txt +++ b/src/apps/CMakeLists.txt @@ -21,3 +21,4 @@ endif() ## Build executables add_subdirectory(ojph_expand) add_subdirectory(ojph_compress) +add_subdirectory(ojph_stream_expand) diff --git a/src/apps/common/ojph_socket.h b/src/apps/common/ojph_socket.h new file mode 100644 index 00000000..f14cebac --- /dev/null +++ b/src/apps/common/ojph_socket.h @@ -0,0 +1,129 @@ +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. +// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. +// File: ojph_socket.h +// Author: Aous Naman +// Date: 17 April 2024 +//***************************************************************************/ + +#ifndef OJPH_SOCKET_H +#define OJPH_SOCKET_H + +#include +#include "ojph_arch.h" + +#ifdef OJPH_OS_WINDOWS + #include + #include + + typedef SOCKET ojph_socket; + #define OJPH_INVALID_SOCKET (INVALID_SOCKET) +#else + #include + #include + #include + #include + #include + #include + #include + + typedef int ojph_socket; + #define OJPH_INVALID_SOCKET (-1) +#endif + +namespace ojph +{ + namespace net + { + /////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + /////////////////////////////////////////////////////////////////////////// + + //************************************************************************/ + /** @brief A small wrapper for socket that only abstract Winsock2 + * + * This is a small wrapper that only abstracts the difference between + * Windows and Linux/MacOS socket implementations. + * It does not not do much other than carry int for Linux/OS and + * SOCKET for Windows, which is unsigned int/int64. 
+ */ + class socket { + public: + socket() { s = OJPH_INVALID_SOCKET; } + socket(ojph_socket s); + void close(); + ojph_socket intern() { return s; } + + private: + ojph_socket s; //! + }; + + /////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + /////////////////////////////////////////////////////////////////////////// + + //************************************************************************/ + /** @brief A small wrapper for some Winsock2 functionality + * + * This is useful for windows, as it initializes and destroys the library. + * It keeps a count of how many times the constructor is called, + * reducing the count whenever the destructor is called. When the + * count reaches zero, the library is destroyed -- Windows only. + * + * It also allows the creation of a socket, access to the last error + * in a portable way, and the translation of an error into a text + * message. + */ + class socket_manager { + public: + socket_manager(); + ~socket_manager(); + + socket create_socket(int domain, int type, int protocol); + int get_last_error(); + std::string get_last_error_message(); + }; + + } // !net namespace +} // !ojph namespace + + + +#endif // !OJPH_SOCKET_H \ No newline at end of file diff --git a/src/apps/ojph_expand/ojph_expand.cpp b/src/apps/ojph_expand/ojph_expand.cpp index 6b836680..52e29306 100644 --- a/src/apps/ojph_expand/ojph_expand.cpp +++ b/src/apps/ojph_expand/ojph_expand.cpp @@ -203,7 +203,7 @@ int main(int argc, char *argv[]) { try { if (output_filename == NULL) - OJPH_ERROR(0x020000008, + OJPH_ERROR(0x02000001, "Please provide an output file using the -o option\n"); ojph::j2c_infile j2c_file; @@ -231,7 +231,7 @@ int main(int argc, char *argv[]) { { if (siz.get_num_components() != 1) - OJPH_ERROR(0x020000001, + OJPH_ERROR(0x02000002, "The file has more than one color component, but .pgm can " "contain only one color component\n"); ppm.configure(siz.get_recon_width(0), siz.get_recon_height(0), 
@@ -245,7 +245,7 @@ int main(int argc, char *argv[]) { ojph::param_siz siz = codestream.access_siz(); if (siz.get_num_components() != 3) - OJPH_ERROR(0x020000002, + OJPH_ERROR(0x02000003, "The file has %d color components; this cannot be saved to" " a .ppm file\n", siz.get_num_components()); bool all_same = true; @@ -256,7 +256,7 @@ int main(int argc, char *argv[]) { all_same = all_same && (p1.x == p.x) && (p1.y == p.y); } if (!all_same) - OJPH_ERROR(0x020000003, + OJPH_ERROR(0x02000004, "To save an image to ppm, all the components must have the " "same downsampling ratio\n"); ppm.configure(siz.get_recon_width(0), siz.get_recon_height(0), @@ -278,7 +278,7 @@ int main(int argc, char *argv[]) { all_same = all_same && (p1.x == p.x) && (p1.y == p.y); } if (!all_same) - OJPH_ERROR(0x020000008, + OJPH_ERROR(0x02000005, "To save an image to tif(f), all the components must have the " "same downsampling ratio\n"); ojph::ui32 bit_depths[4] = { 0, 0, 0, 0 }; @@ -298,12 +298,12 @@ int main(int argc, char *argv[]) { ojph::param_siz siz = codestream.access_siz(); if (siz.get_num_components() != 3 && siz.get_num_components() != 1) - OJPH_ERROR(0x020000004, + OJPH_ERROR(0x02000006, "The file has %d color components; this cannot be saved to" " .yuv file\n", siz.get_num_components()); ojph::param_cod cod = codestream.access_cod(); if (cod.is_using_color_transform()) - OJPH_ERROR(0x020000005, + OJPH_ERROR(0x02000007, "The current implementation of yuv file object does not" " support saving file when conversion from yuv to rgb is" " needed; in any case, this is not the normal usage of yuv" @@ -325,7 +325,7 @@ int main(int argc, char *argv[]) { ojph::param_siz siz = codestream.access_siz(); if (siz.get_num_components() != 1) - OJPH_ERROR(0x020000006, + OJPH_ERROR(0x02000008, "The file has %d color components; this cannot be saved to" " .raw file (only one component is allowed).\n", siz.get_num_components()); @@ -338,17 +338,17 @@ int main(int argc, char *argv[]) { } else #ifdef 
OJPH_ENABLE_TIFF_SUPPORT - OJPH_ERROR(0x020000007, + OJPH_ERROR(0x02000009, "unknown output file extension; only pgm, ppm, tif(f) and raw(yuv))" " are supported\n"); #else - OJPH_ERROR(0x020000006, + OJPH_ERROR(0x0200000A, "unknown output file extension; only pgm, ppm, and raw(yuv) are" " supported\n"); #endif // !OJPH_ENABLE_TIFF_SUPPORT } else - OJPH_ERROR(0x020000007, + OJPH_ERROR(0x0200000B, "Please supply a proper output filename with a proper extension\n"); codestream.create(); diff --git a/src/apps/ojph_stream_expand/CMakeLists.txt b/src/apps/ojph_stream_expand/CMakeLists.txt new file mode 100644 index 00000000..5923950b --- /dev/null +++ b/src/apps/ojph_stream_expand/CMakeLists.txt @@ -0,0 +1,26 @@ +## building ojph_stream_expand +############################## + +if (OJPH_BUILD_STREAM_EXPAND) + + include_directories(../common) + include_directories(../../core/common) + + file(GLOB OJPH_STREAM_EXPAND "ojph_stream_expand.cpp") + file(GLOB OJPH_SOCKET "../others/ojph_socket.cpp") + file(GLOB OJPH_SOCKET_H "../common/ojph_socket.h") + + list(APPEND SOURCES ${OJPH_STREAM_EXPAND} ${OJPH_SOCKET} ${OJPH_SOCKET_H}) + + source_group("main" FILES ${OJPH_STREAM_EXPAND}) + source_group("others" FILES ${OJPH_SOCKET}) + source_group("common" FILES ${OJPH_SOCKET_H}) + + add_executable(ojph_stream_expand ${SOURCES}) + if(MSVC) + target_link_libraries(ojph_stream_expand PUBLIC openjph ws2_32) + else() + target_link_libraries(ojph_stream_expand PUBLIC openjph) + endif(MSVC) + +endif(OJPH_BUILD_STREAM_EXPAND) diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp new file mode 100644 index 00000000..c4b3acae --- /dev/null +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -0,0 +1,100 @@ +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. 
+// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. 
+// File: ojph_stream_expand.cpp +// Author: Aous Naman +// Date: 17 April 2024 +//***************************************************************************/ + +#include +#include +#include +#include "ojph_message.h" +#include "ojph_socket.h" + +#define BUFLEN 2048 //Max length of buffer +#define PORT 8080 //The port on which to listen for incoming data + +int main() +{ + ojph::net::socket_manager smanager; + + //Create a socket + ojph::net::socket s = + smanager.create_socket(AF_INET , SOCK_DGRAM , IPPROTO_UDP); + if(s.intern() < 0) + { + std::string err = smanager.get_last_error_message(); + OJPH_ERROR(0x02000001, "Could not create socket : %s\n", err.data()); + } + + //Prepare the sockaddr_in structure + struct sockaddr_in server; + server.sin_family = AF_INET; + server.sin_addr.s_addr = htonl(INADDR_ANY); + server.sin_port = htons(PORT); + + //Bind + if( bind(s.intern(), (struct sockaddr *)&server, sizeof(server)) == -1) + { + std::string err = smanager.get_last_error_message(); + OJPH_ERROR(0x02000002, "Could not create socket : %s\n", err.data()); + } + + //keep listening for data + while(1) + { + char buf[BUFLEN]; + memset(buf, 0, BUFLEN); + + // receive data -- this is a blocking call + struct sockaddr_in si_other; + socklen_t socklen = sizeof(si_other); + int recv_len = recvfrom( + s.intern(), buf, BUFLEN, 0, (struct sockaddr *) &si_other, &socklen); + if (recv_len < 0) + { + std::string err = smanager.get_last_error_message(); + OJPH_ERROR(0x02000003, "Could not create socket : %s\n", err.data()); + } + + // print details of the client/peer and the data received + char src_addr[1024]; + inet_ntop(AF_INET, &si_other.sin_addr, src_addr, sizeof(src_addr)); + printf("Received packet from %s:%d .. 
", src_addr, ntohs(si_other.sin_port)); + printf("Data: %02x\n" , buf[0]); + } + + s.close(); + return 0; +} + diff --git a/src/apps/others/ojph_socket.cpp b/src/apps/others/ojph_socket.cpp new file mode 100644 index 00000000..bbff963f --- /dev/null +++ b/src/apps/others/ojph_socket.cpp @@ -0,0 +1,160 @@ +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. +// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************/ +// This file is part of the OpenJPH software implementation. +// File: ojph_socket.cpp +// Author: Aous Naman +// Date: 17 April 2024 +//***************************************************************************/ + +#include +#include +#include "ojph_message.h" +#include "ojph_socket.h" + +int ojph_socket_manager_counter = 0; + +namespace ojph +{ + namespace net + { + + /////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + /////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////// + socket::socket(ojph_socket s) + { + this->s = s; + } + + /////////////////////////////////////////////////////////////////////////// + void socket::close() + { + + if (s != OJPH_INVALID_SOCKET) + { + #ifdef OJPH_OS_WINDOWS + ::closesocket(s); + #else + ::close(s); + #endif + s = OJPH_INVALID_SOCKET; + } + } + + /////////////////////////////////////////////////////////////////////////// + // + // + // + // + // + /////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////// + socket_manager::socket_manager() + { + if (ojph_socket_manager_counter == 0) + { + #ifdef OJPH_OS_WINDOWS + WSADATA wsa; + if (WSAStartup(MAKEWORD(2,2), &wsa) != 0) + { + std::string err = get_last_error_message(); + OJPH_ERROR(0x00080001, "Could not create socket : %s\n", err.data()); + } + #endif + } + ++ojph_socket_manager_counter; + } + + /////////////////////////////////////////////////////////////////////////// + socket_manager::~socket_manager() + { + assert(ojph_socket_manager_counter >= 1); + --ojph_socket_manager_counter; + if (ojph_socket_manager_counter == 0) + { + #ifdef _MSC_VER + WSACleanup(); + #endif + } + } + + 
/////////////////////////////////////////////////////////////////////////// + socket socket_manager::create_socket(int domain, int type, int protocol) + { + socket s(::socket(domain, type, protocol)); + return s; + } + + /////////////////////////////////////////////////////////////////////////// + int socket_manager::get_last_error() + { + #ifdef OJPH_OS_WINDOWS + return WSAGetLastError(); + #else + return errno; + #endif + } + + /////////////////////////////////////////////////////////////////////////// + std::string socket_manager::get_last_error_message() + { + int errnum = get_last_error(); + if( errnum == 0 ) + return std::string(""); + const int max_buf_size = 1024; + char buf[max_buf_size]; + char *v = buf; + #ifdef OJPH_OS_WINDOWS + size_t size = FormatMessage( FORMAT_MESSAGE_FROM_SYSTEM + | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, errnum, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + buf, max_buf_size, NULL); + buf[max_buf_size - 1] = 0; + #else + // it is not clear if the returned value is in buf or in v + v = strerror_r(errnum, (char*)buf, max_buf_size); + v[max_buf_size - 1] = 0; + #endif + std::string str; + str = v; + return str; + } + } // !net namespace +} // !ojph namespace diff --git a/src/core/common/ojph_version.h b/src/core/common/ojph_version.h index ff62f0aa..debd7be3 100644 --- a/src/core/common/ojph_version.h +++ b/src/core/common/ojph_version.h @@ -34,5 +34,5 @@ //***************************************************************************/ #define OPENJPH_VERSION_MAJOR 0 -#define OPENJPH_VERSION_MINOR 11 +#define OPENJPH_VERSION_MINOR 12 #define OPENJPH_VERSION_PATCH 0 diff --git a/src/core/others/ojph_message.cpp b/src/core/others/ojph_message.cpp index d703b6ad..40134e45 100644 --- a/src/core/others/ojph_message.cpp +++ b/src/core/others/ojph_message.cpp @@ -72,6 +72,9 @@ namespace ojph { void message_info::operator()(int info_code, const char* file_name, int line_num, const char* fmt, ...) 
{ + if (info_stream == NULL) + return; + fprintf(info_stream, "ojph info 0x%08X at %s:%d: ", info_code, file_name, line_num); va_list args; @@ -110,6 +113,9 @@ namespace ojph { void message_warning::operator()(int warn_code, const char* file_name, int line_num, const char *fmt, ...) { + if (warning_stream == NULL) + return; + fprintf(warning_stream, "ojph warning 0x%08X at %s:%d: ", warn_code, file_name, line_num); va_list args; @@ -148,6 +154,9 @@ namespace ojph { void message_error::operator()(int error_code, const char* file_name, int line_num, const char *fmt, ...) { + if (error_stream == NULL) + return; + fprintf(error_stream, "ojph error 0x%08X at %s:%d: ", error_code, file_name, line_num); va_list args; From 65a4beb6413966e6867fc85b5cc0be27d2c3ee88 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 17 Apr 2024 11:56:42 +1000 Subject: [PATCH 074/348] A bug fix for MacOS, and other small fixes. --- CMakeLists.txt | 6 +- src/apps/ojph_stream_expand/CMakeLists.txt | 1 + .../ojph_stream_expand/ojph_stream_expand.cpp | 64 +++++++++---------- src/apps/others/ojph_socket.cpp | 10 ++- 4 files changed, 44 insertions(+), 37 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f509f0d..e3398eb4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,7 +52,7 @@ message(STATUS "Building ${CMAKE_BUILD_TYPE}") # C++14 is needed for gtest, otherwise, C++11 is sufficient for the library set(CMAKE_CXX_STANDARD 14) if (MSVC) - add_compile_options(-D_CRT_SECURE_NO_WARNINGS) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) endif() if (CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU") add_compile_options( @@ -66,9 +66,9 @@ endif() ## The option OJPH_DISABLE_INTEL_SIMD and OJPH_ENABLE_INTEL_AVX512 if (OJPH_DISABLE_INTEL_SIMD) - add_compile_options(-DOJPH_DISABLE_INTEL_SIMD) + add_compile_definitions(OJPH_DISABLE_INTEL_SIMD) elseif (OJPH_ENABLE_INTEL_AVX512) - add_compile_options(-DOJPH_ENABLE_INTEL_AVX512) + add_compile_definitions(OJPH_ENABLE_INTEL_AVX512) endif() ## 
Build library and applications diff --git a/src/apps/ojph_stream_expand/CMakeLists.txt b/src/apps/ojph_stream_expand/CMakeLists.txt index 5923950b..cee9493f 100644 --- a/src/apps/ojph_stream_expand/CMakeLists.txt +++ b/src/apps/ojph_stream_expand/CMakeLists.txt @@ -5,6 +5,7 @@ if (OJPH_BUILD_STREAM_EXPAND) include_directories(../common) include_directories(../../core/common) + add_compile_options(-std=c++11) file(GLOB OJPH_STREAM_EXPAND "ojph_stream_expand.cpp") file(GLOB OJPH_SOCKET "../others/ojph_socket.cpp") diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index c4b3acae..1a59cf5d 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -48,53 +48,53 @@ int main() { ojph::net::socket_manager smanager; - //Create a socket + // create a socket ojph::net::socket s = - smanager.create_socket(AF_INET , SOCK_DGRAM , IPPROTO_UDP); - if(s.intern() < 0) - { + smanager.create_socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if(s.intern() < 0) + { std::string err = smanager.get_last_error_message(); OJPH_ERROR(0x02000001, "Could not create socket : %s\n", err.data()); - } - - //Prepare the sockaddr_in structure + } + + // prepare the sockaddr_in structure struct sockaddr_in server; - server.sin_family = AF_INET; - server.sin_addr.s_addr = htonl(INADDR_ANY); - server.sin_port = htons(PORT); - - //Bind - if( bind(s.intern(), (struct sockaddr *)&server, sizeof(server)) == -1) - { + server.sin_family = AF_INET; + server.sin_addr.s_addr = htonl(INADDR_ANY); + server.sin_port = htons(PORT); + + //Bind + if( bind(s.intern(), (struct sockaddr *)&server, sizeof(server)) == -1) + { std::string err = smanager.get_last_error_message(); OJPH_ERROR(0x02000002, "Could not create socket : %s\n", err.data()); - } + } + + // keep listening for data + while(1) + { + char buf[BUFLEN]; + memset(buf, 0, BUFLEN); - //keep listening for data - while(1) - { - char 
buf[BUFLEN]; - memset(buf, 0, BUFLEN); - - // receive data -- this is a blocking call - struct sockaddr_in si_other; + // receive data -- this is a blocking call + struct sockaddr_in si_other; socklen_t socklen = sizeof(si_other); - int recv_len = recvfrom( + int recv_len = (int)recvfrom( s.intern(), buf, BUFLEN, 0, (struct sockaddr *) &si_other, &socklen); - if (recv_len < 0) - { + if (recv_len < 0) + { std::string err = smanager.get_last_error_message(); OJPH_ERROR(0x02000003, "Could not create socket : %s\n", err.data()); - } - - // print details of the client/peer and the data received + } + + // print details of the client/peer and the data received char src_addr[1024]; inet_ntop(AF_INET, &si_other.sin_addr, src_addr, sizeof(src_addr)); - printf("Received packet from %s:%d .. ", src_addr, ntohs(si_other.sin_port)); + printf("Received packet from %s:%d .. ", src_addr, ntohs(si_other.sin_port)); printf("Data: %02x\n" , buf[0]); - } + } s.close(); - return 0; + return 0; } diff --git a/src/apps/others/ojph_socket.cpp b/src/apps/others/ojph_socket.cpp index bbff963f..b6f797c7 100644 --- a/src/apps/others/ojph_socket.cpp +++ b/src/apps/others/ojph_socket.cpp @@ -147,10 +147,16 @@ namespace ojph MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, max_buf_size, NULL); buf[max_buf_size - 1] = 0; - #else + #elif (defined OJPH_OS_APPLE) || \ + ((_POSIX_C_SOURCE >= 200112L) && !_GNU_SOURCE) // it is not clear if the returned value is in buf or in v + int t = strerror_r(errnum, (char*)buf, max_buf_size); + if (t != 0) + OJPH_ERROR(0x00080002, "Error retrieving a text message for " + "socket error number %d\n", errnum); + buf[max_buf_size - 1] = 0; + #else v = strerror_r(errnum, (char*)buf, max_buf_size); - v[max_buf_size - 1] = 0; #endif std::string str; str = v; From cc7d60eedb26d9ba4010ee0369099465b3196314 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 17 Apr 2024 16:19:46 +1000 Subject: [PATCH 075/348] Add commandline arguments. Other small changes. 
--- README.md | 2 +- src/apps/common/ojph_socket.h | 1 + src/apps/ojph_expand/ojph_expand.cpp | 8 +- .../ojph_stream_expand/ojph_stream_expand.cpp | 156 +++++++++++++++--- src/apps/others/ojph_socket.cpp | 15 +- src/core/common/ojph_arg.h | 11 ++ 6 files changed, 159 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index da4d2d61..15d65972 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ The interested reader is referred to the [short HTJ2K white paper](http://ds.jpe The standard is available free of charge from [ITU website](https://www.itu.int/rec/T-REC-T.814/en). It can also be purchased from the [ISO website](https://www.iso.org/standard/76621.html). -# Tabke of Contents # +# Table of Contents # * [Status](/docs/status.md) * [Compiling](./docs/compiling.md) diff --git a/src/apps/common/ojph_socket.h b/src/apps/common/ojph_socket.h index f14cebac..73f5c9ab 100644 --- a/src/apps/common/ojph_socket.h +++ b/src/apps/common/ojph_socket.h @@ -118,6 +118,7 @@ namespace ojph socket create_socket(int domain, int type, int protocol); int get_last_error(); + std::string get_error_message(int errnum); std::string get_last_error_message(); }; diff --git a/src/apps/ojph_expand/ojph_expand.cpp b/src/apps/ojph_expand/ojph_expand.cpp index 52e29306..3e86a4f3 100644 --- a/src/apps/ojph_expand/ojph_expand.cpp +++ b/src/apps/ojph_expand/ojph_expand.cpp @@ -173,13 +173,13 @@ int main(int argc, char *argv[]) { if (argc <= 1) { std::cout << "\nThe following arguments are necessary:\n" - " -i input file name\n" + " -i \n" #ifdef OJPH_ENABLE_TIFF_SUPPORT - " -o output file name (either pgm, ppm, tif(f), or raw(yuv))\n\n" + " -o (either pgm, ppm, tif(f), or raw(yuv))\n\n" #else - " -o output file name (either pgm, ppm, or raw(yuv))\n\n" + " -o (either pgm, ppm, or raw(yuv))\n\n" #endif // !OJPH_ENABLE_TIFF_SUPPORT - "The following arguments are options:\n" + "The following arguments are optional:\n" " -skip_res x,y a comma-separated list of two elements 
containing the\n" " number of resolutions to skip. You can specify 1 or 2\n" " parameters; the first specifies the number of resolution\n" diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index 1a59cf5d..f6d46332 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -35,63 +35,169 @@ // Date: 17 April 2024 //***************************************************************************/ -#include -#include -#include +#include #include "ojph_message.h" +#include "ojph_arg.h" #include "ojph_socket.h" -#define BUFLEN 2048 //Max length of buffer -#define PORT 8080 //The port on which to listen for incoming data +#ifdef OJPH_OS_WINDOWS -int main() +#else + #include +#endif + +////////////////////////////////////////////////////////////////////////////// +static +bool get_arguments(int argc, char *argv[], + char *&recv_addr, char *&recv_port, char *&target_name, + ojph::ui32& num_threads, + ojph::ui32& num_inflight_packets, + bool& display, bool& decode, bool& store) { - ojph::net::socket_manager smanager; + ojph::cli_interpreter interpreter; + interpreter.init(argc, argv); + + interpreter.reinterpret("-addr", recv_addr); + interpreter.reinterpret("-port", recv_port); + interpreter.reinterpret("-o", target_name); + interpreter.reinterpret("-num_threads", num_threads); + interpreter.reinterpret("-num_packets", num_inflight_packets); + + display = interpreter.reinterpret("-display"); + decode = interpreter.reinterpret("-decode"); + store = interpreter.reinterpret("-store"); + + if (interpreter.is_exhausted() == false) { + printf("The following arguments were not interpreted:\n"); + ojph::argument t = interpreter.get_argument_zero(); + t = interpreter.get_next_avail_argument(t); + while (t.is_valid()) { + printf("%s\n", t.arg); + t = interpreter.get_next_avail_argument(t); + } + return false; + } + + if (recv_addr == NULL) + { + 
printf("Please use \"-addr\" to provide a receiving address, " + "\"localhost\" or a local network card IP address.\n"); + return false; + } + if (recv_port == NULL) + { + printf("Please use \"-port\" to provide a port number.\n"); + return false; + } + + return true; +} + +////////////////////////////////////////////////////////////////////////////// +int main(int argc, char* argv[]) +{ + constexpr int buffer_size = 2048; // buffer size + + char *recv_addr = NULL; + char *recv_port = NULL; + char *target_name = NULL; + ojph::ui32 num_threads = 0; + ojph::ui32 num_inflight_packets = 5; + bool display = false; + bool decode = false; + bool store = true; - // create a socket - ojph::net::socket s = - smanager.create_socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if(s.intern() < 0) + if (argc <= 1) { + printf( + "\n" + "The following arguments are necessary:\n" + " -addr , or\n" + " The address should be either localhost, or\n" + " a local network card IP address\n" + " example: -addr 127.0.0.1\n" + " -port \n" + "\n" + "\n" + "The following arguments are options:\n" + " -skip_res x,y a comma-separated list of two elements containing the\n" + " number of resolutions to skip. You can specify 1 or 2\n" + " parameters; the first specifies the number of resolution\n" + " for which data reading is skipped. The second is the\n" + " number of skipped resolution for reconstruction, which is\n" + " either equal to the first or smaller. 
If the second is not\n" + " specified, it is made to equal to the first.\n" + " -resilient true if you want the decoder to be more tolerant of errors\n" + " in the codestream\n\n" + ); + exit(-1); + } + if (!get_arguments(argc, argv, recv_addr, recv_port, + target_name, num_threads, num_inflight_packets, + display, decode, store)) { - std::string err = smanager.get_last_error_message(); - OJPH_ERROR(0x02000001, "Could not create socket : %s\n", err.data()); + exit(-1); } - // prepare the sockaddr_in structure + ojph::net::socket_manager smanager; + struct sockaddr_in server; server.sin_family = AF_INET; - server.sin_addr.s_addr = htonl(INADDR_ANY); - server.sin_port = htons(PORT); + int result = inet_pton(AF_INET, recv_addr, &server.sin_addr); + if (result != 1) + OJPH_ERROR(0x02000001, "Please provide a valid ip address, " + "the provided address %s is not valid\n", recv_addr); + { + ojph::ui16 port_number = 0; + port_number = (ojph::ui16)atoi(recv_port); + if (port_number == 0) + { + OJPH_ERROR(0x02000003, "Please provide a valid port number. 
" + "The number you provided is %d\n", recv_port); + } + server.sin_port = htons(port_number); + } + + // create a socket + ojph::net::socket s; + s = smanager.create_socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if(s.intern() == OJPH_INVALID_SOCKET) + { + std::string err = smanager.get_last_error_message(); + OJPH_ERROR(0x02000004, "Could not create socket : %s\n", err.data()); + } - //Bind + // bind if( bind(s.intern(), (struct sockaddr *)&server, sizeof(server)) == -1) { std::string err = smanager.get_last_error_message(); - OJPH_ERROR(0x02000002, "Could not create socket : %s\n", err.data()); + OJPH_ERROR(0x02000005, "Could not create socket : %s\n", err.data()); } // keep listening for data while(1) { - char buf[BUFLEN]; - memset(buf, 0, BUFLEN); + char buf[buffer_size]; + memset(buf, 0, buffer_size); // receive data -- this is a blocking call struct sockaddr_in si_other; socklen_t socklen = sizeof(si_other); - int recv_len = (int)recvfrom( - s.intern(), buf, BUFLEN, 0, (struct sockaddr *) &si_other, &socklen); + int recv_len = (int)recvfrom(s.intern(), buf, buffer_size, 0, + (struct sockaddr *) &si_other, &socklen); if (recv_len < 0) { std::string err = smanager.get_last_error_message(); - OJPH_ERROR(0x02000003, "Could not create socket : %s\n", err.data()); + OJPH_ERROR(0x02000004, "Could not create socket : %s\n", err.data()); } // print details of the client/peer and the data received char src_addr[1024]; inet_ntop(AF_INET, &si_other.sin_addr, src_addr, sizeof(src_addr)); - printf("Received packet from %s:%d .. ", src_addr, ntohs(si_other.sin_port)); - printf("Data: %02x\n" , buf[0]); + if (buf[12] != 0) + { + printf("Received packet from %s:%d .. 
", src_addr, ntohs(si_other.sin_port)); + printf("Data: %02x\n" , buf[0]); + } } s.close(); diff --git a/src/apps/others/ojph_socket.cpp b/src/apps/others/ojph_socket.cpp index b6f797c7..3660dd3e 100644 --- a/src/apps/others/ojph_socket.cpp +++ b/src/apps/others/ojph_socket.cpp @@ -132,11 +132,10 @@ namespace ojph } /////////////////////////////////////////////////////////////////////////// - std::string socket_manager::get_last_error_message() + std::string socket_manager::get_error_message(int errnum) { - int errnum = get_last_error(); if( errnum == 0 ) - return std::string(""); + return std::string(""); const int max_buf_size = 1024; char buf[max_buf_size]; char *v = buf; @@ -160,7 +159,15 @@ namespace ojph #endif std::string str; str = v; - return str; + return str; } + + /////////////////////////////////////////////////////////////////////////// + std::string socket_manager::get_last_error_message() + { + int errnum = get_last_error(); + return get_error_message(errnum); + } + } // !net namespace } // !ojph namespace diff --git a/src/core/common/ojph_arg.h b/src/core/common/ojph_arg.h index 6cac09d1..5743f950 100644 --- a/src/core/common/ojph_arg.h +++ b/src/core/common/ojph_arg.h @@ -201,6 +201,17 @@ namespace ojph { } } + /////////////////////////////////////////////////////////////////////////// + bool reinterpret(const char *str) { + argument t = find_argument(str); + if (t.is_valid()) { + release_argument(t); + return true; + } + else + return false; + } + /////////////////////////////////////////////////////////////////////////// void reinterpret_to_bool(const char *str, int& val) { argument t = find_argument(str); From 121ee8fbab8821dbb6554b47eb6b13c08fe935f7 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 17 Apr 2024 20:41:28 +1000 Subject: [PATCH 076/348] Small usage message improvement --- src/apps/ojph_stream_expand/CMakeLists.txt | 4 +-- .../ojph_stream_expand/ojph_stream_expand.cpp | 36 ++++++++++--------- 2 files changed, 22 insertions(+), 
18 deletions(-) diff --git a/src/apps/ojph_stream_expand/CMakeLists.txt b/src/apps/ojph_stream_expand/CMakeLists.txt index cee9493f..f9b3b98f 100644 --- a/src/apps/ojph_stream_expand/CMakeLists.txt +++ b/src/apps/ojph_stream_expand/CMakeLists.txt @@ -5,8 +5,8 @@ if (OJPH_BUILD_STREAM_EXPAND) include_directories(../common) include_directories(../../core/common) - add_compile_options(-std=c++11) - + set(CMAKE_CXX_STANDARD 14) + file(GLOB OJPH_STREAM_EXPAND "ojph_stream_expand.cpp") file(GLOB OJPH_SOCKET "../others/ojph_socket.cpp") file(GLOB OJPH_SOCKET_H "../common/ojph_socket.h") diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index f6d46332..a36dfbc0 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -105,29 +105,33 @@ int main(int argc, char* argv[]) ojph::ui32 num_inflight_packets = 5; bool display = false; bool decode = false; - bool store = true; + bool store = false; if (argc <= 1) { printf( "\n" "The following arguments are necessary:\n" - " -addr , or\n" - " The address should be either localhost, or\n" - " a local network card IP address\n" - " example: -addr 127.0.0.1\n" - " -port \n" - "\n" + " -addr , or\n" + " The address should be either localhost, or\n" + " a local network card IP address\n" + " example: -addr 127.0.0.1\n" + " -port \n" "\n" "The following arguments are options:\n" - " -skip_res x,y a comma-separated list of two elements containing the\n" - " number of resolutions to skip. You can specify 1 or 2\n" - " parameters; the first specifies the number of resolution\n" - " for which data reading is skipped. The second is the\n" - " number of skipped resolution for reconstruction, which is\n" - " either equal to the first or smaller. 
If the second is not\n" - " specified, it is made to equal to the first.\n" - " -resilient true if you want the decoder to be more tolerant of errors\n" - " in the codestream\n\n" + " -num_threads number of threads for decoding and saving\n" + " files. You can specify 0 here, and the main thread\n" + " will be responsible for saving files as well.\n" + " -num_packets number of in-flight packets; this is the\n" + " maximum number of packets to wait before an out-of-order\n" + " or lost packet is considered lost.\n" + " -target_name target file name without extension; the same\n" + " printf formating can be used. For example, output_%%05d.\n" + " An extension will be added, either .j2c for original\n" + " frames, or .ppm for decoded images.\n" + " -display use this to display decoded frames.\n" + " -decode use this to decode files before saving them.\n" + " -store use this to store files\n." + "\n" ); exit(-1); } From a69bfdcecbe7b70a11cc67e4bad5b9c37059f0bc Mon Sep 17 00:00:00 2001 From: aous72 Date: Thu, 18 Apr 2024 15:36:01 +1000 Subject: [PATCH 077/348] Small change --- src/apps/ojph_stream_expand/CMakeLists.txt | 3 +- .../ojph_str_ex_support.cpp | 63 +++++++++++++++ .../ojph_stream_expand/ojph_str_ex_support.h | 78 +++++++++++++++++++ .../ojph_stream_expand/ojph_stream_expand.cpp | 6 ++ 4 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 src/apps/ojph_stream_expand/ojph_str_ex_support.cpp create mode 100644 src/apps/ojph_stream_expand/ojph_str_ex_support.h diff --git a/src/apps/ojph_stream_expand/CMakeLists.txt b/src/apps/ojph_stream_expand/CMakeLists.txt index f9b3b98f..a32a3f88 100644 --- a/src/apps/ojph_stream_expand/CMakeLists.txt +++ b/src/apps/ojph_stream_expand/CMakeLists.txt @@ -7,7 +7,7 @@ if (OJPH_BUILD_STREAM_EXPAND) include_directories(../../core/common) set(CMAKE_CXX_STANDARD 14) - file(GLOB OJPH_STREAM_EXPAND "ojph_stream_expand.cpp") + file(GLOB OJPH_STREAM_EXPAND "*.cpp") file(GLOB OJPH_SOCKET "../others/ojph_socket.cpp") 
file(GLOB OJPH_SOCKET_H "../common/ojph_socket.h") @@ -24,4 +24,5 @@ if (OJPH_BUILD_STREAM_EXPAND) target_link_libraries(ojph_stream_expand PUBLIC openjph) endif(MSVC) + install(TARGETS ojph_stream_expand DESTINATION bin) endif(OJPH_BUILD_STREAM_EXPAND) diff --git a/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp b/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp new file mode 100644 index 00000000..64527491 --- /dev/null +++ b/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp @@ -0,0 +1,63 @@ +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. +// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. +// File: ojph_str_ex_support.h +// Author: Aous Naman +// Date: 18 April 2024 +//***************************************************************************/ + +#include "ojph_str_ex_support.h" + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// + + + + + + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// diff --git a/src/apps/ojph_stream_expand/ojph_str_ex_support.h b/src/apps/ojph_stream_expand/ojph_str_ex_support.h new file mode 100644 index 00000000..e1c95fbe --- /dev/null +++ b/src/apps/ojph_stream_expand/ojph_str_ex_support.h @@ -0,0 +1,78 @@ +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. 
+// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. 
+// File: ojph_str_ex_support.cpp +// Author: Aous Naman +// Date: 18 April 2024 +//***************************************************************************/ + +#ifndef OJPH_STR_EX_SUPPORT_H +#define OJPH_STR_EX_SUPPORT_H + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/** @brief + * + */ +class ojph_packet_buffer +{ + + + +}; + + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/** @brief + * + */ +class ojph_file_handler +{ + + + +}; + +#endif //!OJPH_STR_EX_SUPPORT_H \ No newline at end of file diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index a36dfbc0..b8e48d9f 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -39,6 +39,7 @@ #include "ojph_message.h" #include "ojph_arg.h" #include "ojph_socket.h" +#include "ojph_str_ex_support.h" #ifdef OJPH_OS_WINDOWS @@ -89,6 +90,11 @@ bool get_arguments(int argc, char *argv[], printf("Please use \"-port\" to provide a port number.\n"); return false; } + if (store && target_name == NULL) + { + printf("Please use \"-o\" to provide a target file name.\n"); + return false; + } return true; } From 17896dd49964635223b697b9de6ae4eff87e7431 Mon Sep 17 00:00:00 2001 From: aous72 Date: Thu, 18 Apr 2024 16:03:17 +1000 Subject: [PATCH 078/348] WIP --- src/apps/common/ojph_socket.h | 3 +- .../ojph_str_ex_support.cpp | 16 +++++-- .../ojph_stream_expand/ojph_str_ex_support.h | 46 +++++++++++++++++-- .../ojph_stream_expand/ojph_stream_expand.cpp | 19 ++++---- 4 files changed, 68 insertions(+), 16 deletions(-) diff --git a/src/apps/common/ojph_socket.h b/src/apps/common/ojph_socket.h index 73f5c9ab..6ed97f61 100644 --- 
a/src/apps/common/ojph_socket.h +++ b/src/apps/common/ojph_socket.h @@ -102,7 +102,8 @@ namespace ojph //************************************************************************/ /** @brief A small wrapper for some Winsock2 functionality * - * This is useful for windows, as it initializes and destroys the library. + * This is useful for windows, as it initializes and destroys + * WinSock2 library. * It keeps a count of how many times the constructor is called, * reducing the count whenever the destructor is called. When the * count reaches zero, the library is destroyed -- Windows only. diff --git a/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp b/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp index 64527491..77541df7 100644 --- a/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp +++ b/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp @@ -35,6 +35,7 @@ // Date: 18 April 2024 //***************************************************************************/ +#include #include "ojph_str_ex_support.h" /////////////////////////////////////////////////////////////////////////////// @@ -46,11 +47,18 @@ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// +void ojph_packets_handler::init(int num_packets) +{ + assert(num_packets == 0); + avail = packet_store = new packet[num_packets]; + this->num_packets = num_packets; +} - - - - +/////////////////////////////////////////////////////////////////////////////// +packet* ojph_packets_handler::exchange(packet* p) +{ + return NULL; +} /////////////////////////////////////////////////////////////////////////////// // diff --git a/src/apps/ojph_stream_expand/ojph_str_ex_support.h b/src/apps/ojph_stream_expand/ojph_str_ex_support.h index e1c95fbe..a2109ec3 100644 --- a/src/apps/ojph_stream_expand/ojph_str_ex_support.h +++ b/src/apps/ojph_stream_expand/ojph_str_ex_support.h @@ -38,6 +38,8 @@ #ifndef 
OJPH_STR_EX_SUPPORT_H #define OJPH_STR_EX_SUPPORT_H +#include "ojph_base.h" + /////////////////////////////////////////////////////////////////////////////// // // @@ -46,17 +48,50 @@ // /////////////////////////////////////////////////////////////////////////////// +/*****************************************************************************/ /** @brief * */ -class ojph_packet_buffer +struct packet { + static constexpr int max_size = 2048; + + packet() { num_bytes = 0; next = NULL; } + char data[max_size]; + int num_bytes; + packet* next; +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// +/*****************************************************************************/ +/** @brief + * + */ +class ojph_packets_handler +{ +public: + ojph_packets_handler() + { avail = in_use = NULL; num_packets = 0; packet_store = NULL; } + ~ojph_packets_handler() + { if (packet_store) delete[] packet_store; } + void init(int num_packets); + packet* exchange(packet* p); +private: + packet* avail; + packet* in_use; + int num_packets; + packet* packet_store; }; - /////////////////////////////////////////////////////////////////////////////// // // @@ -65,13 +100,18 @@ class ojph_packet_buffer // /////////////////////////////////////////////////////////////////////////////// +/*****************************************************************************/ /** @brief * */ -class ojph_file_handler +class ojph_files_handler { +public: + ojph_files_handler(); + ~ojph_files_handler(); +private: }; diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index b8e48d9f..2da67181 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -148,6 +148,8 @@ int main(int argc, char* argv[]) exit(-1); } + ojph_packets_handler 
packets_handler; + packets_handler.init(num_inflight_packets); ojph::net::socket_manager smanager; struct sockaddr_in server; @@ -184,15 +186,16 @@ int main(int argc, char* argv[]) } // keep listening for data + packet* pac = NULL; while(1) { - char buf[buffer_size]; - memset(buf, 0, buffer_size); + pac = packets_handler.exchange(pac); + memset(pac->data, 0, packet::max_size); // receive data -- this is a blocking call struct sockaddr_in si_other; socklen_t socklen = sizeof(si_other); - int recv_len = (int)recvfrom(s.intern(), buf, buffer_size, 0, + int recv_len = (int)recvfrom(s.intern(), pac->data, buffer_size, 0, (struct sockaddr *) &si_other, &socklen); if (recv_len < 0) { @@ -203,11 +206,11 @@ int main(int argc, char* argv[]) // print details of the client/peer and the data received char src_addr[1024]; inet_ntop(AF_INET, &si_other.sin_addr, src_addr, sizeof(src_addr)); - if (buf[12] != 0) - { - printf("Received packet from %s:%d .. ", src_addr, ntohs(si_other.sin_port)); - printf("Data: %02x\n" , buf[0]); - } + // if (buf[12] != 0) + // { + // printf("Received packet from %s:%d .. 
", src_addr, ntohs(si_other.sin_port)); + // printf("Data: %02x\n" , buf[0]); + // } } s.close(); From 42b3705179d198b830ab7a4eed3e733cdc8eea5d Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Thu, 18 Apr 2024 16:44:18 +1000 Subject: [PATCH 079/348] WIP --- .../ojph_str_ex_support.cpp | 31 +++++++++++++++++-- .../ojph_stream_expand/ojph_str_ex_support.h | 20 +++++++++--- .../ojph_stream_expand/ojph_stream_expand.cpp | 19 +++++++----- 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp b/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp index 77541df7..393c0855 100644 --- a/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp +++ b/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp @@ -36,8 +36,14 @@ //***************************************************************************/ #include +#include #include "ojph_str_ex_support.h" +namespace ojph +{ +namespace str_ex +{ + /////////////////////////////////////////////////////////////////////////////// // // @@ -47,17 +53,26 @@ /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// -void ojph_packets_handler::init(int num_packets) +void ojph_packets_handler::init(int num_packets, ojph_frames_handler* frames) { assert(num_packets == 0); avail = packet_store = new packet[num_packets]; this->num_packets = num_packets; + this->frames = frames; } /////////////////////////////////////////////////////////////////////////////// packet* ojph_packets_handler::exchange(packet* p) { - return NULL; + if (avail != NULL) + { + + } + else + { + + } + return NULL; } /////////////////////////////////////////////////////////////////////////////// @@ -69,3 +84,15 @@ packet* ojph_packets_handler::exchange(packet* p) /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// 
+ojph_frames_handler::ojph_frames_handler() {} +/////////////////////////////////////////////////////////////////////////////// +ojph_frames_handler::~ojph_frames_handler() {} +/////////////////////////////////////////////////////////////////////////////// +void ojph_frames_handler::init(int num_threads) +{ + +} + + +} // !str_ex namespace +} // !ojph namespace \ No newline at end of file diff --git a/src/apps/ojph_stream_expand/ojph_str_ex_support.h b/src/apps/ojph_stream_expand/ojph_str_ex_support.h index a2109ec3..af83deb9 100644 --- a/src/apps/ojph_stream_expand/ojph_str_ex_support.h +++ b/src/apps/ojph_stream_expand/ojph_str_ex_support.h @@ -40,6 +40,13 @@ #include "ojph_base.h" +namespace ojph +{ +namespace str_ex +{ + + class ojph_packets_handler; + class ojph_frames_handler; /////////////////////////////////////////////////////////////////////////////// // // @@ -82,7 +89,7 @@ class ojph_packets_handler ~ojph_packets_handler() { if (packet_store) delete[] packet_store; } - void init(int num_packets); + void init(int num_packets, ojph_frames_handler* frames); packet* exchange(packet* p); private: @@ -90,6 +97,7 @@ class ojph_packets_handler packet* in_use; int num_packets; packet* packet_store; + ojph_frames_handler* frames; }; /////////////////////////////////////////////////////////////////////////////// @@ -104,15 +112,19 @@ class ojph_packets_handler /** @brief * */ -class ojph_files_handler +class ojph_frames_handler { public: - ojph_files_handler(); - ~ojph_files_handler(); + ojph_frames_handler(); + ~ojph_frames_handler(); + void init(int num_threads); private: }; +} // !str_ex namespace +} // !ojph namespace + #endif //!OJPH_STR_EX_SUPPORT_H \ No newline at end of file diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index 2da67181..8fe173ae 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -124,9 +124,10 @@ 
int main(int argc, char* argv[]) " -port \n" "\n" "The following arguments are options:\n" - " -num_threads number of threads for decoding and saving\n" - " files. You can specify 0 here, and the main thread\n" - " will be responsible for saving files as well.\n" + " -num_threads number of threads for decoding and\n" + " displaying files. This number is ignored unless\n" + " decode or display is selected, where it represents the\n" + " number of working threads." " -num_packets number of in-flight packets; this is the\n" " maximum number of packets to wait before an out-of-order\n" " or lost packet is considered lost.\n" @@ -148,8 +149,10 @@ int main(int argc, char* argv[]) exit(-1); } - ojph_packets_handler packets_handler; - packets_handler.init(num_inflight_packets); + ojph::str_ex::ojph_frames_handler frames_handler; + frames_handler.init(num_threads); + ojph::str_ex::ojph_packets_handler packets_handler; + packets_handler.init(num_inflight_packets, &frames_handler); ojph::net::socket_manager smanager; struct sockaddr_in server; @@ -186,11 +189,12 @@ int main(int argc, char* argv[]) } // keep listening for data - packet* pac = NULL; + ojph::str_ex::packet* pac = NULL; + while(1) { pac = packets_handler.exchange(pac); - memset(pac->data, 0, packet::max_size); + memset(pac->data, 0, ojph::str_ex::packet::max_size); // receive data -- this is a blocking call struct sockaddr_in si_other; @@ -206,6 +210,7 @@ int main(int argc, char* argv[]) // print details of the client/peer and the data received char src_addr[1024]; inet_ntop(AF_INET, &si_other.sin_addr, src_addr, sizeof(src_addr)); + inet_pton(AF_INET, "0.0.0.0", "0"); // if (buf[12] != 0) // { // printf("Received packet from %s:%d .. 
", src_addr, ntohs(si_other.sin_port)); From c7549c932ceccf8b2ba602256351ba84fb6a2a6b Mon Sep 17 00:00:00 2001 From: aous72 Date: Thu, 18 Apr 2024 18:14:54 +1000 Subject: [PATCH 080/348] WIP --- .../ojph_str_ex_support.cpp | 10 ++-- .../ojph_stream_expand/ojph_stream_expand.cpp | 58 ++++++++++--------- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp b/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp index 393c0855..f912d247 100644 --- a/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp +++ b/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp @@ -55,10 +55,10 @@ namespace str_ex /////////////////////////////////////////////////////////////////////////////// void ojph_packets_handler::init(int num_packets, ojph_frames_handler* frames) { - assert(num_packets == 0); - avail = packet_store = new packet[num_packets]; - this->num_packets = num_packets; - this->frames = frames; + assert(this->num_packets == 0); + avail = packet_store = new packet[num_packets]; + this->num_packets = num_packets; + this->frames = frames; } /////////////////////////////////////////////////////////////////////////////// @@ -72,7 +72,7 @@ packet* ojph_packets_handler::exchange(packet* p) { } - return NULL; + return packet_store; } /////////////////////////////////////////////////////////////////////////////// diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index 8fe173ae..3c6b7d6c 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -136,8 +136,8 @@ int main(int argc, char* argv[]) " An extension will be added, either .j2c for original\n" " frames, or .ppm for decoded images.\n" " -display use this to display decoded frames.\n" - " -decode use this to decode files before saving them.\n" - " -store use this to store files\n." 
+ " -decode use this to decode files and store them.\n" + " -store use this to store encoded files.\n." "\n" ); exit(-1); @@ -161,14 +161,13 @@ int main(int argc, char* argv[]) if (result != 1) OJPH_ERROR(0x02000001, "Please provide a valid ip address, " "the provided address %s is not valid\n", recv_addr); + { ojph::ui16 port_number = 0; port_number = (ojph::ui16)atoi(recv_port); if (port_number == 0) - { OJPH_ERROR(0x02000003, "Please provide a valid port number. " "The number you provided is %d\n", recv_port); - } server.sin_port = htons(port_number); } @@ -185,37 +184,42 @@ int main(int argc, char* argv[]) if( bind(s.intern(), (struct sockaddr *)&server, sizeof(server)) == -1) { std::string err = smanager.get_last_error_message(); - OJPH_ERROR(0x02000005, "Could not create socket : %s\n", err.data()); + OJPH_ERROR(0x02000005, + "Could not bind address to socket : %s\n", err.data()); } - // keep listening for data + // listen to incoming data, and forward it to packet_handler + bool first_packet = true; + ULONG src_addr; + USHORT src_port; ojph::str_ex::packet* pac = NULL; - while(1) { pac = packets_handler.exchange(pac); - memset(pac->data, 0, ojph::str_ex::packet::max_size); // receive data -- this is a blocking call - struct sockaddr_in si_other; - socklen_t socklen = sizeof(si_other); - int recv_len = (int)recvfrom(s.intern(), pac->data, buffer_size, 0, - (struct sockaddr *) &si_other, &socklen); - if (recv_len < 0) - { - std::string err = smanager.get_last_error_message(); - OJPH_ERROR(0x02000004, "Could not create socket : %s\n", err.data()); - } - - // print details of the client/peer and the data received - char src_addr[1024]; - inet_ntop(AF_INET, &si_other.sin_addr, src_addr, sizeof(src_addr)); - inet_pton(AF_INET, "0.0.0.0", "0"); - // if (buf[12] != 0) - // { - // printf("Received packet from %s:%d .. 
", src_addr, ntohs(si_other.sin_port)); - // printf("Data: %02x\n" , buf[0]); - // } + bool success = true; + do { + struct sockaddr_in si_other; + socklen_t socklen = sizeof(si_other); + pac->num_bytes = (int)recvfrom(s.intern(), pac->data, buffer_size, 0, + (struct sockaddr *) &si_other, &socklen); + if (pac->num_bytes < 0) + { + std::string err = smanager.get_last_error_message(); + OJPH_INFO(0x02000006, "Could not receive data : %s\n", err.data()); + continue; // if we wish to continue + } + if (first_packet) { + // this is to ignore packets from source other than the first source + first_packet = false; + src_addr = si_other.sin_addr.S_un.S_addr; + src_port = si_other.sin_port; + break; + } + success = (si_other.sin_addr.S_un.S_addr == src_addr); + success = success && (si_other.sin_port != src_port); + } while (!success); } s.close(); From 9f21be6f7dff0f17227b0592143980390607dda6 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 19 Apr 2024 13:15:58 +1000 Subject: [PATCH 081/348] WIP. First version of rtp_packet. 
--- src/apps/common/ojph_socket.h | 1 + .../ojph_stream_expand/ojph_str_ex_support.h | 130 ---------- .../ojph_stream_expand/ojph_stream_expand.cpp | 140 ++++++---- ..._support.cpp => stream_expand_support.cpp} | 36 ++- .../stream_expand_support.h | 240 ++++++++++++++++++ src/apps/others/ojph_socket.cpp | 10 + src/core/common/ojph_params.h | 11 - 7 files changed, 373 insertions(+), 195 deletions(-) delete mode 100644 src/apps/ojph_stream_expand/ojph_str_ex_support.h rename src/apps/ojph_stream_expand/{ojph_str_ex_support.cpp => stream_expand_support.cpp} (78%) create mode 100644 src/apps/ojph_stream_expand/stream_expand_support.h diff --git a/src/apps/common/ojph_socket.h b/src/apps/common/ojph_socket.h index 6ed97f61..02160c7e 100644 --- a/src/apps/common/ojph_socket.h +++ b/src/apps/common/ojph_socket.h @@ -121,6 +121,7 @@ namespace ojph int get_last_error(); std::string get_error_message(int errnum); std::string get_last_error_message(); + ui32 get_addr(const sockaddr_in& addr); }; } // !net namespace diff --git a/src/apps/ojph_stream_expand/ojph_str_ex_support.h b/src/apps/ojph_stream_expand/ojph_str_ex_support.h deleted file mode 100644 index af83deb9..00000000 --- a/src/apps/ojph_stream_expand/ojph_str_ex_support.h +++ /dev/null @@ -1,130 +0,0 @@ -//***************************************************************************/ -// This software is released under the 2-Clause BSD license, included -// below. -// -// Copyright (c) 2024, Aous Naman -// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia -// Copyright (c) 2024, The University of New South Wales, Australia -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. 
Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************/ -// This file is part of the OpenJPH software implementation. 
-// File: ojph_str_ex_support.cpp -// Author: Aous Naman -// Date: 18 April 2024 -//***************************************************************************/ - -#ifndef OJPH_STR_EX_SUPPORT_H -#define OJPH_STR_EX_SUPPORT_H - -#include "ojph_base.h" - -namespace ojph -{ -namespace str_ex -{ - - class ojph_packets_handler; - class ojph_frames_handler; -/////////////////////////////////////////////////////////////////////////////// -// -// -// -// -// -/////////////////////////////////////////////////////////////////////////////// - -/*****************************************************************************/ -/** @brief - * - */ -struct packet -{ - static constexpr int max_size = 2048; - - packet() { num_bytes = 0; next = NULL; } - char data[max_size]; - int num_bytes; - packet* next; -}; - -/////////////////////////////////////////////////////////////////////////////// -// -// -// -// -// -/////////////////////////////////////////////////////////////////////////////// - -/*****************************************************************************/ -/** @brief - * - */ -class ojph_packets_handler -{ -public: - ojph_packets_handler() - { avail = in_use = NULL; num_packets = 0; packet_store = NULL; } - ~ojph_packets_handler() - { if (packet_store) delete[] packet_store; } - - void init(int num_packets, ojph_frames_handler* frames); - packet* exchange(packet* p); - -private: - packet* avail; - packet* in_use; - int num_packets; - packet* packet_store; - ojph_frames_handler* frames; -}; - -/////////////////////////////////////////////////////////////////////////////// -// -// -// -// -// -/////////////////////////////////////////////////////////////////////////////// - -/*****************************************************************************/ -/** @brief - * - */ -class ojph_frames_handler -{ -public: - ojph_frames_handler(); - ~ojph_frames_handler(); - - void init(int num_threads); - -private: - -}; - -} // !str_ex namespace -} // !ojph namespace - -#endif 
//!OJPH_STR_EX_SUPPORT_H \ No newline at end of file diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index 3c6b7d6c..02a10708 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -39,7 +39,7 @@ #include "ojph_message.h" #include "ojph_arg.h" #include "ojph_socket.h" -#include "ojph_str_ex_support.h" +#include "stream_expand_support.h" #ifdef OJPH_OS_WINDOWS @@ -50,16 +50,19 @@ ////////////////////////////////////////////////////////////////////////////// static bool get_arguments(int argc, char *argv[], - char *&recv_addr, char *&recv_port, char *&target_name, - ojph::ui32& num_threads, + char *&recv_addr, char *&recv_port, + char *&src_addr, char *&src_port, + char *&target_name, ojph::ui32& num_threads, ojph::ui32& num_inflight_packets, - bool& display, bool& decode, bool& store) + bool& display, bool& decode, bool& store, bool& quiet) { ojph::cli_interpreter interpreter; interpreter.init(argc, argv); interpreter.reinterpret("-addr", recv_addr); interpreter.reinterpret("-port", recv_port); + interpreter.reinterpret("-src_addr", src_addr); + interpreter.reinterpret("-src_port", src_port); interpreter.reinterpret("-o", target_name); interpreter.reinterpret("-num_threads", num_threads); interpreter.reinterpret("-num_packets", num_inflight_packets); @@ -67,6 +70,7 @@ bool get_arguments(int argc, char *argv[], display = interpreter.reinterpret("-display"); decode = interpreter.reinterpret("-decode"); store = interpreter.reinterpret("-store"); + quiet = interpreter.reinterpret("-quiet"); if (interpreter.is_exhausted() == false) { printf("The following arguments were not interpreted:\n"); @@ -95,6 +99,11 @@ bool get_arguments(int argc, char *argv[], printf("Please use \"-o\" to provide a target file name.\n"); return false; } + if (num_threads < 1) + { + printf("Please set \"-num_threads\" to 1 or more.\n"); + return false; + } 
return true; } @@ -106,12 +115,15 @@ int main(int argc, char* argv[]) char *recv_addr = NULL; char *recv_port = NULL; + char *src_addr = NULL; + char *src_port = NULL; char *target_name = NULL; - ojph::ui32 num_threads = 0; + ojph::ui32 num_threads = 1; ojph::ui32 num_inflight_packets = 5; bool display = false; bool decode = false; bool store = false; + bool quiet = false; if (argc <= 1) { printf( @@ -124,10 +136,16 @@ int main(int argc, char* argv[]) " -port \n" "\n" "The following arguments are options:\n" + " -src_addr , packets from other sources\n" + " will be ignored. If not specified, then packets\n" + " from any source are accepted.\n" + " -src_port , packets from other source ports are\n" + " ignored. If not specified, then packets from any\n" + " port are accepted -- I would recommend not leaving\n" + " this one out." " -num_threads number of threads for decoding and\n" - " displaying files. This number is ignored unless\n" - " decode or display is selected, where it represents the\n" - " number of working threads." + " displaying files. It is also the number of files that\n" + " are in flight; i.e., not completely saved yet.\n" " -num_packets number of in-flight packets; this is the\n" " maximum number of packets to wait before an out-of-order\n" " or lost packet is considered lost.\n" @@ -138,31 +156,37 @@ int main(int argc, char* argv[]) " -display use this to display decoded frames.\n" " -decode use this to decode files and store them.\n" " -store use this to store encoded files.\n." + " -quiet use to stop printing informative messages.\n." 
"\n" ); exit(-1); } - if (!get_arguments(argc, argv, recv_addr, recv_port, + if (!get_arguments(argc, argv, recv_addr, recv_port, src_addr, src_port, target_name, num_threads, num_inflight_packets, - display, decode, store)) + display, decode, store, quiet)) { exit(-1); } - ojph::str_ex::ojph_frames_handler frames_handler; - frames_handler.init(num_threads); - ojph::str_ex::ojph_packets_handler packets_handler; - packets_handler.init(num_inflight_packets, &frames_handler); + ojph::stex::frames_handler frames_handler; + frames_handler.init(quiet, num_threads, store, target_name, decode, display); + ojph::stex::packets_handler packets_handler; + packets_handler.init(quiet, num_inflight_packets, &frames_handler); ojph::net::socket_manager smanager; + // listening address/port struct sockaddr_in server; - server.sin_family = AF_INET; - int result = inet_pton(AF_INET, recv_addr, &server.sin_addr); - if (result != 1) - OJPH_ERROR(0x02000001, "Please provide a valid ip address, " - "the provided address %s is not valid\n", recv_addr); - { + server.sin_family = AF_INET; + const char *p = recv_addr; + const char localhost[] = "127.0.0.1"; + if (strcmp(recv_addr, "localhost") == 0) + p = localhost; + int result = inet_pton(AF_INET, p, &server.sin_addr); + if (result != 1) + OJPH_ERROR(0x02000001, "Please provide a valid IP address when " + "using \"-addr,\" the provided address %s is not valid\n", + recv_addr); ojph::ui16 port_number = 0; port_number = (ojph::ui16)atoi(recv_port); if (port_number == 0) @@ -180,7 +204,7 @@ int main(int argc, char* argv[]) OJPH_ERROR(0x02000004, "Could not create socket : %s\n", err.data()); } - // bind + // bind to listening address if( bind(s.intern(), (struct sockaddr *)&server, sizeof(server)) == -1) { std::string err = smanager.get_last_error_message(); @@ -189,37 +213,65 @@ int main(int argc, char* argv[]) } // listen to incoming data, and forward it to packet_handler - bool first_packet = true; - ULONG src_addr; - USHORT src_port; - 
ojph::str_ex::packet* pac = NULL; - while(1) + ojph::ui32 saddr = 0; + if (src_addr) + { + const char *p = src_addr; + const char localhost[] = "127.0.0.1"; + if (strcmp(src_addr, "localhost") == 0) + p = localhost; + struct sockaddr_in t; + int result = inet_pton(AF_INET, p, &t.sin_addr); + if (result != 1) + OJPH_ERROR(0x02000006, "Please provide a valid IP address when " + "using \"-src_addr,\" the provided address %s is not valid\n", + src_addr); + saddr = smanager.get_addr(t); + } + ojph::ui16 sport = 0; + if (src_addr) { - pac = packets_handler.exchange(pac); + sport = (ojph::ui16)atoi(src_port); + if (sport == 0) + OJPH_ERROR(0x02000007, "Please provide a valid port number. " + "The number you provided is %d\n", src_port); + } - // receive data -- this is a blocking call - bool success = true; - do { + bool src_printed = false; + ojph::stex::rtp_packet* packet = NULL; + while(1) + { + packet = packets_handler.exchange(packet); + while (1) + { struct sockaddr_in si_other; - socklen_t socklen = sizeof(si_other); - pac->num_bytes = (int)recvfrom(s.intern(), pac->data, buffer_size, 0, - (struct sockaddr *) &si_other, &socklen); - if (pac->num_bytes < 0) + socklen_t socklen = sizeof(si_other); + // receive data -- this is a blocking call + packet->num_bytes = (int)recvfrom(s.intern(), packet->data, buffer_size, + 0, (struct sockaddr *) &si_other, &socklen); + if (packet->num_bytes < 0) { std::string err = smanager.get_last_error_message(); - OJPH_INFO(0x02000006, "Could not receive data : %s\n", err.data()); + OJPH_INFO(0x02000008, "Failed to receive data : %s\n", err.data()); continue; // if we wish to continue } - if (first_packet) { - // this is to ignore packets from source other than the first source - first_packet = false; - src_addr = si_other.sin_addr.S_un.S_addr; - src_port = si_other.sin_port; - break; + if ((src_addr && saddr != smanager.get_addr(si_other)) || + (src_port && sport != si_other.sin_port)) + continue; + if (!quiet && !src_printed) + { + 
constexpr int buf_size = 128; + char buf[buf_size]; + if (!inet_ntop(AF_INET, &si_other, buf, buf_size)) { + std::string err = smanager.get_last_error_message(); + OJPH_INFO(0x02000009, + "Error converting source address.\n", err.data()); + } + printf("Receiving data from %s, port %d\n", + buf, ntohs(si_other.sin_port)); + src_printed = true; } - success = (si_other.sin_addr.S_un.S_addr == src_addr); - success = success && (si_other.sin_port != src_port); - } while (!success); + } } s.close(); diff --git a/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp b/src/apps/ojph_stream_expand/stream_expand_support.cpp similarity index 78% rename from src/apps/ojph_stream_expand/ojph_str_ex_support.cpp rename to src/apps/ojph_stream_expand/stream_expand_support.cpp index f912d247..52b1a383 100644 --- a/src/apps/ojph_stream_expand/ojph_str_ex_support.cpp +++ b/src/apps/ojph_stream_expand/stream_expand_support.cpp @@ -37,11 +37,11 @@ #include #include -#include "ojph_str_ex_support.h" +#include "stream_expand_support.h" namespace ojph { -namespace str_ex +namespace stex { /////////////////////////////////////////////////////////////////////////////// @@ -53,16 +53,18 @@ namespace str_ex /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// -void ojph_packets_handler::init(int num_packets, ojph_frames_handler* frames) +void packets_handler::init(bool quiet, ui32 num_packets, + frames_handler* frames) { assert(this->num_packets == 0); - avail = packet_store = new packet[num_packets]; + avail = packet_store = new rtp_packet[num_packets]; + this->quiet = quiet; this->num_packets = num_packets; this->frames = frames; } /////////////////////////////////////////////////////////////////////////////// -packet* ojph_packets_handler::exchange(packet* p) +rtp_packet* packets_handler::exchange(rtp_packet* p) { if (avail != NULL) { @@ -84,15 +86,29 @@ packet* 
ojph_packets_handler::exchange(packet* p) /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// -ojph_frames_handler::ojph_frames_handler() {} -/////////////////////////////////////////////////////////////////////////////// -ojph_frames_handler::~ojph_frames_handler() {} +frames_handler::~frames_handler() +{ + if (files) + delete[] files; +} + /////////////////////////////////////////////////////////////////////////////// -void ojph_frames_handler::init(int num_threads) +void frames_handler::init(bool quiet, ui32 num_threads, + bool store, const char *target_name, + bool decode, bool display) { + this->quiet = quiet; + this->num_threads = num_threads; + this->store = store; + this->target_name = target_name; + this->decode = decode; + this->display = display; + num_files = num_threads + 1; + files = new stex_file[num_files]; } +/////////////////////////////////////////////////////////////////////////////// -} // !str_ex namespace +} // !stex namespace } // !ojph namespace \ No newline at end of file diff --git a/src/apps/ojph_stream_expand/stream_expand_support.h b/src/apps/ojph_stream_expand/stream_expand_support.h new file mode 100644 index 00000000..6fc6f823 --- /dev/null +++ b/src/apps/ojph_stream_expand/stream_expand_support.h @@ -0,0 +1,240 @@ +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. +// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. 
+// File: ojph_str_ex_support.cpp +// Author: Aous Naman +// Date: 18 April 2024 +//***************************************************************************/ + +#ifndef OJPH_STR_EX_SUPPORT_H +#define OJPH_STR_EX_SUPPORT_H + +#include "ojph_base.h" +#include "ojph_file.h" +#include "ojph_socket.h" + +namespace ojph +{ +namespace stex // stream expand +{ + class packets_handler; + class frames_handler; + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/*****************************************************************************/ +/** @brief inteprets RTP and j2k header, and holds received packets. + * + * This object interpret RFC 3550 and draft-ietf-avtcore-rtp-j2k-scl-00. + */ +struct rtp_packet +{ + enum packet_type : ui32 + { + PT_BODY = 0, // this is body packet + PT_MAIN_FOLLOWED_BY_MAIN = 1, + PT_MAIN_FOLLOWED_BY_BODY = 2, + PT_MAIN = 3, // frame has only one main packet + }; +public: + rtp_packet() { num_bytes = 0; next = NULL; } + +public: + ui32 get_rtp_version() { return ((ui32)data[0]) >> 6; } + bool is_padded() { return (data[0] & 0x20) != 0; } + bool is_extended() { return (data[0] & 0x10) != 0; } + ui32 get_csrc_count() { return (ui32)(data[0]) & 0xF; } + bool is_marked() { return (data[1] & 0x80) != 0; } + ui32 get_payload_type() { return (ui32)(data[1]) & 0x7F; } + ui32 get_sequence_number() { + ui32 result = ntohs(*(ui16*)(data + 2)); + result |= ((ui32)data[15]) << 16; // extended sequence (ESEQ) + return result; + } + ui32 get_time_stamp() + { return ntohl(*(ui32*)(data + 4)); } + ui32 get_ssrc() // not used for the time being + { return ntohl(*(ui32*)(data + 8)); } + ui32 get_packet_type() + { return ((ui32)data[12]) >> 6; } + ui32 get_TP() + { return (((ui32)data[12]) >> 3) & 0x7; } + ui32 get_ORDH() + { return ((ui32)data[12]) & 0x7; } + bool is_PTSTAMP_used() + { return (((ui32)data[13]) & 0x80) != 0; } + 
ui32 get_XTRAC() + { return (((ui32)data[13]) >> 4) & 0x7; } + ui32 get_PTSTAMP() { + ui32 result = (((ui32)data[13]) & 0xF) << 8; + result |= (ui32)data[14]; + return result; + } + bool is_codestream_header_reusable() + { return (((ui32)data[16] >> 7) & 1) != 0; } + bool is_component_colorimetry_used() + { return (((ui32)data[16] >> 6) & 1) != 0; } + bool is_codeblock_caching_used() + { return (((ui32)data[16] >> 5) & 1) != 0; } + bool is_RANGE() + { return ((ui32)data[16] & 1) != 0; } + ui32 get_PRIMS() + { return (ui32)data[17]; } + ui32 get_TRANS() + { return (ui32)data[18]; } + ui32 get_MAT() + { return (ui32)data[19]; } + +public: + static constexpr int max_size = 2048; //! Date: Fri, 19 Apr 2024 17:20:41 +1000 Subject: [PATCH 082/348] WIP. --- .../ojph_stream_expand/ojph_stream_expand.cpp | 70 ++++++++----------- .../stream_expand_support.cpp | 20 ++++-- .../stream_expand_support.h | 10 +-- 3 files changed, 45 insertions(+), 55 deletions(-) diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index 02a10708..0148b36b 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -54,7 +54,7 @@ bool get_arguments(int argc, char *argv[], char *&src_addr, char *&src_port, char *&target_name, ojph::ui32& num_threads, ojph::ui32& num_inflight_packets, - bool& display, bool& decode, bool& store, bool& quiet) + bool& display, bool& quiet) { ojph::cli_interpreter interpreter; interpreter.init(argc, argv); @@ -68,8 +68,6 @@ bool get_arguments(int argc, char *argv[], interpreter.reinterpret("-num_packets", num_inflight_packets); display = interpreter.reinterpret("-display"); - decode = interpreter.reinterpret("-decode"); - store = interpreter.reinterpret("-store"); quiet = interpreter.reinterpret("-quiet"); if (interpreter.is_exhausted() == false) { @@ -94,11 +92,6 @@ bool get_arguments(int argc, char *argv[], printf("Please use \"-port\" to 
provide a port number.\n"); return false; } - if (store && target_name == NULL) - { - printf("Please use \"-o\" to provide a target file name.\n"); - return false; - } if (num_threads < 1) { printf("Please set \"-num_threads\" to 1 or more.\n"); @@ -121,8 +114,6 @@ int main(int argc, char* argv[]) ojph::ui32 num_threads = 1; ojph::ui32 num_inflight_packets = 5; bool display = false; - bool decode = false; - bool store = false; bool quiet = false; if (argc <= 1) { @@ -149,13 +140,11 @@ int main(int argc, char* argv[]) " -num_packets number of in-flight packets; this is the\n" " maximum number of packets to wait before an out-of-order\n" " or lost packet is considered lost.\n" - " -target_name target file name without extension; the same\n" + " -o target file name without extension; the same\n" " printf formating can be used. For example, output_%%05d.\n" " An extension will be added, either .j2c for original\n" " frames, or .ppm for decoded images.\n" " -display use this to display decoded frames.\n" - " -decode use this to decode files and store them.\n" - " -store use this to store encoded files.\n." " -quiet use to stop printing informative messages.\n." 
"\n" ); @@ -163,13 +152,13 @@ int main(int argc, char* argv[]) } if (!get_arguments(argc, argv, recv_addr, recv_port, src_addr, src_port, target_name, num_threads, num_inflight_packets, - display, decode, store, quiet)) + display, quiet)) { exit(-1); } ojph::stex::frames_handler frames_handler; - frames_handler.init(quiet, num_threads, store, target_name, decode, display); + frames_handler.init(quiet, num_threads, target_name, display); ojph::stex::packets_handler packets_handler; packets_handler.init(quiet, num_inflight_packets, &frames_handler); ojph::net::socket_manager smanager; @@ -239,38 +228,35 @@ int main(int argc, char* argv[]) bool src_printed = false; ojph::stex::rtp_packet* packet = NULL; - while(1) + while (1) { packet = packets_handler.exchange(packet); - while (1) + struct sockaddr_in si_other; + socklen_t socklen = sizeof(si_other); + // receive data -- this is a blocking call + packet->num_bytes = (int)recvfrom(s.intern(), (char*)packet->data, + buffer_size, 0, (struct sockaddr *) &si_other, &socklen); + if (packet->num_bytes < 0) { - struct sockaddr_in si_other; - socklen_t socklen = sizeof(si_other); - // receive data -- this is a blocking call - packet->num_bytes = (int)recvfrom(s.intern(), packet->data, buffer_size, - 0, (struct sockaddr *) &si_other, &socklen); - if (packet->num_bytes < 0) - { + std::string err = smanager.get_last_error_message(); + OJPH_INFO(0x02000008, "Failed to receive data : %s\n", err.data()); + continue; // if we wish to continue + } + if ((src_addr && saddr != smanager.get_addr(si_other)) || + (src_port && sport != si_other.sin_port)) + continue; + if (!quiet && !src_printed) + { + constexpr int buf_size = 128; + char buf[buf_size]; + if (!inet_ntop(AF_INET, &si_other, buf, buf_size)) { std::string err = smanager.get_last_error_message(); - OJPH_INFO(0x02000008, "Failed to receive data : %s\n", err.data()); - continue; // if we wish to continue - } - if ((src_addr && saddr != smanager.get_addr(si_other)) || - (src_port 
&& sport != si_other.sin_port)) - continue; - if (!quiet && !src_printed) - { - constexpr int buf_size = 128; - char buf[buf_size]; - if (!inet_ntop(AF_INET, &si_other, buf, buf_size)) { - std::string err = smanager.get_last_error_message(); - OJPH_INFO(0x02000009, - "Error converting source address.\n", err.data()); - } - printf("Receiving data from %s, port %d\n", - buf, ntohs(si_other.sin_port)); - src_printed = true; + OJPH_INFO(0x02000009, + "Error converting source address.\n", err.data()); } + printf("Receiving data from %s, port %d\n", + buf, ntohs(si_other.sin_port)); + src_printed = true; } } diff --git a/src/apps/ojph_stream_expand/stream_expand_support.cpp b/src/apps/ojph_stream_expand/stream_expand_support.cpp index 52b1a383..a8602538 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.cpp +++ b/src/apps/ojph_stream_expand/stream_expand_support.cpp @@ -66,13 +66,24 @@ void packets_handler::init(bool quiet, ui32 num_packets, /////////////////////////////////////////////////////////////////////////////// rtp_packet* packets_handler::exchange(rtp_packet* p) { - if (avail != NULL) + + if (p != NULL) + { // check validity/supported features in p + } + + if (p) { + ui32 packet_type = p->get_packet_type(); + if (packet_type == rtp_packet::packet_type::PT_MAIN) + printf("A new frame %d\n", p->get_time_stamp()); + } + + if (avail != NULL) + { } else { - } return packet_store; } @@ -94,14 +105,11 @@ frames_handler::~frames_handler() /////////////////////////////////////////////////////////////////////////////// void frames_handler::init(bool quiet, ui32 num_threads, - bool store, const char *target_name, - bool decode, bool display) + const char *target_name, bool display) { this->quiet = quiet; this->num_threads = num_threads; - this->store = store; this->target_name = target_name; - this->decode = decode; this->display = display; num_files = num_threads + 1; diff --git a/src/apps/ojph_stream_expand/stream_expand_support.h 
b/src/apps/ojph_stream_expand/stream_expand_support.h index 6fc6f823..eb1a226f 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.h +++ b/src/apps/ojph_stream_expand/stream_expand_support.h @@ -123,7 +123,7 @@ struct rtp_packet public: static constexpr int max_size = 2048; //! Date: Sat, 20 Apr 2024 08:58:12 +1000 Subject: [PATCH 083/348] Mostly rearrange OJPH_EXPORT positions. --- .../stream_expand_support.h | 8 +-- src/core/common/ojph_codestream.h | 26 +--------- src/core/common/ojph_file.h | 10 ++-- src/core/common/ojph_params.h | 49 ++----------------- 4 files changed, 16 insertions(+), 77 deletions(-) diff --git a/src/apps/ojph_stream_expand/stream_expand_support.h b/src/apps/ojph_stream_expand/stream_expand_support.h index eb1a226f..e044d87e 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.h +++ b/src/apps/ojph_stream_expand/stream_expand_support.h @@ -58,9 +58,10 @@ namespace stex // stream expand /////////////////////////////////////////////////////////////////////////////// /*****************************************************************************/ -/** @brief inteprets RTP and j2k header, and holds received packets. +/** @brief inteprets RTP header and payload, and holds received packets. * * This object interpret RFC 3550 and draft-ietf-avtcore-rtp-j2k-scl-00. + * */ struct rtp_packet { @@ -183,12 +184,13 @@ class packets_handler struct stex_file { public: stex_file() - { ssrc = 0; next = NULL; } + { ssrc = timestamp = last_seen_seq = 0; next = NULL; } public: - ojph::mem_infile f; //! 
Date: Sun, 21 Apr 2024 09:57:16 +1000 Subject: [PATCH 084/348] I think packets_handler is complete, yet to be tested --- .../ojph_stream_expand/ojph_stream_expand.cpp | 41 ++++-- .../stream_expand_support.cpp | 108 ++++++++++++--- .../stream_expand_support.h | 127 +++++++++++++----- 3 files changed, 215 insertions(+), 61 deletions(-) diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index 0148b36b..b662f8ec 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -54,7 +54,7 @@ bool get_arguments(int argc, char *argv[], char *&src_addr, char *&src_port, char *&target_name, ojph::ui32& num_threads, ojph::ui32& num_inflight_packets, - bool& display, bool& quiet) + bool& quiet, bool& display, bool& decode) { ojph::cli_interpreter interpreter; interpreter.init(argc, argv); @@ -67,8 +67,9 @@ bool get_arguments(int argc, char *argv[], interpreter.reinterpret("-num_threads", num_threads); interpreter.reinterpret("-num_packets", num_inflight_packets); - display = interpreter.reinterpret("-display"); quiet = interpreter.reinterpret("-quiet"); + display = interpreter.reinterpret("-display"); + decode = interpreter.reinterpret("-decode"); if (interpreter.is_exhausted() == false) { printf("The following arguments were not interpreted:\n"); @@ -97,6 +98,17 @@ bool get_arguments(int argc, char *argv[], printf("Please set \"-num_threads\" to 1 or more.\n"); return false; } + if (num_inflight_packets < 1) + { + printf("Please set \"-num_packets\" to 1 or more.\n"); + return false; + } + if (decode && target_name == NULL) + { + printf("Since \"-decode\" was specified, please set \"-target_name\" " + "for the target name of decoded files.\n"); + return false; + } return true; } @@ -104,8 +116,6 @@ bool get_arguments(int argc, char *argv[], ////////////////////////////////////////////////////////////////////////////// int main(int argc, char* argv[]) { 
- constexpr int buffer_size = 2048; // buffer size - char *recv_addr = NULL; char *recv_port = NULL; char *src_addr = NULL; @@ -113,8 +123,9 @@ int main(int argc, char* argv[]) char *target_name = NULL; ojph::ui32 num_threads = 1; ojph::ui32 num_inflight_packets = 5; - bool display = false; bool quiet = false; + bool display = false; + bool decode = false; if (argc <= 1) { printf( @@ -152,13 +163,13 @@ int main(int argc, char* argv[]) } if (!get_arguments(argc, argv, recv_addr, recv_port, src_addr, src_port, target_name, num_threads, num_inflight_packets, - display, quiet)) + quiet, display, decode)) { exit(-1); } ojph::stex::frames_handler frames_handler; - frames_handler.init(quiet, num_threads, target_name, display); + frames_handler.init(quiet, display, decode, num_threads, target_name); ojph::stex::packets_handler packets_handler; packets_handler.init(quiet, num_inflight_packets, &frames_handler); ojph::net::socket_manager smanager; @@ -230,21 +241,27 @@ int main(int argc, char* argv[]) ojph::stex::rtp_packet* packet = NULL; while (1) { - packet = packets_handler.exchange(packet); + if (packet == NULL || packet->num_bytes != 0) // num_bytes == 0 + packet = packets_handler.exchange(packet); // if packet was ignored + struct sockaddr_in si_other; socklen_t socklen = sizeof(si_other); // receive data -- this is a blocking call - packet->num_bytes = (int)recvfrom(s.intern(), (char*)packet->data, - buffer_size, 0, (struct sockaddr *) &si_other, &socklen); - if (packet->num_bytes < 0) + packet->num_bytes = 0; // if we ignore the packet, we can continue + int num_bytes = (int)recvfrom(s.intern(), (char*)packet->data, + packet->max_size, 0, (struct sockaddr *) &si_other, &socklen); + if (num_bytes < 0) { std::string err = smanager.get_last_error_message(); OJPH_INFO(0x02000008, "Failed to receive data : %s\n", err.data()); continue; // if we wish to continue } if ((src_addr && saddr != smanager.get_addr(si_other)) || - (src_port && sport != si_other.sin_port)) + 
(src_port && sport != si_other.sin_port)) { continue; + } + packet->num_bytes = (ojph::ui32)num_bytes; + if (!quiet && !src_printed) { constexpr int buf_size = 128; diff --git a/src/apps/ojph_stream_expand/stream_expand_support.cpp b/src/apps/ojph_stream_expand/stream_expand_support.cpp index a8602538..29d26ae4 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.cpp +++ b/src/apps/ojph_stream_expand/stream_expand_support.cpp @@ -57,7 +57,9 @@ void packets_handler::init(bool quiet, ui32 num_packets, frames_handler* frames) { assert(this->num_packets == 0); - avail = packet_store = new rtp_packet[num_packets]; + avail = packet_store = new rtp_packet[num_packets]; + for (ui32 i = 0; i < num_packets - 1; ++i) + packet_store[i].next = packet_store + i + 1; this->quiet = quiet; this->num_packets = num_packets; this->frames = frames; @@ -66,26 +68,91 @@ void packets_handler::init(bool quiet, ui32 num_packets, /////////////////////////////////////////////////////////////////////////////// rtp_packet* packets_handler::exchange(rtp_packet* p) { - - if (p != NULL) - { // check validity/supported features in p + if (p == NULL) { + assert(in_use == NULL && num_packets > 0); + // move from avail to in_use + rtp_packet* p = avail; + avail = avail->next; + p->next = in_use; + in_use = p; + return p; } + if (p->num_bytes == 0) + return p; + + // We can a series of test to remove/warn about unsupported options + // but we currently do not do that yet - if (p) + bool result = frames->push(p); + if (result == false) { - ui32 packet_type = p->get_packet_type(); - if (packet_type == rtp_packet::packet_type::PT_MAIN) - printf("A new frame %d\n", p->get_time_stamp()); + if (avail) + { // move from avail to in_use + p = avail; + avail = avail->next; + p->next = in_use; + in_use = p; + } + else + { + assert(p->next != NULL || num_packets == 1); + if (p->next != NULL) + { // use the oldest/last packet in in_use + assert(p == in_use); + rtp_packet *pp = p; // previous p + p = 
p->next; + while(p->next != NULL) { pp = p; p = p->next; } + pp->next = NULL; + p->next = in_use; + in_use = p; + } + } + return p; } + else { + // move packet to avail + assert(p == in_use); + in_use = in_use->next; + p->next = avail; + avail = p; + // test if you can push more packets + p = in_use; + rtp_packet *pp = p; // previous p + while (p != NULL) + { + result = frames->push(p); + if (result) + { + // move packet to avail + if (p == in_use) + { + in_use = in_use->next; + p->next = avail; + avail = p; + p = in_use; + } + else + { + pp->next = p->next; + p->next = avail; + avail = p; + p = pp->next; + } + } + else { + pp = p; + p = p->next; + } + } - if (avail != NULL) - { + // get one from avail and move it to in_use + p = avail; + avail = avail->next; + p->next = in_use; + in_use = p; + return p; } - else - { - } - return packet_store; } /////////////////////////////////////////////////////////////////////////////// @@ -104,19 +171,24 @@ frames_handler::~frames_handler() } /////////////////////////////////////////////////////////////////////////////// -void frames_handler::init(bool quiet, ui32 num_threads, - const char *target_name, bool display) +void frames_handler::init(bool quiet, bool display, bool decode, + ui32 num_threads, const char *target_name) { this->quiet = quiet; + this->display = display; + this->decode = decode; this->num_threads = num_threads; this->target_name = target_name; - this->display = display; - num_files = num_threads + 1; files = new stex_file[num_files]; } /////////////////////////////////////////////////////////////////////////////// +bool frames_handler::push(rtp_packet* p) +{ + + return false; +} } // !stex namespace } // !ojph namespace \ No newline at end of file diff --git a/src/apps/ojph_stream_expand/stream_expand_support.h b/src/apps/ojph_stream_expand/stream_expand_support.h index e044d87e..5feaf782 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.h +++ 
b/src/apps/ojph_stream_expand/stream_expand_support.h @@ -76,6 +76,7 @@ struct rtp_packet rtp_packet() { num_bytes = 0; next = NULL; } public: + // RTP header ui32 get_rtp_version() { return ((ui32)data[0]) >> 6; } bool is_padded() { return (data[0] & 0x20) != 0; } bool is_extended() { return (data[0] & 0x10) != 0; } @@ -91,41 +92,95 @@ struct rtp_packet { return ntohl(*(ui32*)(data + 4)); } ui32 get_ssrc() // not used for the time being { return ntohl(*(ui32*)(data + 8)); } + + // common in main and body payload headers ui32 get_packet_type() { return ((ui32)data[12]) >> 6; } - ui32 get_TP() + ui32 get_TP() { return (((ui32)data[12]) >> 3) & 0x7; } - ui32 get_ORDH() - { return ((ui32)data[12]) & 0x7; } - bool is_PTSTAMP_used() - { return (((ui32)data[13]) & 0x80) != 0; } - ui32 get_XTRAC() - { return (((ui32)data[13]) >> 4) & 0x7; } - ui32 get_PTSTAMP() { + ui32 get_ORDH() { + if (get_packet_type() != PT_BODY) return ((ui32)data[12]) & 0x7; + else return (((ui32)data[13]) >> 7) & 0x1; + } + ui32 get_PTSTAMP() { ui32 result = (((ui32)data[13]) & 0xF) << 8; result |= (ui32)data[14]; return result; } - bool is_codestream_header_reusable() - { return (((ui32)data[16] >> 7) & 1) != 0; } - bool is_component_colorimetry_used() - { return (((ui32)data[16] >> 6) & 1) != 0; } - bool is_codeblock_caching_used() - { return (((ui32)data[16] >> 5) & 1) != 0; } - bool is_RANGE() - { return ((ui32)data[16] & 1) != 0; } - ui32 get_PRIMS() - { return (ui32)data[17]; } - ui32 get_TRANS() - { return (ui32)data[18]; } - ui32 get_MAT() - { return (ui32)data[19]; } + ui8* get_data() + { return data + 20; } + ui32 get_data_size() + { return (ui32)num_bytes - 20; } + + // only in main payload header + bool is_PTSTAMP_used() { + assert(get_packet_type() != PT_BODY); + return (((ui32)data[13]) & 0x80) != 0; + } + ui32 get_XTRAC() { + assert(get_packet_type() != PT_BODY); + return (((ui32)data[13]) >> 4) & 0x7; + } + bool is_codestream_header_reusable() { + assert(get_packet_type() != 
PT_BODY); + return (((ui32)data[16]) & 0x80) != 0; + } + bool is_component_colorimetry_used() { + assert(get_packet_type() != PT_BODY); + return (((ui32)data[16]) & 0x40) != 0; + } + bool is_codeblock_caching_used() { + assert(get_packet_type() != PT_BODY); + return (((ui32)data[16]) & 0x20) != 0; + } + bool is_RANGE() { + assert(get_packet_type() != PT_BODY); + return ((ui32)data[16] & 1) != 0; + } + ui32 get_PRIMS(){ + assert(get_packet_type() != PT_BODY); + return (ui32)data[17]; + } + ui32 get_TRANS() { + assert(get_packet_type() != PT_BODY); + return (ui32)data[18]; + } + ui32 get_MAT() { + assert(get_packet_type() != PT_BODY); + return (ui32)data[19]; + } + + // only in body payload header + ui32 get_RES() { + assert(get_packet_type() == PT_BODY); + return ((ui32)data[12]) & 0x7; + } + ui32 get_QUAL() { + assert(get_packet_type() == PT_BODY); + return (((ui32)data[13]) >> 4) & 0x7; + } + ui32 get_data_pos() { + ui32 result = 0; + if (get_packet_type() == PT_BODY) { + result = ((ui32)data[16]) << 4; + result |= (((ui32)data[17]) >> 4) & 0xF; + } + return result; + } + ui32 get_PID() { + assert(get_packet_type() == PT_BODY); + ui32 result = (((ui32)data[17]) & 0xF) << 16; + result |= ((ui32)data[18]) << 8; + result |= ((ui32)data[19]); + return result; + } + public: static constexpr int max_size = 2048; //! Date: Mon, 22 Apr 2024 12:40:04 +1000 Subject: [PATCH 085/348] frames_handler largely complete. Modified mem_outfile. It is not clear when packets_handler and frames_handler can be flushed. 
--- .../ojph_stream_expand/ojph_stream_expand.cpp | 202 +++++++++--------- .../stream_expand_support.cpp | 197 ++++++++++++++++- .../stream_expand_support.h | 70 ++++-- src/core/common/ojph_file.h | 55 ++++- src/core/others/ojph_file.cpp | 96 ++++++--- 5 files changed, 462 insertions(+), 158 deletions(-) diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index b662f8ec..7e4f91a0 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -168,116 +168,126 @@ int main(int argc, char* argv[]) exit(-1); } - ojph::stex::frames_handler frames_handler; - frames_handler.init(quiet, display, decode, num_threads, target_name); - ojph::stex::packets_handler packets_handler; - packets_handler.init(quiet, num_inflight_packets, &frames_handler); - ojph::net::socket_manager smanager; + try { + ojph::stex::frames_handler frames_handler; + frames_handler.init(quiet, display, decode, num_inflight_packets, + num_threads, target_name); + ojph::stex::packets_handler packets_handler; + packets_handler.init(quiet, num_inflight_packets, &frames_handler); + ojph::net::socket_manager smanager; - // listening address/port - struct sockaddr_in server; - { - server.sin_family = AF_INET; - const char *p = recv_addr; - const char localhost[] = "127.0.0.1"; - if (strcmp(recv_addr, "localhost") == 0) - p = localhost; - int result = inet_pton(AF_INET, p, &server.sin_addr); - if (result != 1) - OJPH_ERROR(0x02000001, "Please provide a valid IP address when " - "using \"-addr,\" the provided address %s is not valid\n", - recv_addr); - ojph::ui16 port_number = 0; - port_number = (ojph::ui16)atoi(recv_port); - if (port_number == 0) - OJPH_ERROR(0x02000003, "Please provide a valid port number. 
" - "The number you provided is %d\n", recv_port); - server.sin_port = htons(port_number); - } - - // create a socket - ojph::net::socket s; - s = smanager.create_socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if(s.intern() == OJPH_INVALID_SOCKET) - { - std::string err = smanager.get_last_error_message(); - OJPH_ERROR(0x02000004, "Could not create socket : %s\n", err.data()); - } - - // bind to listening address - if( bind(s.intern(), (struct sockaddr *)&server, sizeof(server)) == -1) - { - std::string err = smanager.get_last_error_message(); - OJPH_ERROR(0x02000005, - "Could not bind address to socket : %s\n", err.data()); - } - - // listen to incoming data, and forward it to packet_handler - ojph::ui32 saddr = 0; - if (src_addr) - { - const char *p = src_addr; - const char localhost[] = "127.0.0.1"; - if (strcmp(src_addr, "localhost") == 0) - p = localhost; - struct sockaddr_in t; - int result = inet_pton(AF_INET, p, &t.sin_addr); - if (result != 1) - OJPH_ERROR(0x02000006, "Please provide a valid IP address when " - "using \"-src_addr,\" the provided address %s is not valid\n", - src_addr); - saddr = smanager.get_addr(t); - } - ojph::ui16 sport = 0; - if (src_addr) - { - sport = (ojph::ui16)atoi(src_port); - if (sport == 0) - OJPH_ERROR(0x02000007, "Please provide a valid port number. " - "The number you provided is %d\n", src_port); - } + // listening address/port + struct sockaddr_in server; + { + server.sin_family = AF_INET; + const char *p = recv_addr; + const char localhost[] = "127.0.0.1"; + if (strcmp(recv_addr, "localhost") == 0) + p = localhost; + int result = inet_pton(AF_INET, p, &server.sin_addr); + if (result != 1) + OJPH_ERROR(0x02000001, "Please provide a valid IP address when " + "using \"-addr,\" the provided address %s is not valid\n", + recv_addr); + ojph::ui16 port_number = 0; + port_number = (ojph::ui16)atoi(recv_port); + if (port_number == 0) + OJPH_ERROR(0x02000003, "Please provide a valid port number. 
" + "The number you provided is %d\n", recv_port); + server.sin_port = htons(port_number); + } - bool src_printed = false; - ojph::stex::rtp_packet* packet = NULL; - while (1) - { - if (packet == NULL || packet->num_bytes != 0) // num_bytes == 0 - packet = packets_handler.exchange(packet); // if packet was ignored + // create a socket + ojph::net::socket s; + s = smanager.create_socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if(s.intern() == OJPH_INVALID_SOCKET) + { + std::string err = smanager.get_last_error_message(); + OJPH_ERROR(0x02000004, "Could not create socket : %s\n", err.data()); + } - struct sockaddr_in si_other; - socklen_t socklen = sizeof(si_other); - // receive data -- this is a blocking call - packet->num_bytes = 0; // if we ignore the packet, we can continue - int num_bytes = (int)recvfrom(s.intern(), (char*)packet->data, - packet->max_size, 0, (struct sockaddr *) &si_other, &socklen); - if (num_bytes < 0) + // bind to listening address + if( bind(s.intern(), (struct sockaddr *)&server, sizeof(server)) == -1) { std::string err = smanager.get_last_error_message(); - OJPH_INFO(0x02000008, "Failed to receive data : %s\n", err.data()); - continue; // if we wish to continue + OJPH_ERROR(0x02000005, + "Could not bind address to socket : %s\n", err.data()); + } + + // listen to incoming data, and forward it to packet_handler + ojph::ui32 saddr = 0; + if (src_addr) + { + const char *p = src_addr; + const char localhost[] = "127.0.0.1"; + if (strcmp(src_addr, "localhost") == 0) + p = localhost; + struct sockaddr_in t; + int result = inet_pton(AF_INET, p, &t.sin_addr); + if (result != 1) + OJPH_ERROR(0x02000006, "Please provide a valid IP address when " + "using \"-src_addr,\" the provided address %s is not valid\n", + src_addr); + saddr = smanager.get_addr(t); } - if ((src_addr && saddr != smanager.get_addr(si_other)) || - (src_port && sport != si_other.sin_port)) { - continue; + ojph::ui16 sport = 0; + if (src_addr) + { + sport = (ojph::ui16)atoi(src_port); 
+ if (sport == 0) + OJPH_ERROR(0x02000007, "Please provide a valid port number. " + "The number you provided is %d\n", src_port); } - packet->num_bytes = (ojph::ui32)num_bytes; - if (!quiet && !src_printed) + bool src_printed = false; + ojph::stex::rtp_packet* packet = NULL; + while (1) { - constexpr int buf_size = 128; - char buf[buf_size]; - if (!inet_ntop(AF_INET, &si_other, buf, buf_size)) { + if (packet == NULL || packet->num_bytes != 0) // num_bytes == 0 + packet = packets_handler.exchange(packet); // if packet was ignored + + struct sockaddr_in si_other; + socklen_t socklen = sizeof(si_other); + // receive data -- this is a blocking call + packet->num_bytes = 0; // if we ignore the packet, we can continue + int num_bytes = (int)recvfrom(s.intern(), (char*)packet->data, + packet->max_size, 0, (struct sockaddr *) &si_other, &socklen); + if (num_bytes < 0) + { std::string err = smanager.get_last_error_message(); - OJPH_INFO(0x02000009, - "Error converting source address.\n", err.data()); + OJPH_INFO(0x02000008, "Failed to receive data : %s\n", err.data()); + continue; // if we wish to continue + } + if ((src_addr && saddr != smanager.get_addr(si_other)) || + (src_port && sport != si_other.sin_port)) { + continue; + } + packet->num_bytes = (ojph::ui32)num_bytes; + + if (!quiet && !src_printed) + { + constexpr int buf_size = 128; + char buf[buf_size]; + if (!inet_ntop(AF_INET, &si_other, buf, buf_size)) { + std::string err = smanager.get_last_error_message(); + OJPH_INFO(0x02000009, + "Error converting source address.\n", err.data()); + } + printf("Receiving data from %s, port %d\n", + buf, ntohs(si_other.sin_port)); + src_printed = true; } - printf("Receiving data from %s, port %d\n", - buf, ntohs(si_other.sin_port)); - src_printed = true; } + s.close(); } + catch (const std::exception& e) + { + const char *p = e.what(); + if (strncmp(p, "ojph error", 10) != 0) + printf("%s\n", p); + exit(-1); + } - s.close(); return 0; } diff --git 
a/src/apps/ojph_stream_expand/stream_expand_support.cpp b/src/apps/ojph_stream_expand/stream_expand_support.cpp index 29d26ae4..0191ca67 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.cpp +++ b/src/apps/ojph_stream_expand/stream_expand_support.cpp @@ -109,22 +109,28 @@ rtp_packet* packets_handler::exchange(rtp_packet* p) } return p; } - else { + else + { + // sequence number of the most recent packet + ui32 seq = p->get_sequence_number(); + // move packet to avail assert(p == in_use); in_use = in_use->next; p->next = avail; avail = p; - // test if you can push more packets + // test if you can push more packets, also remove old packets p = in_use; - rtp_packet *pp = p; // previous p + rtp_packet *pp = p; // previous p -- will be updated before use while (p != NULL) { + // if packet is used or it is old result = frames->push(p); + result = result | (seq > p->get_sequence_number() + num_packets); if (result) { - // move packet to avail + // move packet from in_use to avail if (p == in_use) { in_use = in_use->next; @@ -155,6 +161,34 @@ rtp_packet* packets_handler::exchange(rtp_packet* p) } } +/////////////////////////////////////////////////////////////////////////////// +void packets_handler::flush() +{ + // move all packets from in_use to avail + while (in_use) + { + rtp_packet *p = in_use; + in_use = in_use->next; + p->next = avail; + avail = p; + } +} + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +void stex_file::notify_file_completion() +{ + done = true; + parent->increment_num_complete_files(); +} + /////////////////////////////////////////////////////////////////////////////// // // @@ -166,29 +200,174 @@ rtp_packet* packets_handler::exchange(rtp_packet* p) 
/////////////////////////////////////////////////////////////////////////////// frames_handler::~frames_handler() { - if (files) - delete[] files; + if (files_store) + delete[] files_store; } /////////////////////////////////////////////////////////////////////////////// -void frames_handler::init(bool quiet, bool display, bool decode, - ui32 num_threads, const char *target_name) +void frames_handler::init(bool quiet, bool display, bool decode, + ui32 packet_queue_length, ui32 num_threads, + const char *target_name) { this->quiet = quiet; this->display = display; this->decode = decode; + this->packet_queue_length = packet_queue_length; this->num_threads = num_threads; this->target_name = target_name; num_files = num_threads + 1; - files = new stex_file[num_files]; + avail = files_store = new stex_file[num_files]; + for (ui32 i = 0; i < num_files - 1; ++i) + files_store[i].init(this, files_store + i + 1); + files_store[num_files - 1].init(this, NULL); } /////////////////////////////////////////////////////////////////////////////// bool frames_handler::push(rtp_packet* p) { + // check if any of the frames processed in other threads are done + check_files_in_processing(); + + // check if we have any old files that have no hope is updating + if (in_use) + { + ui32 seq = p->get_sequence_number(); + stex_file* f = in_use, *pf = NULL; + while (f != NULL) + { + if (seq > f->last_seen_seq + packet_queue_length) + { + // move from in_use to processing + if (f == in_use) + { + in_use = in_use->next; + f->next = processing; + processing = f; + //<=============================================== queue f for + //<=============================================== further execution + f = in_use; + } + else { + pf->next = f->next; + f->next = processing; + processing = f; + //<=============================================== queue f for + //<=============================================== further execution + f = pf->next; + } + } + else { + pf = f; + f = f->next; + } + } + } + // 
process newly received packet + if (p->get_packet_type() != rtp_packet::PT_BODY) + { // main payload header + printf("A new file %d\n", p->get_time_stamp()); + if (avail) + { + // move from avail to in_use + stex_file* f = avail; + avail = avail->next; + f->next = processing; + processing = f; + f->timestamp = p->get_time_stamp(); + f->last_seen_seq = p->get_sequence_number(); + f->f.open(1<<20, true); // start with 1MB + f->write(p); + return true; + } + else + return false; + } + else + { // body payload header + stex_file* f = in_use; + while (f != NULL && f->timestamp != p->get_time_stamp()) + f = f->next; + if (f == NULL) + return false; + + f->write(p); + + if (p->is_marked()) + f->marked = true; + + if (f->marked && f->are_packets_missing() == false) + //<=============================================== queue f for + //<=============================================== further execution + ; + return true; + } return false; } +/////////////////////////////////////////////////////////////////////////////// +bool frames_handler::flush() +{ + // check if any of the frames processed in other threads are done + check_files_in_processing(); + + // check files in_use and move them to processing + while (in_use != NULL) + { + // move from in_use to processing + stex_file* f = in_use; + in_use = in_use->next; + f->next = processing; + processing = f; + //<=============================================== queue f for + //<=============================================== further execution + } + + return (processing != NULL); +} + +/////////////////////////////////////////////////////////////////////////////// +void frames_handler::check_files_in_processing() +{ + // check if any of the frames processed in other threads are done + int nf = num_complete_files.load(std::memory_order_acquire); + if (nf > 0) + { + stex_file* f = processing, *pf = NULL; + while(f != NULL && nf > 0) + { + num_complete_files.fetch_add(-1, std::memory_order_relaxed); + + if (f->done == true) + { + // 
move f from processing to avail + f->timestamp = 0; + f->last_seen_seq = 0; + f->done = f->marked = false; + f->estimated_size = f->actual_size = 0; + if (f == processing) + { + processing = processing->next; + f->next = avail; + avail = f; + f = processing; // for next test + } + else { + pf->next = f->next; + f->next = avail; + avail = f; + f = pf->next; + } + } + else + { + pf = f; + f = f->next; + } + nf = num_complete_files.load(std::memory_order_acquire); + } + } +} + } // !stex namespace } // !ojph namespace \ No newline at end of file diff --git a/src/apps/ojph_stream_expand/stream_expand_support.h b/src/apps/ojph_stream_expand/stream_expand_support.h index 5feaf782..6d3d01f1 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.h +++ b/src/apps/ojph_stream_expand/stream_expand_support.h @@ -38,6 +38,7 @@ #ifndef OJPH_STR_EX_SUPPORT_H #define OJPH_STR_EX_SUPPORT_H +#include #include "ojph_base.h" #include "ojph_file.h" #include "ojph_socket.h" @@ -197,7 +198,7 @@ struct rtp_packet * * This object primarily attempts to process the RTP packet. * The main purpose is to buffer received packets if it is not clear where - * they fit. It also drops packets if they become stale. + * they fit. It also drops packets if they become old. * * This object basically works as follows. * The object interact with frames_handler, using the "push" member function. 
@@ -219,6 +220,7 @@ class packets_handler public: void init(bool quiet, ui32 num_packets, frames_handler* frames); rtp_packet* exchange(rtp_packet* p); + void flush(); private: bool quiet; //!parent = parent; + this->next = next; + } + + void write(rtp_packet *p) + { + ui32 pos = p->get_data_pos(); + ui32 len = p->get_data_size(); + estimated_size = ojph_max(estimated_size, pos + len); + actual_size += len; + f.seek(pos, outfile_base::OJPH_SEEK_SET); + f.write(p->get_data(), len); + } + bool are_packets_missing() + { return (estimated_size != actual_size); } + void notify_file_completion(); public: - ojph::mem_outfile f; //!is_open == false); - assert(this->buf_size == 0); - assert(this->buf == NULL); - assert(this->cur_ptr == NULL); + assert(this->cur_ptr == this->buf); - // do initial buffer allocation + // do initial buffer allocation or buffer expansion this->is_open = true; - this->buf_size = initial_size; - if (initial_size) - this->buf = (ui8*)malloc(this->buf_size); + this->clear_mem = clear_mem; + expand_storage(initial_size, true); this->cur_ptr = this->buf; } /** */ void mem_outfile::close() { - if (buf) - free(buf); is_open = false; - buf_size = 0; - buf = cur_ptr = NULL; + cur_ptr = buf; } - /** The function starts with a buffer size of 65536. 
Then, whenever the - * need arises, this buffer is expanded by a factor approx 1.5x + /** The seek function expands the buffer whenever offset goes beyond + * the buffer end */ - size_t mem_outfile::write(const void *ptr, size_t size) + int mem_outfile::seek(si64 offset, enum outfile_base::seek origin) + { + if (origin == OJPH_SEEK_SET) + ; // do nothing + else if (origin == OJPH_SEEK_CUR) + offset += tell(); + else if (origin == OJPH_SEEK_END) + offset += (si64)buf_size; + else { + assert(0); + return -1; + } + + if (offset >= 0) + expand_storage((size_t)offset, false); + else + return -1; + + cur_ptr = buf + offset; + return 0; + } + + /** Whenever the need arises, the buffer is expanded by a factor approx 1.5x + */ + size_t mem_outfile::write(const void *ptr, size_t new_size) { assert(this->is_open); assert(this->buf_size); @@ -149,23 +171,37 @@ namespace ojph { // expand buffer if needed to make sure it has room for this write si64 used_size = tell(); //current used size - size_t new_used_size = (size_t)used_size + size; //needed size - if (new_used_size > this->buf_size) //only expand when there is need + size_t needed_size = (size_t)used_size + new_size; //needed size + expand_storage(needed_size, false); + + // copy bytes into buffer and adjust cur_ptr + memcpy(this->cur_ptr, ptr, new_size); + cur_ptr += new_size; + + return new_size; + } + + /** */ + void mem_outfile::expand_storage(size_t needed_size, bool clear_all) + { + needed_size += (needed_size + 1) >> 1; // x1.5 + if (needed_size > buf_size) { - size_t new_buf_size = this->buf_size; - while (new_used_size > new_buf_size) - new_buf_size += new_buf_size >> 1; //expand by ~1.5x + si64 used_size = tell(); // current used size + + if (this->buf) + this->buf = (ui8*)realloc(this->buf, needed_size); + else + this->buf = (ui8*)malloc(needed_size); - this->buf = (ui8*)realloc(this->buf, new_buf_size); - this->buf_size = new_buf_size; + if (clear_mem && !clear_all) // will be cleared later + memset(this->buf 
+ buf_size, 0, needed_size - this->buf_size); + + this->buf_size = needed_size; this->cur_ptr = this->buf + used_size; } - - // copy bytes into buffer and adjust cur_ptr - memcpy(this->cur_ptr, ptr, size); - cur_ptr += size; - - return size; + if (clear_all) + memset(this->buf, 0, this->buf_size); } From 7fdd911c0bf10b56f29f744049d4c95879a12787 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Mon, 22 Apr 2024 13:06:20 +1000 Subject: [PATCH 086/348] Created ojph_threads. Rename socket to sockets, and fixed a warning. --- .../common/{ojph_socket.h => ojph_sockets.h} | 0 src/apps/common/ojph_threads.h | 54 +++++++++++++++++++ src/apps/ojph_stream_expand/CMakeLists.txt | 12 +++-- .../ojph_stream_expand/ojph_stream_expand.cpp | 2 +- .../stream_expand_support.cpp | 2 +- .../stream_expand_support.h | 4 +- .../{ojph_socket.cpp => ojph_sockets.cpp} | 2 +- src/apps/others/ojph_threads.cpp | 38 +++++++++++++ src/core/common/ojph_file.h | 5 +- 9 files changed, 108 insertions(+), 11 deletions(-) rename src/apps/common/{ojph_socket.h => ojph_sockets.h} (100%) create mode 100644 src/apps/common/ojph_threads.h rename src/apps/others/{ojph_socket.cpp => ojph_sockets.cpp} (99%) create mode 100644 src/apps/others/ojph_threads.cpp diff --git a/src/apps/common/ojph_socket.h b/src/apps/common/ojph_sockets.h similarity index 100% rename from src/apps/common/ojph_socket.h rename to src/apps/common/ojph_sockets.h diff --git a/src/apps/common/ojph_threads.h b/src/apps/common/ojph_threads.h new file mode 100644 index 00000000..8ea1b0d4 --- /dev/null +++ b/src/apps/common/ojph_threads.h @@ -0,0 +1,54 @@ +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. 
+// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. 
+// File: ojph_threads.h +// Author: Aous Naman +// Date: 22 April 2024 +//***************************************************************************/ + +#ifndef OJPH_THREADS_H +#define OJPH_THREADS_H + +namespace ojph +{ +namespace thds +{ + +} // !thds namespace +} // !ojph namespace + + + + + + +#endif // !OJPH_THREADS_H \ No newline at end of file diff --git a/src/apps/ojph_stream_expand/CMakeLists.txt b/src/apps/ojph_stream_expand/CMakeLists.txt index a32a3f88..bbc81056 100644 --- a/src/apps/ojph_stream_expand/CMakeLists.txt +++ b/src/apps/ojph_stream_expand/CMakeLists.txt @@ -8,14 +8,16 @@ if (OJPH_BUILD_STREAM_EXPAND) set(CMAKE_CXX_STANDARD 14) file(GLOB OJPH_STREAM_EXPAND "*.cpp") - file(GLOB OJPH_SOCKET "../others/ojph_socket.cpp") - file(GLOB OJPH_SOCKET_H "../common/ojph_socket.h") + file(GLOB OJPH_SOCKETS "../others/ojph_sockets.cpp") + file(GLOB OJPH_SOCKETS_H "../common/ojph_sockets.h") + file(GLOB OJPH_THREADS "../others/ojph_threads.cpp") + file(GLOB OJPH_THREADS_H "../common/ojph_threads.h") - list(APPEND SOURCES ${OJPH_STREAM_EXPAND} ${OJPH_SOCKET} ${OJPH_SOCKET_H}) + list(APPEND SOURCES ${OJPH_STREAM_EXPAND} ${OJPH_SOCKETS} ${OJPH_SOCKETS_H} ${OJPH_THREADS} ${OJPH_THREADS_H}) source_group("main" FILES ${OJPH_STREAM_EXPAND}) - source_group("others" FILES ${OJPH_SOCKET}) - source_group("common" FILES ${OJPH_SOCKET_H}) + source_group("others" FILES ${OJPH_SOCKETS} ${OJPH_THREADS}) + source_group("common" FILES ${OJPH_SOCKETS_H} ${OJPH_THREADS_H}) add_executable(ojph_stream_expand ${SOURCES}) if(MSVC) diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index 7e4f91a0..cd0b4589 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -38,7 +38,7 @@ #include #include "ojph_message.h" #include "ojph_arg.h" -#include "ojph_socket.h" +#include "ojph_sockets.h" #include "stream_expand_support.h" #ifdef OJPH_OS_WINDOWS diff 
--git a/src/apps/ojph_stream_expand/stream_expand_support.cpp b/src/apps/ojph_stream_expand/stream_expand_support.cpp index 0191ca67..05ebd60e 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.cpp +++ b/src/apps/ojph_stream_expand/stream_expand_support.cpp @@ -30,7 +30,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************/ // This file is part of the OpenJPH software implementation. -// File: ojph_str_ex_support.h +// File: stream_expand_support.h // Author: Aous Naman // Date: 18 April 2024 //***************************************************************************/ diff --git a/src/apps/ojph_stream_expand/stream_expand_support.h b/src/apps/ojph_stream_expand/stream_expand_support.h index 6d3d01f1..a9862bed 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.h +++ b/src/apps/ojph_stream_expand/stream_expand_support.h @@ -30,7 +30,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************/ // This file is part of the OpenJPH software implementation. 
-// File: ojph_str_ex_support.cpp +// File: stream_expand_support.cpp // Author: Aous Naman // Date: 18 April 2024 //***************************************************************************/ @@ -41,7 +41,7 @@ #include #include "ojph_base.h" #include "ojph_file.h" -#include "ojph_socket.h" +#include "ojph_sockets.h" namespace ojph { diff --git a/src/apps/others/ojph_socket.cpp b/src/apps/others/ojph_sockets.cpp similarity index 99% rename from src/apps/others/ojph_socket.cpp rename to src/apps/others/ojph_sockets.cpp index a1ce6f3b..ef01a227 100644 --- a/src/apps/others/ojph_socket.cpp +++ b/src/apps/others/ojph_sockets.cpp @@ -38,7 +38,7 @@ #include #include #include "ojph_message.h" -#include "ojph_socket.h" +#include "ojph_sockets.h" int ojph_socket_manager_counter = 0; diff --git a/src/apps/others/ojph_threads.cpp b/src/apps/others/ojph_threads.cpp new file mode 100644 index 00000000..1672c0cb --- /dev/null +++ b/src/apps/others/ojph_threads.cpp @@ -0,0 +1,38 @@ +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. +// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. +// File: ojph_threads.h +// Author: Aous Naman +// Date: 22 April 2024 +//***************************************************************************/ + +#include "ojph_threads.h" \ No newline at end of file diff --git a/src/core/common/ojph_file.h b/src/core/common/ojph_file.h index c4c8dc1c..e82ebc44 100644 --- a/src/core/common/ojph_file.h +++ b/src/core/common/ojph_file.h @@ -85,7 +85,10 @@ namespace ojph { virtual size_t write(const void *ptr, size_t size) = 0; virtual si64 tell() { return 0; } virtual int seek(si64 offset, enum outfile_base::seek origin) - { return -1; /* always fail, to remind you to write an implementation */ } + { + ojph_unused(offset); ojph_unused(origin); + return -1; /* always fail, to remind you to write an implementation */ + } virtual void flush() {} virtual void close() {} }; From a1766671d1547416fe6bda44f340fb3867731ca0 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 24 Apr 2024 17:38:00 +1000 Subject: [PATCH 087/348] Before major change to buffering. Added thread_pool. Simple worker threads. 
--- src/apps/common/ojph_threads.h | 59 +++++++ .../ojph_stream_expand/ojph_stream_expand.cpp | 5 +- .../stream_expand_support.cpp | 95 +++++++--- .../stream_expand_support.h | 55 ++++-- .../threaded_frame_processors.cpp | 61 +++++++ .../threaded_frame_processors.h | 165 ++++++++++++++++++ src/apps/others/ojph_threads.cpp | 71 +++++++- src/core/common/ojph_file.h | 7 + src/core/others/ojph_file.cpp | 19 +- 9 files changed, 499 insertions(+), 38 deletions(-) create mode 100644 src/apps/ojph_stream_expand/threaded_frame_processors.cpp create mode 100644 src/apps/ojph_stream_expand/threaded_frame_processors.h diff --git a/src/apps/common/ojph_threads.h b/src/apps/common/ojph_threads.h index 8ea1b0d4..7d858d8b 100644 --- a/src/apps/common/ojph_threads.h +++ b/src/apps/common/ojph_threads.h @@ -38,11 +38,70 @@ #ifndef OJPH_THREADS_H #define OJPH_THREADS_H +#include +#include +#include +#include +#include + namespace ojph { namespace thds { +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/*****************************************************************************/ +/** @brief + * + */ +class worker_thread_base +{ +public: + virtual ~worker_thread_base() { } + virtual void execute() = 0; +}; + + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/*****************************************************************************/ +/** @brief + * + */ +class thread_pool +{ +public: + thread_pool() { stop.store(false, std::memory_order_relaxed); } + ~thread_pool(); + +public: + void init(size_t num_threads); + void add_task(worker_thread_base* task); + +private: + static void start_thread(thread_pool* tp); + +private: + std::vector threads; + std::deque tasks; + std::mutex mutex; + std::condition_variable 
condition; + std::atomic_bool stop; +}; + } // !thds namespace } // !ojph namespace diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index cd0b4589..e360c6c8 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -39,6 +39,7 @@ #include "ojph_message.h" #include "ojph_arg.h" #include "ojph_sockets.h" +#include "ojph_threads.h" #include "stream_expand_support.h" #ifdef OJPH_OS_WINDOWS @@ -169,9 +170,11 @@ int main(int argc, char* argv[]) } try { + ojph::thds::thread_pool thread_pool; + thread_pool.init(num_threads); ojph::stex::frames_handler frames_handler; frames_handler.init(quiet, display, decode, num_inflight_packets, - num_threads, target_name); + num_threads, target_name, &thread_pool); ojph::stex::packets_handler packets_handler; packets_handler.init(quiet, num_inflight_packets, &frames_handler); ojph::net::socket_manager smanager; diff --git a/src/apps/ojph_stream_expand/stream_expand_support.cpp b/src/apps/ojph_stream_expand/stream_expand_support.cpp index 05ebd60e..69d5ffce 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.cpp +++ b/src/apps/ojph_stream_expand/stream_expand_support.cpp @@ -37,6 +37,8 @@ #include #include +#include "ojph_threads.h" +#include "threaded_frame_processors.h" #include "stream_expand_support.h" namespace ojph @@ -84,7 +86,7 @@ rtp_packet* packets_handler::exchange(rtp_packet* p) // but we currently do not do that yet bool result = frames->push(p); - if (result == false) + if (result == false) // cannot use the packet for the time being { if (avail) { // move from avail to in_use @@ -185,8 +187,9 @@ void packets_handler::flush() /////////////////////////////////////////////////////////////////////////////// void stex_file::notify_file_completion() { - done = true; - parent->increment_num_complete_files(); + int t = done.fetch_add(-1, std::memory_order_acq_rel); + if (t == 1) // done 
is 0 + parent->increment_num_complete_files(); } /////////////////////////////////////////////////////////////////////////////// @@ -200,6 +203,10 @@ void stex_file::notify_file_completion() /////////////////////////////////////////////////////////////////////////////// frames_handler::~frames_handler() { + if (renderers_store) + delete[] renderers_store; + if (storers_store) + delete[] storers_store; if (files_store) delete[] files_store; } @@ -207,7 +214,8 @@ frames_handler::~frames_handler() /////////////////////////////////////////////////////////////////////////////// void frames_handler::init(bool quiet, bool display, bool decode, ui32 packet_queue_length, ui32 num_threads, - const char *target_name) + const char *target_name, + thds::thread_pool* thread_pool) { this->quiet = quiet; this->display = display; @@ -217,9 +225,20 @@ void frames_handler::init(bool quiet, bool display, bool decode, this->target_name = target_name; num_files = num_threads + 1; avail = files_store = new stex_file[num_files]; - for (ui32 i = 0; i < num_files - 1; ++i) - files_store[i].init(this, files_store + i + 1); - files_store[num_files - 1].init(this, NULL); + storers_store = new j2k_frame_storer[num_files]; + renderers_store = new j2k_frame_renderer[num_files]; + ui32 i = 0; + for (; i < num_files - 1; ++i) { + files_store[i].init(this, files_store + i + 1, storers_store + i, + renderers_store + i, target_name); + storers_store[i].init(files_store + i, target_name); + renderers_store[i].init(files_store + i, target_name); + } + files_store[i].init(this, NULL, storers_store + i, renderers_store + i, + target_name); + storers_store[i].init(files_store + i, target_name); + renderers_store[i].init(files_store + i, target_name); + this->thread_pool = thread_pool; } /////////////////////////////////////////////////////////////////////////////// @@ -243,16 +262,22 @@ bool frames_handler::push(rtp_packet* p) in_use = in_use->next; f->next = processing; processing = f; - 
//<=============================================== queue f for - //<=============================================== further execution + if (target_name != NULL) + { + f->f.close(); + thread_pool->add_task(f->storer); + } f = in_use; } else { pf->next = f->next; f->next = processing; processing = f; - //<=============================================== queue f for - //<=============================================== further execution + if (target_name != NULL) + { + f->f.close(); + thread_pool->add_task(f->storer); + } f = pf->next; } } @@ -272,10 +297,13 @@ bool frames_handler::push(rtp_packet* p) // move from avail to in_use stex_file* f = avail; avail = avail->next; - f->next = processing; - processing = f; + f->next = in_use; + in_use = f; f->timestamp = p->get_time_stamp(); f->last_seen_seq = p->get_sequence_number(); + f->marked = false; + f->estimated_size = f->actual_size = 0; + f->frame_idx = frame_idx++; f->f.open(1<<20, true); // start with 1MB f->write(p); return true; @@ -285,21 +313,36 @@ bool frames_handler::push(rtp_packet* p) } else { // body payload header - stex_file* f = in_use; - while (f != NULL && f->timestamp != p->get_time_stamp()) + stex_file* f = in_use, *pf; + while (f != NULL && f->timestamp != p->get_time_stamp()) { + pf = f; f = f->next; - if (f == NULL) + } + if (f == NULL) return false; - + + f->last_seen_seq = ojph_max(f->last_seen_seq, p->get_sequence_number()); f->write(p); if (p->is_marked()) f->marked = true; if (f->marked && f->are_packets_missing() == false) - //<=============================================== queue f for - //<=============================================== further execution - ; + { + // move from from in_use to processing + if (f == in_use) + in_use = in_use->next; + else + pf->next = f->next; + f->next = processing; + processing = f; + + f->f.close(); + thread_pool->add_task(f->storer); + } + // else + // printf("%02x %02x\n", p->data[0], p->data[1]); + return true; } return false; @@ -319,8 +362,11 @@ bool 
frames_handler::flush() in_use = in_use->next; f->next = processing; processing = f; - //<=============================================== queue f for - //<=============================================== further execution + if (target_name != NULL) + { + f->f.close(); + thread_pool->add_task(f->storer); + } } return (processing != NULL); @@ -338,13 +384,14 @@ void frames_handler::check_files_in_processing() { num_complete_files.fetch_add(-1, std::memory_order_relaxed); - if (f->done == true) + if (f->done.load(std::memory_order_acquire) == 0) { // move f from processing to avail f->timestamp = 0; f->last_seen_seq = 0; - f->done = f->marked = false; + f->marked = false; f->estimated_size = f->actual_size = 0; + f->frame_idx = 0; if (f == processing) { processing = processing->next; diff --git a/src/apps/ojph_stream_expand/stream_expand_support.h b/src/apps/ojph_stream_expand/stream_expand_support.h index a9862bed..6011e622 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.h +++ b/src/apps/ojph_stream_expand/stream_expand_support.h @@ -39,16 +39,28 @@ #define OJPH_STR_EX_SUPPORT_H #include +#include #include "ojph_base.h" #include "ojph_file.h" #include "ojph_sockets.h" namespace ojph { + +namespace thds +{ class thread_pool; } + namespace stex // stream expand { - class packets_handler; - class frames_handler; + +// defined here +class packets_handler; +class frames_handler; + +// defined elsewhere +struct j2k_frame_storer; +struct decoded_frame_storer; +struct j2k_frame_renderer; /////////////////////////////////////////////////////////////////////////////// // @@ -251,15 +263,22 @@ struct stex_file { stex_file() { timestamp = last_seen_seq = 0; - done = marked = false; - estimated_size = actual_size = 0; + marked = false; + done.store(0, std::memory_order_relaxed); + estimated_size = actual_size = frame_idx = 0; parent = NULL; + name_template = NULL; + storer = NULL; + renderer = NULL; next = NULL; } - void init(frames_handler* parent, stex_file* next) 
+ void init(frames_handler* parent, stex_file* next, j2k_frame_storer *storer, + j2k_frame_renderer* renderer, const char *name_template) { this->parent = parent; this->next = next; + this->storer = storer; + this->renderer = renderer; } void write(rtp_packet *p) @@ -279,11 +298,17 @@ struct stex_file { ojph::mem_outfile f; //!frame_idx); +} + + +} // !stex namespace +} // !ojph namespace \ No newline at end of file diff --git a/src/apps/ojph_stream_expand/threaded_frame_processors.h b/src/apps/ojph_stream_expand/threaded_frame_processors.h new file mode 100644 index 00000000..bf3624d5 --- /dev/null +++ b/src/apps/ojph_stream_expand/threaded_frame_processors.h @@ -0,0 +1,165 @@ +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. +// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. +// File: threaded_frame_processors.h +// Author: Aous Naman +// Date: 23 April 2024 +//***************************************************************************/ + +#ifndef THREADED_FRAME_PROCESSOR_H +#define THREADED_FRAME_PROCESSOR_H + +#include "ojph_threads.h" +#include "stream_expand_support.h" + +namespace ojph +{ + namespace thds + { class thread_pool; } + +namespace stex +{ + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/*****************************************************************************/ +/** @brief Store a j2k frame as is. 
+ * + */ +struct j2k_frame_storer : public thds::worker_thread_base +{ +public: + j2k_frame_storer() { + file = NULL; + name_template = NULL; + } + ~j2k_frame_storer() override {} + +public: + void init(stex_file* file, const char* name_template) + { + this->file = file; + this->name_template = name_template; + } + + void execute() override; + +private: + stex_file* file; + const char* name_template; +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/*****************************************************************************/ +/** @brief Store a decoded j2k frame + * + */ +struct decoded_frame_storer : public thds::worker_thread_base +{ +public: + enum file_type : ui32 { + FT_UNKNOWN = 0, + FT_PGM = 1, + FT_PPM = 2, + FT_YUV = 3, + }; + +public: + decoded_frame_storer() { + file = NULL; + name_template = NULL; + ft = FT_UNKNOWN; + } + ~decoded_frame_storer() override {} + +public: + void execute() override {} + + stex_file* file; + const char* name_template; + file_type ft; + ojph::mem_outfile outfile; +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/*****************************************************************************/ +/** @brief Decodes and displays a j2k frame + * + */ +struct j2k_frame_renderer : public thds::worker_thread_base +{ +public: + j2k_frame_renderer() { + file = NULL; + name_template = NULL; + } + ~j2k_frame_renderer() override {} + +public: + void init(stex_file* file, const char* name_template) + { + this->file = file; + this->name_template = name_template; + } + + void execute() override {} + +private: + stex_file* file; + const char* name_template; + decoded_frame_storer storer; +}; + +} // !stex namespace +} // !ojph namespace + +#endif // 
!THREADED_FRAME_PROCESSOR_H \ No newline at end of file diff --git a/src/apps/others/ojph_threads.cpp b/src/apps/others/ojph_threads.cpp index 1672c0cb..03912fcd 100644 --- a/src/apps/others/ojph_threads.cpp +++ b/src/apps/others/ojph_threads.cpp @@ -35,4 +35,73 @@ // Date: 22 April 2024 //***************************************************************************/ -#include "ojph_threads.h" \ No newline at end of file +#include "ojph_threads.h" + +namespace ojph +{ +namespace thds +{ + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +thread_pool::~thread_pool() +{ + stop.store(true, std::memory_order_release); + condition.notify_all(); + for (int i = 0; i < threads.size(); ++i) + threads[i].join(); +} + +/////////////////////////////////////////////////////////////////////////////// +void thread_pool::init(size_t num_threads) +{ + if (threads.size() < num_threads) + threads.resize(num_threads); + + for (size_t i = 0; i < num_threads; ++i) + threads[i] = std::thread(start_thread, this); +} + +/////////////////////////////////////////////////////////////////////////////// +void thread_pool::add_task(worker_thread_base* task) +{ + mutex.lock(); + tasks.push_back(task); + mutex.unlock(); +} + +/////////////////////////////////////////////////////////////////////////////// +void thread_pool::start_thread(thread_pool* tp) +{ + while (1) + { + // setup the condition variable + std::unique_lock lock(tp->mutex); + // wait releases the mutex, blocks until notified (or spuriously), + // and acquire the mutex + tp->condition.wait(lock); + + if(tp->stop.load(std::memory_order_acquire)) + return; + + worker_thread_base* task = NULL; + if (!tp->tasks.empty()) + { + task = tp->tasks.front(); + tp->tasks.pop_front(); + } + lock.unlock(); + if (task) + task->execute(); 
+ } +} + +} // !thds namespace +} // !ojph namespace \ No newline at end of file diff --git a/src/core/common/ojph_file.h b/src/core/common/ojph_file.h index e82ebc44..a297aa0a 100644 --- a/src/core/common/ojph_file.h +++ b/src/core/common/ojph_file.h @@ -197,6 +197,12 @@ namespace ojph { */ const ui8* get_data() const { return buf; } + /** + * @brief Call this function to write the memory file data to a file + * + */ + void write_to_file(const char *file_name) const; + private: /** * @brief This function expands storage by x1.5 needed space. @@ -213,6 +219,7 @@ namespace ojph { bool is_open; bool clear_mem; size_t buf_size; + size_t used_size; ui8 *buf; ui8 *cur_ptr; }; diff --git a/src/core/others/ojph_file.cpp b/src/core/others/ojph_file.cpp index 7096fbe5..b4cc4857 100644 --- a/src/core/others/ojph_file.cpp +++ b/src/core/others/ojph_file.cpp @@ -102,7 +102,7 @@ namespace ojph { mem_outfile::mem_outfile() { is_open = clear_mem = false; - buf_size = 0; + buf_size = used_size = 0; buf = cur_ptr = NULL; } @@ -112,7 +112,7 @@ namespace ojph { if (buf) free(buf); is_open = clear_mem = false; - buf_size = 0; + buf_size = used_size = 0; buf = cur_ptr = NULL; } @@ -126,6 +126,7 @@ namespace ojph { this->is_open = true; this->clear_mem = clear_mem; expand_storage(initial_size, true); + this->used_size = 0; this->cur_ptr = this->buf; } @@ -177,10 +178,24 @@ namespace ojph { // copy bytes into buffer and adjust cur_ptr memcpy(this->cur_ptr, ptr, new_size); cur_ptr += new_size; + used_size = ojph_max(used_size, tell()); return new_size; } + /** */ + void mem_outfile::write_to_file(const char *file_name) const + { + assert(is_open == false); + FILE *f = fopen(file_name, "wb"); + if (f == NULL) + OJPH_ERROR(0x00060003, "failed to open %s for writing", file_name); + if (f != NULL) + if (fwrite(this->buf, 1, used_size, f) != used_size) + OJPH_ERROR(0x00060004, "failed writing to %s", file_name); + fclose(f); + } + /** */ void mem_outfile::expand_storage(size_t needed_size, 
bool clear_all) { From 46bb2e3b24636b28e0ae9f8592e199c65b1289c8 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 24 Apr 2024 17:43:44 +1000 Subject: [PATCH 088/348] Fix compilation. --- src/apps/common/ojph_threads.h | 225 ++++++++++++++++--------------- src/apps/others/ojph_threads.cpp | 212 ++++++++++++++--------------- 2 files changed, 219 insertions(+), 218 deletions(-) diff --git a/src/apps/common/ojph_threads.h b/src/apps/common/ojph_threads.h index 7d858d8b..8c73efa7 100644 --- a/src/apps/common/ojph_threads.h +++ b/src/apps/common/ojph_threads.h @@ -1,113 +1,114 @@ -//***************************************************************************/ -// This software is released under the 2-Clause BSD license, included -// below. -// -// Copyright (c) 2024, Aous Naman -// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia -// Copyright (c) 2024, The University of New South Wales, Australia -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************/ -// This file is part of the OpenJPH software implementation. -// File: ojph_threads.h -// Author: Aous Naman -// Date: 22 April 2024 -//***************************************************************************/ - -#ifndef OJPH_THREADS_H -#define OJPH_THREADS_H - -#include -#include -#include -#include -#include - -namespace ojph -{ -namespace thds -{ - -/////////////////////////////////////////////////////////////////////////////// -// -// -// -// -// -/////////////////////////////////////////////////////////////////////////////// - -/*****************************************************************************/ -/** @brief - * - */ -class worker_thread_base -{ -public: - virtual ~worker_thread_base() { } - virtual void execute() = 0; -}; - - -/////////////////////////////////////////////////////////////////////////////// -// -// -// -// -// -/////////////////////////////////////////////////////////////////////////////// - -/*****************************************************************************/ -/** @brief - * - */ -class thread_pool -{ -public: - thread_pool() { stop.store(false, std::memory_order_relaxed); } - ~thread_pool(); - -public: - void init(size_t num_threads); - void add_task(worker_thread_base* task); - -private: - static void start_thread(thread_pool* tp); - -private: - std::vector threads; - std::deque tasks; - std::mutex mutex; 
- std::condition_variable condition; - std::atomic_bool stop; -}; - -} // !thds namespace -} // !ojph namespace - - - - - - +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. +// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. 
+// File: ojph_threads.h +// Author: Aous Naman +// Date: 22 April 2024 +//***************************************************************************/ + +#ifndef OJPH_THREADS_H +#define OJPH_THREADS_H + +#include +#include +#include +#include +#include +#include + +namespace ojph +{ +namespace thds +{ + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/*****************************************************************************/ +/** @brief + * + */ +class worker_thread_base +{ +public: + virtual ~worker_thread_base() { } + virtual void execute() = 0; +}; + + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/*****************************************************************************/ +/** @brief + * + */ +class thread_pool +{ +public: + thread_pool() { stop.store(false, std::memory_order_relaxed); } + ~thread_pool(); + +public: + void init(size_t num_threads); + void add_task(worker_thread_base* task); + +private: + static void start_thread(thread_pool* tp); + +private: + std::vector threads; + std::deque tasks; + std::mutex mutex; + std::condition_variable condition; + std::atomic_bool stop; +}; + +} // !thds namespace +} // !ojph namespace + + + + + + #endif // !OJPH_THREADS_H \ No newline at end of file diff --git a/src/apps/others/ojph_threads.cpp b/src/apps/others/ojph_threads.cpp index 03912fcd..76968157 100644 --- a/src/apps/others/ojph_threads.cpp +++ b/src/apps/others/ojph_threads.cpp @@ -1,107 +1,107 @@ -//***************************************************************************/ -// This software is released under the 2-Clause BSD license, included -// below. 
-// -// Copyright (c) 2024, Aous Naman -// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia -// Copyright (c) 2024, The University of New South Wales, Australia -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************/ -// This file is part of the OpenJPH software implementation. 
-// File: ojph_threads.h -// Author: Aous Naman -// Date: 22 April 2024 -//***************************************************************************/ - -#include "ojph_threads.h" - -namespace ojph -{ -namespace thds -{ - -/////////////////////////////////////////////////////////////////////////////// -// -// -// -// -// -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -thread_pool::~thread_pool() -{ - stop.store(true, std::memory_order_release); - condition.notify_all(); - for (int i = 0; i < threads.size(); ++i) - threads[i].join(); -} - -/////////////////////////////////////////////////////////////////////////////// -void thread_pool::init(size_t num_threads) -{ - if (threads.size() < num_threads) - threads.resize(num_threads); - - for (size_t i = 0; i < num_threads; ++i) - threads[i] = std::thread(start_thread, this); -} - -/////////////////////////////////////////////////////////////////////////////// -void thread_pool::add_task(worker_thread_base* task) -{ - mutex.lock(); - tasks.push_back(task); - mutex.unlock(); -} - -/////////////////////////////////////////////////////////////////////////////// -void thread_pool::start_thread(thread_pool* tp) -{ - while (1) - { - // setup the condition variable - std::unique_lock lock(tp->mutex); - // wait releases the mutex, blocks until notified (or spuriously), - // and acquire the mutex - tp->condition.wait(lock); - - if(tp->stop.load(std::memory_order_acquire)) - return; - - worker_thread_base* task = NULL; - if (!tp->tasks.empty()) - { - task = tp->tasks.front(); - tp->tasks.pop_front(); - } - lock.unlock(); - if (task) - task->execute(); - } -} - -} // !thds namespace +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. 
+// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. 
+// File: ojph_threads.h +// Author: Aous Naman +// Date: 22 April 2024 +//***************************************************************************/ + +#include "ojph_threads.h" + +namespace ojph +{ +namespace thds +{ + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +thread_pool::~thread_pool() +{ + stop.store(true, std::memory_order_release); + condition.notify_all(); + for (size_t i = 0; i < threads.size(); ++i) + threads[i].join(); +} + +/////////////////////////////////////////////////////////////////////////////// +void thread_pool::init(size_t num_threads) +{ + if (threads.size() < num_threads) + threads.resize(num_threads); + + for (size_t i = 0; i < num_threads; ++i) + threads[i] = std::thread(start_thread, this); +} + +/////////////////////////////////////////////////////////////////////////////// +void thread_pool::add_task(worker_thread_base* task) +{ + mutex.lock(); + tasks.push_back(task); + mutex.unlock(); +} + +/////////////////////////////////////////////////////////////////////////////// +void thread_pool::start_thread(thread_pool* tp) +{ + while (1) + { + // setup the condition variable + std::unique_lock lock(tp->mutex); + // wait releases the mutex, blocks until notified (or spuriously), + // and acquire the mutex + tp->condition.wait(lock); + + if(tp->stop.load(std::memory_order_acquire)) + return; + + worker_thread_base* task = NULL; + if (!tp->tasks.empty()) + { + task = tp->tasks.front(); + tp->tasks.pop_front(); + } + lock.unlock(); + if (task) + task->execute(); + } +} + +} // !thds namespace } // !ojph namespace \ No newline at end of file From fcb59815c2717a886c511f1560e4cfe99eeab085 Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Wed, 24 Apr 2024 17:48:56 +1000 Subject: [PATCH 089/348] Fix Compilation 2. 
--- src/apps/ojph_stream_expand/CMakeLists.txt | 2 +- .../stream_expand_support.cpp | 2 +- .../stream_expand_support.h | 1 + .../threaded_frame_processors.cpp | 120 +++---- .../threaded_frame_processors.h | 328 +++++++++--------- 5 files changed, 227 insertions(+), 226 deletions(-) diff --git a/src/apps/ojph_stream_expand/CMakeLists.txt b/src/apps/ojph_stream_expand/CMakeLists.txt index bbc81056..d8b74d1d 100644 --- a/src/apps/ojph_stream_expand/CMakeLists.txt +++ b/src/apps/ojph_stream_expand/CMakeLists.txt @@ -23,7 +23,7 @@ if (OJPH_BUILD_STREAM_EXPAND) if(MSVC) target_link_libraries(ojph_stream_expand PUBLIC openjph ws2_32) else() - target_link_libraries(ojph_stream_expand PUBLIC openjph) + target_link_libraries(ojph_stream_expand PUBLIC openjph pthread) endif(MSVC) install(TARGETS ojph_stream_expand DESTINATION bin) diff --git a/src/apps/ojph_stream_expand/stream_expand_support.cpp b/src/apps/ojph_stream_expand/stream_expand_support.cpp index 69d5ffce..dbb3513c 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.cpp +++ b/src/apps/ojph_stream_expand/stream_expand_support.cpp @@ -313,7 +313,7 @@ bool frames_handler::push(rtp_packet* p) } else { // body payload header - stex_file* f = in_use, *pf; + stex_file* f = in_use, *pf = in_use; while (f != NULL && f->timestamp != p->get_time_stamp()) { pf = f; f = f->next; diff --git a/src/apps/ojph_stream_expand/stream_expand_support.h b/src/apps/ojph_stream_expand/stream_expand_support.h index 6011e622..517b503e 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.h +++ b/src/apps/ojph_stream_expand/stream_expand_support.h @@ -276,6 +276,7 @@ struct stex_file { j2k_frame_renderer* renderer, const char *name_template) { this->parent = parent; + this->name_template = name_template; this->next = next; this->storer = storer; this->renderer = renderer; diff --git a/src/apps/ojph_stream_expand/threaded_frame_processors.cpp b/src/apps/ojph_stream_expand/threaded_frame_processors.cpp index 
8d28737b..08de1892 100644 --- a/src/apps/ojph_stream_expand/threaded_frame_processors.cpp +++ b/src/apps/ojph_stream_expand/threaded_frame_processors.cpp @@ -1,61 +1,61 @@ -//***************************************************************************/ -// This software is released under the 2-Clause BSD license, included -// below. -// -// Copyright (c) 2024, Aous Naman -// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia -// Copyright (c) 2024, The University of New South Wales, Australia -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************/ -// This file is part of the OpenJPH software implementation. 
-// File: threaded_frame_processors.cpp -// Author: Aous Naman -// Date: 23 April 2024 -//***************************************************************************/ - -#include "threaded_frame_processors.h" - -namespace ojph -{ - -namespace stex -{ - -/////////////////////////////////////////////////////////////////////////////// -// -// -// -// -// -/////////////////////////////////////////////////////////////////////////////// - -void j2k_frame_storer::execute() -{ - printf("saving file with index %d\n", file->frame_idx); -} - - -} // !stex namespace +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. +// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. +// File: threaded_frame_processors.cpp +// Author: Aous Naman +// Date: 23 April 2024 +//***************************************************************************/ + +#include "threaded_frame_processors.h" + +namespace ojph +{ + +namespace stex +{ + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +void j2k_frame_storer::execute() +{ + printf("saving file with index %d\n", file->frame_idx); +} + + +} // !stex namespace } // !ojph namespace \ No newline at end of file diff --git a/src/apps/ojph_stream_expand/threaded_frame_processors.h b/src/apps/ojph_stream_expand/threaded_frame_processors.h index bf3624d5..f44d9b22 100644 --- a/src/apps/ojph_stream_expand/threaded_frame_processors.h +++ b/src/apps/ojph_stream_expand/threaded_frame_processors.h @@ -1,165 +1,165 @@ -//***************************************************************************/ -// This software is released under the 2-Clause BSD license, included -// below. 
-// -// Copyright (c) 2024, Aous Naman -// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia -// Copyright (c) 2024, The University of New South Wales, Australia -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************/ -// This file is part of the OpenJPH software implementation. 
-// File: threaded_frame_processors.h -// Author: Aous Naman -// Date: 23 April 2024 -//***************************************************************************/ - -#ifndef THREADED_FRAME_PROCESSOR_H -#define THREADED_FRAME_PROCESSOR_H - -#include "ojph_threads.h" -#include "stream_expand_support.h" - -namespace ojph -{ - namespace thds - { class thread_pool; } - -namespace stex -{ - -/////////////////////////////////////////////////////////////////////////////// -// -// -// -// -// -/////////////////////////////////////////////////////////////////////////////// - -/*****************************************************************************/ -/** @brief Store a j2k frame as is. - * - */ -struct j2k_frame_storer : public thds::worker_thread_base -{ -public: - j2k_frame_storer() { - file = NULL; - name_template = NULL; - } - ~j2k_frame_storer() override {} - -public: - void init(stex_file* file, const char* name_template) - { - this->file = file; - this->name_template = name_template; - } - - void execute() override; - -private: - stex_file* file; - const char* name_template; -}; - -/////////////////////////////////////////////////////////////////////////////// -// -// -// -// -// -/////////////////////////////////////////////////////////////////////////////// - -/*****************************************************************************/ -/** @brief Store a decoded j2k frame - * - */ -struct decoded_frame_storer : public thds::worker_thread_base -{ -public: - enum file_type : ui32 { - FT_UNKNOWN = 0, - FT_PGM = 1, - FT_PPM = 2, - FT_YUV = 3, - }; - -public: - decoded_frame_storer() { - file = NULL; - name_template = NULL; - ft = FT_UNKNOWN; - } - ~decoded_frame_storer() override {} - -public: - void execute() override {} - - stex_file* file; - const char* name_template; - file_type ft; - ojph::mem_outfile outfile; -}; - -/////////////////////////////////////////////////////////////////////////////// -// -// -// -// -// 
-/////////////////////////////////////////////////////////////////////////////// - -/*****************************************************************************/ -/** @brief Decodes and displays a j2k frame - * - */ -struct j2k_frame_renderer : public thds::worker_thread_base -{ -public: - j2k_frame_renderer() { - file = NULL; - name_template = NULL; - } - ~j2k_frame_renderer() override {} - -public: - void init(stex_file* file, const char* name_template) - { - this->file = file; - this->name_template = name_template; - } - - void execute() override {} - -private: - stex_file* file; - const char* name_template; - decoded_frame_storer storer; -}; - -} // !stex namespace -} // !ojph namespace - +//***************************************************************************/ +// This software is released under the 2-Clause BSD license, included +// below. +// +// Copyright (c) 2024, Aous Naman +// Copyright (c) 2024, Kakadu Software Pty Ltd, Australia +// Copyright (c) 2024, The University of New South Wales, Australia +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************/ +// This file is part of the OpenJPH software implementation. +// File: threaded_frame_processors.h +// Author: Aous Naman +// Date: 23 April 2024 +//***************************************************************************/ + +#ifndef THREADED_FRAME_PROCESSOR_H +#define THREADED_FRAME_PROCESSOR_H + +#include "ojph_threads.h" +#include "stream_expand_support.h" + +namespace ojph +{ + namespace thds + { class thread_pool; } + +namespace stex +{ + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/*****************************************************************************/ +/** @brief Store a j2k frame as is. 
+ * + */ +struct j2k_frame_storer : public thds::worker_thread_base +{ +public: + j2k_frame_storer() { + file = NULL; + name_template = NULL; + } + ~j2k_frame_storer() override {} + +public: + void init(stex_file* file, const char* name_template) + { + this->file = file; + this->name_template = name_template; + } + + void execute() override; + +private: + stex_file* file; + const char* name_template; +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/*****************************************************************************/ +/** @brief Store a decoded j2k frame + * + */ +struct decoded_frame_storer : public thds::worker_thread_base +{ +public: + enum file_type : ui32 { + FT_UNKNOWN = 0, + FT_PGM = 1, + FT_PPM = 2, + FT_YUV = 3, + }; + +public: + decoded_frame_storer() { + file = NULL; + name_template = NULL; + ft = FT_UNKNOWN; + } + ~decoded_frame_storer() override {} + +public: + void execute() override {} + + stex_file* file; + const char* name_template; + file_type ft; + ojph::mem_outfile outfile; +}; + +/////////////////////////////////////////////////////////////////////////////// +// +// +// +// +// +/////////////////////////////////////////////////////////////////////////////// + +/*****************************************************************************/ +/** @brief Decodes and displays a j2k frame + * + */ +struct j2k_frame_renderer : public thds::worker_thread_base +{ +public: + j2k_frame_renderer() { + file = NULL; + name_template = NULL; + } + ~j2k_frame_renderer() override {} + +public: + void init(stex_file* file, const char* name_template) + { + this->file = file; + this->name_template = name_template; + } + + void execute() override {} + +private: + stex_file* file; + const char* name_template; + decoded_frame_storer storer; +}; + +} // !stex namespace +} // !ojph namespace + #endif // 
!THREADED_FRAME_PROCESSOR_H \ No newline at end of file From c5b8f4af98b9614d175192e0380ea518476721dd Mon Sep 17 00:00:00 2001 From: Aous Naman Date: Fri, 26 Apr 2024 21:00:35 +1000 Subject: [PATCH 090/348] WIP. It is working better. Need to handle out-of-order packets. --- src/apps/common/ojph_sockets.h | 5 + src/apps/ojph_stream_expand/CMakeLists.txt | 2 +- .../ojph_stream_expand/ojph_stream_expand.cpp | 158 +++++++++---- .../stream_expand_support.cpp | 219 +++++++++--------- .../stream_expand_support.h | 17 +- .../threaded_frame_processors.cpp | 7 +- src/apps/others/ojph_sockets.cpp | 18 ++ src/apps/others/ojph_threads.cpp | 1 + src/core/others/ojph_file.cpp | 7 +- 9 files changed, 261 insertions(+), 173 deletions(-) diff --git a/src/apps/common/ojph_sockets.h b/src/apps/common/ojph_sockets.h index 02160c7e..50de0c9e 100644 --- a/src/apps/common/ojph_sockets.h +++ b/src/apps/common/ojph_sockets.h @@ -47,6 +47,7 @@ typedef SOCKET ojph_socket; #define OJPH_INVALID_SOCKET (INVALID_SOCKET) + #define OJPH_EWOULDBLOCK (WSAEWOULDBLOCK) #else #include #include @@ -55,9 +56,11 @@ #include #include #include + #include typedef int ojph_socket; #define OJPH_INVALID_SOCKET (-1) + #define OJPH_EWOULDBLOCK (EWOULDBLOCK) #endif namespace ojph @@ -85,6 +88,8 @@ namespace ojph socket() { s = OJPH_INVALID_SOCKET; } socket(ojph_socket s); void close(); + bool set_blocking_mode(bool block); + ojph_socket intern() { return s; } private: diff --git a/src/apps/ojph_stream_expand/CMakeLists.txt b/src/apps/ojph_stream_expand/CMakeLists.txt index d8b74d1d..024510cd 100644 --- a/src/apps/ojph_stream_expand/CMakeLists.txt +++ b/src/apps/ojph_stream_expand/CMakeLists.txt @@ -7,7 +7,7 @@ if (OJPH_BUILD_STREAM_EXPAND) include_directories(../../core/common) set(CMAKE_CXX_STANDARD 14) - file(GLOB OJPH_STREAM_EXPAND "*.cpp") + file(GLOB OJPH_STREAM_EXPAND "*.cpp" "*.h") file(GLOB OJPH_SOCKETS "../others/ojph_sockets.cpp") file(GLOB OJPH_SOCKETS_H "../common/ojph_sockets.h") file(GLOB OJPH_THREADS 
"../others/ojph_threads.cpp") diff --git a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp index e360c6c8..5fb9962b 100644 --- a/src/apps/ojph_stream_expand/ojph_stream_expand.cpp +++ b/src/apps/ojph_stream_expand/ojph_stream_expand.cpp @@ -55,6 +55,7 @@ bool get_arguments(int argc, char *argv[], char *&src_addr, char *&src_port, char *&target_name, ojph::ui32& num_threads, ojph::ui32& num_inflight_packets, + ojph::ui32& recvfrm_buf_size, bool& blocking, bool& quiet, bool& display, bool& decode) { ojph::cli_interpreter interpreter; @@ -67,7 +68,9 @@ bool get_arguments(int argc, char *argv[], interpreter.reinterpret("-o", target_name); interpreter.reinterpret("-num_threads", num_threads); interpreter.reinterpret("-num_packets", num_inflight_packets); + interpreter.reinterpret("-recv_buf_size", recvfrm_buf_size); + blocking = interpreter.reinterpret("-blocking"); quiet = interpreter.reinterpret("-quiet"); display = interpreter.reinterpret("-display"); decode = interpreter.reinterpret("-decode"); @@ -124,6 +127,8 @@ int main(int argc, char* argv[]) char *target_name = NULL; ojph::ui32 num_threads = 1; ojph::ui32 num_inflight_packets = 5; + ojph::ui32 recvfrm_buf_size = 65536; + bool blocking = false; bool quiet = false; bool display = false; bool decode = false; @@ -132,39 +137,49 @@ int main(int argc, char* argv[]) printf( "\n" "The following arguments are necessary:\n" - " -addr , or\n" - " The address should be either localhost, or\n" - " a local network card IP address\n" - " example: -addr 127.0.0.1\n" - " -port \n" + " -addr , or\n" + " The address should be either localhost, or\n" + " a local network card IP address\n" + " example: -addr 127.0.0.1\n" + " -port \n" "\n" "The following arguments are options:\n" - " -src_addr , packets from other sources\n" - " will be ignored. 
If not specified, then packets\n" - " from any source are accepted.\n" - " -src_port , packets from other source ports are\n" - " ignored. If not specified, then packets from any\n" - " port are accepted -- I would recommend not leaving\n" - " this one out." - " -num_threads number of threads for decoding and\n" - " displaying files. It is also the number of files that\n" - " are in flight; i.e., not completely saved yet.\n" - " -num_packets number of in-flight packets; this is the\n" - " maximum number of packets to wait before an out-of-order\n" - " or lost packet is considered lost.\n" - " -o target file name without extension; the same\n" - " printf formating can be used. For example, output_%%05d.\n" - " An extension will be added, either .j2c for original\n" - " frames, or .ppm for decoded images.\n" - " -display use this to display decoded frames.\n" - " -quiet use to stop printing informative messages.\n." + " -src_addr , packets from other sources\n" + " will be ignored. If not specified, then packets\n" + " from any source are accepted.\n" + " -src_port , packets from other source ports are\n" + " ignored. If not specified, then packets from any\n" + " port are accepted -- I would recommend not leaving\n" + " this one out.\n" + " -recv_buf_size recvfrom buffer size; default is 65536.\n" + " -blocking sets the receiving socket blocking mode to blocking.\n" + " The default mode is non-blocking. A blocking socket\n" + " increases the likelihood of not receiving some\n" + " packets; this is because the thread get into sleep\n" + " state, and therefore takes sometime to wakeup. A\n" + " non-blocking socket increase power consumption,\n" + " because it prevents the thread from sleeping.\n" + " -num_threads number of threads for decoding and\n" + " displaying files. This number also determines the\n" + " number of in-flight files, not completely\n" + " saved/processed yet. 
The number of files is set to\n" + " number of threads + 1\n" + " -num_packets number of in-flight packets; this is the\n" + " maximum number of packets to wait before an\n" + " out-of-order or lost packet is considered lost.\n" + " -o target file name without extension; the same\n" + " printf formating can be used. For example,\n" + " output_%%05d. An extension will be added, either .j2c\n" + " for original frames, or .ppm for decoded images.\n" + " -display use this to display decoded frames.\n" + " -quiet use to stop printing informative messages.\n." "\n" ); exit(-1); } if (!get_arguments(argc, argv, recv_addr, recv_port, src_addr, src_port, target_name, num_threads, num_inflight_packets, - quiet, display, decode)) + recvfrm_buf_size, blocking, quiet, display, decode)) { exit(-1); } @@ -190,13 +205,13 @@ int main(int argc, char* argv[]) int result = inet_pton(AF_INET, p, &server.sin_addr); if (result != 1) OJPH_ERROR(0x02000001, "Please provide a valid IP address when " - "using \"-addr,\" the provided address %s is not valid\n", + "using \"-addr,\" the provided address %s is not valid", recv_addr); ojph::ui16 port_number = 0; port_number = (ojph::ui16)atoi(recv_port); if (port_number == 0) - OJPH_ERROR(0x02000003, "Please provide a valid port number. " - "The number you provided is %d\n", recv_port); + OJPH_ERROR(0x02000002, "Please provide a valid port number. 
" + "The number you provided is %d", recv_port); server.sin_port = htons(port_number); } @@ -206,18 +221,37 @@ int main(int argc, char* argv[]) if(s.intern() == OJPH_INVALID_SOCKET) { std::string err = smanager.get_last_error_message(); - OJPH_ERROR(0x02000004, "Could not create socket : %s\n", err.data()); + OJPH_ERROR(0x02000003, "Could not create socket: %s", err.data()); + } + + // change recv buffer size; default is 65536 + int32_t nsize = recvfrm_buf_size; + if (setsockopt(s.intern(), SOL_SOCKET, SO_RCVBUF, + (char*)&nsize, sizeof(nsize)) == -1) + { + std::string err = smanager.get_last_error_message(); + OJPH_INFO(0x02000001, + "Failed to expand receive buffer: %s", err.data()); + } + + // set socket to non-blocking + if (s.set_blocking_mode(blocking) == false) + { + std::string err = smanager.get_last_error_message(); + OJPH_INFO(0x02000002, + "Failed to set the socket's blocking mode to %s, with error %s", + blocking ? "blocking" : "non-blocking", err.data()); } // bind to listening address - if( bind(s.intern(), (struct sockaddr *)&server, sizeof(server)) == -1) + if(bind(s.intern(), (struct sockaddr *)&server, sizeof(server)) == -1) { std::string err = smanager.get_last_error_message(); - OJPH_ERROR(0x02000005, - "Could not bind address to socket : %s\n", err.data()); + OJPH_ERROR(0x02000004, + "Could not bind address to socket: %s", err.data()); } - // listen to incoming data, and forward it to packet_handler + // process the source ip address and port ojph::ui32 saddr = 0; if (src_addr) { @@ -228,8 +262,8 @@ int main(int argc, char* argv[]) struct sockaddr_in t; int result = inet_pton(AF_INET, p, &t.sin_addr); if (result != 1) - OJPH_ERROR(0x02000006, "Please provide a valid IP address when " - "using \"-src_addr,\" the provided address %s is not valid\n", + OJPH_ERROR(0x02000005, "Please provide a valid IP address when " + "using \"-src_addr,\" the provided address %s is not valid", src_addr); saddr = smanager.get_addr(t); } @@ -238,46 +272,76 @@ int 
main(int argc, char* argv[]) { sport = (ojph::ui16)atoi(src_port); if (sport == 0) - OJPH_ERROR(0x02000007, "Please provide a valid port number. " - "The number you provided is %d\n", src_port); + OJPH_ERROR(0x02000006, "Please provide a valid port number. " + "The number you provided is %d", src_port); } + // listen to incoming data, and forward it to packet_handler + struct sockaddr_in si_other; + socklen_t socklen = sizeof(si_other); bool src_printed = false; ojph::stex::rtp_packet* packet = NULL; + ojph::ui32 lost_packets = 0, last_seq = 0; while (1) { if (packet == NULL || packet->num_bytes != 0) // num_bytes == 0 packet = packets_handler.exchange(packet); // if packet was ignored + if (packet == NULL) + continue; + packet->num_bytes = 0; - struct sockaddr_in si_other; - socklen_t socklen = sizeof(si_other); // receive data -- this is a blocking call - packet->num_bytes = 0; // if we ignore the packet, we can continue int num_bytes = (int)recvfrom(s.intern(), (char*)packet->data, packet->max_size, 0, (struct sockaddr *) &si_other, &socklen); + if (num_bytes < 0) { - std::string err = smanager.get_last_error_message(); - OJPH_INFO(0x02000008, "Failed to receive data : %s\n", err.data()); + int last_error = smanager.get_last_error(); + if (last_error != OJPH_EWOULDBLOCK) + { + std::string err = smanager.get_error_message(last_error); + OJPH_INFO(0x02000003, "Failed to receive data: %s", err.data()); + } continue; // if we wish to continue } + if ((src_addr && saddr != smanager.get_addr(si_other)) || (src_port && sport != si_other.sin_port)) { + constexpr int buf_size = 128; + char buf[buf_size]; + ojph::ui32 addr = smanager.get_addr(si_other); + const char* t = inet_ntop(AF_INET, &addr, buf, buf_size); + if (t == NULL) { + std::string err = smanager.get_last_error_message(); + OJPH_INFO(0x02000004, + "Error converting source address: %s", err.data()); + } + printf("Source mistmatch %s, port %d\n", + t, ntohs(si_other.sin_port)); continue; } - packet->num_bytes = 
(ojph::ui32)num_bytes; + + packet->num_bytes = (ojph::ui32)num_bytes; + + if (last_seq + 1 != packet->get_sequence_number()) { + //lost_packets = packet->get_sequence_number() - last_seq - 1; + printf("lost_packets %d %d\n", last_seq, packet->get_sequence_number()); + } + last_seq = packet->get_sequence_number(); if (!quiet && !src_printed) { constexpr int buf_size = 128; char buf[buf_size]; - if (!inet_ntop(AF_INET, &si_other, buf, buf_size)) { + ojph::ui32 addr = smanager.get_addr(si_other); + const char* t = inet_ntop(AF_INET, &addr, buf, buf_size); + if (t == NULL) { std::string err = smanager.get_last_error_message(); - OJPH_INFO(0x02000009, - "Error converting source address.\n", err.data()); + OJPH_INFO(0x02000005, + "Error converting source address: %s", err.data()); } printf("Receiving data from %s, port %d\n", - buf, ntohs(si_other.sin_port)); + t, ntohs(si_other.sin_port)); src_printed = true; } } diff --git a/src/apps/ojph_stream_expand/stream_expand_support.cpp b/src/apps/ojph_stream_expand/stream_expand_support.cpp index dbb3513c..a2b7ac44 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.cpp +++ b/src/apps/ojph_stream_expand/stream_expand_support.cpp @@ -70,97 +70,103 @@ void packets_handler::init(bool quiet, ui32 num_packets, /////////////////////////////////////////////////////////////////////////////// rtp_packet* packets_handler::exchange(rtp_packet* p) { - if (p == NULL) { - assert(in_use == NULL && num_packets > 0); - // move from avail to in_use - rtp_packet* p = avail; - avail = avail->next; - p->next = in_use; - in_use = p; - return p; - } - if (p->num_bytes == 0) - return p; + // if (p == NULL) { + // assert(in_use == NULL && num_packets > 0); + // // move from avail to in_use + // rtp_packet* p = avail; + // avail = avail->next; + // p->next = in_use; + // in_use = p; + // return p; + // } + // if (p->num_bytes == 0) + // return p; + + if (p != NULL) + frames->push(p); + else + p = avail; + return p; // We can a series of test 
to remove/warn about unsupported options // but we currently do not do that yet - bool result = frames->push(p); - if (result == false) // cannot use the packet for the time being - { - if (avail) - { // move from avail to in_use - p = avail; - avail = avail->next; - p->next = in_use; - in_use = p; - } - else - { - assert(p->next != NULL || num_packets == 1); - if (p->next != NULL) - { // use the oldest/last packet in in_use - assert(p == in_use); - rtp_packet *pp = p; // previous p - p = p->next; - while(p->next != NULL) { pp = p; p = p->next; } - pp->next = NULL; - p->next = in_use; - in_use = p; - } - } - return p; - } - else - { - // sequence number of the most recent packet - ui32 seq = p->get_sequence_number(); + // bool result = frames->push(p); + // if (result == false) // cannot use the packet for the time being + // { + // if (avail) + // { // move from avail to in_use + // p = avail; + // avail = avail->next; + // p->next = in_use; + // in_use = p; + // } + // else + // { + // assert(p->next != NULL || num_packets == 1); + // if (p->next != NULL) + // { // use the oldest/last packet in in_use + // assert(p == in_use); + // rtp_packet *pp = p; // previous p + // p = p->next; + // while(p->next != NULL) { pp = p; p = p->next; } + // pp->next = NULL; + // p->next = in_use; + // in_use = p; + // } + // } + // return p; + // } + // else + // { + // // sequence number of the most recent packet + // ui32 seq = p->get_sequence_number(); - // move packet to avail - assert(p == in_use); - in_use = in_use->next; - p->next = avail; - avail = p; + // // move packet to avail + // assert(p == in_use); + // in_use = in_use->next; + // p->next = avail; + // avail = p; - // test if you can push more packets, also remove old packets - p = in_use; - rtp_packet *pp = p; // previous p -- will be updated before use - while (p != NULL) - { - // if packet is used or it is old - result = frames->push(p); - result = result | (seq > p->get_sequence_number() + num_packets); - if 
(result) - { - // move packet from in_use to avail - if (p == in_use) - { - in_use = in_use->next; - p->next = avail; - avail = p; - p = in_use; - } - else - { - pp->next = p->next; - p->next = avail; - avail = p; - p = pp->next; - } - } - else { - pp = p; - p = p->next; - } - } + // // test if you can push more packets, also remove old packets + // p = in_use; + // rtp_packet *pp = p; // previous p -- will be updated before use + // while (p != NULL) + // { + // // if packet is used or it is old + // result = frames->push(p); + // result = result | (seq > p->get_sequence_number() + num_packets); + // if (result) + // { + // // move packet from in_use to avail + // if (p == in_use) + // { + // in_use = in_use->next; + // p->next = avail; + // avail = p; + // p = in_use; + // } + // else + // { + // pp->next = p->next; + // p->next = avail; + // avail = p; + // p = pp->next; + // } + // } + // else { + // pp = p; + // p = p->next; + // } + // } - // get one from avail and move it to in_use - p = avail; - avail = avail->next; - p->next = in_use; - in_use = p; - return p; - } + // // get one from avail and move it to in_use + // p = avail; + // avail = avail->next; + // p->next = in_use; + // in_use = p; + // return p; + // } } /////////////////////////////////////////////////////////////////////////////// @@ -229,11 +235,13 @@ void frames_handler::init(bool quiet, bool display, bool decode, renderers_store = new j2k_frame_renderer[num_files]; ui32 i = 0; for (; i < num_files - 1; ++i) { + files_store[i].f.open(2<<20, false); files_store[i].f.close(); files_store[i].init(this, files_store + i + 1, storers_store + i, renderers_store + i, target_name); storers_store[i].init(files_store + i, target_name); renderers_store[i].init(files_store + i, target_name); } + files_store[i].f.open(2<<20, false); files_store[i].f.close(); files_store[i].init(this, NULL, storers_store + i, renderers_store + i, target_name); storers_store[i].init(files_store + i, target_name); @@ -265,6 
+273,7 @@ bool frames_handler::push(rtp_packet* p) if (target_name != NULL) { f->f.close(); + f->done.store(1, std::memory_order_relaxed); thread_pool->add_task(f->storer); } f = in_use; @@ -276,6 +285,7 @@ bool frames_handler::push(rtp_packet* p) if (target_name != NULL) { f->f.close(); + f->done.store(1, std::memory_order_relaxed); thread_pool->add_task(f->storer); } f = pf->next; @@ -288,10 +298,15 @@ bool frames_handler::push(rtp_packet* p) } } + static int count = -1; + ++count; + static bool happened = false; + // process newly received packet if (p->get_packet_type() != rtp_packet::PT_BODY) - { // main payload header - printf("A new file %d\n", p->get_time_stamp()); + { // main packet payload + printf("A new file %d %d\n", p->get_time_stamp(), count); + count = 0; if (avail) { // move from avail to in_use @@ -301,33 +316,34 @@ bool frames_handler::push(rtp_packet* p) in_use = f; f->timestamp = p->get_time_stamp(); f->last_seen_seq = p->get_sequence_number(); - f->marked = false; - f->estimated_size = f->actual_size = 0; f->frame_idx = frame_idx++; - f->f.open(1<<20, true); // start with 1MB - f->write(p); + f->f.open(); + f->f.write(p->get_data(), p->get_data_size()); + happened = false; return true; } else return false; } else - { // body payload header - stex_file* f = in_use, *pf = in_use; + { // body packet payload + stex_file* f = in_use, *pf = NULL; while (f != NULL && f->timestamp != p->get_time_stamp()) { pf = f; f = f->next; } if (f == NULL) return false; + if (f->last_seen_seq + 1 != p->get_sequence_number()) { + if (!happened) + printf("expected %d, found %d, count %d\n", f->last_seen_seq + 1, p->get_sequence_number(), count); + happened = true; + return false; + } - f->last_seen_seq = ojph_max(f->last_seen_seq, p->get_sequence_number()); - f->write(p); - + f->last_seen_seq = p->get_sequence_number(); + f->f.write(p->get_data(), p->get_data_size()); if (p->is_marked()) - f->marked = true; - - if (f->marked && f->are_packets_missing() == false) { 
// move from from in_use to processing if (f == in_use) @@ -336,13 +352,10 @@ bool frames_handler::push(rtp_packet* p) pf->next = f->next; f->next = processing; processing = f; - f->f.close(); + f->done.store(1, std::memory_order_relaxed); thread_pool->add_task(f->storer); } - // else - // printf("%02x %02x\n", p->data[0], p->data[1]); - return true; } return false; @@ -389,8 +402,6 @@ void frames_handler::check_files_in_processing() // move f from processing to avail f->timestamp = 0; f->last_seen_seq = 0; - f->marked = false; - f->estimated_size = f->actual_size = 0; f->frame_idx = 0; if (f == processing) { diff --git a/src/apps/ojph_stream_expand/stream_expand_support.h b/src/apps/ojph_stream_expand/stream_expand_support.h index 517b503e..575f3663 100644 --- a/src/apps/ojph_stream_expand/stream_expand_support.h +++ b/src/apps/ojph_stream_expand/stream_expand_support.h @@ -263,9 +263,8 @@ struct stex_file { stex_file() { timestamp = last_seen_seq = 0; - marked = false; done.store(0, std::memory_order_relaxed); - estimated_size = actual_size = frame_idx = 0; + frame_idx = 0; parent = NULL; name_template = NULL; storer = NULL; @@ -282,17 +281,6 @@ struct stex_file { this->renderer = renderer; } - void write(rtp_packet *p) - { - ui32 pos = p->get_data_pos(); - ui32 len = p->get_data_size(); - estimated_size = ojph_max(estimated_size, pos + len); - actual_size += len; - f.seek(pos, outfile_base::OJPH_SEEK_SET); - f.write(p->get_data(), len); - } - bool are_packets_missing() - { return (estimated_size != actual_size); } void notify_file_completion(); public: @@ -300,9 +288,6 @@ struct stex_file { ui32 timestamp; //!