Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 86 additions & 53 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@
# flags manually on cmd line
# 2/ Should we standardise on just AVX? As machine we run on
# may be different to machine we build on
# Minimum macOS version to build for. This is decided before project()
# because CMake consults CMAKE_OSX_DEPLOYMENT_TARGET while it configures the
# toolchain.
#
# The entry is cached exactly once: a second set(... CACHE ...) without FORCE
# is a no-op, so an unconditional default placed ahead of this test would
# make the PowerPC branch unreachable.
#
# APPLE is deliberately not part of the condition. Platform variables are only
# defined once project() has run, so testing APPLE here is always false.
# Leaving it out is harmless because the deployment target is ignored on
# non-Apple platforms.
#
# Only architectures given explicitly (-DCMAKE_OSX_ARCHITECTURES=...) are
# visible this early. A -DCMAKE_OSX_DEPLOYMENT_TARGET=... on the command line
# still takes precedence over both defaults.
if(CMAKE_OSX_ARCHITECTURES MATCHES "ppc")
    # The pattern "ppc" also matches "ppc64".
    set(CMAKE_OSX_DEPLOYMENT_TARGET "10.5" CACHE STRING "Minimum OS X deployment version")
else()
    set(CMAKE_OSX_DEPLOYMENT_TARGET "10.9" CACHE STRING "Minimum OS X deployment version")
endif()

cmake_minimum_required(VERSION 3.0)

Expand All @@ -14,10 +19,23 @@ endif()
project(LPCNet C)

option(DISABLE_CPU_OPTIMIZATION "Disable CPU optimization discovery." OFF)

# CPU optimisation switches.
#
# option() does nothing when its variable already exists, so every switch is
# declared exactly once, with a default that depends on the build settings:
#   - DISABLE_CPU_OPTIMIZATION set : AVX, SSE, NEON and PPC_OPTS default to OFF
#   - PowerPC target               : only PPC_OPTS is declared, default ON
#   - any other target             : AVX, SSE and NEON default to ON
# AVX2 is always declared and is opt-in.
#
# Values the user passes with -D<NAME>=... still override these defaults.
option(AVX2 "Enable AVX2 CPU optimizations." OFF)

if(DISABLE_CPU_OPTIMIZATION)
    option(AVX "Enable AVX CPU optimizations." OFF)
    option(SSE "Enable SSE CPU optimizations." OFF)
    option(NEON "Enable NEON CPU optimizations for RPi." OFF)
    option(PPC_OPTS "Enable PowerPC CPU optimizations." OFF)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
    option(PPC_OPTS "Enable PowerPC CPU optimizations." ON)
else()
    option(AVX "Enable AVX CPU optimizations." ON)
    option(SSE "Enable SSE CPU optimizations." ON)
    option(NEON "Enable NEON CPU optimizations for RPi." ON)
endif()

include(GNUInstallDirs)
mark_as_advanced(CLEAR
Expand All @@ -26,9 +44,13 @@ mark_as_advanced(CLEAR
CMAKE_INSTALL_LIBDIR
)

# Universal macOS binaries: x86_64 + arm64 by default, ppc + ppc64 when the
# target processor is PowerPC.
if(BUILD_OSX_UNIVERSAL)
    # Start from the Intel/ARM pair and replace it for PowerPC targets.
    set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64")
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
        set(CMAKE_OSX_ARCHITECTURES "ppc;ppc64")
    endif()
endif()

#
Expand Down Expand Up @@ -93,36 +115,46 @@ message(STATUS "Host system arch is: ${CMAKE_SYSTEM_PROCESSOR}")
# Detection of available CPU optimizations
if(NOT DISABLE_CPU_OPTIMIZATION)
if(UNIX AND NOT APPLE)
message(STATUS "Looking for available CPU optimizations on Linux/BSD system...")
execute_process(COMMAND grep -c "avx2" /proc/cpuinfo
OUTPUT_VARIABLE AVX2_PRESENT)
execute_process(COMMAND grep -c "avx " /proc/cpuinfo
OUTPUT_VARIABLE AVX_PRESENT)
execute_process(COMMAND grep -c "sse4_1 " /proc/cpuinfo
OUTPUT_VARIABLE SSE_PRESENT)
execute_process(COMMAND grep -c "neon" /proc/cpuinfo
OUTPUT_VARIABLE NEON_PRESENT)
elseif(APPLE)
if(BUILD_OSX_UNIVERSAL)
# Presume AVX and SSE are enabled on the x86 side. (AVX2 is not guaranteed depending
# on model.) The ARM side will auto-enable NEON optimizations by virtue of being aarch64.
set(AVX_PRESENT TRUE)
set(SSE_PRESENT TRUE)
set(NEON_PRESENT TRUE)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
message(STATUS "PowerPC processor detected.")
set(PPC_PRESENT TRUE)
else()
# Under OSX we need to look through a few sysctl entries to determine what our CPU supports.
message(STATUS "Looking for available CPU optimizations on an OSX system...")
execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2
message(STATUS "Looking for available CPU optimizations on Linux/BSD system...")
execute_process(COMMAND grep -c "avx2" /proc/cpuinfo
OUTPUT_VARIABLE AVX2_PRESENT)
execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX
execute_process(COMMAND grep -c "avx " /proc/cpuinfo
OUTPUT_VARIABLE AVX_PRESENT)
execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c SSE4.1
execute_process(COMMAND grep -c "sse4_1 " /proc/cpuinfo
OUTPUT_VARIABLE SSE_PRESENT)
execute_process(COMMAND grep -c "neon" /proc/cpuinfo
OUTPUT_VARIABLE NEON_PRESENT)
endif()
elseif(APPLE)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
message(STATUS "PowerPC processor detected.")
set(PPC_PRESENT TRUE)
else()
if(BUILD_OSX_UNIVERSAL)
# Presume AVX and SSE are enabled on the x86 side. (AVX2 is not guaranteed depending
# on model.) The ARM side will auto-enable NEON optimizations by virtue of being aarch64.
set(AVX_PRESENT TRUE)
set(SSE_PRESENT TRUE)
set(NEON_PRESENT TRUE)
else()
# Under OSX we need to look through a few sysctl entries to determine what our CPU supports.
message(STATUS "Looking for available CPU optimizations on an OSX system...")
execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2
OUTPUT_VARIABLE AVX2_PRESENT)
execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX
OUTPUT_VARIABLE AVX_PRESENT)
execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c SSE4.1
OUTPUT_VARIABLE SSE_PRESENT)

# Unlike with the above, NEON *is* guaranteed if on ARM as there were never any ARM32 Macs
# available. We don't need any specific compiler flags for this, though.
set(NEON_PRESENT TRUE)
endif(BUILD_OSX_UNIVERSAL)
# Unlike with the above, NEON *is* guaranteed if on ARM as there were never any ARM32 Macs
# available. We don't need any specific compiler flags for this, though.
set(NEON_PRESENT TRUE)
endif(BUILD_OSX_UNIVERSAL)
endif()
elseif(WIN32)
message(STATUS "No detection capability on Windows, assuming AVX is available.")
set(AVX_PRESENT TRUE)
Expand All @@ -136,32 +168,33 @@ if(NOT DISABLE_CPU_OPTIMIZATION)
set(SSE_PRESENT TRUE)
set(NEON_PRESENT TRUE)
endif()
else()
# Presume all optimizations are available as the user is likely setting them themselves
# (e.g. cross-compiling)
set(AVX2_PRESENT TRUE)
set(AVX_PRESENT TRUE)
set(SSE_PRESENT TRUE)
set(NEON_PRESENT TRUE)
endif()

set(LPCNET_C_PROC_FLAGS "")

# Pick the compiler flags for the best instruction set that is both switched
# on (its option) and reported as available (its <NAME>_PRESENT variable).
#
# A <NAME>_PRESENT value is either a boolean assigned by this file or the raw
# output of `grep -c`; the "GREATER 0" half of each test handles the numeric
# form. The conditions depend on how the unquoted expansions are parsed, so
# they are kept exactly as written.
#
# The x86/ARM chain lives only inside the non-PowerPC branch, where it is
# properly terminated by its own endif().
if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
    if(${PPC_OPTS} AND (${PPC_PRESENT} OR ${PPC_PRESENT} GREATER 0))
        # PowerPC
        message(STATUS "PowerPC processor flags found or enabled.")
        # NOTE(review): this is one space-separated string, whereas the other
        # branches store a list of separate flags - confirm that every
        # consumer of LPCNET_C_PROC_FLAGS accepts both forms.
        set(LPCNET_C_PROC_FLAGS "-mcpu=native -mtune=native")
    endif()
else()
    # Ordered from most to least capable; the first match wins.
    if(${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0))
        message(STATUS "avx2 processor flags found or enabled.")
        set(LPCNET_C_PROC_FLAGS -mavx2 -mfma)
    elseif(${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0))
        # AVX2 machines will also match on AVX
        message(STATUS "avx processor flags found or enabled.")
        set(LPCNET_C_PROC_FLAGS -mavx)
    elseif(${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0))
        # AVX and AVX2 machines will also match on SSE
        message(STATUS "sse processor flags found or enabled.")
        set(LPCNET_C_PROC_FLAGS -msse4.1)
    elseif(${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0))
        # RPi / ARM 32bit
        message(STATUS "neon processor flags found or enabled.")
        set(LPCNET_C_PROC_FLAGS -mfpu=neon -march=armv8-a -mtune=cortex-a53)
    endif()
endif()

# grab latest NN model (or substitute your own)
Expand Down
30 changes: 17 additions & 13 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,20 +72,24 @@ target_link_libraries(dump_data lpcnetfreedv m)
add_executable(test_lpcnet test_lpcnet.c nnet_rw.c)
target_link_libraries(test_lpcnet lpcnetfreedv m)

# test_vec is skipped on PowerPC. Elsewhere it is built only when at least one
# of SSE/AVX/AVX2/NEON is both enabled and reported present, or when the
# target processor is aarch64; otherwise a warning is printed.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "powerpc")
    message(WARNING "Tests not implemented for PowerPC, not building test_vec.")
else()
    if(
        (${SSE} AND (${SSE_PRESENT} OR ${SSE_PRESENT} GREATER 0)) OR
        (${AVX} AND (${AVX_PRESENT} OR ${AVX_PRESENT} GREATER 0)) OR
        (${AVX2} AND (${AVX2_PRESENT} OR ${AVX2_PRESENT} GREATER 0)) OR
        (${NEON} AND (${NEON_PRESENT} OR ${NEON_PRESENT} GREATER 0)) OR
        CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
        add_executable(test_vec test_vec.c)
        target_link_libraries(test_vec m)

        if(LPCNET_C_PROC_FLAGS)
            # COMPILE_FLAGS takes one string. Passing the flag list unquoted
            # would split it into extra property name/value arguments, so it
            # is flattened to a space-separated string and quoted instead.
            string(REPLACE ";" " " test_vec_compile_flags "${LPCNET_C_PROC_FLAGS}")
            set_source_files_properties(test_vec.c PROPERTIES COMPILE_FLAGS "${test_vec_compile_flags}")
        endif()
    else()
        message(WARNING "No SSE/AVX/AVX2 CPU flags identified, not building test_vec.")
    endif()
endif()

add_executable(quant_feat quant_feat.c)
Expand Down