Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ if (USE_CUDA)
message("---------------------------------------- CUDA")
# Enable CUDA and include CudaToolchain
add_definitions(-DUSE_CUDA=TRUE)
# Set CUDA architectures BEFORE enabling CUDA language (required for CMake 3.21+)
set(CMAKE_CUDA_ARCHITECTURES "35;50;72")
enable_language(CUDA)
include(toolchains/CudaToolchain)
# Set BLA_VENDOR to NVHPC for CUDA-enabled builds
Expand Down
62 changes: 61 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,67 @@ predict_data(

```

This example walks through initializing hardware, simulating spatial data, estimating model parameters, and making predictions using **ExaGeoStatCPP** in R.

## R Example
Here is another example demonstrating how to use **ExaGeoStatCPP** in R with a nugget-effect kernel (`UnivariateMaternNuggetsStationary`):

```r
# Load the ExaGeoStatCPP library
library(ExaGeoStatCPP)

# Set parameters for the simulation
ncores <- 30
ngpus <- 0
problem_size <- 1600
dts <- 320
lts <- 0
computation <- "exact"
dimension <- "2D"
kernel <- "UnivariateMaternNuggetsStationary"
initial_theta <- c(1,0.1,0.5,0.1)
lower_bound <- c(0.05,0.005,0.05,0.005)
upper_bound <- c(5,5,5,5)
acc <- 1e-9
p <- 1
q <- 1
opt_itrs <- 100

# Initialize hardware configuration
hardware <- new(Hardware, computation, ncores, ngpus, p, q)

# Simulate spatial data based on the specified kernel and parameters
exageostat_data <- simulate_data(
kernel = kernel,
initial_theta = initial_theta,
problem_size = problem_size,
dts = dts,
dimension = dimension
)

# Estimate model parameters using MLE
estimated_theta <- model_data(
matrix=exageostat_data$m,
x=exageostat_data$x,
y=exageostat_data$y,
kernel=kernel, dts=dts,
dimension=dimension,
lb=lower_bound,
ub=upper_bound,
mle_itr=opt_itrs)

# Perform spatial prediction using the estimated parameters
test_x <- c(0.2, 0.330)
test_y <- c(0.104, 0.14)
predict_data(
train_data=list(x=exageostat_data$x, y=exageostat_data$y, exageostat_data$m),
test_data=list(test_x, test_y),
kernel=kernel,
dts=dts,
estimated_theta=estimated_theta)
```

These two examples walk through initializing hardware, simulating spatial data, estimating model parameters, and making predictions using **ExaGeoStatCPP** in R.

> **Note:** Please take a look at the end-to-end examples in the `examples/` directory as a reference for using all the operations.

Expand Down
13 changes: 6 additions & 7 deletions cmake/ImportNLOPT.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
# Configuration settings for the integration of the NLOPT library
# 'name' is assigned to "NLOPT", serving as the identifier for this library within the script.
set(name "NLOPT")
# 'tag' defines "v2.7.1" as the version tag of NLOPT, indicating the specific release to be utilized.
set(tag "v2.7.1")
# 'version' specifies "2.7.1" as the version of the NLOPT library, ensuring compatibility with the project's requirements.
set(version "2.7.1")
# 'flag' is intended for additional configuration options during the build process. Disable Python and SWIG to avoid Python compatibility issues.
set(flag "-DNLOPT_PYTHON=OFF -DNLOPT_SWIG=OFF")
# 'tag' defines "v2.8.0" as the version tag of NLOPT, indicating the specific release to be utilized.
set(tag "v2.8.0")
# 'version' specifies "2.8.0" as the version of the NLOPT library, ensuring compatibility with the project's requirements (Python 3.13+ compatible).
set(version "2.8.0")
# 'flag' is intended for additional configuration options during the build process. Disable ALL language bindings to avoid Python compatibility issues.
set(flag -DNLOPT_PYTHON=OFF \-DNLOPT_SWIG=OFF \-DNLOPT_OCTAVE=OFF \-DNLOPT_MATLAB=OFF \-DNLOPT_GUILE=OFF)
# 'is_cmake' indicates that NLOPT uses CMake for its build system, which is set to ON.
set(is_cmake ON)
# 'is_git' denotes that the NLOPT source code is hosted in a Git repository, which is set to ON.
Expand All @@ -35,4 +35,3 @@ ImportDependency(${name} ${tag} ${version} ${url} "${flag}" "" ${is_cmake} ${is_

# A status message is outputted to indicate the successful integration of the NLOPT library into the project.
message(STATUS "${name} done")

5 changes: 4 additions & 1 deletion cmake/toolchains/CudaToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Set the CUDA architectures to be targeted
set(CUDA_ARCHITECTURES "35;50;72")
# Note: CMAKE_CUDA_ARCHITECTURES should be set before enable_language(CUDA) in main CMakeLists.txt
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES "35;50;72")
endif()

# Find the CUDA toolkit
find_package(CUDAToolkit REQUIRED)
Expand Down
2 changes: 1 addition & 1 deletion configurations/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"FileLogPath": "logs.log",
"FileLogName": "logs.log",
"DistanceMetric": "euclidean",
"MaxMleIterations": "1",
"MaxMleIterations": "1000",
"Accuracy": "0",
"Tolerance": "1",
"ZMiss": "1",
Expand Down
95 changes: 58 additions & 37 deletions src/configurations/Configurations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,9 @@ Configurations::Configurations() {
SetAccuracy(0);
SetIsNonGaussian(false);
mIsThetaInit = false;

#if !DEFAULT_RUNTIME
// Set default values for Hicma-Parsec params
#if !DEFAULT_RUNTIME
// Set default values for PaRSEC runtime params
SetTolerance(0);
// TODO: Currently, we support real data only in PaRSEC. In the future, we should support synthetic and real data for both runtimes.
SetIsSynthetic(false);
SetMeanTrendRemoval(false);
#endif
}

Expand Down Expand Up @@ -93,6 +89,9 @@ void Configurations::ValidateConfiguration() {
if (!GetDataPath().empty()) {
SetIsSynthetic(false);
}
if (GetMeanTrendRemoval()) {
SetIsSynthetic(false);
}

if (GetIsMSPE() || GetIsMLOEMMOM() || GetIsIDW()) {
if (GetUnknownObservationsNb() <= 1) {
Expand All @@ -101,8 +100,9 @@ void Configurations::ValidateConfiguration() {
}
}

// Auto-enable logging if log path is provided
if (!GetLoggerPath().empty() && !GetLogger()) {
throw domain_error("To enable logging, please utilize the '--log' option in order to specify a log file.");
SetLogger(true);
}

if (GetUnknownObservationsNb() >= GetProblemSize()) {
Expand All @@ -116,6 +116,10 @@ void Configurations::ValidateConfiguration() {
}

if (GetMeanTrendRemoval()) {
if (GetDataPath().empty()) {
throw domain_error("You need to set the data path (--datapath) for Mean Trend Removal");
}

if (GetResultsPath().empty()) {
throw domain_error("You need to set the results path (--resultspath) before starting");
}
Expand All @@ -130,29 +134,44 @@ void Configurations::ValidateConfiguration() {
}

#if DEFAULT_RUNTIME
// Throw Errors if any of these arguments aren't given by the user.
// StarPU runtime: kernel always required
if (GetKernelName().empty()) {
throw domain_error("You need to set the Kernel, before starting");
}
if (GetMaxRank() == -1) {
SetMaxRank(1);
}
//#else
#else
// PaRSEC runtime: kernel required for synthetic data or Mean Trend Removal
if (GetKernelName().empty() && (GetIsSynthetic() || GetMeanTrendRemoval())) {
throw domain_error("You need to set the Kernel, before starting");
}
if(GetMaxRank() == -1){
SetMaxRank(GetDenseTileSize() / 2);
}
if (mDictionary.find("tolerance") == mDictionary.end()) {
SetTolerance(8);
}
if (GetDataPath().empty()) {
// Only require data path when NOT generating synthetic data
if (GetDataPath().empty() && !GetIsSynthetic()) {
throw domain_error("You need to set the data path, before starting");
}
#else
// PaRSEC runtime validations
if(GetMeanTrendRemoval() && GetKernelName().empty()){
throw domain_error("You need to set the Kernel for Mean Trend Removal, before starting");
}
// Climate Emulator requires data path for loading NetCDF files
if(GetIsClimateEmulator() && GetDataPath().empty()){
throw domain_error("You need to set the data path (--datapath) for Climate Emulator");
}
#endif

// Both runtimes: data_path required if not synthetic OR if Mean Trend Removal
if ((!GetIsSynthetic() || GetMeanTrendRemoval()) && GetDataPath().empty()) {
throw domain_error("You need to set the data path (use --data_path), before starting");
}

size_t found = GetKernelName().find("NonGaussian");
// Check if the substring was found
if (found != std::string::npos) {
Expand Down Expand Up @@ -217,51 +236,53 @@ void Configurations::PrintUsage() {
LOGGER("\n\t*** Available Arguments For ExaGeoStat Configurations ***")
LOGGER("--N=value : Problem size.")
LOGGER("--kernel=value : Used Kernel.")
LOGGER("--dimension=value : Used Dimension.")
LOGGER("--dimension=value : Used Dimension (2D, 3D, ST).")
LOGGER("--p=value : Used P-Grid.")
LOGGER("--q=value : Used P-Grid.")
LOGGER("--q=value : Used Q-Grid.")
LOGGER("--time_slot=value : Time slot value for ST.")
LOGGER("--computation=value : Used computation.")
LOGGER("--precision=value : Used precision.")
LOGGER("--precision=value : Used precision (single/double/mixed).")
LOGGER("--cores=value : Used to set the number of cores.")
LOGGER("--gpus=value : Used to set the number of GPUs.")
LOGGER("--dts=value : Used to set the Dense Tile size.")
LOGGER("--lts=value : Used to set the Low Tile size.")
LOGGER("--band=value : Used to set the Tile diagonal thickness.")
LOGGER("--band=value : Used to set the Tile diagonal thickness for TLR. Used with Chameleon/StarPU runtime.")
LOGGER("--Zmiss=value : Used to set number of unknown observation to be predicted.")
LOGGER("--observations_file=PATH/TO/File : Used to pass the observations file path.")
LOGGER("--max_rank=value : Used to the max rank value.")
LOGGER("--initial_theta=value : Initial theta parameters for optimization.")
LOGGER("--estimated_theta=value : Estimated kernel parameters for optimization.")
LOGGER("--seed=value : Seed value for random number generation.")
LOGGER("--verbose=value : Run mode whether quiet/standard/detailed.")
LOGGER("--log_path=value : Path to log file.")
LOGGER("--log=true/false : Enable logging to file (default: false).")
LOGGER("--logpath=PATH : Directory path for log and output files.")
LOGGER("--distance_metric=value : Used distance metric either eg or gcd.")
LOGGER("--max_mle_iterations=value : Maximum number of MLE iterations.")
LOGGER("--tolerance : MLE tolerance between two iterations.")
LOGGER("--data_path : Used to enter the path to the real data file.")
LOGGER("--mspe: Used to enable mean square prediction error.")
LOGGER("--fisher: Used to enable fisher tile prediction function.")
LOGGER("--idw: Used to IDW prediction auxiliary function.")
LOGGER("--mloe-mmom: Used to enable MLOE MMOM.")
LOGGER("--OOC : Used to enable Out of core technology.")
LOGGER("--approximation_mode : Used to enable Approximation mode.")
LOGGER("--log : Enable logging.")
LOGGER("--accuracy : Used to set the accuracy when using tlr.")
LOGGER("--max_mle_iterations=value : Maximum number of MLE iterations (default: 1000).")
LOGGER("--tolerance=value : MLE tolerance between two iterations.")
LOGGER("--datapath=PATH : Path to input data file. Format depends on mode:")
LOGGER(" - MLE/Modeling: CSV file (X,Y,Z format) with unique spatial locations")
LOGGER(" - Emulator (Mean Trend Removal): Directory with NetCDF files (longitude, latitude, timestep)")
LOGGER(" - Emulator (Climate): Directory with z_*.csv files from Mean Trend Removal output")
LOGGER("--mspe=true/false : (Used in prediction) Enable mean square prediction error computation.")
LOGGER("--fisher=true/false : (Used in prediction) Enable Fisher information matrix computation.")
LOGGER("--idw=true/false : (Used in prediction) Enable IDW (Inverse Distance Weighting) prediction.")
LOGGER("--mloe-mmom=true/false : (Used in prediction) Enable MLOE-MMOM auxiliary function.")
LOGGER("--OOC=true/false : Enable Out-of-Core technology.")
LOGGER("--approximation_mode=value : Enable Approximation mode (1=enabled, 0=disabled).")
LOGGER("--accuracy : Used to set the accuracy when using tlr. e.g. --accuracy=10 for 1e-10.")
LOGGER("--band_dense=value : Used to set the dense band double precision, Used with PaRSEC runtime only.")
LOGGER("--objects_number=value : Used to set the number of objects (number of viruses within a population), Used with PaRSEC runtime only.")
LOGGER("--adaptive_decision=value : Used to set the adaptive decision of each tile's format using norm approach, if enabled, otherwise 0, Used with PaRSEC runtime only.")
LOGGER("--add_diagonal=value : Used to add this number to diagonal elements to make the matrix positive definite in electrodynamics problem, Used with PaRSEC runtime only.")
LOGGER("--add_diagonal=value : Used to add this number to diagonal elements to make the matrix positive definite, Used with PaRSEC runtime only.")
LOGGER("--file_time_slot=value : Used to set time slot per file, Used with PaRSEC runtime only.")
LOGGER("--file_number=value : Used to set file number, Used with PaRSEC runtime only.")
LOGGER("--enable-inverse : Used to enable inverse spherical harmonics transform, Used with PaRSEC runtime only.")
LOGGER("--mpiio : Used to enable MPI IO, Used with PaRSEC runtime only.")
LOGGER("--log-file-path: Used to set path of file where events and results are logged.")
LOGGER("--start-year=value : Used to set the starting year for NetCDF data processing (MeanTrendRemoval).")
LOGGER("--end-year=value : Used to set the ending year for NetCDF data processing (MeanTrendRemoval).")
LOGGER("--lat=value : Used to set the latitude band index for MeanTrendRemoval climate data processing (required for MeanTrendRemoval).")
LOGGER("--lon=value : Used to set the longitude count for MeanTrendRemoval climate data processing (required for MeanTrendRemoval).")
LOGGER("--resultspath=PATH : Used to set the output directory path for MeanTrendRemoval results (required for MeanTrendRemoval).")
LOGGER("--enable-inverse=true/false : Used to enable inverse spherical harmonics transform, Used with PaRSEC runtime only.")
LOGGER("--mpiio=true/false : Used to enable MPI IO, Used with PaRSEC runtime only.")
LOGGER("--start-year=value : (Emulator only) Starting year for NetCDF data processing.")
LOGGER("--end-year=value : (Emulator only) Ending year for NetCDF data processing.")
LOGGER("--lat=value : (Emulator only) Latitude band index for climate data processing (required).")
LOGGER("--lon=value : (Emulator only) Longitude count for climate data processing (required).")
LOGGER("--meantrendremoval=true/false : (Emulator only) Enable Mean Trend Removal pipeline.")
LOGGER("--resultspath=PATH : (Emulator only) Output directory path for Mean Trend Removal results (required).")
LOGGER("\n\n")

exit(0);
Expand Down Expand Up @@ -302,10 +323,10 @@ void Configurations::PrintSummary() {
#if DEFAULT_RUNTIME
if (this->GetIsSynthetic()) {
LOGGER("#Synthetic Data generation")
LOGGER("#Number of Locations: " << this->GetProblemSize())
} else {
LOGGER("#Real Data loader")
}
LOGGER("#Number of Locations: " << this->GetProblemSize())
LOGGER("#Threads per node: " << this->GetCoresNumber())
LOGGER("#GPUs: " << this->GetGPUsNumbers())
if (this->GetPrecision() == 1) {
Expand Down
7 changes: 7 additions & 0 deletions src/configurations/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <nlohmann/json.hpp>

#include <configurations/Parser.hpp>
#include <configurations/Configurations.hpp>

using namespace std;
using namespace exageostat::configurations::parser;
Expand All @@ -32,6 +33,12 @@ void Parser::ParseCLI(const int &aArgC, char **apArgV, unordered_map<string, any

for (int i = 1; i < aArgC; ++i) {
argument = apArgV[i];

// Check for help flag before processing
if (argument == "--help" || argument == "-h") {
exageostat::configurations::Configurations::PrintUsage();
}

argument = argument.substr(2);
equal_sign_Idx = static_cast<int>(argument.find('='));
argument_name = argument.substr(0, equal_sign_Idx);
Expand Down
1 change: 1 addition & 0 deletions src/data-loader/concrete/CSVLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ void CSVLoader<T>::ReadData(Configurations &aConfigurations, vector<T> &aMeasure

file.close();
LOGGER("\tData is read from " << data_path << " successfully.")
LOGGER("\tNumber of Locations: " << aConfigurations.GetProblemSize())
}

template<typename T>
Expand Down