diff --git a/include/Conversion/ConversionPasses.h b/include/Conversion/ConversionPasses.h index 14e27a03..a8380d07 100644 --- a/include/Conversion/ConversionPasses.h +++ b/include/Conversion/ConversionPasses.h @@ -21,6 +21,7 @@ std::unique_ptr<mlir::Pass> createLowerBuiltinToNeuraPass(); std::unique_ptr<mlir::Pass> createLowerAffineToNeuraPass(); // TaskFlow Conversion Passes. +std::unique_ptr<mlir::Pass> createAssignTaskTargetPass(); std::unique_ptr<mlir::Pass> createConvertAffineToTaskflowPass(); std::unique_ptr<mlir::Pass> createConvertTaskflowToNeuraPass(); #define GEN_PASS_REGISTRATION diff --git a/include/Conversion/ConversionPasses.td b/include/Conversion/ConversionPasses.td index e2d727d2..af6f1984 100644 --- a/include/Conversion/ConversionPasses.td +++ b/include/Conversion/ConversionPasses.td @@ -48,6 +48,29 @@ def LowerAffineToNeura : Pass<"lower-affine-to-neura", "func::FuncOp">{ // TaskFlow Conversion Passes. //=========================================================// +def AssignTaskTarget : Pass<"assign-task-target", "ModuleOp">{ + let summary = "Assign hardware targets to compute tasks (functions)"; + let description = [{ + This pass assigns hardware target attributes (target.device) to functions + based on their names. It enables heterogeneous workload partitioning across + different hardware units such as CPU, CGRA, and DOE. + + The pass applies simple pattern matching rules: + - Functions containing "ray_sampler" or "sampler" -> CPU + - Functions containing "hash_encoder" or "encoder" -> DOE + - Functions containing "nerf_mlp" or "mlp" -> CGRA + - Top-level orchestrator functions (e.g., "nerf_forward") -> CPU + - Default -> CPU + + Example output: + func.func @ray_sampler_func(...) attributes {target.device = "cpu"} { ... } + func.func @hash_encoder_func(...) attributes {target.device = "doe"} { ... } + func.func @nerf_mlp_func(...) attributes {target.device = "cgra"} { ... 
} + }]; + let constructor = "mlir::createAssignTaskTargetPass()"; + let dependentDialects = ["mlir::func::FuncDialect"]; +} + def ConvertAffineToTaskflow : Pass<"convert-affine-to-taskflow", "ModuleOp">{ let summary = "Convert top-level affine.for operations to Taskflow dialect"; let description = [{ diff --git a/lib/Conversion/AssignTaskTarget/AssignTaskTargetPass.cpp b/lib/Conversion/AssignTaskTarget/AssignTaskTargetPass.cpp new file mode 100644 index 00000000..66ebfc06 --- /dev/null +++ b/lib/Conversion/AssignTaskTarget/AssignTaskTargetPass.cpp @@ -0,0 +1,133 @@ +//===- AssignTaskTargetPass.cpp - Assign hardware targets to tasks --------===// +// +// This pass assigns hardware target attributes to compute tasks (functions) +// based on task names. It helps partition the workload across different +// hardware units (CPU, CGRA, DOE, etc.) in heterogeneous computing systems. +// +//===----------------------------------------------------------------------===// + +#include "Conversion/ConversionPasses.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/Pass.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; + +namespace { + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// Determines the hardware target for a given function based on its name. 
+/// This function implements a simple pattern-matching strategy: +/// - ray_sampler* -> CPU +/// - hash_encoder* -> DOE +/// - nerf_mlp* -> CGRA +/// - nerf_forward (top-level) -> CPU +/// - default -> CPU +static StringRef matchHardwareTarget(StringRef funcName) { + // Top-level function: runs on CPU as coordinator + if (funcName == "nerf_forward") { + return "cpu"; + } + + // Pattern matching for compute tasks + if (funcName.contains("ray_sampler") || funcName.contains("sampler")) { + return "cpu"; + } + + if (funcName.contains("hash_encoder") || funcName.contains("encoder")) { + return "doe"; + } + + if (funcName.contains("nerf_mlp") || funcName.contains("mlp")) { + return "cgra"; + } + + // Default target + return "cpu"; +} + +//===----------------------------------------------------------------------===// +// AssignTaskTarget Pass +//===----------------------------------------------------------------------===// + +struct AssignTaskTargetPass + : public PassWrapper<AssignTaskTargetPass, OperationPass<ModuleOp>> { + + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(AssignTaskTargetPass) + + StringRef getArgument() const final { return "assign-task-target"; } + + StringRef getDescription() const final { + return "Assign hardware targets to compute tasks (functions) based on " + "task names"; + } + + void runOnOperation() override { + ModuleOp module = getOperation(); + OpBuilder builder(&getContext()); + + // Statistics + unsigned totalFuncs = 0; + unsigned assignedFuncs = 0; + llvm::DenseMap<StringRef, unsigned> targetStats; + + llvm::errs() << "\n"; + llvm::errs() << "========================================\n"; + llvm::errs() << "AssignTaskTarget Pass\n"; + llvm::errs() << "========================================\n\n"; + + // Walk through all functions in the module + module.walk([&](func::FuncOp funcOp) { + totalFuncs++; + StringRef funcName = funcOp.getName(); + + // Determine hardware target based on function name + StringRef target = matchHardwareTarget(funcName); + + // Set the target.device attribute + 
funcOp->setAttr("target.device", builder.getStringAttr(target)); + + assignedFuncs++; + targetStats[target]++; + + llvm::errs() << "  [ASSIGN] " << funcName << " -> " << target << "\n"; + }); + + // Print summary + llvm::errs() << "\n"; + llvm::errs() << "========================================\n"; + llvm::errs() << "Summary\n"; + llvm::errs() << "========================================\n"; + llvm::errs() << "Total functions: " << totalFuncs << "\n"; + llvm::errs() << "Assigned functions: " << assignedFuncs << "\n"; + + if (!targetStats.empty()) { + llvm::errs() << "\nTarget distribution:\n"; + for (auto &entry : targetStats) { + llvm::errs() << "  " << entry.first << ": " << entry.second + << " function(s)\n"; + } + } + + llvm::errs() << "========================================\n\n"; + } +}; + +} // namespace + +//===----------------------------------------------------------------------===// +// Pass Registration +//===----------------------------------------------------------------------===// + +namespace mlir { + +std::unique_ptr<Pass> createAssignTaskTargetPass() { + return std::make_unique<AssignTaskTargetPass>(); +} + +} // namespace mlir diff --git a/lib/Conversion/AssignTaskTarget/CMakeLists.txt b/lib/Conversion/AssignTaskTarget/CMakeLists.txt new file mode 100644 index 00000000..44c02c8c --- /dev/null +++ b/lib/Conversion/AssignTaskTarget/CMakeLists.txt @@ -0,0 +1,14 @@ +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +add_mlir_conversion_library(MLIRAssignTaskTargetPass + AssignTaskTargetPass.cpp + + DEPENDS + MLIRConversionIncGen + + LINK_LIBS PUBLIC + MLIRFuncDialect + MLIRIR + MLIRPass + MLIRSupport +) diff --git a/lib/Conversion/CMakeLists.txt b/lib/Conversion/CMakeLists.txt index 690dae25..3bf336e1 100644 --- a/lib/Conversion/CMakeLists.txt +++ b/lib/Conversion/CMakeLists.txt @@ -5,6 +5,7 @@ add_subdirectory(AffineToNeura) add_subdirectory(LlvmToNeura) add_subdirectory(MemRefToNeura) add_subdirectory(BuiltinToNeura) +add_subdirectory(AssignTaskTarget) 
add_subdirectory(AffineToTaskflow) add_subdirectory(TaskflowToNeura) @@ -23,6 +24,7 @@ target_link_libraries(MLIRConversion INTERFACE MLIRNeuraLlvmToNeuraPass MLIRNeuraMemRefToNeuraPass MLIRNeuraBuiltinToNeuraPass + MLIRAssignTaskTargetPass MLIRAffineToTaskflowPass MLIRTaskflowToNeuraPass ${dialect_libs} diff --git a/lib/TaskflowDialect/Transforms/CMakeLists.txt b/lib/TaskflowDialect/Transforms/CMakeLists.txt index e44401d8..afe4c8eb 100644 --- a/lib/TaskflowDialect/Transforms/CMakeLists.txt +++ b/lib/TaskflowDialect/Transforms/CMakeLists.txt @@ -3,6 +3,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) add_mlir_library(MLIRTaskflowTransforms ConstructHyperblockFromTaskPass.cpp CanonicalizeTaskPass.cpp + PartitionTaskByTarget.cpp ClassifyCountersPass.cpp DEPENDS diff --git a/lib/TaskflowDialect/Transforms/PartitionTaskByTarget.cpp b/lib/TaskflowDialect/Transforms/PartitionTaskByTarget.cpp new file mode 100644 index 00000000..0603303a --- /dev/null +++ b/lib/TaskflowDialect/Transforms/PartitionTaskByTarget.cpp @@ -0,0 +1,208 @@ +//===- PartitionTaskByTarget.cpp - Partition tasks by hardware target ---===// +// +// This pass analyzes taskflow.channel operations and annotates cross-boundary +// channels (channels connecting tasks on different hardware targets).
+// +//===----------------------------------------------------------------------===// + +#include "TaskflowDialect/TaskflowOps.h" +#include "TaskflowDialect/TaskflowPasses.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; +using namespace mlir::taskflow; + +namespace { + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// Get the target hardware of a task operation +static StringRef getTaskTarget(TaskflowTaskOp taskOp) { + if (auto targetAttr = taskOp->getAttrOfType<StringAttr>("target")) { + return targetAttr.getValue(); + } + return "CPU"; // Default target — NOTE(review): uppercase "CPU" here, while AssignTaskTarget emits lowercase "cpu"; confirm the intended casing so target comparisons don't spuriously flag cross-boundary channels +} + +/// Check if a value is produced by a TaskflowTaskOp +static TaskflowTaskOp getProducerTask(Value value) { + if (auto taskOp = value.getDefiningOp<TaskflowTaskOp>()) { + return taskOp; + } + // Handle block arguments (function parameters) + return nullptr; +} + +/// Get all consumer tasks of a value +static void getConsumerTasks(Value value, + SmallVectorImpl<TaskflowTaskOp> &consumers) { + for (OpOperand &use : value.getUses()) { + Operation *owner = use.getOwner(); + + // Direct consumer + if (auto taskOp = dyn_cast<TaskflowTaskOp>(owner)) { + consumers.push_back(taskOp); + } + // Through channel + else if (auto channelOp = dyn_cast<TaskflowChannelOp>(owner)) { + getConsumerTasks(channelOp.getTarget(), consumers); + } + } +} + +//===----------------------------------------------------------------------===// +// PartitionTaskByTarget Pass +//===----------------------------------------------------------------------===// + +struct PartitionTaskByTargetPass + : public PassWrapper<PartitionTaskByTargetPass, OperationPass<func::FuncOp>> { + + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(PartitionTaskByTargetPass) + + StringRef getArgument() const final { return 
"partition-taskflow-by-target"; } + + StringRef getDescription() const final { + return "Annotate cross-boundary channels in taskflow graph"; + } + + void runOnOperation() override { + func::FuncOp func = getOperation(); + OpBuilder builder(&getContext()); + + // Statistics + unsigned totalChannels = 0; + unsigned crossBoundaryChannels = 0; + DenseMap<std::pair<StringRef, StringRef>, unsigned> transferStats; + + llvm::errs() << "\n"; + llvm::errs() << "========================================\n"; + llvm::errs() << "PartitionTaskByTarget Pass\n"; + llvm::errs() << "========================================\n"; + llvm::errs() << "Function: " << func.getName() << "\n\n"; + + // Step 1: Collect all tasks and their targets + SmallVector<TaskflowTaskOp> tasks; + func.walk([&](TaskflowTaskOp taskOp) { + tasks.push_back(taskOp); + StringRef target = getTaskTarget(taskOp); + llvm::errs() << "  Task: " << taskOp.getTaskName() + << " -> " << target << "\n"; + }); + + llvm::errs() << "\nTotal tasks: " << tasks.size() << "\n\n"; + + // Step 2: Process all channels + llvm::errs() << "Analyzing channels:\n"; + llvm::errs() << "----------------------------------------\n"; + + func.walk([&](TaskflowChannelOp channelOp) { + totalChannels++; + + Value source = channelOp.getSource(); + + // Get producer task + TaskflowTaskOp producerTask = getProducerTask(source); + if (!producerTask) { + llvm::errs() << "  Channel #" << totalChannels + << ": skipped (no producer task)\n"; + return; + } + + StringRef producerTarget = getTaskTarget(producerTask); + + // Get consumer tasks + SmallVector<TaskflowTaskOp> consumerTasks; + getConsumerTasks(channelOp.getTarget(), consumerTasks); + + if (consumerTasks.empty()) { + llvm::errs() << "  Channel #" << totalChannels + << ": " << producerTarget + << " -> (no consumers)\n"; + return; + } + + // Check all consumers + bool isCrossBoundary = false; + StringRef consumerTarget; + + for (auto consumerTask : consumerTasks) { + consumerTarget = getTaskTarget(consumerTask); + + if (producerTarget != consumerTarget) { + 
isCrossBoundary = true; + + // Annotate the channel + channelOp->setAttr("cross_boundary", + builder.getUnitAttr()); + channelOp->setAttr("from", + builder.getStringAttr(producerTarget)); + channelOp->setAttr("to", + builder.getStringAttr(consumerTarget)); + + crossBoundaryChannels++; + transferStats[{producerTarget, consumerTarget}]++; + + llvm::errs() << "  Channel #" << totalChannels << ": " + << producerTask.getTaskName() << " (" << producerTarget + << ") -> " + << consumerTask.getTaskName() << " (" << consumerTarget + << ") [CROSS-BOUNDARY]\n"; + + break; // Only need to annotate once + } + } + + if (!isCrossBoundary) { + llvm::errs() << "  Channel #" << totalChannels << ": " + << producerTarget << " -> " << producerTarget + << " [same target]\n"; + } + }); + + // Step 3: Print summary + llvm::errs() << "\n"; + llvm::errs() << "========================================\n"; + llvm::errs() << "Summary\n"; + llvm::errs() << "========================================\n"; + llvm::errs() << "Total channels: " << totalChannels << "\n"; + llvm::errs() << "Cross-boundary channels: " << crossBoundaryChannels << "\n"; + llvm::errs() << "Same-target channels: " + << (totalChannels - crossBoundaryChannels) << "\n"; + + if (!transferStats.empty()) { + llvm::errs() << "\nCross-boundary transfer breakdown:\n"; + for (auto &entry : transferStats) { + llvm::errs() << "  " << entry.first.first << " -> " + << entry.first.second << ": " + << entry.second << " transfer(s)\n"; + } + } + + llvm::errs() << "========================================\n\n"; + } +}; + +} // namespace + +//===----------------------------------------------------------------------===// +// Pass Registration +//===----------------------------------------------------------------------===// + +namespace mlir { +namespace taskflow { + +std::unique_ptr<Pass> createPartitionTaskByTargetPass() { + return std::make_unique<PartitionTaskByTargetPass>(); +} + +} // namespace taskflow +} // namespace mlir diff --git 
a/test/Conversion/AssignTaskTarget/assign-task-target.mlir b/test/Conversion/AssignTaskTarget/assign-task-target.mlir new file mode 100644 index 00000000..32d2e848 --- /dev/null +++ b/test/Conversion/AssignTaskTarget/assign-task-target.mlir @@ -0,0 +1,73 @@ +// RUN: mlir-neura-opt %s --assign-task-target -o %S/Output/assign-task-target.mlir.tmp +// RUN: mlir-neura-opt %s --assign-task-target | FileCheck %s + +// Test the AssignTaskTarget pass with NeRF modular functions + +module { + // CHECK-LABEL: func.func @ray_sampler_func + // CHECK-SAME: attributes {target.device = "cpu"} + func.func @ray_sampler_func(%arg0: tensor<2x3xf32>, %arg1: tensor<2x3xf32>) + -> tensor<2x16x3xf32> { + %0 = arith.constant 0.0 : f32 + %1 = tensor.empty() : tensor<2x16x3xf32> + return %1 : tensor<2x16x3xf32> + } + + // CHECK-LABEL: func.func @hash_encoder_func + // CHECK-SAME: attributes {target.device = "doe"} + func.func @hash_encoder_func(%arg0: tensor<2x16x3xf32>) + -> tensor<2x16x4xf32> { + %0 = tensor.empty() : tensor<2x16x4xf32> + return %0 : tensor<2x16x4xf32> + } + + // CHECK-LABEL: func.func @nerf_mlp_func + // CHECK-SAME: attributes {target.device = "cgra"} + func.func @nerf_mlp_func(%arg0: tensor<2x16x4xf32>, %arg1: tensor<2x3xf32>) + -> (tensor<2x16x1xf32>, tensor<2x16x3xf32>) { + %0 = tensor.empty() : tensor<2x16x1xf32> + %1 = tensor.empty() : tensor<2x16x3xf32> + return %0, %1 : tensor<2x16x1xf32>, tensor<2x16x3xf32> + } + + // CHECK-LABEL: func.func @nerf_forward + // CHECK-SAME: attributes {target.device = "cpu"} + func.func @nerf_forward(%arg0: tensor<2x3xf32>, %arg1: tensor<2x3xf32>) + -> (tensor<2x16x1xf32>, tensor<2x16x3xf32>) { + %positions = func.call @ray_sampler_func(%arg0, %arg1) + : (tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x16x3xf32> + + %encoded = func.call @hash_encoder_func(%positions) + : (tensor<2x16x3xf32>) -> tensor<2x16x4xf32> + + %density, %rgb = func.call @nerf_mlp_func(%encoded, %arg1) + : (tensor<2x16x4xf32>, tensor<2x3xf32>) + -> 
(tensor<2x16x1xf32>, tensor<2x16x3xf32>) + + return %density, %rgb : tensor<2x16x1xf32>, tensor<2x16x3xf32> + } + + // CHECK-LABEL: func.func @generic_sampler + // CHECK-SAME: attributes {target.device = "cpu"} + func.func @generic_sampler() { + return + } + + // CHECK-LABEL: func.func @custom_encoder + // CHECK-SAME: attributes {target.device = "doe"} + func.func @custom_encoder() { + return + } + + // CHECK-LABEL: func.func @some_mlp + // CHECK-SAME: attributes {target.device = "cgra"} + func.func @some_mlp() { + return + } + + // CHECK-LABEL: func.func @unknown_function + // CHECK-SAME: attributes {target.device = "cpu"} + func.func @unknown_function() { + return + } +} diff --git a/test/Conversion/AssignTaskTarget/run.log b/test/Conversion/AssignTaskTarget/run.log new file mode 100644 index 00000000..df32c269 --- /dev/null +++ b/test/Conversion/AssignTaskTarget/run.log @@ -0,0 +1,74 @@ +-- Testing: 1 tests, 1 workers -- +PASS: Neura Dialect Tests :: Conversion/AssignTaskTarget/assign-task-target.mlir (1 of 1) +Exit Code: 0 + +Command Output (stderr): +-- +RUN: at line 1: /workspace/dataflow/build/tools/mlir-neura-opt/mlir-neura-opt /workspace/dataflow/test/Conversion/AssignTaskTarget/assign-task-target.mlir --assign-task-target -o /workspace/dataflow/test/Conversion/AssignTaskTarget/Output/assign-task-target.mlir.tmp ++ /workspace/dataflow/build/tools/mlir-neura-opt/mlir-neura-opt /workspace/dataflow/test/Conversion/AssignTaskTarget/assign-task-target.mlir --assign-task-target -o /workspace/dataflow/test/Conversion/AssignTaskTarget/Output/assign-task-target.mlir.tmp +[mlir-neura-opt] No architecture specification file provided, using default configuration + +======================================== +AssignTaskTarget Pass +======================================== + + [ASSIGN] ray_sampler_func -> cpu + [ASSIGN] hash_encoder_func -> doe + [ASSIGN] nerf_mlp_func -> cgra + [ASSIGN] nerf_forward -> cpu + [ASSIGN] generic_sampler -> cpu + [ASSIGN] custom_encoder -> 
doe + [ASSIGN] some_mlp -> cgra + [ASSIGN] unknown_function -> cpu + +======================================== +Summary +======================================== +Total functions: 8 +Assigned functions: 8 + +Target distribution: + doe: 2 function(s) + cpu: 4 function(s) + cgra: 2 function(s) +======================================== + +RUN: at line 2: /workspace/dataflow/build/tools/mlir-neura-opt/mlir-neura-opt /workspace/dataflow/test/Conversion/AssignTaskTarget/assign-task-target.mlir --assign-task-target | /workspace/llvm-project/build/./bin/FileCheck /workspace/dataflow/test/Conversion/AssignTaskTarget/assign-task-target.mlir ++ /workspace/dataflow/build/tools/mlir-neura-opt/mlir-neura-opt /workspace/dataflow/test/Conversion/AssignTaskTarget/assign-task-target.mlir --assign-task-target ++ /workspace/llvm-project/build/./bin/FileCheck /workspace/dataflow/test/Conversion/AssignTaskTarget/assign-task-target.mlir +[mlir-neura-opt] No architecture specification file provided, using default configuration + +======================================== +AssignTaskTarget Pass +======================================== + + [ASSIGN] ray_sampler_func -> cpu + [ASSIGN] hash_encoder_func -> doe + [ASSIGN] nerf_mlp_func -> cgra + [ASSIGN] nerf_forward -> cpu + [ASSIGN] generic_sampler -> cpu + [ASSIGN] custom_encoder -> doe + [ASSIGN] some_mlp -> cgra + [ASSIGN] unknown_function -> cpu + +======================================== +Summary +======================================== +Total functions: 8 +Assigned functions: 8 + +Target distribution: + doe: 2 function(s) + cpu: 4 function(s) + cgra: 2 function(s) +======================================== + + +-- + +******************** + +Testing Time: 0.22s + +Total Discovered Tests: 1 + Passed: 1 (100.00%) diff --git a/test/benchmark/CGRA-Bench b/test/benchmark/CGRA-Bench index ccc0f9f1..2beecc59 160000 --- a/test/benchmark/CGRA-Bench +++ b/test/benchmark/CGRA-Bench @@ -1 +1 @@ -Subproject commit 
ccc0f9f100462a83942b8bf06247cca032fb817e +Subproject commit 2beecc599bd268f8665344ba2271f48c97db7aa0 diff --git a/test/e2e/bicg/bicg_int_kernel.mlir b/test/e2e/bicg/bicg_int_kernel.mlir new file mode 100644 index 00000000..32f17705 --- /dev/null +++ b/test/e2e/bicg/bicg_int_kernel.mlir @@ -0,0 +1,359 @@ +// Compile the int BiCG kernel to LLVM IR. +// Use -I %S so local headers are visible if needed. +// RUN: clang -S -emit-llvm -O3 -fno-vectorize -fno-unroll-loops -std=c11 \ +// RUN: -I %S/../../benchmark/CGRA-Bench/kernels/bicg -DSMALL_DATASET \ +// RUN: -o %t-kernel-full.ll %S/../../benchmark/CGRA-Bench/kernels/bicg/bicg_int.c + +// RUN: llvm-extract --rfunc=".*kernel.*" %t-kernel-full.ll -o %t-kernel-only.ll +// RUN: mlir-translate --import-llvm %t-kernel-only.ll -o %t-kernel.mlir + +// Lower and map to the Neura accelerator, then generate code. +// RUN: mlir-neura-opt %t-kernel.mlir \ +// RUN: --assign-accelerator \ +// RUN: --lower-llvm-to-neura \ +// RUN: --promote-func-arg-to-const \ +// RUN: --fold-constant \ +// RUN: --canonicalize-return \ +// RUN: --canonicalize-live-in \ +// RUN: --leverage-predicated-value \ +// RUN: --transform-ctrl-to-data-flow \ +// RUN: --fold-constant \ +// RUN: --insert-data-mov \ +// RUN: --map-to-accelerator="mapping-strategy=heuristic" \ +// RUN: --architecture-spec=%S/../../arch_spec/architecture.yaml \ +// RUN: --generate-code -o %t-mapping.mlir + +// RUN: FileCheck %s --input-file=%t-mapping.mlir --check-prefix=MAPPING +// RUN: FileCheck %s --input-file=tmp-generated-instructions.yaml --check-prefix=YAML +// RUN: FileCheck %s --input-file=tmp-generated-instructions.asm --check-prefix=ASM + +// MAPPING: func.func @kernel_bicg_int(%arg0: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg1: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg2: !llvm.ptr {llvm.nocapture, llvm.noundef, llvm.readonly}, %arg3: !llvm.ptr {llvm.nocapture, llvm.noundef}, %arg4: !llvm.ptr {llvm.nocapture, llvm.noundef}) -> 
!llvm.void attributes {CConv = #llvm.cconv, accelerator = "neura", dataflow_mode = "predicate", linkage = #llvm.linkage, mapping_info = {compiled_ii = 10 : i32, mapping_mode = "spatial-temporal", mapping_strategy = "heuristic", rec_mii = 9 : i32, res_mii = 3 : i32, x_tiles = 4 : i32, y_tiles = 4 : i32}, memory_effects = #llvm.memory_effects, no_unwind, passthrough = ["nofree", "norecurse", "nosync", ["uwtable", "2"], ["min-legal-vector-width", "0"], ["no-trapping-math", "true"], ["stack-protector-buffer-size", "8"], ["target-cpu", "x86-64"]], target_cpu = "x86-64", target_features = #llvm.target_features<["+cmov", "+cx8", "+fxsr", "+mmx", "+sse", "+sse2", "+x87"]>, tune_cpu = "generic", unnamed_addr = 1 : i64, visibility_ = 0 : i64} { +// MAPPING-NEXT: %0 = "neura.constant"() <{value = "%arg3"}> {dfg_id = 0 : i32, mapping_locs = [{id = 12 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 8 : i32, x = 0 : i32, y = 3 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %1 = "neura.constant"() <{value = 0 : i8}> {dfg_id = 1 : i32, mapping_locs = [{id = 5 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 8 : i32, x = 1 : i32, y = 1 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %2 = "neura.constant"() <{value = 32 : i64}> {dfg_id = 2 : i32, mapping_locs = [{id = 14 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 8 : i32, x = 2 : i32, y = 3 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %3 = "neura.grant_once"() <{constant_value = 0 : i64}> {dfg_id = 3 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 0 : i32, x = 0 : i32, y = 0 : i32}]} : () -> !neura.data +// MAPPING-NEXT: %4 = "neura.data_mov"(%0) {dfg_id = 10 : i32, mapping_locs = [{id = 38 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 8 : i32}, {id = 416 : i32, index_per_ii = 9 
: i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %5 = "neura.data_mov"(%1) {dfg_id = 11 : i32, mapping_locs = [{id = 16 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 8 : i32}, {id = 30 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %6 = "neura.data_mov"(%2) {dfg_id = 12 : i32, mapping_locs = [{id = 43 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 8 : i32}, {id = 417 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.memset"(%4, %5, %6) <{is_volatile = false}> {dfg_id = 15 : i32, mapping_locs = [{id = 13 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 10 : i32, x = 1 : i32, y = 3 : i32}]} : (!neura.data, !neura.data, !neura.data) -> () +// MAPPING-NEXT: %7 = neura.reserve {dfg_id = 4 : i32} : !neura.data +// MAPPING-NEXT: %8 = "neura.data_mov"(%3) {dfg_id = 13 : i32, mapping_locs = [{id = 1 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %9 = neura.phi_start %8, %7 {dfg_id = 16 : i32, mapping_locs = [{id = 4 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 1 : i32, x = 0 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %10 = neura.reserve {dfg_id = 5 : i32} : !neura.data +// MAPPING-NEXT: %11 = "neura.data_mov"(%3) {dfg_id = 14 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 0 : i32}, {id = 
0 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %12 = neura.phi_start %11, %10 {dfg_id = 17 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 0 : i32, y = 0 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %13 = "neura.data_mov"(%12) {dfg_id = 23 : i32, mapping_locs = [{id = 2 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 2 : i32}, {id = 2 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 3 : i32}, {id = 2 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 4 : i32}, {id = 2 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %14 = "neura.gep"(%13) <{operandSegmentSizes = array}> {dfg_id = 29 : i32, lhs_value = "%arg4", mapping_locs = [{id = 0 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 6 : i32, x = 0 : i32, y = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %15 = "neura.data_mov"(%14) {dfg_id = 42 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}, {id = 0 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 7 : i32}, {id = 32 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.store"(%15) {dfg_id = 49 
: i32, lhs_value = 0 : i32, mapping_locs = [{id = 1 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 9 : i32, x = 1 : i32, y = 0 : i32}]} : (!neura.data) -> () +// MAPPING-NEXT: %16 = "neura.data_mov"(%12) {dfg_id = 22 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}, {id = 32 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 32 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {dfg_id = 28 : i32, lhs_value = "%arg2", mapping_locs = [{id = 1 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 5 : i32, x = 1 : i32, y = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %18 = "neura.data_mov"(%12) {dfg_id = 21 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %19 = "neura.shl"(%18) {dfg_id = 27 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 0 : i32, y = 0 : i32}], rhs_value = 5 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %20 = "neura.data_mov"(%19) {dfg_id = 38 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %21 = "neura.gep"(%20) <{operandSegmentSizes = array}> {dfg_id = 46 : i32, lhs_value = "%arg0", mapping_locs = [{id = 0 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : 
i32, resource = "tile", time_step = 4 : i32, x = 0 : i32, y = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %22 = neura.reserve {dfg_id = 6 : i32} : !neura.data +// MAPPING-NEXT: %23 = "neura.data_mov"(%9) {dfg_id = 19 : i32, mapping_locs = [{id = 12 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}, {id = 257 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 2 : i32}, {id = 257 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 3 : i32}, {id = 257 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}, {id = 257 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 5 : i32}, {id = 257 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %24 = neura.phi_start %23, %22 {dfg_id = 25 : i32, mapping_locs = [{id = 8 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 7 : i32, x = 0 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %25 = neura.reserve {dfg_id = 7 : i32} : !neura.data +// MAPPING-NEXT: %26 = "neura.data_mov"(%12) {dfg_id = 20 : i32, mapping_locs = [{id = 1 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}, {id = 128 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %27 = neura.phi_start %26, %25 {dfg_id = 26 : i32, mapping_locs = [{id = 4 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, 
resource = "tile", time_step = 4 : i32, x = 0 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %28 = neura.reserve {dfg_id = 8 : i32} : !neura.data +// MAPPING-NEXT: %29 = "neura.data_mov"(%9) {dfg_id = 18 : i32, mapping_locs = [{id = 128 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %30 = neura.phi_start %29, %28 {dfg_id = 24 : i32, mapping_locs = [{id = 4 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 0 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %31 = "neura.data_mov"(%21) {dfg_id = 55 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %32 = "neura.data_mov"(%30) {dfg_id = 33 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}, {id = 1 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 3 : i32}, {id = 1 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %33 = "neura.gep"(%31, %32) <{operandSegmentSizes = array}> {dfg_id = 61 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 5 : i32, x = 0 : i32, y = 0 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %34 = "neura.data_mov"(%33) {dfg_id = 67 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> 
!neura.data +// MAPPING-NEXT: %35 = "neura.load"(%34) {dfg_id = 71 : i32, mapping_locs = [{id = 1 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 6 : i32, x = 1 : i32, y = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %36 = "neura.data_mov"(%17) {dfg_id = 39 : i32, mapping_locs = [{id = 3 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %37 = "neura.load"(%36) {dfg_id = 47 : i32, mapping_locs = [{id = 2 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 6 : i32, x = 2 : i32, y = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %38 = "neura.data_mov"(%37) {dfg_id = 56 : i32, mapping_locs = [{id = 5 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %39 = "neura.data_mov"(%35) {dfg_id = 78 : i32, mapping_locs = [{id = 32 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %40 = "neura.mul"(%38, %39) {dfg_id = 84 : i32, mapping_locs = [{id = 1 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 7 : i32, x = 1 : i32, y = 0 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %41 = "neura.data_mov"(%30) {dfg_id = 32 : i32, mapping_locs = [{id = 129 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 2 : i32}, {id = 129 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 3 : i32}, {id = 129 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : 
i32}, {id = 129 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %42 = "neura.gep"(%41) <{operandSegmentSizes = array}> {dfg_id = 45 : i32, lhs_value = "%arg3", mapping_locs = [{id = 4 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 6 : i32, x = 0 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %43 = "neura.data_mov"(%42) {dfg_id = 54 : i32, mapping_locs = [{id = 128 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}, {id = 128 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %44 = "neura.load"(%43) {dfg_id = 60 : i32, mapping_locs = [{id = 4 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 8 : i32, x = 0 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %45 = "neura.data_mov"(%44) {dfg_id = 66 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %46 = "neura.data_mov"(%40) {dfg_id = 91 : i32, mapping_locs = [{id = 4 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 7 : i32}, {id = 160 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %47 = "neura.add"(%45, %46) {dfg_id = 94 : i32, mapping_locs = [{id = 5 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 9 : i32, x = 1 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> !neura.data 
+// MAPPING-NEXT: %48 = "neura.data_mov"(%47) {dfg_id = 99 : i32, mapping_locs = [{id = 162 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 9 : i32}, {id = 162 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %49 = "neura.data_mov"(%42) {dfg_id = 53 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 6 : i32}, {id = 163 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 3 : i32, resource = "register", time_step = 7 : i32}, {id = 163 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 3 : i32, resource = "register", time_step = 8 : i32}, {id = 163 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 3 : i32, resource = "register", time_step = 9 : i32}, {id = 163 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 3 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.store"(%48, %49) {dfg_id = 103 : i32, mapping_locs = [{id = 5 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 11 : i32, x = 1 : i32, y = 1 : i32}]} : (!neura.data, !neura.data) -> () +// MAPPING-NEXT: %50 = "neura.data_mov"(%30) {dfg_id = 31 : i32, mapping_locs = [{id = 12 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}, {id = 256 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 256 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> 
!neura.data +// MAPPING-NEXT: %51 = "neura.gep"(%50) <{operandSegmentSizes = array}> {dfg_id = 44 : i32, lhs_value = "%arg1", mapping_locs = [{id = 8 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 5 : i32, x = 0 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %52 = "neura.data_mov"(%51) {dfg_id = 52 : i32, mapping_locs = [{id = 256 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %53 = "neura.load"(%52) {dfg_id = 59 : i32, mapping_locs = [{id = 8 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 6 : i32, x = 0 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %54 = "neura.data_mov"(%53) {dfg_id = 65 : i32, mapping_locs = [{id = 25 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 6 : i32}, {id = 11 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %55 = "neura.data_mov"(%35) {dfg_id = 77 : i32, mapping_locs = [{id = 2 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 6 : i32}, {id = 0 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %56 = "neura.mul"(%54, %55) {dfg_id = 83 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 8 : i32, x = 0 : i32, y = 0 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %57 = "neura.data_mov"(%14) {dfg_id = 41 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 6 : i32}, {id = 32 : i32, 
index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %58 = "neura.load"(%57) {dfg_id = 48 : i32, mapping_locs = [{id = 1 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 8 : i32, x = 1 : i32, y = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %59 = "neura.data_mov"(%58) {dfg_id = 57 : i32, mapping_locs = [{id = 2 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %60 = "neura.data_mov"(%56) {dfg_id = 90 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %61 = "neura.add"(%59, %60) {dfg_id = 93 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 9 : i32, x = 0 : i32, y = 0 : i32}]} : (!neura.data, !neura.data) -> !neura.data +// MAPPING-NEXT: %62 = "neura.data_mov"(%61) {dfg_id = 98 : i32, mapping_locs = [{id = 1 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 9 : i32}, {id = 1 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %63 = "neura.data_mov"(%14) {dfg_id = 40 : i32, mapping_locs = [{id = 2 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 6 : i32}, {id = 2 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 7 : i32}, {id = 2 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 
: i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 8 : i32}, {id = 2 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 9 : i32}, {id = 2 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 10 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: "neura.store"(%62, %63) {dfg_id = 102 : i32, mapping_locs = [{id = 0 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 11 : i32, x = 0 : i32, y = 0 : i32}]} : (!neura.data, !neura.data) -> () +// MAPPING-NEXT: %64 = "neura.data_mov"(%30) {dfg_id = 30 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %65 = "neura.add"(%64) {dfg_id = 43 : i32, mapping_locs = [{id = 5 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 1 : i32, y = 1 : i32}], rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %66 = "neura.data_mov"(%65) {dfg_id = 51 : i32, mapping_locs = [{id = 160 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %67 = "neura.icmp"(%66) <{cmpType = "eq"}> {dfg_id = 58 : i32, mapping_locs = [{id = 5 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 1 : i32, y = 1 : i32}], rhs_value = 8 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %68 = "neura.data_mov"(%67) {dfg_id = 64 : i32, mapping_locs = [{id = 160 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %69 = 
"neura.not"(%68) {dfg_id = 70 : i32, mapping_locs = [{id = 5 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 5 : i32, x = 1 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %70 = "neura.data_mov"(%65) {dfg_id = 50 : i32, mapping_locs = [{id = 161 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 3 : i32}, {id = 161 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}, {id = 161 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 5 : i32}, {id = 161 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %71 = "neura.data_mov"(%69) {dfg_id = 76 : i32, mapping_locs = [{id = 160 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}, {id = 160 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %72 = neura.grant_predicate %70, %71 {dfg_id = 82 : i32, mapping_locs = [{id = 5 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 7 : i32, x = 1 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %72 -> %28 {dfg_id = 89 : i32, mapping_locs = [{id = 13 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 7 : i32}, {id = 129 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 8 : i32}, {id = 129 : i32, index_per_ii = 9 : i32, 
invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 9 : i32}, {id = 129 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 10 : i32}, {id = 129 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 11 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %73 = "neura.data_mov"(%27) {dfg_id = 37 : i32, mapping_locs = [{id = 130 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 4 : i32}, {id = 130 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 5 : i32}, {id = 130 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 6 : i32}, {id = 130 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 7 : i32}, {id = 130 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %74 = "neura.data_mov"(%69) {dfg_id = 75 : i32, mapping_locs = [{id = 13 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 5 : i32}, {id = 131 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 3 : i32, resource = "register", time_step = 6 : i32}, {id = 131 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 3 : i32, resource = "register", time_step = 7 : i32}, {id = 131 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 3 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %75 = 
neura.grant_predicate %73, %74 {dfg_id = 81 : i32, mapping_locs = [{id = 4 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 9 : i32, x = 0 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %75 -> %25 {dfg_id = 88 : i32, mapping_locs = [{id = 130 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 9 : i32}, {id = 130 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 10 : i32}, {id = 130 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 11 : i32}, {id = 130 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 12 : i32}, {id = 130 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 13 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %76 = "neura.data_mov"(%24) {dfg_id = 35 : i32, mapping_locs = [{id = 256 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}, {id = 24 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %77 = "neura.data_mov"(%69) {dfg_id = 74 : i32, mapping_locs = [{id = 16 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 5 : i32}, {id = 289 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 6 : i32}, {id = 289 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 7 : i32}, {id = 289 : i32, index_per_ii = 8 : i32, 
invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %78 = neura.grant_predicate %76, %77 {dfg_id = 80 : i32, mapping_locs = [{id = 9 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 9 : i32, x = 1 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %78 -> %22 {dfg_id = 87 : i32, mapping_locs = [{id = 27 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 9 : i32}, {id = 258 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 10 : i32}, {id = 258 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 11 : i32}, {id = 258 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 12 : i32}, {id = 258 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 13 : i32}, {id = 258 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 14 : i32}, {id = 258 : i32, index_per_ii = 5 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 15 : i32}, {id = 258 : i32, index_per_ii = 6 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 16 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %79 = "neura.data_mov"(%27) {dfg_id = 36 : i32, mapping_locs = [{id = 128 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %80 = "neura.data_mov"(%67) {dfg_id = 63 : i32, 
mapping_locs = [{id = 13 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %81 = neura.grant_predicate %79, %80 {dfg_id = 69 : i32, mapping_locs = [{id = 4 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 5 : i32, x = 0 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %82 = "neura.data_mov"(%24) {dfg_id = 34 : i32, mapping_locs = [{id = 24 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %83 = "neura.data_mov"(%67) {dfg_id = 62 : i32, mapping_locs = [{id = 16 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 288 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}, {id = 288 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}, {id = 288 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %84 = neura.grant_predicate %82, %83 {dfg_id = 68 : i32, mapping_locs = [{id = 9 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 8 : i32, x = 1 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %85 = "neura.data_mov"(%81) {dfg_id = 73 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 5 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %86 = "neura.add"(%85) {dfg_id = 79 : i32, mapping_locs = [{id = 5 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 6 : 
i32, x = 1 : i32, y = 1 : i32}], rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %87 = "neura.data_mov"(%86) {dfg_id = 86 : i32, mapping_locs = [{id = 14 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %88 = "neura.icmp"(%87) <{cmpType = "eq"}> {dfg_id = 92 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 7 : i32, x = 2 : i32, y = 1 : i32}], rhs_value = 8 : i64} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %89 = "neura.data_mov"(%88) {dfg_id = 97 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %90 = "neura.not"(%89) {dfg_id = 101 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 8 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %91 = "neura.data_mov"(%86) {dfg_id = 85 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 6 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 6 : i32}, {id = 3 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 7 : i32}, {id = 64 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %92 = "neura.data_mov"(%90) {dfg_id = 106 : i32, mapping_locs = [{id = 19 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %93 = neura.grant_predicate %91, %92 {dfg_id = 109 : i32, mapping_locs = [{id = 2 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, resource = "tile", 
time_step = 9 : i32, x = 2 : i32, y = 0 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %93 -> %10 {dfg_id = 111 : i32, mapping_locs = [{id = 5 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 9 : i32}, {id = 2 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 10 : i32}, {id = 1 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 11 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %94 = "neura.data_mov"(%84) {dfg_id = 72 : i32, mapping_locs = [{id = 29 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 8 : i32}, {id = 160 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %95 = "neura.data_mov"(%90) {dfg_id = 105 : i32, mapping_locs = [{id = 17 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 8 : i32}, {id = 161 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %96 = neura.grant_predicate %94, %95 {dfg_id = 108 : i32, mapping_locs = [{id = 5 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 10 : i32, x = 1 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: neura.ctrl_mov %96 -> %7 {dfg_id = 110 : i32, mapping_locs = [{id = 13 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 10 : i32}]} : !neura.data !neura.data +// MAPPING-NEXT: %97 = "neura.data_mov"(%88) {dfg_id = 95 : i32, mapping_locs = [{id = 193 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource 
= "register", time_step = 7 : i32}, {id = 193 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %98 = "neura.data_mov"(%88) {dfg_id = 96 : i32, mapping_locs = [{id = 194 : i32, index_per_ii = 7 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 7 : i32}, {id = 194 : i32, index_per_ii = 8 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: %99 = neura.grant_predicate %97, %98 {dfg_id = 100 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 9 : i32, x = 2 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING-NEXT: %100 = "neura.data_mov"(%99) {dfg_id = 104 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 9 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data +// MAPPING-NEXT: neura.return_void %100 : !neura.data {dfg_id = 107 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 10 : i32, x = 2 : i32, y = 1 : i32}]} +// MAPPING-NEXT: neura.yield {dfg_id = 9 : i32} +// MAPPING-NEXT: } +// MAPPING-NEXT: } + + +// YAML: - column: 0 +// YAML-NEXT: row: 0 +// YAML-NEXT: core_id: "0" +// YAML-NEXT: entries: +// YAML-NEXT: - entry_id: "entry0" +// YAML-NEXT: instructions: +// YAML-NEXT: - index_per_ii: 0 +// YAML-NEXT: operations: +// YAML-NEXT: - opcode: "GRANT_ONCE" +// YAML-NEXT: id: 3 +// YAML-NEXT: time_step: 0 +// YAML-NEXT: invalid_iterations: 0 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "#0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "NORTH" +// YAML-NEXT: 
color: "RED" +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - index_per_ii: 1 +// YAML-NEXT: operations: +// YAML-NEXT: - opcode: "STORE" +// YAML-NEXT: id: 102 +// YAML-NEXT: time_step: 11 +// YAML-NEXT: invalid_iterations: 1 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "$1" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - operand: "$2" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - opcode: "CTRL_MOV" +// YAML-NEXT: id: 111 +// YAML-NEXT: time_step: 11 +// YAML-NEXT: invalid_iterations: 1 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "EAST" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "$1" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - index_per_ii: 2 +// YAML-NEXT: operations: +// YAML-NEXT: - opcode: "PHI_START" +// YAML-NEXT: id: 17 +// YAML-NEXT: time_step: 2 +// YAML-NEXT: invalid_iterations: 0 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - operand: "$1" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "$2" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - operand: "EAST" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - operand: "NORTH" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - index_per_ii: 3 +// YAML-NEXT: operations: +// YAML-NEXT: - opcode: "SHL" +// YAML-NEXT: id: 27 +// YAML-NEXT: time_step: 3 +// YAML-NEXT: invalid_iterations: 0 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - operand: "#5" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - opcode: "DATA_MOV" +// YAML-NEXT: id: 33 +// YAML-NEXT: time_step: 3 +// YAML-NEXT: invalid_iterations: 0 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "NORTH" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "$1" +// 
YAML-NEXT: color: "RED" +// YAML-NEXT: - index_per_ii: 4 +// YAML-NEXT: operations: +// YAML-NEXT: - opcode: "GEP" +// YAML-NEXT: id: 46 +// YAML-NEXT: time_step: 4 +// YAML-NEXT: invalid_iterations: 0 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - index_per_ii: 5 +// YAML-NEXT: operations: +// YAML-NEXT: - opcode: "GEP" +// YAML-NEXT: id: 61 +// YAML-NEXT: time_step: 5 +// YAML-NEXT: invalid_iterations: 0 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - operand: "$1" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "EAST" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - index_per_ii: 6 +// YAML-NEXT: operations: +// YAML-NEXT: - opcode: "GEP" +// YAML-NEXT: id: 29 +// YAML-NEXT: time_step: 6 +// YAML-NEXT: invalid_iterations: 0 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "$2" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - operand: "EAST" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - operand: "$2" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - index_per_ii: 7 +// YAML-NEXT: operations: +// YAML-NEXT: - opcode: "DATA_MOV" +// YAML-NEXT: id: 420000 +// YAML-NEXT: time_step: 7 +// YAML-NEXT: invalid_iterations: 0 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "EAST" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - opcode: "DATA_MOV" +// YAML-NEXT: id: 77 +// YAML-NEXT: time_step: 7 +// YAML-NEXT: invalid_iterations: 0 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "EAST" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - index_per_ii: 8 +// YAML-NEXT: operations: +// 
YAML-NEXT: - opcode: "MUL" +// YAML-NEXT: id: 83 +// YAML-NEXT: time_step: 8 +// YAML-NEXT: invalid_iterations: 0 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "NORTH" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - index_per_ii: 9 +// YAML-NEXT: operations: +// YAML-NEXT: - opcode: "ADD" +// YAML-NEXT: id: 93 +// YAML-NEXT: time_step: 9 +// YAML-NEXT: invalid_iterations: 0 +// YAML-NEXT: src_operands: +// YAML-NEXT: - operand: "EAST" +// YAML-NEXT: color: "RED" +// YAML-NEXT: - operand: "$0" +// YAML-NEXT: color: "RED" +// YAML-NEXT: dst_operands: +// YAML-NEXT: - operand: "$1" +// YAML-NEXT: color: "RED" + +// ASM: PE(0,0): +// ASM-NEXT: { +// ASM-NEXT: GRANT_ONCE, [#0] -> [NORTH, RED], [$0] (t=0, inv_iters=0) +// ASM-NEXT: } (idx_per_ii=0) +// ASM-NEXT: { +// ASM-NEXT: STORE, [$1], [$2] (t=11, inv_iters=1) +// ASM-NEXT: CTRL_MOV, [EAST, RED] -> [$1] (t=11, inv_iters=1) +// ASM-NEXT: } (idx_per_ii=1) +// ASM-NEXT: { +// ASM-NEXT: PHI_START, [$0], [$1] -> [$2], [EAST, RED], [$0], [NORTH, RED] (t=2, inv_iters=0) +// ASM-NEXT: } (idx_per_ii=2) +// ASM-NEXT: { +// ASM-NEXT: SHL, [$0], [#5] -> [$0] (t=3, inv_iters=0) +// ASM-NEXT: DATA_MOV, [NORTH, RED] -> [$1] (t=3, inv_iters=0) +// ASM-NEXT: } (idx_per_ii=3) +// ASM-NEXT: { +// ASM-NEXT: GEP, [$0] -> [$0] (t=4, inv_iters=0) +// ASM-NEXT: } (idx_per_ii=4) +// ASM-NEXT: { +// ASM-NEXT: GEP, [$0], [$1] -> [EAST, RED] (t=5, inv_iters=0) +// ASM-NEXT: } (idx_per_ii=5) +// ASM-NEXT: { +// ASM-NEXT: GEP, [$2] -> [$0], [EAST, RED], [$2] (t=6, inv_iters=0) +// ASM-NEXT: } (idx_per_ii=6) +// ASM-NEXT: { +// ASM-NEXT: DATA_MOV, [$0] -> [EAST, RED] (t=7, inv_iters=0) +// ASM-NEXT: DATA_MOV, [EAST, RED] -> [$0] (t=7, inv_iters=0) +// ASM-NEXT: } (idx_per_ii=7) +// ASM-NEXT: { +// ASM-NEXT: MUL, [NORTH, RED], [$0] -> [$0] (t=8, inv_iters=0) +// ASM-NEXT: } (idx_per_ii=8) +// 
ASM-NEXT: { +// ASM-NEXT: ADD, [EAST, RED], [$0] -> [$1] (t=9, inv_iters=0) +// ASM-NEXT: } (idx_per_ii=9) + diff --git a/test/e2e/relu/relu_kernel.mlir b/test/e2e/relu/relu_kernel.mlir index b5c46f98..a6588a54 100644 --- a/test/e2e/relu/relu_kernel.mlir +++ b/test/e2e/relu/relu_kernel.mlir @@ -33,51 +33,52 @@ // Check the mapped MLIR contains key operations with full statements. // RUN: FileCheck %s --input-file=%t-mapping.mlir -check-prefix=MAPPING // MAPPING: %0 = "neura.grant_once"() <{constant_value = 0 : i64}> {dfg_id = 0 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 0 : i32, x = 3 : i32, y = 2 : i32}]} : () -> !neura.data -// MAPPING: %1 = neura.reserve {dfg_id = 1 : i32} : !neura.data -// MAPPING: %2 = "neura.data_mov"(%0) {dfg_id = 3 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 0 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %3 = neura.phi_start %2, %1 {dfg_id = 4 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 1 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING: %4 = "neura.data_mov"(%3) {dfg_id = 7 : i32, mapping_locs = [{id = 32 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %5 = "neura.gep"(%4) <{operandSegmentSizes = array}> {dfg_id = 9 : i32, lhs_value = "%arg0", mapping_locs = [{id = 11 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 3 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %6 = "neura.data_mov"(%5) {dfg_id = 12 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : 
(!neura.data) -> !neura.data -// MAPPING: %7 = "neura.load"(%6) {dfg_id = 14 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 3 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %8 = "neura.data_mov"(%7) {dfg_id = 19 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %9 = "neura.icmp"(%8) <{cmpType = "sgt"}> {dfg_id = 22 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 0 : i32} : (!neura.data) -> !neura.data -// MAPPING: %10 = "neura.data_mov"(%3) {dfg_id = 6 : i32, mapping_locs = [{id = 33 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}, {id = 18 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 2 : i32}, {id = 224 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 224 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %11 = "neura.data_mov"(%9) {dfg_id = 26 : i32, mapping_locs = [{id = 36 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %12 = neura.grant_predicate %10, %11 {dfg_id = 30 : i32, mapping_locs = [{id = 7 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 3 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING: %13 = "neura.data_mov"(%7) {dfg_id = 18 : i32, mapping_locs = [{id = 37 
: i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}, {id = 480 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}, {id = 480 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}, {id = 480 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %14 = "neura.data_mov"(%9) {dfg_id = 25 : i32, mapping_locs = [{id = 37 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 481 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 5 : i32}, {id = 481 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %15 = neura.grant_predicate %13, %14 {dfg_id = 29 : i32, mapping_locs = [{id = 15 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 3 : i32, y = 3 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING: %16 = "neura.data_mov"(%12) {dfg_id = 33 : i32, mapping_locs = [{id = 224 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %17 = "neura.gep"(%16) <{operandSegmentSizes = array}> {dfg_id = 34 : i32, lhs_value = "%arg1", mapping_locs = [{id = 7 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 6 : i32, x = 3 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %18 = "neura.data_mov"(%17) {dfg_id = 36 : i32, mapping_locs = [{id = 21 : 
i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 6 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %19 = "neura.load"(%18) {dfg_id = 37 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 2 : i32, y = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %20 = "neura.data_mov"(%19) {dfg_id = 38 : i32, mapping_locs = [{id = 20 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 7 : i32}, {id = 34 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %21 = "neura.data_mov"(%15) {dfg_id = 32 : i32, mapping_locs = [{id = 46 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 7 : i32}, {id = 448 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %22 = "neura.add"(%20, %21) {dfg_id = 39 : i32, mapping_locs = [{id = 14 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 2 : i32, y = 3 : i32}]} : (!neura.data, !neura.data) -> !neura.data -// MAPPING: %23 = "neura.data_mov"(%22) {dfg_id = 40 : i32, mapping_locs = [{id = 448 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %24 = "neura.data_mov"(%17) {dfg_id = 35 : i32, mapping_locs = [{id = 23 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 6 : i32}, {id = 37 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 7 : i32}, {id = 46 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 8 : i32}, 
{id = 449 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 9 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: "neura.store"(%23, %24) {dfg_id = 41 : i32, mapping_locs = [{id = 14 : i32, index_per_ii = 0 : i32, invalid_iterations = 2 : i32, resource = "tile", time_step = 10 : i32, x = 2 : i32, y = 3 : i32}]} : (!neura.data, !neura.data) -> () -// MAPPING: %25 = "neura.data_mov"(%3) {dfg_id = 5 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 1 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %26 = "neura.add"(%25) {dfg_id = 8 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y = 2 : i32}], rhs_value = 1 : i64} : (!neura.data) -> !neura.data -// MAPPING: %27 = "neura.data_mov"(%26) {dfg_id = 11 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %28 = "neura.icmp"(%27) <{cmpType = "eq"}> {dfg_id = 13 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 2 : i32, y = 2 : i32}], rhs_value = 32 : i64} : (!neura.data) -> !neura.data -// MAPPING: %29 = "neura.data_mov"(%28) {dfg_id = 17 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %30 = "neura.not"(%29) {dfg_id = 21 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %31 = 
"neura.data_mov"(%26) {dfg_id = 10 : i32, mapping_locs = [{id = 321 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 2 : i32}, {id = 321 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 3 : i32}, {id = 321 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %32 = "neura.data_mov"(%30) {dfg_id = 24 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %33 = neura.grant_predicate %31, %32 {dfg_id = 28 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING: neura.ctrl_mov %33 -> %1 {dfg_id = 31 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}]} : !neura.data !neura.data -// MAPPING: %34 = "neura.data_mov"(%28) {dfg_id = 15 : i32, mapping_locs = [{id = 33 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}, {id = 192 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}, {id = 192 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}, {id = 192 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}, {id = 192 : i32, index_per_ii = 2 : i32, invalid_iterations = 
1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %35 = "neura.data_mov"(%28) {dfg_id = 16 : i32, mapping_locs = [{id = 322 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 3 : i32}, {id = 33 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 193 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 5 : i32}, {id = 193 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 6 : i32}, {id = 193 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 7 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: %36 = neura.grant_predicate %34, %35 {dfg_id = 20 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 8 : i32, x = 2 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data -// MAPPING: %37 = "neura.data_mov"(%36) {dfg_id = 23 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : (!neura.data) -> !neura.data -// MAPPING: neura.return_void %37 : !neura.data {dfg_id = 27 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 4 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 9 : i32, x = 2 : i32, y = 1 : i32}]} -// MAPPING: neura.yield {dfg_id = 2 : i32} +// MAPPING: %1 = "neura.grant_once"() <{constant_value = 0 : i32}> {dfg_id = 1 : i32, mapping_locs = [{id = 12 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 0 : i32, y = 3 : i32}]} : () -> !neura.data +// MAPPING: %2 = 
neura.reserve {dfg_id = 2 : i32} : !neura.data +// MAPPING: %3 = "neura.data_mov"(%1) {dfg_id = 6 : i32, mapping_locs = [{id = 39 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %4 = neura.phi_start %3, %2 {dfg_id = 8 : i32, mapping_locs = [{id = 8 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 0 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING: %5 = neura.reserve {dfg_id = 3 : i32} : !neura.data +// MAPPING: %6 = "neura.data_mov"(%0) {dfg_id = 5 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 0 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 0 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %7 = neura.phi_start %6, %5 {dfg_id = 7 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 1 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING: %8 = "neura.data_mov"(%7) {dfg_id = 11 : i32, mapping_locs = [{id = 32 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %9 = "neura.gep"(%8) <{operandSegmentSizes = array}> {dfg_id = 16 : i32, lhs_value = "%arg0", mapping_locs = [{id = 11 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 3 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %10 = "neura.data_mov"(%9) {dfg_id = 20 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %11 = "neura.load"(%10) {dfg_id = 22 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 
: i32, x = 3 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %12 = "neura.data_mov"(%11) {dfg_id = 27 : i32, mapping_locs = [{id = 352 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %13 = "neura.icmp"(%12) <{cmpType = "sge"}> {dfg_id = 30 : i32, mapping_locs = [{id = 11 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 3 : i32, y = 2 : i32}], rhs_value = 0 : i32} : (!neura.data) -> !neura.data +// MAPPING: %14 = "neura.data_mov"(%13) {dfg_id = 34 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 31 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %15 = "neura.data_mov"(%11) {dfg_id = 26 : i32, mapping_locs = [{id = 35 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}, {id = 31 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 288 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %16 = "neura.data_mov"(%4) {dfg_id = 13 : i32, mapping_locs = [{id = 24 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 289 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %17 = "neura.sel"(%14, %15, %16) {dfg_id = 38 : i32, mapping_locs = [{id = 9 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 6 : i32, x = 1 : i32, y = 2 : 
i32}]} : (!neura.data, !neura.data, !neura.data) -> !neura.data +// MAPPING: %18 = "neura.data_mov"(%7) {dfg_id = 10 : i32, mapping_locs = [{id = 34 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 1 : i32}, {id = 448 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}, {id = 448 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}, {id = 448 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %19 = "neura.gep"(%18) <{operandSegmentSizes = array}> {dfg_id = 15 : i32, lhs_value = "%arg1", mapping_locs = [{id = 14 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %20 = "neura.data_mov"(%17) {dfg_id = 41 : i32, mapping_locs = [{id = 30 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %21 = "neura.data_mov"(%19) {dfg_id = 19 : i32, mapping_locs = [{id = 448 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}, {id = 43 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: "neura.store"(%20, %21) {dfg_id = 42 : i32, mapping_locs = [{id = 13 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 1 : i32, y = 3 : i32}]} : (!neura.data, !neura.data) -> () +// MAPPING: %22 = "neura.data_mov"(%7) {dfg_id = 9 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 1 : i32, invalid_iterations = 0 : 
i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 1 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %23 = "neura.add"(%22) {dfg_id = 14 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 2 : i32, x = 2 : i32, y = 2 : i32}], rhs_value = 1 : i64} : (!neura.data) -> !neura.data +// MAPPING: %24 = "neura.data_mov"(%23) {dfg_id = 18 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %25 = "neura.icmp"(%24) <{cmpType = "eq"}> {dfg_id = 21 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 3 : i32, x = 2 : i32, y = 2 : i32}], rhs_value = 32 : i64} : (!neura.data) -> !neura.data +// MAPPING: %26 = "neura.data_mov"(%25) {dfg_id = 25 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 3 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %27 = "neura.not"(%26) {dfg_id = 29 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "tile", time_step = 4 : i32, x = 2 : i32, y = 2 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %28 = "neura.data_mov"(%23) {dfg_id = 17 : i32, mapping_locs = [{id = 321 : i32, index_per_ii = 2 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 2 : i32}, {id = 321 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 3 : i32}, {id = 321 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 1 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %29 = 
"neura.data_mov"(%27) {dfg_id = 33 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %30 = neura.grant_predicate %28, %29 {dfg_id = 37 : i32, mapping_locs = [{id = 10 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING: neura.ctrl_mov %30 -> %5 {dfg_id = 40 : i32, mapping_locs = [{id = 320 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}]} : !neura.data !neura.data +// MAPPING: %31 = "neura.data_mov"(%4) {dfg_id = 12 : i32, mapping_locs = [{id = 256 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}, {id = 24 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}, {id = 288 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %32 = "neura.data_mov"(%27) {dfg_id = 32 : i32, mapping_locs = [{id = 34 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}, {id = 43 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 5 : i32}, {id = 42 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 6 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %33 = neura.grant_predicate %31, %32 {dfg_id = 36 : i32, mapping_locs = [{id = 9 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 7 : i32, x = 1 : i32, y = 2 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING: 
neura.ctrl_mov %33 -> %2 {dfg_id = 39 : i32, mapping_locs = [{id = 27 : i32, index_per_ii = 2 : i32, invalid_iterations = 1 : i32, resource = "link", time_step = 7 : i32}, {id = 256 : i32, index_per_ii = 3 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 8 : i32}]} : !neura.data !neura.data +// MAPPING: %34 = "neura.data_mov"(%25) {dfg_id = 23 : i32, mapping_locs = [{id = 33 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 3 : i32}, {id = 192 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %35 = "neura.data_mov"(%25) {dfg_id = 24 : i32, mapping_locs = [{id = 322 : i32, index_per_ii = 3 : i32, invalid_iterations = 0 : i32, per_tile_register_id = 2 : i32, resource = "register", time_step = 3 : i32}, {id = 33 : i32, index_per_ii = 4 : i32, invalid_iterations = 0 : i32, resource = "link", time_step = 4 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: %36 = neura.grant_predicate %34, %35 {dfg_id = 28 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 5 : i32, x = 2 : i32, y = 1 : i32}]} : !neura.data, !neura.data -> !neura.data +// MAPPING: %37 = "neura.data_mov"(%36) {dfg_id = 31 : i32, mapping_locs = [{id = 192 : i32, index_per_ii = 0 : i32, invalid_iterations = 1 : i32, per_tile_register_id = 0 : i32, resource = "register", time_step = 5 : i32}]} : (!neura.data) -> !neura.data +// MAPPING: neura.return_void %37 : !neura.data {dfg_id = 35 : i32, mapping_locs = [{id = 6 : i32, index_per_ii = 1 : i32, invalid_iterations = 1 : i32, resource = "tile", time_step = 6 : i32, x = 2 : i32, y = 1 : i32}]} +// MAPPING: neura.yield {dfg_id = 4 : i32} // MAPPING: } // MAPPING: } -// YAML: array_config: +// YAML: array_config: // YAML-NEXT: columns: 4 // YAML-NEXT: rows: 4 // 
YAML-NEXT: compiled_ii: 5 @@ -90,56 +91,31 @@ // YAML-NEXT: instructions: // YAML-NEXT: - index_per_ii: 0 // YAML-NEXT: operations: -// YAML-NEXT: - opcode: "DATA_MOV" -// YAML-NEXT: id: 16 +// YAML-NEXT: - opcode: "GRANT_PREDICATE" +// YAML-NEXT: id: 28 // YAML-NEXT: time_step: 5 // YAML-NEXT: invalid_iterations: 1 // YAML-NEXT: src_operands: -// YAML-NEXT: - operand: "NORTH" -// YAML-NEXT: color: "RED" -// YAML-NEXT: dst_operands: -// YAML-NEXT: - operand: "$1" +// YAML-NEXT: - operand: "$0" // YAML-NEXT: color: "RED" -// YAML-NEXT: - index_per_ii: 2 -// YAML-NEXT: operations: -// YAML-NEXT: - opcode: "DATA_MOV" -// YAML-NEXT: id: 60001 -// YAML-NEXT: time_step: 2 -// YAML-NEXT: invalid_iterations: 0 -// YAML-NEXT: src_operands: // YAML-NEXT: - operand: "NORTH" // YAML-NEXT: color: "RED" // YAML-NEXT: dst_operands: -// YAML-NEXT: - operand: "EAST" -// YAML-NEXT: color: "RED" -// YAML-NEXT: - opcode: "LOAD" -// YAML-NEXT: id: 37 -// YAML-NEXT: time_step: 7 -// YAML-NEXT: invalid_iterations: 1 -// YAML-NEXT: src_operands: -// YAML-NEXT: - operand: "EAST" -// YAML-NEXT: color: "RED" -// YAML-NEXT: dst_operands: -// YAML-NEXT: - operand: "NORTH" +// YAML-NEXT: - operand: "$0" // YAML-NEXT: color: "RED" -// YAML-NEXT: - index_per_ii: 3 +// YAML-NEXT: - index_per_ii: 1 // YAML-NEXT: operations: -// YAML-NEXT: - opcode: "GRANT_PREDICATE" -// YAML-NEXT: id: 20 -// YAML-NEXT: time_step: 8 +// YAML-NEXT: - opcode: "RETURN_VOID" +// YAML-NEXT: id: 35 +// YAML-NEXT: time_step: 6 // YAML-NEXT: invalid_iterations: 1 // YAML-NEXT: src_operands: // YAML-NEXT: - operand: "$0" // YAML-NEXT: color: "RED" -// YAML-NEXT: - operand: "$1" -// YAML-NEXT: color: "RED" -// YAML-NEXT: dst_operands: -// YAML-NEXT: - operand: "$0" -// YAML-NEXT: color: "RED" // YAML-NEXT: - index_per_ii: 4 // YAML-NEXT: operations: // YAML-NEXT: - opcode: "DATA_MOV" -// YAML-NEXT: id: 15 +// YAML-NEXT: id: 23 // YAML-NEXT: time_step: 4 // YAML-NEXT: invalid_iterations: 0 // YAML-NEXT: src_operands: @@ 
-148,26 +124,15 @@ // YAML-NEXT: dst_operands: // YAML-NEXT: - operand: "$0" // YAML-NEXT: color: "RED" -// YAML-NEXT: - opcode: "RETURN_VOID" -// YAML-NEXT: id: 27 -// YAML-NEXT: time_step: 9 -// YAML-NEXT: invalid_iterations: 1 -// YAML-NEXT: src_operands: -// YAML-NEXT: - operand: "$0" -// YAML-NEXT: color: "RED" // ASM: # Compiled II: 5 -// ASM: PE(3,2): +// ASM: PE(2,1): // ASM-NEXT: { -// ASM-NEXT: GRANT_ONCE, [#0] -> [WEST, RED] (t=0, inv_iters=0) +// ASM-NEXT: GRANT_PREDICATE, [$0], [NORTH, RED] -> [$0] (t=5, inv_iters=1) // ASM-NEXT: } (idx_per_ii=0) // ASM-NEXT: { -// ASM-NEXT: GEP, [WEST, RED] -> [$0] (t=2, inv_iters=0) -// ASM-NEXT: DATA_MOV, [SOUTH, RED] -> [NORTH, RED] (t=7, inv_iters=1) -// ASM-NEXT: } (idx_per_ii=2) -// ASM-NEXT: { -// ASM-NEXT: LOAD, [$0] -> [$0], [NORTH, RED] (t=3, inv_iters=0) -// ASM-NEXT: } (idx_per_ii=3) +// ASM-NEXT: RETURN_VOID, [$0] (t=6, inv_iters=1) +// ASM-NEXT: } (idx_per_ii=1) // ASM-NEXT: { -// ASM-NEXT: ICMP_SGT, [$0], [#0] -> [SOUTH, RED], [NORTH, RED] (t=4, inv_iters=0) +// ASM-NEXT: DATA_MOV, [NORTH, RED] -> [$0] (t=4, inv_iters=0) // ASM-NEXT: } (idx_per_ii=4) diff --git a/test/multi-cgra/taskflow/nerf_hash_grid/build_modular_mlir.py b/test/multi-cgra/taskflow/nerf_hash_grid/build_modular_mlir.py new file mode 100755 index 00000000..6b79e5ca --- /dev/null +++ b/test/multi-cgra/taskflow/nerf_hash_grid/build_modular_mlir.py @@ -0,0 +1,616 @@ +#!/cluster/home/tangyz/.conda/envs/torch-mlir-env/bin/python +"""Build modular MLIR from NeRF PyTorch components. + +This script compiles individual NeRF components (ray sampler, hash encoder, +MLP) into separate MLIR modules and merges them into a single modular MLIR +file with a top-level orchestrator function. 
+ +Features: + - Automatic function signature extraction + - Signature-based top-level function generation + - MLIR verification with mlir-opt + - Command-line output path specification +""" + +import argparse +import os +import re +import subprocess +import sys + +import torch +import torch_mlir + +from nerf_components import HashGridEncoder +from nerf_components import NeRFMLP +from nerf_components import RaySampler + + +def compile_single_module(module, inputs, module_name): + """Compiles a single PyTorch module to Linalg MLIR. + + Args: + module: PyTorch module to compile. + inputs: Tuple of input tensors for tracing. + module_name: Name for the module (used in debug output). + + Returns: + MLIR string representation, or None if compilation fails. + """ + print(f'\nCompiling module: {module_name}') + print('-' * 70) + print(f' Input shapes: {[x.shape for x in inputs]}') + + try: + mlir_module = torch_mlir.compile( + module, + inputs, + output_type=torch_mlir.OutputType.LINALG_ON_TENSORS, + use_tracing=True) + + mlir_str = str(mlir_module) + + # Save debug file. + debug_file = f'{module_name}_module.mlir' + with open(debug_file, 'w') as f: + f.write(mlir_str) + + print(f' ✓ Compilation successful: {debug_file}') + print(f' Size: {len(mlir_str):,} characters') + + return mlir_str + + except Exception as e: + print(f' ✗ Compilation failed: {e}') + import traceback + traceback.print_exc() + return None + + +def extract_function_signature(mlir_str): + """Extracts function signature from MLIR. + + Args: + mlir_str: MLIR string containing a @forward function. + + Returns: + Tuple of (input_types, output_types, full_signature_string). + Returns (None, None, None) if extraction fails. + """ + # Match function signature: + # func.func @forward(%arg0: type0, ...) -> (type_out0, ...) 
+ pattern = r'func\.func @forward\((.*?)\)\s*->\s*\(([^)]+)\)' + match = re.search(pattern, mlir_str, re.DOTALL) + + if not match: + # Try single return value: -> type + pattern = r'func\.func @forward\((.*?)\)\s*->\s*([^\s{]+)' + match = re.search(pattern, mlir_str, re.DOTALL) + if not match: + print(' ⚠ Cannot extract function signature') + return None, None, None + + inputs_str = match.group(1).strip() + outputs_str = match.group(2).strip() + output_types = [outputs_str] + else: + inputs_str = match.group(1).strip() + outputs_str = match.group(2).strip() + output_types = [t.strip() for t in outputs_str.split(',') if t.strip()] + + # Extract input types. + input_types = [] + for param in inputs_str.split(','): + if ':' in param: + type_part = param.split(':', 1)[1].strip() + input_types.append(type_part) + + full_signature = f"({inputs_str}) -> ({', '.join(output_types)})" + + return input_types, output_types, full_signature + + +def extract_and_rename_function(mlir_str, new_name): + """Extracts @forward function and renames it. + + Args: + mlir_str: MLIR string containing the function. + new_name: New name for the function. + + Returns: + Renamed function as string, or None if extraction fails. + """ + lines = mlir_str.split('\n') + func_lines = [] + brace_count = 0 + in_function = False + + for line in lines: + if 'func.func @forward(' in line: + in_function = True + # Rename function + line = line.replace('func.func @forward', + f'func.func @{new_name}') + + if in_function: + func_lines.append(line) + brace_count += line.count('{') + brace_count -= line.count('}') + + if brace_count == 0 and len(func_lines) > 1: + break + + return '\n'.join(func_lines) if func_lines else None + + +def collect_map_definitions(mlir_str): + """Collects all affine_map definitions from MLIR. + + Args: + mlir_str: MLIR string. + + Returns: + List of tuples (map_name, map_definition) where map_name is like 'map' + or 'map1' and map_definition is the full affine_map expression. 
+ """ + maps = [] + for line in mlir_str.split('\n'): + if line.startswith('#map'): + # Parse: #map = affine_map<...> + # or: #map1 = affine_map<...> + match = re.match(r'#(map\d*)\s*=\s*(.+)', line) + if match: + map_name = match.group(1) + map_def = match.group(2).strip() + maps.append((map_name, map_def)) + return maps + + +def build_global_map_definitions(maps_list1, maps_list2, maps_list3): + """Builds global map definitions and renaming mappings for each module. + + Args: + maps_list1: List of (map_name, map_def) tuples from module 1. + maps_list2: List of (map_name, map_def) tuples from module 2. + maps_list3: List of (map_name, map_def) tuples from module 3. + + Returns: + Tuple of (global_map_lines, rename_map1, rename_map2, rename_map3) where: + - global_map_lines: List of global map definition strings. + - rename_mapX: Dict mapping old map name to new global map name for module X. + """ + # Track unique map definitions and assign global names. + unique_maps = {} # map_def -> global_name + global_map_lines = [] + global_counter = 0 + + # Process all maps from all modules. + all_module_maps = [ + ('module1', maps_list1), + ('module2', maps_list2), + ('module3', maps_list3), + ] + + rename_maps = [{}, {}, {}] # One dict per module. + + for module_idx, (module_name, maps_list) in enumerate(all_module_maps): + for old_name, map_def in maps_list: + if map_def not in unique_maps: + # New unique map definition - assign global name. + if global_counter == 0: + global_name = 'map' + else: + global_name = f'map{global_counter}' + global_counter += 1 + + unique_maps[map_def] = global_name + global_map_lines.append(f'#{global_name} = {map_def}') + + # Record the renaming: old_name -> global_name. + global_name = unique_maps[map_def] + rename_maps[module_idx][old_name] = global_name + + return global_map_lines, rename_maps[0], rename_maps[1], rename_maps[2] + + +def rename_maps_in_function(func_str, rename_map): + """Renames map references in a function body. 
+ + Args: + func_str: Function definition as string. + rename_map: Dict mapping old map names to new map names. + + Returns: + Function string with renamed map references. + """ + # Use a callback function for atomic replacements to avoid chaining + def replace_callback(match): + map_name = match.group(1) # Capture the map name without '#' + return '#' + rename_map.get(map_name, map_name) + + # Build pattern that matches any of the old map names + # Sort by length (descending) to match longer names first (e.g., map10 before map1) + sorted_names = sorted(rename_map.keys(), key=len, reverse=True) + if not sorted_names: + return func_str + + # Create pattern: #(map10|map1|map|...)(?=\W|$) + pattern = r'#(' + '|'.join(re.escape(name) for name in sorted_names) + r')(?=\W|$)' + + # Replace all matches in a single pass (atomic operation) + result = re.sub(pattern, replace_callback, func_str) + + return result + + +def build_wrapper_function(sig1, sig2, sig3): + """Generates top-level orchestrator function based on signatures. + + Args: + sig1: Ray sampler signature (input_types, output_types, full_sig). + sig2: Hash encoder signature. + sig3: NeRF MLP signature. + + Returns: + Top-level function as string. + """ + in1, out1, _ = sig1 + in2, out2, _ = sig2 + in3, out3, _ = sig3 + + # Validate type compatibility. + print('\nValidating type compatibility:') + print(f' ray_sampler output: {out1}') + print(f' hash_encoder input: {in2}') + print(f' hash_encoder output: {out2}') + print(f' nerf_mlp input: {in3}') + print(f' nerf_mlp output: {out3}') + + if len(out1) != 1 or len(in2) != 1: + print(' ⚠ Warning: ray_sampler → hash_encoder type mismatch') + if len(out2) != 1 or len(in3) < 1: + print(' ⚠ Warning: hash_encoder → nerf_mlp type mismatch') + + # Generate top-level function. + # Inputs: Same as ray_sampler. + # Outputs: Same as nerf_mlp. 
+ wrapper_inputs = ', '.join([f'%arg{i}: {t}' for i, t in enumerate(in1)]) + wrapper_outputs = ', '.join(out3) + + wrapper = f''' func.func @nerf_forward({wrapper_inputs}) + -> ({wrapper_outputs}) {{ + // ================================================ + // Task 1: Ray Sampling + // ================================================ + %positions = func.call @ray_sampler_func({', '.join([f'%arg{i}' for i in range(len(in1))])}) + : ({', '.join(in1)}) -> {out1[0]} + + // ================================================ + // Task 2: Hash Encoding + // ================================================ + %encoded = func.call @hash_encoder_func(%positions) + : ({out1[0]}) -> {out2[0]} + + // ================================================ + // Task 3: MLP Inference + // ================================================ +''' + + # Handle MLP's multiple inputs (encoded + view_dirs). + if len(in3) == 2: + wrapper += f''' %density, %rgb = func.call @nerf_mlp_func(%encoded, %arg{len(in1)-1}) + : ({out2[0]}, {in1[-1]}) -> ({', '.join(out3)}) + + return %density, %rgb : {', '.join(out3)} + }} +''' + else: + wrapper += f''' %result = func.call @nerf_mlp_func(%encoded) + : ({out2[0]}) -> ({', '.join(out3)}) + + return %result : {', '.join(out3)} + }} +''' + + return wrapper + + +def merge_mlir_modules(mlir1, mlir2, mlir3): + """Merges three MLIR modules into a single modular MLIR file. + + Args: + mlir1: MLIR string for ray sampler. + mlir2: MLIR string for hash encoder. + mlir3: MLIR string for NeRF MLP. + + Returns: + Merged MLIR string, or None if merging fails. + """ + print('\n' + '=' * 70) + print('Merging Modules') + print('=' * 70) + + # Extract signatures. 
+ print('\nExtracting function signatures...') + sig1 = extract_function_signature(mlir1) + sig2 = extract_function_signature(mlir2) + sig3 = extract_function_signature(mlir3) + + if None in [sig1[0], sig2[0], sig3[0]]: + print('✗ Failed to extract function signatures') + return None + + print(' ✓ Signature extraction successful') + + # Extract function definitions. + print('\nExtracting function definitions...') + func1 = extract_and_rename_function(mlir1, 'ray_sampler_func') + func2 = extract_and_rename_function(mlir2, 'hash_encoder_func') + func3 = extract_and_rename_function(mlir3, 'nerf_mlp_func') + + if not all([func1, func2, func3]): + print('✗ Failed to extract function definitions') + return None + + print(' ✓ Function extraction successful') + + # Collect and rename all map definitions. + print('\nCollecting affine_map definitions...') + maps1 = collect_map_definitions(mlir1) + maps2 = collect_map_definitions(mlir2) + maps3 = collect_map_definitions(mlir3) + + print(f' Module 1: {len(maps1)} maps') + print(f' Module 2: {len(maps2)} maps') + print(f' Module 3: {len(maps3)} maps') + + # Build global map definitions and rename mappings. + print('\nBuilding global map definitions with renaming...') + global_map_lines, rename_map1, rename_map2, rename_map3 = \ + build_global_map_definitions(maps1, maps2, maps3) + + print(f' ✓ Created {len(global_map_lines)} unique global map definitions') + + # Rename map references in each function. + print('\nRenaming map references in functions...') + func1 = rename_maps_in_function(func1, rename_map1) + func2 = rename_maps_in_function(func2, rename_map2) + func3 = rename_maps_in_function(func3, rename_map3) + print(' ✓ Map references renamed successfully') + + # Generate top-level function. + print('\nGenerating top-level function...') + wrapper = build_wrapper_function(sig1, sig2, sig3) + print(' ✓ Top-level function generation successful') + + # Assemble final MLIR. 
+ merged = '\n'.join(global_map_lines) + '\n' if global_map_lines else '' + merged += 'module {\n' + merged += (' ml_program.global private mutable @global_seed' + '(dense<0> : tensor) : tensor\n\n') + merged += ' // ============================================\n' + merged += ' // Module 1: Ray Sampler\n' + merged += ' // ============================================\n' + merged += indent_mlir(func1, 2) + '\n\n' + merged += ' // ============================================\n' + merged += ' // Module 2: Hash Grid Encoder\n' + merged += ' // ============================================\n' + merged += indent_mlir(func2, 2) + '\n\n' + merged += ' // ============================================\n' + merged += ' // Module 3: NeRF MLP\n' + merged += ' // ============================================\n' + merged += indent_mlir(func3, 2) + '\n\n' + merged += ' // ============================================\n' + merged += ' // Top-level Function (Auto-generated)\n' + merged += ' // ============================================\n' + merged += wrapper + '\n' + merged += '}\n' + + return merged + + +def fix_tensor_expand_shape_syntax(mlir_str): + """Fixes tensor.expand_shape syntax for LLVM 20+ compatibility. + + Converts old syntax: + %x = tensor.expand_shape %y [[0, 1]] : tensor<16xf32> into tensor<1x16xf32> + + To new syntax: + %x = tensor.expand_shape %y [[0, 1]] output_shape [1, 16] : tensor<16xf32> into tensor<1x16xf32> + + Args: + mlir_str: MLIR string to fix. + + Returns: + Fixed MLIR string. 
+ """ + lines = mlir_str.split('\n') + fixed_lines = [] + + for line in lines: + # Match tensor.expand_shape pattern + # Pattern: tensor.expand_shape %var [[...]] : tensor<...> into tensor + match = re.search( + r'(.*tensor\.expand_shape\s+%\S+\s+\[\[.*?\]\])\s*:\s*(tensor<[^>]+>)\s+into\s+tensor<([^>]+)>', + line + ) + + if match: + prefix = match.group(1) # Everything before ':' + input_type = match.group(2) # tensor<16xf32> + output_shape = match.group(3) # 1x16xf32 + + # Extract shape dimensions from output_shape + # Remove type suffix (e.g., 'xf32', 'xi64') + shape_str = re.sub(r'x[a-z]\w+$', '', output_shape) + # Split by 'x' to get dimensions + dims = shape_str.split('x') + + # Build output_shape attribute + output_shape_attr = f"output_shape [{', '.join(dims)}]" + + # Reconstruct the line with output_shape attribute + fixed_line = f"{prefix} {output_shape_attr} : {input_type} into tensor<{output_shape}>" + + # Preserve any trailing content (like comments) + trailing = line[match.end():] + fixed_line += trailing + + fixed_lines.append(fixed_line) + else: + # No match, keep original line + fixed_lines.append(line) + + return '\n'.join(fixed_lines) + + +def indent_mlir(mlir_str, spaces): + """Adds indentation to MLIR string. + + Args: + mlir_str: MLIR string to indent. + spaces: Number of spaces for indentation. + + Returns: + Indented MLIR string. + """ + lines = mlir_str.split('\n') + indent = ' ' * spaces + return '\n'.join(indent + line if line.strip() else line for line in lines) + + +def verify_mlir(mlir_file): + """Verifies MLIR file using mlir-opt. + + Args: + mlir_file: Path to MLIR file to verify. + + Returns: + True if verification succeeds, False otherwise. 
+ """ + print('\nVerifying MLIR file...') + + mlir_opt = '../../../../../build/tools/mlir-neura-opt/mlir-neura-opt' + + if not os.path.exists(mlir_opt): + print(' ⚠ mlir-neura-opt not found, skipping verification') + return True + + result = subprocess.run( + [mlir_opt, mlir_file, '--verify-each=true', '-o', '/dev/null'], + capture_output=True, + text=True) + + if result.returncode == 0: + print(' ✅ MLIR verification passed!') + return True + else: + print(' ✗ MLIR verification failed:') + print(result.stderr) + return False + + +def main(): + """Main workflow.""" + # Parse command-line arguments. + parser = argparse.ArgumentParser( + description='Build modular MLIR from NeRF components') + parser.add_argument( + '--output', + '-o', + default='nerf_modular_3funcs.mlir', + help='Output file path (default: nerf_modular_3funcs.mlir)') + args = parser.parse_args() + + print('=' * 70) + print('Build Modular MLIR (Auto Signature Extraction)') + print('=' * 70) + print(f'Output file: {args.output}') + print('=' * 70) + + device = torch.device('cpu') + + # Compile 3 modules. + sampler = RaySampler(num_samples=16) + sampler.eval() + mlir1 = compile_single_module(sampler, + (torch.randn(2, 3), torch.randn(2, 3)), + 'ray_sampler') + + encoder = HashGridEncoder( + num_levels=2, features_per_level=2, log2_hashmap_size=8) + encoder.eval() + mlir2 = compile_single_module(encoder, (torch.randn(2, 16, 3),), + 'hash_encoder') + + mlp = NeRFMLP(input_dim=4, hidden_dim=32, num_layers=2) + mlp.eval() + mlir3 = compile_single_module( + mlp, (torch.randn(2, 16, 4), torch.randn(2, 3)), 'nerf_mlp') + + if not all([mlir1, mlir2, mlir3]): + print('\n✗ Some modules failed to compile') + return 1 + + # Merge modules. + merged = merge_mlir_modules(mlir1, mlir2, mlir3) + + if not merged: + print('\n✗ Module merging failed') + return 1 + + # Fix tensor.expand_shape syntax for LLVM 20+ compatibility. 
+ print('\nApplying syntax fixes for LLVM 20+ compatibility...') + merged = fix_tensor_expand_shape_syntax(merged) + + if 'output_shape [' in merged: + print(' ✓ Fixed tensor.expand_shape syntax') + else: + print(' ℹ No tensor.expand_shape operations found') + + # Save output. + output_file = args.output + + # Ensure output directory exists. + output_dir = os.path.dirname(output_file) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + + with open(output_file, 'w') as f: + f.write(merged) + + print('\n' + '=' * 70) + print('✓ Modular MLIR generated successfully!') + print('=' * 70) + print(f' File: {output_file}') + print(f' Size: {len(merged):,} characters') + + # Statistics. + num_funcs = merged.count('func.func') + num_calls = merged.count('func.call') + + print('\nStructure:') + print(f' Function definitions: {num_funcs} (3 modules + 1 top-level)') + print(f' Function calls: {num_calls} (top-level calls 3 modules)') + + # Verification. + if verify_mlir(output_file): + print('\n' + '=' * 70) + print('Next Step: Compile to Taskflow') + print('=' * 70) + print(f'\nmlir-neura-opt {output_file} \\') + print(' --one-shot-bufferize \\') + print(' --pass-pipeline=\'func.func(convert-linalg-to-affine-loops)\' \\') + print(' --convert-affine-to-taskflow \\') + print(' -o nerf_taskflow_3tasks.mlir') + print('\nExpected: Generate 3 taskflow.task operations') + + return 0 + else: + print('\n⚠ MLIR verification failed, but file was generated') + print(f' You can try manual inspection: {output_file}') + return 1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/test/multi-cgra/taskflow/nerf_hash_grid/nerf_components.py b/test/multi-cgra/taskflow/nerf_hash_grid/nerf_components.py new file mode 100644 index 00000000..309e0cca --- /dev/null +++ b/test/multi-cgra/taskflow/nerf_hash_grid/nerf_components.py @@ -0,0 +1,369 @@ +"""NeRF components for modular MLIR compilation. 
+ +This module contains PyTorch implementations of NeRF components: + - RaySampler: Samples 3D positions along rays + - HashGridEncoder: Multi-resolution hash encoding (Instant-NGP style) + - NeRFMLP: Neural network for density and color prediction + - HashGridNeRF: Complete NeRF pipeline + +These components are designed to be compiled individually to MLIR and then +combined into a modular heterogeneous computing system. +""" + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class RaySampler(nn.Module): + """Samples 3D positions along rays for volume rendering.""" + + def __init__(self, num_samples=64, near=2.0, far=6.0): + """Initializes ray sampler. + + Args: + num_samples: Number of samples per ray. + near: Near plane distance. + far: Far plane distance. + """ + super().__init__() + self.num_samples = num_samples + # Register constants as buffers to avoid torch.constant issues. + self.register_buffer('near', torch.tensor(near, dtype=torch.float32)) + self.register_buffer('far', torch.tensor(far, dtype=torch.float32)) + + def forward(self, rays_o, rays_d): + """Samples positions along rays. + + Args: + rays_o: Ray origins [batch_size, 3]. + rays_d: Ray directions [batch_size, 3]. + + Returns: + Sampled 3D positions [batch_size, num_samples, 3]. + """ + batch_size = rays_o.shape[0] + + # Manually implement linspace for compatibility. + # Original: t_vals = torch.linspace(self.near, self.far, ...) + # Compatible: Use arange + scaling. 
+ indices = torch.arange( + self.num_samples, device=rays_o.device, dtype=rays_o.dtype) + step = (self.far - self.near) / (self.num_samples - 1) + t_vals = self.near + indices * step # [num_samples] + + t_vals = t_vals.unsqueeze(0).expand(batch_size, -1) # [B, N] + + # positions = rays_o + t * rays_d + positions = (rays_o.unsqueeze(1) + + t_vals.unsqueeze(2) * rays_d.unsqueeze(1)) + + return positions # [batch_size, num_samples, 3] + + +class HashGridEncoder(nn.Module): + """Multi-resolution hash grid encoding (Instant-NGP style).""" + + def __init__(self, + num_levels=16, + features_per_level=2, + log2_hashmap_size=19, + base_resolution=16, + finest_resolution=512): + """Initializes hash grid encoder. + + Args: + num_levels: Number of resolution levels. + features_per_level: Feature dimension per level. + log2_hashmap_size: Log2 of hash table size. + base_resolution: Coarsest grid resolution. + finest_resolution: Finest grid resolution. + """ + super().__init__() + self.num_levels = num_levels + self.features_per_level = features_per_level + self.log2_hashmap_size = log2_hashmap_size + self.hashmap_size = 2**log2_hashmap_size + self.base_resolution = base_resolution + self.finest_resolution = finest_resolution + + # Compute resolution growth factor per level. + self.b = np.exp( + (np.log(finest_resolution) - np.log(base_resolution)) / + (num_levels - 1)) + + # Hash tables for each level (learnable parameters). + self.hash_tables = nn.ParameterList([ + nn.Parameter( + torch.randn(self.hashmap_size, features_per_level) * 0.01) + for _ in range(num_levels) + ]) + + def hash_function(self, coords, level): + """Hashes 3D integer coordinates to hash table indices. + + Uses modulo operation instead of bitwise operations for compatibility. + Converts to int32 for better compatibility with downstream operations. + + Args: + coords: Integer coordinates [batch_size, 3]. + level: Resolution level index. + + Returns: + Hash indices [batch_size]. 
+ """ + # Convert to int32 for compatibility. + x = coords[:, 0].int() + y = coords[:, 1].int() + z = coords[:, 2].int() + + # Spatial hash using prime numbers (avoid int32 overflow). + hashed = x * 1 + y * 73856093 + z * 19349663 + + # Use modulo instead of bitwise AND. + return hashed % self.hashmap_size + + def grid_sample_3d(self, positions, level): + """Samples features from hash grid at given level using trilinear + interpolation. + + Args: + positions: Normalized positions [batch_size, num_samples, 3] in [0, 1]. + level: Resolution level index. + + Returns: + Interpolated features [batch_size, num_samples, features_per_level]. + """ + batch_size, num_samples, _ = positions.shape + resolution = int(np.floor(self.base_resolution * (self.b**level))) + + # Scale positions to grid resolution. + scaled_pos = positions * (resolution - 1) # [B, N, 3] + + # Get integer grid coordinates (8 corners of cube). + base_coords = torch.floor(scaled_pos).int() # [B, N, 3] - int32 + + # Trilinear interpolation weights. + frac = scaled_pos - base_coords.float() # [B, N, 3] + + # Flatten batch and samples for processing. + base_coords_flat = base_coords.view(-1, 3) # [B*N, 3] + frac_flat = frac.view(-1, 3) # [B*N, 3] + + # Sample from 8 corners and compute trilinear interpolation. + features_list = [] + for dx in [0, 1]: + for dy in [0, 1]: + for dz in [0, 1]: + # Compute offset coordinates. + offset_x = base_coords_flat[:, 0] + dx + offset_y = base_coords_flat[:, 1] + dy + offset_z = base_coords_flat[:, 2] + dz + + # Stack into coordinates. + corner_coords = torch.stack([offset_x, offset_y, offset_z], dim=1) + + # Hash coordinates to table indices. + indices = self.hash_function(corner_coords, level) # [B*N] + + # Convert to long for tensor indexing. + indices = indices.long() + + # Lookup features from hash table. + corner_features = self.hash_tables[level][indices] # [B*N, F] + + # Compute trilinear weight. 
+ weight = 1.0 + weight *= (1 - frac_flat[:, 0]) if dx == 0 else frac_flat[:, 0] + weight *= (1 - frac_flat[:, 1]) if dy == 0 else frac_flat[:, 1] + weight *= (1 - frac_flat[:, 2]) if dz == 0 else frac_flat[:, 2] + + features_list.append(corner_features * weight.unsqueeze(1)) + + # Sum contributions from all corners. + interpolated_features = sum(features_list) # [B*N, F] + + # Reshape back. + interpolated_features = interpolated_features.view( + batch_size, num_samples, self.features_per_level) + + return interpolated_features + + def forward(self, positions): + """Encodes 3D positions with multi-resolution hash encoding. + + Args: + positions: 3D positions [batch_size, num_samples, 3] in range [-1, 1]. + + Returns: + Encoded features [batch_size, num_samples, num_levels * + features_per_level]. + """ + # Normalize positions to [0, 1]. + positions_normalized = (positions + 1.0) / 2.0 + + # Encode at all levels. + encoded_features = [] + for level in range(self.num_levels): + level_features = self.grid_sample_3d(positions_normalized, level) + encoded_features.append(level_features) + + # Concatenate features from all levels. + encoded = torch.cat(encoded_features, dim=-1) # [B, N, L*F] + + return encoded + + +class NeRFMLP(nn.Module): + """MLP for NeRF: predicts density and color from encoded features.""" + + def __init__(self, input_dim=32, hidden_dim=64, num_layers=3): + """Initializes NeRF MLP. + + Args: + input_dim: Input feature dimension. + hidden_dim: Hidden layer dimension. + num_layers: Number of hidden layers. + """ + super().__init__() + + # Density network. + self.density_net = nn.ModuleList([nn.Linear(input_dim, hidden_dim)]) + for _ in range(num_layers - 1): + self.density_net.append(nn.Linear(hidden_dim, hidden_dim)) + self.density_out = nn.Linear(hidden_dim, 1) + + # Color network (conditioned on view direction). 
+ self.color_net = nn.ModuleList( + [nn.Linear(hidden_dim + 3, hidden_dim)]) # +3 for view direction + for _ in range(num_layers - 2): + self.color_net.append(nn.Linear(hidden_dim, hidden_dim)) + self.color_out = nn.Linear(hidden_dim, 3) + + def forward(self, encoded_features, view_dirs): + """Predicts density and color from encoded features. + + Args: + encoded_features: Encoded position features [batch_size, num_samples, + input_dim]. + view_dirs: View directions [batch_size, 3]. + + Returns: + Tuple of: + density: Volume density [batch_size, num_samples, 1]. + rgb: RGB color [batch_size, num_samples, 3]. + """ + batch_size, num_samples, _ = encoded_features.shape + + # Density prediction. + x = encoded_features + for layer in self.density_net: + x = torch.relu(layer(x)) + density = self.density_out(x) # [B, N, 1] + + # Get features for color prediction. + density_features = x # [B, N, hidden_dim] + + # Expand view directions. + view_dirs_expanded = view_dirs.unsqueeze(1).expand( + -1, num_samples, -1) # [B, N, 3] + + # Concatenate density features with view directions. + color_input = torch.cat([density_features, view_dirs_expanded], dim=-1) + + # Color prediction. + x = color_input + for layer in self.color_net: + x = torch.relu(layer(x)) + rgb = torch.sigmoid(self.color_out(x)) # [B, N, 3] + + return density, rgb + + +class HashGridNeRF(nn.Module): + """Complete NeRF pipeline with hash grid encoding.""" + + def __init__(self, + num_samples=64, + num_levels=16, + features_per_level=2, + hidden_dim=64): + """Initializes complete NeRF model. + + Args: + num_samples: Number of samples per ray. + num_levels: Number of hash grid levels. + features_per_level: Features per hash grid level. + hidden_dim: MLP hidden dimension. 
+ """ + super().__init__() + self.ray_sampler = RaySampler(num_samples=num_samples) + self.hash_encoder = HashGridEncoder( + num_levels=num_levels, features_per_level=features_per_level) + self.nerf_mlp = NeRFMLP( + input_dim=num_levels * features_per_level, hidden_dim=hidden_dim) + + def forward(self, rays_o, rays_d): + """Full NeRF forward pass. + + Args: + rays_o: Ray origins [batch_size, 3]. + rays_d: Ray directions [batch_size, 3]. + + Returns: + Tuple of: + density: Volume density [batch_size, num_samples, 1]. + rgb: RGB color [batch_size, num_samples, 3]. + """ + # 1. Sample positions along rays. + positions = self.ray_sampler(rays_o, rays_d) # [B, N, 3] + + # 2. Hash encoding. + encoded = self.hash_encoder(positions) # [B, N, L*F] + + # 3. MLP prediction. + density, rgb = self.nerf_mlp(encoded, rays_d) + + return density, rgb + + +if __name__ == '__main__': + print('=' * 70) + print('NeRF Components Test') + print('=' * 70) + + # Test RaySampler. + print('\n1. Testing RaySampler...') + sampler = RaySampler(num_samples=16) + rays_o = torch.randn(2, 3) + rays_d = torch.randn(2, 3) + positions = sampler(rays_o, rays_d) + print(f'✓ RaySampler output shape: {positions.shape}') + + # Test HashGridEncoder. + print('\n2. Testing HashGridEncoder...') + encoder = HashGridEncoder( + num_levels=2, features_per_level=2, log2_hashmap_size=8) + encoded = encoder(positions) + print(f'✓ HashGridEncoder output shape: {encoded.shape}') + + # Test NeRFMLP. + print('\n3. Testing NeRFMLP...') + mlp = NeRFMLP(input_dim=4, hidden_dim=32) + density, rgb = mlp(encoded, rays_d) + print(f'✓ NeRFMLP density shape: {density.shape}') + print(f'✓ NeRFMLP rgb shape: {rgb.shape}') + + # Test full model. + print('\n4. 
Testing Complete Model...') + model = HashGridNeRF( + num_samples=16, num_levels=2, features_per_level=2, hidden_dim=32) + density, rgb = model(rays_o, rays_d) + print('✓ Complete model works!') + print(f' Density shape: {density.shape}') + print(f' RGB shape: {rgb.shape}') + + print('\n' + '=' * 70) + print('All tests passed!') + print('=' * 70) diff --git a/test/multi-cgra/taskflow/nerf_hash_grid/test_assign_target.mlir b/test/multi-cgra/taskflow/nerf_hash_grid/test_assign_target.mlir new file mode 100644 index 00000000..dac103be --- /dev/null +++ b/test/multi-cgra/taskflow/nerf_hash_grid/test_assign_target.mlir @@ -0,0 +1,26 @@ +// RUN: mkdir -p %S/Output +// RUN: cd %S && python build_modular_mlir.py --output %S/Output/nerf_modular_3funcs.mlir +// RUN: mlir-neura-opt %S/Output/nerf_modular_3funcs.mlir --assign-task-target -o %S/Output/nerf_with_target.mlir +// RUN: mlir-neura-opt %S/Output/nerf_modular_3funcs.mlir --assign-task-target | FileCheck %s + +// Test AssignTaskTarget pass on NeRF modular functions +// This test verifies the complete workflow: +// 1. Generate modular MLIR from PyTorch NeRF components using build_modular_mlir.py +// 2. Run AssignTaskTarget pass to assign hardware targets to functions +// 3. Verify that targets are correctly assigned based on function names: +// - ray_sampler_func -> CPU (sampling operations) +// - hash_encoder_func -> DOE (encoding operations) +// - nerf_mlp_func -> CGRA (neural network inference) +// - nerf_forward -> CPU (top-level coordinator) + +// CHECK-LABEL: func.func @ray_sampler_func +// CHECK-SAME: attributes {target.device = "cpu"} + +// CHECK-LABEL: func.func @hash_encoder_func +// CHECK-SAME: attributes {target.device = "doe"} + +// CHECK-LABEL: func.func @nerf_mlp_func +// CHECK-SAME: attributes {target.device = "cgra"} + +// CHECK-LABEL: func.func @nerf_forward +// CHECK-SAME: attributes {target.device = "cpu"}