Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
18be6d2
[SRC] WIP: Add multi-tile support in xbar.
DiyouS Nov 21, 2025
93a103a
[SRC] WIP: Update pkg for multi-tile configuration.
DiyouS Nov 26, 2025
b7ab5e3
[SRC] WIP: Add group-level.
DiyouS Nov 26, 2025
b70f183
[SRC] WIP: add group-level.
DiyouS Nov 26, 2025
8132a9c
WIP: [SRC] multi-tile support.
DiyouS Dec 11, 2025
7b35ab6
[SRC] WIP: Move prepheral to the cluster level.
DiyouS Jan 5, 2026
cfc71fb
[TB] Fix a bug in testbench of incorrectly using negedge instead of p…
DiyouS Jan 5, 2026
36b6554
[SRC] WIP: Adapt package for readability, keep working on multi-tile …
DiyouS Jan 6, 2026
2efeaa5
[SRC] WIP: Adapt package for readability
DiyouS Jan 7, 2026
5290115
[SRC] WIP: continue work on multi-tile support.
DiyouS Jan 7, 2026
683f7f5
[SRC] Fix the core_id and tile_id.
DiyouS Jan 8, 2026
a7a78f7
[SRC] Change hardware barrier to the new two-level hardware barrier f…
DiyouS Jan 9, 2026
dfe9b0a
[SW] Add software for insitu cache byte/half-word/word access test.
Aquaticfuller Jan 7, 2026
835cc34
[SW] Add more info output for cache byte access test.
Aquaticfuller Jan 8, 2026
bf08961
[RTL] wire byte strobes into L1 data banks
Aquaticfuller Jan 9, 2026
36cf9b2
Update the insitu-cache dep which supports byte access.
Aquaticfuller Jan 9, 2026
13485e5
[SW] Add vector byte/half-word test.
Aquaticfuller Jan 9, 2026
6b49e50
[SCRIPT] Update the auto-benchmark scripts: 1.add new benchmarks; 2. …
Aquaticfuller Jan 13, 2026
b322716
[Lint] Fix a line length exceeds max linting issue.
Aquaticfuller Jan 13, 2026
1b5999a
[SRC] WIP: connect the cross-tile interconnection.
DiyouS Jan 13, 2026
4f16064
[SRC] Fix several warnings in simulation.
DiyouS Jan 13, 2026
fe27ce3
[SRC] Fix a comb loop in multi-tile configuration.
DiyouS Jan 15, 2026
8c3b6d9
[SRC] Support two-tile conifguration.
DiyouS Jan 15, 2026
541c6e3
[SRC] Clean code for multi-tile support.
DiyouS Jan 16, 2026
ca7d98f
[Runtime] Add tile id and number of tile functions in runtime.
DiyouS Jan 16, 2026
64f49ac
[CFG] Switch configurations to use 4 tile 16 cores by default.
DiyouS Jan 16, 2026
001f375
[SRC] Fix ID width mismatch and port width mismatch
DiyouS Jan 19, 2026
a9c96a2
[SW] WIP: Update dotp algorithm for better performance in multi-tile …
DiyouS Jan 19, 2026
0fcfcf5
[SW] Update the multi-tile dotp kernel.
DiyouS Jan 21, 2026
11e1cc1
[SW] Add optimized gemv kernel for multi-tile.
DiyouS Jan 21, 2026
7fa53a8
[CI] Update CI for multi-tile support.
DiyouS Jan 27, 2026
01a6604
[CI] Increase timeout to 5h.
DiyouS Jan 27, 2026
d798fd5
[Deps] Update insitu-cache dependancy.
DiyouS Jan 29, 2026
b3019f6
[CI] Remove +acc for CI flow.
DiyouS Jan 29, 2026
af442d5
[Periph] Move memory-mapped registers outside of spatz cluster.
DiyouS Jan 29, 2026
70458d7
[Periph] Move peripheral and bootrom outside of spatz cluster. Now fu…
DiyouS Jan 29, 2026
ae3097f
[Periph] Remove unused registsers.
DiyouS Jan 29, 2026
501040c
[Periph] Add private partition register
DiyouS Jan 30, 2026
5153acc
[SRC] WIP: Add cache partitioning support.
DiyouS Jan 30, 2026
78306df
[SW] Update load-store test.
DiyouS Jan 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ stages:
build-vsim:
extends: .base
stage: build
timeout: 5h
script:
- echo "Using CC=$CC"
- echo "Using CXX=$CXX"
Expand All @@ -44,6 +45,4 @@ build-vsim:

artifacts:
paths:
- software/build
- sim/bin
- util/auto-benchmark/logs
4 changes: 2 additions & 2 deletions Bender.lock
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ packages:
- common_verification
- register_interface
insitu-cache:
revision: 04f72a7ac7e9091f1820f0dac59bb778b134d7f7
revision: fa761ddebc946f9b46509d84945bf41ee1a9ec49
version: null
source:
Git: https://github.com/pulp-platform/Insitu-Cache.git
Expand All @@ -97,7 +97,7 @@ packages:
- common_cells
- tech_cells_generic
spatz:
revision: b60f7aee5e3be31facf566955aadbd5b6eac5da3
revision: 94ff5f6ca70e4dfef6168e0ac25b0bdd88e40132
version: null
source:
Git: https://github.com/pulp-platform/spatz.git
Expand Down
11 changes: 11 additions & 0 deletions Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,23 @@ sources:
- hardware/src/tcdm_cache_interco.sv
- hardware/src/tcdm_id_remapper.sv
- hardware/src/spatz_cache_amo.sv
# Memory-mapped register
- hardware/cachepool_peripheral/cachepool_peripheral_reg_pkg.sv
- hardware/cachepool_peripheral/cachepool_peripheral_reg_top.sv
- hardware/cachepool_peripheral/cachepool_peripheral.sv
# Bootrom
- hardware/bootrom/bootrom.sv
# Barrier
- hardware/src/cachepool_tile_barrier.sv
- hardware/src/cachepool_cluster_barrier.sv
# Level 1
- hardware/src/cachepool_pkg.sv
- hardware/src/cachepool_cc.sv
# Level 2
- hardware/src/cachepool_tile.sv
# Level 3
- hardware/src/cachepool_group.sv

- hardware/src/cachepool_cluster.sv
# Level 4
- hardware/tb/cachepool_cluster_wrapper.sv
Expand Down
68 changes: 58 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ WORK_DIR := ${SIM_DIR}/work
SIMBIN_DIR := ${SIM_DIR}/bin
DPI_PATH := ${HARDWARE_DIR}/tb/dpi
DPI_LIB ?= work-dpi
# Set DEBUG to 0 to turn off the +acc target for faster simulation
DEBUG ?= 1

## Bender usage (binary comes from toolchain.mk install)
BENDER ?= ${BENDER_INSTALL_DIR}/bender
Expand All @@ -66,6 +68,7 @@ CACHE_PATH := $(shell [ -x "$(BENDER)" ] && $(BENDER) path insitu-cac
CFG_DIR ?= ${CACHEPOOL_DIR}/config
config ?= cachepool_512


# Compiler choice for SW cmake
COMPILER ?= llvm

Expand Down Expand Up @@ -123,12 +126,11 @@ init:
.PHONY: quick-tool
quick-tool:
ln -sf /home/dishen/cachepool-32b/install $(CACHEPOOL_DIR)/install
# ln -sf /usr/scratch2/calanda/diyou/toolchain/cachepool-32b/install $(CACHEPOOL_DIR)/install

# Build bootrom and spatz (depends on opcodes repo being present)
.PHONY: generate
generate: update_opcodes gen-spatz-cfg
$(MAKE) -C $(SPZ_CLS_DIR) generate bootrom SPATZ_CLUSTER_CFG=${CFG_DIR}/cachepool.hjson
$(MAKE) -C $(SPZ_CLS_DIR) generate SPATZ_CLUSTER_CFG=${CFG_DIR}/cachepool.hjson

.PHONY: cache-init
cache-init:
Expand All @@ -138,6 +140,50 @@ else
@echo "insitu-cache path unavailable (bender not installed yet?)"
endif

# Paths used by the bootrom generation flow
BOOTROM_DIR := $(HARDWARE_DIR)/bootrom
SCRIPTS_DIR := $(CACHEPOOL_DIR)/util/scripts
TPL_DIR := $(SPATZ_DIR)/hw/system/spatz_cluster/test

# ---------------------------------------------------------------------------
# Bootrom generation flow
#   $(HJSON_OUT) --generate_bootdata.py--> bootdata_bootrom.cc, bootdata.cc
#   bootrom.S + bootdata_bootrom.cc --gcc--> bootrom.elf
#   bootrom.elf --objdump--> bootrom.dump
#   bootrom.elf --objcopy--> bootrom.bin
#   bootrom.bin --generate_bootrom.py--> bootrom.sv
# Every generated file is the target of exactly one rule, so the flow is safe
# under `make -j` and no recipe is defined twice.
# ---------------------------------------------------------------------------

# Command prefix placed in front of every riscv64-unknown-elf-* tool.
# NOTE(review): presumably a site-specific environment selector -- confirm.
# Override on the command line (e.g. `make BOOTROM_TOOL_PREFIX=`) when the
# tools are already reachable through PATH.
BOOTROM_TOOL_PREFIX ?= riscv -riscv64-gcc-9.5.0

# `bootrom` builds the SystemVerilog ROM and the human-readable disassembly.
.PHONY: bootrom
bootrom: $(BOOTROM_DIR)/bootrom.sv $(BOOTROM_DIR)/bootrom.dump

# STEP 1: Generate the C++ bootdata files from the HJSON configuration.
# Static pattern rule: the stem ($*) is `bootdata_bootrom` or `bootdata`, and
# each file is rendered from the template named <stem>.cc.tpl.
$(BOOTROM_DIR)/bootdata_bootrom.cc $(BOOTROM_DIR)/bootdata.cc: \
$(BOOTROM_DIR)/%.cc: $(SCRIPTS_DIR)/generate_bootdata.py $(HJSON_OUT)
	${PYTHON} $< -c $(HJSON_OUT) -d $(BOOTROM_DIR) -t $*.cc.tpl -o $@

# STEP 2a: Compile and link the bootrom ELF.
# The Makefile itself is a prerequisite so flag changes trigger a rebuild.
$(BOOTROM_DIR)/bootrom.elf: \
$(BOOTROM_DIR)/bootrom.S $(BOOTROM_DIR)/bootdata_bootrom.cc $(BOOTROM_DIR)/bootrom.ld Makefile
	$(BOOTROM_TOOL_PREFIX) riscv64-unknown-elf-gcc \
		-mabi=ilp32 -march=rv32imaf -static -nostartfiles \
		-T$(BOOTROM_DIR)/bootrom.ld \
		$(BOOTROM_DIR)/bootrom.S \
		$(BOOTROM_DIR)/bootdata_bootrom.cc \
		-I$(SPATZ_DIR)/hw/ip/snitch_test/src \
		-I$(SOFTWARE_DIR)/snRuntime/include \
		-o $@

# STEP 2b: Generate the human-readable disassembly from the ELF.
$(BOOTROM_DIR)/bootrom.dump: $(BOOTROM_DIR)/bootrom.elf
	$(BOOTROM_TOOL_PREFIX) riscv64-unknown-elf-objdump -D $< > $@

# STEP 2c: Extract the raw binary consumed by the ROM generator.
$(BOOTROM_DIR)/bootrom.bin: $(BOOTROM_DIR)/bootrom.elf
	$(BOOTROM_TOOL_PREFIX) riscv64-unknown-elf-objcopy -O binary $< $@

# STEP 3: Convert the binary into a SystemVerilog ROM module.
# bootdata.cc is not passed to the script; listing it here makes it get
# regenerated whenever the ROM is rebuilt.
$(BOOTROM_DIR)/bootrom.sv: $(BOOTROM_DIR)/bootrom.bin $(BOOTROM_DIR)/bootdata.cc
	${PYTHON} $(SCRIPTS_DIR)/generate_bootrom.py \
		$< -c $(HJSON_OUT) --output $@

###########
# DramSys #
###########
Expand All @@ -163,11 +209,6 @@ VSIM_HOME = /usr/pack/${QUESTA_VER}/questasim
FESVR ?= ${SIM_DIR}/work
FESVR_VERSION ?= c663ea20a53f4316db8cb4d591b1c8e437f4a0c4

VSIM_FLAGS += -sv_lib $(SIM_DIR)/${DPI_LIB}/cachepool_dpi
VSIM_FLAGS += -t 1ps
VSIM_FLAGS += -voptargs=+acc
VSIM_FLAGS += -suppress vsim-3999

VLOG_FLAGS += -svinputport=compat
VLOG_FLAGS += -override_timescale 1ns/1ps
VLOG_FLAGS += -suppress 2583
Expand All @@ -182,6 +223,7 @@ VLOG_DEFS = -DCACHEPOOL

# Cluster configuration
VLOG_DEFS += -DNUM_TILES=$(num_tiles)
VLOG_DEFS += -DNumRemotePortTile=$(num_remote_ports_per_tile)
VLOG_DEFS += -DNUM_CORES=$(num_cores)
VLOG_DEFS += -DDATA_WIDTH=$(data_width)
VLOG_DEFS += -DADDR_WIDTH=$(addr_width)
Expand Down Expand Up @@ -244,7 +286,7 @@ clean.sw:
rm -rf ${SOFTWARE_DIR}/build

.PHONY: sw
sw: clean.sw
sw: generate bootrom clean.sw
echo ${SOFTWARE_DIR}
mkdir -p ${SOFTWARE_DIR}/build
cd ${SOFTWARE_DIR}/build && ${CMAKE} \
Expand All @@ -258,7 +300,7 @@ sw: clean.sw
-DBUILD_TESTS=ON .. && $(MAKE)

.PHONY: vsim
vsim: dpi ${SIMBIN_DIR}/cachepool_cluster.vsim
vsim: generate bootrom dpi ${SIMBIN_DIR}/cachepool_cluster.vsim
echo ${SOFTWARE_DIR}
mkdir -p ${SOFTWARE_DIR}/build
cd ${SOFTWARE_DIR}/build && ${CMAKE} \
Expand All @@ -274,6 +316,11 @@ vsim: dpi ${SIMBIN_DIR}/cachepool_cluster.vsim

# Remove the software and simulation builds plus every generated bootrom
# artifact, including bootrom.bin, which the bootrom rules produce alongside
# bootrom.elf and bootrom.dump.
.PHONY: clean
clean: clean.sw clean.vsim
	rm -rf $(HJSON_OUT) $(BOOTROM_DIR)/bootdata.cc \
		$(BOOTROM_DIR)/bootdata_bootrom.cc \
		$(BOOTROM_DIR)/bootrom.sv \
		$(BOOTROM_DIR)/bootrom.dump \
		$(BOOTROM_DIR)/bootrom.elf \
		$(BOOTROM_DIR)/bootrom.bin

########
# Lint #
Expand Down Expand Up @@ -306,7 +353,8 @@ help:
@echo "*init*: clone the git submodules"
@echo "*toolchain*: build the necessary toolchains (LLVM/GCC/Spike)"
@echo "*quick-tool*: *ETH Member Only* soft link to prebuilt toolchains"
@echo "*generate*: generate the Spatz package, bootrom and opcodes"
@echo "*generate*: generate the Spatz package and opcodes"
@echo "*bootrom*: generate the bootrom"
@echo "*dram-build*: build DRAMSys for simulation"
@echo ""
@echo "SW Build:"
Expand Down
12 changes: 7 additions & 5 deletions config/cachepool.hjson
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
data_width: 32,
id_width_in: 6, // fixed for now
id_width_out: 2, // fixed for now
user_width: 17,
user_width: 21,
axi_cdc_enable: false,
sw_rst_enable: true,
axi_isolate_enable: false,
Expand All @@ -29,9 +29,9 @@

// Spatz parameters
vlen: 512,
n_fpu: 0,
n_fpu: 4,
n_ipu: 4,
spatz_fpu: false,
spatz_fpu: true,
norvd: true,

// Timing parameters (fixed defaults; tweak in template if needed)
Expand All @@ -53,9 +53,11 @@
register_offload_rsp: true
},

// Repeat the compute core template N times (driven by 4)
nr_tiles: 4,

// Repeat the compute core template N times (driven by 16)
cores: [
{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" }
{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" },{ $ref: "#/compute_core_template" }
],

icache: {
Expand Down
2 changes: 2 additions & 0 deletions config/cachepool.hjson.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
register_offload_rsp: true
},

nr_tiles: ${num_tiles},

// Repeat the compute core template N times (driven by ${num_cores})
cores: [
${cores_array}
Expand Down
4 changes: 2 additions & 2 deletions config/cachepool_128.mk
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
#########################

# Number of tiles
num_tiles ?= 1
num_tiles ?= 4

# Number of cores
num_cores ?= 4
num_cores ?= 16

# Core datawidth
data_width ?= 32
Expand Down
8 changes: 4 additions & 4 deletions config/cachepool_512.mk
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
#########################

# Number of tiles
num_tiles ?= 1
num_tiles ?= 4

# Number of cores
num_cores ?= 4
num_cores ?= 16

# Core datawidth
data_width ?= 32
Expand Down Expand Up @@ -48,11 +48,11 @@ l1d_coal_window ?= 2
# L1 data cache number of ways per
l1d_num_way ?= 4

# L1 data cache size per tile (KiB)
# L1 data cache size **per tile** (KiB)
l1d_tile_size ?= 256

# L1 data cache tag width (TODO: should be calculated)
l1d_tag_data_width ?= 52
l1d_tag_data_width ?= 92

####################
## CachePool CC ##
Expand Down
6 changes: 3 additions & 3 deletions config/cachepool_fpu_128.mk
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
#########################

# Number of tiles
num_tiles ?= 1
num_tiles ?= 4

# Number of cores
num_cores ?= 4
num_cores ?= 16

# Core datawidth
data_width ?= 32
Expand Down Expand Up @@ -43,7 +43,7 @@ l1d_size ?= 256
l1d_bank_factor ?= 1

# L1 coalescing window
l1d_coal_window ?= 2
l1d_coal_window ?= 1

# L1 data cache number of ways per
l1d_num_way ?= 4
Expand Down
4 changes: 2 additions & 2 deletions config/cachepool_fpu_256.mk
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
#########################

# Number of tiles
num_tiles ?= 1
num_tiles ?= 4

# Number of cores
num_cores ?= 4
num_cores ?= 16

# Core datawidth
data_width ?= 32
Expand Down
8 changes: 5 additions & 3 deletions config/cachepool_fpu_512.mk
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,19 @@
#########################

# Number of tiles
num_tiles ?= 1
num_tiles ?= 4

# Number of cores
num_cores ?= 4
num_cores ?= 16

# Core datawidth
data_width ?= 32

# Core addrwidth
addr_width ?= 32

num_remote_ports_per_tile ?= 1


######################
## CachePool Tile ##
Expand Down Expand Up @@ -52,7 +54,7 @@ l1d_num_way ?= 4
l1d_tile_size ?= 256

# L1 data cache tag width (TODO: should be calculated)
l1d_tag_data_width ?= 52
l1d_tag_data_width ?= 92

####################
## CachePool CC ##
Expand Down
9 changes: 3 additions & 6 deletions config/config.mk
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ include $(CACHEPOOL_DIR)/config/$(config).mk
# Number of tiles
num_tiles ?= 1

num_remote_ports_per_tile ?= 1

# Number of cores
num_cores ?= 4

Expand Down Expand Up @@ -102,16 +104,11 @@ snitch_max_trans ?= 16
#########################
## AXI configuration ##
#########################
# --- AXI user width derived from L1D cacheline width ---
# supported widths only
ifneq ($(filter $(l1d_cacheline_width),128 256 512),$(l1d_cacheline_width))
$(error l1d_cacheline_width must be one of {128,256,512} (got $(l1d_cacheline_width)))
endif

ifeq ($(l1d_cacheline_width),512)
axi_user_width := 17
else ifeq ($(l1d_cacheline_width),256)
axi_user_width := 19
axi_user_width := 18
else ifeq ($(l1d_cacheline_width),128)
axi_user_width := 21
endif
Expand Down
19 changes: 19 additions & 0 deletions hardware/bootrom/bootdata.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51

#include <tb_lib.hh>

// Boot parameters for the simulated cluster.
// NOTE: the top-level Makefile renders this file from bootdata.cc.tpl with
// generate_bootdata.py and `make clean` deletes it, so manual edits here do
// not persist -- change the configuration or the template instead.
// NOTE(review): BootData is declared in tb_lib.hh; presumably the testbench
// library reads BOOTDATA at start-up -- confirm against tb_lib.hh.
namespace sim {

// core_count = 16 and tile_count = 4 match the default 4-tile / 16-core
// configuration.
// tcdm_start + tcdm_size = 0xbffff800 + 0x800 = 0xc0000000.
// global_mem_end - global_mem_start = 0x20000000.
const BootData BOOTDATA = {.boot_addr = 0x1000,
.core_count = 16,
.hartid_base = 0,
.tcdm_start = 0xbffff800,
.tcdm_size = 0x800,
.tcdm_offset = 0x0,
.global_mem_start = 0x80000000,
.global_mem_end = 0xa0000000,
.tile_count = 4};

} // namespace sim
Loading
Loading