Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
3f2e5c5
add better camera
SimonDanisch Dec 22, 2025
7360498
get things working
SimonDanisch Dec 23, 2025
cb3ca12
fixes tests and docs
SimonDanisch Dec 24, 2025
b3312da
bvh4 experiment
SimonDanisch Jan 1, 2026
cd1701c
bvh4
SimonDanisch Jan 2, 2026
ad827a0
Merge branch 'sd/gpu-instanced-bvh' of https://github.com/JuliaGeomet…
SimonDanisch Jan 2, 2026
4d56d99
fixes
SimonDanisch Jan 4, 2026
3d579cd
unrolling and gpu tools
SimonDanisch Jan 7, 2026
058e724
refactor our unroll strategy
SimonDanisch Jan 7, 2026
286c7a2
getindex unrolled
SimonDanisch Jan 13, 2026
0203b6d
implement multitype vec
SimonDanisch Jan 23, 2026
a4a3651
api refactor
SimonDanisch Jan 25, 2026
7494706
refactor
SimonDanisch Jan 27, 2026
09d5d76
fixes
SimonDanisch Jan 28, 2026
6dc4e3a
add mapreduce
SimonDanisch Jan 28, 2026
59d3498
renaming and fixes
SimonDanisch Jan 29, 2026
819d1a4
add deref for array for more uniform handling
SimonDanisch Feb 1, 2026
2c5cb2c
add comment
SimonDanisch Feb 1, 2026
12b2ccb
small fixes
SimonDanisch Feb 9, 2026
ab4184c
improve updating support
SimonDanisch Feb 10, 2026
23e0465
fix empty blas?
SimonDanisch Feb 11, 2026
70b2ea3
change for per triangle meta
SimonDanisch Feb 14, 2026
0bdbb63
allow submesh materials
SimonDanisch Feb 15, 2026
875706a
refactor and cleanup
SimonDanisch Feb 16, 2026
3e6b3c8
fix setkey for OpenCL
SimonDanisch Feb 23, 2026
f132fbe
use less depth
SimonDanisch Feb 23, 2026
7c0123f
Merge branch 'sd/multitype-vec' of https://github.com/JuliaGeometry/R…
SimonDanisch Feb 23, 2026
a2c23ee
fix(heterovec): fix type mismatch in with_index generated comparison …
jkrumbiegel Mar 1, 2026
811a580
Use flat arrays with offset-based indexing for BLAS traversal
jkrumbiegel Mar 1, 2026
46133d2
polish for release
SimonDanisch Mar 2, 2026
b8b2be5
Merge remote-tracking branch 'origin/sd/multitype-vec' into jk/fix-me…
SimonDanisch Mar 2, 2026
1712d5e
final cleanup and removal of unused apis
SimonDanisch Mar 2, 2026
21678d7
use tlas consistently
SimonDanisch Mar 2, 2026
f41d746
synchronize?
SimonDanisch Mar 7, 2026
f0f57ed
Skip GPU kernel tests under --check-bounds=yes
SimonDanisch Mar 16, 2026
94bc45a
fix docs: viewfactors FaceView mismatch, GPU tutorial .primitives, typos
SimonDanisch Mar 28, 2026
dc3c159
update GPU benchmark plot — AMD RX 7900 XTX (RDNA3)
SimonDanisch Mar 28, 2026
114fbb8
fix wavefront renderer default camera_up to Y-up
SimonDanisch Mar 28, 2026
85f3aa3
add hardware RT acceleration tutorial
SimonDanisch Mar 28, 2026
ba56ecb
add ImplicitBVH vs Raycore GPU benchmark results
SimonDanisch Mar 29, 2026
f418da2
add collision, small fix and update hw-accel
SimonDanisch Apr 3, 2026
3283a91
move hw acceleration to Raycore
SimonDanisch Apr 6, 2026
17f62ae
cleaner api
SimonDanisch Apr 6, 2026
b811519
Fix TLAS build stability + keepalive + sync
SimonDanisch Apr 10, 2026
701d58d
actually mark dirty
SimonDanisch Apr 14, 2026
11d076f
Replace TLAS.gpu_blas_arrays::Vector{Any} with Vector{BLASArrays}
SimonDanisch Apr 14, 2026
d7cc5aa
Repurpose InstanceDescriptor.instance_id as interface-override slot
SimonDanisch Apr 14, 2026
5ea2c51
closest_hit/any_hit return instance array index, not instance_id
SimonDanisch Apr 14, 2026
80e8e3b
HW RT path: instance_custom_index carries override, gl_InstanceID dri…
SimonDanisch Apr 14, 2026
ee98343
merge with mark dirty fix
SimonDanisch Apr 17, 2026
31a7f63
memory cleanup and tests
SimonDanisch Apr 21, 2026
eb797a7
merges
SimonDanisch Apr 21, 2026
0b9d531
fix refit update
SimonDanisch Apr 21, 2026
84eb954
Merge branch 'sd/multitype-vec' of https://github.com/JuliaGeometry/R…
SimonDanisch Apr 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ jobs:
version: 1
arch: x64
- uses: julia-actions/cache@v2
- name: Install pkgs dependencies
run: julia --project=@. -e 'using Pkg; Pkg.test("Raycore", coverage=true)'
- uses: julia-actions/julia-runtest@v1
with:
# --check-bounds=auto: GPU kernels (OpenCL/pocl) crash with --check-bounds=yes
# because bounds-check code generates SPIR-V that pocl cannot handle
check_bounds: auto
27 changes: 21 additions & 6 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ version = "0.1.1"
authors = ["Anton Smirnov <tonysmn97@gmail.com>", "Simon Danisch <sdanisch@protonmail.com>"]

[deps]
AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Expand All @@ -12,28 +16,39 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[weakdeps]
Lava = "3a680b1f-cb25-4bee-9cf7-bc880b76dc8c"
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"

[extensions]
RaycoreLavaExt = "Lava"
RaycoreMakieExt = "Makie"

[compat]
julia = "1.10"
AcceleratedKernels = "0.4.3"
Adapt = "4.4.0"
Aqua = "0.8"
Atomix = "1.1.2"
GPUArraysCore = "0.2.0"
GeometryBasics = "0.5"
JET = "0.11"
KernelAbstractions = "0.9, 0.10"
LinearAlgebra = "1"
Makie = "0.24"
OpenCL = "0.10.8"
Random = "1"
StaticArrays = "1.9.7"
Statistics = "1"
Makie = "0.24"
Aqua = "0.8"
JET = "0.11"
Test = "1"
julia = "1.10"
pocl_jll = "7.1.0"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
OpenCL = "08131aa3-fb12-5dee-8b74-c09406e224a2"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"

[targets]
test = ["Test", "JET", "Aqua"]
test = ["Test", "JET", "Aqua", "pocl_jll", "OpenCL", "StaticArrays"]
29 changes: 20 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
[![](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliageometry.github.io/Raycore.jl/stable/)
[![](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliageometry.github.io/Raycore.jl/dev/)

High-performance ray-triangle intersection engine with BVH acceleration for CPU and GPU.
High-performance ray-triangle intersection engine with TLAS/BLAS acceleration for CPU and GPU.

## Features

- **Fast BVH acceleration** for ray-triangle intersection
- **Fast TLAS/BLAS acceleration** for ray-triangle intersection
- **CPU and GPU support** via KernelAbstractions.jl
- **MultiTypeSet**: GPU-safe heterogeneous collections with compile-time type-stable dispatch for materials, textures, lights, etc.
- **GPU TLAS**: Two-level acceleration structure (BLAS/TLAS) with instanced geometry, per-instance transforms, and GPU-first design
- **Analysis tools**: centroid calculation, illumination analysis, view factors for radiosity
- **Makie integration** for visualization

Expand All @@ -26,14 +28,14 @@ Pkg.add(url="https://github.com/JuliaGeometry/Raycore.jl")
using Raycore, GeometryBasics, LinearAlgebra

# Create geometry
sphere = Tesselation(Sphere(Point3f(0, 0, 2), 1.0f0), 20)
mesh = normal_mesh(Sphere(Point3f(0, 0, 2), 1.0f0))

# Build BVH acceleration structure
bvh = BVH([sphere])
# Build TLAS acceleration structure
tlas = TLAS([mesh], (mi, ti) -> UInt32(mi))

# Cast rays and find intersections
ray = Ray(o=Point3f(0, 0, 0), d=Vec3f(0, 0, 1))
hit_found, triangle, distance, bary_coords = closest_hit(bvh, ray)
hit_found, triangle, distance, bary_coords, instance_id = closest_hit(tlas, ray)

if hit_found
hit_point = ray.o + ray.d * distance
Expand All @@ -46,13 +48,22 @@ end
```julia
# Calculate scene centroid from a viewing direction
viewdir = normalize(Vec3f(0, 0, -1))
hitpoints, centroid = get_centroid(bvh, viewdir)
hitpoints, centroid = get_centroid(tlas, viewdir)

# Analyze illumination
illumination = get_illumination(bvh, viewdir)
illumination = get_illumination(tlas, viewdir)

# Compute view factors for radiosity
vf_matrix = view_factors(bvh; rays_per_triangle=1000)
vf_matrix = view_factors(tlas; rays_per_triangle=1000)
```

## Testing

Run tests with `--check-bounds=auto` (not the `Pkg.test` default of `--check-bounds=yes`), because GPU kernels compiled with bounds checking generate SPIR-V that crashes pocl:

```julia
using Pkg
Pkg.test("Raycore"; julia_args=`--check-bounds=auto`)
```

## Documentation
Expand Down
65 changes: 65 additions & 0 deletions benchmarks/implicitbvh_comparison.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# ImplicitBVH.jl vs Raycore.jl — GPU Benchmark

**Date**: 2026-03-29
**GPU**: AMD RX 7900 XTX (RDNA3)
**Backend**: AMDGPU.jl (ROCArray)
**Mesh**: xyzrgb_dragon.obj (249,882 triangles) + procedural random geometry

## BVH Build

| Triangles | ImplicitBVH | Raycore | Ratio |
|-----------|-------------|---------|-------|
| 250K | 0.98 ms | 4.93 ms | ImplicitBVH 5.0x faster |
| 1M | 2.25 ms | 7.46 ms | ImplicitBVH 3.3x faster |
| 4M | 8.41 ms | 16.16 ms | ImplicitBVH 1.9x faster |

ImplicitBVH builds faster due to simpler construction (Morton sort + bottom-up aggregate).
Raycore does more work: topology emission, parent pointers, leaf creation, atomic refit — all separate kernel launches.

## Ray Tracing — Dragon Mesh (249,882 triangles)

**Important**: ImplicitBVH `traverse_rays` returns bounding volume candidates (broad-phase only).
Raycore `closest_hit` returns the actual closest triangle intersection (full narrow-phase).
These are fundamentally different operations — ImplicitBVH does less work per ray but doesn't give a usable hit result.

| Rays | ImplicitBVH (LVT) | Raycore | Speedup (Raycore) |
|------|--------------------|---------|--------------------|
| 100K | 4.60 ms | 1.33 ms | 3.5x |
| 500K | 11.06 ms | 3.14 ms | 3.5x |
| 1M | 20.84 ms | 3.00 ms | 6.9x |
| 2M | 41.52 ms | 6.00 ms | 6.9x |
| 4M | 83.31 ms | 5.91 ms | 14.1x |

## Ray Tracing — Scaling with Triangle Count (1M rays)

| Triangles | ImplicitBVH (BFS) | Raycore | Speedup (Raycore) |
|-----------|--------------------|---------|--------------------|
| 250K | 43.89 ms | 8.99 ms | 4.9x |
| 1M | 217.37 ms | 11.08 ms | 19.6x |
| 4M | 313.0 ms | 15.41 ms | 20.3x |

## Why Raycore Is Faster for Ray Tracing

| Factor | ImplicitBVH | Raycore |
|--------|-------------|---------|
| Output | All BV candidates (variable-size list) | Single closest hit (fixed) |
| Triangle test | None (BSphere overlap only) | Möller–Trumbore per leaf |
| Passes | Two-pass (count + write) | Single-pass |
| Early termination | No — finds all overlaps | Yes — t_max shrinks on hit |
| Node layout | Implicit tree + skip array | Inline leaves (BVH2IL) |
| Allocations | Output buffer per trace | None |

## What ImplicitBVH Does Better

- **Build speed** (2-5x faster) — fewer kernel launches, implicit indexing
- **Collision detection** — LVT (leaf-vs-tree) two-pass is designed for finding all contact pairs
- **Two-BVH collision** — native support for inter-object contact detection
- **Cache reuse** — `BVHTraversal` cache avoids re-allocation across frames
- **Mixed bounding volumes** — BSphere leaves with BBox internal nodes

## Reference: ImplicitBVH README Numbers (Nvidia A100)

From the ImplicitBVH.jl README (249,882 triangles, BSphere/BBox):
- Build: 410 us
- Contact detection: 1.14 ms
- 100K rays: 2.00 ms
5 changes: 5 additions & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Bonito = "824d6782-a2ef-11e9-3a09-e5662e0c26f8"
BonitoBook = "b416d416-7a6e-4336-8c1a-1f8a8cd59518"
Expand All @@ -9,9 +10,13 @@ GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326"
ImageShow = "4e3cecfd-b093-5904-9786-8bbb286a6a31"
JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
MeshIO = "7269a6da-0436-5bbc-96c2-40638cbb6118"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Raycore = "afc56b53-c9a9-482a-a956-d1d800e05559"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
WGLMakie = "276b4fcb-3e11-5398-bf8b-a0c2d153d008"

[sources]
Expand Down
7 changes: 5 additions & 2 deletions docs/examples.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@ begin
l = 0.5
floor = Rect3f(-l, -l, -0.01, 2l, 2l, 0.01)
cat = load(Makie.assetpath("cat.obj"))
bvh = Raycore.BVH([s1, s2, s3, s4, cat]);
world_mesh = GeometryBasics.Mesh(bvh)
bvh = Raycore.TLAS([normal_mesh(s1), normal_mesh(s2), normal_mesh(s3), normal_mesh(s4), cat], (mi, ti) -> UInt32(mi));
# TODO: examples.jl needs rewrite for TLAS API
# bvh.primitives → iterate tlas.blas_array[i].primitives
# GeometryBasics.Mesh(bvh) → use Makie extension: plot(tlas)
world_mesh = Makie.convert_arguments(Makie.Mesh, bvh)[1]
f, ax, pl = Makie.mesh(world_mesh; color=:teal)
center!(ax.scene)
viewdir = normalize(ax.scene.camera.view_direction[])
Expand Down
Loading
Loading