Skip to content

Commit 94494bf

Browse files
authored
Merge pull request #169 from AdaWorldAPI/claude/pr-x2-pad-to-lanes
PR-X2 Worker B: soa_struct! #[soa(pad_to_lanes = N)] field attribute
2 parents fb95cb3 + 9d492db commit 94494bf

1 file changed

Lines changed: 284 additions & 1 deletion

File tree

src/hpc/soa.rs

Lines changed: 284 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,8 +323,48 @@ impl<'a, T, const N: usize> Iterator for SoaChunks<'a, T, N> {
323323
/// assert_eq!(b.means_y.as_slice(), &[2.0, 5.0]);
324324
/// assert_eq!(b.means_z.as_slice(), &[3.0, 6.0]);
325325
/// ```
326+
///
327+
/// # Example — `#[soa(pad_to_lanes = N)]` field attribute (PR-X2 Worker B)
328+
///
329+
/// Tag a field with `#[soa(pad_to_lanes = N)]` to make `push` pad the
330+
/// underlying `Vec` up to the next multiple of `N` (filling with
331+
/// `Default::default()`). SIMD-staged kernels then walk the field with
332+
/// one uniform N-lane loop — no tail-case branch.
333+
///
334+
/// `len()` returns the **logical** row count (unchanged by padding);
335+
/// `self.<field>.len()` returns the **physical** Vec length. The difference
336+
/// is the lane-alignment tail.
337+
///
338+
/// ```
339+
/// use ndarray::soa_struct;
340+
///
341+
/// soa_struct! {
342+
/// pub struct Cells {
343+
/// #[soa(pad_to_lanes = 8)]
344+
/// pub palette: u8,
345+
/// pub label: u32, // unpadded
346+
/// }
347+
/// }
348+
///
349+
/// let mut c = Cells::new();
350+
/// c.push(7, 100);
351+
/// assert_eq!(c.len(), 1); // logical: 1 row
352+
/// assert_eq!(c.palette.len(), 8); // physical: rounded up to lane 8
353+
/// assert_eq!(c.label.len(), 1); // unpadded: physical == logical
354+
/// assert_eq!(c.palette[0], 7);
355+
/// assert_eq!(c.palette[1..8], [0u8; 7]); // padded tail is Default::default()
356+
/// ```
326357
#[macro_export]
327358
macro_rules! soa_struct {
359+
// ───────────────────────────────────────────────────────────────────
360+
// Arm 1 — unpadded (no `#[soa(...)]` attribute on any field).
361+
// This is byte-for-byte the pre-PR-X2 emit: no `_logical_len` field,
362+
// `len()` reads from field lengths under `debug_assert`. Existing
363+
// callers (struct-literal construction, exhaustive patterns) are
364+
// unaffected. macro_rules! tries this arm first; if any field has
365+
// a `#[soa(pad_to_lanes = N)]` attribute the pattern fails to match
366+
// and arm 2 is tried.
367+
// ───────────────────────────────────────────────────────────────────
328368
(
329369
$(#[$meta:meta])*
330370
$vis:vis struct $name:ident {
@@ -376,6 +416,125 @@ macro_rules! soa_struct {
376416
fn default() -> Self { Self::new() }
377417
}
378418
};
419+
420+
// ───────────────────────────────────────────────────────────────────
421+
// Arm 2 — padded (at least one field has `#[soa(pad_to_lanes = N)]`).
422+
// Adds a `#[doc(hidden)] _logical_len: usize` field so `len()` can
423+
// return the semantic row count independent of lane-tail padding.
424+
// Reached only when arm 1's no-attribute pattern fails to match —
425+
// existing callers without padding never see this struct shape.
426+
// ───────────────────────────────────────────────────────────────────
427+
(
428+
$(#[$meta:meta])*
429+
$vis:vis struct $name:ident {
430+
$(
431+
$(#[soa(pad_to_lanes = $pad:literal)])?
432+
$field_vis:vis $field:ident : $ty:ty
433+
),* $(,)?
434+
}
435+
) => {
436+
$(#[$meta])*
437+
$vis struct $name {
438+
$($field_vis $field: ::std::vec::Vec<$ty>,)*
439+
/// Shared logical row count across all fields. Padded fields may
440+
/// have `self.<field>.len() > _logical_len` after `push`.
441+
/// Updated by `push` / `clear`; treat as private.
442+
///
443+
/// Only present on padded structs (at least one field has
444+
/// `#[soa(pad_to_lanes = N)]`); unpadded structs keep the
445+
/// pre-PR-X2 all-public shape.
446+
#[doc(hidden)]
447+
_logical_len: usize,
448+
}
449+
450+
impl $name {
451+
/// Construct an empty instance.
452+
pub fn new() -> Self {
453+
Self {
454+
$($field: ::std::vec::Vec::new(),)*
455+
_logical_len: 0,
456+
}
457+
}
458+
459+
/// Construct with each field pre-allocated to `cap`.
460+
///
461+
/// Fields tagged `#[soa(pad_to_lanes = N)]` also get
462+
/// `cap` worth of physical capacity, not `cap.div_ceil(N) * N` —
463+
/// the lane padding happens lazily inside `push` so the up-front
464+
/// reservation is a hint, not a hard size guarantee.
465+
pub fn with_capacity(cap: usize) -> Self {
466+
Self {
467+
$($field: ::std::vec::Vec::with_capacity(cap),)*
468+
_logical_len: 0,
469+
}
470+
}
471+
472+
/// Append one row across all fields.
473+
///
474+
/// For fields tagged `#[soa(pad_to_lanes = N)]`, the underlying
475+
/// `Vec` is padded with `<$ty as Default>::default()` up to the
476+
/// next multiple of `N` before the new value is written. Once `push`
477+
/// returns, the padding occupies slots `[_logical_len .. padded_len)`
478+
/// (the new value sits just below it) and is filled with `Default::default()`.
479+
#[allow(clippy::too_many_arguments)]
480+
pub fn push(&mut self, $($field: $ty),*) {
481+
let logical = self._logical_len;
482+
$(
483+
$crate::soa_struct!(@push_field
484+
self, $field, $field, $ty, logical
485+
$(, pad = $pad)?
486+
);
487+
)*
488+
self._logical_len = logical + 1;
489+
}
490+
491+
/// Logical row count (shared across all fields).
492+
///
493+
/// For padded fields this may be **less than** `self.<field>.len()`;
494+
/// the difference is the lane-alignment tail. Use `len()` for the
495+
/// semantic count, `self.<field>.len()` for the physical Vec length.
496+
pub fn len(&self) -> usize {
497+
self._logical_len
498+
}
499+
500+
/// Returns `true` if there are zero logical rows.
501+
pub fn is_empty(&self) -> bool { self._logical_len == 0 }
502+
503+
/// Clear all fields. Capacity is retained; logical length resets to 0.
504+
///
505+
/// Padded fields' physical `Vec`s are cleared along with the
506+
/// unpadded ones — re-pushing into a cleared struct rebuilds the
507+
/// padding from scratch.
508+
pub fn clear(&mut self) {
509+
$(self.$field.clear();)*
510+
self._logical_len = 0;
511+
}
512+
}
513+
514+
impl ::std::default::Default for $name {
515+
fn default() -> Self { Self::new() }
516+
}
517+
};
518+
519+
// Internal — padded field push: grow Vec to the next multiple of $pad
520+
// with Default::default() before writing the new value at `logical`.
521+
(@push_field $self:ident, $vec:ident, $val:ident, $ty:ty, $logical:ident, pad = $pad:literal) => {{
522+
const _: () = {
523+
// Compile-time guard: pad_to_lanes = 0 is nonsensical.
524+
assert!($pad > 0, "soa_struct! #[soa(pad_to_lanes = N)] requires N > 0");
525+
};
526+
let needed = ($logical + 1).div_ceil($pad) * $pad;
527+
while $self.$vec.len() < needed {
528+
$self.$vec.push(<$ty as ::std::default::Default>::default());
529+
}
530+
$self.$vec[$logical] = $val;
531+
}};
532+
533+
// Internal — plain (unpadded) field push inside a padded struct
534+
// (mixed cadence: some fields padded, others not).
535+
(@push_field $self:ident, $vec:ident, $val:ident, $ty:ty, $logical:ident) => {{
536+
$self.$vec.push($val);
537+
}};
379538
}
380539

381540
/// Deinterleave an AoS slice into a [`SoaVec<U, N>`] by extracting `N`
@@ -791,7 +950,9 @@ mod tests {
791950
#[test]
792951
fn macro_public_visibility_passthrough() {
793952
// Soa3 has `pub` fields; verify the field is accessible
794-
// (compilation alone proves visibility).
953+
// (compilation alone proves visibility). Soa3 is unpadded → uses
954+
// arm 1 of the macro → fields drive `len()` directly, so pushing
955+
// into individual fields still gives the right count.
795956
let mut s = Soa3::new();
796957
s.x.push(1.0);
797958
s.y.push(2.0);
@@ -994,6 +1155,128 @@ mod tests {
9941155
assert_eq!(back, aos);
9951156
}
9961157

1158+
// ------------------------------------------------------------------
1159+
// PR-X2 Worker B — `#[soa(pad_to_lanes = N)]` field attribute
1160+
// ------------------------------------------------------------------
1161+
1162+
soa_struct! {
1163+
/// 3-field SoA with two padded fields at different lane widths and
1164+
/// one unpadded field. Exercises the mixed-cadence macro arm.
1165+
pub struct PadMixed {
1166+
#[soa(pad_to_lanes = 8)]
1167+
pub palette: u8,
1168+
#[soa(pad_to_lanes = 16)]
1169+
pub depth: u16,
1170+
pub label: u32,
1171+
}
1172+
}
1173+
1174+
/// Single push into a `pad_to_lanes = 8` field rounds the physical Vec
1175+
/// up to 8 elements; logical len is 1.
1176+
#[test]
1177+
fn pad_to_lanes_single_push_grows_to_lane() {
1178+
let mut s = PadMixed::new();
1179+
s.push(7u8, 0x1234u16, 99u32);
1180+
assert_eq!(s.len(), 1, "logical len = 1");
1181+
assert_eq!(s.palette.len(), 8, "palette padded to lane 8");
1182+
assert_eq!(s.depth.len(), 16, "depth padded to lane 16");
1183+
assert_eq!(s.label.len(), 1, "label unpadded — physical = logical");
1184+
assert_eq!(s.palette[0], 7);
1185+
assert_eq!(s.depth[0], 0x1234);
1186+
assert_eq!(s.label[0], 99);
1187+
// Padded tail is Default::default().
1188+
for &b in &s.palette[1..8] {
1189+
assert_eq!(b, 0u8);
1190+
}
1191+
for &d in &s.depth[1..16] {
1192+
assert_eq!(d, 0u16);
1193+
}
1194+
}
1195+
1196+
/// Crossing a lane boundary on a padded field grows the Vec by another N.
1197+
#[test]
1198+
fn pad_to_lanes_crosses_lane_boundary() {
1199+
let mut s = PadMixed::new();
1200+
for i in 0..9u8 {
1201+
s.push(i, i as u16, i as u32);
1202+
}
1203+
assert_eq!(s.len(), 9);
1204+
// palette: 9 pushes → next multiple of 8 is 16
1205+
assert_eq!(s.palette.len(), 16);
1206+
// depth: 9 pushes → still inside lane 16
1207+
assert_eq!(s.depth.len(), 16);
1208+
// label: unpadded
1209+
assert_eq!(s.label.len(), 9);
1210+
// first 9 slots carry user values
1211+
for i in 0..9 {
1212+
assert_eq!(s.palette[i], i as u8);
1213+
assert_eq!(s.depth[i], i as u16);
1214+
assert_eq!(s.label[i], i as u32);
1215+
}
1216+
// tail is default-zeroed
1217+
for &b in &s.palette[9..16] {
1218+
assert_eq!(b, 0u8);
1219+
}
1220+
}
1221+
1222+
/// `clear()` resets `_logical_len` to 0 and empties every physical Vec.
1223+
#[test]
1224+
fn pad_to_lanes_clear_resets_both() {
1225+
let mut s = PadMixed::new();
1226+
s.push(1, 2, 3);
1227+
s.push(4, 5, 6);
1228+
assert_eq!(s.len(), 2);
1229+
s.clear();
1230+
assert_eq!(s.len(), 0);
1231+
assert!(s.is_empty());
1232+
assert_eq!(s.palette.len(), 0);
1233+
assert_eq!(s.depth.len(), 0);
1234+
assert_eq!(s.label.len(), 0);
1235+
// Reuse after clear works — padding rebuilds from scratch.
1236+
s.push(99, 0xFFFF, 7);
1237+
assert_eq!(s.len(), 1);
1238+
assert_eq!(s.palette.len(), 8);
1239+
assert_eq!(s.depth.len(), 16);
1240+
}
1241+
1242+
soa_struct! {
1243+
/// All-padded variant — every field gets the same lane width.
1244+
pub struct PadUniform {
1245+
#[soa(pad_to_lanes = 4)]
1246+
pub a: i32,
1247+
#[soa(pad_to_lanes = 4)]
1248+
pub b: i32,
1249+
}
1250+
}
1251+
1252+
/// All-padded struct: every field grows in sync with the lane cadence.
1253+
#[test]
1254+
fn pad_to_lanes_uniform_cadence() {
1255+
let mut s = PadUniform::new();
1256+
s.push(10, 20);
1257+
s.push(30, 40);
1258+
s.push(50, 60);
1259+
assert_eq!(s.len(), 3);
1260+
// 3 pushes → next multiple of 4 is 4
1261+
assert_eq!(s.a.len(), 4);
1262+
assert_eq!(s.b.len(), 4);
1263+
assert_eq!(s.a[0..3], [10, 30, 50]);
1264+
assert_eq!(s.b[0..3], [20, 40, 60]);
1265+
assert_eq!(s.a[3], 0);
1266+
assert_eq!(s.b[3], 0);
1267+
}
1268+
1269+
/// `with_capacity` initialises an empty padded struct correctly.
1270+
#[test]
1271+
fn pad_to_lanes_with_capacity_empty() {
1272+
let s = PadMixed::with_capacity(64);
1273+
assert_eq!(s.len(), 0);
1274+
assert!(s.is_empty());
1275+
assert_eq!(s.palette.len(), 0);
1276+
assert_eq!(s.depth.len(), 0);
1277+
assert_eq!(s.label.len(), 0);
1278+
}
1279+
9971280
/// Inference-only entry: caller relies on closure return-type ascription,
9981281
/// no turbofish at all.
9991282
#[test]

0 commit comments

Comments
 (0)