@@ -323,8 +323,48 @@ impl<'a, T, const N: usize> Iterator for SoaChunks<'a, T, N> {
323323/// assert_eq!(b.means_y.as_slice(), &[2.0, 5.0]);
324324/// assert_eq!(b.means_z.as_slice(), &[3.0, 6.0]);
325325/// ```
326+ ///
327+ /// # Example — `#[soa(pad_to_lanes = N)]` field attribute (PR-X2 Worker B)
328+ ///
329+ /// Tag a field with `#[soa(pad_to_lanes = N)]` to make `push` pad the
330+ /// underlying `Vec` up to the next multiple of `N` (filling with
331+ /// `Default::default()`). SIMD-staged kernels then walk the field with
332+ /// one uniform N-lane loop — no tail-case branch.
333+ ///
334+ /// `len()` returns the **logical** row count (unchanged by padding);
335+ /// `self.<field>.len()` returns the **physical** Vec length. The difference
336+ /// is the lane-alignment tail.
337+ ///
338+ /// ```
339+ /// use ndarray::soa_struct;
340+ ///
341+ /// soa_struct! {
342+ /// pub struct Cells {
343+ /// #[soa(pad_to_lanes = 8)]
344+ /// pub palette: u8,
345+ /// pub label: u32, // unpadded
346+ /// }
347+ /// }
348+ ///
349+ /// let mut c = Cells::new();
350+ /// c.push(7, 100);
351+ /// assert_eq!(c.len(), 1); // logical: 1 row
352+ /// assert_eq!(c.palette.len(), 8); // physical: rounded up to lane 8
353+ /// assert_eq!(c.label.len(), 1); // unpadded: physical == logical
354+ /// assert_eq!(c.palette[0], 7);
355+ /// assert_eq!(c.palette[1..8], [0u8; 7]); // padded tail is Default::default()
356+ /// ```
326357#[ macro_export]
327358macro_rules! soa_struct {
359+ // ───────────────────────────────────────────────────────────────────
360+ // Arm 1 — unpadded (no `#[soa(...)]` attribute on any field).
361+ // This is byte-for-byte the pre-PR-X2 emit: no `_logical_len` field,
362+ // `len()` reads from field lengths under `debug_assert`. Existing
363+ // callers (struct-literal construction, exhaustive patterns) are
364+ // unaffected. macro_rules! tries this arm first; if any field has
365+ // a `#[soa(pad_to_lanes = N)]` attribute the pattern fails to match
366+ // and arm 2 is tried.
367+ // ───────────────────────────────────────────────────────────────────
328368 (
329369 $( #[ $meta: meta] ) *
330370 $vis: vis struct $name: ident {
@@ -376,6 +416,125 @@ macro_rules! soa_struct {
376416 fn default ( ) -> Self { Self :: new( ) }
377417 }
378418 } ;
419+
420+ // ───────────────────────────────────────────────────────────────────
421+ // Arm 2 — padded (at least one field has `#[soa(pad_to_lanes = N)]`).
422+ // Adds a `#[doc(hidden)] _logical_len: usize` field so `len()` can
423+ // return the semantic row count independent of lane-tail padding.
424+ // Reached only when arm 1's no-attribute pattern fails to match —
425+ // existing callers without padding never see this struct shape.
426+ // ───────────────────────────────────────────────────────────────────
427+ (
428+ $( #[ $meta: meta] ) *
429+ $vis: vis struct $name: ident {
430+ $(
431+ $( #[ soa( pad_to_lanes = $pad: literal) ] ) ?
432+ $field_vis: vis $field: ident : $ty: ty
433+ ) ,* $( , ) ?
434+ }
435+ ) => {
436+ $( #[ $meta] ) *
437+ $vis struct $name {
438+ $( $field_vis $field: :: std:: vec:: Vec <$ty>, ) *
439+ /// Shared logical row count across all fields. Padded fields may
440+ /// have `self.<field>.len() > _logical_len` after `push`.
441+ /// Updated by `push` / `clear`; treat as private.
442+ ///
443+ /// Only present on padded structs (at least one field has
444+ /// `#[soa(pad_to_lanes = N)]`); unpadded structs keep the
445+ /// pre-PR-X2 all-public shape.
446+ #[ doc( hidden) ]
447+ _logical_len: usize ,
448+ }
449+
450+ impl $name {
451+ /// Construct an empty instance.
452+ pub fn new( ) -> Self {
453+ Self {
454+ $( $field: :: std:: vec:: Vec :: new( ) , ) *
455+ _logical_len: 0 ,
456+ }
457+ }
458+
459+ /// Construct with each field pre-allocated to `cap`.
460+ ///
461+ /// Padded fields per `#[soa(pad_to_lanes = N)]` get
462+ /// `cap` worth of physical capacity, not `cap.div_ceil(N) * N` —
463+ /// the lane padding happens lazily inside `push` so the up-front
464+ /// reservation is a hint, not a hard size guarantee.
465+ pub fn with_capacity( cap: usize ) -> Self {
466+ Self {
467+ $( $field: :: std:: vec:: Vec :: with_capacity( cap) , ) *
468+ _logical_len: 0 ,
469+ }
470+ }
471+
472+ /// Append one row across all fields.
473+ ///
474+ /// For fields tagged `#[soa(pad_to_lanes = N)]`, the underlying
475+ /// `Vec` is padded with `<$ty as Default>::default()` up to the
476+ /// next multiple of `N` before the new value is written. Padded
477+ /// elements occupy slots `[_logical_len + 1 .. padded_len)` and
478+ /// are guaranteed to compare equal to `Default::default()`.
479+ #[ allow( clippy:: too_many_arguments) ]
480+ pub fn push( & mut self , $( $field: $ty) ,* ) {
481+ let logical = self . _logical_len;
482+ $(
483+ $crate:: soa_struct!( @push_field
484+ self , $field, $field, $ty, logical
485+ $( , pad = $pad) ?
486+ ) ;
487+ ) *
488+ self . _logical_len = logical + 1 ;
489+ }
490+
491+ /// Logical row count (shared across all fields).
492+ ///
493+ /// For padded fields this may be **less than** `self.<field>.len()`;
494+ /// the difference is the lane-alignment tail. Use `len()` for the
495+ /// semantic count, `self.<field>.len()` for the physical Vec length.
496+ pub fn len( & self ) -> usize {
497+ self . _logical_len
498+ }
499+
500+ /// Returns `true` if there are zero logical rows.
501+ pub fn is_empty( & self ) -> bool { self . _logical_len == 0 }
502+
503+ /// Clear all fields. Capacity is retained; logical length resets to 0.
504+ ///
505+ /// Padded fields' physical `Vec`s are cleared along with the
506+ /// unpadded ones — re-pushing into a cleared struct rebuilds the
507+ /// padding from scratch.
508+ pub fn clear( & mut self ) {
509+ $( self . $field. clear( ) ; ) *
510+ self . _logical_len = 0 ;
511+ }
512+ }
513+
514+ impl :: std:: default :: Default for $name {
515+ fn default ( ) -> Self { Self :: new( ) }
516+ }
517+ } ;
518+
519+ // Internal — padded field push: grow Vec to the next multiple of $pad
520+ // with Default::default() before writing the new value at `logical`.
521+ ( @push_field $self: ident, $vec: ident, $val: ident, $ty: ty, $logical: ident, pad = $pad: literal) => { {
522+ const _: ( ) = {
523+ // Compile-time guard: pad_to_lanes = 0 is nonsensical.
524+ assert!( $pad > 0 , "soa_struct! #[soa(pad_to_lanes = N)] requires N > 0" ) ;
525+ } ;
526+ let needed = ( $logical + 1 ) . div_ceil( $pad) * $pad;
527+ while $self. $vec. len( ) < needed {
528+ $self. $vec. push( <$ty as :: std:: default :: Default >:: default ( ) ) ;
529+ }
530+ $self. $vec[ $logical] = $val;
531+ } } ;
532+
533+ // Internal — plain (unpadded) field push inside a padded struct
534+ // (mixed cadence: some fields padded, others not).
535+ ( @push_field $self: ident, $vec: ident, $val: ident, $ty: ty, $logical: ident) => { {
536+ $self. $vec. push( $val) ;
537+ } } ;
379538}
380539
381540/// Deinterleave an AoS slice into a [`SoaVec<U, N>`] by extracting `N`
@@ -791,7 +950,9 @@ mod tests {
791950 #[ test]
792951 fn macro_public_visibility_passthrough ( ) {
793952 // Soa3 has `pub` fields; verify the field is accessible
794- // (compilation alone proves visibility).
953+ // (compilation alone proves visibility). Soa3 is unpadded → uses
954+ // arm 1 of the macro → fields drive `len()` directly, so pushing
955+ // into individual fields still gives the right count.
795956 let mut s = Soa3 :: new ( ) ;
796957 s. x . push ( 1.0 ) ;
797958 s. y . push ( 2.0 ) ;
@@ -994,6 +1155,128 @@ mod tests {
9941155 assert_eq ! ( back, aos) ;
9951156 }
9961157
1158+ // ------------------------------------------------------------------
1159+ // PR-X2 Worker B — `#[soa(pad_to_lanes = N)]` field attribute
1160+ // ------------------------------------------------------------------
1161+
1162+ soa_struct ! {
1163+ /// 3-field SoA with two padded fields at different lane widths and
1164+ /// one unpadded field. Exercises the mixed-cadence macro arm.
1165+ pub struct PadMixed {
1166+ #[ soa( pad_to_lanes = 8 ) ]
1167+ pub palette: u8 ,
1168+ #[ soa( pad_to_lanes = 16 ) ]
1169+ pub depth: u16 ,
1170+ pub label: u32 ,
1171+ }
1172+ }
1173+
/// One row pushed into an empty `PadMixed`: each padded field jumps to a
/// full lane (8 and 16 elements) while the logical length is 1.
#[test]
fn pad_to_lanes_single_push_grows_to_lane() {
    let mut cells = PadMixed::new();
    cells.push(7u8, 0x1234u16, 99u32);

    // Logical length versus per-field physical lengths.
    assert_eq!(cells.len(), 1, "logical len = 1");
    assert_eq!(cells.palette.len(), 8, "palette padded to lane 8");
    assert_eq!(cells.depth.len(), 16, "depth padded to lane 16");
    assert_eq!(cells.label.len(), 1, "label unpadded — physical = logical");

    // The pushed row sits at index 0 of every field.
    assert_eq!(
        (cells.palette[0], cells.depth[0], cells.label[0]),
        (7, 0x1234, 99)
    );

    // Everything after the row in a padded field is `Default::default()`.
    assert_eq!(cells.palette[1..8], [0u8; 7]);
    assert_eq!(cells.depth[1..16], [0u16; 15]);
}
1195+
/// Filling a lane exactly must not over-pad, and the first row past the
/// boundary must grow the padded `Vec` by one more lane.
#[test]
fn pad_to_lanes_crosses_lane_boundary() {
    let mut s = PadMixed::new();

    // Exact-multiple case: 8 rows fill one `palette` lane completely, so
    // no second lane may have been allocated yet.
    for i in 0..8u8 {
        s.push(i, u16::from(i), u32::from(i));
    }
    assert_eq!(s.len(), 8);
    assert_eq!(s.palette.len(), 8);
    assert_eq!(s.depth.len(), 16);
    assert_eq!(s.label.len(), 8);

    // The ninth row crosses the 8-lane boundary.
    s.push(8, 8, 8);
    assert_eq!(s.len(), 9);
    // palette: 9 rows → next multiple of 8 is 16
    assert_eq!(s.palette.len(), 16);
    // depth: 9 rows → still inside the first lane of 16
    assert_eq!(s.depth.len(), 16);
    // label: unpadded
    assert_eq!(s.label.len(), 9);

    // The first 9 slots of every field carry the pushed values.
    for i in 0..9u8 {
        let row = usize::from(i);
        assert_eq!(s.palette[row], i);
        assert_eq!(s.depth[row], u16::from(i));
        assert_eq!(s.label[row], u32::from(i));
    }

    // The lane tails of both padded fields are default-zeroed.
    assert_eq!(s.palette[9..], [0u8; 7]);
    assert_eq!(s.depth[9..], [0u16; 7]);
}
1221+
/// `clear()` resets the logical length and empties every physical `Vec`;
/// a subsequent `push` rebuilds the padding without leaking old rows.
#[test]
fn pad_to_lanes_clear_resets_both() {
    let mut s = PadMixed::new();
    s.push(1, 2, 3);
    s.push(4, 5, 6);
    assert_eq!(s.len(), 2);

    s.clear();
    assert_eq!(s.len(), 0);
    assert!(s.is_empty());
    assert_eq!(s.palette.len(), 0);
    assert_eq!(s.depth.len(), 0);
    assert_eq!(s.label.len(), 0);

    // Reuse after clear works — padding rebuilds from scratch.
    s.push(99, 0xFFFF, 7);
    assert_eq!(s.len(), 1);
    assert_eq!(s.palette.len(), 8);
    assert_eq!(s.depth.len(), 16);
    assert_eq!(s.label.len(), 1);

    // The new row landed at index 0 of every field.
    assert_eq!(s.palette[0], 99);
    assert_eq!(s.depth[0], 0xFFFF);
    assert_eq!(s.label[0], 7);

    // Index 1 held the second pre-clear row (4 / 5); it must now be
    // padding, not stale data.
    assert_eq!(s.palette[1..], [0u8; 7]);
    assert_eq!(s.depth[1..], [0u16; 15]);
}
1241+
1242+ soa_struct ! {
1243+ /// All-padded variant — every field gets the same lane width.
1244+ pub struct PadUniform {
1245+ #[ soa( pad_to_lanes = 4 ) ]
1246+ pub a: i32 ,
1247+ #[ soa( pad_to_lanes = 4 ) ]
1248+ pub b: i32 ,
1249+ }
1250+ }
1251+
/// With every field padded to the same width, all fields grow in lockstep:
/// three rows occupy one 4-element lane in each field.
#[test]
fn pad_to_lanes_uniform_cadence() {
    let mut pairs = PadUniform::new();
    for (a, b) in [(10, 20), (30, 40), (50, 60)] {
        pairs.push(a, b);
    }

    assert_eq!(pairs.len(), 3);
    // 3 rows → next multiple of 4 is 4
    assert_eq!(pairs.a.len(), 4);
    assert_eq!(pairs.b.len(), 4);
    // Three pushed values followed by a single default-valued pad slot.
    assert_eq!(pairs.a, [10, 30, 50, 0]);
    assert_eq!(pairs.b, [20, 40, 60, 0]);
}
1268+
/// `with_capacity` yields an empty padded struct (no rows, no padding)
/// whose fields have each reserved at least the requested capacity.
#[test]
fn pad_to_lanes_with_capacity_empty() {
    let s = PadMixed::with_capacity(64);

    // No rows and no eager padding.
    assert_eq!(s.len(), 0);
    assert!(s.is_empty());
    assert_eq!(s.palette.len(), 0);
    assert_eq!(s.depth.len(), 0);
    assert_eq!(s.label.len(), 0);

    // The capacity hint reached every field, padded or not.
    assert!(s.palette.capacity() >= 64);
    assert!(s.depth.capacity() >= 64);
    assert!(s.label.capacity() >= 64);
}
1279+
9971280 /// Inference-only entry: caller relies on closure return-type ascription,
9981281 /// no turbofish at all.
9991282 #[ test]
0 commit comments