Skip to content

Commit 6f267ae

Browse files
committed
feat(hpc): W4 bulk polish — bulk_for_each + deprecated bulk_scan alias
Rename bulk_scan to bulk_for_each (bulk_scan is kept as a #[deprecated] forwarding alias), un-gate the bulk_apply × aos_to_soa integration test, and add #[inline] to bulk_apply, bulk_for_each, and bulk_scan. Update the one in-repo caller in blocked_grid/tests.rs. https://claude.ai/code/session_017GFLBnDy23AWBqvkbHHC41
1 parent fecf60a commit 6f267ae

2 files changed

Lines changed: 84 additions & 33 deletions

File tree

src/hpc/blocked_grid/tests.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
//! Test groups
88
//! -----------
99
//! 1. W4 bulk_apply composition — map_l1 composes with `hpc::bulk::bulk_apply`
10-
//! and `bulk_scan` over per-row slices inside the closure.
10+
//! and `bulk_for_each` over per-row slices inside the closure.
1111
//! 2. L1→L2 cascade — 256×256 ShaderMantissaGrid map_l1 populates
1212
//! cell-by-cell, then map_l2 aggregates per super-block.
1313
//! 3. Half-square AMX INT8 — AmxInt8Grid::new(32, 128), blocks_base coords.
@@ -22,24 +22,24 @@ use crate::hpc::blocked_grid::{
2222
};
2323

2424
// ============================================================
25-
// 1. W4 bulk_apply / bulk_scan composition
25+
// 1. W4 bulk_apply / bulk_for_each composition
2626
//
2727
// Demonstrates that PR-X3's map_l1 composes with the W4 primitives:
2828
// - outer loop = map_l1 (one closure per 64×64 base block)
29-
// - inner loop = bulk_apply / bulk_scan over each row slice in the block
29+
// - inner loop = bulk_apply / bulk_for_each over each row slice in the block
3030
//
3131
// This proves the two design layers nest without either re-implementing the
3232
// other's chunking logic.
3333
// ============================================================
3434

35-
/// map_l1 closure using bulk_scan to read each row and compute a per-row sum,
36-
/// storing it into the first cell of the corresponding output row.
35+
/// map_l1 closure using bulk_for_each to read each row and compute a per-row
36+
/// sum, storing it into the first cell of the corresponding output row.
3737
///
38-
/// Demonstrates: bulk_scan(row_slice, chunk_size, closure) correctly
38+
/// Demonstrates: bulk_for_each(row_slice, chunk_size, closure) correctly
3939
/// accumulates the sum; no re-implemented chunking inside map_l1.
4040
#[test]
41-
fn w4_bulk_scan_inside_map_l1_row_sum() {
42-
use crate::hpc::bulk::bulk_scan;
41+
fn w4_bulk_for_each_inside_map_l1_row_sum() {
42+
use crate::hpc::bulk::bulk_for_each;
4343

4444
// Build a 64×64 grid filled with known values.
4545
let mut g = BlockedGrid::<u64>::new(64, 64);
@@ -50,13 +50,13 @@ fn w4_bulk_scan_inside_map_l1_row_sum() {
5050
}
5151
}
5252

53-
// map_l1: for each block row, use bulk_scan to compute the row sum and
53+
// map_l1: for each block row, use bulk_for_each to compute the row sum and
5454
// store it in the first cell of the output row.
5555
let out = g.map_l1::<u64, _>(|inp, outp| {
5656
for r in 0..64 {
5757
let row = inp.row(r);
5858
let mut row_sum = 0u64;
59-
bulk_scan(row, 16, |chunk, _start| {
59+
bulk_for_each(row, 16, |chunk, _start| {
6060
row_sum += chunk.iter().sum::<u64>();
6161
});
6262
outp.row_mut(r)[0] = row_sum;

src/hpc/bulk.rs

Lines changed: 74 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
//! (chunk_size matched to L1 working-set) or when staging chunks to SoA for
66
//! SIMD processing inside the closure.
77
//!
8-
//! [`bulk_scan`] is the read-only sibling for non-mutating traversal.
8+
//! [`bulk_for_each`] is the read-only sibling for non-mutating traversal.
9+
//! [`bulk_scan`] is a deprecated alias for [`bulk_for_each`].
910
//!
1011
//! Both helpers are scalar wrappers — no `#[target_feature]`, no per-arch
1112
//! dispatch. They are user-level code per the layering rule in
@@ -30,7 +31,7 @@
3031
//! .map(|i| Item { a: i as f32, b: (i * 2) as f32, c: (i * 3) as f32 })
3132
//! .collect();
3233
//! bulk_apply(&mut items, 16, |chunk, _start| {
33-
//! let soa = aos_to_soa::<_, _, 3, _>(chunk, |it| [it.a, it.b, it.c]);
34+
//! let soa = aos_to_soa::<_, f32, 3, _>(chunk, |it| [it.a, it.b, it.c]);
3435
//! // ... per-field SIMD-style loops over soa.field(0), soa.field(1), ...
3536
//! let _ = soa;
3637
//! });
@@ -65,6 +66,7 @@
6566
/// });
6667
/// assert_eq!(v, vec![0, 10, 20, 30, 40, 50, 60, 70, 80, 90]);
6768
/// ```
69+
#[inline]
6870
pub fn bulk_apply<T, F>(items: &mut [T], chunk_size: usize, mut f: F)
6971
where
7072
F: FnMut(&mut [T], usize),
@@ -89,19 +91,20 @@ where
8991
///
9092
/// # Example
9193
/// ```
92-
/// use ndarray::hpc::bulk::bulk_scan;
94+
/// use ndarray::hpc::bulk::bulk_for_each;
9395
/// let v: Vec<i32> = (0..10).collect();
9496
/// let mut sum = 0i32;
95-
/// bulk_scan(&v, 4, |chunk, _start| {
97+
/// bulk_for_each(&v, 4, |chunk, _start| {
9698
/// sum += chunk.iter().sum::<i32>();
9799
/// });
98100
/// assert_eq!(sum, 45);
99101
/// ```
100-
pub fn bulk_scan<T, F>(items: &[T], chunk_size: usize, mut f: F)
102+
#[inline]
103+
pub fn bulk_for_each<T, F>(items: &[T], chunk_size: usize, mut f: F)
101104
where
102105
F: FnMut(&[T], usize),
103106
{
104-
assert!(chunk_size > 0, "bulk_scan: chunk_size must be > 0");
107+
assert!(chunk_size > 0, "bulk_for_each: chunk_size must be > 0");
105108
let mut start = 0;
106109
for chunk in items.chunks(chunk_size) {
107110
let n = chunk.len();
@@ -110,6 +113,32 @@ where
110113
}
111114
}
112115

116+
/// Deprecated alias for [`bulk_for_each`].
117+
///
118+
/// Use [`bulk_for_each`] instead. This alias exists only to avoid breaking
119+
/// callers from before the rename and will be removed in a future release.
120+
///
121+
/// # Example
122+
/// ```
123+
/// #[allow(deprecated)]
124+
/// use ndarray::hpc::bulk::bulk_scan;
125+
/// let v: Vec<i32> = (0..10).collect();
126+
/// let mut sum = 0i32;
127+
/// #[allow(deprecated)]
128+
/// bulk_scan(&v, 4, |chunk, _start| {
129+
/// sum += chunk.iter().sum::<i32>();
130+
/// });
131+
/// assert_eq!(sum, 45);
132+
/// ```
133+
#[deprecated(note = "renamed to `bulk_for_each`")]
134+
#[inline]
135+
pub fn bulk_scan<T, F>(items: &[T], chunk_size: usize, f: F)
136+
where
137+
F: FnMut(&[T], usize),
138+
{
139+
bulk_for_each(items, chunk_size, f)
140+
}
141+
113142
#[cfg(test)]
114143
mod tests {
115144
use super::*;
@@ -206,71 +235,71 @@ mod tests {
206235
assert_eq!(count, 0);
207236
}
208237

209-
// ----- bulk_scan -----
238+
// ----- bulk_for_each -----
210239

211240
#[test]
212-
fn bulk_scan_chunk_size_divides_len() {
241+
fn bulk_for_each_chunk_size_divides_len() {
213242
let v: Vec<i32> = (0..10).collect();
214243
let mut sizes = Vec::new();
215-
bulk_scan(&v, 5, |chunk, _start| {
244+
bulk_for_each(&v, 5, |chunk, _start| {
216245
sizes.push(chunk.len());
217246
});
218247
assert_eq!(sizes, vec![5, 5]);
219248
}
220249

221250
#[test]
222-
fn bulk_scan_chunk_size_does_not_divide_len() {
251+
fn bulk_for_each_chunk_size_does_not_divide_len() {
223252
let v: Vec<i32> = (0..10).collect();
224253
let mut sizes = Vec::new();
225-
bulk_scan(&v, 3, |chunk, _start| {
254+
bulk_for_each(&v, 3, |chunk, _start| {
226255
sizes.push(chunk.len());
227256
});
228257
assert_eq!(sizes, vec![3, 3, 3, 1]);
229258
}
230259

231260
#[test]
232-
fn bulk_scan_chunk_size_greater_than_len() {
261+
fn bulk_for_each_chunk_size_greater_than_len() {
233262
let v: Vec<i32> = (0..10).collect();
234263
let mut sizes = Vec::new();
235-
bulk_scan(&v, 100, |chunk, start| {
264+
bulk_for_each(&v, 100, |chunk, start| {
236265
assert_eq!(start, 0);
237266
sizes.push(chunk.len());
238267
});
239268
assert_eq!(sizes, vec![10]);
240269
}
241270

242271
#[test]
243-
fn bulk_scan_start_indices_3_3_3_1() {
272+
fn bulk_for_each_start_indices_3_3_3_1() {
244273
let v: Vec<i32> = (0..10).collect();
245274
let mut start_indices: Vec<usize> = Vec::new();
246-
bulk_scan(&v, 3, |_chunk, start| {
275+
bulk_for_each(&v, 3, |_chunk, start| {
247276
start_indices.push(start);
248277
});
249278
assert_eq!(start_indices, vec![0, 3, 6, 9]);
250279
}
251280

252281
#[test]
253-
fn bulk_scan_sums_chunks() {
282+
fn bulk_for_each_sums_chunks() {
254283
let v: Vec<i32> = (0..10).collect();
255284
let mut sum = 0i32;
256-
bulk_scan(&v, 4, |chunk, _start| {
285+
bulk_for_each(&v, 4, |chunk, _start| {
257286
sum += chunk.iter().sum::<i32>();
258287
});
259288
assert_eq!(sum, 45);
260289
}
261290

262291
#[test]
263292
#[should_panic(expected = "chunk_size must be > 0")]
264-
fn bulk_scan_panics_on_zero_chunk_size() {
293+
fn bulk_for_each_panics_on_zero_chunk_size() {
265294
let v: Vec<i32> = (0..4).collect();
266-
bulk_scan(&v, 0, |_, _| {});
295+
bulk_for_each(&v, 0, |_, _| {});
267296
}
268297

269298
#[test]
270-
fn bulk_scan_chunk_size_usize_max_single_chunk() {
299+
fn bulk_for_each_chunk_size_usize_max_single_chunk() {
271300
let v: Vec<i32> = (0..4).collect();
272301
let mut count = 0;
273-
bulk_scan(&v, usize::MAX, |chunk, start| {
302+
bulk_for_each(&v, usize::MAX, |chunk, start| {
274303
count += 1;
275304
assert_eq!(start, 0);
276305
assert_eq!(chunk.len(), 4);
@@ -279,15 +308,37 @@ mod tests {
279308
}
280309

281310
#[test]
282-
fn bulk_scan_empty_slice() {
311+
fn bulk_for_each_empty_slice() {
283312
let v: Vec<i32> = Vec::new();
284313
let mut count = 0;
285-
bulk_scan(&v, 4, |_, _| {
314+
bulk_for_each(&v, 4, |_, _| {
286315
count += 1;
287316
});
288317
assert_eq!(count, 0);
289318
}
290319

320+
// ----- bulk_scan (deprecated alias) -----
321+
// These tests verify the alias still compiles and delegates correctly.
322+
323+
#[test]
324+
#[allow(deprecated)]
325+
fn bulk_scan_deprecated_alias_still_works() {
326+
let v: Vec<i32> = (0..10).collect();
327+
let mut sum = 0i32;
328+
bulk_scan(&v, 4, |chunk, _start| {
329+
sum += chunk.iter().sum::<i32>();
330+
});
331+
assert_eq!(sum, 45);
332+
}
333+
334+
#[test]
335+
#[allow(deprecated)]
336+
#[should_panic(expected = "chunk_size must be > 0")]
337+
fn bulk_scan_deprecated_alias_panics_on_zero_chunk_size() {
338+
let v: Vec<i32> = (0..4).collect();
339+
bulk_scan(&v, 0, |_, _| {});
340+
}
341+
291342
// ----- integration with aos_to_soa -----
292343
//
293344
// hpc::soa and hpc::bulk co-merge in PR #156, so the worker-isolation

0 commit comments

Comments
 (0)