From 7cd226a7e7b9d266c5da6e82a68c503827abd895 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Mon, 23 Mar 2026 16:41:40 +0000 Subject: [PATCH] Move "vector maximum" intrinsics to the appropriate section Some "vector maximum" intrinsics were listed under the "vector minimum" section. --- neon_intrinsics/advsimd.md | 44 +++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/neon_intrinsics/advsimd.md b/neon_intrinsics/advsimd.md index 630ca2c6..80bf0077 100644 --- a/neon_intrinsics/advsimd.md +++ b/neon_intrinsics/advsimd.md @@ -923,24 +923,28 @@ The intrinsics in this section are guarded by the macro ``__ARM_NEON``. #### Maximum -| Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures | -|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------|-----------------------------|--------------------|---------------------------| -| int8x8_t vmax_s8(
     int8x8_t a,
     int8x8_t b)
| `a -> Vn.8B`
`b -> Vm.8B` | `SMAX Vd.8B,Vn.8B,Vm.8B` | `Vd.8B -> result` | `v7/A32/A64` | -| int8x16_t vmaxq_s8(
     int8x16_t a,
     int8x16_t b)
| `a -> Vn.16B`
`b -> Vm.16B` | `SMAX Vd.16B,Vn.16B,Vm.16B` | `Vd.16B -> result` | `v7/A32/A64` | -| int16x4_t vmax_s16(
     int16x4_t a,
     int16x4_t b)
| `a -> Vn.4H`
`b -> Vm.4H` | `SMAX Vd.4H,Vn.4H,Vm.4H` | `Vd.4H -> result` | `v7/A32/A64` | -| int16x8_t vmaxq_s16(
     int16x8_t a,
     int16x8_t b)
| `a -> Vn.8H`
`b -> Vm.8H` | `SMAX Vd.8H,Vn.8H,Vm.8H` | `Vd.8H -> result` | `v7/A32/A64` | -| int32x2_t vmax_s32(
     int32x2_t a,
     int32x2_t b)
| `a -> Vn.2S`
`b -> Vm.2S` | `SMAX Vd.2S,Vn.2S,Vm.2S` | `Vd.2S -> result` | `v7/A32/A64` | -| int32x4_t vmaxq_s32(
     int32x4_t a,
     int32x4_t b)
| `a -> Vn.4S`
`b -> Vm.4S` | `SMAX Vd.4S,Vn.4S,Vm.4S` | `Vd.4S -> result` | `v7/A32/A64` | -| uint8x8_t vmax_u8(
     uint8x8_t a,
     uint8x8_t b)
| `a -> Vn.8B`
`b -> Vm.8B` | `UMAX Vd.8B,Vn.8B,Vm.8B` | `Vd.8B -> result` | `v7/A32/A64` | -| uint8x16_t vmaxq_u8(
     uint8x16_t a,
     uint8x16_t b)
| `a -> Vn.16B`
`b -> Vm.16B` | `UMAX Vd.16B,Vn.16B,Vm.16B` | `Vd.16B -> result` | `v7/A32/A64` | -| uint16x4_t vmax_u16(
     uint16x4_t a,
     uint16x4_t b)
| `a -> Vn.4H`
`b -> Vm.4H` | `UMAX Vd.4H,Vn.4H,Vm.4H` | `Vd.4H -> result` | `v7/A32/A64` | -| uint16x8_t vmaxq_u16(
     uint16x8_t a,
     uint16x8_t b)
| `a -> Vn.8H`
`b -> Vm.8H` | `UMAX Vd.8H,Vn.8H,Vm.8H` | `Vd.8H -> result` | `v7/A32/A64` | -| uint32x2_t vmax_u32(
     uint32x2_t a,
     uint32x2_t b)
| `a -> Vn.2S`
`b -> Vm.2S` | `UMAX Vd.2S,Vn.2S,Vm.2S` | `Vd.2S -> result` | `v7/A32/A64` | -| uint32x4_t vmaxq_u32(
     uint32x4_t a,
     uint32x4_t b)
| `a -> Vn.4S`
`b -> Vm.4S` | `UMAX Vd.4S,Vn.4S,Vm.4S` | `Vd.4S -> result` | `v7/A32/A64` | -| float32x2_t vmax_f32(
     float32x2_t a,
     float32x2_t b)
| `a -> Vn.2S`
`b -> Vm.2S` | `FMAX Vd.2S,Vn.2S,Vm.2S` | `Vd.2S -> result` | `v7/A32/A64` | -| float32x4_t vmaxq_f32(
     float32x4_t a,
     float32x4_t b)
| `a -> Vn.4S`
`b -> Vm.4S` | `FMAX Vd.4S,Vn.4S,Vm.4S` | `Vd.4S -> result` | `v7/A32/A64` | -| float64x1_t vmax_f64(
     float64x1_t a,
     float64x1_t b)
| `a -> Dn`
`b -> Dm` | `FMAX Dd,Dn,Dm` | `Dd -> result` | `A64` | -| float64x2_t vmaxq_f64(
     float64x2_t a,
     float64x2_t b)
| `a -> Vn.2D`
`b -> Vm.2D` | `FMAX Vd.2D,Vn.2D,Vm.2D` | `Vd.2D -> result` | `A64` | +| Intrinsic | Argument preparation | AArch64 Instruction | Result | Supported architectures | +|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------|-----------------------------|--------------------|---------------------------| +| int8x8_t vmax_s8(
     int8x8_t a,
     int8x8_t b)
| `a -> Vn.8B`
`b -> Vm.8B` | `SMAX Vd.8B,Vn.8B,Vm.8B` | `Vd.8B -> result` | `v7/A32/A64` | +| int8x16_t vmaxq_s8(
     int8x16_t a,
     int8x16_t b)
| `a -> Vn.16B`
`b -> Vm.16B` | `SMAX Vd.16B,Vn.16B,Vm.16B` | `Vd.16B -> result` | `v7/A32/A64` | +| int16x4_t vmax_s16(
     int16x4_t a,
     int16x4_t b)
| `a -> Vn.4H`
`b -> Vm.4H` | `SMAX Vd.4H,Vn.4H,Vm.4H` | `Vd.4H -> result` | `v7/A32/A64` | +| int16x8_t vmaxq_s16(
     int16x8_t a,
     int16x8_t b)
| `a -> Vn.8H`
`b -> Vm.8H` | `SMAX Vd.8H,Vn.8H,Vm.8H` | `Vd.8H -> result` | `v7/A32/A64` | +| int32x2_t vmax_s32(
     int32x2_t a,
     int32x2_t b)
| `a -> Vn.2S`
`b -> Vm.2S` | `SMAX Vd.2S,Vn.2S,Vm.2S` | `Vd.2S -> result` | `v7/A32/A64` | +| int32x4_t vmaxq_s32(
     int32x4_t a,
     int32x4_t b)
| `a -> Vn.4S`
`b -> Vm.4S` | `SMAX Vd.4S,Vn.4S,Vm.4S` | `Vd.4S -> result` | `v7/A32/A64` | +| uint8x8_t vmax_u8(
     uint8x8_t a,
     uint8x8_t b)
| `a -> Vn.8B`
`b -> Vm.8B` | `UMAX Vd.8B,Vn.8B,Vm.8B` | `Vd.8B -> result` | `v7/A32/A64` | +| uint8x16_t vmaxq_u8(
     uint8x16_t a,
     uint8x16_t b)
| `a -> Vn.16B`
`b -> Vm.16B` | `UMAX Vd.16B,Vn.16B,Vm.16B` | `Vd.16B -> result` | `v7/A32/A64` | +| uint16x4_t vmax_u16(
     uint16x4_t a,
     uint16x4_t b)
| `a -> Vn.4H`
`b -> Vm.4H` | `UMAX Vd.4H,Vn.4H,Vm.4H` | `Vd.4H -> result` | `v7/A32/A64` | +| uint16x8_t vmaxq_u16(
     uint16x8_t a,
     uint16x8_t b)
| `a -> Vn.8H`
`b -> Vm.8H` | `UMAX Vd.8H,Vn.8H,Vm.8H` | `Vd.8H -> result` | `v7/A32/A64` | +| uint32x2_t vmax_u32(
     uint32x2_t a,
     uint32x2_t b)
| `a -> Vn.2S`
`b -> Vm.2S` | `UMAX Vd.2S,Vn.2S,Vm.2S` | `Vd.2S -> result` | `v7/A32/A64` | +| uint32x4_t vmaxq_u32(
     uint32x4_t a,
     uint32x4_t b)
| `a -> Vn.4S`
`b -> Vm.4S` | `UMAX Vd.4S,Vn.4S,Vm.4S` | `Vd.4S -> result` | `v7/A32/A64` | +| float32x2_t vmax_f32(
     float32x2_t a,
     float32x2_t b)
| `a -> Vn.2S`
`b -> Vm.2S` | `FMAX Vd.2S,Vn.2S,Vm.2S` | `Vd.2S -> result` | `v7/A32/A64` | +| float32x4_t vmaxq_f32(
     float32x4_t a,
     float32x4_t b)
| `a -> Vn.4S`
`b -> Vm.4S` | `FMAX Vd.4S,Vn.4S,Vm.4S` | `Vd.4S -> result` | `v7/A32/A64` | +| float64x1_t vmax_f64(
     float64x1_t a,
     float64x1_t b)
| `a -> Dn`
`b -> Dm` | `FMAX Dd,Dn,Dm` | `Dd -> result` | `A64` | +| float64x2_t vmaxq_f64(
     float64x2_t a,
     float64x2_t b)
| `a -> Vn.2D`
`b -> Vm.2D` | `FMAX Vd.2D,Vn.2D,Vm.2D` | `Vd.2D -> result` | `A64` | +| float32x2_t vmaxnm_f32(
     float32x2_t a,
     float32x2_t b)
| `a -> Vn.2S`
`b -> Vm.2S` | `FMAXNM Vd.2S,Vn.2S,Vm.2S` | `Vd.2S -> result` | `A32/A64` | +| float32x4_t vmaxnmq_f32(
     float32x4_t a,
     float32x4_t b)
| `a -> Vn.4S`
`b -> Vm.4S` | `FMAXNM Vd.4S,Vn.4S,Vm.4S` | `Vd.4S -> result` | `A32/A64` | +| float64x1_t vmaxnm_f64(
     float64x1_t a,
     float64x1_t b)
| `a -> Dn`
`b -> Dm` | `FMAXNM Dd,Dn,Dm` | `Dd -> result` | `A64` | +| float64x2_t vmaxnmq_f64(
     float64x2_t a,
     float64x2_t b)
| `a -> Vn.2D`
`b -> Vm.2D` | `FMAXNM Vd.2D,Vn.2D,Vm.2D` | `Vd.2D -> result` | `A64` | #### Minimum @@ -962,10 +966,6 @@ The intrinsics in this section are guarded by the macro ``__ARM_NEON``. | float32x4_t vminq_f32(
     float32x4_t a,
     float32x4_t b)
| `a -> Vn.4S`
`b -> Vm.4S` | `FMIN Vd.4S,Vn.4S,Vm.4S` | `Vd.4S -> result` | `v7/A32/A64` | | float64x1_t vmin_f64(
     float64x1_t a,
     float64x1_t b)
| `a -> Dn`
`b -> Dm` | `FMIN Dd,Dn,Dm` | `Dd -> result` | `A64` | | float64x2_t vminq_f64(
     float64x2_t a,
     float64x2_t b)
| `a -> Vn.2D`
`b -> Vm.2D` | `FMIN Vd.2D,Vn.2D,Vm.2D` | `Vd.2D -> result` | `A64` | -| float32x2_t vmaxnm_f32(
     float32x2_t a,
     float32x2_t b)
| `a -> Vn.2S`
`b -> Vm.2S` | `FMAXNM Vd.2S,Vn.2S,Vm.2S` | `Vd.2S -> result` | `A32/A64` | -| float32x4_t vmaxnmq_f32(
     float32x4_t a,
     float32x4_t b)
| `a -> Vn.4S`
`b -> Vm.4S` | `FMAXNM Vd.4S,Vn.4S,Vm.4S` | `Vd.4S -> result` | `A32/A64` | -| float64x1_t vmaxnm_f64(
     float64x1_t a,
     float64x1_t b)
| `a -> Dn`
`b -> Dm` | `FMAXNM Dd,Dn,Dm` | `Dd -> result` | `A64` | -| float64x2_t vmaxnmq_f64(
     float64x2_t a,
     float64x2_t b)
| `a -> Vn.2D`
`b -> Vm.2D` | `FMAXNM Vd.2D,Vn.2D,Vm.2D` | `Vd.2D -> result` | `A64` | | float32x2_t vminnm_f32(
     float32x2_t a,
     float32x2_t b)
| `a -> Vn.2S`
`b -> Vm.2S` | `FMINNM Vd.2S,Vn.2S,Vm.2S` | `Vd.2S -> result` | `A32/A64` | | float32x4_t vminnmq_f32(
     float32x4_t a,
     float32x4_t b)
| `a -> Vn.4S`
`b -> Vm.4S` | `FMINNM Vd.4S,Vn.4S,Vm.4S` | `Vd.4S -> result` | `A32/A64` | | float64x1_t vminnm_f64(
     float64x1_t a,
     float64x1_t b)
| `a -> Dn`
`b -> Dm` | `FMINNM Dd,Dn,Dm` | `Dd -> result` | `A64` |