From b25c1837d79e786342325691c68181a5221b9296 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Tue, 17 Feb 2026 04:26:44 +0000 Subject: [PATCH 1/6] Added: Internal Hash64 type, backed by ahash --- src/Cargo.lock | 25 ++++++++ src/llm-coding-tools-core/Cargo.toml | 6 ++ .../src/internal/hash64.rs | 61 +++++++++++++++++++ src/llm-coding-tools-core/src/internal/mod.rs | 6 ++ src/llm-coding-tools-core/src/lib.rs | 2 + 5 files changed, 100 insertions(+) create mode 100644 src/llm-coding-tools-core/src/internal/hash64.rs create mode 100644 src/llm-coding-tools-core/src/internal/mod.rs diff --git a/src/Cargo.lock b/src/Cargo.lock index b09fe5ac..0dd3138f 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -2,6 +2,19 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -1083,6 +1096,7 @@ checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" name = "llm-coding-tools-core" version = "0.2.0" dependencies = [ + "ahash", "globset", "grep-regex", "grep-searcher", @@ -1099,6 +1113,7 @@ dependencies = [ "serde_json", "tempfile", "thiserror 2.0.18", + "tinyvec_string", "tokio", "wiremock", ] @@ -2360,6 +2375,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "tinyvec_string" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be28110ab94064f103a2a24597ed8f556f831d7334751ab991967de16c12d18f" +dependencies = [ + "tinyvec", + "tinyvec_macros", +] + [[package]] name = "tokio" version = "1.49.0" diff --git 
a/src/llm-coding-tools-core/Cargo.toml b/src/llm-coding-tools-core/Cargo.toml index c40b0814..efadbc3c 100644 --- a/src/llm-coding-tools-core/Cargo.toml +++ b/src/llm-coding-tools-core/Cargo.toml @@ -22,6 +22,12 @@ blocking = ["maybe-async/is_sync", "dep:reqwest", "reqwest?/blocking", "process- serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +# High-performance hashing for permission keys +ahash = "0.8" + +# Inline string storage for patterns +tinyvec_string = { version = "0.3", features = ["alloc"] } + # ToolError type uses thiserror for ergonomic error definitions thiserror = "2.0" diff --git a/src/llm-coding-tools-core/src/internal/hash64.rs b/src/llm-coding-tools-core/src/internal/hash64.rs new file mode 100644 index 00000000..6cc32a9e --- /dev/null +++ b/src/llm-coding-tools-core/src/internal/hash64.rs @@ -0,0 +1,61 @@ +//! Wrapper for an internal 64-bit hash. +//! +//! Currently uses ahash64 under the hood, based on performance requirements +//! (handling short strings, while also scaling well); but given this is an +//! internal type, that's an implementation detail. + +/// A 64-bit hash value using the ahash algorithm. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct Hash64(u64); + +impl Hash64 { + /// Creates a new Hash64 from a raw u64 value. + #[inline] + #[allow(dead_code)] // public API + pub(crate) fn from_u64(value: u64) -> Self { + Self(value) + } + + /// Returns the underlying u64 value. + #[inline] + #[allow(dead_code)] // public API + pub(crate) fn as_u64(&self) -> u64 { + self.0 + } +} + +/// Hashes a string to Hash64 using ahash64. +#[inline(always)] +#[allow(dead_code)] // public API +pub(crate) fn hash_u64(s: &str) -> Hash64 { + hash_u64_bytes(s.as_bytes()) +} + +/// Hashes raw bytes to Hash64 using ahash64. 
+#[inline(always)] +#[allow(dead_code)] // public API +pub(crate) fn hash_u64_bytes(bytes: &[u8]) -> Hash64 { + Hash64(ahash::RandomState::with_seed(0xDEAD_CAFE).hash_one(bytes)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn hash_is_deterministic() { + let hash1 = hash_u64("bash"); + let hash2 = hash_u64("bash"); + assert_eq!(hash1, hash2); + } + + #[test] + fn different_inputs_produce_different_hashes() { + let h1 = hash_u64("bash"); + let h2 = hash_u64("read"); + let h3 = hash_u64("write"); + assert_ne!(h1, h2); + assert_ne!(h1, h3); + assert_ne!(h2, h3); + } +} diff --git a/src/llm-coding-tools-core/src/internal/mod.rs b/src/llm-coding-tools-core/src/internal/mod.rs new file mode 100644 index 00000000..850d924f --- /dev/null +++ b/src/llm-coding-tools-core/src/internal/mod.rs @@ -0,0 +1,6 @@ +//! Internal implementation details. +//! +//! This module contains private implementation details that are not part of +//! the public API. Items here may change without notice. 
+ +pub mod hash64; diff --git a/src/llm-coding-tools-core/src/lib.rs b/src/llm-coding-tools-core/src/lib.rs index ee99e157..7c5f62e4 100644 --- a/src/llm-coding-tools-core/src/lib.rs +++ b/src/llm-coding-tools-core/src/lib.rs @@ -18,6 +18,8 @@ pub mod tool_names; pub mod tools; pub mod util; +mod internal; + pub use context::ToolContext; pub use error::{ToolError, ToolResult}; pub use output::ToolOutput; From 11441c69ba7e1343c4acf3139eecfc4702925b95 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Tue, 17 Feb 2026 04:39:46 +0000 Subject: [PATCH 2/6] Added: Hash63 newtype wrapper for 63-bit hash values --- .../src/internal/hash63.rs | 32 +++++++++++++++++++ src/llm-coding-tools-core/src/internal/mod.rs | 1 + 2 files changed, 33 insertions(+) create mode 100644 src/llm-coding-tools-core/src/internal/hash63.rs diff --git a/src/llm-coding-tools-core/src/internal/hash63.rs b/src/llm-coding-tools-core/src/internal/hash63.rs new file mode 100644 index 00000000..5aaa29b8 --- /dev/null +++ b/src/llm-coding-tools-core/src/internal/hash63.rs @@ -0,0 +1,32 @@ +//! 63-bit hash type. +//! +//! A 63-bit hash value representing the upper 63 bits of an original hash. +//! More specifically, a 64-bit hash value which has been `>> 1`. +//! i.e. 64th bit is always 0. + +/// A 63-bit hash value representing the upper 63 bits of an original hash. +/// Result of right shifting a hash `>> 1`. +/// +/// # Bit Layout +/// - Bits 0-62: Original hash bits 1-63 (upper 63 bits) +/// - Bit 63: Always 0 +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct Hash63(u64); + +impl Hash63 { + /// Creates a new Hash63 from a raw u64 value. + /// + /// The caller is responsible for ensuring bit 63 is 0. + #[inline] + #[allow(dead_code)] // public API + pub(crate) const fn from_u64(value: u64) -> Self { + Self(value) + } + + /// Returns the underlying u64 value. 
+ #[inline] + #[allow(dead_code)] // public API + pub(crate) const fn as_u64(&self) -> u64 { + self.0 + } +} diff --git a/src/llm-coding-tools-core/src/internal/mod.rs b/src/llm-coding-tools-core/src/internal/mod.rs index 850d924f..011ace00 100644 --- a/src/llm-coding-tools-core/src/internal/mod.rs +++ b/src/llm-coding-tools-core/src/internal/mod.rs @@ -3,4 +3,5 @@ //! This module contains private implementation details that are not part of //! the public API. Items here may change without notice. +pub mod hash63; pub mod hash64; From c18d388474d0481e346bf3271f45d8aad947ee3e Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Tue, 17 Feb 2026 04:41:03 +0000 Subject: [PATCH 3/6] 'Public API' -> 'Internal Public API' --- src/llm-coding-tools-core/src/internal/hash63.rs | 4 ++-- src/llm-coding-tools-core/src/internal/hash64.rs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/llm-coding-tools-core/src/internal/hash63.rs b/src/llm-coding-tools-core/src/internal/hash63.rs index 5aaa29b8..e51ed7a5 100644 --- a/src/llm-coding-tools-core/src/internal/hash63.rs +++ b/src/llm-coding-tools-core/src/internal/hash63.rs @@ -18,14 +18,14 @@ impl Hash63 { /// /// The caller is responsible for ensuring bit 63 is 0. #[inline] - #[allow(dead_code)] // public API + #[allow(dead_code)] // internal public API pub(crate) const fn from_u64(value: u64) -> Self { Self(value) } /// Returns the underlying u64 value. #[inline] - #[allow(dead_code)] // public API + #[allow(dead_code)] // internal public API pub(crate) const fn as_u64(&self) -> u64 { self.0 } diff --git a/src/llm-coding-tools-core/src/internal/hash64.rs b/src/llm-coding-tools-core/src/internal/hash64.rs index 6cc32a9e..e880c044 100644 --- a/src/llm-coding-tools-core/src/internal/hash64.rs +++ b/src/llm-coding-tools-core/src/internal/hash64.rs @@ -11,14 +11,14 @@ pub(crate) struct Hash64(u64); impl Hash64 { /// Creates a new Hash64 from a raw u64 value. 
#[inline] - #[allow(dead_code)] // public API + #[allow(dead_code)] // internal public API pub(crate) fn from_u64(value: u64) -> Self { Self(value) } /// Returns the underlying u64 value. #[inline] - #[allow(dead_code)] // public API + #[allow(dead_code)] // internal public API pub(crate) fn as_u64(&self) -> u64 { self.0 } @@ -26,14 +26,14 @@ impl Hash64 { /// Hashes a string to Hash64 using ahash64. #[inline(always)] -#[allow(dead_code)] // public API +#[allow(dead_code)] // internal public API pub(crate) fn hash_u64(s: &str) -> Hash64 { hash_u64_bytes(s.as_bytes()) } /// Hashes raw bytes to Hash64 using ahash64. #[inline(always)] -#[allow(dead_code)] // public API +#[allow(dead_code)] // internal public API pub(crate) fn hash_u64_bytes(bytes: &[u8]) -> Hash64 { Hash64(ahash::RandomState::with_seed(0xDEAD_CAFE).hash_one(bytes)) } From f097e7d256cb0425ff7dff27759177679c32613f Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Tue, 17 Feb 2026 04:50:30 +0000 Subject: [PATCH 4/6] Added: PackedPermission for bit-packed permission storage Implement bit-packed permission storage using 63 bits for hash and 1 bit for PermissionAction. Enables compact permission representation with minimal memory overhead. 
Changes: - Add PackedPermission struct with new(), hash(), and action() methods - Use bit masks (ACTION_MASK, HASH_MASK) for efficient packing/unpacking - Use transmute for zero-cost action extraction from bit 63 - Add pack_unpack_roundtrip test for correctness validation Benefits: - Reduces memory usage for permission storage by packing into single u64 - Enables efficient hash-based permission lookups with compact representation - Compile-time assertions ensure correct PermissionAction size --- .../src/internal/hash63.rs | 6 ++ src/llm-coding-tools-core/src/internal/mod.rs | 1 + .../src/internal/packed_permission.rs | 76 +++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 src/llm-coding-tools-core/src/internal/packed_permission.rs diff --git a/src/llm-coding-tools-core/src/internal/hash63.rs b/src/llm-coding-tools-core/src/internal/hash63.rs index e51ed7a5..56df018f 100644 --- a/src/llm-coding-tools-core/src/internal/hash63.rs +++ b/src/llm-coding-tools-core/src/internal/hash63.rs @@ -29,4 +29,10 @@ impl Hash63 { pub(crate) const fn as_u64(&self) -> u64 { self.0 } + + /// Creates a Hash63 from a Hash64 by extracting upper 63 bits. + #[inline] + pub(crate) fn from_hash64(hash: crate::internal::hash64::Hash64) -> Self { + Self(hash.as_u64() >> 1) + } } diff --git a/src/llm-coding-tools-core/src/internal/mod.rs b/src/llm-coding-tools-core/src/internal/mod.rs index 011ace00..5b2bf28d 100644 --- a/src/llm-coding-tools-core/src/internal/mod.rs +++ b/src/llm-coding-tools-core/src/internal/mod.rs @@ -5,3 +5,4 @@ pub mod hash63; pub mod hash64; +pub mod packed_permission; diff --git a/src/llm-coding-tools-core/src/internal/packed_permission.rs b/src/llm-coding-tools-core/src/internal/packed_permission.rs new file mode 100644 index 00000000..0d717b64 --- /dev/null +++ b/src/llm-coding-tools-core/src/internal/packed_permission.rs @@ -0,0 +1,76 @@ +//! Bit-packed permission storage. +//! +//! Packs permission hash and action into a single u64. +//! 
- Lower 63 bits: hash63 (upper 63 bits of original hash) +//! - Upper 1 bit (bit 63): PermissionAction + +use crate::internal::hash63::Hash63; +use crate::internal::hash64::Hash64; +use crate::permissions::PermissionAction; + +/// Action bit mask - highest bit (bit 63). +const ACTION_MASK: u64 = 1u64 << 63; + +/// Hash mask - lower 63 bits. +const HASH_MASK: u64 = !ACTION_MASK; + +// Compile-time assertion: PermissionAction must be 1 byte for bit-packing +const _: () = assert!( + std::mem::size_of::<PermissionAction>() == 1, + "PermissionAction must be 1 byte for bit-packing" +); + +/// A u64 containing both permission hash and action. +/// +/// Layout: +/// - Bits 0-62: hash63 (upper 63 bits of original hash) +/// - Bit 63: PermissionAction +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct PackedPermission(u64); + +impl PackedPermission { + /// Creates a packed permission from hash and action. + #[inline] + pub(crate) fn new(hash: Hash64, action: PermissionAction) -> Self { + let hash63 = Hash63::from_hash64(hash); + let action_bit = (action as u64) << 63; + Self(hash63.as_u64() | action_bit) + } + + /// Returns the hash portion (lower 63 bits) as a [`Hash63`]. + /// Use `Hash63::from_hash64()` to compare with an original Hash64. + #[inline] + pub(crate) fn hash(&self) -> Hash63 { + Hash63::from_u64(self.0 & HASH_MASK) + } + + /// Returns the PermissionAction stored in bit 63. 
+ #[inline] + pub(crate) fn action(&self) -> PermissionAction { + unsafe { std::mem::transmute(((self.0 >> 63) & 1) as u8) } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::internal::hash63::Hash63; + use crate::internal::hash64::Hash64; + + #[test] + fn pack_unpack_roundtrip() { + // Use distinctive pattern: 0x1122334455667788 (easily detect if bits are lost) + let hash = Hash64::from_u64(0x1122334455667788u64); + let hash_shifted = Hash63::from_hash64(hash); + + // Test roundtrip for Allow + let packed_allow = PackedPermission::new(hash, PermissionAction::Allow); + assert_eq!(packed_allow.hash(), hash_shifted); + assert_eq!(packed_allow.action(), PermissionAction::Allow); + + // Test roundtrip for Deny + let packed_deny = PackedPermission::new(hash, PermissionAction::Deny); + assert_eq!(packed_deny.hash(), hash_shifted); + assert_eq!(packed_deny.action(), PermissionAction::Deny); + } +} From e37974319a612af5bb471dfd86a15877d7ba200e Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Tue, 17 Feb 2026 04:56:51 +0000 Subject: [PATCH 5/6] Changed: Optimize Rule layout with packed permission and inline TinyString Complete the Rule struct optimization by storing permission/action in packed form and pattern as inline TinyString<[u8; 14]>. 
Changes: - Store permission as PackedPermission (63-bit hash + action bit) - Store pattern as TinyString<[u8; 14]> with direct type usage - Use Hash63::from_hash64(hash_u64(permission_lower)) for matching - Remove intermediate pattern capacity constants - Add rule_size_is_32_bytes test for memory layout validation - Add hash case-insensitivity test for permission matching Benefits: - Rule struct now exactly 32 bytes (down from String-based layout) - Faster hashed permission matching vs string comparison - Zero-allocation pattern storage for short patterns - Maintains case-insensitive permission and pattern semantics --- src/llm-coding-tools-core/src/permissions.rs | 62 +++++++++++++------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/src/llm-coding-tools-core/src/permissions.rs b/src/llm-coding-tools-core/src/permissions.rs index 6b99128f..6c8ab424 100644 --- a/src/llm-coding-tools-core/src/permissions.rs +++ b/src/llm-coding-tools-core/src/permissions.rs @@ -48,6 +48,11 @@ //! ``` use serde::{Deserialize, Serialize}; +use tinyvec_string::TinyString; + +use crate::internal::hash63::Hash63; +use crate::internal::hash64::hash_u64; +use crate::internal::packed_permission::PackedPermission; /// Permission level for tool access. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] @@ -62,49 +67,52 @@ pub enum PermissionAction { /// A single permission rule with pattern-based matching. /// -/// Fields are private to enforce the lowercasing invariant. Use [`Rule::new`] to create -/// rules, which normalizes permission and pattern to lowercase. +/// Fields are private to enforce normalization and packing invariants. +/// Use [`Rule::new`] to create rules. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Rule { - /// Permission key (tool name), normalized to lowercase. - permission: String, + /// Packed permission hash and action. 
+ permission: PackedPermission, /// Pattern to match against (e.g., "*", "orchestrator-*"), normalized to lowercase. - pattern: String, - /// Action to take when matched. - action: PermissionAction, + pattern: TinyString<[u8; 14]>, } impl Rule { - /// Creates a new rule with normalized (lowercase) permission and pattern. + /// Creates a new rule with normalized (ascii-lowercase) permission and pattern. #[inline] pub fn new( permission: impl Into<String>, pattern: impl Into<String>, action: PermissionAction, ) -> Self { + let mut permission = permission.into(); + permission.make_ascii_lowercase(); + + let mut pattern = pattern.into(); + pattern.make_ascii_lowercase(); + Self { - permission: permission.into().to_lowercase(), - pattern: pattern.into().to_lowercase(), - action, + permission: PackedPermission::new(hash_u64(&permission), action), + pattern: TinyString::<[u8; 14]>::from(pattern.as_str()), } } - /// Returns the permission key (tool name), already normalized to lowercase. + /// Returns the stored 63-bit permission hash. #[inline] - pub fn permission(&self) -> &str { - &self.permission + pub fn permission_hash(&self) -> u64 { + self.permission.hash().as_u64() } /// Returns the pattern, already normalized to lowercase. #[inline] pub fn pattern(&self) -> &str { - &self.pattern + self.pattern.as_str() } /// Returns the action for this rule. 
#[inline] pub fn action(&self) -> PermissionAction { - self.action + self.permission.action() } } @@ -172,6 +180,7 @@ impl Ruleset { /// * `subject` - The subject to match against rule patterns (e.g., agent name, path) pub fn evaluate(&self, permission: &str, subject: &str) -> PermissionAction { let permission_lower = permission.to_ascii_lowercase(); + let permission_hash = Hash63::from_hash64(hash_u64(&permission_lower)); let subject_lower = subject.to_ascii_lowercase(); // Last-match-wins: iterate forward, keep overwriting result @@ -180,9 +189,10 @@ impl Ruleset { for rule in &self.rules { // Permission key: exact match only (no wildcards) // Pattern: wildcard match against subject - if rule.permission == permission_lower && wildcard_match(&subject_lower, &rule.pattern) + if rule.permission.hash() == permission_hash + && wildcard_match(&subject_lower, rule.pattern.as_str()) { - result = rule.action; + result = rule.permission.action(); } } @@ -374,20 +384,30 @@ mod tests { // ===== Rule tests ===== #[test] - fn rule_normalizes_to_lowercase() { + fn rule_normalizes_pattern_to_lowercase() { let rule = Rule::new("BASH", "PATTERN", PermissionAction::Allow); - assert_eq!(rule.permission(), "bash"); assert_eq!(rule.pattern(), "pattern"); } + #[test] + fn rule_permission_hash_is_case_insensitive() { + let upper = Rule::new("BASH", "*", PermissionAction::Allow); + let lower = Rule::new("bash", "*", PermissionAction::Allow); + assert_eq!(upper.permission_hash(), lower.permission_hash()); + } + #[test] fn rule_getters_return_correct_values() { let rule = Rule::new("task", "orchestrator-*", PermissionAction::Allow); - assert_eq!(rule.permission(), "task"); assert_eq!(rule.pattern(), "orchestrator-*"); assert_eq!(rule.action(), PermissionAction::Allow); } + #[test] + fn rule_size_is_32_bytes() { + assert_eq!(std::mem::size_of::<Rule>(), 32); + } + // ===== Ruleset tests ===== #[test] From b738be475c04721868e9c2a3b0076277d1490fd0 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Tue, 
17 Feb 2026 06:08:53 +0000 Subject: [PATCH 6/6] Added: Link to PR for permissions --- src/llm-coding-tools-core/src/permissions.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/llm-coding-tools-core/src/permissions.rs b/src/llm-coding-tools-core/src/permissions.rs index 6c8ab424..4d111309 100644 --- a/src/llm-coding-tools-core/src/permissions.rs +++ b/src/llm-coding-tools-core/src/permissions.rs @@ -69,6 +69,10 @@ pub enum PermissionAction { /// /// Fields are private to enforce normalization and packing invariants. /// Use [`Rule::new`] to create rules. +/// +/// # Memory Optimizations +/// +/// See: https://github.com/Sewer56/llm-coding-tools/pull/32 #[derive(Debug, Clone, PartialEq, Eq)] pub struct Rule { /// Packed permission hash and action.