Skip to content

Commit e1c622c

Browse files
committed
[optimize] improve Str.hashCode()
Make Str.hashCode() discriminate better on the 1st character of the string. That improvement is actually noticeable in the timing of HashMap.get() in Configuration.getProperty(). Also improve Str.isEmpty().
1 parent 5caf6b4 commit e1c622c

2 files changed

Lines changed: 22 additions & 9 deletions

File tree

exist-core/src/main/java/org/exist/util/Str.java

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,11 @@
55
import com.github.benmanes.caffeine.cache.stats.CacheStats;
66

77
/**
8-
* "Fast" string which uses a combination of cacheing and fingerprinting
8+
* "Fast" string which uses "fingerprinting"
99
* to make both equality comparison, and string ordering efficient for
1010
* shorter/simpler strings.
1111
* <p>
12-
* The first tactic is to cache {@code String} to {@code Str} mappings for
13-
* most recently used {@code String}s of {@code Str}s.
14-
* This allows equality comparisons to often succeed based on identity.
15-
* <p>
16-
* The second tactic is to create a fingerprint (a {@code long}, i.e. 64 bits) for a string.
12+
* The tactic is to create a fingerprint (a {@code long}, i.e. 64 bits) for a string.
1713
* The fingerprint holds
1814
* - a prefix (the bottom 8 bits of each of the first 6 characters)
1915
* - a length (0...254, or 255 representing length >= 255).
@@ -80,7 +76,7 @@ private static Str from(final String s) {
8076
* @return true iff the string is empty
8177
*/
8278
public boolean isEmpty() {
83-
return value.isEmpty();
79+
return ((fingerprint & LENGTH_MASK) == 0L);
8480
}
8581

8682
public CharSequence toCharSequence() {
@@ -137,18 +133,21 @@ private boolean lexicographicPrefix() {
137133
* @return the length of the string which this {@code Str} represents
138134
*/
139135
public int length() {
140-
long lsb = (fingerprint >> LEN_SHIFT) & 0xff;
136+
long lsb = (fingerprint >>> LEN_SHIFT) & 0xff;
141137
if (lsb < 0xff) {
142138
return (int) lsb;
143139
}
144140
return value.length();
145141
}
146142

143+
// Configuration for masking
147144
private final static int LEN_SHIFT = 48;
148145
private final static int MAX_FINGERPRINT_ENCODED_LEN = 6;
149146

150147
private final static long PREFIX_MASK = 0xffffffffffffL;
151148

149+
private final static long LENGTH_MASK = 0xffL << LEN_SHIFT;
150+
152151
/**
153152
* Create a fingerprint for a string, to make comparison more efficient
154153
* <p>
@@ -210,7 +209,18 @@ public boolean equals(Object o) {
210209
*/
211210
@Override
212211
public int hashCode() {
213-
return (int) (fingerprint ^ (fingerprint >>> 32));
212+
// fingerprint bytes
213+
// 7 bitwise OR of the top 8 bits of each char
214+
// 6 length up to 255
215+
// 5 char 0 (bottom 8 bits)
216+
// 4 char 1
217+
// 3 char 2
218+
// 2 char 3
219+
// 1 char 4
220+
// 0 char 5
221+
// >>> 40 gets char 0 into the lowest byte of the hash, with the length in the byte above it
222+
// >>> 20 gets char 1 into the middle of the hash
223+
return (int) (fingerprint ^ (fingerprint >>> 40) ^ (fingerprint >>> 20));
214224
}
215225

216226

exist-core/src/test/java/org/exist/util/StrTest.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ public void testNullOrEmptyStr() {
1919

2020
assertThat(Str.of("")).isNotEqualTo(Str.of("1"));
2121
assertThat(Str.of("1")).isNotEqualTo(Str.of(""));
22+
23+
assertThat(Str.of("").isEmpty()).isTrue();
24+
assertThat(Str.of("1").isEmpty()).isFalse();
2225
}
2326

2427
@Test

0 commit comments

Comments
 (0)