Skip to content

Commit 896a0ef

Browse files
authored
add support for might-exist lookups (#660)
* add support for might-exist lookups. Such lookups are faster (they check only the memtables and filters) but may have false positives.
* fix suspiciously old-looking memory leak

---------

Signed-off-by: Rob Johnson <rob@robjohnson.io>
1 parent 8939c84 commit 896a0ef

22 files changed

Lines changed: 610 additions & 481 deletions

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,7 @@ tags
1414
unit_tests_db
1515
compile_flags.txt
1616
build
17+
\#*\#
18+
compile_commands.json
19+
*~
20+
.cache/

examples/splinterdb_intro_example.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ main()
7272

7373
// Retrieve a key-value pair.
7474
splinterdb_lookup_result result;
75-
splinterdb_lookup_result_init(spl_handle, &result, 0, NULL);
75+
splinterdb_lookup_result_init(
76+
spl_handle, &result, SPLINTERDB_LOOKUP_VALUE, 0, NULL);
7677

7778
fruit = "Orange";
7879
key = slice_create((size_t)strlen(fruit), fruit);

examples/splinterdb_wide_values_example.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ main()
7575
char outbuf[USER_AVG_VALUE_SIZE];
7676

7777
splinterdb_lookup_result result;
78-
splinterdb_lookup_result_init(spl_handle, &result, sizeof(outbuf), outbuf);
78+
splinterdb_lookup_result_init(
79+
spl_handle, &result, SPLINTERDB_LOOKUP_VALUE, sizeof(outbuf), outbuf);
7980

8081
printf("Retrieve values of different lengths using output buffer of"
8182
" fixed size=%lu bytes:\n",

include/splinterdb/splinterdb.h

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -174,24 +174,28 @@ splinterdb_open(const splinterdb_config *cfg, splinterdb **kvs);
174174
void
175175
splinterdb_close(splinterdb **kvs);
176176

177-
// Insert a key and value.
178-
// Relies on data_config->encode_message
177+
// Insert a key and value. Overwrites any previous value associated with the
178+
// key.
179179
int
180180
splinterdb_insert(splinterdb *kvsb, slice key, slice value);
181181

182182
// Delete a given key and any associated value / messages
183183
int
184184
splinterdb_delete(splinterdb *kvsb, slice key);
185185

186-
// Insert a key and value.
187-
// Relies on data_config->encode_message
186+
// Update the value associated with key.
188187
int
189188
splinterdb_update(splinterdb *kvsb, slice key, slice delta);
190189

191190
// Lookups
192191

192+
typedef uint64 splinterdb_lookup_flags;
193+
194+
#define SPLINTERDB_LOOKUP_VALUE ((splinterdb_lookup_flags)0)
195+
#define SPLINTERDB_LOOKUP_MIGHT_EXIST ((splinterdb_lookup_flags)1 << 0)
196+
193197
// Size of opaque data required to hold a lookup result
194-
#define SPLINTERDB_LOOKUP_BUFSIZE (18 * sizeof(void *))
198+
#define SPLINTERDB_LOOKUP_BUFSIZE (20 * sizeof(void *))
195199

196200
// A lookup result is stored and parsed from here
197201
//
@@ -203,6 +207,12 @@ typedef struct {
203207

204208
// Initialize a lookup result object.
205209
//
210+
// flags may be SPLINTERDB_LOOKUP_VALUE or SPLINTERDB_LOOKUP_MIGHT_EXIST.
211+
//
212+
// When SPLINTERDB_LOOKUP_MIGHT_EXIST is set, splinterdb_lookup_found() reports
213+
// true if the key might exist, and false if the key definitely does not exist.
214+
// splinterdb_lookup_result_value() will return EINVAL for such results.
215+
//
206216
// If buffer is NULL, then the library will allocate and manage memory.
207217
//
208218
// If the caller provides a buffer, that will be used, unless a lookup
@@ -212,15 +222,16 @@ typedef struct {
212222
// After this function returns, the caller must ensure that
213223
// 1. *result is only used in conjunction with the kvs
214224
// Attempting to use one lookup_result with multiple instances of splinterdb
215-
// may cause problems in future versions of splinterdb
225+
// will cause problems
216226
// 2. The lifetime of *result must not exceed the lifetime of kvs
217227
// The result should be deinit'ed before calling splinterdb_close on kvs
218228
//
219-
// While the current version of SplinterDB does not rely on these rules, future
229+
// While the current version of SplinterDB does not rely on Rule 2, future
220230
// versions may store pointers to Splinter's own memory in the lookup_result.
221231
void
222232
splinterdb_lookup_result_init(const splinterdb *kvs, // IN
223233
splinterdb_lookup_result *result, // IN/OUT
234+
splinterdb_lookup_flags flags, // IN
224235
uint64 buffer_len, // IN
225236
char *buffer // IN
226237
);
@@ -231,7 +242,10 @@ splinterdb_lookup_result_init(const splinterdb *kvs, // IN
231242
void
232243
splinterdb_lookup_result_deinit(splinterdb_lookup_result *result); // IN
233244

234-
// Returns true if the result was found
245+
// Returns true if the result was found.
246+
//
247+
// For might-exist lookups, this returns true when the key exists or might
248+
// exist.
235249
_Bool
236250
splinterdb_lookup_found(const splinterdb_lookup_result *result); // IN
237251

@@ -243,11 +257,6 @@ splinterdb_lookup_result_value(const splinterdb_lookup_result *result, // IN
243257
slice *value // OUT
244258
);
245259

246-
int
247-
splinterdb_lookup_result_key(const splinterdb_lookup_result *result, // IN
248-
slice *key // OUT
249-
);
250-
251260
// Look up the message for a given key
252261
//
253262
// result must have first been initialized using splinterdb_lookup_result_init

src/btree.c

Lines changed: 48 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -2079,7 +2079,6 @@ btree_lookup_node(cache *cc, // IN
20792079
*
20802080
* LOCAL Variables:
20812081
* - state->h: the height of the current node
2082-
* - state->found: whether the target was found
20832082
* - state->child_node: the child node
20842083
*/
20852084
static inline async_status
@@ -2114,11 +2113,12 @@ btree_lookup_node_async(btree_lookup_async_state *state, uint64 depth)
21142113
state->h > state->stop_at_height;
21152114
state->h--)
21162115
{
2117-
int64 child_idx =
2116+
bool32 found;
2117+
int64 child_idx =
21182118
key_is_positive_infinity(state->target)
2119-
? btree_num_entries(state->node.hdr) - 1
2120-
: btree_find_pivot(
2121-
state->cfg, state->node.hdr, state->target, &state->found);
2119+
? btree_num_entries(state->node.hdr) - 1
2120+
: btree_find_pivot(
2121+
state->cfg, state->node.hdr, state->target, &found);
21222122
if (child_idx < 0) {
21232123
child_idx = 0;
21242124
}
@@ -2165,7 +2165,7 @@ btree_lookup_node_async(btree_lookup_async_state *state, uint64 depth)
21652165
*
21662166
* OUT Parameters:
21672167
* - state->node: the node found
2168-
* - state->found: whether the target was found in the leaf
2168+
* - state->found_key: the key found in the leaf, or NULL_KEY if not found.
21692169
* - state->msg: the message of the target
21702170
*
21712171
* LOCAL Variables:
@@ -2183,9 +2183,11 @@ btree_lookup_with_ref_async(btree_lookup_async_state *state, uint64 depth)
21832183
state->stats = NULL;
21842184
async_await_subroutine(state, btree_lookup_node_async);
21852185

2186-
int64 idx = btree_find_tuple(
2187-
state->cfg, state->node.hdr, state->target, &state->found);
2188-
if (state->found) {
2186+
state->found_key = NULL_KEY;
2187+
bool32 found;
2188+
int64 idx =
2189+
btree_find_tuple(state->cfg, state->node.hdr, state->target, &found);
2190+
if (found) {
21892191
leaf_entry *entry =
21902192
btree_get_leaf_entry(state->cfg, state->node.hdr, idx);
21912193
state->found_key = leaf_entry_key(entry);
@@ -2205,13 +2207,15 @@ btree_lookup_with_ref(cache *cc, // IN
22052207
key target, // IN
22062208
btree_node *node, // OUT
22072209
key *found_key, // OUT
2208-
message *msg, // OUT
2209-
bool32 *found) // OUT
2210+
message *msg) // OUT
22102211
{
2211-
platform_status rc = STATUS_OK;
2212+
platform_status rc = STATUS_OK;
2213+
bool32 found = FALSE;
2214+
2215+
*found_key = NULL_KEY;
22122216
btree_lookup_node(cc, cfg, root_addr, target, 0, type, node, NULL);
2213-
int64 idx = btree_find_tuple(cfg, node->hdr, target, found);
2214-
if (*found) {
2217+
int64 idx = btree_find_tuple(cfg, node->hdr, target, &found);
2218+
if (found) {
22152219
leaf_entry *entry = btree_get_leaf_entry(cfg, node->hdr, idx);
22162220
*found_key = leaf_entry_key(entry);
22172221
*msg = leaf_entry_message(entry);
@@ -2221,57 +2225,17 @@ btree_lookup_with_ref(cache *cc, // IN
22212225
return rc;
22222226
}
22232227

2224-
async_status
2225-
btree_lookup_async(btree_lookup_async_state *state)
2226-
{
2227-
async_begin(state, 0);
2228-
2229-
async_await_subroutine(state, btree_lookup_with_ref_async);
2230-
2231-
platform_status rc = STATUS_OK;
2232-
if (state->found) {
2233-
if (state->keybuf != NULL) {
2234-
rc = key_buffer_copy_key(state->keybuf, state->found_key);
2235-
}
2236-
bool32 success =
2237-
merge_accumulator_copy_message(state->result, state->msg);
2238-
if (!success) {
2239-
rc = STATUS_NO_MEMORY;
2240-
}
2241-
btree_node_unget(state->cc, state->cfg, &state->node);
2242-
}
2243-
async_return(state, rc);
2244-
}
2245-
2246-
22472228
platform_status
2248-
btree_lookup(cache *cc, // IN
2249-
btree_config *cfg, // IN
2250-
uint64 root_addr, // IN
2251-
page_type type, // IN
2252-
key target, // IN
2253-
key_buffer *keybuf, // OUT
2254-
merge_accumulator *result) // OUT
2229+
btree_lookup(cache *cc, // IN
2230+
btree_config *cfg, // IN
2231+
uint64 root_addr, // IN
2232+
page_type type, // IN
2233+
key target, // IN
2234+
lookup_result *result) // OUT
22552235
{
2256-
btree_node node;
2257-
key found_key;
2258-
message data;
2259-
platform_status rc = STATUS_OK;
2260-
bool32 local_found;
2261-
2262-
btree_lookup_with_ref(
2263-
cc, cfg, root_addr, type, target, &node, &found_key, &data, &local_found);
2264-
if (local_found) {
2265-
if (keybuf != NULL) {
2266-
rc = key_buffer_copy_key(keybuf, found_key);
2267-
}
2268-
bool32 success = merge_accumulator_copy_message(result, data);
2269-
if (!success) {
2270-
rc = STATUS_NO_MEMORY;
2271-
}
2272-
btree_node_unget(cc, cfg, &node);
2273-
}
2274-
return rc;
2236+
lookup_result_reset(result);
2237+
return btree_lookup_and_merge(
2238+
cc, cfg, root_addr, type, target, result, NULL);
22752239
}
22762240

22772241
platform_status
@@ -2280,8 +2244,7 @@ btree_lookup_and_merge(cache *cc, // IN
22802244
uint64 root_addr, // IN
22812245
page_type type, // IN
22822246
key target, // IN
2283-
key_buffer *keybuf, // OUT
2284-
merge_accumulator *data, // OUT
2247+
lookup_result *result, // IN/OUT
22852248
bool32 *local_found) // OUT
22862249
{
22872250
btree_node node;
@@ -2291,27 +2254,17 @@ btree_lookup_and_merge(cache *cc, // IN
22912254

22922255
log_trace_key(target, "btree_lookup");
22932256

2294-
btree_lookup_with_ref(cc,
2295-
cfg,
2296-
root_addr,
2297-
type,
2298-
target,
2299-
&node,
2300-
&found_key,
2301-
&local_data,
2302-
local_found);
2303-
if (*local_found) {
2304-
if (keybuf != NULL) {
2305-
rc = key_buffer_copy_key(keybuf, found_key);
2306-
}
2307-
if (merge_accumulator_is_null(data)) {
2308-
bool32 success = merge_accumulator_copy_message(data, local_data);
2309-
if (!success) {
2310-
rc = STATUS_NO_MEMORY;
2311-
}
2312-
} else if (btree_merge_tuples(cfg, target, local_data, data)) {
2313-
rc = STATUS_NO_MEMORY;
2257+
if (local_found != NULL) {
2258+
*local_found = FALSE;
2259+
}
2260+
2261+
btree_lookup_with_ref(
2262+
cc, cfg, root_addr, type, target, &node, &found_key, &local_data);
2263+
if (!key_is_null(found_key)) {
2264+
if (local_found != NULL) {
2265+
*local_found = TRUE;
23142266
}
2267+
rc = lookup_result_update(result, found_key, local_data);
23152268
btree_node_unget(cc, cfg, &node);
23162269
}
23172270
return rc;
@@ -2325,11 +2278,8 @@ btree_lookup_and_merge(cache *cc, // IN
23252278
* - state->type: the type of the root node
23262279
* - state->target: the key to look up
23272280
*
2328-
* IN/OUT Parameters:
2329-
* - state->result: the result of the lookup
2330-
*
23312281
* OUT Parameters:
2332-
* - state->found: whether the target was found in the leaf
2282+
* - state->found_key: the key found in the leaf, or NULL_KEY otherwise.
23332283
*
23342284
* LOCAL Variables:
23352285
* - state->node: the node found
@@ -2347,26 +2297,20 @@ btree_lookup_and_merge_async(btree_lookup_async_state *state)
23472297
async_await_subroutine(state, btree_lookup_with_ref_async);
23482298

23492299
platform_status rc = STATUS_OK;
2350-
if (state->found) {
2351-
if (state->keybuf != NULL) {
2352-
rc = key_buffer_copy_key(state->keybuf, state->found_key);
2353-
}
2354-
if (merge_accumulator_is_null(state->result)) {
2355-
bool32 success =
2356-
merge_accumulator_copy_message(state->result, state->msg);
2357-
if (!success) {
2358-
rc = STATUS_NO_MEMORY;
2359-
}
2360-
} else if (btree_merge_tuples(
2361-
state->cfg, state->target, state->msg, state->result))
2362-
{
2363-
rc = STATUS_NO_MEMORY;
2364-
}
2300+
if (!key_is_null(state->found_key)) {
2301+
rc = lookup_result_update(state->result, state->found_key, state->msg);
23652302
btree_node_unget(state->cc, state->cfg, &state->node);
23662303
}
23672304
async_return(state, rc);
23682305
}
23692306

2307+
async_status
2308+
btree_lookup_async(btree_lookup_async_state *state)
2309+
{
2310+
lookup_result_reset(state->result);
2311+
return btree_lookup_and_merge_async(state);
2312+
}
2313+
23702314
/*
23712315
*-----------------------------------------------------------------------------
23722316
* btree_iterator_init --

0 commit comments

Comments
 (0)