-
Notifications
You must be signed in to change notification settings - Fork 53
Open
Description
We have support for reading the information in the footer:
arrow-nanoarrow/src/nanoarrow/ipc/decoder.c
Lines 1312 to 1345 in bdb7d0c
| ArrowErrorCode ArrowIpcDecoderDecodeFooter(struct ArrowIpcDecoder* decoder, | |
| struct ArrowBufferView data, | |
| struct ArrowError* error) { | |
| struct ArrowIpcDecoderPrivate* private_data = | |
| (struct ArrowIpcDecoderPrivate*)decoder->private_data; | |
| int32_t footer_and_size_and_magic_size = | |
| decoder->header_size_bytes + sizeof(int32_t) + strlen(NANOARROW_IPC_MAGIC); | |
| const uint8_t* footer_data = | |
| data.data.as_uint8 + data.size_bytes - footer_and_size_and_magic_size; | |
| ns(Footer_table_t) footer = ns(Footer_as_root(footer_data)); | |
| NANOARROW_RETURN_NOT_OK( | |
| ArrowIpcDecoderDecodeSchemaHeader(decoder, ns(Footer_schema(footer)), error)); | |
| NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeSchemaImpl( | |
| ns(Footer_schema(footer)), &private_data->footer.schema, error)); | |
| ns(Block_vec_t) blocks = ns(Footer_recordBatches(footer)); | |
| int64_t n = ns(Block_vec_len(blocks)); | |
| NANOARROW_RETURN_NOT_OK(ArrowBufferResize(&private_data->footer.record_batch_blocks, | |
| sizeof(struct ArrowIpcFileBlock) * n, | |
| /*shrink_to_fit=*/0)); | |
| struct ArrowIpcFileBlock* record_batches = | |
| (struct ArrowIpcFileBlock*)private_data->footer.record_batch_blocks.data; | |
| for (int64_t i = 0; i < n; i++) { | |
| record_batches[i].offset = ns(Block_offset(blocks + i)); | |
| record_batches[i].metadata_length = ns(Block_metaDataLength(blocks + i)); | |
| record_batches[i].body_length = ns(Block_bodyLength(blocks + i)); | |
| } | |
| decoder->footer = &private_data->footer; | |
| return NANOARROW_OK; | |
| } |
...but this is not well integrated into the rest of the read process.
duckdb-nanoarrow implements the ability to read an Arrow file as if it were a stream (by skipping the ARROW1\0\0 magic at the start of the file and erroring if it encounters any dictionaries), which we should possibly implement as part of our stream reading as well (at least as an option).
eitsupi, ianmcook and joeltg
Metadata
Metadata
Assignees
Labels
No labels