Skip to content

Commit e4caad9

Browse files
committed
Update to libbbf 1.1.0
1 parent c91bc53 commit e4caad9

File tree

6 files changed

+151
-62
lines changed

6 files changed

+151
-62
lines changed

setup.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
"libbbf",
2525
[
2626
"src/bindings.cpp",
27-
"src/libbbf.cpp"
27+
"src/libbbf.cpp",
28+
"src/xxhash.c"
2829
],
2930
include_dirs=["src"],
3031
cxx_std=17,
@@ -33,7 +34,7 @@
3334

3435
setup(
3536
name="libbbf",
36-
version="0.2.10",
37+
version="0.2.12",
3738
author="EF1500",
3839
author_email="rosemilovelockofficial@proton.me",
3940
description="Bound Book Format (BBF) tools and bindings",

src/bbf_reader.h

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include "xxhash.h"
55

66
#include <string>
7-
#include <string_view> // C++17: Essential for zero-copy parsing
7+
#include <string_view>
88
#include <vector>
99
#include <map>
1010
#include <cstring>
@@ -78,7 +78,6 @@ struct MemoryMappedFile {
7878

7979
class BBFReader {
8080
private:
81-
// Cached pointers to avoid recalculating offsets repeatedly
8281
const char* data_ptr = nullptr;
8382
const BBFSection* sections_ = nullptr;
8483
const BBFMetadata* meta_ = nullptr;
@@ -108,8 +107,9 @@ class BBFReader {
108107
std::memcpy(&footer, data_ptr + mmap.size - sizeof(BBFFooter), sizeof(BBFFooter));
109108
if (std::memcmp(footer.magic, "BBF1", 4) != 0) return;
110109

111-
// Cache Table Pointers
112-
// Note: In production, you should add bounds checks here to ensure offsets are within mmap.size
110+
// Safety: Ensure tables point inside the file
111+
if (footer.assetTableOffset >= mmap.size || footer.pageTableOffset >= mmap.size) return;
112+
113113
sections_ = reinterpret_cast<const BBFSection*>(data_ptr + footer.sectionTableOffset);
114114
meta_ = reinterpret_cast<const BBFMetadata*>(data_ptr + footer.metaTableOffset);
115115
pages_ = reinterpret_cast<const BBFPageEntry*>(data_ptr + footer.pageTableOffset);
@@ -121,12 +121,8 @@ class BBFReader {
121121
isValid = true;
122122
}
123123

124-
// Optimized: Returns string_view (no allocation)
125-
// Helper to allow returning std::string for legacy binding support if needed,
126-
// but internal logic should prefer views.
127124
std::string_view getStringView(uint32_t offset) const {
128125
if (offset >= stringPoolSize_) return {};
129-
// Requires strings in file to be null-terminated.
130126
return std::string_view(stringPool_ + offset);
131127
}
132128

@@ -140,9 +136,8 @@ class BBFReader {
140136
std::vector<PySection> result;
141137
if (!isValid) return result;
142138

143-
result.reserve(footer.sectionCount); // Optimization: Reserve memory
139+
result.reserve(footer.sectionCount);
144140
for (uint32_t i = 0; i < footer.sectionCount; i++) {
145-
// Explicit conversion to std::string here is okay as we are handing off to Python
146141
result.push_back({
147142
std::string(getStringView(sections_[i].sectionTitleOffset)),
148143
sections_[i].sectionStartIndex,
@@ -166,21 +161,14 @@ class BBFReader {
166161
return result;
167162
}
168163

169-
// Zero-copy accessor for PyBind
170-
// Returns {pointer, size}
171164
std::pair<const char*, size_t> getPageRaw(uint32_t pageIndex) const {
172165
if (!isValid || pageIndex >= footer.pageCount) return {nullptr, 0};
173166

174-
// Indirect addressing: Page -> Asset -> Offset/Length
175167
const auto& asset = assets_[pages_[pageIndex].assetIndex];
176-
return { data_ptr + asset.offset, asset.length };
177-
}
168+
// Bounds check on asset
169+
if (asset.offset + asset.length > mmap.size) return {nullptr, 0};
178170

179-
// Legacy support (copies data)
180-
std::string getPageBytes(uint32_t pageIndex) const {
181-
auto raw = getPageRaw(pageIndex);
182-
if (!raw.first) return "";
183-
return std::string(raw.first, raw.second);
171+
return { data_ptr + asset.offset, static_cast<size_t>(asset.length) };
184172
}
185173

186174
std::map<std::string, uint64_t> getPageInfo(uint32_t pageIndex) const {
@@ -191,56 +179,62 @@ class BBFReader {
191179
{"length", asset.length},
192180
{"offset", asset.offset},
193181
{"hash", asset.xxh3Hash},
194-
{"type", asset.type}
182+
{"type", asset.type},
183+
{"decodedLength", asset.decodedLength} // ADDED: v1.1 Spec
195184
};
196185
}
197186

198-
bool verify() const {
199-
if (!isValid) return false;
187+
// Returns -1 for Success, -2 for Directory Error, or >=0 for Asset Index Error
188+
int64_t verify() const {
189+
if (!isValid) return -2;
200190

201191
// 1. Directory Hash Check
202192
size_t metaStart = footer.stringPoolOffset;
203193
size_t metaSize = mmap.size - sizeof(BBFFooter) - metaStart;
204-
if (XXH3_64bits(data_ptr + metaStart, metaSize) != footer.indexHash) return false;
194+
if (XXH3_64bits(data_ptr + metaStart, metaSize) != footer.indexHash) return -2;
205195

206196
// 2. Asset Integrity Check
207197
size_t count = footer.assetCount;
208-
const auto* local_assets = assets_; // Copy pointer for lambda capture
198+
const auto* local_assets = assets_;
209199
const auto* local_data = data_ptr;
200+
size_t max_size = mmap.size;
210201

211-
auto verifyRange = [local_assets, local_data](size_t start, size_t end) -> bool {
202+
// Lambda returns -1 if OK, or the index if Bad
203+
auto verifyRange = [local_assets, local_data, max_size](size_t start, size_t end) -> int64_t {
212204
for (size_t i = start; i < end; ++i) {
213205
const auto& a = local_assets[i];
206+
// Bounds check before hash
207+
if (a.offset + a.length > max_size) return (int64_t)i;
208+
214209
if (XXH3_64bits((const uint8_t*)local_data + a.offset, a.length) != a.xxh3Hash) {
215-
return false;
210+
return (int64_t)i; // Return the corrupted index
216211
}
217212
}
218-
return true;
213+
return -1; // Success
219214
};
220215

221-
// Optimization: Don't spawn threads for small files
222216
size_t numThreads = std::thread::hardware_concurrency();
223217
if (numThreads == 0) numThreads = 1;
224218

225-
// Heuristic: If assets < 128, threading overhead > hashing gain
226219
if (count < 128 || numThreads == 1) {
227220
return verifyRange(0, count);
228221
}
229222

230223
size_t chunkSize = count / numThreads;
231-
std::vector<std::future<bool>> futures;
224+
std::vector<std::future<int64_t>> futures; // Changed from bool to int64_t
232225
futures.reserve(numThreads);
233226

234227
for (size_t i = 0; i < numThreads; ++i) {
235228
size_t start = i * chunkSize;
236229
size_t end = (i == numThreads - 1) ? count : start + chunkSize;
237-
// Launch async
238230
futures.push_back(std::async(std::launch::async, verifyRange, start, end));
239231
}
240232

233+
// Check results
241234
for (auto& f : futures) {
242-
if (!f.get()) return false;
235+
int64_t result = f.get();
236+
if (result != -1) return result; // Bubble up the error index
243237
}
244-
return true;
238+
return -1; // All good
245239
}
246240
};

src/bbfenc.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#define NOMINMAX
2+
13
#include "libbbf.h"
24
#include "xxhash.h"
35
#include <iostream>
@@ -133,13 +135,23 @@ class BBFReader
133135
{
134136
if (!mmap.map(path))
135137
return false;
138+
139+
// Basic size check
136140
if (mmap.size < sizeof(BBFHeader) + sizeof(BBFFooter))
137141
return false;
138142

143+
// Read the fixed-size part of the header first
139144
std::memcpy(&header, mmap.data, sizeof(BBFHeader));
145+
140146
if (std::memcmp(header.magic, "BBF1", 4) != 0)
141147
return false;
142148

149+
// FUTURE PROOFING:
150+
// If header.headerLen > sizeof(BBFHeader), we know there is extra data
151+
// in the header we should ignore. We don't need to do anything right now
152+
// because we read assets via absolute offsets, but it's good to know.
153+
154+
// Read Footer
143155
std::memcpy(&footer, (uint8_t *)mmap.data + mmap.size - sizeof(BBFFooter), sizeof(BBFFooter));
144156
if (std::memcmp(footer.magic, "BBF1", 4) != 0)
145157
return false;
@@ -574,9 +586,13 @@ int main(int argc, char *argv[])
574586
for (uint32_t i = start; i < end; ++i)
575587
{
576588
const auto &asset = assets[pages[i].assetIndex];
577-
std::string outPath = (fs::path(outDir) / ("p" + std::to_string(i + 1) + ((asset.type == 1) ? ".avif" : ".png"))).string();
578589

579-
// Optimized: Direct write from mapped memory
590+
// FIX: Use the library function to get the extension
591+
// This automatically handles PNG, JPG, AVIF, JXL, etc.
592+
std::string ext = MediaTypeToStr(asset.type);
593+
594+
std::string outPath = (fs::path(outDir) / ("p" + std::to_string(i + 1) + ext)).string();
595+
580596
std::ofstream ofs(outPath, std::ios::binary);
581597
ofs.write((const char *)reader.mmap.data + asset.offset, asset.length);
582598
}
@@ -686,7 +702,8 @@ int main(int argc, char *argv[])
686702
for (uint32_t i = 0; i < manifest.size(); ++i)
687703
{
688704
std::string ext = fs::path(manifest[i].path).extension().string();
689-
uint8_t type = (ext == ".avif" || ext == ".AVIF") ? 1 : 2;
705+
BBFMediaType mediaType = detectTypeFromExtension(ext);
706+
uint8_t type = static_cast<uint8_t>(mediaType);
690707
builder.addPage(manifest[i].path, type);
691708
fileToPage[manifest[i].filename] = i;
692709
}

src/bindings.cpp

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,20 @@ PYBIND11_MODULE(libbbf, m) {
2626
py::class_<BBFReader>(m, "BBFReader")
2727
.def(py::init<const std::string &>())
2828
.def_readonly("is_valid", &BBFReader::isValid)
29-
.def_readonly("footer", &BBFReader::footer) // Optional: expose footer struct directly if bound
29+
// We don't need to expose footer struct directly unless you wrote a binding for BBFFooter
3030
.def("get_page_count", [](BBFReader& r) { return r.footer.pageCount; })
3131
.def("get_asset_count", [](BBFReader& r) { return r.footer.assetCount; })
3232

3333
.def("verify", &BBFReader::verify,
34-
py::call_guard<py::gil_scoped_release>(), // IMPORTANT: Release GIL during long hashing
35-
"Verify integrity of index and assets. Multithreaded.")
34+
py::call_guard<py::gil_scoped_release>(),
35+
"Verify integrity. Returns: -1 (Success), -2 (Directory Fail), or >=0 (Index of corrupt asset).")
3636

3737
.def("get_sections", [](BBFReader& r) {
38-
// Optimizing the conversion loop
3938
py::list result;
4039
const auto sections = r.getSections();
4140
for (const auto& s : sections) {
4241
py::dict d;
43-
d["title"] = s.title; // Moves string
42+
d["title"] = s.title;
4443
d["startPage"] = s.startPage;
4544
d["parent"] = s.parent;
4645
result.append(d);
@@ -50,26 +49,25 @@ PYBIND11_MODULE(libbbf, m) {
5049

5150
.def("get_metadata", &BBFReader::getMetadata,
5251
"Returns a list of (Key, Value) tuples.")
52+
53+
.def("get_page_info", &BBFReader::getPageInfo,
54+
"Returns dict with keys: length, offset, hash, type, decodedLength")
5355

5456
.def("get_page_data", [](BBFReader& r, uint32_t idx) {
5557
auto raw = r.getPageRaw(idx);
5658
if (!raw.first) return py::bytes("");
57-
// 1-Copy: Copies from mmap -> Python Bytes Object
5859
return py::bytes(raw.first, raw.second);
5960
}, "Returns the raw bytes of the page asset (1-Copy).")
6061

6162
.def("get_page_view", [](BBFReader& r, uint32_t idx) {
6263
auto raw = r.getPageRaw(idx);
6364
if (!raw.first) return py::memoryview(py::bytes(""));
6465

65-
// 0-Copy: Direct view into mmap
66-
// Warning: This view crashes Python if BBFReader is garbage collected before the view!
67-
// To fix this lifetime issue, we use 'py::keep_alive'.
6866
return py::memoryview::from_memory(
6967
const_cast<char*>(raw.first),
7068
raw.second,
7169
true // read-only
7270
);
73-
}, py::keep_alive<0, 1>(), // Keep BBFReader (1) alive while memoryview (0) exists
71+
}, py::keep_alive<0, 1>(),
7472
"Returns a zero-copy memoryview of the asset. Fastest method.");
7573
}

src/libbbf.cpp

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#include "libbbf.h"
2-
#define XXH_INLINE_ALL
32
#include "xxhash.h"
43

54
#include <iostream>
65
#include <vector>
6+
#include <algorithm>
7+
#include <string>
8+
#include <cctype>
79

810
BBFBuilder::BBFBuilder(const std::string& outputFilename) : currentOffset(0)
911
{
@@ -23,7 +25,9 @@ BBFBuilder::BBFBuilder(const std::string& outputFilename) : currentOffset(0)
2325
header.magic[1] = 'B';
2426
header.magic[2] = 'F';
2527
header.magic[3] = '1';
26-
header.version = 1;
28+
header.version = 2;
29+
header.flags = 0; // reserved for now as well.
30+
header.headerLen = sizeof(BBFHeader);
2731
header.reserved = 0; // Reserved for future expansions
2832

2933
// Write the header
@@ -93,16 +97,19 @@ bool BBFBuilder::addPage(const std::string& imagePath, uint8_t type, uint32_t fl
9397
// No dupe found. create a new asset.
9498
alignPadding(); // start by allocating necessary padding.
9599

96-
BBFAssetEntry newAsset;
100+
BBFAssetEntry newAsset = {0};
97101
newAsset.offset = currentOffset;
98102
newAsset.length = size;
103+
newAsset.decodedLength = size;
99104
newAsset.xxh3Hash = hash;
100105
newAsset.type = type;
106+
newAsset.flags = 0; // no flags yet.
101107

102-
for ( int i = 0; i < 7; i++ )
103-
{
104-
newAsset.reserved[i] = 0; // Add in the reserved bytes.
105-
}
108+
// set reserved equal to zero
109+
//newAsset.reserved[4] = {0};
110+
111+
// same for padding
112+
//newAsset.padding[7] = {0};
106113

107114
fileStream.write(buffer.data(), size);
108115
currentOffset += size;
@@ -161,8 +168,6 @@ bool BBFBuilder::addMetadata(const std::string& key, const std::string& value)
161168

162169
bool BBFBuilder::finalize()
163170
{
164-
uint64_t indexStart = currentOffset;
165-
166171
// Initialize XXH3 State
167172
XXH3_state_t* const state = XXH3_createState();
168173
if (state == nullptr) return false;
@@ -179,6 +184,7 @@ bool BBFBuilder::finalize()
179184
//write footer
180185
BBFFooter footer;
181186
footer.stringPoolOffset = currentOffset;
187+
footer.extraOffset = 0; // set the extraOffset to 0 since we aren't using it.
182188

183189
//fileStream.write(stringPool.data(), stringPool.size());
184190
//currentOffset += stringPool.size();
@@ -231,4 +237,42 @@ bool BBFBuilder::finalize()
231237
fileStream.write(reinterpret_cast<char*>(&footer), sizeof(BBFFooter));
232238
fileStream.close();
233239
return true;
240+
}
241+
242+
// Maps a file extension (including the leading dot, e.g. ".png") to its
// BBFMediaType. Matching is case-insensitive. Extensions that are not
// recognised yield BBFMediaType::UNKNOWN.
BBFMediaType detectTypeFromExtension(const std::string &extension)
{
    std::string ext = extension;
    // std::tolower requires a value representable as unsigned char; passing a
    // negative plain char (non-ASCII byte in a file name) is undefined
    // behaviour, so convert through unsigned char first.
    std::transform(ext.begin(), ext.end(), ext.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    if (ext == ".png") return BBFMediaType::PNG;
    if (ext == ".jpg" || ext == ".jpeg") return BBFMediaType::JPG;
    if (ext == ".avif") return BBFMediaType::AVIF;
    if (ext == ".webp") return BBFMediaType::WEBP;
    if (ext == ".jxl") return BBFMediaType::JXL;
    if (ext == ".bmp") return BBFMediaType::BMP;
    if (ext == ".gif") return BBFMediaType::GIF;
    // Accept both common spellings, mirroring the .jpg/.jpeg handling.
    if (ext == ".tiff" || ext == ".tif") return BBFMediaType::TIFF;

    return BBFMediaType::UNKNOWN;
}
258+
259+
std::string MediaTypeToStr(uint8_t type)
260+
{
261+
BBFMediaType mediaType = static_cast<BBFMediaType>(type);
262+
263+
switch (mediaType)
264+
{
265+
case BBFMediaType::AVIF: return ".avif";
266+
case BBFMediaType::PNG: return ".png";
267+
case BBFMediaType::JPG: return ".jpg";
268+
case BBFMediaType::WEBP: return ".webp";
269+
case BBFMediaType::JXL: return ".jxl";
270+
case BBFMediaType::BMP: return ".bmp";
271+
case BBFMediaType::GIF: return ".gif";
272+
case BBFMediaType::TIFF: return ".tiff";
273+
274+
case BBFMediaType::UNKNOWN:
275+
default:
276+
return ".png";
277+
}
234278
}

0 commit comments

Comments
 (0)