-
Notifications
You must be signed in to change notification settings - Fork 37
Expand file tree
/
Copy pathmigration_guide.py
More file actions
293 lines (228 loc) · 9.46 KB
/
migration_guide.py
File metadata and controls
293 lines (228 loc) · 9.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
#!/usr/bin/env python3
"""
Migration guide for switching from dict/SortedDict to BPlusTree.
This example shows how to migrate existing code that uses standard
dictionaries or SortedDict to use BPlusTree with minimal changes
while gaining performance benefits.
"""
import sys
import os
# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from bplustree import BPlusTreeMap
def demo_dict_migration():
"""Show how to migrate from regular dict to BPlusTree."""
print("=== Migrating from dict to BPlusTree ===\n")
print("BEFORE (using dict):")
print("```python")
print("# Original dict-based code")
print("data = {}")
print("data[1] = 'apple'")
print("data[3] = 'cherry'")
print("data[2] = 'banana'")
print("print(f'Length: {len(data)}')")
print("print(f'Value: {data[2]}')")
print("print(f'Keys: {list(data.keys())}')")
print("```")
# Original dict code
data = {}
data[1] = "apple"
data[3] = "cherry"
data[2] = "banana"
print(
f"Dict output - Length: {len(data)}, Value: {data[2]}, Keys: {list(data.keys())}"
)
print("\nAFTER (using BPlusTree):")
print("```python")
print("# Migrated to BPlusTree - MINIMAL CHANGES!")
print("data = BPlusTreeMap() # Only change: constructor")
print("data[1] = 'apple' # Same syntax")
print("data[3] = 'cherry' # Same syntax")
print("data[2] = 'banana' # Same syntax")
print("print(f'Length: {len(data)}')")
print("print(f'Value: {data[2]}')")
print("print(f'Keys: {list(data.keys())}')")
print("```")
# BPlusTree equivalent
data = BPlusTreeMap()
data[1] = "apple"
data[3] = "cherry"
data[2] = "banana"
print(
f"BPlusTree output - Length: {len(data)}, Value: {data[2]}, Keys: {list(data.keys())}"
)
print("✓ Keys are now automatically sorted!")
def demo_sorteddict_migration():
"""Show migration from SortedDict to BPlusTree."""
print("\n=== Migrating from SortedDict to BPlusTree ===\n")
try:
from sortedcontainers import SortedDict
print("BEFORE (using SortedDict):")
print("```python")
print("from sortedcontainers import SortedDict")
print("data = SortedDict()")
print("# ... same operations ...")
print("```")
# SortedDict example
sorted_data = SortedDict()
sorted_data.update({5: "five", 1: "one", 3: "three"})
print(f"SortedDict: {list(sorted_data.items())}")
except ImportError:
print("SortedDict not available, showing conceptual migration:")
print("\nAFTER (using BPlusTree):")
print("```python")
print("from bplustree import BPlusTreeMap")
print("data = BPlusTreeMap(capacity=64) # Optional: tune for performance")
print("# ... same operations ...")
print("```")
# BPlusTree equivalent
bplus_data = BPlusTreeMap(capacity=64)
bplus_data.update({5: "five", 1: "one", 3: "three"})
print(f"BPlusTree: {list(bplus_data.items())}")
print("✓ Same sorted behavior, potentially better performance!")
def demo_api_compatibility():
"""Demonstrate full API compatibility."""
print("\n=== Complete API Compatibility ===\n")
print("All standard dict methods work with BPlusTree:")
tree = BPlusTreeMap(capacity=8)
print("\n1. Basic operations:")
print(" tree[key] = value, tree[key], del tree[key], key in tree")
tree[1] = "one"
tree[2] = "two"
print(f" tree[1] = {tree[1]}")
print(f" 1 in tree: {1 in tree}")
del tree[1]
print(f" After del tree[1]: {1 in tree}")
print("\n2. Dictionary methods:")
print(" get(), pop(), popitem(), setdefault(), update(), copy(), clear()")
tree.update({3: "three", 4: "four", 5: "five"})
print(f" After update: {len(tree)} items")
value = tree.get(3, "default")
print(f" get(3): {value}")
popped = tree.pop(4)
print(f" pop(4): {popped}")
key, value = tree.popitem()
print(f" popitem(): ({key}, {value})")
result = tree.setdefault(10, "ten")
print(f" setdefault(10, 'ten'): {result}")
copied = tree.copy()
print(f" copy(): {len(copied)} items")
tree.clear()
print(f" After clear(): {len(tree)} items")
print(f" Copy still has: {len(copied)} items")
print("\n3. Iteration methods:")
print(" keys(), values(), items()")
tree.update({1: "one", 2: "two", 3: "three"})
print(f" keys(): {list(tree.keys())}")
print(f" values(): {list(tree.values())}")
print(f" items(): {list(tree.items())}")
def demo_performance_benefits():
"""Show where you get performance benefits after migration."""
print("\n=== Performance Benefits After Migration ===\n")
tree = BPlusTreeMap(capacity=32)
# Add sample data
for i in range(1000):
tree[i] = f"item_{i}"
print("BONUS: New capabilities not available with dict:")
print("\n1. Range queries (major advantage):")
print(" tree.range(start, end) - not possible with regular dict!")
range_items = list(tree.range(100, 110))
print(f" tree.range(100, 110): {len(range_items)} items")
for key, value in range_items[:3]:
print(f" {key}: {value}")
print(" ...")
print("\n2. Ordered iteration (automatic with BPlusTree):")
print(" No need to call sorted() on dict.items()!")
print("\n3. Performance advantages:")
print(" ✓ 2.5x faster for partial range scans")
print(" ✓ 1.4x faster for large dataset iteration")
print(" ✓ Excellent scaling with dataset size")
print(" ✓ Memory-efficient for large datasets")
def demo_gotchas_and_tips():
"""Show potential gotchas and migration tips."""
print("\n=== Migration Tips & Potential Gotchas ===\n")
print("1. CAPACITY TUNING:")
print(" Default capacity (128) is good for most use cases")
print(" For very large datasets, consider capacity=64 or higher")
print(" For testing/small data, capacity=4-16 is fine")
tree_small = BPlusTreeMap(capacity=4)
tree_large = BPlusTreeMap(capacity=128)
print(f" Small capacity tree: {tree_small.capacity}")
print(f" Large capacity tree: {tree_large.capacity}")
print("\n2. KEY ORDERING:")
print(" Keys must be comparable (support <, >, ==)")
print(" Mixed types that can't be compared will raise TypeError")
tree = BPlusTreeMap()
tree[1] = "number"
tree["hello"] = "string"
# tree[None] = "none" # This would fail: None < 1 not supported
print(" ✓ Use consistent key types for best results")
print("\n3. WHEN NOT TO MIGRATE:")
print(" - Very small datasets (< 100 items)")
print(" - Mostly random single-key lookups")
print(" - Memory is extremely constrained")
print(" - Keys are not orderable")
print("\n4. WHEN TO DEFINITELY MIGRATE:")
print(" ✓ Need range queries")
print(" ✓ Frequently iterate in order")
print(" ✓ Large datasets (1000+ items)")
print(" ✓ Database-like access patterns")
print(" ✓ Pagination or 'top N' queries")
def demo_real_world_migration():
"""Show a realistic migration example."""
print("\n=== Real-World Migration Example ===\n")
print("Scenario: User session management system")
print("\nBEFORE (dict-based):")
print("```python")
print("# Original implementation")
print("user_sessions = {}")
print("user_sessions[timestamp] = session_data")
print("# To get recent sessions, need to sort keys")
print("recent = sorted(user_sessions.items())[-10:]")
print("```")
print("\nAFTER (BPlusTree-based):")
print("```python")
print("# Migrated implementation")
print("user_sessions = BPlusTreeMap(capacity=64)")
print("user_sessions[timestamp] = session_data")
print("# Get recent sessions efficiently")
print("cutoff = time.time() - 3600 # Last hour")
print("recent = list(user_sessions.range(cutoff, None))")
print("```")
# Demonstrate the improvement
import time
user_sessions = BPlusTreeMap(capacity=64)
current_time = time.time()
# Add session data
for i in range(100):
timestamp = current_time - (100 - i) * 60 # Sessions over last 100 minutes
user_sessions[timestamp] = {
"user_id": f"user_{i % 20}",
"action": f"action_{i}",
"ip": f"192.168.1.{i % 255}",
}
# Get sessions from last 30 minutes
cutoff = current_time - 30 * 60
recent_sessions = list(user_sessions.range(cutoff, None))
print(f"\nResult: Found {len(recent_sessions)} recent sessions efficiently!")
print("This would require sorting the entire dict with the original approach.")
def main():
"""Run all migration demonstrations."""
print("🔄 BPlusTree Migration Guide 🔄\n")
print("Learn how to migrate your existing code to BPlusTree!\n")
demo_dict_migration()
demo_sorteddict_migration()
demo_api_compatibility()
demo_performance_benefits()
demo_gotchas_and_tips()
demo_real_world_migration()
print("\n=== Migration Checklist ===")
print("□ Replace dict() or {} with BPlusTreeMap()")
print("□ Add capacity parameter for performance tuning")
print("□ Ensure keys are consistently orderable")
print("□ Test with your actual dataset size")
print("□ Leverage new range query capabilities")
print("□ Measure performance improvements")
print("\n✅ Migration complete! Enjoy your performance boost!")
if __name__ == "__main__":
main()