Skip to content

Commit 6062f91

Browse files
authored
Merge pull request #32 from LaBackDoor:feature/memory_map_flows
Feature/memory_map_flows
2 parents 00d957e + 9f454bf commit 6062f91

20 files changed

Lines changed: 1654 additions & 84 deletions

File tree

Cargo.lock

Lines changed: 71 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ shared-version = true
2020
tag-name = "v{{version}}"
2121

2222
[workspace.package]
23-
version = "0.6.0"
23+
version = "0.6.1"
2424
edition = "2024"
2525
license = "GPL-3.0-only"
2626
authors = ["Stackforge Contributors"]
@@ -31,6 +31,8 @@ bytes = "1.11.0"
3131
smallvec = "1.15.1"
3232
thiserror = "2.0.17"
3333
dashmap = "6.1"
34+
memmap2 = "0.9"
35+
tempfile = "3"
3436
stackforge-core = { path = "crates/stackforge-core" }
3537
stackforge-automata = { path = "crates/stackforge-automata" }
3638

README.md

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111

1212
- **Scapy-style API** — Stack layers with `Ether() / IP() / TCP()`, set fields with keyword arguments
1313
- **High Performance** — Core logic in Rust, zero-copy parsing, copy-on-write mutation
14-
- **Broad Protocol Support** — Ethernet, ARP, IPv4/IPv6, TCP, UDP, ICMP/ICMPv6 (with echo correlation), DNS, HTTP/1.x, HTTP/2, QUIC, L2TP, 802.11 (Wi-Fi), 802.15.4 (Zigbee), and custom protocols
15-
- **Stateful Flow Extraction** — Extract bidirectional conversations from PCAP files with TCP state tracking, stream reassembly, UDP timeout handling, and optional max packet/flow length tracking
16-
- **PCAP I/O** — Read and write pcap files with `rdpcap()` / `wrpcap()`
14+
- **Broad Protocol Support** — Ethernet, ARP, IPv4/IPv6, TCP, UDP, ICMP/ICMPv6 (with echo correlation), DNS, HTTP/1.x, HTTP/2, QUIC, L2TP, MQTT, MQTT-SN, Modbus, Z-Wave, FTP, TFTP, SMTP, POP3, IMAP, 802.11 (Wi-Fi), 802.15.4 (Zigbee), and custom protocols
15+
- **Stateful Flow Extraction** — Extract bidirectional conversations from PCAP/PcapNG files with TCP state tracking, stream reassembly, UDP timeout handling, and optional max packet/flow length tracking
16+
- **Memory-Budgeted Streaming** — Process gigabyte-sized captures without loading everything into RAM; set a memory budget and reassembly buffers automatically spill to memory-mapped temp files
17+
- **PCAP & PcapNG I/O** — Read and write both classic PCAP and PcapNG files with auto-detection via `rdpcap()` / `wrpcap()` / `wrpcapng()`
1718
- **Python Bindings** — Seamless integration via PyO3/maturin
1819
- **Custom Protocols** — Define runtime protocols with `CustomLayer` and typed fields
1920

@@ -77,10 +78,10 @@ print(pkt.summary()) # "Ethernet / IPv4 / TCP"
7778
print(pkt.show()) # detailed layer view
7879
```
7980

80-
### Read and write PCAP files
81+
### Read and write PCAP / PcapNG files
8182

8283
```python
83-
from stackforge import rdpcap, wrpcap, PcapReader, Ether, IP, TCP
84+
from stackforge import rdpcap, wrpcap, wrpcapng, PcapReader, Ether, IP, TCP
8485

8586
# Write packets to a pcap file
8687
packets = [
@@ -89,13 +90,20 @@ packets = [
8990
]
9091
wrpcap("capture.pcap", packets)
9192

92-
# Read all packets at once
93-
packets = rdpcap("capture.pcap")
93+
# Write PcapNG format explicitly
94+
wrpcapng("capture.pcapng", packets)
95+
96+
# wrpcap auto-detects format from extension
97+
wrpcap("capture.pcapng", packets) # writes PcapNG
98+
99+
# Read any format (auto-detected)
100+
packets = rdpcap("capture.pcap") # classic PCAP
101+
packets = rdpcap("capture.pcapng") # PcapNG — same API
94102
for pkt in packets:
95103
print(pkt.summary())
96104

97-
# Stream large pcap files
98-
for pkt in PcapReader("large_capture.pcap"):
105+
# Stream large captures (works with both formats)
106+
for pkt in PcapReader("large_capture.pcapng"):
99107
print(pkt.summary())
100108
```
101109

@@ -257,6 +265,36 @@ pkt.parse()
257265
print(pkt.has_layer(LayerKind.L2tp))
258266
```
259267

268+
### IoT Protocols
269+
270+
```python
271+
from stackforge import MQTT, MQTTSN, Modbus, ZWave
272+
273+
# MQTT (auto-detected on TCP port 1883)
274+
pkt = Ether() / IP() / TCP(dport=1883) / MQTT(msg_type=1) # CONNECT
275+
276+
# MQTT-SN (auto-detected on UDP port 1883)
277+
pkt = Ether() / IP() / UDP(dport=1883) / MQTTSN(msg_type=0x04) # PUBLISH
278+
279+
# Modbus TCP (auto-detected on TCP port 502)
280+
pkt = Ether() / IP() / TCP(dport=502) / Modbus(func_code=3, data=b"\x00\x01\x00\x0a")
281+
282+
# Z-Wave (wireless, not auto-detected over TCP/UDP)
283+
pkt = ZWave(home_id=0x12345678, src=1, dst=2, cmd_class=0x25, cmd=0x01)
284+
```
285+
286+
### Email & File Transfer Protocols
287+
288+
```python
289+
from stackforge import FTP, TFTP, SMTP, POP3, IMAP
290+
291+
# FTP (TCP port 21), SMTP (TCP ports 25/587/465), POP3 (TCP port 110), IMAP (TCP port 143)
292+
# All auto-detected during packet parsing
293+
294+
# TFTP (UDP port 69)
295+
pkt = Ether() / IP() / UDP(dport=69) / TFTP(opcode=1, filename="test.txt", mode="octet")
296+
```
297+
260298
### Stateful Flow Extraction
261299

262300
Extract bidirectional conversations from PCAP captures with full TCP state machine tracking, stream reassembly, and UDP timeout-based flow grouping.
@@ -292,7 +330,7 @@ packets = rdpcap("capture.pcap")
292330
conversations = extract_flows_from_packets(packets)
293331
```
294332

295-
Customize timeouts and buffer limits with `FlowConfig`:
333+
Customize timeouts, buffer limits, and memory budget with `FlowConfig`:
296334

297335
```python
298336
config = FlowConfig(
@@ -303,6 +341,20 @@ config = FlowConfig(
303341
conversations = extract_flows("capture.pcap", config=config)
304342
```
305343

344+
#### Memory-Budgeted Flow Extraction
345+
346+
For large captures, set a memory budget so reassembly buffers automatically spill to disk when RAM is tight:
347+
348+
```python
349+
config = FlowConfig(
350+
memory_budget=256 * 1024 * 1024, # 256 MB RAM budget
351+
spill_dir="/tmp/stackforge-spill", # optional custom spill directory
352+
)
353+
conversations = extract_flows("large_capture.pcapng", config=config)
354+
```
355+
356+
Packets stream from disk one at a time (never loaded all at once). When TCP reassembly buffers exceed the budget, the largest buffers are transparently spilled to memory-mapped temp files and read back on demand. Temp files are automatically cleaned up via RAII.
357+
306358
Optional: Track maximum packet sizes during flow extraction:
307359

308360
```python

crates/stackforge-core/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ default-net = "0.22.0"
1515
rand = { version = "0.9.2", optional = true }
1616
pcap-file = "2"
1717
dashmap = { workspace = true }
18+
memmap2 = { workspace = true }
19+
tempfile = { workspace = true }
1820

1921
# TLS crypto dependencies
2022
hmac = { version = "0.12", optional = true }

crates/stackforge-core/src/flow/config.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::path::PathBuf;
12
use std::time::Duration;
23

34
/// Configuration for the flow extraction engine.
@@ -24,6 +25,11 @@ pub struct FlowConfig {
2425
pub track_max_packet_len: bool,
2526
/// Track maximum flow length per direction (default: false).
2627
pub track_max_flow_len: bool,
28+
/// Total RAM budget for flow extraction (None = unlimited).
29+
/// When set, reassembly buffers will be spilled to disk when exceeded.
30+
pub memory_budget: Option<usize>,
31+
/// Directory for spill files (None = system temp dir).
32+
pub spill_dir: Option<PathBuf>,
2733
}
2834

2935
impl Default for FlowConfig {
@@ -38,6 +44,8 @@ impl Default for FlowConfig {
3844
eviction_interval: Duration::from_secs(30),
3945
track_max_packet_len: false,
4046
track_max_flow_len: false,
47+
memory_budget: None,
48+
spill_dir: None,
4149
}
4250
}
4351
}

crates/stackforge-core/src/flow/error.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ pub enum FlowError {
2020
#[error("too many discontinuous fragments ({count}, limit {limit})")]
2121
TooManyFragments { count: usize, limit: usize },
2222

23+
#[error("disk spill I/O error: {0}")]
24+
SpillError(String),
25+
2326
#[error(transparent)]
2427
PacketError(#[from] PacketError),
2528
}

0 commit comments

Comments
 (0)