From fdf6c84ba19ab67a9c71f93c3919220ce6a606cf Mon Sep 17 00:00:00 2001 From: Kures <14836932+Kures@users.noreply.github.com> Date: Fri, 8 May 2026 20:52:36 +0300 Subject: [PATCH 1/2] fix(http): unblock request reader after Zig 0.16 migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every HTTP request was timing out because readSliceShort blocks until the destination buffer is fully filled or the stream reaches EOF. The read chunk is 4096 bytes, but a typical request (e.g. GET /health) is ~84 bytes — so the reader sat waiting for 4 KB of input that never came. After the curl client gave up and FIN'd the connection, the reader would finally return; by then the response was useless. Switch to readVec, which performs a single recv per call and returns whatever the kernel had — typically the entire request in one shot. Effect: - Before: every endpoint hangs until client timeout, then ~5s after the request the response is sent into a closed socket. - After: /health and /metrics respond in <2 ms. zig build test still 340/340 because the test harness invokes api.handleRequest directly and never exercises this read path. The right structural follow-up is a black-box integration test that spawns the binary and asserts a real HTTP response, but that's out of scope for this fix. 
--- src/main.zig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main.zig b/src/main.zig index 8ea8451..2f435df 100644 --- a/src/main.zig +++ b/src/main.zig @@ -469,7 +469,11 @@ fn readHttpRequest(allocator: std.mem.Allocator, stream: *std.Io.net.Stream, max var chunk: [request_read_chunk]u8 = undefined; while (true) { - const n = try reader.interface.readSliceShort(&chunk); + var data: [1][]u8 = .{&chunk}; + const n = reader.interface.readVec(&data) catch |err| switch (err) { + error.EndOfStream => return null, + else => |e| return e, + }; if (n == 0) return null; try buffer.appendSlice(allocator, chunk[0..n]); From e7fc1206732438f7b431142ca6b2660f5f102fbe Mon Sep 17 00:00:00 2001 From: Kures <14836932+Kures@users.noreply.github.com> Date: Fri, 8 May 2026 21:31:45 +0300 Subject: [PATCH 2/2] test(http): add integration test that catches the readSliceShort regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior commit fixed the symptom (readVec instead of readSliceShort in readHttpRequest) but left the structural gap that allowed the regression to slip through: nothing in the unit-test suite exercises the live accept-read-write loop. All 340 tests invoke api.handleRequest directly, so a stall in the read step is invisible to them. This test binds 127.0.0.1:0, connects from a libc client socket on the same thread (kernel's listen backlog handles the race), sends a short HTTP request without ever closing the write side, and calls readHttpRequest on the accepted Stream. The assertion is wall-clock under 200 ms — comfortable headroom over loopback variance, far below both the 2 s receive timeout and the ~5 s the regression took to surface against a real client (a regressed read waits for the chunk buffer to fill or for an EOF from the client; neither happens here). A SO_RCVTIMEO of 2 s on the server-side socket converts a regressed "hangs forever" into a clean test failure rather than a hung CI run. 
Verified two ways: - with the fix in place: 341/341 pass - with readVec reverted to readSliceShort: this test fails (340 pass, 1 crash) within the SO_RCVTIMEO window Skipped on Windows because the test reaches into libc socket primitives directly; the platform-portable equivalent would need a Winsock branch we do not need for the regression we are guarding. --- src/main.zig | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/src/main.zig b/src/main.zig index 2f435df..ed10c7d 100644 --- a/src/main.zig +++ b/src/main.zig @@ -637,6 +637,89 @@ test "worker_protocol hasExplicitPath identifies explicit path URLs" { try std.testing.expect(worker_protocol.hasExplicitPath("http://localhost:3000/webhook")); } +// Integration test: covers the accept-read-write loop in main.zig that previously +// regressed when readSliceShort replaced one-shot semantics. The 340 unit tests +// invoke api.handleRequest directly and would not catch a stall in the live HTTP +// path. +// +// We bind a real loopback socket on a kernel-assigned port, connect a client +// from the same thread, send a small HTTP request without ever closing the +// write side, and call readHttpRequest on the accepted Stream. Pre-fix, this +// would block until the client FIN'd or the read chunk buffer filled — neither +// happens here, so the test would hang. To make a regression fail loudly +// instead of hanging, we set SO_RCVTIMEO on the server-side socket so the +// reader returns an error within a small bound. +test "readHttpRequest does not block when request is smaller than chunk" { + if (builtin.os.tag == .windows) return error.SkipZigTest; + + const c_sys = std.posix.system; + + // 1. 
Listen on 127.0.0.1:0 + const server_addr: std.Io.net.IpAddress = .{ .ip4 = std.Io.net.Ip4Address.loopback(0) }; + var server = try server_addr.listen(std_compat.io(), .{ .reuse_address = true }); + defer server.deinit(std_compat.io()); + + const port = server.socket.address.ip4.port; + + // 2. Plain libc client socket — single-threaded: kernel completes 3-way + // handshake on connect against the listening socket, so accept on the + // same thread returns immediately afterwards. + const sock_rc = c_sys.socket(std.posix.AF.INET, std.posix.SOCK.STREAM, std.posix.IPPROTO.TCP); + try std.testing.expect(sock_rc >= 0); + const client_fd: std.posix.fd_t = @intCast(sock_rc); + defer _ = c_sys.close(client_fd); + + var sin: std.posix.sockaddr.in = .{ + .family = std.posix.AF.INET, + .port = std.mem.nativeToBig(u16, port), + .addr = std.mem.nativeToBig(u32, 0x7F000001), + }; + const cr = c_sys.connect(client_fd, @ptrCast(&sin), @sizeOf(@TypeOf(sin))); + try std.testing.expectEqual(@as(c_int, 0), cr); + + // 3. accept on the server side. On the bug version, the read below would + // block; the connection itself accepts fine. + var conn = try server.accept(std_compat.io()); + defer conn.close(std_compat.io()); + + // 4. Set a 2 second receive timeout on the server-side socket. With the + // fix, the read returns in <1 ms. With the regression, it would block + // indefinitely; the timeout turns that into a clean failure rather + // than a hung test. + const tv: std.posix.timeval = .{ .sec = 2, .usec = 0 }; + _ = c_sys.setsockopt( + conn.socket.handle, + std.posix.SOL.SOCKET, + std.posix.SO.RCVTIMEO, + @ptrCast(&tv), + @sizeOf(@TypeOf(tv)), + ); + + // 5. Send the request from the client side. Crucially we do NOT shut down + // the write side — there is no FIN, so a read that waits for EOF would + // block forever (modulo the timeout above). 
+ const req = "GET /health HTTP/1.1\r\nHost: localhost\r\n\r\n"; + const sent = c_sys.send(client_fd, req.ptr, req.len, std.posix.MSG.NOSIGNAL); + try std.testing.expectEqual(@as(isize, @intCast(req.len)), sent); + + // 6. Time the read. + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + const t0 = ids.nowMs(); + const parsed = try readHttpRequest(arena.allocator(), &conn, max_request_size); + const elapsed_ms = ids.nowMs() - t0; + + try std.testing.expect(parsed != null); + try std.testing.expectEqualStrings("GET", parsed.?.method); + try std.testing.expectEqualStrings("/health", parsed.?.target); + + // The fix delivers single-syscall recv semantics. 200 ms is comfortably + // above any plausible loopback variance and far below the 2 s timeout + // and the ~5 s the regression would take to surface. + try std.testing.expect(elapsed_ms < 200); +} + comptime { _ = @import("ids.zig"); _ = @import("types.zig");