From ebb1940c8ebdd26c64ab65f9f0b5d82c187d2265 Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Tue, 2 Dec 2025 10:53:43 +0000 Subject: [PATCH] Add a state machine to track the state of the ZIP We have a number of things which may only occur on state transitions or in certain states - where the states are defined by the section of the ZIP file we have just output. It is better - for rollbacks, but also for general sanity - to control this via a state machine instead of flags. --- lib/zip_kit/streamer.rb | 94 ++++-- lib/zip_kit/streamer/state_machine.rb | 115 +++++++ spec/zip_kit/streamer/state_machine_spec.rb | 325 ++++++++++++++++++++ spec/zip_kit/streamer_spec.rb | 165 ++++++++++ 4 files changed, 675 insertions(+), 24 deletions(-) create mode 100644 lib/zip_kit/streamer/state_machine.rb create mode 100644 spec/zip_kit/streamer/state_machine_spec.rb diff --git a/lib/zip_kit/streamer.rb b/lib/zip_kit/streamer.rb index 2b32100..45c35bf 100644 --- a/lib/zip_kit/streamer.rb +++ b/lib/zip_kit/streamer.rb @@ -92,6 +92,7 @@ class ZipKit::Streamer autoload :Entry, File.dirname(__FILE__) + "/streamer/entry.rb" autoload :Filler, File.dirname(__FILE__) + "/streamer/filler.rb" autoload :Heuristic, File.dirname(__FILE__) + "/streamer/heuristic.rb" + autoload :StateMachine, File.dirname(__FILE__) + "/streamer/state_machine.rb" include ZipKit::WriteShovel @@ -100,6 +101,7 @@ class ZipKit::Streamer EntryBodySizeMismatch = Class.new(StandardError) InvalidOutput = Class.new(ArgumentError) + InvalidState = Class.new(StandardError) Overflow = Class.new(StandardError) UnknownMode = Class.new(StandardError) OffsetOutOfSync = Class.new(StandardError) @@ -127,6 +129,9 @@ def self.open(stream, **kwargs_for_new) # @param auto_rename_duplicate_filenames[Boolean] whether duplicate filenames, when encountered, # should be suffixed with (1), (2) etc. Default value is `false` - if # dupliate names are used an exception will be raised + # @return [ZipKit::Streamer::StateMachine] the state machine tracking ZIP output state + attr_reader :state_machine + def initialize(writable, writer: create_writer, auto_rename_duplicate_filenames: false) raise InvalidOutput, "The writable must respond to #<< or #write" unless writable.respond_to?(:<<) || writable.respond_to?(:write) @@ -135,6 +140,7 @@ def initialize(writable, writer: create_writer, auto_rename_duplicate_filenames: @path_set = ZipKit::PathSet.new @writer = writer @dedupe_filenames = auto_rename_duplicate_filenames + @state_machine = StateMachine.new end # Writes a part of a zip entry body (actual binary data of the entry) into the output stream. @@ -273,11 +279,8 @@ def add_empty_directory(dirname:, modification_time: Time.now.utc, unix_permissi # output (using `IO.copy_stream` is a good approach). # @return [ZipKit::Streamer::Writable] without a block - the Writable sink which has to be closed manually def write_file(filename, modification_time: Time.now.utc, unix_permissions: nil, &blk) - # Reset rollback state when starting a new entry attempt, so that if this entry - # fails before writing a header, rollback! won't use stale values from a previous entry - @offset_before_last_local_file_header = nil - @remove_last_file_at_rollback = false - + # Heuristic buffers data, then calls write_stored_file or write_deflated_file + # State only changes when actual ZIP data is written to the stream writable = ZipKit::Streamer::Heuristic.new(self, filename, modification_time: modification_time, unix_permissions: unix_permissions) yield_or_return_writable(writable, &blk) end @@ -406,9 +409,14 @@ def write_deflated_file(filename, modification_time: Time.now.utc, unix_permissi # # @return [Integer] the offset the output IO is at after closing the archive def close + raise_if_closed! + # Make sure offsets are in order verify_offsets! + # Transition to central directory state + @state_machine.begin_central_directory(@out.tell) + # Record the central directory offset, so that it can be written into the EOCD record cdir_starts_at = @out.tell @@ -438,6 +446,9 @@ def close central_directory_size: cdir_size, num_files_in_archive: @files.length) + # Finalize the state machine + @state_machine.finalize(@out.tell) + # Clear the files so that GC will not have to trace all the way to here to deallocate them @files.clear @path_set.clear @@ -479,6 +490,9 @@ def update_last_entry_and_write_data_descriptor(crc32:, compressed_size:, uncomp uncompressed_size: last_entry.uncompressed_size) last_entry.bytes_used_for_data_descriptor = @out.tell - offset_before_data_descriptor + # Transition state machine to data_descriptors state + @state_machine.write_data_descriptor(@out.tell) + @out.tell end @@ -506,21 +520,28 @@ def update_last_entry_and_write_data_descriptor(crc32:, compressed_size:, uncomp # end # @return [Integer] position in the output stream / ZIP archive def rollback! - @files.pop if @remove_last_file_at_rollback + state = @state_machine.state - # Recreate the path set from remaining entries (PathSet does not support cheap deletes yet) - @path_set.clear - @files.each do |e| - @path_set.add_directory_or_file_path(e.filename) unless e.filler? - end + # Only perform rollback if we are in entry-related states + # If state is still :initial (e.g., Heuristic failed before deciding), there's nothing to roll back + if state == StateMachine::ENTRY_BODY || state == StateMachine::LOCAL_HEADER + context = @state_machine.rollback(@out.tell) - # Create filler for the truncated or unusable local file entry that did get written into the output - # Only create a filler if a local file header was actually written (indicated by - # @offset_before_last_local_file_header being set). If it's nil, no header was written, - # so there's nothing to create a filler for. - if @offset_before_last_local_file_header - filler_size_bytes = @out.tell - @offset_before_last_local_file_header - @files << Filler.new(filler_size_bytes) + # Remove the failed entry from @files + if context[:current_entry] + @files.delete(context[:current_entry]) + end + + # Recreate the path set from remaining entries (PathSet does not support cheap deletes yet) + @path_set.clear + @files.each do |e| + @path_set.add_directory_or_file_path(e.filename) unless e.filler? + end + + # Create filler to maintain correct offsets in central directory + if context[:bytes_written] > 0 + @files << Filler.new(context[:bytes_written]) + end end @out.tell @@ -528,6 +549,30 @@ def rollback! private + def raise_if_cannot_begin_entry! + return if @state_machine.can_begin_entry? + + if @state_machine.state == StateMachine::LOCAL_HEADER + raise InvalidState, + "Cannot start new entry: local header was written but entry body not started. " \ + "This is an internal error." + elsif @state_machine.in_entry? + entry_name = @state_machine.current_entry&.filename || "(unknown)" + raise InvalidState, + "Cannot start new entry while #{entry_name.inspect} is being written. " \ + "Close the current entry first." + elsif @state_machine.closed? + raise InvalidState, "Cannot add entry: archive is already closed" + else + raise InvalidState, "Cannot add entry in state #{@state_machine.state}" + end + end + + def raise_if_closed! + return unless @state_machine.closed? + raise InvalidState, "Archive is already closed" + end + def yield_or_return_writable(writable, &block_to_pass_writable_to) if block_to_pass_writable_to begin @@ -575,11 +620,7 @@ def add_file_and_write_local_header( use_data_descriptor:, unix_permissions: ) - # Set state needed for proper rollback later. If write_local_file_header - # does manage to write _some_ bytes, but fails later (we write in tiny bits sometimes) - # we should be able to create a filler from this offset on when we - @offset_before_last_local_file_header = @out.tell - @remove_last_file_at_rollback = false + raise_if_cannot_begin_entry! # Clean backslashes filename = remove_backslash(filename) @@ -618,6 +659,9 @@ def add_file_and_write_local_header( _bytes_used_for_data_descriptor = 0, unix_permissions) + # Record state machine transition for entry start + @state_machine.begin_entry(e, local_header_starts_at) + @writer.write_local_file_header(io: @out, gp_flags: e.gp_flags, crc32: e.crc32, @@ -630,7 +674,9 @@ def add_file_and_write_local_header( e.bytes_used_for_local_header = @out.tell - e.local_header_offset @files << e - @remove_last_file_at_rollback = true + + # Transition to entry body state + @state_machine.begin_entry_body(@out.tell) end def remove_backslash(filename) diff --git a/lib/zip_kit/streamer/state_machine.rb b/lib/zip_kit/streamer/state_machine.rb new file mode 100644 index 0000000..b254eed --- /dev/null +++ b/lib/zip_kit/streamer/state_machine.rb @@ -0,0 +1,115 @@ +# frozen_string_literal: true + +# Tracks the state of ZIP output to enforce valid call sequences. +# States mirror the ZIP file structure: local headers, entry bodies, +# data descriptors, central directory, and EOCD. +class ZipKit::Streamer::StateMachine + INITIAL = :initial + LOCAL_HEADER = :local_header + ENTRY_BODY = :entry_body + DATA_DESCRIPTORS = :data_descriptors + CENTRAL_DIRECTORY = :central_directory + END_OF_CENTRAL_DIRECTORY = :end_of_central_directory + + TRANSITIONS = { + INITIAL => [LOCAL_HEADER, CENTRAL_DIRECTORY], + LOCAL_HEADER => [ENTRY_BODY], + ENTRY_BODY => [DATA_DESCRIPTORS, LOCAL_HEADER, CENTRAL_DIRECTORY], + DATA_DESCRIPTORS => [LOCAL_HEADER, CENTRAL_DIRECTORY], + CENTRAL_DIRECTORY => [END_OF_CENTRAL_DIRECTORY], + END_OF_CENTRAL_DIRECTORY => [] + }.freeze + + attr_reader :state, :previous_state, :transition_offset + attr_reader :entry_offset, :current_entry + + def initialize + @state = INITIAL + @previous_state = nil + @transition_offset = 0 + @entry_offset = nil + @current_entry = nil + end + + def transition!(to_state, offset) + unless TRANSITIONS[@state]&.include?(to_state) + raise ZipKit::Streamer::InvalidState, + "Cannot transition from #{@state} to #{to_state}. " \ + "Valid next states: #{TRANSITIONS[@state].inspect}" + end + + @previous_state = @state + @transition_offset = offset + @state = to_state + end + + def begin_entry(entry, offset) + transition!(LOCAL_HEADER, offset) + @entry_offset = offset + @current_entry = entry + end + + def begin_entry_body(offset) + transition!(ENTRY_BODY, offset) + end + + def write_data_descriptor(offset) + transition!(DATA_DESCRIPTORS, offset) + @entry_offset = nil + @current_entry = nil + end + + def begin_central_directory(offset) + transition!(CENTRAL_DIRECTORY, offset) + @entry_offset = nil + @current_entry = nil + end + + def finalize(offset) + transition!(END_OF_CENTRAL_DIRECTORY, offset) + end + + def rollback(current_offset) + unless @state == ENTRY_BODY || @state == LOCAL_HEADER + raise ZipKit::Streamer::InvalidState, + "Cannot rollback from #{@state}. Rollback is only valid during entry writing." + end + + context = { + entry_offset: @entry_offset, + current_entry: @current_entry, + bytes_written: current_offset - @entry_offset + } + + # Clear entry tracking, and transition to ENTRY_BODY + # (this allows starting new entries or closing the archive) + @previous_state = @state + @state = ENTRY_BODY + @transition_offset = current_offset + @entry_offset = nil + @current_entry = nil + + context + end + + # Query methods + def can_begin_entry? + TRANSITIONS[@state]&.include?(LOCAL_HEADER) + end + + def can_write_body? + @state == ENTRY_BODY + end + + def can_close_archive? + TRANSITIONS[@state]&.include?(CENTRAL_DIRECTORY) + end + + def closed? + @state == END_OF_CENTRAL_DIRECTORY + end + + def in_entry? + @state == LOCAL_HEADER || @state == ENTRY_BODY + end +end diff --git a/spec/zip_kit/streamer/state_machine_spec.rb b/spec/zip_kit/streamer/state_machine_spec.rb new file mode 100644 index 0000000..c927b3e --- /dev/null +++ b/spec/zip_kit/streamer/state_machine_spec.rb @@ -0,0 +1,325 @@ +# frozen_string_literal: true + +require_relative "../../spec_helper" + +describe ZipKit::Streamer::StateMachine do + subject { described_class.new } + + describe "initial state" do + it "starts in INITIAL state" do + expect(subject.state).to eq(:initial) + expect(subject.can_begin_entry?).to be true + expect(subject.can_close_archive?).to be true + expect(subject.closed?).to be false + expect(subject.in_entry?).to be false + end + + it "has nil previous_state" do + expect(subject.previous_state).to be_nil + end + + it "has zero transition_offset" do + expect(subject.transition_offset).to eq(0) + end + end + + describe "#begin_entry" do + it "transitions to LOCAL_HEADER state" do + entry = double("Entry", filename: "test.txt") + subject.begin_entry(entry, 0) + + expect(subject.state).to eq(:local_header) + expect(subject.previous_state).to eq(:initial) + expect(subject.entry_offset).to eq(0) + expect(subject.current_entry).to eq(entry) + expect(subject.in_entry?).to be true + end + end + + describe "#begin_entry_body" do + it "transitions from LOCAL_HEADER to ENTRY_BODY" do + subject.begin_entry(double("Entry"), 0) + subject.begin_entry_body(50) + + expect(subject.state).to eq(:entry_body) + expect(subject.previous_state).to eq(:local_header) + expect(subject.transition_offset).to eq(50) + expect(subject.can_write_body?).to be true + end + end + + describe "full entry lifecycle with data descriptor" do + it "follows the complete lifecycle" do + entry = double("Entry", filename: "test.txt") + + subject.begin_entry(entry, 0) + expect(subject.state).to eq(:local_header) + expect(subject.entry_offset).to eq(0) + + subject.begin_entry_body(50) + expect(subject.state).to eq(:entry_body) + expect(subject.can_write_body?).to be true + + subject.write_data_descriptor(150) + expect(subject.state).to eq(:data_descriptors) + expect(subject.entry_offset).to be_nil + expect(subject.current_entry).to be_nil + expect(subject.can_begin_entry?).to be true + end + end + + describe "invalid transitions" do + it "prevents LOCAL_HEADER -> LOCAL_HEADER" do + subject.begin_entry(double("Entry"), 0) + + expect { subject.begin_entry(double("Entry2"), 100) } + .to raise_error(ZipKit::Streamer::InvalidState, /Cannot transition/) + end + + it "prevents INITIAL -> ENTRY_BODY" do + expect { subject.begin_entry_body(0) } + .to raise_error(ZipKit::Streamer::InvalidState, /Cannot transition/) + end + + it "prevents INITIAL -> DATA_DESCRIPTORS" do + expect { subject.write_data_descriptor(0) } + .to raise_error(ZipKit::Streamer::InvalidState, /Cannot transition/) + end + + it "prevents LOCAL_HEADER -> CENTRAL_DIRECTORY" do + subject.begin_entry(double("Entry"), 0) + + expect { subject.begin_central_directory(50) } + .to raise_error(ZipKit::Streamer::InvalidState, /Cannot transition/) + end + end + + describe "entries without data descriptors" do + it "allows ENTRY_BODY -> LOCAL_HEADER" do + subject.begin_entry(double("Entry1"), 0) + subject.begin_entry_body(50) + + # Direct transition to next entry (no data descriptor) + subject.begin_entry(double("Entry2"), 100) + expect(subject.state).to eq(:local_header) + end + + it "allows ENTRY_BODY -> CENTRAL_DIRECTORY" do + subject.begin_entry(double("Entry1"), 0) + subject.begin_entry_body(50) + + subject.begin_central_directory(100) + expect(subject.state).to eq(:central_directory) + end + end + + describe "DATA_DESCRIPTORS transitions" do + before do + subject.begin_entry(double("Entry1"), 0) + subject.begin_entry_body(50) + subject.write_data_descriptor(150) + end + + it "allows DATA_DESCRIPTORS -> LOCAL_HEADER" do + subject.begin_entry(double("Entry2"), 150) + expect(subject.state).to eq(:local_header) + end + + it "allows DATA_DESCRIPTORS -> CENTRAL_DIRECTORY" do + subject.begin_central_directory(150) + expect(subject.state).to eq(:central_directory) + end + end + + describe "#rollback" do + it "raises when called from INITIAL" do + expect { subject.rollback(100) } + .to raise_error(ZipKit::Streamer::InvalidState, /only valid during entry writing/) + end + + it "raises when called from DATA_DESCRIPTORS" do + subject.begin_entry(double("Entry"), 0) + subject.begin_entry_body(50) + subject.write_data_descriptor(150) + + expect { subject.rollback(200) } + .to raise_error(ZipKit::Streamer::InvalidState, /only valid during entry writing/) + end + + it "returns context and transitions to ENTRY_BODY from ENTRY_BODY" do + entry = double("Entry", filename: "fail.txt") + subject.begin_entry(entry, 42) + subject.begin_entry_body(100) + + context = subject.rollback(250) + + expect(context[:entry_offset]).to eq(42) + expect(context[:current_entry]).to eq(entry) + expect(context[:bytes_written]).to eq(208) # 250 - 42 + + # Transitions to entry_body (allows next entry or close) + expect(subject.state).to eq(:entry_body) + expect(subject.entry_offset).to be_nil + expect(subject.current_entry).to be_nil + end + + it "returns context and transitions to ENTRY_BODY from LOCAL_HEADER" do + entry = double("Entry", filename: "fail.txt") + subject.begin_entry(entry, 42) + # Don't call begin_entry_body - simulates failure during header write + + context = subject.rollback(60) + + expect(context[:entry_offset]).to eq(42) + expect(context[:current_entry]).to eq(entry) + expect(context[:bytes_written]).to eq(18) # 60 - 42 + + # Transitions to entry_body (allows next entry or close) + expect(subject.state).to eq(:entry_body) + expect(subject.entry_offset).to be_nil + expect(subject.current_entry).to be_nil + end + + it "allows beginning a new entry after rollback from ENTRY_BODY" do + subject.begin_entry(double("Entry1"), 0) + subject.begin_entry_body(50) + subject.rollback(100) + + # Should be able to start a new entry + subject.begin_entry(double("Entry2"), 100) + expect(subject.state).to eq(:local_header) + end + + it "allows beginning a new entry after rollback from LOCAL_HEADER" do + subject.begin_entry(double("Entry1"), 0) + subject.rollback(50) + + # Should be able to start a new entry + subject.begin_entry(double("Entry2"), 50) + expect(subject.state).to eq(:local_header) + end + + it "allows closing archive after rollback" do + subject.begin_entry(double("Entry1"), 0) + subject.begin_entry_body(50) + subject.rollback(100) + + # Should be able to close archive + subject.begin_central_directory(100) + expect(subject.state).to eq(:central_directory) + end + end + + describe "#finalize" do + it "transitions from CENTRAL_DIRECTORY to END_OF_CENTRAL_DIRECTORY" do + subject.begin_central_directory(0) + subject.finalize(100) + + expect(subject.state).to eq(:end_of_central_directory) + expect(subject.closed?).to be true + end + end + + describe "empty archive" do + it "allows INITIAL -> CENTRAL_DIRECTORY (empty archive)" do + subject.begin_central_directory(0) + expect(subject.state).to eq(:central_directory) + end + end + + describe "query methods" do + describe "#can_begin_entry?" do + it "returns true from states that allow new entries" do + expect(subject.can_begin_entry?).to be true + + subject.begin_entry(double("Entry"), 0) + subject.begin_entry_body(50) + expect(subject.can_begin_entry?).to be true + + subject.write_data_descriptor(100) + expect(subject.can_begin_entry?).to be true + end + + it "returns false from LOCAL_HEADER" do + subject.begin_entry(double("Entry"), 0) + expect(subject.can_begin_entry?).to be false + end + + it "returns false from CENTRAL_DIRECTORY" do + subject.begin_central_directory(0) + expect(subject.can_begin_entry?).to be false + end + + it "returns false when closed" do + subject.begin_central_directory(0) + subject.finalize(100) + expect(subject.can_begin_entry?).to be false + end + end + + describe "#can_write_body?" do + it "returns true only from ENTRY_BODY" do + expect(subject.can_write_body?).to be false + + subject.begin_entry(double("Entry"), 0) + expect(subject.can_write_body?).to be false + + subject.begin_entry_body(50) + expect(subject.can_write_body?).to be true + + subject.write_data_descriptor(100) + expect(subject.can_write_body?).to be false + end + end + + describe "#can_close_archive?" do + it "returns true from states that allow closing" do + expect(subject.can_close_archive?).to be true + + subject.begin_entry(double("Entry"), 0) + subject.begin_entry_body(50) + expect(subject.can_close_archive?).to be true + + subject.write_data_descriptor(100) + expect(subject.can_close_archive?).to be true + end + + it "returns false from LOCAL_HEADER" do + subject.begin_entry(double("Entry"), 0) + expect(subject.can_close_archive?).to be false + end + + it "returns false from CENTRAL_DIRECTORY" do + subject.begin_central_directory(0) + expect(subject.can_close_archive?).to be false + end + end + + describe "#closed?" do + it "returns true only after finalize" do + expect(subject.closed?).to be false + + subject.begin_central_directory(0) + expect(subject.closed?).to be false + + subject.finalize(100) + expect(subject.closed?).to be true + end + end + + describe "#in_entry?" do + it "returns true from LOCAL_HEADER and ENTRY_BODY" do + expect(subject.in_entry?).to be false + + subject.begin_entry(double("Entry"), 0) + expect(subject.in_entry?).to be true + + subject.begin_entry_body(50) + expect(subject.in_entry?).to be true + + subject.write_data_descriptor(100) + expect(subject.in_entry?).to be false + end + end + end +end diff --git a/spec/zip_kit/streamer_spec.rb b/spec/zip_kit/streamer_spec.rb index 28342a6..6e3330d 100644 --- a/spec/zip_kit/streamer_spec.rb +++ b/spec/zip_kit/streamer_spec.rb @@ -751,4 +751,169 @@ def stream_with_just_write.write(bytes) expect(per_filename.size).to eq(1) expect(per_filename["success.txt"]).to eq("successful content") end + + describe "call sequence enforcement via state machine" do + it "raises on multiple close calls" do + zip = described_class.new(StringIO.new) + zip.close + + expect { zip.close } + .to raise_error(ZipKit::Streamer::InvalidState, /already closed/) + end + + it "prevents adding entry after close" do + zip = described_class.new(StringIO.new) + zip.close + + expect { zip.write_stored_file("late.txt") } + .to raise_error(ZipKit::Streamer::InvalidState, /already closed/) + end + + it "prevents add_stored_entry after close" do + zip = described_class.new(StringIO.new) + zip.close + + expect { zip.add_stored_entry(filename: "late.txt", size: 0, crc32: 0) } + .to raise_error(ZipKit::Streamer::InvalidState, /already closed/) + end + + it "allows sequential entries when properly closed" do + output = StringIO.new + described_class.open(output) do |zip| + zip.write_stored_file("a.txt") { |w| w << "a" } + zip.write_stored_file("b.txt") { |w| w << "b" } + end + + entries = [] + Zip::File.open_buffer(output.string) do |zipfile| + zipfile.each { |entry| entries << entry.name } + end + expect(entries).to eq(["a.txt", "b.txt"]) + end + + it "tracks state transitions correctly through entry lifecycle" do + zip = described_class.new(StringIO.new) + + expect(zip.state_machine.state).to eq(:initial) + + zip.add_stored_entry(filename: "test.txt", size: 5, crc32: 0) + expect(zip.state_machine.state).to eq(:entry_body) + + zip << "hello" + + # Next entry triggers transition + zip.add_stored_entry(filename: "test2.txt", size: 5, crc32: 0) + expect(zip.state_machine.state).to eq(:entry_body) + + zip << "world" + zip.close + expect(zip.state_machine.state).to eq(:end_of_central_directory) + end + + it "state remains INITIAL until Heuristic decides" do + zip = described_class.new(StringIO.new) + + heuristic = zip.write_file("test.txt") + expect(zip.state_machine.state).to eq(:initial) + + heuristic << "small" + # Still buffering, state unchanged + + heuristic.close + # Now decided and wrote entry + expect(zip.state_machine.state).to eq(:data_descriptors) + + zip.close + end + + it "rollback on first entry keeps state in entry_body" do + output = StringIO.new + described_class.open(output) do |zip| + expect { + zip.write_stored_file("fail.txt") { raise "boom" } + }.to raise_error("boom") + + # Stays in entry_body (no data descriptor written during rollback) + expect(zip.state_machine.state).to eq(:entry_body) + + zip.write_stored_file("ok.txt") { |w| w << "ok" } + end + + entries = [] + Zip::File.open_buffer(output.string) do |zipfile| + zipfile.each { |entry| entries << entry.name } + end + expect(entries).to eq(["ok.txt"]) + end + + it "rollback creates filler and maintains correct central directory offsets" do + output = StringIO.new + described_class.open(output) do |zip| + zip.write_stored_file("before.txt") { |w| w << "before" } + + expect { + zip.write_stored_file("fail.txt") do |w| + w << "x" * 1000 + raise "boom" + end + }.to raise_error("boom") + + zip.write_stored_file("after.txt") { |w| w << "after" } + end + + # Verify the archive is valid and readable + per_filename = {} + Zip::File.open_buffer(output.string) do |zipfile| + zipfile.each do |entry| + per_filename[entry.name] = entry.get_input_stream.read + end + end + + expect(per_filename.keys).to eq(["before.txt", "after.txt"]) + expect(per_filename["before.txt"]).to eq("before") + expect(per_filename["after.txt"]).to eq("after") + end + + it "no rollback needed if Heuristic fails before deciding" do + output = StringIO.new + described_class.open(output) do |zip| + expect { + zip.write_file("fail.txt") do |w| + w << "tiny" # Not enough to trigger decide + raise "boom" + end + }.to raise_error("boom") + + # State is still INITIAL, no filler created + expect(zip.state_machine.state).to eq(:initial) + + zip.write_stored_file("ok.txt") { |w| w << "ok" } + end + + entries = [] + Zip::File.open_buffer(output.string) do |zipfile| + zipfile.each { |entry| entries << entry.name } + end + expect(entries).to eq(["ok.txt"]) + end + + it "allows sequential add_stored_entry calls" do + output = StringIO.new + zip = described_class.new(output) + + zip.add_stored_entry(filename: "a.txt", size: 1, crc32: Zlib.crc32("a")) + zip << "a" + + zip.add_stored_entry(filename: "b.txt", size: 1, crc32: Zlib.crc32("b")) + zip << "b" + + zip.close + + entries = [] + Zip::File.open_buffer(output.string) do |zipfile| + zipfile.each { |entry| entries << entry.name } + end + expect(entries).to eq(["a.txt", "b.txt"]) + end + end end