From 080112e5b07980562bcbd6e17322d28fbaff121d Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Fri, 15 May 2026 14:09:30 +0100 Subject: [PATCH 1/2] Update vcztools --- uv.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uv.lock b/uv.lock index 4657020..c70cd17 100644 --- a/uv.lock +++ b/uv.lock @@ -1737,8 +1737,8 @@ all = [ [[package]] name = "vcztools" -version = "0.1.3.dev326" -source = { git = "https://github.com/sgkit-dev/vcztools.git?rev=main#ae4136fecc8470f359d3152f95d6135330995d53" } +version = "0.1.3.dev328" +source = { git = "https://github.com/sgkit-dev/vcztools.git?rev=main#4136ddcf7476e16e2b7d4e0bdaf83bfc3a44b996" } dependencies = [ { name = "click" }, { name = "humanfriendly" }, From c21547a62085bb7a8ca0d2a9277f39d05328801f Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Fri, 15 May 2026 14:18:25 +0100 Subject: [PATCH 2/2] Wire --unphased through to BgenEncoder The ViewBgenOptions decorator already exposes --unphased; pass it into BgenEncoder so each per-connection encoder honours the flag. Tests pin the pass-through both at the spec-factory level and end-to-end across repeated opens of the mounted .bgen. --- biofuse/cli.py | 4 +++- biofuse/formats.py | 6 +++++- tests/test_bgen_apps.py | 13 +++++++++++++ tests/test_formats.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 2 deletions(-) diff --git a/biofuse/cli.py b/biofuse/cli.py index c468979..80f7bb8 100644 --- a/biofuse/cli.py +++ b/biofuse/cli.py @@ -111,7 +111,9 @@ def mount_bgen(vcz_url, mount_dir, basename, access_log_path, **kwargs): FUSE process's memory; only ``.bgen`` reads cross the wire. ``--no-sample-file`` and ``--no-bgi`` suppress the corresponding sidecar; ``--no-header-samples`` drops the sample identifiers from - the ``.bgen`` header block. + the ``.bgen`` header block. ``--unphased`` ignores the input's + ``call_genotype_phased`` field and encodes every variant with the + BGEN phased flag clear. 
The bcftools-view-style filter / backend / log options are inherited from ``vcztools view-bgen``; see ``vcztools view-bgen --help`` for the diff --git a/biofuse/formats.py b/biofuse/formats.py index 389ea2c..f547761 100644 --- a/biofuse/formats.py +++ b/biofuse/formats.py @@ -129,7 +129,11 @@ def _plink_encoder_factory(reader, opts): def _bgen_encoder_factory(reader, opts): - return vcztools.BgenEncoder(reader, embed_header_samples=not opts.no_header_samples) + return vcztools.BgenEncoder( + reader, + embed_header_samples=not opts.no_header_samples, + unphased=opts.unphased, + ) PLINK_SPEC = FormatSpec( diff --git a/tests/test_bgen_apps.py b/tests/test_bgen_apps.py index 8c66415..c74660b 100644 --- a/tests/test_bgen_apps.py +++ b/tests/test_bgen_apps.py @@ -182,6 +182,19 @@ async def test_no_header_samples_stable_across_opens( data = await trio.to_thread.run_sync(bgen_path.read_bytes) assert data == expected, f"cycle {cycle} differed from reference" + @pytest.mark.parametrize("unphased", [True, False]) + async def test_unphased_stable_across_opens(self, tmp_path, fx_small_vcz, unphased): + opts = dataclasses.replace(vcztools.ViewBgenOptions(), unphased=unphased) + ref_reader = opts.make_reader(str(fx_small_vcz.path)) + with vcztools.BgenEncoder(ref_reader, unphased=unphased) as ref: + expected = ref.read(0, ref.total_size) + + async with _mount_bgen(tmp_path, fx_small_vcz, opts=opts) as (mnt, basename): + bgen_path = mnt / f"{basename}.bgen" + for cycle in range(3): + data = await trio.to_thread.run_sync(bgen_path.read_bytes) + assert data == expected, f"cycle {cycle} differed from reference" + def _pread_sync(path: pathlib.Path, off: int, size: int) -> bytes: with path.open("rb") as f: diff --git a/tests/test_formats.py b/tests/test_formats.py index 3a92fb2..96ecb05 100644 --- a/tests/test_formats.py +++ b/tests/test_formats.py @@ -313,6 +313,34 @@ def test_embed_default_matches_in_process(self, fx_reader, fx_small_vcz): assert data == ref_data +class 
TestBgenUnphasedToggle: + """``--unphased`` flows through to ``BgenEncoder(unphased=True)``. + + With the flag set, the encoder must ignore ``call_genotype_phased`` + and produce the same bytes as an in-process ``BgenEncoder`` built + with ``unphased=True`` on a separately opened reader. + """ + + def test_unphased_matches_in_process(self, fx_reader, fx_small_vcz): + opts = vcztools.ViewBgenOptions(unphased=True) + with formats.BGEN_SPEC.encoder_factory(fx_reader, opts) as encoder: + data = encoder.read(0, encoder.total_size) + ref_reader = _open_reader(fx_small_vcz.path) + with vcztools.BgenEncoder(ref_reader, unphased=True) as ref: + ref_data = ref.read(0, ref.total_size) + assert data == ref_data + + def test_unphased_default_matches_in_process(self, fx_reader, fx_small_vcz): + opts = vcztools.ViewBgenOptions() + assert opts.unphased is False + with formats.BGEN_SPEC.encoder_factory(fx_reader, opts) as encoder: + data = encoder.read(0, encoder.total_size) + ref_reader = _open_reader(fx_small_vcz.path) + with vcztools.BgenEncoder(ref_reader, unphased=False) as ref: + ref_data = ref.read(0, ref.total_size) + assert data == ref_data + + + class TestSpecsRegistry: def test_specs_dict_has_both_entries(self): assert formats.SPECS == {"plink": formats.PLINK_SPEC, "bgen": formats.BGEN_SPEC}