diff --git a/biofuse/cli.py b/biofuse/cli.py
index c468979..80f7bb8 100644
--- a/biofuse/cli.py
+++ b/biofuse/cli.py
@@ -111,7 +111,9 @@ def mount_bgen(vcz_url, mount_dir, basename, access_log_path, **kwargs):
     FUSE process's memory; only ``.bgen`` reads cross the wire.
     ``--no-sample-file`` and ``--no-bgi`` suppress the corresponding
     sidecar; ``--no-header-samples`` drops the sample identifiers from
-    the ``.bgen`` header block.
+    the ``.bgen`` header block. ``--unphased`` ignores the input's
+    ``call_genotype_phased`` field and encodes every variant with the
+    BGEN phased flag clear.
 
     The bcftools-view-style filter / backend / log options are inherited
     from ``vcztools view-bgen``; see ``vcztools view-bgen --help`` for the
diff --git a/biofuse/formats.py b/biofuse/formats.py
index 389ea2c..f547761 100644
--- a/biofuse/formats.py
+++ b/biofuse/formats.py
@@ -129,7 +129,11 @@ def _plink_encoder_factory(reader, opts):
 
 
 def _bgen_encoder_factory(reader, opts):
-    return vcztools.BgenEncoder(reader, embed_header_samples=not opts.no_header_samples)
+    return vcztools.BgenEncoder(
+        reader,
+        embed_header_samples=not opts.no_header_samples,
+        unphased=opts.unphased,
+    )
 
 
 PLINK_SPEC = FormatSpec(
diff --git a/tests/test_bgen_apps.py b/tests/test_bgen_apps.py
index 8c66415..c74660b 100644
--- a/tests/test_bgen_apps.py
+++ b/tests/test_bgen_apps.py
@@ -182,6 +182,19 @@ async def test_no_header_samples_stable_across_opens(
                 data = await trio.to_thread.run_sync(bgen_path.read_bytes)
                 assert data == expected, f"cycle {cycle} differed from reference"
 
+    @pytest.mark.parametrize("unphased", [True, False])
+    async def test_unphased_stable_across_opens(self, tmp_path, fx_small_vcz, unphased):
+        opts = dataclasses.replace(vcztools.ViewBgenOptions(), unphased=unphased)
+        ref_reader = opts.make_reader(str(fx_small_vcz.path))
+        with vcztools.BgenEncoder(ref_reader, unphased=unphased) as ref:
+            expected = ref.read(0, ref.total_size)
+
+        async with _mount_bgen(tmp_path, fx_small_vcz, opts=opts) as (mnt, basename):
+            bgen_path = mnt / f"{basename}.bgen"
+            for cycle in range(3):
+                data = await trio.to_thread.run_sync(bgen_path.read_bytes)
+                assert data == expected, f"cycle {cycle} differed from reference"
+
 
 def _pread_sync(path: pathlib.Path, off: int, size: int) -> bytes:
     with path.open("rb") as f:
diff --git a/tests/test_formats.py b/tests/test_formats.py
index 3a92fb2..96ecb05 100644
--- a/tests/test_formats.py
+++ b/tests/test_formats.py
@@ -313,6 +313,34 @@ def test_embed_default_matches_in_process(self, fx_reader, fx_small_vcz):
         assert data == ref_data
 
 
+class TestBgenUnphasedToggle:
+    """``--unphased`` flows through to ``BgenEncoder(unphased=True)``.
+
+    With the flag set, the encoder must ignore ``call_genotype_phased``
+    and produce the same bytes as an in-process ``BgenEncoder`` built
+    with ``unphased=True`` on the same reader.
+    """
+
+    def test_unphased_matches_in_process(self, fx_reader, fx_small_vcz):
+        opts = vcztools.ViewBgenOptions(unphased=True)
+        with formats.BGEN_SPEC.encoder_factory(fx_reader, opts) as encoder:
+            data = encoder.read(0, encoder.total_size)
+        ref_reader = _open_reader(fx_small_vcz.path)
+        with vcztools.BgenEncoder(ref_reader, unphased=True) as ref:
+            ref_data = ref.read(0, ref.total_size)
+        assert data == ref_data
+
+    def test_unphased_default_matches_in_process(self, fx_reader, fx_small_vcz):
+        opts = vcztools.ViewBgenOptions()
+        assert opts.unphased is False
+        with formats.BGEN_SPEC.encoder_factory(fx_reader, opts) as encoder:
+            data = encoder.read(0, encoder.total_size)
+        ref_reader = _open_reader(fx_small_vcz.path)
+        with vcztools.BgenEncoder(ref_reader, unphased=False) as ref:
+            ref_data = ref.read(0, ref.total_size)
+        assert data == ref_data
+
+
 class TestSpecsRegistry:
     def test_specs_dict_has_both_entries(self):
         assert formats.SPECS == {"plink": formats.PLINK_SPEC, "bgen": formats.BGEN_SPEC}
diff --git a/uv.lock b/uv.lock
index 4657020..c70cd17 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1737,8 +1737,8 @@ all = [
 
 [[package]]
 name = "vcztools"
-version = "0.1.3.dev326"
-source = { git = "https://github.com/sgkit-dev/vcztools.git?rev=main#ae4136fecc8470f359d3152f95d6135330995d53" }
+version = "0.1.3.dev328"
+source = { git = "https://github.com/sgkit-dev/vcztools.git?rev=main#4136ddcf7476e16e2b7d4e0bdaf83bfc3a44b996" }
 dependencies = [
     { name = "click" },
     { name = "humanfriendly" },