Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion biofuse/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ def mount_bgen(vcz_url, mount_dir, basename, access_log_path, **kwargs):
FUSE process's memory; only ``.bgen`` reads cross the wire.
``--no-sample-file`` and ``--no-bgi`` suppress the corresponding
sidecar; ``--no-header-samples`` drops the sample identifiers from
the ``.bgen`` header block. ``--unphased`` ignores the input's
``call_genotype_phased`` field and encodes every variant with the
BGEN phased flag clear.

The bcftools-view-style filter / backend / log options are inherited
from ``vcztools view-bgen``; see ``vcztools view-bgen --help`` for the
Expand Down
6 changes: 5 additions & 1 deletion biofuse/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,11 @@ def _plink_encoder_factory(reader, opts):


def _bgen_encoder_factory(reader, opts):
    """Build the BGEN encoder for ``reader`` from the parsed view options.

    ``opts.no_header_samples`` is inverted into the encoder's
    ``embed_header_samples`` argument, and ``opts.unphased`` is passed
    through unchanged as ``unphased``.

    Returns the ``vcztools.BgenEncoder`` instance.
    """
    # ``unphased`` has to be forwarded here; otherwise the option set on
    # ``opts`` never reaches the encoder.
    return vcztools.BgenEncoder(
        reader,
        embed_header_samples=not opts.no_header_samples,
        unphased=opts.unphased,
    )


PLINK_SPEC = FormatSpec(
Expand Down
13 changes: 13 additions & 0 deletions tests/test_bgen_apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,19 @@ async def test_no_header_samples_stable_across_opens(
data = await trio.to_thread.run_sync(bgen_path.read_bytes)
assert data == expected, f"cycle {cycle} differed from reference"

@pytest.mark.parametrize("unphased", [True, False])
async def test_unphased_stable_across_opens(self, tmp_path, fx_small_vcz, unphased):
    """Reading the mounted ``.bgen`` three times yields the reference bytes.

    The reference is produced by an in-process ``BgenEncoder`` built with
    the same ``unphased`` setting as the mount options.
    """
    opts = dataclasses.replace(vcztools.ViewBgenOptions(), unphased=unphased)

    # Encode once in-process to obtain the bytes every mounted read must match.
    reference_reader = opts.make_reader(str(fx_small_vcz.path))
    with vcztools.BgenEncoder(reference_reader, unphased=unphased) as reference:
        expected = reference.read(0, reference.total_size)

    async with _mount_bgen(tmp_path, fx_small_vcz, opts=opts) as (mnt, basename):
        mounted_file = mnt / f"{basename}.bgen"
        for cycle in (0, 1, 2):
            # File reads block, so run them off the trio event loop.
            observed = await trio.to_thread.run_sync(mounted_file.read_bytes)
            assert observed == expected, f"cycle {cycle} differed from reference"


def _pread_sync(path: pathlib.Path, off: int, size: int) -> bytes:
with path.open("rb") as f:
Expand Down
28 changes: 28 additions & 0 deletions tests/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,34 @@ def test_embed_default_matches_in_process(self, fx_reader, fx_small_vcz):
assert data == ref_data


class TestBgenUnphasedToggle:
    """The ``unphased`` option reaches ``BgenEncoder`` via the BGEN spec factory.

    Each test compares the bytes from ``formats.BGEN_SPEC.encoder_factory``
    against an in-process ``BgenEncoder`` constructed with the matching
    ``unphased`` value on a freshly opened reader.
    """

    @staticmethod
    def _read_everything(encoder_ctx):
        """Enter ``encoder_ctx`` and return its full byte stream."""
        with encoder_ctx as enc:
            return enc.read(0, enc.total_size)

    def test_unphased_matches_in_process(self, fx_reader, fx_small_vcz):
        """With ``unphased=True`` the factory output equals the direct encode."""
        opts = vcztools.ViewBgenOptions(unphased=True)
        produced = self._read_everything(
            formats.BGEN_SPEC.encoder_factory(fx_reader, opts)
        )

        ref_reader = _open_reader(fx_small_vcz.path)
        reference = self._read_everything(
            vcztools.BgenEncoder(ref_reader, unphased=True)
        )
        assert produced == reference

    def test_unphased_default_matches_in_process(self, fx_reader, fx_small_vcz):
        """Default options leave ``unphased`` off and match ``unphased=False``."""
        opts = vcztools.ViewBgenOptions()
        assert opts.unphased is False
        produced = self._read_everything(
            formats.BGEN_SPEC.encoder_factory(fx_reader, opts)
        )

        ref_reader = _open_reader(fx_small_vcz.path)
        reference = self._read_everything(
            vcztools.BgenEncoder(ref_reader, unphased=False)
        )
        assert produced == reference


class TestSpecsRegistry:
def test_specs_dict_has_both_entries(self):
assert formats.SPECS == {"plink": formats.PLINK_SPEC, "bgen": formats.BGEN_SPEC}
4 changes: 2 additions & 2 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading