21 changes: 21 additions & 0 deletions docs/source/quickstart.rst
@@ -80,3 +80,24 @@ Save a Numpy array as a SigMF Recording

# check for mistakes & write to disk
meta.tofile('example_cf32.sigmf-meta') # extension is optional

--------------------------------
Control Fixed-Point Data Scaling
--------------------------------

For fixed-point datasets, you can control whether samples are automatically scaled to floating-point values:

.. code-block:: python

import sigmf

# Default behavior: autoscale fixed-point data to [-1.0, 1.0] range
handle = sigmf.fromfile("fixed_point_data.sigmf")
samples = handle.read_samples() # Returns float32/complex64

# Disable autoscaling to access raw integer values
handle_raw = sigmf.fromfile("fixed_point_data.sigmf", autoscale=False)
raw_samples = handle_raw.read_samples() # Returns original integer types

# Both slicing and read_samples() respect the autoscale setting
assert handle[0:10].dtype == handle.read_samples(count=10).dtype
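
Under the hood, autoscaling is an offset-and-scale determined by the component bit width: unsigned components are first re-centered by subtracting ``2**(bits - 1)``, and every fixed-point component is then divided by ``2**(bits - 1)``. Below is a minimal sketch of that arithmetic for 16-bit signed components; the sample values are invented for illustration:

.. code-block:: python

    import numpy as np

    # hypothetical raw components from a 16-bit signed (e.g. ci16_le) recording
    raw = np.array([-32768, -16384, 0, 16384, 32767], dtype=np.int16)

    # divide by 2**(bits - 1) to land in the [-1.0, 1.0] range
    scaled = raw.astype(np.float32) / 2**15

    print(scaled)  # approximately [-1.0, -0.5, 0.0, 0.5, 1.0]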
2 changes: 1 addition & 1 deletion sigmf/__init__.py
@@ -5,7 +5,7 @@
# SPDX-License-Identifier: LGPL-3.0-or-later

# version of this python module
__version__ = "1.2.13"
__version__ = "1.3.0"
# matching version of the SigMF specification
__specification__ = "1.2.5"

8 changes: 5 additions & 3 deletions sigmf/archivereader.py
@@ -29,7 +29,9 @@ class SigMFArchiveReader:
map_readonly : bool, optional
Indicate whether assignments on the numpy.memmap are allowed.
archive_buffer : buffer, optional

Alternative buffer to read archive from.
autoscale : bool, optional
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0).

Raises
------
@@ -41,7 +43,7 @@ class SigMFArchiveReader:
If metadata is invalid.
"""

def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None):
def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None, autoscale=True):
if name is not None:
path = Path(name)
if path.suffix != SIGMF_ARCHIVE_EXT:
@@ -90,7 +92,7 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu
if data_offset is None:
raise SigMFFileError("No .sigmf-data file found in archive!")

self.sigmffile = SigMFFile(metadata=json_contents)
self.sigmffile = SigMFFile(metadata=json_contents, autoscale=autoscale)
self.sigmffile.validate()

self.sigmffile.set_data_file(
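
The new keyword is passed straight through to the underlying SigMFFile, so archives read via the reader class honor it as well. A minimal sketch, assuming a local fixed-point archive named recording.sigmf:

    from sigmf.archivereader import SigMFArchiveReader

    # disable scaling so samples keep their raw fixed-point values
    reader = SigMFArchiveReader("recording.sigmf", autoscale=False)
    raw_samples = reader.sigmffile.read_samples()
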
144 changes: 99 additions & 45 deletions sigmf/sigmffile.py
@@ -158,7 +158,7 @@ class SigMFFile(SigMFMetafile):
]
VALID_KEYS = {GLOBAL_KEY: VALID_GLOBAL_KEYS, CAPTURE_KEY: VALID_CAPTURE_KEYS, ANNOTATION_KEY: VALID_ANNOTATION_KEYS}

def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True):
def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksum=False, map_readonly=True, autoscale=True):
"""
API for SigMF I/O

@@ -174,12 +174,17 @@ def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksu
When True will skip calculating hash on data_file (if present) to check against metadata.
map_readonly: bool, default True
Indicates whether assignments on the numpy.memmap are allowed.
autoscale: bool, default True
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0)
for all sample reading operations including slicing.
"""
super().__init__()
self.data_file = None
self.data_buffer = None
self.sample_count = 0
self._memmap = None
self.is_complex_data = False # numpy.iscomplexobj(self._memmap) is not adequate for fixed-point complex case
self.autoscale = autoscale

self.set_metadata(metadata)
if global_info is not None:
@@ -216,10 +221,39 @@ def __next__(self):
def __getitem__(self, sli):
mem = self._memmap[sli] # matches behavior of numpy.ndarray.__getitem__()

# apply _return_type conversion if set
if self._return_type is None:
return mem

# is_fixed_point and is_complex
# no special conversion needed
if not self.autoscale:
return mem
else:
# apply autoscaling for fixed-point data when autoscale=True
dtype = dtype_info(self.get_global_field(self.DATATYPE_KEY))
is_fixedpoint_data = dtype["is_fixedpoint"]

if is_fixedpoint_data:
# apply scaling for fixed-point data
is_unsigned_data = dtype["is_unsigned"]
component_size = dtype["component_size"]
data_type_out = np.dtype("f4") if not self.is_complex_data else np.dtype("f4, f4")

data = mem.astype(data_type_out)
data = data.view(np.dtype("f4"))
if is_unsigned_data:
data -= 2 ** (component_size * 8 - 1)
data *= 2 ** -(component_size * 8 - 1)
data = data.view(data_type_out)
if self.is_complex_data:
data = data.view(np.complex64)
# for single-channel complex data, flatten the last dimension
if data.ndim > 1 and self.get_num_channels() == 1:
data = data.flatten()
return data[0] if isinstance(sli, int) else data
else:
# floating-point data, no scaling needed
return mem

# handle complex data type conversion
if self._memmap.ndim == 2:
# num_channels == 1
ray = mem[:, 0].astype(self._return_type) + 1.0j * mem[:, 1].astype(self._return_type)
@@ -490,24 +524,27 @@ def _count_samples(self):
use 0.
For complex data, a 'sample' includes both the real and imaginary part.
"""
if self.data_file is None:
if self.data_file is None and self.data_buffer is None:
sample_count = self._get_sample_count_from_annotations()
else:
header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()])
file_size = self.data_file.stat().st_size if self.data_size_bytes is None else self.data_size_bytes
file_data_size = file_size - self.get_global_field(self.TRAILING_BYTES_KEY, 0) - header_bytes # bytes
if self.data_file is not None:
file_bytes = self.data_file.stat().st_size if self.data_size_bytes is None else self.data_size_bytes
elif self.data_buffer is not None:
file_bytes = len(self.data_buffer.getbuffer()) if self.data_size_bytes is None else self.data_size_bytes
else:
file_bytes = 0
sample_bytes = file_bytes - self.get_global_field(self.TRAILING_BYTES_KEY, 0) - header_bytes
sample_size = self.get_sample_size() # size of a sample in bytes
num_channels = self.get_num_channels()
sample_count = file_data_size // sample_size // num_channels
if file_data_size % (sample_size * num_channels) != 0:
sample_count = sample_bytes // sample_size // num_channels
if sample_bytes % (sample_size * num_channels) != 0:
warnings.warn(
f"File `{self.data_file}` does not contain an integer number of samples across channels. "
f"Data source does not contain an integer number of samples across channels. "
"It may be invalid data."
)
if self._get_sample_count_from_annotations() > sample_count:
warnings.warn(
f"File `{self.data_file}` ends before the final annotation in the corresponding SigMF metadata."
)
warnings.warn(f"Data source ends before the final annotation in the corresponding SigMF metadata.")
self.sample_count = sample_count
return sample_count

@@ -653,7 +690,7 @@ def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False):
self.dump(fp, pretty=pretty)
fp.write("\n") # text files should end in carriage return

def read_samples_in_capture(self, index=0, autoscale=True):
def read_samples_in_capture(self, index=0):
"""
Reads samples from the specified captures segment in its entirety.

@@ -676,9 +713,9 @@ def read_samples_in_capture(self, index=0, autoscale=True):
"an integer number of samples across channels. It may be invalid."
)

return self._read_datafile(cb[0], (cb[1] - cb[0]) // self.get_sample_size(), autoscale, False)
return self._read_datafile(cb[0], (cb[1] - cb[0]) // self.get_sample_size())

def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=False):
def read_samples(self, start_index=0, count=-1):
"""
Reads the specified number of samples starting at the specified index from the associated data file.

@@ -688,16 +725,12 @@ def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=F
Starting sample index from which to read.
count : int, default -1
Number of samples to read. -1 will read whole file.
autoscale : bool, default True
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0)
raw_components : bool, default False
If True read and return the sample components (individual I & Q for complex, samples for real)
with no conversions or interleaved channels.

Returns
-------
data : ndarray
Samples are returned as an array of float or complex, with number of dimensions equal to NUM_CHANNELS_KEY.
Scaling behavior depends on the autoscale parameter set during construction.
"""
if count == 0:
raise IOError("Number of samples must be greater than zero, or -1 for all samples.")
@@ -713,9 +746,9 @@ def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=F

if not self._is_conforming_dataset():
warnings.warn(f"Recording dataset appears non-compliant, resulting data may be erroneous")
return self._read_datafile(first_byte, count * self.get_num_channels(), autoscale, False)
return self._read_datafile(first_byte, count * self.get_num_channels())

def _read_datafile(self, first_byte, nitems, autoscale, raw_components):
def _read_datafile(self, first_byte, nitems):
"""
internal function for reading samples from datafile
"""
@@ -735,26 +768,25 @@ def _read_datafile(self, first_byte, nitems, autoscale, raw_components):
fp.seek(first_byte, 0)
data = np.fromfile(fp, dtype=data_type_in, count=nitems)
elif self.data_buffer is not None:
data = np.frombuffer(self.data_buffer.getbuffer(), dtype=data_type_in, count=nitems)
# handle offset for data_buffer like we do for data_file
buffer_data = self.data_buffer.getbuffer()[first_byte:]
data = np.frombuffer(buffer_data, dtype=data_type_in, count=nitems)
else:
data = self._memmap

if num_channels != 1:
# return reshaped view for num_channels
# first dimension will be double size if `is_complex_data`
data = data.reshape(data.shape[0] // num_channels, num_channels)
if not raw_components:
data = data.astype(data_type_out)
if autoscale and is_fixedpoint_data:
data = data.view(np.dtype("f4"))
if is_unsigned_data:
data -= 2 ** (component_size * 8 - 1)
data *= 2 ** -(component_size * 8 - 1)
data = data.view(data_type_out)
if self.is_complex_data:
data = data.view(np.complex64)
else:
data = data.view(component_type_in)
data = data.astype(data_type_out)
if self.autoscale and is_fixedpoint_data:
data = data.view(np.dtype("f4"))
if is_unsigned_data:
data -= 2 ** (component_size * 8 - 1)
data *= 2 ** -(component_size * 8 - 1)
data = data.view(data_type_out)
if self.is_complex_data:
data = data.view(np.complex64)

if self.data_file is not None:
fp.close()
@@ -1053,29 +1085,51 @@ def get_dataset_filename_from_metadata(meta_fn, metadata=None):
return None


def fromarchive(archive_path, dir=None, skip_checksum=False):
def fromarchive(archive_path, dir=None, skip_checksum=False, autoscale=True):
"""Extract an archive and return a SigMFFile.

The `dir` parameter is no longer used as this function has been changed to
access SigMF archives without extracting them.

Parameters
----------
archive_path: str
Path to `sigmf-archive` tarball.
dir: str, optional
No longer used. Kept for compatibility.
skip_checksum: bool, default False
Skip dataset checksum calculation.
autoscale: bool, default True
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0).

Returns
-------
SigMFFile
Instance created from archive.
"""
from .archivereader import SigMFArchiveReader
return SigMFArchiveReader(archive_path, skip_checksum=skip_checksum).sigmffile

return SigMFArchiveReader(archive_path, skip_checksum=skip_checksum, autoscale=autoscale).sigmffile

def fromfile(filename, skip_checksum=False):

def fromfile(filename, skip_checksum=False, autoscale=True):
"""
Creates and returns a SigMFFile or SigMFCollection instance with metadata
loaded from the specified file. The filename may be that of either a
sigmf-meta file, a sigmf-data file, a sigmf-collection file, or a sigmf
archive.
Creates and returns a SigMFFile or SigMFCollection instance with metadata loaded from the specified file.

The file can be one of:
* A SigMF Metadata file (.sigmf-meta)
* A SigMF Dataset file (.sigmf-data)
* A SigMF Collection file (.sigmf-collection)
* A SigMF Archive file (.sigmf-archive)

Parameters
----------
filename: str | bytes | PathLike
Path for SigMF Metadata, Dataset, Archive or Collection (with or without extension).
skip_checksum: bool, default False
When True will not read entire dataset to calculate hash.
autoscale: bool, default True
If dataset is in a fixed-point representation, scale samples from (min, max) to (-1.0, 1.0).

Returns
-------
@@ -1092,7 +1146,7 @@ def fromfile(filename, skip_checksum=False):
ext = file_path.suffix

if (ext.lower().endswith(SIGMF_ARCHIVE_EXT) or not Path.is_file(meta_fn)) and Path.is_file(archive_fn):
return fromarchive(archive_fn, skip_checksum=skip_checksum)
return fromarchive(archive_fn, skip_checksum=skip_checksum, autoscale=autoscale)

if (ext.lower().endswith(SIGMF_COLLECTION_EXT) or not Path.is_file(meta_fn)) and Path.is_file(collection_fn):
collection_fp = open(collection_fn, "rb")
Expand All @@ -1112,7 +1166,7 @@ def fromfile(filename, skip_checksum=False):
meta_fp.close()

data_fn = get_dataset_filename_from_metadata(meta_fn, metadata)
return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum)
return SigMFFile(metadata=metadata, data_file=data_fn, skip_checksum=skip_checksum, autoscale=autoscale)


def get_sigmf_filenames(filename):
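
With this change, the per-call autoscale keyword is removed from read_samples() and read_samples_in_capture() (along with raw_components on read_samples); scaling is now decided once when the handle is constructed, which keeps slicing and read_samples() consistent. A rough migration sketch, with a placeholder filename:

    import sigmf

    # before (sketch): scaling was chosen per call
    # samples = handle.read_samples(autoscale=False)

    # after: scaling is chosen once at construction
    handle = sigmf.fromfile("example.sigmf-meta", autoscale=False)
    samples = handle.read_samples()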