Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ install: check_prereqs
python3 -m pip install -e '.[dev]'

test: install
pylint singer -d missing-docstring,broad-except,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access
pylint singer --extension-pkg-whitelist=ciso8601 -d missing-docstring,broad-except,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access
nosetests --with-doctest -v
36 changes: 24 additions & 12 deletions singer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ def unix_seconds_to_datetime(value):
return strftime(datetime.datetime.fromtimestamp(int(value), datetime.timezone.utc))


def breadcrumb_path(breadcrumb):
    """
    Transform breadcrumb into familiar object dot-notation.

    A breadcrumb is a sequence of strings in which the marker 'properties'
    is followed by a field name and the marker 'items' denotes "each element
    of the preceding array", e.g.

        ('properties', 'addrs', 'items', 'properties', 'amount')
            -> 'addrs[].amount'

    The breadcrumb is walked structurally instead of being joined and then
    edited with substring replacement, so field names that merely contain
    or equal a marker word are preserved:

        ('properties', 'custom_properties', 'properties', 'x')
            -> 'custom_properties.x'
        ('properties', 'a', 'properties', 'items')
            -> 'a.items'

    Returns an empty string for an empty breadcrumb.
    """
    segments = []
    crumbs = iter(breadcrumb)
    for crumb in crumbs:
        if crumb == 'items':
            # An array marker decorates the segment it follows; with nothing
            # before it (top-level array) it stands on its own.
            if segments:
                segments[-1] += '[]'
            else:
                segments.append('[]')
            continue

        if crumb == 'properties':
            # Whatever follows 'properties' is a field name, even when it is
            # spelled 'items' or 'properties'. A dangling trailing marker is
            # kept verbatim rather than dropped.
            crumb = next(crumbs, crumb)

        segments.append(crumb)

    return ".".join(segments)


class SchemaMismatch(Exception):
def __init__(self, errors):
if not errors:
Expand All @@ -46,7 +56,7 @@ def __init__(self, errors):
msg = "Errors during transform\n\t{}".format("\n\t".join(estrs))
msg += "\n\n\nErrors during transform: [{}]".format(", ".join(estrs))

super(SchemaMismatch, self).__init__(msg)
super().__init__(msg)

class SchemaKey:
ref = "$ref"
Expand Down Expand Up @@ -110,25 +120,27 @@ def __enter__(self):
def __exit__(self, *args):
self.log_warning()

def filter_data_by_metadata(self, data, metadata):
def filter_data_by_metadata(self, data, metadata, parent=()):
if isinstance(data, dict) and metadata:
for field_name in list(data.keys()):
selected = singer.metadata.get(metadata, ('properties', field_name), 'selected')
inclusion = singer.metadata.get(metadata, ('properties', field_name), 'inclusion')
breadcrumb = parent + ('properties', field_name)
selected = singer.metadata.get(metadata, breadcrumb, 'selected')
inclusion = singer.metadata.get(metadata, breadcrumb, 'inclusion')
if inclusion == 'automatic':
continue

if selected is False:
if (selected is False) or (inclusion == 'unsupported'):
data.pop(field_name, None)
# Track that a field was filtered because the customer
# didn't select it.
self.filtered.add(field_name)
# didn't select it or the tap declared it as unsupported.
self.filtered.add(breadcrumb_path(breadcrumb))
else:
data[field_name] = self.filter_data_by_metadata(
data[field_name], metadata, breadcrumb)

if inclusion == 'unsupported':
data.pop(field_name, None)
# Track that the field was filtered because the tap
# declared it as unsupported.
self.filtered.add(field_name)
if isinstance(data, list) and metadata:
breadcrumb = parent + ('items',)
data = [self.filter_data_by_metadata(d, metadata, breadcrumb) for d in data]

return data

Expand Down
43 changes: 43 additions & 0 deletions tests/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,49 @@ def test_drops_fields_which_are_unsupported(self):
dict_value = {"name": "chicken"}
self.assertEqual({}, transform(dict_value, schema, NO_INTEGER_DATETIME_PARSING, metadata=metadata))

def test_drops_nested_object_fields_which_are_unselected(self):
    # Only the nested 'amount' property is deselected; its sibling fields
    # carry no metadata of their own and must survive the transform.
    addr_properties = {key: {"type": "string"} for key in ("addr1", "city", "state")}
    addr_properties['amount'] = {'type': 'integer'}
    schema = {
        "type": "object",
        "properties": {
            "addr": {"type": "object", "properties": addr_properties},
        },
    }

    metadata = {}
    metadata[('properties', 'addr')] = {"selected": True}
    metadata[('properties', 'addr', 'properties', 'amount')] = {"selected": False}

    # The input record is the expected output plus the deselected field.
    kept = {'addr1': 'address_1', 'city': 'city_1', 'state': 'state_1'}
    record = {'addr': dict(kept, amount='123')}
    expected = {'addr': kept}

    result = transform(record, schema, NO_INTEGER_DATETIME_PARSING, metadata=metadata)
    self.assertDictEqual(expected, result)

def test_drops_nested_array_fields_which_are_unselected(self):
    # The deselected 'amount' property lives inside the objects of an array,
    # so it has to be removed from every element of that array.
    item_properties = {key: {"type": "string"} for key in ("addr1", "city", "state")}
    item_properties['amount'] = {'type': 'integer'}
    item_schema = {"type": "object", "properties": item_properties}
    schema = {
        "type": "object",
        "properties": {
            "addrs": {"type": "array", "items": item_schema},
        },
    }

    metadata = {
        ('properties', 'addrs'): {"selected": True},
        ('properties', 'addrs', 'items', 'properties', 'amount'): {"selected": False},
    }

    # Each input element is the expected element plus its deselected amount.
    kept_rows = [
        {'addr1': 'address_1', 'city': 'city_1', 'state': 'state_1'},
        {'addr1': 'address_2', 'city': 'city_2', 'state': 'state_2'},
    ]
    amounts = ('123', '456')
    record = {
        'addrs': [dict(row, amount=amount) for row, amount in zip(kept_rows, amounts)],
    }
    expected = {'addrs': kept_rows}

    result = transform(record, schema, NO_INTEGER_DATETIME_PARSING, metadata=metadata)
    self.assertDictEqual(expected, result)

class TestResolveSchemaReferences(unittest.TestCase):
def test_internal_refs_resolve(self):
schema = {"type": "object",
Expand Down