Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions csvwlib/converter/ModelConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, csv_url=None, metadata_url=None):
self.csvs = None
self.values_valiator = None
self.metadata_url = metadata_url
self.start_url = csv_url if csv_url is not None else metadata_url
self.start_url = csv_url if csv_url is not None else (metadata_url if not isinstance(metadata_url,dict) else metadata_url.get('url'))
self.metadata = None
self.atdm = {'@type': '@AnnotatedTableGroup'}
self.mode = CONST_STANDARD_MODE
Expand All @@ -34,6 +34,8 @@ def convert_to_atdm(self, mode=CONST_STANDARD_MODE):
metadata_validator = MetadataValidator(self.start_url)
self.mode = mode
self.metadata = MetadataLocator.find_and_get(self.csv_url, self.metadata_url)
if self.metadata_url and (isinstance(self.metadata_url,dict) or not self.metadata_url.startswith('http')):
self.metadata_url = "http://example.com/metadata"
self._normalize_metadata_base_url()
self._normalize_metadata_csv_url()
metadata_validator.validate_metadata(self.metadata)
Expand Down Expand Up @@ -75,14 +77,17 @@ def _add_table_metadata(table_metadata, table):
def _normalize_metadata_base_url(self):
    """Apply any '@base' found in the metadata's '@context' to its top-level 'url'.

    Nothing happens when no metadata is loaded, when '@context' is not a
    list, or when the metadata has no top-level 'url' (metadata that lists
    its tables under 'tables' instead). Metadata that is not a dict cannot
    be normalized; it is reported on stdout and left untouched.

    For every dict entry of '@context' that carries '@base':
      * a url starting with 'http' gets the base inserted in front of its
        final path segment;
      * any other url simply gets the base prepended.
    """
    metadata = self.metadata
    if metadata is None:
        return
    if not isinstance(metadata, dict):
        print(f"Error: not dict, {self.metadata}")
        return

    context = metadata.get('@context', [])
    if not isinstance(context, (list, tuple)):
        # A bare-string (or null) context cannot contain an '@base' entry.
        return

    for context_entry in context:
        if not (isinstance(context_entry, dict) and '@base' in context_entry):
            continue
        original_url = metadata.get('url')
        if original_url is None:
            # No top-level 'url' to rewrite; indexing it would raise KeyError.
            return
        base = context_entry['@base']
        if original_url.startswith('http'):
            directory, file_name = original_url.rsplit('/', 1)
            metadata['url'] = directory + '/' + base + file_name
        else:
            metadata['url'] = base + original_url

def _normalize_metadata_csv_url(self):
""" Expands 'url' properties if necessary """
Expand All @@ -108,7 +113,7 @@ def _fetch_csvs(self):
CSVUtils.parse_csv_from_url_to_list(table['url'], self._delimiter(table)),
self.metadata['tables']))
else:
self.csvs = [CSVUtils.parse_csv_from_url_to_list(self.metadata['url'], self._delimiter(self.metadata))]
self.csvs = [CSVUtils.parse_csv_from_url_to_list(self.metadata.get('url'), self._delimiter(self.metadata))]

@staticmethod
def _delimiter(metadata):
Expand Down
2 changes: 1 addition & 1 deletion csvwlib/converter/ToRDFConverter.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def parse_virtual_columns(self, row_node, atdm_row, table_metadata):
self.graph.add((row_node, CSVW.describes, subject))
else:
print(f"term {virtual_column['propertyUrl']} not in namespaces")
S

def _add_file_metadata(self, metadata, node):
language = JSONLDUtils.language(self.metadata.get('@context',[]))
for key, value in metadata.items():
Expand Down
19 changes: 16 additions & 3 deletions csvwlib/utils/MetadataLocator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json as jsonlib

import requests
import requests, os


from csvwlib.utils.metadata import MetadataValidator
from csvwlib.utils.url.WellKnownUriResolver import WellKnownUriResolver
Expand All @@ -11,7 +12,20 @@ class MetadataLocator:
@staticmethod
def find_and_get(csv_url, metadata_url=None):
if metadata_url is not None:
return jsonlib.loads(requests.get(metadata_url).content.decode())
if isinstance(metadata_url, dict): # md already parsed
return jsonlib.loads(jsonlib.dumps(metadata_url))
try:
md = jsonlib.loads(metadata_url) # expect json?
if not isinstance(md,dict):
raise Exception('metadata not dict')
return md
except ValueError as e:
if metadata_url.startswith('http'): # if url
return jsonlib.loads(requests.get(metadata_url).content.decode())
elif os.path.exists(metadata_url): # expect local file?
with open(metadata_url,"r") as f:
return jsonlib.loads(f.read())
return None

response = requests.head(csv_url)
if 'Link' in response.headers and 'describedby' in response.links:
Expand All @@ -24,7 +38,6 @@ def find_and_get(csv_url, metadata_url=None):
metadata = MetadataLocator._retrieve_from_site_wide_conf(csv_url)
if metadata is not None:
return metadata

if '?' in csv_url:
csv_url, query = csv_url.split('?')
metadata_url = csv_url + '-metadata.json'
Expand Down
14 changes: 10 additions & 4 deletions csvwlib/utils/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,22 +75,26 @@ class MetadataValidator:
def __init__(self, start_url):
    """Create a validator and register it as ``MetadataValidator.instance``.

    start_url -- the location the conversion started from. A dict is also
    accepted: its 'url' entry is used when present, otherwise the dict
    itself is stored. ``self.start_url`` is therefore assigned on every
    path, and ``check_csv_reference`` can detect the dict case and skip.
    """
    MetadataValidator.instance = self
    self.metadata = {}
    if isinstance(start_url, dict):
        # Fall back to the dict itself so the attribute always exists.
        self.start_url = start_url.get('url', start_url)
    else:
        self.start_url = start_url
    self.warnings = []
    self.table = {}

def validate_metadata(self, metadata):
if metadata is None:
if metadata is None or not isinstance(metadata, dict):
return
self.metadata = metadata
if 'tableSchema' in metadata:
if 'tableSchema' in metadata.keys():
tables = [metadata]
else:
tables = metadata['tables']

for table in tables:
self.table = table
if 'tables' in metadata:
if 'tables' in metadata.keys():
self.check_member_property('tableGroup', metadata)
else:
self.check_member_property('table', metadata)
Expand All @@ -101,6 +105,8 @@ def validate_metadata(self, metadata):
self.check_titles(table)

def check_csv_reference(self, table, metadata):
if isinstance(self.start_url,dict):
return
if not self.start_url.endswith('.csv'):
return
if table['url'] != self.start_url:
Expand Down