Skip to content

Commit aee68d2

Browse files
authored
Merge pull request #11 from marklogic/feature/498-batch-metadata
DEVEXP-498 Can now define default metadata
2 parents f3305d4 + 6739fd9 commit aee68d2

File tree

2 files changed

+253
-91
lines changed

2 files changed

+253
-91
lines changed

marklogic/documents.py

Lines changed: 134 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,78 @@
11
import json
2+
from typing import Union
23

3-
from requests import Session
4+
from requests import Response, Session
45
from urllib3.fields import RequestField
56
from urllib3.filepost import encode_multipart_formdata
67

8+
"""
9+
Defines classes to simplify usage of the documents REST endpoint defined at
10+
https://docs.marklogic.com/REST/client/management.
11+
"""
712

8-
class Document:
13+
14+
class Metadata:
    """
    Defines the metadata properties that can be associated with a document and also
    used for specifying default metadata when writing many documents. One benefit
    of this class - besides encapsulating each bit of what MarkLogic defines as metadata
    for a document - is to provide a simpler mechanism for defining permissions via a
    dictionary as opposed to an array of dictionaries.

    Every parameter is optional; a value of None means that piece of metadata has
    not been set.

    :param collections: array of collection URIs.
    :param permissions: dict with keys of role names and values of arrays of
    capabilities such as "read", "update", and "execute".
    :param quality: document quality, used for scoring in searches.
    :param metadata_values: dict with string keys and string values.
    :param properties: dict with string keys and values of any type.
    """

    def __init__(
        self,
        collections: Union[list[str], None] = None,
        permissions: Union[dict, None] = None,
        quality: Union[int, None] = None,
        metadata_values: Union[dict, None] = None,
        properties: Union[dict, None] = None,
    ):
        # Values are stored as given; no copying or validation is performed here.
        self.collections = collections
        self.permissions = permissions
        self.quality = quality
        self.metadata_values = metadata_values
        self.properties = properties
43+
44+
45+
def metadata_to_dict(metadata: "Metadata") -> dict:
    """
    Returns a dictionary with a structure matching what the /v1/documents endpoint
    requires.

    Only metadata that has been set is included, so the result is an empty dict
    when nothing has been defined.

    :param metadata: the object whose collections, permissions, quality,
    metadata_values, and properties attributes are read.
    :return: dict with any of the keys "permissions", "collections", "quality",
    "properties", and "metadataValues".
    """
    md = {}
    if metadata.permissions:
        # The endpoint wants a list of role/capabilities objects rather than the
        # role-name -> capabilities dict that callers provide.
        md["permissions"] = [
            {"role-name": role, "capabilities": capabilities}
            for role, capabilities in metadata.permissions.items()
        ]
    if metadata.collections:
        md["collections"] = metadata.collections
    # Compare against None instead of relying on truthiness so that an explicit
    # quality of 0 is sent rather than being silently dropped.
    if metadata.quality is not None:
        md["quality"] = metadata.quality
    if metadata.properties:
        md["properties"] = metadata.properties
    if metadata.metadata_values:
        md["metadataValues"] = metadata.metadata_values
    return md
64+
65+
66+
class Document(Metadata):
67+
"""
68+
:param uri: the URI of the document; can be None when relying on MarkLogic to
69+
generate a URI.
70+
:param content: the content of the document.
71+
:param collections: see definition in parent class.
72+
:param permissions: see definition in parent class.
73+
:param quality: see definition in parent class.
74+
:param metadata_values: see definition in parent class.
75+
:param properties: see definition in parent class.
1976
:param content_type: the MIME type of the document; use when MarkLogic cannot
2077
determine the MIME type based on the URI.
2178
:param extension: specifies a suffix for a URI generated by MarkLogic.
@@ -46,13 +103,9 @@ def __init__(
46103
version_id: str = None,
47104
temporal_document: str = None,
48105
):
106+
super().__init__(collections, permissions, quality, metadata_values, properties)
49107
self.uri = uri
50108
self.content = content
51-
self.collections = collections
52-
self.permissions = permissions
53-
self.quality = quality
54-
self.metadata_values = metadata_values
55-
self.properties = properties
56109

57110
# The following are all specific to writing a document.
58111
self.content_type = content_type
@@ -64,38 +117,25 @@ def __init__(
64117
self.temporal_document = temporal_document
65118

66119
def to_request_field(self) -> RequestField:
    """
    Returns a multipart request field representing the document to be written.

    Content that is a dict (or a dict subclass) is serialized to a JSON string;
    any other content is passed through unchanged. The document URI is used as
    both the name and the filename of the field.
    """
    data = self.content
    # isinstance also covers dict subclasses such as OrderedDict, which an exact
    # type comparison would leave unserialized.
    if isinstance(data, dict):
        data = json.dumps(data)
    field = RequestField(name=self.uri, data=data, filename=self.uri)
    field.make_multipart(
        content_disposition=self._make_content_disposition(),
        content_type=self.content_type,
    )
    return field
76132

77-
def to_metadata_dict(self) -> dict:
133+
def to_metadata_request_field(self) -> RequestField:
78134
"""
79-
Returns a dictionary with a data structure matching what the /v1/documents
80-
endpoint requires.
135+
Returns a multipart request field if any metadata has been set on this
136+
document; returns None otherwise.
81137
"""
82-
metadata = {}
83-
if self.permissions:
84-
metadata["permissions"] = [
85-
{"role-name": k, "capabilities": v} for k, v in self.permissions.items()
86-
]
87-
if self.collections:
88-
metadata["collections"] = self.collections
89-
if self.quality:
90-
metadata["quality"] = self.quality
91-
if self.properties:
92-
metadata["properties"] = self.properties
93-
if self.metadata_values:
94-
metadata["metadataValues"] = self.metadata_values
95-
return metadata
96-
97-
def to_metadata_request_field(self) -> RequestField:
98-
metadata = self.to_metadata_dict()
138+
metadata = metadata_to_dict(self)
99139
if len(metadata.keys()) == 0:
100140
return None
101141

@@ -108,7 +148,12 @@ def to_metadata_request_field(self) -> RequestField:
108148
)
109149
return field
110150

111-
def _make_disposition(self) -> str:
151+
def _make_content_disposition(self) -> str:
152+
"""
153+
Returns a content disposition suitable for use when writing documents via
154+
https://docs.marklogic.com/REST/POST/v1/documents . See that page for more
155+
information on each part of the disposition.
156+
"""
112157
disposition = "attachment"
113158

114159
if not self.uri:
@@ -133,17 +178,68 @@ def _make_disposition(self) -> str:
133178
return disposition
134179

135180

181+
class DefaultMetadata(Metadata):
    """
    Defines default metadata for use when writing many documents at one time.

    :param collections: array of collection URIs.
    :param permissions: dict with keys of role names and values of arrays of
    capabilities such as "read", "update", and "execute".
    :param quality: document quality, used for scoring in searches.
    :param metadata_values: dict with string keys and string values.
    :param properties: dict with string keys and values of any type.
    """

    def __init__(
        self,
        collections: Union[list[str], None] = None,
        permissions: Union[dict, None] = None,
        quality: Union[int, None] = None,
        metadata_values: Union[dict, None] = None,
        properties: Union[dict, None] = None,
    ):
        super().__init__(collections, permissions, quality, metadata_values, properties)

    def to_metadata_request_field(self) -> Union[RequestField, None]:
        """
        Returns a multipart request field suitable for use when writing many
        documents; returns None when no metadata has been set, so callers should
        check the result before adding it to a request.
        """
        metadata = metadata_to_dict(self)
        if not metadata:
            return None
        # The part has no name or filename; the "inline; category=metadata"
        # disposition is what identifies it as a metadata part.
        field = RequestField(name=None, data=json.dumps(metadata), filename=None)
        field.make_multipart(
            content_disposition="inline; category=metadata",
            content_type="application/json",
        )
        return field
209+
210+
136211
class DocumentManager:
212+
"""
213+
Provides methods to simplify interacting with the /v1/documents REST endpoint
214+
defined at https://docs.marklogic.com/REST/client/management.
215+
"""
216+
137217
def __init__(self, session: Session):
138218
self._session = session
139219

140-
def write(self, documents: list[Document], **kwargs):
220+
def write(
221+
self, parts: list[Union[DefaultMetadata, Document]], **kwargs
222+
) -> Response:
223+
"""
224+
Write one or many documents at a time via a POST to the endpoint defined at
225+
https://docs.marklogic.com/REST/POST/v1/documents .
226+
227+
:param parts: a part can define either a document to be written, which can
228+
include metadata, or a set of default metadata to be applied to each document
229+
after it that does not define its own metadata. See
230+
https://docs.marklogic.com/guide/rest-dev/bulk#id_16015 for more information on
231+
how the REST endpoint uses metadata.
232+
"""
141233
fields = []
142-
for doc in documents:
143-
metadata_field = doc.to_metadata_request_field()
144-
if metadata_field:
145-
fields.append(metadata_field)
146-
fields.append(doc.to_request_field())
234+
235+
for part in parts:
236+
if isinstance(part, DefaultMetadata):
237+
fields.append(part.to_metadata_request_field())
238+
else:
239+
metadata_field = part.to_metadata_request_field()
240+
if metadata_field:
241+
fields.append(metadata_field)
242+
fields.append(part.to_request_field())
147243

148244
data, content_type = encode_multipart_formdata(fields)
149245

0 commit comments

Comments
 (0)