11import json
2+ from typing import Union
23
3- from requests import Session
4+ from requests import Response , Session
45from urllib3 .fields import RequestField
56from urllib3 .filepost import encode_multipart_formdata
67
8+ """
9+ Defines classes to simplify usage of the documents REST endpoint defined at
10+ https://docs.marklogic.com/REST/client/management.
11+ """
712
8- class Document :
13+
class Metadata:
    """
    Defines the metadata properties that can be associated with a document and also
    used for specifying default metadata when writing many documents. One benefit
    of this class - besides encapsulating each bit of what MarkLogic defines as metadata
    for a document - is to provide a simpler mechanism for defining permissions via a
    dictionary as opposed to an array of dictionaries.

    Every parameter is optional and defaults to None, meaning that piece of
    metadata has not been set.

    :param collections: array of collection URIs.
    :param permissions: dict with keys of role names and values of arrays of
    capabilities such as "read", "update", and "execute".
    :param quality: document quality, used for scoring in searches.
    :param metadata_values: dict with string keys and string values.
    :param properties: dict with string keys and values of any type.
    """

    def __init__(
        self,
        collections: Union[list[str], None] = None,
        permissions: Union[dict, None] = None,
        quality: Union[int, None] = None,
        metadata_values: Union[dict, None] = None,
        properties: Union[dict, None] = None,
    ):
        # Values are stored exactly as given; no copying or validation is done here.
        self.collections = collections
        self.permissions = permissions
        self.quality = quality
        self.metadata_values = metadata_values
        self.properties = properties
43+
44+
def metadata_to_dict(metadata: "Metadata") -> dict:
    """
    Returns a dictionary with a structure matching what the /v1/documents endpoint
    requires.

    Only metadata that has been set is included, so the result is an empty dict
    when nothing has been set.

    :param metadata: object exposing the attributes permissions, collections,
    quality, properties, and metadata_values.
    :return: dict that may contain the keys "permissions", "collections",
    "quality", "properties", and "metadataValues".
    """
    md = {}
    if metadata.permissions:
        # Expand the role -> capabilities dict into one dictionary per role.
        md["permissions"] = [
            {"role-name": role, "capabilities": capabilities}
            for role, capabilities in metadata.permissions.items()
        ]
    if metadata.collections:
        md["collections"] = metadata.collections
    # Compare against None rather than relying on truthiness so that an
    # explicitly set quality of 0 is not silently dropped.
    if metadata.quality is not None:
        md["quality"] = metadata.quality
    if metadata.properties:
        md["properties"] = metadata.properties
    if metadata.metadata_values:
        md["metadataValues"] = metadata.metadata_values
    return md
64+
65+
66+ class Document (Metadata ):
67+ """
68+ :param uri: the URI of the document; can be None when relying on MarkLogic to
69+ generate a URI.
70+ :param content: the content of the document.
71+ :param collections: see definition in parent class.
72+ :param permissions: see definition in parent class.
73+ :param quality: see definition in parent class.
74+ :param metadata_values: see definition in parent class.
75+ :param properties: see definition in parent class.
1976 :param content_type: the MIME type of the document; use when MarkLogic cannot
2077 determine the MIME type based on the URI.
2178 :param extension: specifies a suffix for a URI generated by MarkLogic.
@@ -46,13 +103,9 @@ def __init__(
46103 version_id : str = None ,
47104 temporal_document : str = None ,
48105 ):
106+ super ().__init__ (collections , permissions , quality , metadata_values , properties )
49107 self .uri = uri
50108 self .content = content
51- self .collections = collections
52- self .permissions = permissions
53- self .quality = quality
54- self .metadata_values = metadata_values
55- self .properties = properties
56109
57110 # The following are all specific to writing a document.
58111 self .content_type = content_type
@@ -64,38 +117,25 @@ def __init__(
64117 self .temporal_document = temporal_document
65118
66119 def to_request_field (self ) -> RequestField :
120+ """
121+ Returns a multipart request field representing the document to be written.
122+ """
67123 data = self .content
68124 if type (data ) is dict :
69125 data = json .dumps (data )
70126 field = RequestField (name = self .uri , data = data , filename = self .uri )
71127 field .make_multipart (
72- content_disposition = self ._make_disposition (),
128+ content_disposition = self ._make_content_disposition (),
73129 content_type = self .content_type ,
74130 )
75131 return field
76132
77- def to_metadata_dict (self ) -> dict :
133+ def to_metadata_request_field (self ) -> RequestField :
78134 """
79- Returns a dictionary with a data structure matching what the /v1/documents
80- endpoint requires .
135+ Returns a multipart request field if any metadata has been set on this
136+ document; returns None otherwise .
81137 """
82- metadata = {}
83- if self .permissions :
84- metadata ["permissions" ] = [
85- {"role-name" : k , "capabilities" : v } for k , v in self .permissions .items ()
86- ]
87- if self .collections :
88- metadata ["collections" ] = self .collections
89- if self .quality :
90- metadata ["quality" ] = self .quality
91- if self .properties :
92- metadata ["properties" ] = self .properties
93- if self .metadata_values :
94- metadata ["metadataValues" ] = self .metadata_values
95- return metadata
96-
97- def to_metadata_request_field (self ) -> RequestField :
98- metadata = self .to_metadata_dict ()
138+ metadata = metadata_to_dict (self )
99139 if len (metadata .keys ()) == 0 :
100140 return None
101141
@@ -108,7 +148,12 @@ def to_metadata_request_field(self) -> RequestField:
108148 )
109149 return field
110150
111- def _make_disposition (self ) -> str :
151+ def _make_content_disposition (self ) -> str :
152+ """
153+ Returns a content disposition suitable for use when writing documents via
154+ https://docs.marklogic.com/REST/POST/v1/documents . See that page for more
155+ information on each part of the disposition.
156+ """
112157 disposition = "attachment"
113158
114159 if not self .uri :
@@ -133,17 +178,68 @@ def _make_disposition(self) -> str:
133178 return disposition
134179
135180
class DefaultMetadata(Metadata):
    """
    Defines default metadata for use when writing many documents at one time.

    :param collections: see definition in parent class.
    :param permissions: see definition in parent class.
    :param quality: see definition in parent class.
    :param metadata_values: see definition in parent class.
    :param properties: see definition in parent class.
    """

    def __init__(
        self,
        collections: Union[list[str], None] = None,
        permissions: Union[dict, None] = None,
        quality: Union[int, None] = None,
        metadata_values: Union[dict, None] = None,
        properties: Union[dict, None] = None,
    ):
        super().__init__(collections, permissions, quality, metadata_values, properties)

    def to_metadata_request_field(self) -> Union[RequestField, None]:
        """
        Returns a multipart request field suitable for use when writing many
        documents; returns None when no metadata has been set, so callers must
        check the result before using it.
        """
        metadata = metadata_to_dict(self)
        if not metadata:
            return None
        # Default metadata is not tied to a document, so the field has no name or
        # filename.
        field = RequestField(name=None, data=json.dumps(metadata), filename=None)
        field.make_multipart(
            content_disposition="inline; category=metadata",
            content_type="application/json",
        )
        return field
209+
210+
136211class DocumentManager :
212+ """
213+ Provides methods to simplify interacting with the /v1/documents REST endpoint
214+ defined at https://docs.marklogic.com/REST/client/management.
215+ """
216+
137217 def __init__ (self , session : Session ):
138218 self ._session = session
139219
140- def write (self , documents : list [Document ], ** kwargs ):
220+ def write (
221+ self , parts : list [Union [DefaultMetadata , Document ]], ** kwargs
222+ ) -> Response :
223+ """
224+ Write one or many documents at a time via a POST to the endpoint defined at
225+ https://docs.marklogic.com/REST/POST/v1/documents .
226+
227+ :param parts: a part can define either a document to be written, which can
228+ include metadata, or a set of default metadata to be applied to each document
229+ after it that does not define its own metadata. See
230+ https://docs.marklogic.com/guide/rest-dev/bulk#id_16015 for more information on
231+ how the REST endpoint uses metadata.
232+ """
141233 fields = []
142- for doc in documents :
143- metadata_field = doc .to_metadata_request_field ()
144- if metadata_field :
145- fields .append (metadata_field )
146- fields .append (doc .to_request_field ())
234+
235+ for part in parts :
236+ if isinstance (part , DefaultMetadata ):
237+ fields .append (part .to_metadata_request_field ())
238+ else :
239+ metadata_field = part .to_metadata_request_field ()
240+ if metadata_field :
241+ fields .append (metadata_field )
242+ fields .append (part .to_request_field ())
147243
148244 data , content_type = encode_multipart_formdata (fields )
149245
0 commit comments