@@ -267,10 +267,56 @@ def _extract_values_from_header(part) -> dict:
267267 }
268268
269269
def multipart_response_to_documents(response: Response) -> list[Document]:
    """
    Returns a list of Documents, one for each URI found in the various parts in the
    given multipart response. The response is assumed to correspond to the structure
    defined by https://docs.marklogic.com/REST/GET/v1/documents when the Accept header
    is "multipart/mixed".

    Each part is classified by the "category" value extracted from its header. A
    "content" part supplies the content, content type and version id of a document;
    any other part is parsed as JSON metadata and applied to the document via
    dict_to_metadata. Parts that share a URI are merged into a single Document.

    :param response: the multipart response to decode.
    :return: one Document per distinct URI, in the order in which each URI first
    appears in the response. An empty list is returned if the response has no body.
    """
    # A response without a body has no parts, so there is nothing to decode.
    # NOTE(review): this presumably covers a search that matches nothing; confirm
    # how the server answers in that case.
    if not response.content:
        return []

    decoder = MultipartDecoder.from_response(response)

    # Key the documents by URI so that no order is assumed between the metadata and
    # content parts of a document. A dict keeps insertion order, so the documents
    # come back in the order their URIs first appear in the response, which is not
    # necessarily the order in which the user asked for them.
    uris_to_documents = {}

    for part in decoder.parts:
        header_values = _extract_values_from_header(part)
        uri = header_values["uri"]
        # Stored values are never None, so None reliably means "URI not seen yet".
        # This avoids depending on the truthiness of a Document instance.
        doc = uris_to_documents.get(uri)

        if header_values["category"] == "content":
            content_type = header_values["content_type"]
            version_id = header_values["version_id"]
            # JSON content is parsed; any other content is kept as raw bytes.
            content = (
                json.loads(part.content)
                if content_type == "application/json"
                else part.content
            )
            if doc is None:
                uris_to_documents[uri] = Document(
                    uri, content, content_type=content_type, version_id=version_id
                )
            else:
                # A metadata part for this URI arrived first; fill in the content.
                doc.content = content
                doc.content_type = content_type
                doc.version_id = version_id
        else:
            if doc is None:
                # Metadata arrived before content; start with an empty document.
                doc = Document(uri, None)
                uris_to_documents[uri] = doc
            dict_to_metadata(json.loads(part.content), doc)

    return list(uris_to_documents.values())
311+
312+
270313class DocumentManager :
271314 """
272- Provides methods to simplify interacting with the /v1/documents REST endpoint
273- defined at https://docs.marklogic.com/REST/client/management.
315+ Provides methods to simplify interacting with REST endpoints that either accept
316+ or return documents. Primarily involves endpoints defined at
317+ https://docs.marklogic.com/REST/client/management , but also includes support for
318+ the search endpoint at https://docs.marklogic.com/REST/POST/v1/search which can
319+ return documents as well.
274320 """
275321
276322 def __init__ (self , session : Session ):
@@ -311,11 +357,18 @@ def write(
311357
312358 return self ._session .post ("/v1/documents" , data = data , headers = headers , ** kwargs )
313359
314- def _get_multipart_documents_response (
315- self , uris : list [str ], categories : list [str ], ** kwargs
316- ) -> Response :
360+ def read (
361+ self , uris : list [str ], categories : list [str ] = None , ** kwargs
362+ ) -> Union [ list [ Document ], Response ] :
317363 """
318- Constructs and sends a multipart/mixed request to the v1/documents endpoint.
364+ Read one or many documents via a GET to the endpoint defined at
365+ https://docs.marklogic.com/REST/GET/v1/documents . If a 200 is not returned
366+ by that endpoint, then the Response is returned instead.
367+
368+ :param uris: list of URIs to read.
369+ :param categories: optional list of the categories of data to return for each
370+ URI. By default, only content will be returned for each URI. See the endpoint
371+ documentation for further information.
319372 """
320373 params = kwargs .pop ("params" , {})
321374 params ["uri" ] = uris
@@ -325,59 +378,92 @@ def _get_multipart_documents_response(
325378
326379 headers = kwargs .pop ("headers" , {})
327380 headers ["Accept" ] = "multipart/mixed"
328- return self ._session .get (
381+ response = self ._session .get (
329382 "/v1/documents" , params = params , headers = headers , ** kwargs
330383 )
331384
332- def read (
333- self , uris : list [str ], categories : list [str ] = None , ** kwargs
385+ return (
386+ multipart_response_to_documents (response )
387+ if response .status_code == 200
388+ else response
389+ )
390+
def search(
    self,
    query: Union[dict, str] = None,
    categories: list[str] = None,
    q: str = None,
    start: int = None,
    page_length: int = None,
    options: str = None,
    collections: list[str] = None,
    **kwargs,
) -> Union[list[Document], Response]:
    """
    Leverages the support in the search endpoint defined at
    https://docs.marklogic.com/REST/POST/v1/search for returning a list of
    documents instead of a search response. Parameters that are commonly used for
    that endpoint are included as arguments to this method for ease of use.

    Any "params" and "headers" passed via kwargs are copied before this method adds
    its own entries, so the caller's dicts are left untouched. All remaining kwargs
    are passed through to the session's post call.

    :param query: JSON or XML query matching one of the types supported by the
    search endpoint. The "Content-type" header will be set based on whether this
    is a dict, a string of JSON, or a string of XML.
    :param categories: optional list of the categories of data to return for each
    URI. By default, only content will be returned for each URI. See the endpoint
    documentation for further information.
    :param q: optional search string.
    :param start: index of the first result to return. Sent whenever it is not
    None.
    :param page_length: maximum number of documents to return. Sent whenever it is
    not None, so an explicit 0 is honored.
    :param options: name of a query options instance to use.
    :param collections: restrict results to documents in these collections.
    :return: a list of Documents if the endpoint returns a 200; otherwise the
    Response itself.
    """
    # Copy the caller-supplied dicts so that adding entries below never leaks back
    # into objects the caller may reuse for other requests.
    params = dict(kwargs.pop("params", None) or {})
    headers = dict(kwargs.pop("headers", None) or {})

    params["format"] = "json"  # This refers to the metadata format.
    if categories:
        params["category"] = categories
    if collections:
        params["collection"] = collections
    if q:
        params["q"] = q
    # Numeric arguments are compared against None so that 0 is not mistaken for
    # "not provided".
    if start is not None:
        params["start"] = start
    if page_length is not None:
        params["pageLength"] = page_length
    if options:
        params["options"] = options

    headers["Accept"] = "multipart/mixed"

    if query:
        if isinstance(query, dict):
            kwargs["data"] = json.dumps(query)
            headers["Content-type"] = "application/json"
        else:
            kwargs["data"] = query
            # Sniff the payload: anything that does not parse as JSON is sent as
            # XML. json.loads signals a parse failure with a ValueError subclass
            # and an unsupported input type with TypeError.
            try:
                json.loads(query)
            except (TypeError, ValueError):
                headers["Content-type"] = "application/xml"
            else:
                headers["Content-type"] = "application/json"

    response = self._session.post(
        "/v1/search", headers=headers, params=params, **kwargs
    )

    if response.status_code != 200:
        return response
    return multipart_response_to_documents(response)
0 commit comments