marklogic
diff --git a/‎marklogic/documents.py‎
Lines changed: 152 additions & 22 deletions b/‎marklogic/documents.py‎
Lines changed: 152 additions & 22 deletions
diff --git a/‎test-app/src/main/ml-config/security/users/python-not-rest-user.json‎
Lines changed: 8 additions & 0 deletions b/‎test-app/src/main/ml-config/security/users/python-not-rest-user.json‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎test-app/src/main/ml-modules/transforms/envelope.sjs‎
Lines changed: 6 additions & 0 deletions b/‎test-app/src/main/ml-modules/transforms/envelope.sjs‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎tests/conftest.py‎
Lines changed: 11 additions & 3 deletions b/‎tests/conftest.py‎
Lines changed: 11 additions & 3 deletions
diff --git a/‎tests/test_get_documents.py‎
Lines changed: 0 additions & 54 deletions b/‎tests/test_get_documents.py‎
Lines changed: 0 additions & 54 deletions
@@ -1,7 +1,9 @@
 import json
+from collections import OrderedDict
 from typing import Union
 
 from requests import Response, Session
+from requests_toolbelt.multipart.decoder import MultipartDecoder
 from urllib3.fields import RequestField
 from urllib3.filepost import encode_multipart_formdata
 
@@ -63,27 +65,29 @@ def metadata_to_dict(metadata: Metadata) -> dict:
     return md
 
 
+def dict_to_metadata(metadata: dict, target_metadata: Metadata) -> None:
+    """
+    Populates the given Metadata instance based on the metadata dictionary as returned
+    by the /v1/documents REST endpoint.
+
+    :param metadata: dictionary that may contain the keys "collections", "quality",
+    "metadataValues", "properties" and "permissions"; "permissions" is read as a
+    list of dicts that each have "role-name" and "capabilities" keys.
+    :param target_metadata: the object to populate; each of its metadata attributes
+    is overwritten, with None used for any key that is missing from the dictionary.
+    """
+    target_metadata.collections = metadata.get("collections")
+    target_metadata.quality = metadata.get("quality")
+    target_metadata.metadata_values = metadata.get("metadataValues")
+    target_metadata.properties = metadata.get("properties")
+
+    permissions = metadata.get("permissions")
+    # A missing or empty permissions list is stored as None, not as an empty dict.
+    # This must be assigned to "permissions" so that a value left over from an
+    # earlier use of the target object is cleared.
+    target_metadata.permissions = (
+        {perm["role-name"]: perm["capabilities"] for perm in permissions}
+        if permissions
+        else None
+    )
+
+
 class Document(Metadata):
     """
-    :param uri: the URI of the document; can be None when relying on MarkLogic to
-    generate a URI.
-    :param content: the content of the document.
-    :param collections: see definition in parent class.
-    :param permissions: see definition in parent class.
-    :param quality: see definition in parent class.
-    :param metadata_values: see definition in parent class.
-    :param properties: see definition in parent class.
-    :param content_type: the MIME type of the document; use when MarkLogic cannot
-    determine the MIME type based on the URI.
-    :param extension: specifies a suffix for a URI generated by MarkLogic.
-    :param directory: specifies a prefix for a URI generated by MarkLogic.
-    :param repair: for an XML document, the level of XML repair to perform; can be
-    "full" or "none", with "none" being the default.
-    :param version_id: affects updates when optimistic locking is enabled; see
-    https://docs.marklogic.com/REST/POST/v1/documents for more information.
-    :param temporal_document: the logical document URI for a document written to a
-    temporal collection; requires that a "temporal-collection" parameter be included in
-    the request.
+    Represents a document, either as read from MarkLogic or as a document to be
+    written to MarkLogic.
     """
 
     def __init__(
@@ -96,24 +100,48 @@ def __init__(
         metadata_values: dict = None,
         properties: dict = None,
         content_type: str = None,
+        version_id: str = None,
         extension: str = None,
         directory: str = None,
         repair: str = None,
         extract: str = None,
-        version_id: str = None,
         temporal_document: str = None,
     ):
+        """
+        :param uri: the URI of the document; can be None when relying on MarkLogic to
+        generate a URI.
+        :param content: the content of the document.
+        :param collections: see definition in parent class.
+        :param permissions: see definition in parent class.
+        :param quality: see definition in parent class.
+        :param metadata_values: see definition in parent class.
+        :param properties: see definition in parent class.
+        :param content_type: the MIME type of the document; use when MarkLogic cannot
+        determine the MIME type based on the URI.
+        :param version_id: affects updates when optimistic locking is enabled; see
+        https://docs.marklogic.com/REST/POST/v1/documents for more information.
+        :param extension: specifies a suffix for a URI generated by MarkLogic; only used
+        when writing a document.
+        :param directory: specifies a prefix for a URI generated by MarkLogic; only used
+        when writing a document.
+        :param repair: for an XML document, the level of XML repair to perform; can be
+        "full" or "none", with "none" being the default; only used when writing a
+        document.
+        :param temporal_document: the logical document URI for a document written to a
+        temporal collection; requires that a "temporal-collection" parameter be
+        included in the request; only used when writing a document.
+        """
         super().__init__(collections, permissions, quality, metadata_values, properties)
         self.uri = uri
         self.content = content
+        self.content_type = content_type
+        self.version_id = version_id
 
         # The following are all specific to writing a document.
-        self.content_type = content_type
         self.extension = extension
         self.directory = directory
         self.repair = repair
         self.extract = extract
-        self.version_id = version_id
         self.temporal_document = temporal_document
 
     def to_request_field(self) -> RequestField:
@@ -208,6 +236,37 @@ def to_metadata_request_field(self) -> RequestField:
         return field
 
 
+def _extract_values_from_header(part) -> dict:
+    """
+    Returns a dict containing values about the document content or metadata, as read
+    from the headers of a single part of a multipart response.
+
+    :param part: a multipart body part that exposes "encoding" and a "headers"
+    mapping whose keys and values are bytes.
+    :return: a dict with the keys "uri", "category", "content_type" and "version_id";
+    "content_type" and "version_id" are None when the part does not declare them.
+    :raises KeyError: if the Content-Disposition header is missing, or if it lacks a
+    "filename" or "category" value.
+    """
+    encoding = part.encoding
+    disposition = part.headers["Content-Disposition".encode(encoding)].decode(encoding)
+    disposition_values = {}
+    for item in disposition.split(";"):
+        # Split on the first "=" only, so that a value which itself contains "=",
+        # such as a URI, is kept whole. Items with no "=", such as the leading
+        # "attachment", are ignored.
+        name, separator, value = item.partition("=")
+        if separator:
+            disposition_values[name.strip()] = value
+
+    raw_content_type = part.headers.get("Content-Type".encode(encoding))
+    content_type = raw_content_type.decode(encoding) if raw_content_type else None
+
+    # Remove the double quotes that may wrap the filename value.
+    uri = disposition_values["filename"]
+    if uri.startswith('"'):
+        uri = uri[1:]
+    if uri.endswith('"'):
+        uri = uri[:-1]
+
+    return {
+        "uri": uri,
+        "category": disposition_values["category"],
+        "content_type": content_type,
+        "version_id": disposition_values.get("versionId"),
+    }
+
+
 class DocumentManager:
     """
     Provides methods to simplify interacting with the /v1/documents REST endpoint
@@ -251,3 +310,74 @@ def write(
             headers["Accept"] = "application/json"
 
         return self._session.post("/v1/documents", data=data, headers=headers, **kwargs)
+
+    def _get_multipart_documents_response(
+        self, uris: list[str], categories: list[str], **kwargs
+    ) -> Response:
+        """
+        Sends a GET request to the v1/documents endpoint, asking for the given URIs
+        to be returned as a multipart/mixed response.
+
+        :param uris: list of URIs to request; sent as the "uri" parameter.
+        :param categories: list of categories to request; the "category" parameter is
+        omitted when this is None or empty.
+        :param kwargs: passed on to the session's "get" call. Any "params" and
+        "headers" entries are copied before being extended; the "uri", "format" and
+        "category" parameters and the "Accept" header set here take precedence.
+        :return: the value returned by the session's "get" call.
+        """
+        # Work on copies so that dicts owned by the caller are never modified; this
+        # also tolerates an explicit params=None or headers=None.
+        params = dict(kwargs.pop("params", None) or {})
+        params["uri"] = uris
+        params["format"] = "json"  # This refers to the metadata format.
+        if categories:
+            params["category"] = categories
+
+        headers = dict(kwargs.pop("headers", None) or {})
+        headers["Accept"] = "multipart/mixed"
+        return self._session.get(
+            "/v1/documents", params=params, headers=headers, **kwargs
+        )
+
+    def read(
+        self, uris: list[str], categories: list[str] = None, **kwargs
+    ) -> Union[list[Document], Response]:
+        """
+        Read one or many documents via a GET to the endpoint defined at
+        https://docs.marklogic.com/REST/GET/v1/documents . If a 200 is not returned
+        by that endpoint, then the Response is returned instead.
+
+        :param uris: list of URIs to read.
+        :param categories: optional list of the categories of data to return for each
+        URI. By default, only content will be returned for each URI. See the endpoint
+        documentation for further information.
+        :param kwargs: passed on to the underlying GET request.
+        :return: a list with one Document per URI found in the response, or the
+        Response itself when its status code is not 200.
+        """
+        response = self._get_multipart_documents_response(uris, categories, **kwargs)
+        if response.status_code != 200:
+            return response
+
+        # Key the Document objects by URI so that no assumption is made about the
+        # order in which the metadata and content parts are returned. An OrderedDict
+        # keeps the URIs in the order in which they first appear in the response,
+        # though the REST endpoint is not guaranteed to return them in the same order
+        # as provided by the user.
+        docs = OrderedDict()
+
+        for part in MultipartDecoder.from_response(response).parts:
+            header_values = _extract_values_from_header(part)
+            uri = header_values["uri"]
+
+            # Reuse the Document created by an earlier part for the same URI. The
+            # check is for presence, so it does not depend on a Document's truthiness.
+            doc = docs.get(uri)
+            if doc is None:
+                doc = docs[uri] = Document(uri, None)
+
+            if header_values["category"] == "content":
+                content_type = header_values["content_type"]
+                # JSON content is parsed; any other content is kept as raw bytes.
+                doc.content = (
+                    json.loads(part.content)
+                    if content_type == "application/json"
+                    else part.content
+                )
+                doc.content_type = content_type
+                doc.version_id = header_values["version_id"]
+            else:
+                # Any part that is not content is treated as JSON metadata.
+                dict_to_metadata(json.loads(part.content), doc)
+
+        return list(docs.values())
@@ -0,0 +1,8 @@
+{
+    "user-name": "python-not-rest-user",
+    "description": "For tests where the user does not have the privileges required by the REST API.",
+    "password": "password",
+    "role": [
+        "qconsole-user"
+    ]
+}
@@ -0,0 +1,6 @@
+function transform(context, params, content) {
+    // Wrap the incoming content under a single "envelope" key. The context and
+    // params arguments are accepted but not used.
+    var wrapped = {};
+    wrapped["envelope"] = content;
+    return wrapped;
+}
+exports.transform = transform;
@@ -1,21 +1,29 @@
 import pytest
+
 from marklogic import Client
 
+# URL passed as the first argument to Client by the fixtures in this file.
+BASE_URL = "http://localhost:8030"
+
 
 @pytest.fixture
 def client():
+    # Client for "python-test-user", with the credentials passed as "digest".
-    return Client("http://localhost:8030", digest=("python-test-user", "password"))
+    return Client(BASE_URL, digest=("python-test-user", "password"))
 
 
 @pytest.fixture
 def admin_client():
+    # Client for "python-test-admin", with the credentials passed as "digest".
-    return Client("http://localhost:8030", digest=("python-test-admin", "password"))
+    return Client(BASE_URL, digest=("python-test-admin", "password"))
 
 
 @pytest.fixture
 def basic_client():
+    # Client for "python-test-user", with the credentials passed as "auth" rather
+    # than "digest".
     # requests allows a tuple to be passed when doing basic authentication.
-    return Client("http://localhost:8030", auth=("python-test-user", "password"))
+    return Client(BASE_URL, auth=("python-test-user", "password"))
+
+
+@pytest.fixture
+def not_rest_user_client():
+    """
+    Client for "python-not-rest-user", a user intended for tests where the user does
+    not have the privileges required by the REST API.
+    """
+    credentials = ("python-not-rest-user", "password")
+    return Client(BASE_URL, digest=credentials)
 
 
 @pytest.fixture