Skip to content

Commit 23a2b90

Browse files
committed
DEVEXP Can now write documents with metadata
1 parent 1e1ad80 commit 23a2b90

File tree

5 files changed

+264
-58
lines changed

5 files changed

+264
-58
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ env
44
__pycache__
55
.venv
66
venv
7+
.idea

marklogic/documents.py

Lines changed: 57 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
23
from requests import Session
34
from urllib3.fields import RequestField
45
from urllib3.filepost import encode_multipart_formdata
@@ -9,6 +10,12 @@ class Document:
910
:param uri: the URI of the document; can be None when relying on MarkLogic to
1011
generate a URI.
1112
:param content: the content of the document.
13+
:param collections: array of collection URIs.
14+
:param permissions: dict with keys of role names and values of arrays of
15+
capabilities such as "read", "update", and "execute".
16+
:param quality: document quality, used for scoring in searches.
17+
:param metadata_values: dict with string keys and string values.
18+
:param properties: dict with string keys and values of any type.
1219
:param content_type: the MIME type of the document; use when MarkLogic cannot
1320
determine the MIME type based on the URI.
1421
:param extension: specifies a suffix for a URI generated by MarkLogic.
@@ -26,6 +33,11 @@ def __init__(
2633
self,
2734
uri: str,
2835
content,
36+
collections: list[str] = None,
37+
permissions: dict = None,
38+
quality: int = None,
39+
metadata_values: dict = None,
40+
properties: dict = None,
2941
content_type: str = None,
3042
extension: str = None,
3143
directory: str = None,
@@ -36,6 +48,13 @@ def __init__(
3648
):
3749
self.uri = uri
3850
self.content = content
51+
self.collections = collections
52+
self.permissions = permissions
53+
self.quality = quality
54+
self.metadata_values = metadata_values
55+
self.properties = properties
56+
57+
# The following are all specific to writing a document.
3958
self.content_type = content_type
4059
self.extension = extension
4160
self.directory = directory
@@ -55,6 +74,40 @@ def to_request_field(self) -> RequestField:
5574
)
5675
return field
5776

77+
def to_metadata_dict(self) -> dict:
78+
"""
79+
Returns a dictionary with a data structure matching what the /v1/documents
80+
endpoint requires.
81+
"""
82+
metadata = {}
83+
if self.permissions:
84+
metadata["permissions"] = [
85+
{"role-name": k, "capabilities": v} for k, v in self.permissions.items()
86+
]
87+
if self.collections:
88+
metadata["collections"] = self.collections
89+
if self.quality:
90+
metadata["quality"] = self.quality
91+
if self.properties:
92+
metadata["properties"] = self.properties
93+
if self.metadata_values:
94+
metadata["metadataValues"] = self.metadata_values
95+
return metadata
96+
97+
def to_metadata_request_field(self) -> RequestField:
98+
metadata = self.to_metadata_dict()
99+
if len(metadata.keys()) == 0:
100+
return None
101+
102+
field = RequestField(
103+
name=self.uri, data=json.dumps(metadata), filename=self.uri
104+
)
105+
field.make_multipart(
106+
content_disposition=f"attachment; filename={self.uri}; category=metadata",
107+
content_type="application/json",
108+
)
109+
return field
110+
58111
def _make_disposition(self) -> str:
59112
disposition = "attachment"
60113

@@ -85,8 +138,11 @@ def __init__(self, session: Session):
85138
self._session = session
86139

87140
def write(self, documents: list[Document], **kwargs):
88-
fields = [self._make_default_metadata_field()]
141+
fields = []
89142
for doc in documents:
143+
metadata_field = doc.to_metadata_request_field()
144+
if metadata_field:
145+
fields.append(metadata_field)
90146
fields.append(doc.to_request_field())
91147

92148
data, content_type = encode_multipart_formdata(fields)
@@ -99,27 +155,3 @@ def write(self, documents: list[Document], **kwargs):
99155
headers["Accept"] = "application/json"
100156

101157
return self._session.post("/v1/documents", data=data, headers=headers, **kwargs)
102-
103-
def _make_default_metadata_field(self):
104-
"""
105-
Temporary method to ensure the test user can see written documents. Will be
106-
removed when this feature is implemented for real.
107-
"""
108-
metadata_field = RequestField(
109-
name="request-metadata",
110-
data=json.dumps(
111-
{
112-
"permissions": [
113-
{
114-
"role-name": "python-tester",
115-
"capabilities": ["read", "update"],
116-
}
117-
]
118-
}
119-
),
120-
)
121-
metadata_field.make_multipart(
122-
content_disposition="inline; category=metadata",
123-
content_type="application/json",
124-
)
125-
return metadata_field

tests/conftest.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,21 @@ def cloud_config():
4444
"host": "support.test.marklogic.cloud",
4545
"key": "changeme",
4646
}
47+
48+
49+
@pytest.fixture(autouse=True)
50+
def prepare_test_database(admin_client: Client):
51+
"""
52+
Deletes any documents created by other tests to ensure a 'clean' database before a
53+
test runs. Does not delete documents in the 'test-data' collection which is intended
54+
to contain all the documents loaded by the test-app. A user with the 'admin' role
55+
is used so that temporal documents can be deleted.
56+
"""
57+
query = "cts:uris((), (), cts:not-query(cts:collection-query('test-data'))) \
58+
! xdmp:document-delete(.)"
59+
response = admin_client.post(
60+
"v1/eval",
61+
headers={"Content-type": "application/x-www-form-urlencoded"},
62+
data={"xquery": query},
63+
)
64+
assert 200 == response.status_code

tests/test_write_documents.py

Lines changed: 81 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,15 @@
33
from marklogic import Client
44
from marklogic.documents import Document
55

6-
7-
@pytest.fixture(autouse=True)
8-
def prepare_test_database(admin_client: Client):
9-
"""
10-
Deletes any documents created by other tests to ensure a 'clean' database before a
11-
test runs. Does not delete documents in the 'test-data' collection which is intended
12-
to contain all the documents loaded by the test-app. A user with the 'admin' role
13-
is used so that temporal documents can be deleted.
14-
"""
15-
query = "cts:uris((), (), cts:not-query(cts:collection-query('test-data'))) \
16-
! xdmp:document-delete(.)"
17-
response = admin_client.post(
18-
"v1/eval",
19-
headers={"Content-type": "application/x-www-form-urlencoded"},
20-
data={"xquery": query},
21-
)
22-
assert 200 == response.status_code
6+
DEFAULT_PERMS = {"python-tester": ["read", "update"]}
237

248

259
def test_write_json(client: Client):
2610
# Verifies that JSON can be either a dict or a string.
2711
response = client.documents.write(
2812
[
29-
Document("/temp/doc1.json", {"doc": 1}),
30-
Document("/temp/doc2.json", '{"doc": 2}'),
13+
Document("/temp/doc1.json", {"doc": 1}, permissions=DEFAULT_PERMS),
14+
Document("/temp/doc2.json", '{"doc": 2}', permissions=DEFAULT_PERMS),
3115
]
3216
)
3317

@@ -48,8 +32,8 @@ def test_return_xml(client: Client):
4832
the Content-type to multipart/mixed.
4933
"""
5034
docs = [
51-
Document("/temp/doc1.json", {"doc": 1}),
52-
Document("/temp/doc2.json", {"doc": 2}),
35+
Document("/temp/doc1.json", {"doc": 1}, permissions=DEFAULT_PERMS),
36+
Document("/temp/doc2.json", {"doc": 2}, permissions=DEFAULT_PERMS),
5337
]
5438
response = client.documents.write(docs, headers={"Accept": "application/xml"})
5539

@@ -60,8 +44,8 @@ def test_return_xml(client: Client):
6044
def test_write_json_and_xml(client: Client):
6145
response = client.documents.write(
6246
[
63-
Document("/temp/doc1.json", {"doc": 1}),
64-
Document("/temp/doc2.xml", "<doc>2</doc>"),
47+
Document("/temp/doc1.json", {"doc": 1}, permissions=DEFAULT_PERMS),
48+
Document("/temp/doc2.xml", "<doc>2</doc>", permissions=DEFAULT_PERMS),
6549
]
6650
)
6751
assert 200 == response.status_code
@@ -79,8 +63,18 @@ def test_content_types(client: Client):
7963
"""
8064
response = client.documents.write(
8165
[
82-
Document("/temp/doc1", {"doc": 1}, content_type="application/json"),
83-
Document("/temp/doc2", "<doc>2</doc>", content_type="application/xml"),
66+
Document(
67+
"/temp/doc1",
68+
{"doc": 1},
69+
content_type="application/json",
70+
permissions=DEFAULT_PERMS,
71+
),
72+
Document(
73+
"/temp/doc2",
74+
"<doc>2</doc>",
75+
content_type="application/xml",
76+
permissions=DEFAULT_PERMS,
77+
),
8478
]
8579
)
8680
assert 200 == response.status_code
@@ -92,16 +86,27 @@ def test_content_types(client: Client):
9286

9387

9488
def test_single_doc(client):
95-
response = client.documents.write([Document("/temp/doc1.json", {"doc": 1})])
89+
response = client.documents.write(
90+
[Document("/temp/doc1.json", {"doc": 1}, permissions=DEFAULT_PERMS)]
91+
)
9692
assert 200 == response.status_code
9793

9894
doc1 = client.get("v1/documents?uri=/temp/doc1.json").json()
9995
assert 1 == doc1["doc"]
10096

10197

98+
@pytest.mark.skip("Will get this working when supporting batch-level metadata")
10299
def test_server_generated_uri(client):
103100
response = client.documents.write(
104-
[Document(None, {"doc": "serveruri"}, extension=".json", directory="/temp/")]
101+
[
102+
Document(
103+
None,
104+
{"doc": "serveruri"},
105+
extension=".json",
106+
directory="/temp/",
107+
permissions=DEFAULT_PERMS,
108+
)
109+
]
105110
)
106111
assert 200 == response.status_code
107112

@@ -116,7 +121,14 @@ def test_server_generated_uri(client):
116121

117122
def test_repair_xml(client):
118123
response = client.documents.write(
119-
[Document("/temp/doc1.xml", "<doc>needs <b>closing tag</doc>", repair="full")]
124+
[
125+
Document(
126+
"/temp/doc1.xml",
127+
"<doc>needs <b>closing tag</doc>",
128+
repair="full",
129+
permissions=DEFAULT_PERMS,
130+
)
131+
]
120132
)
121133
assert 200 == response.status_code
122134

@@ -128,14 +140,25 @@ def test_repair_xml(client):
128140
def test_extract_binary(client):
129141
content = "MarkLogic and Python".encode("ascii")
130142
response = client.documents.write(
131-
[Document("/temp/doc1.bin", content, extract="properties")]
143+
[
144+
Document(
145+
"/temp/doc1.bin",
146+
content,
147+
extract="properties",
148+
permissions=DEFAULT_PERMS,
149+
)
150+
]
132151
)
133152
assert 200 == response.status_code
134153

135154

136155
def test_optimistic_locking(client):
137156
response = client.documents.write(
138-
[Document("/temp/doc1.json", {"content": "original"})]
157+
[
158+
Document(
159+
"/temp/doc1.json", {"content": "original"}, permissions=DEFAULT_PERMS
160+
)
161+
]
139162
)
140163
assert 200 == response.status_code
141164

@@ -144,7 +167,14 @@ def test_optimistic_locking(client):
144167

145168
# Update the document, passing in the current version_id based on the ETag.
146169
response = client.documents.write(
147-
[Document("/temp/doc1.json", {"content": "updated!"}, version_id=etag)]
170+
[
171+
Document(
172+
"/temp/doc1.json",
173+
{"content": "updated!"},
174+
version_id=etag,
175+
permissions=DEFAULT_PERMS,
176+
)
177+
]
148178
)
149179
assert 200 == response.status_code
150180

@@ -154,7 +184,14 @@ def test_optimistic_locking(client):
154184

155185
# Next update should fail since the ETag is no longer the current version.
156186
response = client.documents.write(
157-
[Document("/temp/doc1.json", {"this": "should fail"}, version_id=etag)]
187+
[
188+
Document(
189+
"/temp/doc1.json",
190+
{"this": "should fail"},
191+
version_id=etag,
192+
permissions=DEFAULT_PERMS,
193+
)
194+
]
158195
)
159196
assert 412 == response.status_code, "412 is returned when the versionId is invalid."
160197
assert response.text.__contains__("RESTAPI-CONTENTWRONGVERSION")
@@ -170,7 +207,14 @@ def test_temporal_doc(client):
170207
}
171208

172209
response = client.documents.write(
173-
[Document("/temp/doc1.json", content, temporal_document="custom1")],
210+
[
211+
Document(
212+
"/temp/doc1.json",
213+
content,
214+
temporal_document="custom1",
215+
permissions=DEFAULT_PERMS,
216+
)
217+
],
174218
params={"temporal-collection": "temporal-collection"},
175219
)
176220
assert 200 == response.status_code
@@ -180,3 +224,7 @@ def test_temporal_doc(client):
180224
data = client.get("/v1/search?collection=custom1&format=json").json()
181225
assert 1 == data["total"]
182226
assert "/temp/doc1.json" == data["results"][0]["uri"]
227+
228+
229+
def test_metadata_no_content(client: Client):
230+
print("TODO!")

0 commit comments

Comments
 (0)