Skip to content

Commit a191226

Browse files
authored
Merge pull request #15 from marklogic/feature/fixes
Small improvements and fixes
2 parents 02cbb5f + 371d9f5 commit a191226

File tree

4 files changed

+145
-124
lines changed

4 files changed

+145
-124
lines changed

marklogic/documents.py

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ class Document(Metadata):
9292

9393
def __init__(
9494
self,
95-
uri: str,
96-
content,
95+
uri: str = None,
96+
content=None,
9797
collections: list[str] = None,
9898
permissions: dict = None,
9999
quality: int = None,
@@ -148,6 +148,8 @@ def to_request_field(self) -> RequestField:
148148
"""
149149
Returns a multipart request field representing the document to be written.
150150
"""
151+
if self.content is None:
152+
return None
151153
data = self.content
152154
if type(data) is dict:
153155
data = json.dumps(data)
@@ -282,13 +284,14 @@ def multipart_response_to_documents(response: Response) -> list[Document]:
282284
header_values = _extract_values_from_header(part)
283285
uri = header_values["uri"]
284286
if header_values["category"] == "content":
285-
content = (
286-
json.loads(part.content)
287-
if header_values["content_type"] == "application/json"
288-
else part.content
289-
)
290-
content_type = header_values["content_type"]
291-
version_id = header_values["version_id"]
287+
content = part.content
288+
content_type = header_values.get("content_type")
289+
if content_type == "application/json":
290+
content = json.loads(content)
291+
elif content_type in ["application/xml", "text/xml", "text/plain"]:
292+
content = content.decode(part.encoding)
293+
294+
version_id = header_values.get("version_id")
292295
if uris_to_documents.get(uri):
293296
doc: Document = uris_to_documents[uri]
294297
doc.content = content
@@ -323,7 +326,7 @@ def __init__(self, session: Session):
323326
self._session = session
324327

325328
def write(
326-
self, parts: list[Union[DefaultMetadata, Document]], **kwargs
329+
self, parts: Union[Document, list[Union[DefaultMetadata, Document]]], **kwargs
327330
) -> Response:
328331
"""
329332
Write one or many documents at a time via a POST to the endpoint defined at
@@ -337,14 +340,19 @@ def write(
337340
"""
338341
fields = []
339342

343+
if isinstance(parts, Document):
344+
parts = [parts]
345+
340346
for part in parts:
341347
if isinstance(part, DefaultMetadata):
342348
fields.append(part.to_metadata_request_field())
343349
else:
344350
metadata_field = part.to_metadata_request_field()
345351
if metadata_field:
346352
fields.append(metadata_field)
347-
fields.append(part.to_request_field())
353+
content_field = part.to_request_field()
354+
if content_field:
355+
fields.append(content_field)
348356

349357
data, content_type = encode_multipart_formdata(fields)
350358

@@ -358,20 +366,20 @@ def write(
358366
return self._session.post("/v1/documents", data=data, headers=headers, **kwargs)
359367

360368
def read(
361-
self, uris: list[str], categories: list[str] = None, **kwargs
369+
self, uris: Union[str, list[str]], categories: list[str] = None, **kwargs
362370
) -> Union[list[Document], Response]:
363371
"""
364372
Read one or many documents via a GET to the endpoint defined at
365373
https://docs.marklogic.com/REST/GET/v1/documents . If a 200 is not returned
366374
by that endpoint, then the Response is returned instead.
367375
368-
:param uris: list of URIs to read.
376+
:param uris: list of URIs or a single URI to read.
369377
:param categories: optional list of the categories of data to return for each
370378
URI. By default, only content will be returned for each URI. See the endpoint
371379
documentation for further information.
372380
"""
373381
params = kwargs.pop("params", {})
374-
params["uri"] = uris
382+
params["uri"] = uris if isinstance(uris, list) else [uris]
375383
params["format"] = "json" # This refers to the metadata format.
376384
if categories:
377385
params["category"] = categories
@@ -405,8 +413,8 @@ def search(
405413
documents instead of a search response. Parameters that are commonly used for
406414
that endpoint are included as arguments to this method for ease of use.
407415
408-
:param query: JSON or XML query matching one of the types supported by the
409-
search endpoint. The "Content-type" header will be set based on whether this
416+
:param query: JSON or XML query matching one of the types supported by the
417+
search endpoint. The "Content-type" header will be set based on whether this
410418
is a dict, a string of JSON, or a string of XML.
411419
:param categories: optional list of the categories of data to return for each
412420
URI. By default, only content will be returned for each URI. See the endpoint

tests/test_read_documents.py

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,38 +9,62 @@
99
def test_write_and_read_binary(client: Client):
1010
content = "MarkLogic and Python".encode("ascii")
1111
response = client.documents.write(
12-
[
13-
Document(
14-
"/temp/doc1.bin",
15-
content,
16-
permissions=DEFAULT_PERMS,
17-
)
18-
]
12+
Document(
13+
"/temp/doc1.bin",
14+
content,
15+
permissions=DEFAULT_PERMS,
16+
)
1917
)
2018
assert 200 == response.status_code
2119

22-
docs = client.documents.read(["/temp/doc1.bin"])
20+
docs = client.documents.read("/temp/doc1.bin")
2321
assert len(docs) == 1
2422
doc = docs[0]
2523
assert doc.uri == "/temp/doc1.bin"
2624
content = doc.content.decode("ascii")
2725
assert content == "MarkLogic and Python"
2826

2927

28+
def test_write_and_read_xml_document(client: Client):
29+
response = client.documents.write(
30+
Document("/doc1.xml", "<hello>world</hello>", permissions=DEFAULT_PERMS)
31+
)
32+
assert response.status_code == 200
33+
34+
doc = client.documents.read("/doc1.xml")[0]
35+
# Verify content was turned into a string
36+
assert "<hello>world</hello>" in doc.content
37+
38+
39+
def test_write_and_read_text_document(client: Client):
40+
response = client.documents.write(
41+
Document(
42+
"/doc1.txt",
43+
"hello world!",
44+
permissions=DEFAULT_PERMS,
45+
content_type="text/plain",
46+
)
47+
)
48+
assert response.status_code == 200
49+
50+
doc = client.documents.read("/doc1.txt")[0]
51+
assert doc.content == "hello world!"
52+
53+
3054
def test_read_uri_with_double_quotes(client: Client):
3155
uri = '/this/"works.json'
3256
response = client.documents.write(
33-
[Document(uri, {"hello": "world"}, permissions=DEFAULT_PERMS)]
57+
Document(uri, {"hello": "world"}, permissions=DEFAULT_PERMS)
3458
)
3559
assert response.status_code == 200
3660

37-
docs = client.documents.read(["/this/%22works.json"])
61+
docs = client.documents.read("/this/%22works.json")
3862
assert len(docs) == 1
3963
assert "/this/%22works.json" == docs[0].uri
4064

4165

4266
def test_uri_not_found(client: Client):
43-
docs = client.documents.read(["/doesnt-exist.json"])
67+
docs = client.documents.read("/doesnt-exist.json")
4468
assert docs is not None
4569
assert len(docs) == 0
4670

@@ -51,7 +75,7 @@ def test_read_with_transform(client: Client):
5175
parameters, along with the ones added by the client.
5276
"""
5377
docs = client.documents.read(
54-
["/doc1.json"],
78+
"/doc1.json",
5579
categories=["content", "metadata"],
5680
params={"transform": "envelope"},
5781
)
@@ -92,7 +116,7 @@ def test_with_accept_header(client: Client):
92116
expected to be set to multipart/mixed by the client.
93117
"""
94118
docs = client.documents.read(
95-
["/doc1.json"],
119+
"/doc1.json",
96120
headers={"Accept": "something/invalid"},
97121
categories=["content", "quality"],
98122
)
@@ -107,7 +131,7 @@ def test_with_accept_header(client: Client):
107131

108132
def test_read_with_basic_client(basic_client: Client):
109133
# Just verifies that basic auth works as expected.
110-
doc = basic_client.documents.read(["/doc1.json"])[0]
134+
doc = basic_client.documents.read("/doc1.json")[0]
111135
assert {"hello": "world"} == doc.content
112136

113137

tests/test_write_documents.py

Lines changed: 59 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22

33
from marklogic import Client
4-
from marklogic.documents import Document
4+
from marklogic.documents import DefaultMetadata, Document
55

66
DEFAULT_PERMS = {"python-tester": ["read", "update"]}
77

@@ -94,48 +94,41 @@ def test_content_types(client: Client):
9494

9595
def test_single_doc(client):
9696
response = client.documents.write(
97-
[Document("/temp/doc1.json", {"doc": 1}, permissions=DEFAULT_PERMS)]
97+
Document("/temp/doc1.json", {"doc": 1}, permissions=DEFAULT_PERMS)
9898
)
9999
assert 200 == response.status_code
100100

101101
doc1 = client.get("v1/documents?uri=/temp/doc1.json").json()
102102
assert 1 == doc1["doc"]
103103

104104

105-
@pytest.mark.skip("Will get this working when supporting batch-level metadata")
106105
def test_server_generated_uri(client):
107106
response = client.documents.write(
108107
[
108+
DefaultMetadata(permissions=DEFAULT_PERMS),
109109
Document(
110-
None,
111-
{"doc": "serveruri"},
112-
extension=".json",
113-
directory="/temp/",
114-
permissions=DEFAULT_PERMS,
115-
)
110+
content={"doc": "serveruri"}, extension=".json", directory="/temp/"
111+
),
116112
]
117113
)
118114
assert 200 == response.status_code
119115

120-
# Do a search to find the URI.
121-
data = client.get("/v1/search?q=serveruri&format=json").json()
122-
assert 1 == data["total"]
123-
uri = data["results"][0]["uri"]
124-
125-
doc1 = client.get(f"v1/documents?uri={uri}").json()
126-
assert "serveruri" == doc1["doc"]
116+
# Do a search to verify the document was created.
117+
docs = client.documents.search(q="serveruri")
118+
assert len(docs) == 1
119+
doc = docs[0]
120+
assert doc.uri.startswith("/temp/")
121+
assert doc.uri.endswith(".json")
127122

128123

129124
def test_repair_xml(client):
130125
response = client.documents.write(
131-
[
132-
Document(
133-
"/temp/doc1.xml",
134-
"<doc>needs <b>closing tag</doc>",
135-
repair="full",
136-
permissions=DEFAULT_PERMS,
137-
)
138-
]
126+
Document(
127+
"/temp/doc1.xml",
128+
"<doc>needs <b>closing tag</doc>",
129+
repair="full",
130+
permissions=DEFAULT_PERMS,
131+
)
139132
)
140133
assert 200 == response.status_code
141134

@@ -147,25 +140,19 @@ def test_repair_xml(client):
147140
def test_extract_binary(client):
148141
content = "MarkLogic and Python".encode("ascii")
149142
response = client.documents.write(
150-
[
151-
Document(
152-
"/temp/doc1.bin",
153-
content,
154-
extract="properties",
155-
permissions=DEFAULT_PERMS,
156-
)
157-
]
143+
Document(
144+
"/temp/doc1.bin",
145+
content,
146+
extract="properties",
147+
permissions=DEFAULT_PERMS,
148+
)
158149
)
159150
assert 200 == response.status_code
160151

161152

162153
def test_optimistic_locking(client):
163154
response = client.documents.write(
164-
[
165-
Document(
166-
"/temp/doc1.json", {"content": "original"}, permissions=DEFAULT_PERMS
167-
)
168-
]
155+
Document("/temp/doc1.json", {"content": "original"}, permissions=DEFAULT_PERMS)
169156
)
170157
assert 200 == response.status_code
171158

@@ -174,14 +161,12 @@ def test_optimistic_locking(client):
174161

175162
# Update the document, passing in the current version_id based on the ETag.
176163
response = client.documents.write(
177-
[
178-
Document(
179-
"/temp/doc1.json",
180-
{"content": "updated!"},
181-
version_id=etag,
182-
permissions=DEFAULT_PERMS,
183-
)
184-
]
164+
Document(
165+
"/temp/doc1.json",
166+
{"content": "updated!"},
167+
version_id=etag,
168+
permissions=DEFAULT_PERMS,
169+
)
185170
)
186171
assert 200 == response.status_code
187172

@@ -191,14 +176,12 @@ def test_optimistic_locking(client):
191176

192177
# Next update should fail since the ETag is no longer the current version.
193178
response = client.documents.write(
194-
[
195-
Document(
196-
"/temp/doc1.json",
197-
{"this": "should fail"},
198-
version_id=etag,
199-
permissions=DEFAULT_PERMS,
200-
)
201-
]
179+
Document(
180+
"/temp/doc1.json",
181+
{"this": "should fail"},
182+
version_id=etag,
183+
permissions=DEFAULT_PERMS,
184+
)
202185
)
203186
assert 412 == response.status_code, "412 is returned when the versionId is invalid."
204187
assert response.text.__contains__("RESTAPI-CONTENTWRONGVERSION")
@@ -214,14 +197,12 @@ def test_temporal_doc(client):
214197
}
215198

216199
response = client.documents.write(
217-
[
218-
Document(
219-
"/temp/doc1.json",
220-
content,
221-
temporal_document="custom1",
222-
permissions=DEFAULT_PERMS,
223-
)
224-
],
200+
Document(
201+
"/temp/doc1.json",
202+
content,
203+
temporal_document="custom1",
204+
permissions=DEFAULT_PERMS,
205+
),
225206
params={"temporal-collection": "temporal-collection"},
226207
)
227208
assert 200 == response.status_code
@@ -236,4 +217,20 @@ def test_temporal_doc(client):
236217

237218

238219
def test_metadata_no_content(client: Client):
239-
print("TODO!")
220+
uri = "/temp/doc1.json"
221+
response = client.documents.write(
222+
Document(uri, {"doc": 1}, permissions=DEFAULT_PERMS),
223+
)
224+
assert response.status_code == 200
225+
226+
doc = client.documents.read(uri, categories=["metadata"])[0]
227+
# Collections is not None since MarkLogic returns [] for it.
228+
assert len(doc.collections) == 0
229+
230+
response = client.documents.write(Document(uri, collections=["c1", "c2"]))
231+
assert response.status_code == 200
232+
233+
doc = client.documents.read(uri, categories=["metadata"])[0]
234+
assert "c1" in doc.collections
235+
assert "c2" in doc.collections
236+
assert len(doc.collections) == 2

0 commit comments

Comments
 (0)