Skip to content

Commit d6a297b

Browse files
committed
Add support for no pre_head_insert and no js_notify callback
1 parent fa9f31b commit d6a297b

File tree

5 files changed

Lines changed: 73 additions & 98 deletions

5 files changed

Lines changed: 73 additions & 98 deletions

src/zimscraperlib/rewriting/html.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -132,9 +132,9 @@ class HtmlRewriter(HTMLParser):
132132
def __init__(
133133
self,
134134
url_rewriter: ArticleUrlRewriter,
135-
pre_head_insert: str,
135+
pre_head_insert: str | None,
136136
post_head_insert: str | None,
137-
notify_js_module: Callable[[ZimPath], None],
137+
notify_js_module: Callable[[ZimPath], None] | None,
138138
):
139139
super().__init__(convert_charrefs=False)
140140
self.url_rewriter = url_rewriter
@@ -430,7 +430,7 @@ def do_attribute_rewrite(
430430
css_rewriter: CssRewriter,
431431
url_rewriter: ArticleUrlRewriter,
432432
base_href: str | None,
433-
notify_js_module: Callable[[ZimPath], None],
433+
notify_js_module: Callable[[ZimPath], None] | None,
434434
) -> AttrNameAndValue:
435435
"""Utility function to process all attribute rewriting rules
436436
@@ -587,7 +587,7 @@ def rewrite_href_src_attributes(
587587
attrs: AttrsList,
588588
url_rewriter: ArticleUrlRewriter,
589589
base_href: str | None,
590-
notify_js_module: Callable[[ZimPath], None],
590+
notify_js_module: Callable[[ZimPath], None] | None,
591591
):
592592
"""Rewrite href and src attributes
593593
@@ -596,7 +596,10 @@ def rewrite_href_src_attributes(
596596
"""
597597
if attr_name not in ("href", "src") or not attr_value:
598598
return
599-
if get_html_rewrite_context(tag=tag, attrs=attrs) == "js-module":
599+
if (
600+
notify_js_module
601+
and get_html_rewrite_context(tag=tag, attrs=attrs) == "js-module"
602+
):
600603
notify_js_module(url_rewriter.get_item_path(attr_value, base_href=base_href))
601604
return (
602605
attr_name,

src/zimscraperlib/rewriting/js.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ def __init__(
206206
self,
207207
url_rewriter: ArticleUrlRewriter,
208208
base_href: str | None,
209-
notify_js_module: Callable[[ZimPath], None],
209+
notify_js_module: Callable[[ZimPath], None] | None,
210210
):
211211
super().__init__(None)
212212
self.first_buff = self._init_local_declaration(GLOBAL_OVERRIDES)
@@ -298,11 +298,12 @@ def func(
298298
m_object: re.Match[str], _opts: dict[str, Any] | None = None
299299
) -> str:
300300
def sub_funct(match: re.Match[str]) -> str:
301-
self.notify_js_module(
302-
self.url_rewriter.get_item_path(
303-
match.group(2), base_href=self.base_href
301+
if self.notify_js_module:
302+
self.notify_js_module(
303+
self.url_rewriter.get_item_path(
304+
match.group(2), base_href=self.base_href
305+
)
304306
)
305-
)
306307
return (
307308
f"{match.group(1)}{get_rewriten_import_url(match.group(2))}"
308309
f"{match.group(3)}"

tests/rewriting/conftest.py

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -11,16 +11,6 @@
1111
)
1212

1313

14-
@pytest.fixture(scope="module")
15-
def no_js_notify():
16-
"""Fixture to not care about notification of detection of a JS file"""
17-
18-
def no_js_notify_handler(_: str):
19-
pass
20-
21-
yield no_js_notify_handler
22-
23-
2414
class SimpleUrlRewriter(ArticleUrlRewriter):
2515
"""Basic URL rewriter mocking most calls"""
2616

tests/rewriting/test_html_rewriting.py

Lines changed: 55 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -74,17 +74,15 @@ def no_rewrite_content(request: pytest.FixtureRequest):
7474
yield request.param
7575

7676

77-
def test_no_rewrite(
78-
no_rewrite_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
79-
):
77+
def test_no_rewrite(no_rewrite_content: ContentForTests):
8078
assert (
8179
HtmlRewriter(
8280
ArticleUrlRewriter(
8381
article_url=HttpUrl(f"http://{no_rewrite_content.article_url}"),
8482
),
85-
"",
86-
"",
87-
no_js_notify,
83+
None,
84+
None,
85+
None,
8886
)
8987
.rewrite(no_rewrite_content.input_str)
9088
.content
@@ -116,17 +114,15 @@ def escaped_content(request: pytest.FixtureRequest):
116114
yield request.param
117115

118116

119-
def test_escaped_content(
120-
escaped_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
121-
):
117+
def test_escaped_content(escaped_content: ContentForTests):
122118
transformed = (
123119
HtmlRewriter(
124120
ArticleUrlRewriter(
125121
article_url=HttpUrl(f"http://{escaped_content.article_url}")
126122
),
127-
"",
128-
"",
129-
no_js_notify,
123+
None,
124+
None,
125+
None,
130126
)
131127
.rewrite(escaped_content.input_str)
132128
.content
@@ -239,17 +235,15 @@ def js_rewrites(request: pytest.FixtureRequest):
239235
yield request.param
240236

241237

242-
def test_js_rewrites(
243-
js_rewrites: ContentForTests, no_js_notify: Callable[[ZimPath], None]
244-
):
238+
def test_js_rewrites(js_rewrites: ContentForTests):
245239
transformed = (
246240
HtmlRewriter(
247241
ArticleUrlRewriter(
248242
article_url=HttpUrl(f"http://{js_rewrites.article_url}")
249243
),
250-
"",
251-
"",
252-
no_js_notify,
244+
None,
245+
None,
246+
None,
253247
)
254248
.rewrite(js_rewrites.input_str)
255249
.content
@@ -334,24 +328,24 @@ def rewrite_url(request: pytest.FixtureRequest):
334328
yield request.param
335329

336330

337-
def test_rewrite(rewrite_url: ContentForTests, no_js_notify: Callable[[ZimPath], None]):
331+
def test_rewrite(rewrite_url: ContentForTests):
338332
assert (
339333
HtmlRewriter(
340334
ArticleUrlRewriter(
341335
article_url=HttpUrl(f"http://{rewrite_url.article_url}"),
342336
existing_zim_paths={ZimPath("exemple.com/a/long/path")},
343337
),
344-
"",
345-
"",
346-
no_js_notify,
338+
None,
339+
None,
340+
None,
347341
)
348342
.rewrite(rewrite_url.input_str)
349343
.content
350344
== rewrite_url.expected_str
351345
)
352346

353347

354-
def test_extract_title(no_js_notify: Callable[[ZimPath], None]):
348+
def test_extract_title():
355349
content = """<html>
356350
<head>
357351
<title>Page title</title>
@@ -367,25 +361,25 @@ def test_extract_title(no_js_notify: Callable[[ZimPath], None]):
367361
article_url=HttpUrl("http://example.com"),
368362
existing_zim_paths={ZimPath("exemple.com/a/long/path")},
369363
),
370-
"",
371-
"",
372-
no_js_notify,
364+
None,
365+
None,
366+
None,
373367
)
374368
.rewrite(content)
375369
.title
376370
== "Page title"
377371
)
378372

379373

380-
def test_rewrite_attributes(no_js_notify: Callable[[ZimPath], None]):
374+
def test_rewrite_attributes():
381375
rewriter = HtmlRewriter(
382376
ArticleUrlRewriter(
383377
article_url=HttpUrl("http://kiwix.org/"),
384378
existing_zim_paths={ZimPath("kiwix.org/foo")},
385379
),
386-
"",
387-
"",
388-
no_js_notify,
380+
None,
381+
None,
382+
None,
389383
)
390384

391385
assert (
@@ -407,13 +401,13 @@ def test_rewrite_attributes(no_js_notify: Callable[[ZimPath], None]):
407401
)
408402

409403

410-
def test_rewrite_css(no_js_notify: Callable[[ZimPath], None]):
404+
def test_rewrite_css():
411405
output = (
412406
HtmlRewriter(
413407
ArticleUrlRewriter(article_url=HttpUrl("http://kiwix.org/")),
414-
"",
415-
"",
416-
no_js_notify,
408+
None,
409+
None,
410+
None,
417411
)
418412
.rewrite(
419413
"<style>p { /* A comment with a http://link.org/ */ "
@@ -427,7 +421,7 @@ def test_rewrite_css(no_js_notify: Callable[[ZimPath], None]):
427421
)
428422

429423

430-
def test_head_insert(no_js_notify: Callable[[ZimPath], None]):
424+
def test_head_insert():
431425
content = """<html>
432426
<head>
433427
<title>A test content</title>
@@ -439,18 +433,17 @@ def test_head_insert(no_js_notify: Callable[[ZimPath], None]):
439433

440434
url_rewriter = ArticleUrlRewriter(article_url=HttpUrl("http://kiwix.org/"))
441435
assert (
442-
HtmlRewriter(url_rewriter, "", "", no_js_notify).rewrite(content).content
443-
== content
436+
HtmlRewriter(url_rewriter, None, None, None).rewrite(content).content == content
444437
)
445438

446-
assert HtmlRewriter(url_rewriter, "PRE_HEAD_INSERT", "", no_js_notify).rewrite(
439+
assert HtmlRewriter(url_rewriter, "PRE_HEAD_INSERT", None, None).rewrite(
447440
content
448441
).content == content.replace("<head>", "<head>PRE_HEAD_INSERT")
449-
assert HtmlRewriter(url_rewriter, "", "POST_HEAD_INSERT", no_js_notify).rewrite(
442+
assert HtmlRewriter(url_rewriter, None, "POST_HEAD_INSERT", None).rewrite(
450443
content
451444
).content == content.replace("</head>", "POST_HEAD_INSERT</head>")
452445
assert HtmlRewriter(
453-
url_rewriter, "PRE_HEAD_INSERT", "POST_HEAD_INSERT", no_js_notify
446+
url_rewriter, "PRE_HEAD_INSERT", "POST_HEAD_INSERT", None
454447
).rewrite(content).content == content.replace(
455448
"<head>", "<head>PRE_HEAD_INSERT"
456449
).replace(
@@ -735,9 +728,7 @@ def rewrite_base_href_content(request):
735728
yield request.param
736729

737730

738-
def test_rewrite_base_href(
739-
rewrite_base_href_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
740-
):
731+
def test_rewrite_base_href(rewrite_base_href_content: ContentForTests):
741732
assert (
742733
HtmlRewriter(
743734
ArticleUrlRewriter(
@@ -750,9 +741,9 @@ def test_rewrite_base_href(
750741
ZimPath("kiwix.org/favicon.png"),
751742
},
752743
),
753-
"",
754-
"",
755-
no_js_notify,
744+
None,
745+
None,
746+
None,
756747
)
757748
.rewrite(rewrite_base_href_content.input_str)
758749
.content
@@ -795,15 +786,13 @@ def test_rewrite_base_href(
795786
),
796787
],
797788
)
798-
def test_simple_rewrite(
799-
input_content: str, expected_output: str, no_js_notify: Callable[[ZimPath], None]
800-
):
789+
def test_simple_rewrite(input_content: str, expected_output: str):
801790
assert (
802791
HtmlRewriter(
803792
ArticleUrlRewriter(article_url=HttpUrl("http://example.com")),
804-
"",
805-
"",
806-
no_js_notify,
793+
None,
794+
None,
795+
None,
807796
)
808797
.rewrite(input_content)
809798
.content
@@ -862,9 +851,7 @@ def rewrite_onxxx_content(request: pytest.FixtureRequest):
862851
yield request.param
863852

864853

865-
def test_rewrite_onxxx_event(
866-
rewrite_onxxx_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
867-
):
854+
def test_rewrite_onxxx_event(rewrite_onxxx_content: ContentForTests):
868855
assert (
869856
HtmlRewriter(
870857
ArticleUrlRewriter(
@@ -877,9 +864,9 @@ def test_rewrite_onxxx_event(
877864
ZimPath("kiwix.org/favicon.png"),
878865
},
879866
),
880-
"",
881-
"",
882-
no_js_notify,
867+
None,
868+
None,
869+
None,
883870
)
884871
.rewrite(rewrite_onxxx_content.input_str)
885872
.content
@@ -924,20 +911,17 @@ def rewrite_meta_charset_content(request: pytest.FixtureRequest):
924911
yield request.param
925912

926913

927-
def test_rewrite_meta_charset(
928-
rewrite_meta_charset_content: ContentForTests,
929-
no_js_notify: Callable[[ZimPath], None],
930-
):
914+
def test_rewrite_meta_charset(rewrite_meta_charset_content: ContentForTests):
931915
assert (
932916
HtmlRewriter(
933917
ArticleUrlRewriter(
934918
article_url=HttpUrl(
935919
f"http://{rewrite_meta_charset_content.article_url}"
936920
)
937921
),
938-
"",
939-
"",
940-
no_js_notify,
922+
None,
923+
None,
924+
None,
941925
)
942926
.rewrite(rewrite_meta_charset_content.input_str)
943927
.content
@@ -963,7 +947,6 @@ def rewrite_meta_http_equiv_redirect_full_content(request: pytest.FixtureRequest
963947

964948
def test_rewrite_meta_http_equiv_redirect_full(
965949
rewrite_meta_http_equiv_redirect_full_content: ContentForTests,
966-
no_js_notify: Callable[[ZimPath], None],
967950
):
968951
assert (
969952
HtmlRewriter(
@@ -973,9 +956,9 @@ def test_rewrite_meta_http_equiv_redirect_full(
973956
),
974957
existing_zim_paths={ZimPath("kiwix.org/somepage")},
975958
),
976-
"",
977-
"",
978-
no_js_notify,
959+
None,
960+
None,
961+
None,
979962
)
980963
.rewrite(rewrite_meta_http_equiv_redirect_full_content.input_str)
981964
.content
@@ -1112,11 +1095,12 @@ def rewrite_tag_name(attr_name: str, attr_value: str | None) -> AttrNameAndValue
11121095
@rules.rewrite_attribute()
11131096
def rewrite_call_notify(
11141097
attr_name: str,
1115-
notify_js_module: Callable[[ZimPath], None],
1098+
notify_js_module: Callable[[ZimPath], None] | None,
11161099
) -> AttrNameAndValue | None:
11171100
if attr_name != "call_notify":
11181101
return
1119-
notify_js_module(ZimPath("foo"))
1102+
if notify_js_module:
1103+
notify_js_module(ZimPath("foo"))
11201104
return
11211105

11221106

0 commit comments

Comments
 (0)