Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions R/cpp11.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ leadingSpaces <- function(lines) {
.Call(`_roxygen2_leadingSpaces`, lines)
}

# R-level binding for the native routine `_roxygen2_escape_rd_for_md_c`.
# `text` is forwarded unchanged to the compiled code via `.Call()` and the
# native result is returned as-is. The C++ declaration of the underlying
# function takes a `std::string` and returns a `cpp11::list`.
# NOTE(review): this looks like cpp11-generated glue -- confirm before
# hand-editing, as regeneration would discard manual changes.
escape_rd_for_md_c <- function(text) {
.Call(`_roxygen2_escape_rd_for_md_c`, text)
}

# R-level binding for the native routine `_roxygen2_unescape_rd_for_md_c`.
# `rd_text` and `tags` are forwarded unchanged via `.Call()`. The C++
# declaration of the underlying function takes a `std::string` and a
# `cpp11::strings` and returns a `std::string`.
# NOTE(review): this looks like cpp11-generated glue -- confirm before
# hand-editing, as regeneration would discard manual changes.
unescape_rd_for_md_c <- function(rd_text, tags) {
.Call(`_roxygen2_unescape_rd_for_md_c`, rd_text, tags)
}

# R-level binding for the native routine `_roxygen2_tokenise_block`.
# `lines`, `file` and `offset` are forwarded unchanged via `.Call()`. The C++
# declaration of the underlying function takes `cpp11::strings`,
# `std::string` and `int`, and returns a `cpp11::list`.
# NOTE(review): this looks like cpp11-generated glue -- confirm before
# hand-editing, as regeneration would discard manual changes.
tokenise_block <- function(lines, file, offset) {
.Call(`_roxygen2_tokenise_block`, lines, file, offset)
}
Expand Down
237 changes: 9 additions & 228 deletions R/markdown-escaping.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#' `escape_rd_for_md()` replaces fragile Rd tags with placeholders, to avoid
#' interpreting them as markdown. `unescape_rd_for_md()` puts the original
#' text back in place of the placeholders after the markdown parsing is done.
#' The fragile tags are listed in `escaped_for_md`.
#'
#' Some Rd macros are treated specially:
#'
Expand All @@ -21,256 +20,38 @@
#' @rdname markdown-internals
#' @keywords internal
escape_rd_for_md <- function(text) {
rd_tags <- find_fragile_rd_tags(text, escaped_for_md)
protected <- protect_rd_tags(text, rd_tags)
double_escape_md(protected)
result <- escape_rd_for_md_c(text)
out <- result$text
attr(out, "roxygen-markdown-subst") <- as.character(result$tags)
out
}

# Names of the "fragile" Rd macros, each prefixed with a literal backslash.
# These are the tags that get swapped for placeholders so that their contents
# are not interpreted as markdown. The order of the entries is kept stable.
escaped_for_md <- paste0(
  "\\",
  c(
    "acronym", "code", "command", "CRANpkg",
    "deqn", "doi", "dontrun", "dontshow", "donttest",
    "email", "env", "eqn",
    "figure", "file",
    "if", "ifelse",
    "kbd",
    "link", "linkS4class",
    "method", "mjeqn", "mjdeqn", "mjseqn", "mjsdeqn", "mjteqn", "mjtdeqn",
    "newcommand",
    "option", "out",
    "packageAuthor", "packageDescription", "packageDESCRIPTION",
    "packageIndices", "packageMaintainer", "packageTitle", "pkg", "PR",
    "preformatted",
    "renewcommand",
    "S3method", "S4method", "samp", "special",
    "testonly",
    "url",
    "var", "verb"
  )
)

#' @param rd_text The markdown parsed and interpreted text.
#' @param esc_text The original escaped text from
#' `escape_rd_for_md()`.
#' @rdname markdown-internals
unescape_rd_for_md <- function(rd_text, esc_text) {
  # The substitution record attached to the escaped text holds the
  # placeholder prefix (`id`) and a table of the original tag texts (`tags`).
  prefix <- attr(esc_text, "roxygen-markdown-subst")$id
  originals <- attr(esc_text, "roxygen-markdown-subst")$tags

  # Placeholder k has the form "<prefix>-k-". Only the first occurrence of
  # each placeholder is put back, and the match is literal (no regex).
  restore_one <- function(current, k) {
    sub(paste0(prefix, "-", k, "-"), originals$text[k], current, fixed = TRUE)
  }

  Reduce(restore_one, seq_len(nrow(originals)), rd_text)
}

#' Locate the outermost fragile Rd tags in a piece of text
#'
#' Only tags whose name is in `fragile` are considered, and a fragile tag
#' that sits inside another fragile tag is dropped, so the result contains
#' top-level occurrences only.
#'
#' @param text Input text, character scalar.
#' @param fragile Character vector of fragile tag names to look for.
#' @return Data frame of fragile tags, with columns:
#'   `tag`, `start`, `end`, `argend`, and (when at least one tag is found)
#'   `text`, the full source of the tag including its arguments.
#'
#' @noRd
find_fragile_rd_tags <- function(text, fragile) {
  every_tag <- find_all_rd_tags(text)
  candidates <- every_tag[every_tag$tag %in% fragile, ]

  # A candidate is outermost when the only span that encloses it is itself.
  is_outermost <- map_lgl(seq_len(nrow(candidates)), function(idx) {
    encloses <- candidates$start <= candidates$start[idx] &
      candidates$argend >= candidates$argend[idx]
    sum(encloses) == 1
  })
  outermost <- candidates[is_outermost, ]

  if (nrow(outermost) > 0) {
    outermost$text <- str_sub(text, outermost$start, outermost$argend)
  }

  outermost
}

#' Locate every complete Rd tag in a string
#'
#' "Complete" means the tag name together with its argument(s): besides the
#' position of the name, the position where the arguments end is reported.
#'
#' @param text Input text, character scalar.
#' @return The data frame from `find_all_tag_names()` with an extra integer
#'   column `argend`.
#'
#' @noRd
find_all_rd_tags <- function(text) {
  tags <- find_all_tag_names(text)
  n_chars <- nchar(text)

  # Tags may be nested inside the arguments of other tags, so each one is
  # scanned independently, starting from the last character of its name.
  arg_end_for <- function(name_end) {
    remainder <- str_sub(text, name_end, n_chars)
    findEndOfTag(remainder, is_code = FALSE) + name_end
  }
  tags$argend <- map_int(tags$end, arg_end_for)

  tags
}

#' Locate every Rd tag name in a string
#'
#' A tag name is a backslash followed by a letter and then any number of
#' letters or digits. Arguments are not looked at here. Tag names inside
#' code, strings and comments are matched as well (for now).
#'
#' @param text Input text, scalar.
#' @return Data frame, with columns: `tag` (the matched name, including the
#'   backslash), `start` and `end` (its position in `text`).
#'
#' @noRd
find_all_tag_names <- function(text) {
  positions <- str_locate_all(text, r"(\\[a-zA-Z][a-zA-Z0-9]*)")[[1]]
  matched <- str_sub(text, positions[, "start"], positions[, "end"])

  data.frame(tag = matched, as.data.frame(positions))
}

#' Swap fragile Rd tags for placeholders
#'
#' @param text The text, character scalar.
#' @param rd_tags Fragile Rd tags, in a data frame,
#'   as returned by `find_fragile_rd_tags`.
#' @return The text with every tag replaced by a placeholder. The
#'   `"roxygen-markdown-subst"` attribute records what is needed to undo the
#'   substitution: a list with the tag table (`tags`) and the random
#'   placeholder prefix (`id`).
#'
#' @noRd
protect_rd_tags <- function(text, rd_tags) {
  placeholder_id <- make_random_string()
  protected <- str_sub_same(text, rd_tags, placeholder_id)

  attr(protected, "roxygen-markdown-subst") <- list(
    tags = rd_tags,
    id = placeholder_id
  )
  protected
}

#' Replace several intervals of one string with numbered placeholders
#'
#' The intervals must not overlap; an internal error is raised otherwise.
#' Interval `i` (in order of `start`) is replaced by `<id>-<i>-`.
#'
#' @param str String scalar.
#' @param repl Data frame with columns: `start`, `end`,
#'   `argend`, `text`.
#' @param id Placeholder string.
#' @return Input string with the replacements performed. All coordinates in
#'   `repl` refer to the original string, i.e. the replacements behave as if
#'   they were made in parallel.
#'
#' @noRd
str_sub_same <- function(str, repl, id) {
  repl <- repl[order(repl$start), ]

  if (is.unsorted(repl$end) || is.unsorted(repl$argend)) {
    cli::cli_abort("Replacement intervals must not overlap.", .internal = TRUE)
  }

  # Running difference between positions in the original string and in the
  # partially rewritten one.
  shift <- 0L
  for (k in seq_len(nrow(repl))) {
    ## The trailing - is needed, to distinguish between -1 and -10
    placeholder <- paste0(id, "-", k, "-")

    from <- repl$start[k] + shift
    to <- repl$argend[k] + shift
    str_sub(str, from, to) <- placeholder

    shift <- shift + (nchar(placeholder) - (to - from + 1))
  }

  str
}

#' Make a random string
#'
#' We use this as the placeholder, to make sure that the
#' placeholder does not appear in the text.
#'
#' @return String scalar
#'
#' @noRd

make_random_string <- function(length = 32) {
paste(
sample(c(LETTERS, letters, 0:9), length, replace = TRUE),
collapse = ""
)
unescape_rd_for_md_c(rd_text, attr(esc_text, "roxygen-markdown-subst"))
}

#' Check markdown escaping
#'
#' @description
#' This is a regression test for Markdown escaping.
#'
#' @details
#' Each of the following bullets should look the same when rendered:
#'
#' * Backticks: `\`, `\%`, `\$`, `\_`
#' * `\verb{}`: \verb{\\}, \verb{\\%}, \verb{\$}, \verb{\_}
#'
#' \[ this isn't a link \]
#'
#' \\[ neither is this \\]
#'
#' @param text Input text.
#' @return Double-escaped text.
#' @name double_escape_md
#' @keywords internal
#' @examples
#' "%" # percent
#' "\"" # double quote
#' '\'' # single quote
double_escape_md <- function(text) {
  # Escaped brackets must keep a single backslash so that `\[` and `\]`
  # still prevent [] from creating a link after the doubling step.
  bracket_fixes <- list(
    c(r"(\\[)", r"(\[)"),
    c(r"(\\])", r"(\])")
  )

  # Double every backslash (literal match, not a regular expression).
  escaped <- gsub(r"(\)", r"(\\)", text, fixed = TRUE)

  for (fix in bracket_fixes) {
    escaped <- gsub(fix[[1]], fix[[2]], escaped, fixed = TRUE)
  }
  escaped
}
NULL
13 changes: 2 additions & 11 deletions man/double_escape_md.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/markdown-internals.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 23 additions & 7 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,20 @@ extern "C" SEXP _roxygen2_leadingSpaces(SEXP lines) {
return cpp11::as_sexp(leadingSpaces(cpp11::as_cpp<cpp11::decay_t<cpp11::strings>>(lines)));
END_CPP11
}
// markdown-escaping.cpp
// C-linkage entry point for escape_rd_for_md_c(). Converts the incoming SEXP
// to a std::string with cpp11::as_cpp, calls the C++ function, and converts
// the returned cpp11::list back to a SEXP with cpp11::as_sexp.
// NOTE(review): BEGIN_CPP11 / END_CPP11 are cpp11 macros; presumably they
// turn C++ exceptions into R errors -- confirm against the cpp11 docs.
cpp11::list escape_rd_for_md_c(std::string text);
extern "C" SEXP _roxygen2_escape_rd_for_md_c(SEXP text) {
BEGIN_CPP11
return cpp11::as_sexp(escape_rd_for_md_c(cpp11::as_cpp<cpp11::decay_t<std::string>>(text)));
END_CPP11
}
// markdown-escaping.cpp
// C-linkage entry point for unescape_rd_for_md_c(). Converts `rd_text` to a
// std::string and `tags` to cpp11::strings with cpp11::as_cpp, calls the C++
// function, and converts the returned std::string back to a SEXP with
// cpp11::as_sexp.
// NOTE(review): BEGIN_CPP11 / END_CPP11 are cpp11 macros; presumably they
// turn C++ exceptions into R errors -- confirm against the cpp11 docs.
std::string unescape_rd_for_md_c(std::string rd_text, cpp11::strings tags);
extern "C" SEXP _roxygen2_unescape_rd_for_md_c(SEXP rd_text, SEXP tags) {
BEGIN_CPP11
return cpp11::as_sexp(unescape_rd_for_md_c(cpp11::as_cpp<cpp11::decay_t<std::string>>(rd_text), cpp11::as_cpp<cpp11::decay_t<cpp11::strings>>(tags)));
END_CPP11
}
// parser2.cpp
cpp11::list tokenise_block(cpp11::strings lines, std::string file, int offset);
extern "C" SEXP _roxygen2_tokenise_block(SEXP lines, SEXP file, SEXP offset) {
Expand All @@ -57,13 +71,15 @@ extern "C" SEXP _roxygen2_wrapUsage(SEXP string, SEXP width, SEXP indent) {

extern "C" {
static const R_CallMethodDef CallEntries[] = {
{"_roxygen2_escapeExamples", (DL_FUNC) &_roxygen2_escapeExamples, 1},
{"_roxygen2_findEndOfTag", (DL_FUNC) &_roxygen2_findEndOfTag, 2},
{"_roxygen2_find_includes", (DL_FUNC) &_roxygen2_find_includes, 1},
{"_roxygen2_leadingSpaces", (DL_FUNC) &_roxygen2_leadingSpaces, 1},
{"_roxygen2_rdComplete", (DL_FUNC) &_roxygen2_rdComplete, 2},
{"_roxygen2_tokenise_block", (DL_FUNC) &_roxygen2_tokenise_block, 3},
{"_roxygen2_wrapUsage", (DL_FUNC) &_roxygen2_wrapUsage, 3},
{"_roxygen2_escapeExamples", (DL_FUNC) &_roxygen2_escapeExamples, 1},
{"_roxygen2_escape_rd_for_md_c", (DL_FUNC) &_roxygen2_escape_rd_for_md_c, 1},
{"_roxygen2_findEndOfTag", (DL_FUNC) &_roxygen2_findEndOfTag, 2},
{"_roxygen2_find_includes", (DL_FUNC) &_roxygen2_find_includes, 1},
{"_roxygen2_leadingSpaces", (DL_FUNC) &_roxygen2_leadingSpaces, 1},
{"_roxygen2_rdComplete", (DL_FUNC) &_roxygen2_rdComplete, 2},
{"_roxygen2_tokenise_block", (DL_FUNC) &_roxygen2_tokenise_block, 3},
{"_roxygen2_unescape_rd_for_md_c", (DL_FUNC) &_roxygen2_unescape_rd_for_md_c, 2},
{"_roxygen2_wrapUsage", (DL_FUNC) &_roxygen2_wrapUsage, 3},
{NULL, NULL, 0}
};
}
Expand Down
Loading
Loading