diff --git a/DESCRIPTION b/DESCRIPTION index 381636e6ed..d5b8aae1de 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -86,6 +86,7 @@ Authors@R: c( person("Sebastian", "Meyer", role = "ctb"), person("Sietse", "Brouwer", role = "ctb"), person(c("Simon", "de"), "Bernard", role = "ctb"), + person("Steve", "Condylios", role = c("ctb"), comment = c(ORCID = "0000-0003-0599-844X")), person("Sylvain", "Rousseau", role = "ctb"), person("Taiyun", "Wei", role = "ctb"), person("Thibaut", "Assus", role = "ctb"), @@ -153,6 +154,7 @@ Collate: 'defaults.R' 'concordance.R' 'engine.R' + 'extract.R' 'highlight.R' 'themes.R' 'header.R' diff --git a/NAMESPACE b/NAMESPACE index 2838edef5a..be59c55419 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,6 +53,7 @@ export(hook_pngquant) export(hook_purl) export(hook_r2swf) export(hook_scianimator) +export(html_to_r) export(image_uri) export(imgur_upload) export(include_app) @@ -138,6 +139,8 @@ import(graphics) import(methods) import(stats) import(utils) +importFrom(stringr,str_match_all) +importFrom(stringr,str_replace_all) importFrom(xfun,attr) importFrom(xfun,file_ext) importFrom(xfun,file_string) diff --git a/R/extract.R b/R/extract.R new file mode 100644 index 0000000000..10594a92ef --- /dev/null +++ b/R/extract.R @@ -0,0 +1,88 @@ +#' Extract R code from a knitted R Markdown HTML file. +#' +#' @name html_to_r +#' @author Steve Condylios +#' @usage html_to_r(rmarkdown_html, padding, inc_out) +#' @param rmarkdown_html The HTML output of a knitted Rmd file +#' @param padding Specifies what goes between the last character of a code block and the +#' first character of the next code block. Defaults to two newlines (which gives +#' the visual appearance of one newline between code blocks). +#' @param inc_out \code{TRUE}/\code{FALSE} as to whether to include output of code chucks. Defaults +#' to \code{TRUE}. +#' +#' @return A character vector of length 1 containing the R code extracted from the +#' R Markdown HTML file. +#' +#' @export +#' +#' @examples +#' cat((rmarkdown_html <- "
Intro +#' ```R\n1 * 1\n```\nmore text\n```r\n2 * 2\n``` +#' some more text\n```\n3 * 3\n```\nThe End. +#' \n")) +#' +#' html_to_r(rmarkdown_html) +#' +#' cat(html_to_r(rmarkdown_html)) +#' +#' @importFrom stringr str_match_all str_replace_all + + + +html_to_r <- function(rmarkdown_html, padding, inc_out) { + + if(missing(padding)) { padding = "\n\n" } + if(missing(inc_out)) { inc_out = TRUE } + + extract_body <- function(rmarkdown_html) { + # light-weight replacement for html_nodes() + as.character(xml2::xml_find_all(xml2::read_html(rmarkdown_html), ".//body")) + } + + body <- extract_body(rmarkdown_html) + + remove_body_tags <- function(body) { + # light-weight replacement for html_text() + inner <- substr(body, 7, nchar(body)) + substr(inner, 1, nchar(inner)-6) + } + + inner <- remove_body_tags(body) + + # only include |``` if output is to be included + chunks <- if(inc_out == TRUE) { + str_match_all(inner, "(```R|```r|```)((.|\\s)*?)```") + }else{ + str_match_all(inner, "(```R|```r)((.|\\s)*?)```") + } + + + clean_chunks <- function(chunks) { + + # remove first char + neat_chunks <- substr(chunks[[1]][,3] , 2, nchar(chunks[[1]][,3])) + + # last first char + substr(neat_chunks , 1, nchar(neat_chunks) - 1) + } + + neat_chunks <- clean_chunks(chunks) + + + replace_character_entities <- function(char_entity){ + xml2::xml_text(xml2::read_html(paste0("