Skip to content

Commit

Permalink
Fix up procedure for extracting strings to .pot, and add newly-found …
Browse files Browse the repository at this point in the history
…messages (#6793)

* update .pot again

* Update CRAN_Release procedure

* Add 'notranslate' in R files, and mark new strings for translation

* More R-side notranslate

* Mark many src* strings for non-translation

* Caught a few more untranslated messages

* sloppy find-and-replace

* Un-translate internal error

* oops, this is C

* nocov hard-to-cover regions

* Double-Pl_ for row(s), thread(s)

* Make the full message plural for better translator context

* Superfluous ')'

* rm ws
  • Loading branch information
MichaelChirico authored Feb 4, 2025
1 parent f6dca2d commit 4f7f567
Show file tree
Hide file tree
Showing 24 changed files with 714 additions and 149 deletions.
24 changes: 13 additions & 11 deletions .dev/CRAN_Release.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,19 @@
###############################################

# 1) Update messages for new release
## (a) Update C template file: src/data.table.pot
## ideally, we are including _() wrapping in
## new PRs throughout dev cycle, and this step
## becomes about tying up loose ends
## Check the output here for translatable messages
xgettext -o /dev/stdout ./*.c \
--keyword=Rprintf --keyword=error --keyword=warning --keyword=STOP --keyword=DTWARN --keyword=Error --keyword=DTPRINT --keyword=snprintf:3

## (b) Update R template file: src/R-data.table.pot
## NB: this relies on R >= 4.0 to remove a bug in update_pkg_po
Rscript -e "tools::update_pkg_po('.')"
dt_custom_translators = list(
R = 'catf:fmt|1',
# TODO(MichaelChirico/potools#318): restore snprintf:3 here too
src = c('STOP:1', 'DTWARN:1', 'DTPRINT:1')
)
message_db =
potools::get_message_data(custom_translation_functions = dt_custom_translators)
potools::check_cracked_messages(message_db)
potools::check_untranslated_cat(message_db)
potools::check_untranslated_src(message_db)

## (b) Update R template files (po/*.pot)
potools::po_extract(custom_translation_functions = dt_custom_translators)

# 2) Open a PR with the new templates & contact the translators
# * zh_CN: @hongyuanjia
Expand Down
2 changes: 1 addition & 1 deletion R/between.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ inrange = function(x,lower,upper,incbounds=TRUE) {
ops = if (incbounds) c(4L, 2L) else c(5L, 3L) # >=,<= and >,<
verbose = isTRUE(getOption("datatable.verbose"))
if (verbose) {last.started.at=proc.time();catf("forderv(query) took ... ");flush.console()}
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} # notranslate
ans = bmerge(
shallow(subject), query,
icols=1L:2L, xcols=c(1L, 1L),
Expand Down
2 changes: 1 addition & 1 deletion R/bmerge.R
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
if (verbose) {last.started.at=proc.time();catf(" forder took ... ");flush.console()}
# TODO: could check/reuse secondary indices, but we need 'starts' attribute as well!
xo = forderv(x, xcols, retGrp=TRUE)
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} # notranslate
xg = attr(xo, 'starts', exact=TRUE)
resetcols = head(xcols, non_equi-1L)
if (length(resetcols)) {
Expand Down
10 changes: 6 additions & 4 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ replace_dot_alias = function(e) {
} else {
irows = as.integer(fsort(as.numeric(irows))) ## nocov; parallelized for numeric, but overhead of type conversion
}
if (verbose) {cat(timetaken(last.started.at), "\n");flush.console()}
if (verbose) {cat(timetaken(last.started.at), "\n");flush.console()} # notranslate
}
## make sure, all columns are taken from x and not from i.
## This is done by simply telling data.table to continue as if there was a simple subset
Expand Down Expand Up @@ -634,7 +634,7 @@ replace_dot_alias = function(e) {
irows = irows[irows!=0L]
if (verbose) {last.started.at=proc.time();catf("Inverting irows for notjoin done in ... ");flush.console()}
i = irows = if (length(irows)) seq_len(nrow(x))[-irows] else NULL # NULL meaning all rows i.e. seq_len(nrow(x))
if (verbose) cat(timetaken(last.started.at), "\n")
if (verbose) cat(timetaken(last.started.at), "\n") # notranslate
leftcols = integer() # proceed as if row subset from now on, length(leftcols) is switched on later
rightcols = integer()
# Doing this once here, helps speed later when repeatedly subsetting each column. R's [irows] would do this for each
Expand Down Expand Up @@ -892,8 +892,10 @@ replace_dot_alias = function(e) {
}
tt = lengths(byval)
if (any(tt!=xnrow)) {
plural_part <- sprintf(ngettext(length(tt), "The item in the 'by' or 'keyby' list is length %s.", "The items in the 'by' or 'keyby' list have lengths %s."), brackify(tt))
stopf("%s Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).", plural_part, xnrow)
stopf(ngettext(length(tt),
"The item in the 'by' or 'keyby' list is length %s. Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).",
"The items in the 'by' or 'keyby' list have lengths %s. Each must be length %d; the same length as there are rows in x (after subsetting if i is provided)."),
brackify(tt), xnrow, domain=NA)
}
if (is.null(bynames)) bynames = rep.int("",length(byval))
if (length(idx <- which(!nzchar(bynames))) && !bynull) {
Expand Down
4 changes: 2 additions & 2 deletions R/print.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
catf("Null data.%s (0 rows and 0 cols)\n", class) # See FAQ 2.5 and NEWS item in v1.8.9
} else {
catf("Empty data.%s (%d rows and %d cols)", class, NROW(x), NCOL(x))
if (length(x)>0L) cat(": ",paste(head(names(x),6L),collapse=","),if(length(x)>6L)"...",sep="")
cat("\n")
if (length(x)>0L) cat(": ",paste(head(names(x),6L),collapse=","),if(length(x)>6L)"...",sep="") # notranslate
cat("\n") # notranslate
}
return(invisible(x))
}
Expand Down
21 changes: 12 additions & 9 deletions R/test.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
scripts = gsub("[.]bz2$","",scripts)
return(sapply(scripts, function(fn) {
err = try(test.data.table(script=fn, verbose=verbose, pkg=pkg, silent=silent, showProgress=showProgress, testPattern=testPattern))
cat("\n");
cat("\n"); # notranslate
isTRUE(err)
}))
# nocov end
Expand Down Expand Up @@ -110,7 +110,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
datatable.old.fread.datetime.character = FALSE
)

cat("getDTthreads(verbose=TRUE):\n") # for tracing on CRAN; output to log before anything is attempted
cat("getDTthreads(verbose=TRUE):\n") # notranslate: for tracing on CRAN; output to log before anything is attempted
getDTthreads(verbose=TRUE) # includes the returned value in the verbose output (rather than dangling '[1] 4'); e.g. "data.table is using 4 threads"
catf("test.data.table() running: %s\n", fn) # print fn to log before attempting anything on it (in case it is missing); on same line for slightly easier grep
assign("testDir", function(x) file.path(fulldir, x), envir=env)
Expand Down Expand Up @@ -226,6 +226,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
# does show the full file output these days, so the 13 line limit no longer bites so much. It still bit recently
# when receiving output of R CMD check sent over email, though.
tz = Sys.getenv("TZ", unset=NA)
# notranslate start
cat("\n", date(), # so we can tell exactly when these tests ran on CRAN to double-check the result is up to date
" endian==", .Platform$endian,
", sizeof(long double)==", .Machine$sizeof.longdouble,
Expand All @@ -239,6 +240,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
", .libPaths()==", paste0("'", .libPaths(), "'", collapse = ","),
", ", .Call(Cdt_zlib_version),
"\n", sep="")
# notranslate end

if (inherits(err,"try-error")) {
# nocov start
Expand Down Expand Up @@ -303,7 +305,7 @@ compactprint = function(DT, topn=2L) {
print(copy(DT)[,(cn):="",verbose=FALSE], topn=topn, class=FALSE)
} else {
print(DT, class=FALSE) # "Empty data.table (0 rows) of <ncol> columns ...
if (ncol(DT)) cat(cn,"\n")
if (ncol(DT)) cat(cn,"\n") # notranslate
}
invisible()
}
Expand Down Expand Up @@ -376,14 +378,15 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
if (memtest==1L) gc() # see #5515 for before/after
inum = as.integer(num)
timings[inum, RSS:=max(rss(),RSS), verbose=FALSE] # TODO prefix inum with .. for clarity when that works
if (length(memtest.id) && memtest.id[1L]<=inum && inum<=memtest.id[2L]) cat(rss(),"\n") # after 'testing id ...' output; not using between() as it has verbose output when getOption(datatable.verbose)
if (length(memtest.id) && memtest.id[1L]<=inum && inum<=memtest.id[2L]) cat(rss(),"\n") # notranslate. after 'testing id ...' output; not using between() as it has verbose output when getOption(datatable.verbose)
if (memtest==2L) gc()
}
assign("lasttime", proc.time()[3L], parent.frame(), inherits=TRUE) # after gc() to exclude gc() time from next test when memtest
}, add=TRUE )
if (showProgress)
# \r can't be in gettextf msg
cat("\rRunning test id", numStr, " ") # nocov.
if (showProgress) {
  cat("\r") # notranslate: \r can't be in gettextf msg
  # catf() takes a sprintf-style format as its first argument (compare the other
  # catf("... %s ...", numStr) calls in this file), so the test id needs an
  # explicit %s placeholder; passing it as an extra argument to a format with
  # no placeholder drops it from the output. The two trailing spaces reproduce
  # what the previous cat("...", numStr, " ") call emitted after the id.
  catf("Running test id %s  ", numStr) # nocov.
}
# See PR #4090 for comments about change here in Dec 2019.
# If a segfault error occurs in future and we'd like to know after which test, then arrange for the
# try(sys.source()) in test.data.table() to be run in a separate R process. That process could write out
Expand Down Expand Up @@ -534,15 +537,15 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
if (!fail) {
catf("Test %s ran without errors but failed check that x equals y:\n", numStr)
failPrint = function(x, xsub) {
cat(">", substitute(x), "=", xsub, "\n")
cat(">", substitute(x), "=", xsub, "\n") # notranslate
if (is.data.table(x)) compactprint(x) else {
nn = length(x)
catf("First %d of %d (type '%s'): \n", min(nn, 6L), length(x), typeof(x))
# head.matrix doesn't restrict columns
if (length(d <- dim(x))) do.call(`[`, c(list(x, drop = FALSE), lapply(pmin(d, 6L), seq_len)))
else print(head(x))
if (typeof(x) == 'character' && anyNonAscii(x)) {
cat("Non-ASCII string detected, raw representation:\n")
catf("Non-ASCII string detected, raw representation:\n")
print(lapply(head(x), charToRaw))
}
}
Expand Down
Loading

0 comments on commit 4f7f567

Please sign in to comment.