Fix up procedure for extracting strings to .pot, and add newly-found …

…messages (#6793) * update .pot again * Update CRAN_Release procedure * Add 'notranslate' in R files, add a mark new strings for translation * More R-side notranslate * Mark many src* strings for non-translation * Caught a few more untranslated messages * sloppy find-and-replace * Un-translate internal error * oops, this is C * nocov hard-to-cover regions * Double-Pl_ for row(s), thread(s) * Make the full message plural for better translator context * Superfluous ')' * rm ws
Rdatatable · Feb 4, 2025 · 4f7f567 · 4f7f567
1 parent f6dca2d
commit 4f7f567
Show file tree

Hide file tree

Showing 24 changed files with 714 additions and 149 deletions.
diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd
@@ -3,17 +3,19 @@
 ###############################################
 
 # 1) Update messages for new release
-## (a) Update C template file: src/data.table.pot
-##     ideally, we are including _() wrapping in
-##     new PRs throughout dev cycle, and this step
-##     becomes about tying up loose ends
-## Check the output here for translatable messages
-xgettext -o /dev/stdout ./*.c \
-  --keyword=Rprintf --keyword=error --keyword=warning --keyword=STOP --keyword=DTWARN --keyword=Error --keyword=DTPRINT --keyword=snprintf:3
-
-## (b) Update R template file: src/R-data.table.pot
-##  NB: this relies on R >= 4.0 to remove a bug in update_pkg_po
-Rscript -e "tools::update_pkg_po('.')"
+dt_custom_translators = list(
+  R = 'catf:fmt|1',
+  # TODO(MichaelChirico/potools#318): restore snprintf:3 here too
+  src = c('STOP:1', 'DTWARN:1', 'DTPRINT:1')
+)
+message_db =
+  potools::get_message_data(custom_translation_functions = dt_custom_translators)
+potools::check_cracked_messages(message_db)
+potools::check_untranslated_cat(message_db)
+potools::check_untranslated_src(message_db)
+
+## Update the message template files (po/*.pot)
+potools::po_extract(custom_translation_functions = dt_custom_translators)
 
 # 2) Open a PR with the new templates & contact the translators
 #   * zh_CN: @hongyuanjia

diff --git a/R/between.R b/R/between.R
@@ -75,7 +75,7 @@ inrange = function(x,lower,upper,incbounds=TRUE) {
   ops = if (incbounds) c(4L, 2L) else c(5L, 3L) # >=,<= and >,<
   verbose = isTRUE(getOption("datatable.verbose"))
   if (verbose) {last.started.at=proc.time();catf("forderv(query) took ... ");flush.console()}
-  if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
+  if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} # notranslate
   ans = bmerge(
     shallow(subject), query,
     icols=1L:2L, xcols=c(1L, 1L),

diff --git a/R/bmerge.R b/R/bmerge.R
@@ -188,7 +188,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
     if (verbose) {last.started.at=proc.time();catf("  forder took ... ");flush.console()}
     # TODO: could check/reuse secondary indices, but we need 'starts' attribute as well!
     xo = forderv(x, xcols, retGrp=TRUE)
-    if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
+    if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} # notranslate
     xg = attr(xo, 'starts', exact=TRUE)
     resetcols = head(xcols, non_equi-1L)
     if (length(resetcols)) {

diff --git a/R/data.table.R b/R/data.table.R
@@ -577,7 +577,7 @@ replace_dot_alias = function(e) {
             } else {
               irows = as.integer(fsort(as.numeric(irows))) ## nocov; parallelized for numeric, but overhead of type conversion
             }
-          if (verbose) {cat(timetaken(last.started.at), "\n");flush.console()}
+          if (verbose) {cat(timetaken(last.started.at), "\n");flush.console()} # notranslate
         }
         ## make sure, all columns are taken from x and not from i.
         ## This is done by simply telling data.table to continue as if there was a simple subset
@@ -634,7 +634,7 @@ replace_dot_alias = function(e) {
       irows = irows[irows!=0L]
       if (verbose) {last.started.at=proc.time();catf("Inverting irows for notjoin done in ... ");flush.console()}
       i = irows = if (length(irows)) seq_len(nrow(x))[-irows] else NULL  # NULL meaning all rows i.e. seq_len(nrow(x))
-      if (verbose) cat(timetaken(last.started.at), "\n")
+      if (verbose) cat(timetaken(last.started.at), "\n") # notranslate
       leftcols = integer()  # proceed as if row subset from now on, length(leftcols) is switched on later
       rightcols = integer()
       # Doing this once here, helps speed later when repeatedly subsetting each column. R's [irows] would do this for each
@@ -892,8 +892,10 @@ replace_dot_alias = function(e) {
         }
         tt = lengths(byval)
         if (any(tt!=xnrow)) {
-          plural_part <- sprintf(ngettext(length(tt), "The item in the 'by' or 'keyby' list is length %s.", "The items in the 'by' or 'keyby' list have lengths %s."), brackify(tt))
-          stopf("%s Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).", plural_part, xnrow)
+          stopf(ngettext(length(tt),
+                         "The item in the 'by' or 'keyby' list is length %s. Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).",
+                         "The items in the 'by' or 'keyby' list have lengths %s. Each must be length %d; the same length as there are rows in x (after subsetting if i is provided)."),
+                brackify(tt), xnrow, domain=NA)
         }
         if (is.null(bynames)) bynames = rep.int("",length(byval))
         if (length(idx <- which(!nzchar(bynames))) && !bynull) {

diff --git a/R/print.data.table.R b/R/print.data.table.R
@@ -57,8 +57,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
       catf("Null data.%s (0 rows and 0 cols)\n", class)  # See FAQ 2.5 and NEWS item in v1.8.9
     } else {
       catf("Empty data.%s (%d rows and %d cols)", class, NROW(x), NCOL(x))
-      if (length(x)>0L) cat(": ",paste(head(names(x),6L),collapse=","),if(length(x)>6L)"...",sep="")
-      cat("\n")
+      if (length(x)>0L) cat(": ",paste(head(names(x),6L),collapse=","),if(length(x)>6L)"...",sep="") # notranslate
+      cat("\n") # notranslate
     }
     return(invisible(x))
   }

diff --git a/R/test.data.table.R b/R/test.data.table.R
@@ -39,7 +39,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
     scripts = gsub("[.]bz2$","",scripts)
     return(sapply(scripts, function(fn) {
       err = try(test.data.table(script=fn, verbose=verbose, pkg=pkg, silent=silent, showProgress=showProgress, testPattern=testPattern))
-      cat("\n");
+      cat("\n"); # notranslate
       isTRUE(err)
     }))
     # nocov end
@@ -110,7 +110,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
     datatable.old.fread.datetime.character = FALSE
   )
 
-  cat("getDTthreads(verbose=TRUE):\n")         # for tracing on CRAN; output to log before anything is attempted
+  cat("getDTthreads(verbose=TRUE):\n")         # notranslate: for tracing on CRAN; output to log before anything is attempted
   getDTthreads(verbose=TRUE)                   # includes the returned value in the verbose output (rather than dangling '[1] 4'); e.g. "data.table is using 4 threads"
   catf("test.data.table() running: %s\n", fn)  # print fn to log before attempting anything on it (in case it is missing); on same line for slightly easier grep
   assign("testDir", function(x) file.path(fulldir, x), envir=env)
@@ -226,6 +226,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
   # does show the full file output these days, so the 13 line limit no longer bites so much. It still bit recently
   # when receiving output of R CMD check sent over email, though.
   tz = Sys.getenv("TZ", unset=NA)
+  # notranslate start
   cat("\n", date(),   # so we can tell exactly when these tests ran on CRAN to double-check the result is up to date
     "  endian==", .Platform$endian,
     ", sizeof(long double)==", .Machine$sizeof.longdouble,
@@ -239,6 +240,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
     ", .libPaths()==", paste0("'", .libPaths(), "'", collapse = ","),
     ", ", .Call(Cdt_zlib_version),
     "\n", sep="")
+  # notranslate end
 
   if (inherits(err,"try-error")) {
     # nocov start
@@ -303,7 +305,7 @@ compactprint = function(DT, topn=2L) {
     print(copy(DT)[,(cn):="",verbose=FALSE], topn=topn, class=FALSE)
   } else {
     print(DT, class=FALSE)  # "Empty data.table (0 rows) of <ncol> columns ...
-    if (ncol(DT)) cat(cn,"\n")
+    if (ncol(DT)) cat(cn,"\n") # notranslate
   }
   invisible()
 }
@@ -376,14 +378,15 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
          if (memtest==1L) gc()  # see #5515 for before/after
          inum = as.integer(num)
          timings[inum, RSS:=max(rss(),RSS), verbose=FALSE]  # TODO prefix inum with .. for clarity when that works
-         if (length(memtest.id) && memtest.id[1L]<=inum && inum<=memtest.id[2L]) cat(rss(),"\n") # after 'testing id ...' output; not using between() as it has verbose output when getOption(datatable.verbose)
+         if (length(memtest.id) && memtest.id[1L]<=inum && inum<=memtest.id[2L]) cat(rss(),"\n") # notranslate. after 'testing id ...' output; not using between() as it has verbose output when getOption(datatable.verbose)
          if (memtest==2L) gc()
        }
        assign("lasttime", proc.time()[3L], parent.frame(), inherits=TRUE)  # after gc() to exclude gc() time from next test when memtest
     }, add=TRUE )
-    if (showProgress)
-      # \r can't be in gettextf msg
-      cat("\rRunning test id", numStr, "         ")   # nocov.
+    if (showProgress) { # progress line: rewind the cursor, then print the translatable message
+      cat("\r") # notranslate: \r can't be in gettextf msg
+      catf("Running test id %s         ", numStr)   # nocov. catf() is printf-style, so numStr needs a %s slot to be printed
+    }
     # See PR #4090 for comments about change here in Dec 2019.
     # If a segfault error occurs in future and we'd like to know after which test, then arrange for the
     # try(sys.source()) in test.data.table() to be run in a separate R process. That process could write out
@@ -534,15 +537,15 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
     if (!fail) {
       catf("Test %s ran without errors but failed check that x equals y:\n", numStr)
       failPrint = function(x, xsub) {
-        cat(">", substitute(x), "=", xsub, "\n")
+        cat(">", substitute(x), "=", xsub, "\n") # notranslate
         if (is.data.table(x)) compactprint(x) else {
           nn = length(x)
           catf("First %d of %d (type '%s'): \n", min(nn, 6L), length(x), typeof(x))
           # head.matrix doesn't restrict columns
           if (length(d <- dim(x))) do.call(`[`, c(list(x, drop = FALSE), lapply(pmin(d, 6L), seq_len)))
           else print(head(x))
           if (typeof(x) == 'character' && anyNonAscii(x)) {
-            cat("Non-ASCII string detected, raw representation:\n")
+            catf("Non-ASCII string detected, raw representation:\n")
             print(lapply(head(x), charToRaw))
           }
         }