@@ -2,7 +2,6 @@ unit module Text::Utils;
22
33use Text::Utils::Vars;
44use Text::Utils::Subs;
5- use Text::Utils::TaggedSubs : ALL;
65
76use Font::AFM;
87
@@ -266,6 +265,131 @@ sub commify(
266265
267266} # commify
268267
# -----------------------------------------------------------------------
# | Purpose : Trim a string and collapse runs of whitespace characters
# |           into single ones
# | Params  : The string to be normalized, plus optional named
# |           arguments (see the signature of normalize-string)
# | Returns : The normalized string

274+ # = keep or normalize
275+ # subset Kn of Any is export where { $_ ~~ /^ :i [0|k|n] /};
276+ subset Kn of Any where * ~~ /^ : i [0 | k| n] / ;
277+ # = collapse all contiguous ws
278+ # subset Sn of Any is export where { $_ ~~ /^ :i [0|n|s|t] /};
279+ subset Sn of Any where * ~~ /^ :i [0| n| s| t] /;
280+
281+ # per lizmat, 2024-04-26
282+ our & normalize-text is export (: normalize-text) = & normalize-string ;
283+ sub normalize-string (
284+ Str : D $ str is copy ,
285+ Kn : t(: $ tabs )= 0 , # = keep or normalize
286+ Kn : n(: $ newlines )= 0 , # = keep or normalize
287+ Sn : c(: $ collapse-ws-to )= 0 , # = collapse all contiguous ws
288+ # = to one char
289+ : $ no-trim , # = do not trim the input string
290+ --> Str
291+ ) is export (: normalize-string) {
292+ # default is to always trim first, but to do so we must save the
293+ # original leading and trailing spaces
294+ my ($ pre-ws , $ post-ws );
295+ if $ no-trim . defined {
296+ if $ str ~~ /^ (\s + ) / {
297+ $ pre-ws = ~ $0 ;
298+ }
299+ if $ str ~~ / (\s + ) $ / {
300+ $ post-ws = ~ $0 ;
301+ }
302+ $ str .= trim ;
303+ }
304+ else {
305+ $ str .= trim ;
306+ }
307+
308+ # then normalize all space characters
309+ $ str ~~ s :g / $ WS ** 2.. * /$ WS / ;
310+
311+ # then check for exceptions before normalizing all whitespace
312+
313+ # convenience aliases
314+ my $ t = $ tabs ;
315+ my $ c = $ collapse-ws-to ;
316+ my $ n = $ newlines ;
317+
318+ if $ collapse-ws-to {
319+ if $ c ~~ /^ :i s / {
320+ # collapse all to a single space
321+ $ str ~~ s :g / $ NL /$ WS / ;
322+ $ str ~~ s :g / $ TAB /$ WS / ;
323+ $ str ~~ s :g / $ WS ** 2.. * /$ WS / ;
324+ }
325+ elsif $ c ~~ /^ :i t / {
326+ # collapse all to a single tab
327+ $ str ~~ s :g / $ WS /$ TAB / ;
328+ $ str ~~ s :g / $ NL /$ TAB / ;
329+ $ str ~~ s :g / $ TAB ** 2.. * /$ TAB / ;
330+ }
331+ elsif $ c ~~ /^ :i n / {
332+ # collapse all to a single newline
333+ $ str ~~ s :g / $ WS /$ NL / ;
334+ $ str ~~ s :g / $ TAB /$ NL / ;
335+ $ str ~~ s :g / $ NL ** 2.. * /$ NL / ;
336+ }
337+ }
338+ elsif $ newlines and $ tabs {
339+ if $ t ~~ /^ :i k / {
340+ ; # ok, a no-op
341+ }
342+ elsif $ t ~~ /^ :i n / {
343+ $ str ~~ s :g / $ TAB ** 2.. * /$ TAB / ;
344+ }
345+ if $ n ~~ /^ :i k / {
346+ ; # ok, a no-op
347+ }
348+ elsif $ n ~~ /^ :i n / {
349+ $ str ~~ s :g / $ NL ** 2.. * /$ NL / ;
350+ }
351+ }
352+ elsif $ tabs {
353+ if $ t ~~ /^ :i k / {
354+ ; # ok, a no-op
355+ }
356+ elsif $ t ~~ /^ :i n / {
357+ $ str ~~ s :g / $ TAB ** 2.. * /$ TAB / ;
358+ }
359+ }
360+ elsif $ newlines {
361+ if $ n ~~ /^ :i k / {
362+ ; # ok, a no-op
363+ }
364+ elsif $ n ~~ /^ :i n / {
365+ $ str ~~ s :g / $ NL ** 2.. * /$ NL / ;
366+ }
367+ }
368+ else {
369+ $ str .= trim ;
370+ $ str ~~ s :g / \s ** 2.. * /$ WS / ;
371+ }
372+
373+ = begin comment
374+ else {
375+ #$str .= trim;
376+ # this also takes care of tabs and newlines
377+ $str ~~ s:g/ \s ** 2..*/ /;
378+ }
379+ = end comment
380+
381+ if $ no-trim . defined {
382+ # add back any original leading or trailing spaces
383+ if $ pre-ws {
384+ $ str = $ pre-ws ~ $ str ;
385+ }
386+ if $ post-ws {
387+ $ str = $ str ~ $ post-ws ;
388+ }
389+ }
390+ $ str ;
391+ } # normalize-string
392+
269393# -----------------------------------------------------------------------
270394# | Purpose : Wrap a list of words into a paragraph with a maximum line
271395# | width (default: 78) and update the input list with the
0 commit comments