Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions lib/LaTeXML/Core/Token.pm
Original file line number Diff line number Diff line change
Expand Up @@ -99,19 +99,19 @@ sub Token {
return bless [$string, (defined $cc ? $cc : CC_OTHER)], 'LaTeXML::Core::Token'; }

# Explode a string into a list of tokens, all w/catcode OTHER (except space).
# Note: convert \n to OTHER (NOT SPACE); ^^J generally should decode to Omega
# Explode($string): split $string into single characters and return one token
# per character; returns the empty list when $string is undefined.
# NOTE(review): this listing is a flattened diff, so two alternative `? map`
# branches appear below. They look like the removed and the added version of
# the same expression -- confirm against the real file before editing.
sub Explode {
my ($string) = @_;
return (defined $string
# Variant 1: ' ' => T_SPACE(), "\n" => Token($_, CC_SPACE), anything else => T_OTHER($_).
? map { ($_ eq ' ' ? T_SPACE() : ($_ eq "\n" ? Token($_, CC_SPACE)
: T_OTHER($_))) } split('', $string)
# Variant 2: ' ' => T_SPACE(), every other character (including "\n") => T_OTHER($_).
? map { ($_ eq ' ' ? T_SPACE() : T_OTHER($_)) } split('', $string)
: ()); }

# Similar to Explode, but convert letters to catcode LETTER and others to OTHER
# Hopefully, this is essentially correct WITHOUT resorting to catcode lookup?
# ExplodeText($string): split $string into single characters and return one
# token per character, classifying ASCII letters ([a-zA-Z]) via T_LETTER and
# the remaining non-space characters via T_OTHER; returns the empty list when
# $string is undefined.
# NOTE(review): this listing is a flattened diff, so two alternative `? map`
# branches appear below. They look like the removed and the added version of
# the same expression -- confirm against the real file before editing.
sub ExplodeText {
my ($string) = @_;
return (defined $string
# Variant 1: additionally maps "\n" to Token($_, CC_SPACE) before the letter test.
? map { ($_ eq ' ' ? T_SPACE() : ($_ eq "\n" ? Token($_, CC_SPACE) : (/[a-zA-Z]/ ? T_LETTER($_) : T_OTHER($_)))) }
# Variant 2: no special case for "\n"; it falls through to T_OTHER($_).
? map { ($_ eq ' ' ? T_SPACE() : (/[a-zA-Z]/ ? T_LETTER($_) : T_OTHER($_))) }
split('', $string)
: ()); }

Expand Down
19 changes: 16 additions & 3 deletions lib/LaTeXML/Engine/TeX_Character.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -198,12 +198,25 @@ sub escapechar {
return (($code >= 0) && ($code <= 255) ? chr($code) : ''); }

# 1) Knuth, The TeXBook, page 40, paragraph 1, Chapter 7: How TEX Reads What You Type.
# suggests all characters except spaces are returned in category code Other, i.e. Explode()
# suggests all characters except spaces are returned in category code Other,
# i.e. like Explode(), but take care to distinguish T_SPACE(\n) and T_OTHER(\n)!
# the latter likely gets decoded to Omega
# \string <Token>: the expansion sub receives ($gullet, $token) and returns the
# tokens that spell out $token, chosen by the token's catcode.
# NOTE(review): this listing is a flattened diff. `my $s = $_[1]->toString;`
# and `Explode($s); });` look like leftovers of the previous one-branch
# implementation, interleaved with the new catcode dispatch -- confirm against
# the real file before editing.
DefMacro('\string Token', sub {
my $s = $_[1]->toString;
my($gullet, $token) = @_;
my $cc = $token->getCatcode;
# Control sequence: spell out its name character by character via Explode().
if($cc == CC_CS){
my $s = $token->toString;
# Swap a leading backslash for whatever escapechar() returns.
if ($s =~ s/^\\//) {
$s = escapechar() . $s; }
Explode($s); });
return Explode($s); }
# Space-catcode token: returned unchanged, so its catcode stays CC_SPACE.
elsif($cc == CC_SPACE){
return ($token); }
# Escape, comment and invalid catcodes: bare return, i.e. no tokens in list context.
elsif(($cc == CC_ESCAPE) || ($cc == CC_COMMENT) || ($cc == CC_INVALID)){
return; }
else { # All other catcodes
# The token's string is re-issued as a single T_OTHER token.
return T_OTHER($token->toString); }
});

#======================================================================
# Character properties
#----------------------------------------------------------------------
Expand Down
4 changes: 1 addition & 3 deletions lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ DefMacro('\meaning Token', sub {
# The actual tests start here
if ($type =~ /token$/i) {
my $cc = $definition->getCatcode;
my $char = $definition->toString;
my $char = ($cc == CC_SPACE ? ' ' : $definition->toString); # Normalize \n!
my $meaning_cc = $CATCODE_MEANING[$cc] || '';
$meaning_cc .= ' ' if $meaning_cc; # append space separator if defined
$meaning = $meaning_cc . $char; }
Expand Down Expand Up @@ -166,8 +166,6 @@ DefMacro('\meaning Token', sub {
elsif ($type =~ /chardef$/i) { # from \chardef or \mathchardef
my $prefix = ($$definition{mathglyph} ? '\mathchar' : '\char');
$meaning = $prefix . '"' . $definition->valueOf->valueOf; } }
# One catch: make sure all \s in the meaning string are normalized to a simple space ' '
$meaning =~ s/\s/ /g;
return Explode($meaning); });

#======================================================================
Expand Down
Binary file modified t/expansion/meaning.pdf
Binary file not shown.
103 changes: 49 additions & 54 deletions t/expansion/meaning.tex
Original file line number Diff line number Diff line change
@@ -1,68 +1,63 @@
\documentclass{article}
\usepackage[T1]{fontenc}
\def\tester#1{%
\par\noindent%
[\string #1]%
[\meaning #1]%
[\expandafter\meaning\string #1]%
[\detokenize{#1}]%
.}
\def\xtester#1{%
\expandafter\tester\expandafter{#1}}

\begin{document}

\def\A#1{saaa#1}
\def\Aa#1{#1saaa}
\def\Aaa#1{sa#1aa}
\def\b{bb}
\def\c a#1b#2c{one#1c#2}
\edef\foo{%
\csname A\endcsname%
\csname expl:n\endcsname%
\csname B\endcsname%
\csname *\endcsname%
\csname C\endcsname}

\meaning A

\meaning_

\meaning%
\meaning\

\meaning{

%\meaning ~
\meaning}

\tester{$}
\tester{&}
\tester{
}
\tester{#}
\tester{^}
\tester{_}
\tester{ }
\tester{A}
\tester{@}
{\catcode`\@=\active\let@\A
\meaning @}

\meaning ^

\meaning *

\meaning (

\meaning \

\meaning 1

\meaning !

\meaning @

\meaning #

\meaning &

\meaning \A

\meaning \Aa

\meaning \Aaa

\meaning \b


\meaning \c

\meaning \bla

\meaning {

\meaning }

\meaning $

\meaning\foo

\tester{@}}
\tester{*}
\tester{(}
\tester{1}
\tester{!}

\tester{\A}
\tester{\Aa}
\tester{\Aaa}
\tester{\b}
\tester{\c}
\tester{\bla}

\tester{^^J}
{ \catcode`\^^J=\active
\tester{^^J} }
{ \catcode`\Z=9\relax
\tester{Z} }

\def\aspace{ }
\def\carriagereturn{
}
\def\linefeed{^^J}
\xtester{\aspace}
\xtester{\carriagereturn}
\xtester{\linefeed}
\end{document}
107 changes: 63 additions & 44 deletions t/expansion/meaning.xml
Original file line number Diff line number Diff line change
@@ -1,77 +1,96 @@
<?xml version="1.0" encoding="UTF-8"?>
<?latexml class="article"?>
<?latexml package="fontenc" options="T1"?>
<?latexml RelaxNGSchema="LaTeXML"?>
<document xmlns="http://dlmf.nist.gov/LaTeXML">
<resource src="LaTeXML.css" type="text/css"/>
<resource src="ltx-article.css" type="text/css"/>
<para xml:id="p1">
<p>the letter A</p>
<p>macro:-¿“</p>
</para>
<para xml:id="p2">
<p>subscript character _</p>
<p>begin-group character </p>
</para>
<para xml:id="p3">
<p>\par</p>
<p>end-group character ˝</p>
</para>
<para xml:id="p4">
<p>macro:#1-&gt;saaa#1</p>
<para class="ltx_noindent" xml:id="p4">
<p>[$][math shift character $][the character $][$].</p>
</para>
<para xml:id="p5">
<p>superscript character ˆ</p>
<para class="ltx_noindent" xml:id="p5">
<p>[&amp;][alignment tab character &amp;][the character &amp;][&amp;].</p>
</para>
<para xml:id="p6">
<p>the character *</p>
<para class="ltx_noindent" xml:id="p6">
<p>[
][blank space ][blank space ][ ].</p>
</para>
<para xml:id="p7">
<p>the character (</p>
<para class="ltx_noindent" xml:id="p7">
<p>[#][macro parameter character #][the character #][##].</p>
</para>
<para xml:id="p8">
<p>macro:-&gt;\</p>
<para class="ltx_noindent" xml:id="p8">
<p>[ˆ][superscript character ˆ][the character ˆ][ˆ].</p>
</para>
<para xml:id="p9">
<p>the character 1</p>
<para class="ltx_noindent" xml:id="p9">
<p>[˙][subscript character ˙][the character ˙][˙].</p>
</para>
<para xml:id="p10">
<p>the character !</p>
<para class="ltx_noindent" xml:id="p10">
<p>[ ][blank space ][blank space ][ ].</p>
</para>
<para xml:id="p11">
<p>the character @</p>
<para class="ltx_noindent" xml:id="p11">
<p>[A][the letter A][the character A][A].</p>
</para>
<para xml:id="p12">
<p>macro parameter character #</p>
<para class="ltx_noindent" xml:id="p12">
<p>[@][the character @][the character @][@].</p>
</para>
<para xml:id="p13">
<p>alignment tab character &amp;</p>
<para class="ltx_noindent" xml:id="p13">
<p>[@][macro:#1-¿saaa#1][the character @][@].</p>
</para>
<para xml:id="p14">
<p>macro:#1-&gt;saaa#1</p>
<para class="ltx_noindent" xml:id="p14">
<p>[*][the character *][the character *][*].</p>
</para>
<para xml:id="p15">
<p>macro:#1-&gt;#1saaa</p>
<para class="ltx_noindent" xml:id="p15">
<p>[(][the character (][the character (][(].</p>
</para>
<para xml:id="p16">
<p>macro:#1-&gt;sa#1aa</p>
<para class="ltx_noindent" xml:id="p16">
<p>[1][the character 1][the character 1][1].</p>
</para>
<para xml:id="p17">
<p>macro:-&gt;bb</p>
<para class="ltx_noindent" xml:id="p17">
<p>[!][the character !][the character !][!].</p>
</para>
<para xml:id="p18">
<p>macro:a#1b#2c-&gt;one#1c#2</p>
<para class="ltx_noindent" xml:id="p18">
<p>[“A][macro:#1-¿saaa#1][the character “A][“A ].</p>
</para>
<para xml:id="p19">
<p>undefined</p>
<para class="ltx_noindent" xml:id="p19">
<p>[“Aa][macro:#1-¿#1saaa][the character “Aa][“Aa ].</p>
</para>
<para xml:id="p20">
<p>begin-group character {</p>
<para class="ltx_noindent" xml:id="p20">
<p>[“Aaa][macro:#1-¿sa#1aa][the character “Aaa][“Aaa ].</p>
</para>
<para xml:id="p21">
<p>end-group character }</p>
<para class="ltx_noindent" xml:id="p21">
<p>[“b][macro:-¿bb][the character “b][“b ].</p>
</para>
<para xml:id="p22">
<p>math shift character $</p>
<para class="ltx_noindent" xml:id="p22">
<p>[“c][macro:a#1b#2c-¿one#1c#2][the character “c][“c ].</p>
</para>
<para xml:id="p23">
<p>macro:-&gt;saaa\expl:n \B \*\C</p>
<para class="ltx_noindent" xml:id="p23">
<p>[“bla][undefined][the character “bla][“bla ].</p>
</para>
<para class="ltx_noindent" xml:id="p24">
<p>[Ω][the character Ω][the character Ω][Ω].</p>
</para>
<para class="ltx_noindent" xml:id="p25">
<p>[Ω][undefined][the character Ω][Ω].</p>
</para>
<para class="ltx_noindent" xml:id="p26">
<p>[][the character ][the character ][].</p>
</para>
<para class="ltx_noindent" xml:id="p27">
<p>[ ][blank space ][blank space ][ ].</p>
</para>
<para class="ltx_noindent" xml:id="p28">
<p>[
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably not essential, but the newline character in the XML for the p28 test shows up as a regular space in the PDF output.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intention was that they should have an equivalent effect in the xml/html, which seems to be the case.

I'd wondered whether the ones in "blank space \n" should also keep the newline rather than being normalized to a space, but that would have required a lot of changes and complication in \meaning to keep the right newlines and decode the wrong ones, so I opted to revert Explode.

][blank space ][blank space ][ ].</p>
</para>
<para class="ltx_noindent" xml:id="p29">
<p>[Ω][the character Ω][the character Ω][Ω].</p>
</para>
</document>