@@ -146,6 +146,38 @@ public static String addJSinline(String script) {
146146 return "<script>" + script + "</script>" ;
147147 }
148148
149+ /** Convert HTML & codes to text format. */
150+ private static String convertAmpCodes (String html ) {
151+ StringBuilder txt = new StringBuilder ();
152+ int html_len = html .length ();
153+ int html_off = 0 ;
154+ while (html_off < html_len ) {
155+ int i1 = html .indexOf ('&' , html_off );
156+ if (i1 == -1 ) {
157+ txt .append (html .substring (html_off , html_len ));
158+ html_off = html_len ;
159+ } else {
160+ if (i1 > 0 ) {
161+ txt .append (html .substring (html_off , i1 ));
162+ }
163+ int i2 = html .indexOf (';' , i1 );
164+ if (i2 == -1 ) {
165+ JFLog .log ("HTML.toText() : amp code left open" );
166+ break ;
167+ } else {
168+ String tag = html .substring (i1 + 1 , i2 );
169+ switch (tag ) {
170+ case "amp" : txt .append ("&" ); break ;
171+ case "lt" : txt .append ("<" ); break ;
172+ case "gt" : txt .append (">" ); break ;
173+ }
174+ html_off = i2 + 1 ;
175+ }
176+ }
177+ }
178+ return txt .toString ();
179+ }
180+
149181 /** Converts HTML to text/plain. */
150182 public static String toText (String html ) {
151183 StringBuilder txt = new StringBuilder ();
@@ -156,12 +188,13 @@ public static String toText(String html) {
156188 int i1 = html .indexOf ('<' , html_off );
157189 if (debug ) JFLog .log ("i1=" + i1 );
158190 if (i1 == -1 ) {
159- txt .append (html .substring (html_off , html_len - html_off ));
191+ if (debug ) JFLog .log ("substring=" + html_off + "," + html_len );
192+ txt .append (convertAmpCodes (html .substring (html_off , html_len )));
160193 html_off = html_len ;
161194 } else {
162195 if (i1 > 0 ) {
163196 if (debug ) JFLog .log ("substring=" + html_off + "," + i1 );
164- txt .append (html .substring (html_off , i1 ));
197+ txt .append (convertAmpCodes ( html .substring (html_off , i1 ) ));
165198 }
166199 int i2 = html .indexOf ('>' , i1 );
167200 if (debug ) JFLog .log ("i2=" + i2 );
@@ -181,7 +214,7 @@ public static String toText(String html) {
181214 }
182215
183216 public static void main (String [] args ) {
184- String html = "<h1>This is HTML</h1><br>Converted to text!<br>" ;
217+ String html = "<h1>This is HTML</h1><br>Converted to text!<br>Here are some amp codes & < > " ;
185218 System .out .println (html );
186219 String txt = toText (html );
187220 System .out .println (txt );
0 commit comments