Skip to content

Commit 6848265

Browse files
committed
HTML.toText() add support for & codes
1 parent fefe009 commit 6848265

1 file changed

Lines changed: 36 additions & 3 deletions

File tree

src/javaforce/HTML.java

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,38 @@ public static String addJSinline(String script) {
146146
return "<script>" + script + "</script>";
147147
}
148148

149+
/** Convert HTML &amp; codes to text format. */
150+
private static String convertAmpCodes(String html) {
151+
StringBuilder txt = new StringBuilder();
152+
int html_len = html.length();
153+
int html_off = 0;
154+
while (html_off < html_len) {
155+
int i1 = html.indexOf('&', html_off);
156+
if (i1 == -1) {
157+
txt.append(html.substring(html_off, html_len));
158+
html_off = html_len;
159+
} else {
160+
if (i1 > 0) {
161+
txt.append(html.substring(html_off, i1));
162+
}
163+
int i2 = html.indexOf(';', i1);
164+
if (i2 == -1) {
165+
JFLog.log("HTML.toText() : amp code left open");
166+
break;
167+
} else {
168+
String tag = html.substring(i1 + 1, i2);
169+
switch (tag) {
170+
case "amp": txt.append("&"); break;
171+
case "lt": txt.append("<"); break;
172+
case "gt": txt.append(">"); break;
173+
}
174+
html_off = i2 + 1;
175+
}
176+
}
177+
}
178+
return txt.toString();
179+
}
180+
149181
/** Converts HTML to text/plain. */
150182
public static String toText(String html) {
151183
StringBuilder txt = new StringBuilder();
@@ -156,12 +188,13 @@ public static String toText(String html) {
156188
int i1 = html.indexOf('<', html_off);
157189
if (debug) JFLog.log("i1=" + i1);
158190
if (i1 == -1) {
159-
txt.append(html.substring(html_off, html_len - html_off));
191+
if (debug) JFLog.log("substring=" + html_off + "," + html_len);
192+
txt.append(convertAmpCodes(html.substring(html_off, html_len)));
160193
html_off = html_len;
161194
} else {
162195
if (i1 > 0) {
163196
if (debug) JFLog.log("substring=" + html_off + "," + i1);
164-
txt.append(html.substring(html_off, i1));
197+
txt.append(convertAmpCodes(html.substring(html_off, i1)));
165198
}
166199
int i2 = html.indexOf('>', i1);
167200
if (debug) JFLog.log("i2=" + i2);
@@ -181,7 +214,7 @@ public static String toText(String html) {
181214
}
182215

183216
public static void main(String[] args) {
184-
String html = "<h1>This is HTML</h1><br>Converted to text!<br>";
217+
String html = "<h1>This is HTML</h1><br>Converted to text!<br>Here are some amp codes &amp; &lt; &gt;";
185218
System.out.println(html);
186219
String txt = toText(html);
187220
System.out.println(txt);

0 commit comments

Comments
 (0)