Skip to content

Commit ab31b3c

Browse files
committed
* added JUnit tests to check for the Unicode NUL character, and an accompanying test file
* currently fails this test
1 parent 2ddf27d commit ab31b3c

File tree

3 files changed

+121
-0
lines changed

3 files changed

+121
-0
lines changed

pom.xml

+18
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,24 @@
3434
<type>jar</type>
3535
<scope>compile</scope>
3636
</dependency>
37+
<dependency>
38+
<groupId>junit</groupId>
39+
<artifactId>junit</artifactId>
40+
<version>4.10</version>
41+
<scope>test</scope>
42+
</dependency>
43+
<dependency>
44+
<groupId>org.slf4j</groupId>
45+
<artifactId>slf4j-api</artifactId>
46+
<version>1.7.5</version>
47+
<scope>test</scope>
48+
</dependency>
49+
<dependency>
50+
<groupId>org.slf4j</groupId>
51+
<artifactId>slf4j-simple</artifactId>
52+
<version>1.7.5</version>
53+
<scope>test</scope>
54+
</dependency>
3755
</dependencies>
3856

3957
<build>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
package org.w3c.tidy.tests;
2+
3+
4+
import java.io.ByteArrayOutputStream;
5+
import java.io.InputStream;
6+
import org.junit.Assert;
7+
import static org.junit.Assert.assertNotNull;
8+
import org.junit.Before;
9+
import org.junit.Test;
10+
import org.slf4j.Logger;
11+
import org.slf4j.LoggerFactory;
12+
import org.w3c.tidy.Tidy;
13+
import org.w3c.tidy.TidyMessage;
14+
import org.w3c.tidy.TidyMessageListener;
15+
16+
/**
17+
* tests if a file is read and parsed correctly
18+
* @author Willie Scholtz
19+
*/
20+
public class WordCharacterTest {
21+
22+
private static final Logger LOG = LoggerFactory.getLogger(WordCharacterTest.class);
23+
private static final String TEST_FILE_NAME = "word-chars-test.html";
24+
25+
private Tidy tidy;
26+
27+
@Before
28+
public void setUp() {
29+
this.tidy = new Tidy();
30+
31+
this.tidy.setTidyMark(false);
32+
this.tidy.setXHTML(true);
33+
34+
// ensure input & output encodings are set
35+
this.tidy.setInputEncoding("UTF-8");
36+
this.tidy.setOutputEncoding("UTF-8");
37+
38+
// debugging opts
39+
this.tidy.setShowWarnings(false);
40+
this.tidy.setQuiet(true);
41+
42+
// ensure doctype
43+
this.tidy.setDocType("auto");
44+
45+
//tidy.setShowWarnings(true);
46+
this.tidy.setSmartIndent(true);
47+
this.tidy.setMessageListener(new TidyMessageListener() {
48+
@Override
49+
public void messageReceived(TidyMessage message) {
50+
LOG.info("level={}, line={}, column={}, message={}",
51+
message.getLevel(), message.getLine(),
52+
message.getColumn(), message.getMessage());
53+
}
54+
});
55+
}
56+
57+
@Test
58+
public void testFileExists() {
59+
assertNotNull("Test file missing", this.getClass()
60+
.getResource("/" + TEST_FILE_NAME));
61+
}
62+
63+
@Test
64+
public void testParse() {
65+
ByteArrayOutputStream baos = null;
66+
InputStream is = null;
67+
try {
68+
is = this.getClass().getResourceAsStream("/" + TEST_FILE_NAME);
69+
baos = new ByteArrayOutputStream();
70+
71+
this.tidy.parse(is, baos);
72+
baos.flush();
73+
74+
final String result = baos.toString("UTF-8");
75+
76+
LOG.info("got parsed result[{}]", result);
77+
78+
assertNotNull("failed to parse test file["
79+
+ TEST_FILE_NAME + "]", result != null);
80+
81+
// look for unicode NUL within the result, this makes DOM parsers fail
82+
final char NUL = 0x0;
83+
84+
Assert.assertFalse("the parsed string contains unicode NUL["
85+
+ NUL + "], DOM parsers will fail!", result.indexOf(NUL) > -1);
86+
} catch (Exception e) {
87+
LOG.info("error while tidy-ing html!", e);
88+
} finally {
89+
if (is != null) {
90+
try {
91+
is.close();
92+
} catch (Exception e) {}
93+
}
94+
95+
if (baos != null) {
96+
try {
97+
baos.close();
98+
} catch (Exception e) {}
99+
}
100+
}
101+
}
102+
}

src/test/resources/word-chars-test.html

+1
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)