Skip to content

Commit 862f51a

Browse files
committed
include opennlp lang model in tika-eval during assembly;
convert paths to OS-independent paths in unit tests; add headless mode (-Djava.awt.headless=true) in tests
1 parent 2d73e91 commit 862f51a

File tree

4 files changed

+51
-16
lines changed

4 files changed

+51
-16
lines changed

assembly.xml

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
<excludes>
2727
<exclude>**/target/**</exclude>
2828
<exclude>**/.*/**</exclude>
29-
<exclude>**/opennlp/*.bin</exclude>
29+
<exclude>**/opennlp/ner-*.bin</exclude>
30+
<exclude>**/opennlp/en-*.bin</exclude>
3031
<exclude>**/recognition/*.bin</exclude>
3132
<exclude>**/*.releaseBackup</exclude>
3233
</excludes>

tika-app/src/test/java/org/apache/tika/cli/TikaCLIBatchCommandLineTest.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
import org.apache.commons.io.FileUtils;
3333
import org.apache.commons.io.IOUtils;
34+
import org.apache.tika.utils.ProcessUtils;
3435
import org.junit.After;
3536
import org.junit.Before;
3637
import org.junit.Test;
@@ -176,9 +177,7 @@ public void testOneDirOneFileException() throws Exception {
176177
boolean ex = false;
177178
try {
178179
String path = testFile.toAbsolutePath().toString();
179-
if (path.contains(" ")) {
180-
path = "\"" + path + "\"";
181-
}
180+
path = ProcessUtils.escapeCommandLine(path);
182181
String[] params = {testInputPathForCommandLine, path};
183182

184183
String[] commandLine = BatchCommandLineBuilder.build(params);

tika-batch/src/test/java/org/apache/tika/batch/fs/FSBatchTestBase.java

+2
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ public ProcessBuilder getNewBatchRunnerProcess(String testConfig, String loggerP
170170
private String[] commandLine(String testConfig, String loggerProps, String[] args) {
171171
List<String> commandLine = new ArrayList<>();
172172
commandLine.add("java");
173+
commandLine.add("-Djava.awt.headless=true");
173174
commandLine.add("-Dlog4j.configuration=file:"+
174175
this.getClass().getResource(loggerProps).getFile());
175176
commandLine.add("-Xmx128m");
@@ -200,6 +201,7 @@ public BatchProcessDriverCLI getNewDriver(String testConfig,
200201
String[] args) throws Exception {
201202
List<String> commandLine = new ArrayList<>();
202203
commandLine.add("java");
204+
commandLine.add("-Djava.awt.headless=true");
203205
commandLine.add("-Xmx128m");
204206
commandLine.add("-cp");
205207
String cp = System.getProperty("java.class.path");

tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java

+45-12
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535

3636
import org.apache.commons.io.FileUtils;
3737
import org.apache.tika.TikaTest;
38+
import org.apache.tika.utils.ProcessUtils;
3839
import org.junit.AfterClass;
3940
import org.junit.BeforeClass;
4041
import org.junit.Ignore;
@@ -123,9 +124,14 @@ private static void compare() throws IOException {
123124
List<String> args = new ArrayList<>();
124125
args.add("Compare");
125126
args.add("-extractsA");
126-
args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
127+
args.add(
128+
ProcessUtils.escapeCommandLine(
129+
extractsDir.resolve("extractsA")
130+
.toAbsolutePath().toString()));
127131
args.add("-extractsB");
128-
args.add(extractsDir.resolve("extractsB").toAbsolutePath().toString());
132+
args.add(ProcessUtils.escapeCommandLine(
133+
extractsDir.resolve("extractsB")
134+
.toAbsolutePath().toString()));
129135
//add these just to confirm this info doesn't cause problems w cli
130136
args.add("-maxTokens");
131137
args.add("10000000");
@@ -135,7 +141,10 @@ private static void compare() throws IOException {
135141
args.add("100000");
136142

137143
args.add("-db");
138-
args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
144+
args.add(
145+
ProcessUtils.escapeCommandLine(
146+
compareDBDir
147+
.toAbsolutePath().toString()+"/"+dbName));
139148

140149
execute(args, 60000);
141150

@@ -145,7 +154,9 @@ private static void profile() throws IOException {
145154
List<String> args = new ArrayList<>();
146155
args.add("Profile");
147156
args.add("-extracts");
148-
args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
157+
args.add(ProcessUtils.escapeCommandLine(
158+
extractsDir.resolve("extractsA")
159+
.toAbsolutePath().toString()));
149160
//add these just to confirm this info doesn't cause problems w cli
150161
args.add("-maxTokens");
151162
args.add("10000000");
@@ -155,27 +166,39 @@ private static void profile() throws IOException {
155166
args.add("100000");
156167

157168
args.add("-db");
158-
args.add(profileDBDir.toAbsolutePath().toString()+"/"+dbName);
169+
args.add(
170+
ProcessUtils.escapeCommandLine(
171+
profileDBDir
172+
.toAbsolutePath().toString()+"/"+dbName));
159173
execute(args, 60000);
160174
}
161175

162176
private static void reportProfile() throws IOException {
163177
List<String> args = new ArrayList<>();
164178
args.add("Report");
165179
args.add("-db");
166-
args.add(profileDBDir.toAbsolutePath().toString()+"/"+dbName);
180+
args.add(
181+
ProcessUtils.escapeCommandLine(
182+
profileDBDir.toAbsolutePath()
183+
.toString()+"/"+dbName));
167184
args.add("-rd");
168-
args.add(profileReportsDir.toAbsolutePath().toString());
185+
args.add(
186+
ProcessUtils.escapeCommandLine(
187+
profileReportsDir.toAbsolutePath().toString()));
169188
execute(args, 60000);
170189
}
171190

172191
private static void reportCompare() throws IOException {
173192
List<String> args = new ArrayList<>();
174193
args.add("Report");
175194
args.add("-db");
176-
args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
195+
args.add(
196+
ProcessUtils.escapeCommandLine(
197+
compareDBDir.toAbsolutePath().toString()+"/"+dbName));
177198
args.add("-rd");
178-
args.add(compareReportsDir.toAbsolutePath().toString());
199+
args.add(
200+
ProcessUtils.escapeCommandLine(
201+
compareReportsDir.toAbsolutePath().toString()));
179202
execute(args, 60000);
180203
}
181204

@@ -186,11 +209,20 @@ public void testOneOff() throws Exception {
186209
List<String> args = new ArrayList<>();
187210
args.add("Compare");
188211
args.add("-extractsA");
189-
args.add(extractsDir.resolve("extractsA").toAbsolutePath().toString());
212+
args.add(
213+
ProcessUtils.escapeCommandLine(
214+
extractsDir.resolve("extractsA")
215+
.toAbsolutePath().toString()));
190216
args.add("-extractsB");
191-
args.add(extractsDir.resolve("extractsB").toAbsolutePath().toString());
217+
args.add(
218+
ProcessUtils.escapeCommandLine(
219+
extractsDir.resolve("extractsB")
220+
.toAbsolutePath().toString()));
192221
args.add("-db");
193-
args.add(compareDBDir.toAbsolutePath().toString()+"/"+dbName);
222+
args.add(
223+
ProcessUtils.escapeCommandLine(
224+
compareDBDir.toAbsolutePath()
225+
.toString()+"/"+dbName));
194226

195227
execute(args, 60000);
196228
// args.add("-drop");
@@ -202,6 +234,7 @@ private static void execute(List<String> incomingArgs, long maxMillis) throws IO
202234
List<String> args = new ArrayList<>();
203235
String cp = System.getProperty("java.class.path");
204236
args.add("java");
237+
args.add("-Djava.awt.headless=true");
205238
args.add("-cp");
206239
args.add(cp);
207240
args.add("org.apache.tika.eval.TikaEvalCLI");

0 commit comments

Comments
 (0)