Skip to content

Commit a46edc0

Browse files
committed
streamlined some constructors and file paths
1 parent b2f196b commit a46edc0

10 files changed

+186
-28
lines changed

.gitignore

+7-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ buildNumber.properties
1111
# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored)
1212
!/.mvn/wrapper/maven-wrapper.jar
1313

14+
# Eclipse
15+
.classpath
16+
.project
17+
.settings/
18+
1419
# Ignore output files of data import and model training
1520
data/opennlp_*
16-
data/mallet_*
21+
data/mallet_*
22+
data/corenlp_*

data/intentCompactTest.txt

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
Show me the way from Dover to Eastbourne --- # --- DIRECTIONS
2+
Need a plane from FRA to DUS --- # --- DIRECTIONS
3+
I'd like to visit New York --- # --- DIRECTIONS
4+
What's the weather like? --- # --- WEATHER
5+
How is the weather in LA? --- # --- WEATHER
6+
Give me the weather forecast? --- # --- WEATHER
7+
Will it snow? --- # --- WEATHER
8+
Alarm for 8 o'clock --- # --- ALARM
9+
Set an alarm to ten o'clock --- # --- ALARM
10+
Wake me up in six hours --- # --- ALARM
11+
Set a timer for six minutes --- # --- TIMER
12+
Timer for three hours --- # --- TIMER
13+
Show me news --- # --- NEWS
14+
Show me science news --- # --- NEWS
15+
What is going on? --- # --- NEWS
16+
How are you? --- # --- OTHER
17+
Can you hear me? --- # --- OTHER

data/intentCompactTrain.txt

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
Way from Paris to Marseille please --- O O LOC_START O LOC_END O --- DIRECTIONS
2+
Way from NY to SF --- O O LOC_START O LOC_END --- DIRECTIONS
3+
Way from London to Liverpool --- O O LOC_START O LOC_END --- DIRECTIONS
4+
Way from LA to SF please --- O O LOC_START O LOC_END O --- DIRECTIONS
5+
Way from LA to SF --- O O LOC_START O LOC_END --- DIRECTIONS
6+
Way from DUS to LAX --- O O LOC_START O LOC_END --- DIRECTIONS
7+
Way from C to D --- O O LOC_START O LOC_END --- DIRECTIONS
8+
Way from Bruxelles to Bonn --- O O LOC_START O LOC_END --- DIRECTIONS
9+
Way from BB to AA please --- O O LOC_START O LOC_END O --- DIRECTIONS
10+
Way from B to A please --- O O LOC_START O LOC_END O --- DIRECTIONS
11+
Way from B to A --- O O LOC_START O LOC_END --- DIRECTIONS
12+
Way from A to B --- O O LOC_START O LOC_END --- DIRECTIONS
13+
The way from New York to Jersey City --- O O O LOC_START LOC_START O LOC_END LOC_END --- DIRECTIONS
14+
The way from Los Angeles to San Diego --- O O O LOC_START LOC_START O LOC_END LOC_END --- DIRECTIONS
15+
The way from Las Vegas to Jersey City --- O O O LOC_START LOC_START O LOC_END LOC_END --- DIRECTIONS
16+
The way from Jersey City to New York --- O O O LOC_START LOC_START O LOC_END LOC_END --- DIRECTIONS
17+
Show me the way from Frankfurt to Nürnberg --- O O O O O LOC_START O LOC_END --- DIRECTIONS
18+
Show me the way from Berlin to Munich --- O O O O O LOC_START O LOC_END --- DIRECTIONS
19+
Show me the road to Rio de Janeiro --- O O O O O LOC_END LOC_END LOC_END --- DIRECTIONS
20+
Need to go from SF to NY --- O O O O LOC_START O LOC_END --- DIRECTIONS
21+
Need to go from NY to SF --- O O O O LOC_START O LOC_END --- DIRECTIONS
22+
Need a train from SF to LA please --- O O O O LOC_START O LOC_END O --- DIRECTIONS
23+
Need a train from SF to LA please --- O O O O LOC_START O LOC_END O --- DIRECTIONS
24+
Need a plane from LAX to JFK --- O O O O LOC_START O LOC_END --- DIRECTIONS
25+
Need a plane from DUS to FRA --- O O O O LOC_START O LOC_END --- DIRECTIONS
26+
I'd like to visit the US --- O O O O O LOC_END --- DIRECTIONS
27+
I'd like to visit the Azores --- O O O O O LOC_END --- DIRECTIONS
28+
I want to visit the Statue of Liberty --- O O O O O LOC_END LOC_END LOC_END --- DIRECTIONS
29+
I want to visit the Louvre in Paris --- O O O O O LOC_END LOC_END LOC_END --- DIRECTIONS
30+
I wanna go to Washington --- O O O O LOC_END --- DIRECTIONS
31+
I wanna go to California --- O O O O LOC_END --- DIRECTIONS
32+
I need to get from SF to NY --- O O O O O LOC_START O LOC_END --- DIRECTIONS
33+
I need to get from Rome to Venice --- O O O O O LOC_START O LOC_END --- DIRECTIONS
34+
Do you know the way from SF to LA? --- O O O O O O LOC_START O LOC_END --- DIRECTIONS
35+
Do you know the way from SF to LA? --- O O O O O O LOC_START O LOC_END --- DIRECTIONS
36+
How is the weather? --- # --- WEATHER
37+
How is the weather in New York? --- # --- WEATHER
38+
What's the weather in California? --- # --- WEATHER
39+
Do you know the weather? --- # --- WEATHER
40+
Will it rain today? --- # --- WEATHER
41+
Will it be sunny today? --- # --- WEATHER
42+
Do I need an umbrella today? --- # --- WEATHER
43+
What's the weather tomorrow? --- # --- WEATHER
44+
Is it going to be cold tomorrow? --- # --- WEATHER
45+
Show me the weather please? --- # --- WEATHER
46+
What's the temperature? --- # --- WEATHER
47+
What is the temperature in Berlin? --- # --- WEATHER
48+
Set an alarm for 8 o'clock --- # --- ALARM
49+
Set an alarm for seven o'clock --- # --- ALARM
50+
Set an alarm to six o'clock --- # --- ALARM
51+
Set an alarm --- # --- ALARM
52+
Alarm to nine o'clock --- # --- ALARM
53+
Alarm to 9 o'clock --- # --- ALARM
54+
Alarm --- # --- ALARM
55+
Wake me up at 10 o'clock please --- # --- ALARM
56+
Wake me up in 9 hours please --- # --- ALARM
57+
I need to get up at 6 --- # --- ALARM
58+
I need to wake up at 7 --- # --- ALARM
59+
Set the alarm clock for 10 am --- # --- ALARM
60+
Set a timer for 10 minutes --- # --- TIMER
61+
Set a timer for 5 minutes --- # --- TIMER
62+
Set a timer --- # --- TIMER
63+
Timer --- # --- TIMER
64+
Timer for twenty minutes please --- # --- TIMER
65+
Timer for one hour please --- # --- TIMER
66+
Countdown for 30 seconds --- # --- TIMER
67+
Count to 30 --- # --- TIMER
68+
Show me the news please --- # --- NEWS
69+
I'd like to see the news --- # --- NEWS
70+
What's new? --- # --- NEWS
71+
What's on the news --- # --- NEWS
72+
Show me the news of the day --- # --- NEWS
73+
Show me sports news --- # --- NEWS
74+
Show me tech news --- # --- NEWS
75+
What is going on in the world? --- # --- NEWS
76+
What's happening in the world of entertainment? --- # --- NEWS
77+
Hello --- # --- OTHER
78+
How are you? --- # --- OTHER
79+
What's up? --- # --- OTHER
80+
What is happening? --- # --- OTHER
81+
What's your name? --- # --- OTHER
82+
What are you called? --- # --- OTHER
83+
Can you help me? --- # --- OTHER
84+
You are cool --- # --- OTHER

data/nerCompactTest.txt

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Way from Rio to Tokio --- O O LOC_START O LOC_END
2+
Show me the way from Essen to Bochum --- O O O O O LOC_START O LOC_END
3+
Let's go to Amsterdam --- O O O LOC_END
4+
Let's fly to Brasil --- O O O LOC_END
5+
Let's drive from Potsdam to Kopenhagen --- O O O LOC_START O LOC_END
6+
I'm thinking about a trip to Las Vegas --- O O O O O O LOC_END LOC_END
7+
I'm planning to drive from Barcelona to Granada --- O O O O O LOC_START O LOC_END
8+
I'm looking for flights from SFO to ORD --- O O O O O LOC_START O LOC_END
9+
I want to walk from Santiago de Compostela to Porto --- O O O O O LOC_START LOC_START LOC_START O LOC_END
10+
I want to visit Chicago --- O O O O LOC_END
11+
I want to go to the Statue of Liberty --- O O O O O O LOC_END LOC_END LOC_END
12+
I want to go to Santiago de Compostela --- O O O O O LOC_END LOC_END LOC_END
13+
I wanna visit Vienna --- O O O LOC_END
14+
I need to go to Westminster Abbey --- O O O O O LOC_END LOC_END
15+
I need a plane from Düsseldorf to Manchester --- O O O O O LOC_START O LOC_END
16+
Check some flights to Buenos Aires --- O O O O LOC_END LOC_END

data/nerCompactTrain.txt

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
Way from Paris to Marseille please --- O O LOC_START O LOC_END O
2+
Way from NY to SF --- O O LOC_START O LOC_END
3+
Way from London to Liverpool --- O O LOC_START O LOC_END
4+
Way from LA to SF please --- O O LOC_START O LOC_END O
5+
Way from LA to SF --- O O LOC_START O LOC_END
6+
Way from DUS to LAX --- O O LOC_START O LOC_END
7+
Way from C to D --- O O LOC_START O LOC_END
8+
Way from Bruxelles to Bonn --- O O LOC_START O LOC_END
9+
Way from BB to AA please --- O O LOC_START O LOC_END O
10+
Way from B to A please --- O O LOC_START O LOC_END O
11+
Way from B to A --- O O LOC_START O LOC_END
12+
Way from A to B --- O O LOC_START O LOC_END
13+
The way from New York to Jersey City --- O O O LOC_START LOC_START O LOC_END LOC_END
14+
The way from Los Angeles to San Diego --- O O O LOC_START LOC_START O LOC_END LOC_END
15+
The way from Las Vegas to Jersey City --- O O O LOC_START LOC_START O LOC_END LOC_END
16+
The way from Jersey City to New York --- O O O LOC_START LOC_START O LOC_END LOC_END
17+
Show me the way from Frankfurt to Nürnberg --- O O O O O LOC_START O LOC_END
18+
Show me the way from Berlin to Munich --- O O O O O LOC_START O LOC_END
19+
Show me the road to Rio de Janeiro --- O O O O O LOC_END LOC_END LOC_END
20+
Need to go from SF to NY --- O O O O LOC_START O LOC_END
21+
Need to go from NY to SF --- O O O O LOC_START O LOC_END
22+
Need a train from SF to LA please --- O O O O LOC_START O LOC_END O
23+
Need a train from SF to LA please --- O O O O LOC_START O LOC_END O
24+
Need a plane from LAX to JFK --- O O O O LOC_START O LOC_END
25+
Need a plane from DUS to FRA --- O O O O LOC_START O LOC_END
26+
I'd like to visit the US --- O O O O O LOC_END
27+
I'd like to visit the Azores --- O O O O O LOC_END
28+
I want to visit the Statue of Liberty --- O O O O O LOC_END LOC_END LOC_END
29+
I want to visit the Louvre in Paris --- O O O O O LOC_END LOC_END LOC_END
30+
I wanna go to Washington --- O O O O LOC_END
31+
I wanna go to California --- O O O O LOC_END
32+
I need to get from SF to NY --- O O O O O LOC_START O LOC_END
33+
I need to get from Rome to Venice --- O O O O O LOC_START O LOC_END
34+
Do you know the way from SF to LA? --- O O O O O O LOC_START O LOC_END
35+
Do you know the way from SF to LA? --- O O O O O O LOC_START O LOC_END

pom.xml

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
22
<modelVersion>4.0.0</modelVersion>
33
<groupId>net.b07z.sepia.nlu</groupId>
4-
<artifactId>sepia-mallet-test</artifactId>
5-
<version>0.0.1-SNAPSHOT</version>
6-
<name>SEPIA MALLET test</name>
4+
<artifactId>java-nlu-tools</artifactId>
5+
<version>0.8.0</version>
6+
<name>Java NLU tools</name>
7+
<description>NLU tools for SEPIA made with Java</description>
78

89
<properties>
910
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>

src/main/java/net/b07z/sepia/nlu/classifiers/MalletNerClassifier.java

+6-2
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,21 @@ public class MalletNerClassifier implements NerClassifier{
3232

3333
private CRF crfModel;
3434
private Tokenizer tokenizer;
35+
private String languageCode;
3536

3637
private int nBest = 3; //How many answers to output
3738
private int cacheSize = 100000; //How much state information to memoize in n-best decoding
3839

3940
/**
4041
* Create NER classifier with model, tokenizer and language code. The model is loaded from the file modelFileBase + "_" + languageCode.
41-
* @param model
42+
* @param modelFileBase
4243
* @param tokenizer
44+
* @param languageCode
4345
*/
44-
public MalletNerClassifier(String modelFile, Tokenizer tokenizer) throws Exception {
46+
public MalletNerClassifier(String modelFileBase, Tokenizer tokenizer, String languageCode) throws Exception {
47+
this.languageCode = languageCode;
4548
//load model
49+
String modelFile = modelFileBase + "_" + this.languageCode;
4650
try (ObjectInputStream s = new ObjectInputStream(new FileInputStream(modelFile))){
4751
this.crfModel = (CRF) s.readObject();
4852
}catch(Exception e){

src/main/java/net/b07z/sepia/nlu/classifiers/OpenNlpIntentClassifier.java

-4
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,10 @@
22

33
import java.io.FileInputStream;
44
import java.io.InputStream;
5-
import java.util.ArrayList;
65
import java.util.Collection;
76
import java.util.List;
87
import java.util.Map;
98
import java.util.Map.Entry;
10-
import java.util.Set;
11-
import java.util.SortedMap;
12-
139
import net.b07z.sepia.nlu.tokenizers.Tokenizer;
1410
import opennlp.tools.doccat.DoccatModel;
1511
import opennlp.tools.doccat.DocumentCategorizerME;

src/main/java/net/b07z/sepia/nlu/examples/MalletNerDemo.java

+8-13
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@ public static void main(String[] args) throws Exception {
2929
String nerCompactTrainDataFile = "data/nerCompactTrain.txt";
3030
String nerCompactTestDataFile = "data/nerCompactTest.txt";
3131
String nerTrainerPropertiesFile = null; //"data/malletner.properties";
32-
String nerModelFile = "data/mallet_model_ner_en";
33-
String nerTrainFile = "data/mallet_train_ner_en";
32+
String nerModelFileBase = "data/mallet_model_ner";
33+
String nerTrainFileBase = "data/mallet_train_ner";
34+
35+
String languageCode = "en";
3436

3537
//Create training data from compact custom format
3638
Collection<CompactDataEntry> trainData = CustomDataHandler.importCompactData(nerCompactTrainDataFile);
@@ -41,14 +43,15 @@ public static void main(String[] args) throws Exception {
4143
//Tokenizer tokenizer = new RealLifeChatTokenizer();
4244
CompactDataHandler cdh = new MalletDataHandler();
4345
List<String> trainDataLines = cdh.importTrainDataNer(trainData, tokenizer, false, null);
44-
CustomDataHandler.writeTrainData(nerTrainFile, trainDataLines);
46+
String trainFile = nerTrainFileBase + "_" + languageCode;
47+
CustomDataHandler.writeTrainData(trainFile, trainDataLines);
4548

4649
//Train
47-
NerTrainer trainer = new MalletNerTrainer(nerTrainerPropertiesFile, nerTrainFile, nerModelFile);
50+
NerTrainer trainer = new MalletNerTrainer(nerTrainerPropertiesFile, nerTrainFileBase, nerModelFileBase, languageCode);
4851
trainer.train();
4952

5053
//Test
51-
NerClassifier ner = new MalletNerClassifier(nerModelFile, tokenizer);
54+
NerClassifier ner = new MalletNerClassifier(nerModelFileBase, tokenizer, languageCode);
5255
int good = 0;
5356
int bad = 0;
5457
for (CompactDataEntry cde : testData){
@@ -71,14 +74,6 @@ public static void main(String[] args) throws Exception {
7174
}
7275
System.out.println("Good: " + good + ", bad: " + bad + ", prec.: " + ((double)good/(good+bad)));
7376
System.out.println("Took: " + (System.currentTimeMillis() - tic) + "ms");
74-
/*
75-
System.out.println(ner.analyzeSentence("Show me the way from Essen to Bochum"));
76-
System.out.println(ner.analyzeSentence("Show me the way from LA to SF"));
77-
System.out.println(ner.analyzeSentence("I want to go to the Statue of Liberty"));
78-
System.out.println(ner.getEntities("I need to go to Westminster Abbey"));
79-
System.out.println(ner.getEntities("I'm looking for flights from SFO to ORD"));
80-
System.out.println(ner.getEntities("I want to visit Chicago"));
81-
*/
8277
}
8378

8479
}

src/main/java/net/b07z/sepia/nlu/trainers/MalletNerTrainer.java

+9-5
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ public class MalletNerTrainer implements NerTrainer {
3737
private String modelFile;
3838
private Reader trainingFile;
3939
private CRF crfModel;
40+
private String languageCode;
4041

4142
private String defaultLabel = "O"; //Label for initial context and uninteresting tokens
4243
private double gaussianVariance = 10.0; //The gaussian prior variance used for training
@@ -52,13 +53,16 @@ public class MalletNerTrainer implements NerTrainer {
5253
/**
5354
* Set up NER trainer with properties file, training data and language code.
5455
* @param propertiesFile
55-
* @param trainDataFile
56-
* @param modelOutputFile
56+
* @param trainDataFileBase
57+
* @param modelOutputFileBase
58+
* @param languageCode
5759
* @throws Exception
5860
*/
59-
public MalletNerTrainer(String propertiesFile, String trainDataFile, String modelOutputFile) throws Exception{
60-
modelFile = modelOutputFile;
61-
trainingFile = new FileReader(new File(trainDataFile));
61+
public MalletNerTrainer(String propertiesFile, String trainDataFileBase, String modelOutputFileBase, String languageCode) throws Exception{
62+
this.languageCode = languageCode;
63+
this.modelFile = modelOutputFileBase + "_" + this.languageCode;
64+
String trainDataFile = trainDataFileBase + "_" + this.languageCode;
65+
this.trainingFile = new FileReader(new File(trainDataFile));
6266

6367
if (propertiesFile != null && !propertiesFile.isEmpty()){
6468
props = CustomDataHandler.loadProperties(propertiesFile);

0 commit comments

Comments
 (0)