Skip to content

Commit 698d84b

Browse files
committed
Add a prop file for a newer version of the French WikiNER dataset
1 parent 766013c commit 698d84b

File tree

2 files changed

+51 -1 lines changed

Diff for: scripts/ner/Makefile

+6 -1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11

2-
all: chinese genia german hungarian italian all.3class nowiki.3class conll.4class muc.7class spanish
2+
all: chinese french genia german hungarian italian all.3class nowiki.3class conll.4class muc.7class spanish
33

44
chinese: chinese.misc.nodistsim.ser.gz chinese.misc.distsim.ser.gz
55

@@ -9,6 +9,11 @@ chinese.misc.nodistsim.ser.gz:
99
chinese.misc.distsim.ser.gz:
1010
java -mx15g edu.stanford.nlp.ie.crf.CRFClassifier -prop chinese.misc.distsim.prop > chinese.misc.distsim.out 2>&1
1111

12+
french: french-wikiner-4class.crf.ser.gz
13+
14+
french-wikiner-4class.crf.ser.gz:
15+
java -mx15g edu.stanford.nlp.ie.crf.CRFClassifier -prop french.wikiner.nodistsim.4class.prop > french.wikiner.nodistsim.out 2>&1
16+
1217
genia: genia-nlpba-2004.crf.gz
1318

1419
genia-nlpba-2004.crf.gz:

Diff for: scripts/ner/french.wikiner.nodistsim.4class.prop

+45
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,45 @@
1+
trainFileList = /home/john/stanza/data/ner/fr_wikinergold.train.bioes
2+
testFiles = /home/john/stanza/data/ner/fr_wikinergold.dev.bioes
3+
serializeTo = french-wikiner-4class.crf.ser.gz
4+
5+
useDistSim = false
6+
7+
map = word=0,answer=1
8+
9+
useTitle = true
10+
useClassFeature=true
11+
useWord=true
12+
useNGrams=true
13+
noMidNGrams=true
14+
usePrev=true
15+
useNext=true
16+
useLongSequences=true
17+
useSequences=true
18+
usePrevSequences=true
19+
maxLeft=1
20+
useTypeSeqs=true
21+
useTypeSeqs2=true
22+
useTypeySequences=true
23+
useOccurrencePatterns=true
24+
useLastRealWord=true
25+
useNextRealWord=true
26+
normalize=true
27+
wordShape=dan2uselC
28+
useDisjunctive=true
29+
disjunctionWidth=5
30+
#useDisjunctiveShapeInteraction=true
31+
32+
type=crf
33+
34+
saveFeatureIndexToDisk = true
35+
36+
readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
37+
38+
useObservedSequencesOnly=true
39+
40+
sigma = 1
41+
useQN = true
42+
QNsize = 25
43+
44+
# makes it go faster
45+
featureDiffThresh=0.05

0 commit comments

Comments (0)