Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolves "Reviewer feedback for 2.0.0 RC1" #283

Merged
merged 7 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 0 additions & 25 deletions dkpro-jwpl-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -182,13 +182,6 @@
<include>**/*</include>
</includes>
</resource>
<resource>
<directory>src/it/resources</directory>
<filtering>true</filtering>
<includes>
<include>**/*</include>
</includes>
</resource>
</resources>
<plugins>
<plugin>
Expand Down Expand Up @@ -270,24 +263,6 @@
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<id>create-executable-jar</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<attach>false</attach>
<descriptorRefs>
<descriptor>jar-with-dependencies</descriptor>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.util.Set;

import org.dkpro.jwpl.api.exception.WikiApiException;
import org.dkpro.jwpl.util.GraphUtilities;
import org.dkpro.jwpl.api.util.GraphUtilities;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@
import org.dkpro.jwpl.api.exception.WikiApiException;
import org.dkpro.jwpl.api.exception.WikiPageNotFoundException;
import org.dkpro.jwpl.api.exception.WikiTitleParsingException;
import org.dkpro.jwpl.api.util.ApiUtilities;
import org.dkpro.jwpl.api.util.CommonUtilities;
import org.dkpro.jwpl.api.util.GraphSerialization;
import org.dkpro.jwpl.util.ApiUtilities;
import org.dkpro.jwpl.util.CommonUtilities;
import org.dkpro.jwpl.util.OS;
import org.dkpro.jwpl.api.util.OS;
import org.jgrapht.GraphPath;
import org.jgrapht.alg.connectivity.ConnectivityInspector;
import org.jgrapht.alg.shortestpath.DijkstraShortestPath;
Expand Down
10 changes: 5 additions & 5 deletions dkpro-jwpl-api/src/main/java/org/dkpro/jwpl/api/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@
import org.dkpro.jwpl.api.exception.WikiTitleParsingException;
import org.dkpro.jwpl.api.hibernate.PageDAO;
import org.dkpro.jwpl.api.sweble.PlainTextConverter;
import org.dkpro.jwpl.util.UnmodifiableArraySet;
import org.dkpro.jwpl.api.util.UnmodifiableArraySet;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.PageId;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.PageTitle;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.WtEngineImpl;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.nodes.EngProcessedPage;
import org.hibernate.LockOptions;
import org.hibernate.Session;
import org.hibernate.type.StandardBasicTypes;
import org.sweble.wikitext.engine.PageId;
import org.sweble.wikitext.engine.PageTitle;
import org.sweble.wikitext.engine.WtEngineImpl;
import org.sweble.wikitext.engine.nodes.EngProcessedPage;

import de.fau.cs.osr.ptk.common.AstVisitor;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@

import org.dkpro.jwpl.api.exception.WikiApiException;
import org.dkpro.jwpl.api.exception.WikiPageNotFoundException;
import org.dkpro.jwpl.util.ApiUtilities;
import org.dkpro.jwpl.util.StringUtils;
import org.dkpro.jwpl.api.util.ApiUtilities;
import org.dkpro.jwpl.api.util.StringUtils;
import org.hibernate.Session;
import org.hibernate.query.Query;
import org.slf4j.Logger;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@

import javax.xml.parsers.ParserConfigurationException;

import org.sweble.wikitext.engine.config.WikiConfig;
import org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.sweble.wikitext.engine.utils.LanguageConfigGenerator;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.config.WikiConfig;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.utils.LanguageConfigGenerator;
import org.xml.sax.SAXException;

import com.neovisionaries.i18n.LanguageCode;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@
import org.dkpro.jwpl.api.exception.WikiPageNotFoundException;
import org.dkpro.jwpl.api.exception.WikiTitleParsingException;
import org.dkpro.jwpl.api.hibernate.WikiHibernateUtil;
import org.dkpro.jwpl.util.distance.LevenshteinStringDistance;
import org.dkpro.jwpl.api.util.distance.LevenshteinStringDistance;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.config.WikiConfig;
import org.hibernate.Session;
import org.hibernate.query.NativeQuery;
import org.hibernate.type.StandardBasicTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.sweble.wikitext.engine.config.WikiConfig;

/**
* Provides access to Wikipedia articles and categories.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

import org.dkpro.jwpl.api.exception.WikiApiException;
import org.dkpro.jwpl.api.exception.WikiPageNotFoundException;
import org.dkpro.jwpl.util.ApiUtilities;
import org.dkpro.jwpl.api.util.ApiUtilities;
import org.hibernate.Session;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@
import java.util.List;
import java.util.regex.Pattern;

import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.PageTitle;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.config.WikiConfig;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.sweble.wikitext.engine.PageTitle;
import org.sweble.wikitext.engine.config.WikiConfig;
import org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.sweble.wikitext.parser.nodes.WtBold;
import org.sweble.wikitext.parser.nodes.WtExternalLink;
import org.sweble.wikitext.parser.nodes.WtHorizontalRule;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
import java.util.LinkedList;
import java.util.List;

import org.sweble.wikitext.engine.config.WikiConfig;
import org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.config.WikiConfig;
import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.sweble.wikitext.parser.nodes.WtNode;
import org.sweble.wikitext.parser.nodes.WtTemplate;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.lang.invoke.MethodHandles;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.util.Arrays;
import java.util.Map;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.lang.invoke.MethodHandles;
import java.sql.Connection;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.lang.invoke.MethodHandles;
import java.util.HashSet;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.util.HashMap;
import java.util.Map;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

public class OS
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.util.Collection;
import java.util.Iterator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util;
package org.dkpro.jwpl.api.util;

import java.util.Arrays;
import java.util.Collection;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util.distance;
package org.dkpro.jwpl.api.util.distance;

public class LevenshteinStringDistance
implements StringDistance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.util.distance;
package org.dkpro.jwpl.api.util.distance;

public interface StringDistance
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

import static org.junit.jupiter.api.Assertions.assertSame;

import org.dkpro.jwpl.shade.org.sweble.wikitext.engine.config.WikiConfig;
import org.junit.jupiter.api.Test;
import org.sweble.wikitext.engine.config.WikiConfig;

public class WikiConfigTest
{
Expand Down
8 changes: 8 additions & 0 deletions dkpro-jwpl-deps/dkpro-jwpl-swc-engine-shade/NOTICE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
All classes from this module are taken from third parties.

The classes were published by the Sweble project, provided
via https://github.com/sweble/sweble-wikitext
by the Open Source Research Group, University of Erlangen-Nürnberg,

under the Apache License, Version 2.0
(http://www.apache.org/licenses/LICENSE-2.0)
5 changes: 4 additions & 1 deletion dkpro-jwpl-deps/dkpro-jwpl-swc-engine-shade/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@
<pattern>javax.xml.bind</pattern>
<shadedPattern>jakarta.xml.bind</shadedPattern>
</relocation>
<relocation>
<pattern>org.sweble.wikitext.engine</pattern>
<shadedPattern>org.dkpro.jwpl.shade.org.sweble.wikitext.engine</shadedPattern>
</relocation>
</relocations>
</configuration>
</execution>
Expand All @@ -146,7 +150,6 @@
-->
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>3.4.0</version>
<executions>
<execution>
<id>workaround-makeItVisibleOnIntellij</id>
Expand Down
80 changes: 68 additions & 12 deletions dkpro-jwpl-revisionmachine/README
Original file line number Diff line number Diff line change
@@ -1,17 +1,73 @@
Known Issues
# Config Examples

Please also see the issues on Google Code
http://code.google.com/p/jwpl/issues/list
## Diff Tool Config

```xml
<config>
<values>
<VALUE_MINIMUM_LONGEST_COMMON_SUBSTRING>12</VALUE_MINIMUM_LONGEST_COMMON_SUBSTRING>
<COUNTER_FULL_REVISION>1000</COUNTER_FULL_REVISION>
</values>
<externals>
<sevenzip>"D:\Programme\Utilities\7-Zip\7z.exe"</sevenzip>
</externals>
<input>
<MODE_SURROGATES>DISCARD_REVISION</MODE_SURROGATES>
<WIKIPEDIA_ENCODING>UTF-8</WIKIPEDIA_ENCODING>
<archive>
<type>SEVENZIP</type>
<path>"D:\simplewiki.7z"</path>
<start>0</start>
</archive>
</input>
<output>
<OUTPUT_MODE>SQL</OUTPUT_MODE>
<PATH>""</PATH>
<LIMIT_SQL_FILE_SIZE>1000000000</LIMIT_SQL_FILE_SIZE>
<MODE_ZIP_COMPRESSION_ENABLED>true</MODE_ZIP_COMPRESSION_ENABLED>
</output>
<cache>
<LIMIT_TASK_SIZE_REVISIONS>5000000</LIMIT_TASK_SIZE_REVISIONS>
<LIMIT_TASK_SIZE_DIFFS>1000000</LIMIT_TASK_SIZE_DIFFS>
<LIMIT_SQLSERVER_MAX_ALLOWED_PACKET>1000000</LIMIT_SQLSERVER_MAX_ALLOWED_PACKET>
</cache>
<logging>
<root_folder>""</root_folder>
<diff_tool>
<level>INFO</level>
</diff_tool>
</logging>
<debug>
<verification_diff>false</verification_diff>
<verification_encoding>false</verification_encoding>
<statistical_output>false</statistical_output>
<debug_output>
<enabled>false</enabled>
<path>""</path>
</debug_output>
</debug>
<filter>
<namespaces>
<!-- White list of namespaces; if nothing here then all namespaces are allowed -->
<ns>0</ns>
<ns>1</ns>
</namespaces>
</filter>
</config>

I)
Handling of surrogate characters
```

There are 4 possible modes of handling UTF8 surrogate characters.
Currently, the only reliable mode is "Discard Revision", in which any revision that contains surrogate characters is discarded.
The other three modes in "org.dkpro.jwpl.revisionmachine.difftool.data.SurrogateModes" have been disabled for now.
The corresponding config section in the config tool has also been made invisible (org.dkpro.jwpl.revisionmachine.difftool.config.gui.panels.InputPanel).
The disabled parts are marked with TODO markers.
## Index Generator Config

In order to use the other three surrogate modes, which try to handle surrogate characters differently,
the corresponding code has to be checked first. Afterwards, the modes can be re-enabled in the config tool (InputPanel.java) and in the SurrogateModes class.
```
host=localhost
db=wiki_en_20110405_rev
user=student
password=student
output=target
outputDatabase=false
outputDatafile=false
charset=UTF8
buffer=15000
maxAllowedPackets=16760832
```
Loading