Skip to content

Commit

Permalink
Convert to JUnit5. URN tests
Browse files Browse the repository at this point in the history
  • Loading branch information
afs committed Oct 7, 2024
1 parent 9fdea69 commit 4f4c5e9
Show file tree
Hide file tree
Showing 34 changed files with 674 additions and 420 deletions.
7 changes: 7 additions & 0 deletions iri4ld/src/main/java/org/seaborne/rfc3986/Builder.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ public class Builder{
private String fragment = null;
/*package*/ Builder() {}

/** Unchecked exception carrying a message that describes a problem with a builder setting. */
static class IRIBuildException extends RuntimeException {
    public IRIBuildException(String message) {
        super(message);
    }
}

/** Returns the scheme currently held by this builder. */
public String scheme() {
    return this.scheme;
}
Expand All @@ -46,6 +50,7 @@ public String authority() {
return authority;
}

/** Set the "authority" part of an IRI - this clears previous host and port settings. */
public Builder authority(String authority) {
this.authority = authority;
this.host = null;
Expand All @@ -68,6 +73,8 @@ public int port() {
}

public Builder port(int port) {
if ( port < 0 )
throw new IRIBuildException("port is less than zero");
this.authority = null;
this.port = port;
return this;
Expand Down
4 changes: 4 additions & 0 deletions iri4ld/src/main/java/org/seaborne/rfc3986/Chars3986.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ public static boolean isAlpha(char ch) {
return range(ch, 'a', 'z') || range(ch, 'A', 'Z');
}

/** Test whether {@code ch} is accepted by {@code isAlpha} or by {@code isDigit}. */
public static boolean isAlphaNum(char ch) {
    if ( isAlpha(ch) )
        return true;
    return isDigit(ch);
}

/** RFC3987: International alphabetic. */
public static boolean isIAlpha(char ch) {
return isAlpha(ch) || isUcsChar(ch);
Expand Down
211 changes: 116 additions & 95 deletions iri4ld/src/main/java/org/seaborne/rfc3986/IRI3986.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,6 @@ public class IRIParseException extends RuntimeException {
// Must have a message.
public IRIParseException(String message) {super(message); }

// Where in the parser, the exception comes from is not relevant.
// Where in a parser, the exception comes from is not relevant.
@Override public Throwable fillInStackTrace() { return this ; }
}
1 change: 0 additions & 1 deletion iri4ld/src/main/java/org/seaborne/rfc3986/Issue.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ public enum Issue {
http_port_not_advised,
http_omit_well_known_port,
// urn:uuid and uuid
urn_uuid_bad_pattern,
uuid_bad_pattern,
uuid_has_query,
uuid_has_fragment,
Expand Down
12 changes: 11 additions & 1 deletion iri4ld/src/main/java/org/seaborne/rfc3986/LibParseIRI.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,22 @@ public static char charAt(CharSequence string, int x) {
return string.charAt(x);
}

// Copied from jena-base to make this package dependency-free.
/** The sixteen hexadecimal digits in upper case; the digit for value {@code i} is at index {@code i}. */
private static final char[] hexDigitsUC = "0123456789ABCDEF".toCharArray();

/** Test whether {@code string} starts with {@code prefix}, ignoring case differences. */
static boolean caseInsensitivePrefix(String string, String prefix) {
    final int prefixLength = prefix.length();
    return string.regionMatches(true, 0, prefix, 0, prefixLength);
}

/**
 * Case insensitive test of whether {@code substr} occurs in {@code string}
 * starting at index {@code idx}.
 */
static boolean caseInsensitiveRegion(String string, int idx, String substr) {
    final int len = substr.length();
    return string.regionMatches(true, idx, substr, 0, len);
}


/* package */ static void encodeAsHex(StringBuilder buff, char marker, char ch) {
if ( ch < 256 ) {
buff.append(marker);
Expand Down
9 changes: 2 additions & 7 deletions iri4ld/src/main/java/org/seaborne/rfc3986/ParseDID.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

package org.seaborne.rfc3986;

import static org.seaborne.rfc3986.Chars3986.charAt;

import java.util.regex.Pattern;

public class ParseDID {
Expand Down Expand Up @@ -135,11 +137,4 @@ static class DIDParseException extends IRIParseException {
/** Raise a {@code DIDParseException} built from the DID string and the message. */
private static void error(String didString, String msg) {
    final DIDParseException failure = new DIDParseException(didString, msg);
    throw failure;
}

/** Like {@code String.charAt}, but yields the EOF character instead of throwing when {@code x} is at or past the end of the string. */
private static char charAt(String str, int x) {
    return ( x < str.length() ) ? str.charAt(x) : EOF;
}
}
225 changes: 225 additions & 0 deletions iri4ld/src/main/java/org/seaborne/rfc3986/ParseURN.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.seaborne.rfc3986;

import static org.seaborne.rfc3986.Chars3986.EOF;
import static org.seaborne.rfc3986.Chars3986.charAt;
import static org.seaborne.rfc3986.LibParseIRI.caseInsensitivePrefix;

import java.util.function.BiConsumer;
import java.util.regex.Pattern;

public class ParseURN {
// RFC 8141
// @formatter:off
/*
* namestring = assigned-name
* [ rq-components ]
* [ "#" f-component ]
* assigned-name = "urn" ":" NID ":" NSS
* NID = (alphanum) 0*30(ldh) (alphanum)
* ldh = alphanum / "-"
* NSS = pchar *(pchar / "/")
* rq-components = [ "?+" r-component ]
* [ "?=" q-component ]
* r-component = pchar *( pchar / "/" / "?" )
* q-component = pchar *( pchar / "/" / "?" )
* f-component = fragment
*/
/*
* InformalNamespaceName = "urn-" Number
* Number = DigitNonZero 0*Digit
* DigitNonZero = "1"/ "2" / "3" / "4"/ "5"
* / "6" / "7" / "8" / "9"
* Digit = "0" / DigitNonZero
*/
/*
* alphanum, fragment, and pchar from RFC 3986
*
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
alphanum = ALPHA / DIGIT
fragment = *( pchar / "/" / "?" )
pct-encoded = "%" HEXDIG HEXDIG
unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
reserved = gen-delims / sub-delims
gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
/ "*" / "+" / "," / ";" / "="
*/
// @formatter:on

/** The three parts of a URN assigned-name: the scheme, the namespace identifier (NID) and the namespace specific string (NSS). */
public record AssignedName(String scheme, String NID, String NSS) {}
/** Parse exception for URNs; both arguments are handed on to the {@code IRIParseException} constructor. */
public static class URNParseException extends IRIParseException {
    URNParseException(String entity, String msg) {
        super(entity, msg);
    }
}

// V1 - regex based
/**
 * Matches the "urn:" scheme followed by a namespace identifier and its terminating ':'.
 * RFC 8141: NID = (alphanum) 0*30(ldh) (alphanum), i.e. 2 to 32 characters,
 * starting and ending with a letter or digit, with hyphens allowed in between.
 */
private static final Pattern URN_NAMESPACE =
        Pattern.compile("^urn:[0-9a-z][0-9a-z-]{0,30}[0-9a-z]:", Pattern.CASE_INSENSITIVE);

/**
 * Check the scheme and the namespace identifier (NID) of an IRI string assumed to be
 * valid RFC 3986 syntax.
 * <p>
 * Scheme-specific violations are passed to {@code handler} as an {@link Issue}
 * together with a message.
 *
 * @param string the IRI string to check
 * @param handler receives each violation that is found
 * @return the index of the ':' that terminates the namespace identifier (the namespace
 *     specific string starts at the next index), or -1 if the string could not be processed
 */
static int analyseURN(String string, BiConsumer<Issue, String> handler) {
    int N = string.length();
    if ( ! caseInsensitivePrefix(string, "urn:") ) {
        handler.accept(Issue.urn_bad_pattern, "Failed to find the URN scheme name");
        return -1;
    }
    // Start of namespace id: immediately after "urn:".
    int startNamespace = 4;
    int x = startNamespace;
    // First character: alphanum (RFC 8141).
    char ch = charAt(string, x);
    if ( ch == EOF ) {
        handler.accept(Issue.urn_bad_nid, "No namespace id");
        return -1;
    }
    if ( ! Chars3986.isAlphaNum(ch) ) {
        handler.accept(Issue.urn_bad_nid, "Namespace id does not start with an alphanumeric ASCII character");
        return -1;
    }
    x++;
    char prevChar = EOF;
    while ( x < N ) {
        prevChar = ch;
        ch = charAt(string, x);
        if ( ch == ':' ) {
            // End of the namespace id. It can't end in a hyphen.
            if ( prevChar == '-' ) {
                handler.accept(Issue.urn_bad_nid, "Namespace id ends in '-'");
                return -1;
            }
            break;
        }
        if ( ! isLDH(ch) ) {
            handler.accept(Issue.urn_bad_nid, "Bad character in Namespace id");
            return -1;
        }
        // x-startNamespace characters precede this one, so more than 31 makes this the 33rd.
        if ( x-startNamespace > 31 ) {
            handler.accept(Issue.urn_bad_nid, "Namespace id more than 32 characters");
            return -1;
        }
        x++;
    }
    int finishNamespace = x;

    // The loop also ends when the string runs out before a ':' is seen.
    if ( ch != ':' ) {
        handler.accept(Issue.urn_bad_nid, "Namespace not terminated by ':'");
        return -1;
    }

    // NOTE(review): a too-short namespace id is reported but, unlike the other
    // namespace id problems, does not return -1 - confirm this is intended.
    if ( finishNamespace-startNamespace < 2 )
        handler.accept(Issue.urn_bad_nid, "Namespace id must be at least 2 characters");
    return finishNamespace;
}

/** LDH = letter-digit-hyphen: true for '-' or any character accepted by {@code Chars3986.isAlphaNum}. */
static boolean isLDH(char ch) {
    if ( ch == '-' )
        return true;
    return Chars3986.isAlphaNum(ch);
}

/**
 * Parse a URN string of the form {@code urn:NID:NSS} (the RFC 8141 assigned-name)
 * and print the scheme, namespace id and namespace specific string to standard output.
 * <p>
 * Scanning of the NSS stops at the first character that is neither a pchar nor '/';
 * anything after that point is not examined.
 *
 * @param string the URN string
 * @throws URNParseException if the scheme, the namespace id or the first character of the
 *     namespace specific string does not conform
 */
public static void parse(String string) {
    int N = string.length();
    if ( ! caseInsensitivePrefix(string, "urn:") )
        throw new URNParseException(string, "Does not start 'urn:'");

    // Start of namespace id: immediately after "urn:".
    int startNamespace = 4;
    int x = startNamespace;

    // NID = (alphanum) 0*30(ldh) (alphanum)
    // First character: alphanum.
    char ch = charAt(string, x);
    if ( ch == EOF )
        throw new URNParseException(string, "No namespace id");
    if ( ! Chars3986.isAlphaNum(ch) )
        throw new URNParseException(string, "Namespace id does not start with an alphanumeric ASCII character");
    x++;

    while ( x < N ) {
        ch = charAt(string, x);
        // Letters, digits and hyphens may follow the first character.
        if ( ! isLDH(ch) )
            break;
        // x-startNamespace characters precede this one, so more than 31 makes this the 33rd.
        if ( x-startNamespace > 31 )
            throw new URNParseException(string, "Namespace id more than 32 characters");
        x++;
    }

    int finishNamespace = x;
    if ( finishNamespace-startNamespace < 2 )
        throw new URNParseException(string, "Namespace id must be at least 2 characters");
    // The last character of the namespace id must be alphanum, so not a hyphen.
    if ( charAt(string, finishNamespace-1) == '-' )
        throw new URNParseException(string, "Namespace id ends in '-'");
    // ch is the character that stopped the scan, or the last NID character at end of string.
    if ( ch != ':' )
        throw new URNParseException(string, "Namespace not terminated by ':'");
    x++;
    if ( x >= N )
        throw new URNParseException(string, "Zero-length namespace specific string");

    // NSS = pchar *(pchar / "/")
    int startNSS = x;
    ch = charAt(string, x);
    if ( ! Chars3986.isPChar(ch, string, x) )
        throw new URNParseException(string, "First character of NSS is not a pChar");
    x++;

    while ( x < N ) {
        ch = charAt(string, x);
        if ( ! Chars3986.isPChar(ch, string, x) && ( ch != '/' ) )
            break;
        x++;
    }
    int finishNSS = x;

    // The scheme is the three characters before the ':' of "urn:", as written in the input.
    String scheme = string.substring(0, 3);
    String namespace = string.substring(startNamespace, finishNamespace);
    String nsSpecific = string.substring(startNSS, finishNSS);

    System.out.printf(" %s:%s:%s\n", scheme, namespace, nsSpecific);
}


/**
 * Placeholder for a regular-expression based check of a URN string.
 * The method body is empty: the argument is not inspected and nothing is thrown.
 */
public static void parseRegex(String string) {
    // Disabled check, kept for reference:
    // boolean schemeNamepace = URN_NAMESPACE.matcher(string).matches();
    // if ( ! schemeNamepace )
    //     throw new URNParseException(string, "Does not match scheme-namespaceid grammar - may not be a URN");
}

}
10 changes: 9 additions & 1 deletion iri4ld/src/main/java/org/seaborne/rfc3986/Severity.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,12 @@

package org.seaborne.rfc3986;

public enum Severity { IGNORE, ERROR, WARNING, INVALID }
/**
 * Setting for reporting issues
 */
public enum Severity {
    IGNORE, // Ignore the issue
    WARNING, // Scheme-specific issue; valid RFC3986 syntax OK
    ERROR, // Scheme-specific issue; valid RFC3986 syntax OK
    INVALID // Treat as "can't continue" e.g. RFC3986 parse error.
}
Loading

0 comments on commit 4f4c5e9

Please sign in to comment.