Skip to content

Commit 95640db

Browse files
committed
#6067 - Upgrade to RDF4J 6.x
- Strip URL-embedded credentials before reaching Apache HttpClient 5 (used by RDF4J 6, which rejects URIs with a userinfo component)
- Sanitize incoming SPARQL URLs in getRemoteConfig(url) — userinfo is stripped at config-creation time and a warning is logged
- Add eager startup migration: walk every REMOTE KB at onContextRefreshed, split any embedded user:pass into a clean URL plus BasicAuthenticationTraits (only when the KB has no explicit auth traits yet), persist via updateKnowledgeBase
- Factor out splitUrlUserInfo helper and refactor applyBasicHttpAuthenticationConfigurationFromUrl to share it as a runtime safety net
- buildSparqlRepository test helper splits userinfo and applies credentials via setUsernameAndPassword so existing test fixtures keep passing URLs with embedded credentials
1 parent c2dbbde commit 95640db

2 files changed

Lines changed: 133 additions & 15 deletions

File tree

inception/inception-kb/src/main/java/de/tudarmstadt/ukp/inception/kb/KnowledgeBaseServiceImpl.java

Lines changed: 99 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package de.tudarmstadt.ukp.inception.kb;
1919

2020
import static de.tudarmstadt.ukp.inception.kb.RepositoryType.LOCAL;
21+
import static de.tudarmstadt.ukp.inception.kb.RepositoryType.REMOTE;
2122
import static de.tudarmstadt.ukp.inception.kb.http.PerThreadSslCheckingHttpClientUtils.restoreSslVerification;
2223
import static de.tudarmstadt.ukp.inception.kb.http.PerThreadSslCheckingHttpClientUtils.skipCertificateChecks;
2324
import static de.tudarmstadt.ukp.inception.kb.querybuilder.SPARQLQueryBuilder.DEFAULT_LIMIT;
@@ -273,6 +274,9 @@ void onContextRefreshed()
273274
if (LOCAL == kb.getType()) {
274275
reconfigureLocalKnowledgeBase(kb);
275276
}
277+
else if (REMOTE == kb.getType()) {
278+
migrateUrlEmbeddedCredentials(kb);
279+
}
276280
}
277281

278282
if (!orphanedIDs.isEmpty()) {
@@ -700,7 +704,14 @@ public RepositoryImplConfig getNativeConfig()
700704
@Override
701705
public RepositoryImplConfig getRemoteConfig(String url)
702706
{
703-
return new SPARQLRepositoryConfig(url);
707+
var split = splitUrlUserInfo(url);
708+
if (split.userInfo() != null) {
709+
LOG.warn(
710+
"URL [{}] contains embedded credentials. Stripping them - configure "
711+
+ "authentication via the KB auth-traits UI instead.",
712+
split.cleanUrl());
713+
}
714+
return new SPARQLRepositoryConfig(split.cleanUrl());
704715
}
705716

706717
@Override
@@ -833,25 +844,100 @@ private void addAdditionalHeaders(SPARQLRepository aSparqlRepo, Map<String, Stri
833844
private void applyBasicHttpAuthenticationConfigurationFromUrl(
834845
SPARQLRepositoryConfig sparqlRepoConfig, SPARQLRepository sparqlRepo)
835846
{
836-
var uri = URI.create(sparqlRepoConfig.getQueryEndpointUrl());
837-
var userInfo = uri.getUserInfo();
838-
if (isNotBlank(userInfo)) {
839-
userInfo = userInfo.trim();
840-
String username;
841-
String password;
842-
if (userInfo.contains(":")) {
843-
username = substringBefore(userInfo, ":");
844-
password = substringAfter(userInfo, ":");
847+
var split = splitUrlUserInfo(sparqlRepoConfig.getQueryEndpointUrl());
848+
if (split.user() != null) {
849+
sparqlRepo.setUsernameAndPassword(split.user(), split.password());
850+
}
851+
}
852+
853+
/**
854+
* Migrates URL-embedded credentials ({@code http://user:pass@host/...}) on a REMOTE KB into
855+
* {@link BasicAuthenticationTraits} and a cleaned URL. Apache HttpClient 5 (used by RDF4J 6)
856+
* rejects URIs with a userinfo component outright, so legacy configs that worked under RDF4J 5
857+
* must be normalized before the next connection attempt. Invoked once per KB at startup.
858+
*/
859+
private void migrateUrlEmbeddedCredentials(KnowledgeBase aKB)
860+
{
861+
try {
862+
var cfg = getKnowledgeBaseConfig(aKB);
863+
if (!(cfg instanceof SPARQLRepositoryConfig sparqlCfg)) {
864+
return;
865+
}
866+
867+
var queryUrl = sparqlCfg.getQueryEndpointUrl();
868+
var updateUrl = sparqlCfg.getUpdateEndpointUrl();
869+
var querySplit = splitUrlUserInfo(queryUrl);
870+
var updateSplit = splitUrlUserInfo(updateUrl);
871+
872+
if (querySplit.userInfo() == null && updateSplit.userInfo() == null) {
873+
return;
874+
}
875+
876+
var newCfg = updateUrl == null //
877+
? new SPARQLRepositoryConfig(querySplit.cleanUrl()) //
878+
: new SPARQLRepositoryConfig(querySplit.cleanUrl(), updateSplit.cleanUrl());
879+
880+
// Prefer the query URL's credentials; fall back to the update URL's.
881+
var credSource = querySplit.user() != null ? querySplit : updateSplit;
882+
883+
var traits = isNotBlank(aKB.getTraits()) ? readTraits(aKB) : null;
884+
if (traits == null) {
885+
traits = new RemoteRepositoryTraits();
886+
}
887+
var hadAuth = traits.getAuthentication() != null;
888+
if (!hadAuth && credSource.user() != null) {
889+
var basic = new BasicAuthenticationTraits();
890+
basic.setUsername(credSource.user());
891+
basic.setPassword(credSource.password());
892+
traits.setAuthentication(basic);
893+
aKB.setTraits(JSONUtil.toJsonString(traits));
894+
}
895+
896+
updateKnowledgeBase(aKB, newCfg);
897+
898+
if (hadAuth) {
899+
LOG.info(
900+
"Migrated KB [{}]: stripped URL-embedded credentials "
901+
+ "(KB already had explicit auth traits configured).",
902+
aKB.getName());
845903
}
846904
else {
847-
username = userInfo;
848-
password = "";
905+
LOG.info("Migrated KB [{}]: moved URL-embedded credentials into "
906+
+ "basic-auth traits.", aKB.getName());
849907
}
908+
}
909+
catch (Exception e) {
910+
LOG.error(
911+
"Unable to migrate URL-embedded credentials for KB [{}]. "
912+
+ "Remote connections may fail until the URL is corrected manually.",
913+
aKB.getName(), e);
914+
}
915+
}
850916

851-
sparqlRepo.setUsernameAndPassword(username, password);
917+
/**
918+
* Parses a URL, returning the userinfo (or {@code null}) and the URL with the userinfo
919+
* stripped. Returns {@code (cleanUrl=null, user=null, password=null, userInfo=null)} for a
920+
* {@code null} input URL.
921+
*/
922+
private static UrlUserInfo splitUrlUserInfo(String aUrl)
923+
{
924+
if (aUrl == null) {
925+
return new UrlUserInfo(null, null, null, null);
852926
}
927+
var uri = URI.create(aUrl);
928+
var userInfo = uri.getUserInfo();
929+
if (!isNotBlank(userInfo)) {
930+
return new UrlUserInfo(aUrl, null, null, null);
931+
}
932+
userInfo = userInfo.trim();
933+
var user = userInfo.contains(":") ? substringBefore(userInfo, ":") : userInfo;
934+
var password = userInfo.contains(":") ? substringAfter(userInfo, ":") : "";
935+
var cleanUrl = aUrl.replace(uri.getRawUserInfo() + "@", "");
936+
return new UrlUserInfo(cleanUrl, user, password, userInfo);
853937
}
854938

939+
/**
 * Result of splitting a URL into its userinfo and the remaining URL.
 *
 * @param cleanUrl
 *            the URL without userinfo; the unchanged URL if it had none; {@code null} if the
 *            input URL was {@code null}.
 * @param user
 *            the user name portion of the userinfo, or {@code null} if there is no userinfo.
 * @param password
 *            the password portion of the userinfo (empty string if the userinfo has no
 *            {@code :} separator), or {@code null} if there is no userinfo.
 * @param userInfo
 *            the trimmed userinfo, or {@code null} if the URL has no (or only blank)
 *            userinfo.
 */
private record UrlUserInfo(String cleanUrl, String user, String password, String userInfo) {}
940+
855941
@SuppressWarnings("resource")
856942
@Override
857943
public void importData(KnowledgeBase kb, String aFilename, InputStream aIS)

inception/inception-kb/src/test/java/de/tudarmstadt/ukp/inception/kb/querybuilder/SPARQLQueryBuilderLocalTestScenarios.java

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.io.InputStream;
3939
import java.lang.invoke.MethodHandles;
4040
import java.lang.reflect.Method;
41+
import java.net.URI;
4142
import java.util.List;
4243
import java.util.Map;
4344
import java.util.Set;
@@ -367,7 +368,9 @@ public Scenario(String aName,
367368

368369
static Repository buildSparqlRepository(String aUrl)
369370
{
370-
var repo = new SPARQLRepository(aUrl);
371+
var creds = extractCredentials(aUrl);
372+
var repo = new SPARQLRepository(creds.urlWithoutUserInfo());
373+
applyCredentials(repo, creds);
371374
repo.setHttpClient(newPerThreadSslCheckingHttpClient());
372375
repo.setAdditionalHttpHeaders(Map.of("User-Agent", "INCEpTION/0.0.1-SNAPSHOT"));
373376
repo.init();
@@ -376,13 +379,42 @@ static Repository buildSparqlRepository(String aUrl)
376379

377380
static Repository buildSparqlRepository(String aQueryUrl, String aUpdateUrl)
378381
{
379-
var repo = new SPARQLRepository(aQueryUrl, aUpdateUrl);
382+
var queryCreds = extractCredentials(aQueryUrl);
383+
var updateCreds = extractCredentials(aUpdateUrl);
384+
var repo = new SPARQLRepository(queryCreds.urlWithoutUserInfo(),
385+
updateCreds.urlWithoutUserInfo());
386+
applyCredentials(repo, queryCreds);
380387
repo.setHttpClient(newPerThreadSslCheckingHttpClient());
381388
repo.setAdditionalHttpHeaders(Map.of("User-Agent", "INCEpTION/0.0.1-SNAPSHOT"));
382389
repo.init();
383390
return repo;
384391
}
385392

393+
/**
 * A URL split into the URL without its userinfo component and the credentials taken from
 * that userinfo. {@code user} and {@code password} are {@code null} if the URL has no
 * userinfo.
 */
private record UrlCredentials(String urlWithoutUserInfo, String user, String password) {}
394+
395+
// Apache HttpClient 5 (used by RDF4J 6's default HTTP client) rejects URIs with a userinfo
396+
// component. Split user:pass@ out of the URL and pass it via setUsernameAndPassword instead.
397+
private static UrlCredentials extractCredentials(String aUrl)
398+
{
399+
var uri = URI.create(aUrl);
400+
var userInfo = uri.getRawUserInfo();
401+
if (userInfo == null) {
402+
return new UrlCredentials(aUrl, null, null);
403+
}
404+
var sep = userInfo.indexOf(':');
405+
var user = sep < 0 ? userInfo : userInfo.substring(0, sep);
406+
var password = sep < 0 ? "" : userInfo.substring(sep + 1);
407+
var stripped = aUrl.replace(userInfo + "@", "");
408+
return new UrlCredentials(stripped, user, password);
409+
}
410+
411+
private static void applyCredentials(SPARQLRepository aRepo, UrlCredentials aCreds)
412+
{
413+
if (aCreds.user() != null) {
414+
aRepo.setUsernameAndPassword(aCreds.user(), aCreds.password());
415+
}
416+
}
417+
386418
/**
387419
* Checks that {@code SPARQLQueryBuilder#exists(RepositoryConnection, boolean)} can return
388420
* {@code true} by querying for a list of all classes in {@link #DATA_CLASS_RDFS_HIERARCHY}

0 commit comments

Comments
 (0)