From f89ff3fc7c9a838c847122336d90959780e9557a Mon Sep 17 00:00:00 2001 From: ckindermann Date: Mon, 20 Jan 2025 23:15:58 -0800 Subject: [PATCH 01/12] Use SHA-256 for hashing blank node IDs --- src/ldtab/thin2thick.clj | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/ldtab/thin2thick.clj b/src/ldtab/thin2thick.clj index c304972..c382125 100644 --- a/src/ldtab/thin2thick.clj +++ b/src/ldtab/thin2thick.clj @@ -5,7 +5,9 @@ [ldtab.rdf-list-handling :as rdf-list] [ldtab.gci-handling :as gci] [cheshire.core :as cs]) - (:import [org.apache.jena.graph NodeFactory Triple Node]) + (:import [org.apache.jena.graph NodeFactory Triple Node] + [java.security MessageDigest] + [java.math BigInteger]) ;[org.apache.jena.rdf.model ModelFactory Model StmtIterator Resource Property RDFNode Statement]) (:gen-class)) @@ -16,12 +18,19 @@ (and (string? input) (str/starts-with? input "")) + (str "")) triple)) ;TODO: add support for user input prefixes (using prefix table) @@ -141,7 +150,6 @@ datatype)) :else "ERROR"))) - (defn existential-blanknode-2-triples [existential-blanknode] ;(print "existblanknode: " existential-blanknode) From 6c6f2e85aa1c14caed67e8bc0ea568b782808047 Mon Sep 17 00:00:00 2001 From: ckindermann Date: Wed, 29 Jan 2025 18:45:26 -0800 Subject: [PATCH 02/12] Fix SHA hashes --- src/ldtab/thin2thick.clj | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/src/ldtab/thin2thick.clj b/src/ldtab/thin2thick.clj index c382125..8e82237 100644 --- a/src/ldtab/thin2thick.clj +++ b/src/ldtab/thin2thick.clj @@ -12,6 +12,8 @@ (:gen-class)) (declare node-2-thick-map) +(declare sort-json) +(declare sort-string-json) (defn is-wiring-blanknode [input] @@ -30,7 +32,7 @@ (if (is-wiring-blanknode (:subject triple)) (assoc triple :subject - (str "")) + (str "")) triple)) ;TODO: add support for user input prefixes (using prefix table) @@ -152,7 +154,6 @@ (defn existential-blanknode-2-triples [existential-blanknode] - ;(print "existblanknode: " existential-blanknode) (let [blanknode (:subject existential-blanknode) object (:object existential-blanknode) datatype (:datatype existential-blanknode) @@ -162,7 +163,6 @@ :object (get (first v) "object"), :datatype (get (first v) "datatype")}) object) [existential-blanknode])] - ;(print "translated: " triples) triples)) (defn split-existential-blanknode-encoding @@ -234,6 +234,39 @@ root-triples (filter (fn [^Triple x] (contains? root (.getSubject x))) triples)] root-triples)) + +;this is the same as sort-json but keys of the JSON value are expected to be strings + + +(defn sort-string-json + "Given a JSON value, return a lexicographically ordered representation." + [m] + (cond + ; sort RDF lists + (and (map? m) + (contains? m "datatype") + (= (get m "datatype") "_JSONLIST")) + (let [sorted-list {:datatype "_JSONLIST", :object (map sort-string-json (get m "object"))}] + (if (contains? m "subject") ; top-level RDF list + (into (sorted-map) (merge sorted-list + {:subject (sort-string-json (get m "subject")) + :predicate (:predicate m) + :graph (:graph m) + :assertion (:assertion m) + :retraction (:retraction m) + :annotation (:annotation m)})) + (into (sorted-map) sorted-list))); nested RDF list + + (map? m) + (into (sorted-map) (map-on-hash-map-vals sort-string-json m)) ; sort by key + + (coll? m) + (vec (map cs/parse-string ; sort by string comparison + (sort (map #(cs/generate-string (sort-string-json %)) m)))) + + :else + m)) + ;NB: sorting transfoms keywords to strings (defn sort-json "Given a JSON value, return a lexicographically ordered representation." From b4e5f9c87f14f728f56e3ac794637b56298dc3a9 Mon Sep 17 00:00:00 2001 From: ckindermann Date: Wed, 29 Jan 2025 18:53:45 -0800 Subject: [PATCH 03/12] Change prefix from wiring to ldtab --- src/ldtab/annotation_handling.clj | 2 +- src/ldtab/thick_rdf.clj | 10 +++++----- src/ldtab/thin2thick.clj | 24 ++++++++++++------------ 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/ldtab/annotation_handling.clj b/src/ldtab/annotation_handling.clj index 7fb4733..bc069e8 100644 --- a/src/ldtab/annotation_handling.clj +++ b/src/ldtab/annotation_handling.clj @@ -126,7 +126,7 @@ The raw thick triple of an OWL annotation - {:subject wiring:blanknode:G__1130, + {:subject ldtab:blanknode:G__1130, :predicate owl:Axiom, :object {obo:IAO_0010000 [{:object obo:050-003}], owl:annotatedTarget [{:object \"literal\"}], diff --git a/src/ldtab/thick_rdf.clj b/src/ldtab/thick_rdf.clj index db3d9e7..f2f9e6c 100644 --- a/src/ldtab/thick_rdf.clj +++ b/src/ldtab/thick_rdf.clj @@ -159,10 +159,10 @@ (parse-json object) object))) -(defn is-wiring-blanknode +(defn is-ldtab-blanknode [input] (and (string? input) - (str/starts-with? input " (count v) 1)) blanknode-2-triples)) triples (remove #(contains? complex-blanknodes (:subject %)) triples) @@ -210,8 +210,8 @@ annotation (parse-json (:annotation thick-triple))] (when annotation (translate-annotation subject predicate object annotation prefix-2-base model)) - (if (is-wiring-blanknode subject-json) - model ;remove generated wiring:blank nodes + (if (is-ldtab-blanknode subject-json) + model ;remove generated ldtab:blank nodes (.add model subject predicate object)))) (defn triples-2-rdf-model-stream diff --git a/src/ldtab/thin2thick.clj b/src/ldtab/thin2thick.clj index 8e82237..dc2791c 100644 --- a/src/ldtab/thin2thick.clj +++ b/src/ldtab/thin2thick.clj @@ -15,10 +15,10 @@ (declare sort-json) (declare sort-string-json) -(defn is-wiring-blanknode +(defn is-ldtab-blanknode [input] (and (string? input) - (str/starts-with? input "")) + (str "")) triple)) ;TODO: add support for user input prefixes (using prefix table) @@ -88,9 +88,9 @@ "Given a set of triples, identify root blank nodes and add triples of the form - [wiring:blanknode:id type _:blankNode] + [ldtab:blanknode:id type _:blankNode] - where 'wiring:blanknode:id' is a newly generated subject, + where 'ldtab:blanknode:id' is a newly generated subject, type is the rdf:type of the identified root _:blankNode, and _:blankNode is the root node. @@ -104,7 +104,7 @@ the following triple would be added: - [wiring:blanknode:1, rdf:type, _:B] + [ldtab:blanknode:1, rdf:type, _:B] Explanation: We collapse blank nodes into JSON maps. @@ -118,11 +118,11 @@ blank-roots (filter (fn [^Node x] (.isBlank x)) root) ;TODO blank-leaves also need to be skolemised: ;for a given blank-leaf [s p _b:leaf] - ;we need to add the triple [_b:leaf rdf:type wiring:blanknode] + ;we need to add the triple [_b:leaf rdf:type ldtab:blanknode] ;so that we collapse the blank node into it's skolem form - additions (map (fn [^Node x] (new Triple (NodeFactory/createURI (str "wiring:blanknode:" (gensym))) - ;(NodeFactory/createURI "wiring:blanknode") + additions (map (fn [^Node x] (new Triple (NodeFactory/createURI (str "ldtab:blanknode:" (gensym))) + ;(NodeFactory/createURI "ldtab:blanknode") (get-type (get subject-to-triples x)) x)) blank-roots)] @@ -167,8 +167,8 @@ (defn split-existential-blanknode-encoding [triples] - (let [existential-blanknodes (filter (fn [x] (is-wiring-blanknode (:subject x))) triples) - triples (remove (fn [x] (is-wiring-blanknode (:subject x))) triples) + (let [existential-blanknodes (filter (fn [x] (is-ldtab-blanknode (:subject x))) triples) + triples (remove (fn [x] (is-ldtab-blanknode (:subject x))) triples) existential-blanknode-triples (mapcat existential-blanknode-2-triples existential-blanknodes) triples (concat existential-blanknode-triples triples)] triples)) From 90260a2e61b7ca4fde1871f0ef854fe6cdcc9e9e Mon Sep 17 00:00:00 2001 From: ckindermann Date: Mon, 25 Aug 2025 23:42:47 -0700 Subject: [PATCH 04/12] Use full IRIs for hashing blank node IDs --- src/ldtab/thin2thick.clj | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/ldtab/thin2thick.clj b/src/ldtab/thin2thick.clj index dc2791c..e451694 100644 --- a/src/ldtab/thin2thick.clj +++ b/src/ldtab/thin2thick.clj @@ -1,6 +1,7 @@ (ns ldtab.thin2thick (:require [clojure.set :as set] [clojure.string :as str] + [clojure.walk :as walk] [ldtab.annotation-handling :as ann] [ldtab.rdf-list-handling :as rdf-list] [ldtab.gci-handling :as gci] @@ -14,6 +15,7 @@ (declare node-2-thick-map) (declare sort-json) (declare sort-string-json) +(declare expand-curies-in-json) (defn is-ldtab-blanknode [input] @@ -28,12 +30,20 @@ (format "%064x" (BigInteger. 1 (.digest md))))) (defn hash-existential-subject-blanknode - [triple] + ([triple] (if (is-ldtab-blanknode (:subject triple)) - (assoc triple - :subject - (str "")) + (let [string-to-hash (cs/generate-string (sort-string-json (cs/parse-string (cs/generate-string (:object triple)))))] + (assoc triple + :subject + (str "")) + ) triple)) + ([triple iri2prefix] + (let [object (:object triple) + expansion (expand-curies-in-json object iri2prefix) + triple (assoc triple :object expansion) + hash-triple (hash-existential-subject-blanknode triple)] + hash-triple))) ;TODO: add support for user input prefixes (using prefix table) (defn curify @@ -52,6 +62,23 @@ (str/replace uri (:base found) (str (:prefix found) ":")) (str "<" uri ">")))) +(defn expand-with + [^String curie iri2prefix] + (let [[prefix local] (str/split curie #":" 2) + found (some #(when (= (:prefix %) prefix) %) iri2prefix)] + (if found + (str "<" (:base found) local ">") + curie))) + +(defn expand-curies-in-json + [json iri2prefix] + (walk/postwalk + (fn [x] + (if (string? x) + (expand-with x iri2prefix) + x)) + json)) + (defn map-on-hash-map-vals "Given a hashmap m and a function f, apply f to all values of m. @@ -368,7 +395,7 @@ %) gcis) rdf-lists (map rdf-list/encode-rdf-list annotations) sorted (map sort-json rdf-lists) - hashed (map hash-existential-subject-blanknode sorted) + hashed (map #(hash-existential-subject-blanknode % iri2prefix) sorted) split (split-existential-blanknode-encoding hashed) normalised (map #(cs/parse-string (cs/generate-string %)) split)];TODO: stringify keys - this is a (probably an inefficient?) workaround normalised))) From c061a14fcbb0b18a69ce1fafa36705924a831b1f Mon Sep 17 00:00:00 2001 From: ckindermann Date: Sun, 14 Sep 2025 23:42:08 -0700 Subject: [PATCH 05/12] Convert IRIs back to CURIEs after blank node hashing --- src/ldtab/thin2thick.clj | 65 +++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/src/ldtab/thin2thick.clj b/src/ldtab/thin2thick.clj index e451694..653216e 100644 --- a/src/ldtab/thin2thick.clj +++ b/src/ldtab/thin2thick.clj @@ -15,7 +15,7 @@ (declare node-2-thick-map) (declare sort-json) (declare sort-string-json) -(declare expand-curies-in-json) +;(declare expand-curies-in-json) (defn is-ldtab-blanknode [input] @@ -29,22 +29,6 @@ (.update md (.getBytes input "UTF-8")) (format "%064x" (BigInteger. 1 (.digest md))))) -(defn hash-existential-subject-blanknode - ([triple] - (if (is-ldtab-blanknode (:subject triple)) - (let [string-to-hash (cs/generate-string (sort-string-json (cs/parse-string (cs/generate-string (:object triple)))))] - (assoc triple - :subject - (str "")) - ) - triple)) - ([triple iri2prefix] - (let [object (:object triple) - expansion (expand-curies-in-json object iri2prefix) - triple (assoc triple :object expansion) - hash-triple (hash-existential-subject-blanknode triple)] - hash-triple))) - ;TODO: add support for user input prefixes (using prefix table) (defn curify [^String s] @@ -63,6 +47,7 @@ (str "<" uri ">")))) (defn expand-with + "Turn a CURIE into a full IRI using iri2prefix" [^String curie iri2prefix] (let [[prefix local] (str/split curie #":" 2) found (some #(when (= (:prefix %) prefix) %) iri2prefix)] @@ -71,6 +56,7 @@ curie))) (defn expand-curies-in-json + "Walk a (parsed) JSON value and expand any CURIEs into full IRis." [json iri2prefix] (walk/postwalk (fn [x] @@ -79,6 +65,51 @@ x)) json)) +(defn contract-with + "Turn a full IRI (e.g., ) into a CURIE using iri2prefix, + If no base matches, return the original string unchanged. + Prefers the *longest* matching base" + ^String + [^String s iri2prefix] + (let [iri (if (and (str/starts-with? s "<") (str/ends-with? s ">")) + (subs s 1 (dec (count s))) ; strip angle brackets + s) + candidates (seq (filter #(str/starts-with? iri (:base %)) iri2prefix)) + best (when candidates + (apply max-key #(count (:base %)) candidates))] + (if best + (str (:prefix best) ":" (subs iri (count (:base best)))) + s))) + +(defn contract-iris-in-json + "Walk a (parsed) JSON value and contract any string IRIs into CURIEs." + [json iri2prefix] + (walk/postwalk + (fn [x] + (if (string? x) + (contract-with x iri2prefix) + x)) + json)) + + +(defn hash-existential-subject-blanknode + ([triple] + (if (is-ldtab-blanknode (:subject triple)) + (let [string-to-hash (cs/generate-string (sort-string-json (cs/parse-string (cs/generate-string (:object triple)))))] + (assoc triple + :subject + (str "")) + ) + triple)) + ([triple iri2prefix] + (let [object (:object triple) + expansion (expand-curies-in-json object iri2prefix) + triple (assoc triple :object expansion) + hash-triple (hash-existential-subject-blanknode triple) + contraction (contract-iris-in-json hash-triple iri2prefix)] + contraction))) + + (defn map-on-hash-map-vals "Given a hashmap m and a function f, apply f to all values of m. From 74e60bb2415d4b8bd8979b5b623f9de0a53bd2aa Mon Sep 17 00:00:00 2001 From: ckindermann Date: Mon, 27 Oct 2025 23:36:29 -0700 Subject: [PATCH 06/12] Split JSON objects in subject column --- src/ldtab/thin2thick.clj | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/src/ldtab/thin2thick.clj b/src/ldtab/thin2thick.clj index 653216e..4a00c1f 100644 --- a/src/ldtab/thin2thick.clj +++ b/src/ldtab/thin2thick.clj @@ -231,6 +231,36 @@ triples (concat existential-blanknode-triples triples)] triples)) + +(defn is-subject-object + [triple] + (map? (:subject triple))) + + +(defn subject-json-object-2-triples + [triple] + (let [subject (:subject triple) + string-to-hash (cs/generate-string (sort-string-json subject)) + blanknode (str "") + triples (map (fn [[k v]] {:subject blanknode, + :predicate k, + :object (get (first v) "object"), + :datatype (get (first v) "datatype")}) subject) + triples (conj triples + {:subject blanknode, + :predicate (:predicate triple), + :object (:object triple), + :datatype (:datatype triple)})] + triples)) + +(defn split-subject-json-objects + [triples] + (let [subject-objects (filter (fn [x] (is-subject-object x)) triples) + triples (remove (fn [x] (is-subject-object x)) triples) + subject-object-triples (mapcat subject-json-object-2-triples subject-objects) + triples (concat subject-object-triples triples)] + triples)) + (defn encode-object "Given a triple t = [s p o] and a map from subject nodes to its triples, returns predicate map for the o" @@ -411,8 +441,9 @@ rdf-lists (map rdf-list/encode-rdf-list annotations) sorted (map sort-json rdf-lists) hashed (map hash-existential-subject-blanknode sorted) - split (split-existential-blanknode-encoding hashed) - normalised (map #(cs/parse-string (cs/generate-string %)) split)];TODO: stringify keys - this is a (probably an inefficient?) workaround + split-objects (split-existential-blanknode-encoding hashed) + split-subjects (split-subject-json-objects split-objects) + normalised (map #(cs/parse-string (cs/generate-string %)) split-subjects)];TODO: stringify keys - this is a (probably an inefficient?) workaround normalised)) ([triples iri2prefix] (let [raw-thick-triples (thin-2-thick-raw triples iri2prefix) @@ -427,6 +458,7 @@ rdf-lists (map rdf-list/encode-rdf-list annotations) sorted (map sort-json rdf-lists) hashed (map #(hash-existential-subject-blanknode % iri2prefix) sorted) - split (split-existential-blanknode-encoding hashed) - normalised (map #(cs/parse-string (cs/generate-string %)) split)];TODO: stringify keys - this is a (probably an inefficient?) workaround + split-objects (split-existential-blanknode-encoding hashed) + split-subjects (split-subject-json-objects split-objects) + normalised (map #(cs/parse-string (cs/generate-string %)) split-subjects)];TODO: stringify keys - this is a (probably an inefficient?) workaround normalised))) From f29c447cb8a0b442ada108a6ab34ac8773d95d21 Mon Sep 17 00:00:00 2001 From: ckindermann Date: Mon, 24 Nov 2025 22:15:53 -0800 Subject: [PATCH 07/12] Only translate blanknodes --- src/ldtab/thin2thick.clj | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/ldtab/thin2thick.clj b/src/ldtab/thin2thick.clj index 4a00c1f..3e63e90 100644 --- a/src/ldtab/thin2thick.clj +++ b/src/ldtab/thin2thick.clj @@ -102,12 +102,14 @@ ) triple)) ([triple iri2prefix] - (let [object (:object triple) - expansion (expand-curies-in-json object iri2prefix) - triple (assoc triple :object expansion) - hash-triple (hash-existential-subject-blanknode triple) - contraction (contract-iris-in-json hash-triple iri2prefix)] - contraction))) + (if (is-ldtab-blanknode (:subject triple)) + (let [object (:object triple) + expansion (expand-curies-in-json object iri2prefix) + triple (assoc triple :object expansion) + hash-triple (hash-existential-subject-blanknode triple) + contraction (contract-iris-in-json hash-triple iri2prefix)] + contraction) + triple))) (defn map-on-hash-map-vals From e6fb71532c1f742c567c07fc7ea7ee2a91374c2a Mon Sep 17 00:00:00 2001 From: ckindermann Date: Mon, 1 Dec 2025 17:44:42 -0800 Subject: [PATCH 08/12] Bump checkout action --- .github/workflows/ldtab-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ldtab-tests.yml b/.github/workflows/ldtab-tests.yml index 8537621..73debbb 100644 --- a/.github/workflows/ldtab-tests.yml +++ b/.github/workflows/ldtab-tests.yml @@ -17,7 +17,7 @@ jobs: uses: actions/checkout@v2 - name: Set up JDK 17 - uses: actions/setup-java@v2 + uses: actions/setup-java@v4 with: distribution: 'adopt' java-version: '17' From 959299461f3e5d7829698da0d2fe19bde25cc9d6 Mon Sep 17 00:00:00 2001 From: ckindermann Date: Mon, 1 Dec 2025 17:55:48 -0800 Subject: [PATCH 09/12] Fix typo --- .github/workflows/ldtab-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ldtab-tests.yml b/.github/workflows/ldtab-tests.yml index 73debbb..3b47d51 100644 --- a/.github/workflows/ldtab-tests.yml +++ b/.github/workflows/ldtab-tests.yml @@ -14,10 +14,10 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Set up JDK 17 - uses: actions/setup-java@v4 + uses: actions/setup-java@v2 with: distribution: 'adopt' java-version: '17' From cb79f910b5511af58519f3e258df8cfd69d37384 Mon Sep 17 00:00:00 2001 From: ckindermann Date: Mon, 1 Dec 2025 17:59:07 -0800 Subject: [PATCH 10/12] Bump action cache (instead of checkout) --- .github/workflows/ldtab-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ldtab-tests.yml b/.github/workflows/ldtab-tests.yml index 3b47d51..514aeda 100644 --- a/.github/workflows/ldtab-tests.yml +++ b/.github/workflows/ldtab-tests.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v2 - name: Set up JDK 17 uses: actions/setup-java@v2 @@ -33,7 +33,7 @@ jobs: cljfmt: 0.10.2 # cljfmt - name: Cache Leiningen dependencies - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/project.clj') }} From 60f8310f1568d9c3d52359e38fa65f70c0916bf2 Mon Sep 17 00:00:00 2001 From: ckindermann Date: Mon, 1 Dec 2025 18:02:57 -0800 Subject: [PATCH 11/12] Bump older GitHub actions --- .github/workflows/ldtab-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ldtab-tests.yml b/.github/workflows/ldtab-tests.yml index 514aeda..d17c475 100644 --- a/.github/workflows/ldtab-tests.yml +++ b/.github/workflows/ldtab-tests.yml @@ -14,12 +14,12 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Set up JDK 17 - uses: actions/setup-java@v2 + uses: actions/setup-java@v4 with: - distribution: 'adopt' + distribution: 'temurin' java-version: '17' - name: Install clojure tools From cbfe0fc405ef743c535c041e758fb561a96f5499 Mon Sep 17 00:00:00 2001 From: ckindermann Date: Mon, 1 Dec 2025 18:04:48 -0800 Subject: [PATCH 12/12] Bump Clojure setup --- .github/workflows/ldtab-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ldtab-tests.yml b/.github/workflows/ldtab-tests.yml index d17c475..e66d6bb 100644 --- a/.github/workflows/ldtab-tests.yml +++ b/.github/workflows/ldtab-tests.yml @@ -23,7 +23,7 @@ jobs: java-version: '17' - name: Install clojure tools - uses: DeLaGuardo/setup-clojure@12.5 + uses: DeLaGuardo/setup-clojure@13.4 with: # Install just one or all simultaneously # The value must indicate a particular version of the tool, or use 'latest'