From 27d178d42837661183e0765680c51c21a46870ed Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Sun, 7 Jul 2024 00:41:24 +0200 Subject: [PATCH 01/13] fix #157: split function accounting for quotes Added Util.splitCSVLine() function that splits a CSV-formatted string into an array of tokens, accounting for quotes (single and double) and escape characters. --- src/persist/Adapter.lua | 2 +- src/util/Util.lua | 62 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/persist/Adapter.lua b/src/persist/Adapter.lua index 342e19d..acb4bb5 100644 --- a/src/persist/Adapter.lua +++ b/src/persist/Adapter.lua @@ -28,7 +28,7 @@ function Adapter.loadPolicyLine(line, model) return end - local tokens = Util.split(line, ",") + local tokens = Util.splitCSVLine(line) local key = tokens[1] local sec = key:sub(1, 1) diff --git a/src/util/Util.lua b/src/util/Util.lua index 462a1c3..b2fba09 100644 --- a/src/util/Util.lua +++ b/src/util/Util.lua @@ -252,4 +252,66 @@ function Util.printTable(t) return s end +function Util.splitCSVLine(line, sep, trimFields) + local result = {} + local i = 1 + local inQuotes = false + local quoteChar = nil + local escaping = false + local field = "" + + if sep == nil then sep = ',' end + if trimFields == nil then trimFields = true end + + -- Loop over the characters of the line + while i <= #line do + local char = line:sub(i, i) + + -- Check if we found the escape character and are not inside single quotes + if char == '\\' and quoteChar ~= '\'' then + -- The next character will be escaped (added directly to the token) + escaping = true + else + if escaping then + field = field .. char + escaping = false + else + -- Check if we are already inside quotes + if inQuotes then + -- Check if we can close quoted text + if char == quoteChar then + inQuotes = false + quoteChar = nil + else + field = field .. char + end + else -- not in quotes + if char == '"' or char == '\'' then + inQuotes = true + quoteChar = char + elseif char == ',' then + if trimFields then + field = Util.trim(field) + end + + table.insert(result, field) + field = "" + else + field = field .. char + end + end + end + end + i = i + 1 + end + + -- Add the last field (since it won't have the delimiter after it) + if trimFields then + field = Util.trim(field) + end + table.insert(result, field) + + return result +end + return Util \ No newline at end of file From 0a98b022accde6e0435f3f633f38df2d5fbf8096 Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Sun, 7 Jul 2024 10:36:25 +0200 Subject: [PATCH 02/13] fix CI warning Warning: "src/util/Util.lua:263:24: (W311) value assigned to variable sep is unused" --- src/util/Util.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/Util.lua b/src/util/Util.lua index b2fba09..f99685f 100644 --- a/src/util/Util.lua +++ b/src/util/Util.lua @@ -289,7 +289,7 @@ function Util.splitCSVLine(line, sep, trimFields) if char == '"' or char == '\'' then inQuotes = true quoteChar = char - elseif char == ',' then + elseif char == sep then if trimFields then field = Util.trim(field) end From c1c2b4f717039a2b1866fdd73b3a7c292283b41b Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Mon, 8 Jul 2024 19:40:38 +0200 Subject: [PATCH 03/13] update split function - renamed from splitCSVLine to splitEnhanced, since it's not compliant to RFC 4180 and it could be deceiving; - added optional parameters; - now throws error if quotes are not closed; --- src/persist/Adapter.lua | 2 +- src/util/Util.lua | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/persist/Adapter.lua b/src/persist/Adapter.lua index acb4bb5..43290fd 100644 --- a/src/persist/Adapter.lua +++ b/src/persist/Adapter.lua @@ -28,7 +28,7 @@ function Adapter.loadPolicyLine(line, model) return end - local tokens = Util.splitCSVLine(line) + local tokens = Util.splitEnhanced(line, ',', true, true, true) local key = tokens[1] local sec = key:sub(1, 1) diff --git a/src/util/Util.lua b/src/util/Util.lua index f99685f..68f1ad1 100644 --- a/src/util/Util.lua +++ b/src/util/Util.lua @@ -252,23 +252,25 @@ function Util.printTable(t) return s end -function Util.splitCSVLine(line, sep, trimFields) +function Util.splitEnhanced(line, sep, trimFields, allowEscapes, allowLiterals) local result = {} local i = 1 local inQuotes = false - local quoteChar = nil + local quotesChar = nil local escaping = false local field = "" if sep == nil then sep = ',' end if trimFields == nil then trimFields = true end + if allowEscapes == nil then allowEscapes = false end + if allowLiterals == nil then allowLiterals = false end -- Loop over the characters of the line while i <= #line do local char = line:sub(i, i) -- Check if we found the escape character and are not inside single quotes - if char == '\\' and quoteChar ~= '\'' then + if allowEscapes and char == '\\' and (allowLiterals and quotesChar ~= '\'') then -- The next character will be escaped (added directly to the token) escaping = true else @@ -279,16 +281,16 @@ function Util.splitCSVLine(line, sep, trimFields) -- Check if we are already inside quotes if inQuotes then -- Check if we can close quoted text - if char == quoteChar then + if char == quotesChar then inQuotes = false - quoteChar = nil + quotesChar = nil else field = field .. char end else -- not in quotes - if char == '"' or char == '\'' then + if char == '"' or (allowLiterals and char == '\'') then inQuotes = true - quoteChar = char + quotesChar = char elseif char == sep then if trimFields then field = Util.trim(field) @@ -304,6 +306,11 @@ function Util.splitCSVLine(line, sep, trimFields) end i = i + 1 end + + -- Throw error if there are quotes left open + if inQuotes then + error("Unmatched quotes: " .. quotesChar) + end -- Add the last field (since it won't have the delimiter after it) if trimFields then From 3f32390d9d9f5c08e81750f5e811a21fad1376ff Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Mon, 8 Jul 2024 23:13:34 +0200 Subject: [PATCH 04/13] update Util.splitEnhanced() - fixed its behaviour to comply with Casbin documentation (see the note at https://casbin.org/docs/policy-storage#loading-policy-from-a-csv-file and issue 886 at casbin/casbin): "If your file contains commas and double quotes, you should enclose the field in double quotes and double any embedded double quotes." Therefore I removed the extra behaviour related to single quotes ' and escape character \ and refactored the function. --- src/persist/Adapter.lua | 2 +- src/util/Util.lua | 77 ++++++++++++++++++++++------------------- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/src/persist/Adapter.lua b/src/persist/Adapter.lua index 43290fd..fc0e74d 100644 --- a/src/persist/Adapter.lua +++ b/src/persist/Adapter.lua @@ -28,7 +28,7 @@ function Adapter.loadPolicyLine(line, model) return end - local tokens = Util.splitEnhanced(line, ',', true, true, true) + local tokens = Util.splitEnhanced(line, ',', true) local key = tokens[1] local sec = key:sub(1, 1) diff --git a/src/util/Util.lua b/src/util/Util.lua index 68f1ad1..69e3946 100644 --- a/src/util/Util.lua +++ b/src/util/Util.lua @@ -252,64 +252,71 @@ function Util.printTable(t) return s end -function Util.splitEnhanced(line, sep, trimFields, allowEscapes, allowLiterals) +function Util.isOnlyWhitespaces(str) + return str:match("^%s*$") ~= nil +end + +function Util.splitEnhanced(line, sep, trimFields) local result = {} local i = 1 - local inQuotes = false - local quotesChar = nil + local quotedField local escaping = false local field = "" if sep == nil then sep = ',' end if trimFields == nil then trimFields = true end - if allowEscapes == nil then allowEscapes = false end - if allowLiterals == nil then allowLiterals = false end -- Loop over the characters of the line while i <= #line do local char = line:sub(i, i) - - -- Check if we found the escape character and are not inside single quotes - if allowEscapes and char == '\\' and (allowLiterals and quotesChar ~= '\'') then - -- The next character will be escaped (added directly to the token) - escaping = true - else - if escaping then - field = field .. char - escaping = false - else - -- Check if we are already inside quotes - if inQuotes then - -- Check if we can close quoted text - if char == quotesChar then - inQuotes = false - quotesChar = nil - else - field = field .. char + + -- Check if it's the first character and it's a double quote. + if Util.isOnlyWhitespaces(field) and char == '"' then + -- Then this field is a quoted field + quotedField = true + else -- Not first character or not " + if quotedField then + if escaping then + if char == sep then + if trimFields then + field = Util.trim(field) + end + + table.insert(result, field) + field = "" + quotedField = false + else + field = field .. char + escaping = false + end + else -- Not escaping + if char == '"' then + escaping = true + else + field = field .. char + end end - else -- not in quotes - if char == '"' or (allowLiterals and char == '\'') then - inQuotes = true - quotesChar = char - elseif char == sep then + + else -- Not quotedField + if char == sep then if trimFields then field = Util.trim(field) - end - - table.insert(result, field) - field = "" + end + + table.insert(result, field) + field = "" else field = field .. char end end - end end + i = i + 1 end -- Throw error if there are quotes left open - if inQuotes then - error("Unmatched quotes: " .. quotesChar) + if quotedField then + error("Unmatched quotes.") end -- Add the last field (since it won't have the delimiter after it) From 27b9737112c20fc46bc32b64e249f564345e350a Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Mon, 8 Jul 2024 23:13:56 +0200 Subject: [PATCH 05/13] added example with double quotes --- examples/basic_model_with_regex.conf | 11 +++++++++++ examples/basic_policy_with_regex.csv | 3 +++ 2 files changed, 14 insertions(+) create mode 100644 examples/basic_model_with_regex.conf create mode 100644 examples/basic_policy_with_regex.csv diff --git a/examples/basic_model_with_regex.conf b/examples/basic_model_with_regex.conf new file mode 100644 index 0000000..72346c6 --- /dev/null +++ b/examples/basic_model_with_regex.conf @@ -0,0 +1,11 @@ +[request_definition] +r = sub, obj, act + +[policy_definition] +p = sub, obj, act + +[policy_effect] +e = some(where (p.eft == allow)) + +[matchers] +m = regexMatch(r.sub == p.sub) && regexMatch(r.obj == p.obj) && regexMatch(r.act == p.act) \ No newline at end of file diff --git a/examples/basic_policy_with_regex.csv b/examples/basic_policy_with_regex.csv new file mode 100644 index 0000000..d28d36b --- /dev/null +++ b/examples/basic_policy_with_regex.csv @@ -0,0 +1,3 @@ +p, ^admin$, .*, .* +p, ^user.*, "^/users/[a-zA-Z0-9]{4-10}$", PUT|POST|PATCH|GET|OPTIONS +p, ^guest$, ^/guest/.*, GET|OPTIONS \ No newline at end of file From 99e6dcdc03f02d73af5f702851bf8f4ce82ec23c Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Mon, 8 Jul 2024 23:14:29 +0200 Subject: [PATCH 06/13] Unit test for Util.splitEnhanced() --- tests/util/util_spec.lua | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/util/util_spec.lua b/tests/util/util_spec.lua index 5bfc310..f107170 100644 --- a/tests/util/util_spec.lua +++ b/tests/util/util_spec.lua @@ -84,6 +84,31 @@ describe("util tests", function() assert.are.same({"a", "b", "c"}, Util.split(" a, b ,c ", ",")) end) + it("test isOnlyWhitespaces", function() + assert.is.True(Util.isOnlyWhitespaces(" ")) + assert.is.True(Util.isOnlyWhitespaces("")) + assert.is.True(Util.isOnlyWhitespaces("\t\t")) + assert.is.False(Util.isOnlyWhitespaces(" abc")) + assert.is.False(Util.isOnlyWhitespaces("abc\t")) + assert.is.False(Util.isOnlyWhitespaces("\"")) + end) + + it("test splitEnhanced", function() + assert.are.same({"a", "b", "c"}, Util.splitEnhanced("a ,b ,c", ",", true)) + assert.are.same({"a", "b", "c"}, Util.splitEnhanced("a,b,c", ",", true)) + assert.are.same({"a", "b", "c"}, Util.splitEnhanced("a, b, c", ",", true)) + assert.are.same({"a", "b", "c"}, Util.splitEnhanced(" a, b ,c ", ",", true)) + + assert.are.same({"a", " b", " c"}, Util.splitEnhanced('a, b, c', ",", false)) + assert.are.same({"a", "b", "c"}, Util.splitEnhanced('a,b,c', ",", false)) + assert.are.same({" a", "b", "c"}, Util.splitEnhanced(' a,b,c', ",", false)) + assert.are.same({"a, b", "c"}, Util.splitEnhanced('"a, b", c', ",", true)) + assert.are.same({"a, b", "c"}, Util.splitEnhanced('" a, b", c', ",", true)) + assert.are.same({"a == \"b\"", "c"}, Util.splitEnhanced('a == "b", c', ",", true)) + assert.are.same({"a == \"b\"", "c"}, Util.splitEnhanced('"a == ""b"" ", c', ",", true)) + assert.has_error(function () Util.splitEnhanced('a, b, "c ') end, "Unmatched quotes.") + end) + it("test isInstance", function() local parent = {} parent.__index = parent From 416b47245172d9756ee657c6c699ece51161b7f1 Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Mon, 8 Jul 2024 23:51:16 +0200 Subject: [PATCH 07/13] fixed basic with regex example --- examples/basic_model_with_regex.conf | 2 +- examples/basic_policy_with_regex.csv | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/basic_model_with_regex.conf b/examples/basic_model_with_regex.conf index 72346c6..b7c3269 100644 --- a/examples/basic_model_with_regex.conf +++ b/examples/basic_model_with_regex.conf @@ -8,4 +8,4 @@ p = sub, obj, act e = some(where (p.eft == allow)) [matchers] -m = regexMatch(r.sub == p.sub) && regexMatch(r.obj == p.obj) && regexMatch(r.act == p.act) \ No newline at end of file +m = regexMatch(r.sub, p.sub) && regexMatch(r.obj, p.obj) && regexMatch(r.act, p.act) diff --git a/examples/basic_policy_with_regex.csv b/examples/basic_policy_with_regex.csv index d28d36b..590d4e5 100644 --- a/examples/basic_policy_with_regex.csv +++ b/examples/basic_policy_with_regex.csv @@ -1,3 +1,3 @@ p, ^admin$, .*, .* p, ^user.*, "^/users/[a-zA-Z0-9]{4-10}$", PUT|POST|PATCH|GET|OPTIONS -p, ^guest$, ^/guest/.*, GET|OPTIONS \ No newline at end of file +p, ^guest$, ^/guest/.*, GET|OPTIONS From 175f21f5ab84f8243a225ae3938de79fd4b254a7 Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Mon, 8 Jul 2024 23:55:50 +0200 Subject: [PATCH 08/13] Update basic_policy_with_regex.csv typo --- examples/basic_policy_with_regex.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/basic_policy_with_regex.csv b/examples/basic_policy_with_regex.csv index 590d4e5..836804e 100644 --- a/examples/basic_policy_with_regex.csv +++ b/examples/basic_policy_with_regex.csv @@ -1,3 +1,3 @@ p, ^admin$, .*, .* -p, ^user.*, "^/users/[a-zA-Z0-9]{4-10}$", PUT|POST|PATCH|GET|OPTIONS +p, ^user.*, "^/users/[a-zA-Z0-9]{4,10}$", PUT|POST|PATCH|GET|OPTIONS p, ^guest$, ^/guest/.*, GET|OPTIONS From c7e03cb34c1d77d3b2f63d54b7a604ce00fa2e13 Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Mon, 8 Jul 2024 23:55:54 +0200 Subject: [PATCH 09/13] Unit test for regexMatch with {N,M} quantifier --- tests/main/enforcer_spec.lua | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/main/enforcer_spec.lua b/tests/main/enforcer_spec.lua index 611c208..122d226 100644 --- a/tests/main/enforcer_spec.lua +++ b/tests/main/enforcer_spec.lua @@ -515,4 +515,28 @@ describe("Enforcer tests", function () assert.is.True(e:enforce("bob", "data2", "write")) assert.is.False(e:enforce("bogus", "data2", "write")) -- Non-existent subject end) + + + it("regexMatch test", function () + + local model = path .. "/examples/basic_model_with_regex.conf" + local policy = path .. "/examples/basic_policy_with_regex.csv" + local e = Enforcer:new(model, policy) + + assert.is.True(e:enforce("admin", "/", "PUT")) + assert.is.True(e:enforce("admin", "/admin", "GET")) + assert.is.True(e:enforce("admin", "/admin/anything", "POST")) + assert.is.False(e:enforce("admin123", "/admin", "PUT")) + + assert.is.True(e:enforce("user", "/users/alice", "GET")) + assert.is.True(e:enforce("user", "/users/alice", "PUT")) + assert.is.True(e:enforce("user123", "/users/alice", "PUT")) + assert.is.False(e:enforce("user", "/users/", "PUT")) + assert.is.False(e:enforce("user", "/users/123", "PUT")) + assert.is.False(e:enforce("user", "/users/alice123456789", "PUT")) + assert.is.False(e:enforce("user", "/admin", "PUT")) + + assert.is.True(e:enforce("guest", "/guest/test", "GET")) + assert.is.False(e:enforce("guest", "/guest/test", "PUT")) + end) end) From 6edfe121b78503f745e0625dfc5c664938fcb2f4 Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Tue, 9 Jul 2024 16:10:35 +0200 Subject: [PATCH 10/13] more unit tests for Util.splitEnhanced - check if the last field is a quoted field - throwing error when there are extra characters after the double quote that closes the quoted field. --- tests/util/util_spec.lua | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/util/util_spec.lua b/tests/util/util_spec.lua index f107170..d0c559d 100644 --- a/tests/util/util_spec.lua +++ b/tests/util/util_spec.lua @@ -106,6 +106,10 @@ describe("util tests", function() assert.are.same({"a, b", "c"}, Util.splitEnhanced('" a, b", c', ",", true)) assert.are.same({"a == \"b\"", "c"}, Util.splitEnhanced('a == "b", c', ",", true)) assert.are.same({"a == \"b\"", "c"}, Util.splitEnhanced('"a == ""b"" ", c', ",", true)) + assert.are.same({"a", "b, c"}, Util.splitEnhanced('a, "b, c"', ",", true)) + + assert.has_error(function () Util.splitEnhanced('a, "b, c" ', ",", true) end, "Quoted fields cannot have extra characters outside double quotes.") + assert.has_error(function () Util.splitEnhanced('"a, b" hello, c', ",", true) end, "Quoted fields cannot have extra characters outside double quotes.") assert.has_error(function () Util.splitEnhanced('a, b, "c ') end, "Unmatched quotes.") end) From 99187a0c2482c42a2ea84a3fa8206e918e9e463d Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Tue, 9 Jul 2024 16:11:49 +0200 Subject: [PATCH 11/13] Update Util.lua - support for quotes in last field; - throws an exception if there are other characters after the double quote that closes the quoted field. --- src/util/Util.lua | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/util/Util.lua b/src/util/Util.lua index 69e3946..6cd98f9 100644 --- a/src/util/Util.lua +++ b/src/util/Util.lua @@ -274,9 +274,10 @@ function Util.splitEnhanced(line, sep, trimFields) if Util.isOnlyWhitespaces(field) and char == '"' then -- Then this field is a quoted field quotedField = true - else -- Not first character or not " + else if quotedField then if escaping then + -- ", End of quoted field if char == sep then if trimFields then field = Util.trim(field) @@ -285,9 +286,13 @@ function Util.splitEnhanced(line, sep, trimFields) table.insert(result, field) field = "" quotedField = false - else + -- "" Escapes the double quote character + elseif char == '"' then field = field .. char escaping = false + -- " followed by some other character (not allowed) + else + error("Quoted fields cannot have extra characters outside double quotes.") end else -- Not escaping if char == '"' then @@ -315,7 +320,7 @@ function Util.splitEnhanced(line, sep, trimFields) end -- Throw error if there are quotes left open - if quotedField then + if quotedField and not escaping then error("Unmatched quotes.") end From c09fe4d28409ce992c4018a90697a670f0694148 Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Tue, 9 Jul 2024 16:24:57 +0200 Subject: [PATCH 12/13] changed "sep" parameter name to "delim" (uniform to Util.split() ) --- src/util/Util.lua | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/util/Util.lua b/src/util/Util.lua index 6cd98f9..c357ad3 100644 --- a/src/util/Util.lua +++ b/src/util/Util.lua @@ -256,14 +256,14 @@ function Util.isOnlyWhitespaces(str) return str:match("^%s*$") ~= nil end -function Util.splitEnhanced(line, sep, trimFields) +function Util.splitEnhanced(line, delim, trimFields) local result = {} local i = 1 local quotedField local escaping = false local field = "" - - if sep == nil then sep = ',' end + + if delim == nil then delim = ',' end if trimFields == nil then trimFields = true end -- Loop over the characters of the line @@ -278,7 +278,7 @@ function Util.splitEnhanced(line, sep, trimFields) if quotedField then if escaping then -- ", End of quoted field - if char == sep then + if char == delim then if trimFields then field = Util.trim(field) end @@ -303,7 +303,7 @@ function Util.splitEnhanced(line, sep, trimFields) end else -- Not quotedField - if char == sep then + if char == delim then if trimFields then field = Util.trim(field) end From 9fb72151ae33b5305c1f6a11c9004d1052a7d49d Mon Sep 17 00:00:00 2001 From: Michele Righi Date: Tue, 9 Jul 2024 16:26:09 +0200 Subject: [PATCH 13/13] changed "line" parameter name to "str" (uniform to Util.split() ) --- src/util/Util.lua | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/util/Util.lua b/src/util/Util.lua index c357ad3..5026d9c 100644 --- a/src/util/Util.lua +++ b/src/util/Util.lua @@ -256,7 +256,7 @@ function Util.isOnlyWhitespaces(str) return str:match("^%s*$") ~= nil end -function Util.splitEnhanced(line, delim, trimFields) +function Util.splitEnhanced(str, delim, trimFields) local result = {} local i = 1 local quotedField @@ -266,9 +266,9 @@ function Util.splitEnhanced(line, delim, trimFields) if delim == nil then delim = ',' end if trimFields == nil then trimFields = true end - -- Loop over the characters of the line - while i <= #line do - local char = line:sub(i, i) + -- Loop over the characters of the string + while i <= #str do + local char = str:sub(i, i) -- Check if it's the first character and it's a double quote. if Util.isOnlyWhitespaces(field) and char == '"' then