diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index c3e63ba..517a6e0 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -39,6 +39,7 @@ jobs:
             tests/benchmarks/benchmark_model.py
             tests/benchmarks/benchmark_management_api.py
             tests/benchmarks/benchmark_role_manager.py
+            tests/benchmarks/benchmark_adapter.py
 
       - name: Upload coverage data to coveralls.io
        run: coveralls --service=github
diff --git a/casbin/persist/adapter.py b/casbin/persist/adapter.py
index 2c9b9a3..b5c457d 100644
--- a/casbin/persist/adapter.py
+++ b/casbin/persist/adapter.py
@@ -12,34 +12,56 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import re
 
-def load_policy_line(line, model):
-    """loads a text line as a policy rule to model."""
+_INTERESTING_TOKENS_RE = re.compile(r"[,\[\]\(\)]")
+
+
+def _extract_tokens(line):
+    """Return the list of 'tokens' from the line, or None if this line has none."""
 
     if line == "":
-        return
+        return None
 
     if line[:1] == "#":
-        return
+        return None
 
     stack = []
     tokens = []
-    for c in line:
+
+    # The tokens are separated by commas, but we support nesting, so a naive `line.split(",")` is
+    # wrong: e.g. `abc(def, ghi), jkl` is two tokens, `abc(def, ghi)` and `jkl`. Instead, we iterate
+    # over the locations of the interesting characters (see `_INTERESTING_TOKENS_RE`) and either:
+    #
+    # - [](): adjust the nesting depth
+    # - ,: if the `,` is at the top level, outside all [](), slice the line to save the token
+    #
+    # `start_idx` marks the start of the current token, i.e. the one we haven't yet seen a `,` for.
+    start_idx = 0
+    for match in _INTERESTING_TOKENS_RE.finditer(line):
+        c = match.group()
         if c == "[" or c == "(":
             stack.append(c)
-            tokens[-1] += c
         elif c == "]" or c == ")":
             stack.pop()
-            tokens[-1] += c
-        elif c == "," and len(stack) == 0:
-            tokens.append("")
-        else:
-            if len(tokens) == 0:
-                tokens.append(c)
-            else:
-                tokens[-1] += c
-
-    tokens = [x.strip() for x in tokens]
+        elif not stack:
+            # must be a comma outside of any nesting: we've found the end of a top-level token,
+            # so save it and start a new one
+            tokens.append(line[start_idx : match.start()].strip())
+            start_idx = match.end()
+
+    # trailing token after the last ,
+    tokens.append(line[start_idx:].strip())
+
+    return tokens
+
+
+def load_policy_line(line, model):
+    """Loads a text line as a policy rule into the model."""
+
+    tokens = _extract_tokens(line)
+    if tokens is None:
+        return
 
     key = tokens[0]
     sec = key[0]
diff --git a/tests/benchmarks/benchmark_adapter.py b/tests/benchmarks/benchmark_adapter.py
new file mode 100644
index 0000000..3a09466
--- /dev/null
+++ b/tests/benchmarks/benchmark_adapter.py
@@ -0,0 +1,30 @@
+from casbin.persist.adapter import _extract_tokens
+
+
+def _benchmark_extract_tokens(benchmark, line):
+    @benchmark
+    def run_benchmark():
+        _extract_tokens(line)
+
+
+def test_benchmark_extract_tokens_short_simple(benchmark):
+    _benchmark_extract_tokens(benchmark, "abc,def,ghi")
+
+
+def test_benchmark_extract_tokens_long_simple(benchmark):
+    # fixed UUIDs, both for length and to resemble "real world" usage of UUIDs
+    _benchmark_extract_tokens(
+        benchmark,
+        "00000000-0000-0000-0000-000000000000,00000000-0000-0000-0000-000000000001,00000000-0000-0000-0000-000000000002",
+    )
+
+
+def test_benchmark_extract_tokens_short_nested(benchmark):
+    _benchmark_extract_tokens(benchmark, "abc(def,ghi),jkl(mno,pqr)")
+
+
+def test_benchmark_extract_tokens_long_nested(benchmark):
+    _benchmark_extract_tokens(
+        benchmark,
+        "00000000-0000-0000-0000-000000000000(00000000-0000-0000-0000-000000000001,00000000-0000-0000-0000-000000000002),00000000-0000-0000-0000-000000000003(00000000-0000-0000-0000-000000000004,00000000-0000-0000-0000-000000000005)",
+    )
diff --git a/tests/persist/test_adapter.py b/tests/persist/test_adapter.py
new file mode 100644
index 0000000..c602854
--- /dev/null
+++ b/tests/persist/test_adapter.py
@@ -0,0 +1,53 @@
+from casbin.persist.adapter import _extract_tokens
+from tests import TestCaseBase
+
+
+class TestExtractTokens(TestCaseBase):
+    def test_ignore_lines(self):
+        self.assertIsNone(_extract_tokens(""))  # empty
+        self.assertIsNone(_extract_tokens("# comment"))
+
+    def test_simple_lines(self):
+        # split on top-level commas, strip whitespace from start and end
+        self.assertEqual(_extract_tokens("one"), ["one"])
+        self.assertEqual(_extract_tokens("one,two"), ["one", "two"])
+        self.assertEqual(_extract_tokens(" ignore \t,\t external, spaces "), ["ignore", "external", "spaces"])
+
+        self.assertEqual(_extract_tokens("internal spaces preserved"), ["internal spaces preserved"])
+
+    def test_nested_lines(self):
+        # basic nesting within a single token
+        self.assertEqual(
+            _extract_tokens("outside1()"),
+            ["outside1()"],
+        )
+        self.assertEqual(
+            _extract_tokens("outside1(inside1())"),
+            ["outside1(inside1())"],
+        )
+
+        # split on top-level commas, but not on internal ones
+        self.assertEqual(
+            _extract_tokens("outside1(inside1(), inside2())"),
+            ["outside1(inside1(), inside2())"],
+        )
+        self.assertEqual(
+            _extract_tokens("outside1(inside1(), inside2(inside3(), inside4()))"),
+            ["outside1(inside1(), inside2(inside3(), inside4()))"],
+        )
+        self.assertEqual(
+            _extract_tokens("outside1(inside1(), inside2()), outside2(inside3(), inside4())"),
+            ["outside1(inside1(), inside2())", "outside2(inside3(), inside4())"],
+        )
+
+        # different delimiters
+        self.assertEqual(
+            _extract_tokens(
+                "all_square[inside1[], inside2[]],square_and_parens[inside1(), inside2()],parens_and_square(inside1[], inside2[])"
+            ),
+            [
+                "all_square[inside1[], inside2[]]",
+                "square_and_parens[inside1(), inside2()]",
+                "parens_and_square(inside1[], inside2[])",
+            ],
+        )