
Optimise load_policy_line to avoid quadratic individual-character loop #355

Open · wants to merge 6 commits into master
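
The old loop built each token one character at a time with `tokens[-1] += c`; because that re-copies the accumulated string on every character, a single long token costs roughly quadratic time in its length. The new `_extract_tokens` instead has a regex find only the delimiter positions and slices the line between them, so each character is copied once. A simplified sketch of the two strategies (illustrative only, not the PR's code, and ignoring the ()/[] nesting that the real implementation tracks):

import re

_COMMA_RE = re.compile(r",")


def split_per_char(line):
    # old style: grow the current token one character at a time;
    # `tokens[-1] += c` copies the whole accumulated token on every
    # character, so one long token costs O(len(token) ** 2) work overall
    tokens = [""]
    for c in line:
        if c == ",":
            tokens.append("")
        else:
            tokens[-1] += c
    return [t.strip() for t in tokens]


def split_by_slicing(line):
    # new style: visit only the delimiter positions found by the regex and
    # slice the line between them, so each character is copied exactly once
    tokens, start = [], 0
    for match in _COMMA_RE.finditer(line):
        tokens.append(line[start:match.start()].strip())
        start = match.end()
    tokens.append(line[start:].strip())
    return tokens

Both sketches return ["abc", "def"] for "abc, def"; the difference is only in how much copying they do on long tokens.
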
1 change: 1 addition & 0 deletions .github/workflows/build.yml
@@ -39,6 +39,7 @@ jobs:
tests/benchmarks/benchmark_model.py
tests/benchmarks/benchmark_management_api.py
tests/benchmarks/benchmark_role_manager.py
tests/benchmarks/benchmark_adapter.py

- name: Upload coverage data to coveralls.io
run: coveralls --service=github
54 changes: 38 additions & 16 deletions casbin/persist/adapter.py
@@ -12,34 +12,56 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import re

def load_policy_line(line, model):
"""loads a text line as a policy rule to model."""
_INTERESTING_TOKENS_RE = re.compile(r"[,\[\]\(\)]")


def _extract_tokens(line):
"""Return the list of 'tokens' from the line, or None if this line has none"""

if line == "":
return
return None

if line[:1] == "#":
return
return None

stack = []
tokens = []
for c in line:

# The tokens are separated by commas, but we support nesting so a naive `line.split(",")` is
# wrong. E.g. `abc(def, ghi), jkl` is two tokens: `abc(def, ghi)` and `jkl`. We do this by
# iterating over the locations of any tokens of interest, and either:
#
# - [](): adjust the nesting depth
# - ,: slice the line to save the token, if the , is at the top-level, outside all []()
#
# `start_idx` represents the start of the current token, that we haven't seen a `,` for yet.
start_idx = 0
for match in _INTERESTING_TOKENS_RE.finditer(line):
c = match.group()
if c == "[" or c == "(":
stack.append(c)
tokens[-1] += c
elif c == "]" or c == ")":
stack.pop()
tokens[-1] += c
elif c == "," and len(stack) == 0:
tokens.append("")
else:
if len(tokens) == 0:
tokens.append(c)
else:
tokens[-1] += c

tokens = [x.strip() for x in tokens]
elif not stack:
# must be a comma outside of any nesting: we've found the end of a top level token so
# save that and start a new one
tokens.append(line[start_idx : match.start()].strip())
start_idx = match.end()

# trailing token after the last ,
tokens.append(line[start_idx:].strip())

return tokens


def load_policy_line(line, model):
"""loads a text line as a policy rule to model."""

tokens = _extract_tokens(line)
if tokens is None:
return

key = tokens[0]
sec = key[0]
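
For reference, a small usage sketch of the new helper; the expected values below are taken from the example in the code comment and from the new tests in tests/persist/test_adapter.py:

from casbin.persist.adapter import _extract_tokens

# top-level commas split tokens; commas inside ()/[] stay within a token
assert _extract_tokens("abc(def, ghi), jkl") == ["abc(def, ghi)", "jkl"]

# empty lines and comment lines yield None, so load_policy_line skips them
assert _extract_tokens("") is None
assert _extract_tokens("# a comment") is None
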
30 changes: 30 additions & 0 deletions tests/benchmarks/benchmark_adapter.py
@@ -0,0 +1,30 @@
from casbin.persist.adapter import _extract_tokens


def _benchmark_extract_tokens(benchmark, line):
@benchmark
def run_benchmark():
_extract_tokens(line)


def test_benchmark_extract_tokens_short_simple(benchmark):
_benchmark_extract_tokens(benchmark, "abc,def,ghi")


def test_benchmark_extract_tokens_long_simple(benchmark):
# fixed UUIDs, chosen for length and to resemble real-world usage of UUIDs
_benchmark_extract_tokens(
benchmark,
"00000000-0000-0000-0000-000000000000,00000000-0000-0000-0000-000000000001,00000000-0000-0000-0000-000000000002",
)


def test_benchmark_extract_tokens_short_nested(benchmark):
_benchmark_extract_tokens(benchmark, "abc(def,ghi),jkl(mno,pqr)")


def test_benchmark_extract_tokens_long_nested(benchmark):
_benchmark_extract_tokens(
benchmark,
"00000000-0000-0000-0000-000000000000(00000000-0000-0000-0000-000000000001,00000000-0000-0000-0000-000000000002),00000000-0000-0000-0000-000000000003(00000000-0000-0000-0000-000000000004,00000000-0000-0000-0000-000000000005)",
)
53 changes: 53 additions & 0 deletions tests/persist/test_adapter.py
@@ -0,0 +1,53 @@
from casbin.persist.adapter import _extract_tokens
from tests import TestCaseBase


class TestExtractTokens(TestCaseBase):
def test_ignore_lines(self):
self.assertIsNone(_extract_tokens("")) # empty
self.assertIsNone(_extract_tokens("# comment"))

def test_simple_lines(self):
# split on top-level commas, strip whitespace from start and end
self.assertEqual(_extract_tokens("one"), ["one"])
self.assertEqual(_extract_tokens("one,two"), ["one", "two"])
self.assertEqual(_extract_tokens(" ignore \t,\t external, spaces "), ["ignore", "external", "spaces"])

self.assertEqual(_extract_tokens("internal spaces preserved"), ["internal spaces preserved"])

def test_nested_lines(self):
# basic nesting within a single token
self.assertEqual(
_extract_tokens("outside1()"),
["outside1()"],
)
self.assertEqual(
_extract_tokens("outside1(inside1())"),
["outside1(inside1())"],
)

# split on top-level commas, but not on internal ones
self.assertEqual(
_extract_tokens("outside1(inside1(), inside2())"),
["outside1(inside1(), inside2())"],
)
self.assertEqual(
_extract_tokens("outside1(inside1(), inside2(inside3(), inside4()))"),
["outside1(inside1(), inside2(inside3(), inside4()))"],
)
self.assertEqual(
_extract_tokens("outside1(inside1(), inside2()), outside2(inside3(), inside4())"),
["outside1(inside1(), inside2())", "outside2(inside3(), inside4())"],
)

# different delimiters
self.assertEqual(
_extract_tokens(
"all_square[inside1[], inside2[]],square_and_parens[inside1(), inside2()],parens_and_square(inside1[], inside2[])"
),
[
"all_square[inside1[], inside2[]]",
"square_and_parens[inside1(), inside2()]",
"parens_and_square(inside1[], inside2[])",
],
)