Skip to content

Commit

Permalink
Fix handling of local $refs (#3)
Browse files Browse the repository at this point in the history
- To streamline the sorting code, create sort keys for all of the schema's nodes, not just object values
- Set a JSON node's sort key only if it is not already set: for referenced schemas, this makes sure we assign the sort key according to the location in the referring schema (the property containing the $ref), not in the referred schema.
- Rewrite traversal from "iterate over children and do work then recurse" to "do work then iterate over children and recurse"
  • Loading branch information
ikonst authored Dec 8, 2021
1 parent 1ce0ab3 commit 39a5364
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 11 deletions.
31 changes: 21 additions & 10 deletions jschon_sort/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,68 @@
import math
from typing import Dict
from typing import List
from typing import Mapping
from typing import Tuple

import jschon.jsonschema
from jschon.json import AnyJSONCompatible


def _get_sort_keys_for_json_nodes(
    root_node: jschon.JSON,
) -> Mapping[jschon.JSONPointer, Tuple[int, ...]]:
    """
    Gets a mapping from JSON nodes (as JSON pointers) to sort keys (as tuples of integers) that match their position
    within the JSON.

    Every node reachable from ``root_node`` gets an entry, including ``root_node`` itself and array items, not
    only object values.

    :param root_node: the node at which the traversal starts.
    :return: a mapping whose keys are node paths *relative to* ``root_node`` (the leading ``len(root_node.path)``
        segments are sliced off) and whose values are the chain of child indexes leading from ``root_node`` to
        the node. ``root_node`` itself maps to the empty tuple.
    """
    mapping: Dict[jschon.JSONPointer, Tuple[int, ...]] = {}
    # Keys are stored relative to the root, so the number of leading path segments to strip is fixed up front.
    root_depth = len(root_node.path)

    def _recurse(node: jschon.JSON, node_sort_key: Tuple[int, ...]) -> None:
        # Do the work for this node first, then descend into its children.
        mapping[node.path[root_depth:]] = node_sort_key

        if node.type == "object":
            children = node.data.values()
        elif node.type == "array":
            children = node.data
        else:
            # Any other node type is treated as a leaf: nothing to descend into.
            return

        for idx, child in enumerate(children):
            # A child's sort key is its parent's key extended by the child's position within the parent.
            _recurse(child, (*node_sort_key, idx))

    _recurse(root_node, ())

    return mapping


def sort_doc_by_schema(*, doc_data: AnyJSONCompatible, schema_data: AnyJSONCompatible) -> AnyJSONCompatible:
schema_json = jschon.JSON(schema_data)
schema_sort_keys = _get_sort_keys_for_json_nodes(schema_json)

try:
schema = jschon.JSONSchema(schema_data)
root_schema = jschon.JSONSchema(schema_data)
except jschon.CatalogError:
# jschon only supports newer jsonschema drafts
schema_data = copy.copy(schema_data)
schema_data['$schema'] = "https://json-schema.org/draft/2020-12/schema"
schema = jschon.JSONSchema(schema_data)
root_schema = jschon.JSONSchema(schema_data)

doc_json = jschon.JSON(doc_data)
res = schema.evaluate(doc_json)
res = root_schema.evaluate(doc_json)
if not res.valid:
raise ValueError('Document failed schema validation')

schema_sort_keys_cache: Dict[jschon.URI, Mapping[jschon.JSONPointer, Tuple[int, ...]]] = {}

def _get_sort_keys_for_schema(schema: jschon.JSONSchema) -> Mapping[jschon.JSONPointer, Tuple[int, ...]]:
if sort_keys := schema_sort_keys_cache.get(schema.canonical_uri):
return sort_keys
sort_keys = _get_sort_keys_for_json_nodes(schema)
schema_sort_keys_cache[schema.canonical_uri] = sort_keys
return sort_keys

doc_sort_keys: Dict[jschon.JSONPointer, Tuple[int, ...]] = {}

def _traverse_scope(scope: jschon.jsonschema.Scope) -> None:
schema_sort_keys = _get_sort_keys_for_schema(scope.schema)
doc_sort_keys.setdefault(scope.instpath, schema_sort_keys[scope.relpath])
for child in scope.iter_children():
doc_sort_keys[child.instpath] = schema_sort_keys[child.path]
_traverse_scope(child)

_traverse_scope(res)
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = jschon-sort
version = 0.0.2
version = 0.0.3
description = Sorts a JSON or YAML document to match a JSON Schema's order of properties
long_description = file: README.md
long_description_content_type = text/markdown
Expand Down
71 changes: 71 additions & 0 deletions tests/test_jschon_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,74 @@ def test_sort_doc_by_schema(schema_version: str) -> None:
assert actual is not doc
assert json.dumps(doc) == doc_str, "ensure doc is not modified in place"
assert json.dumps(actual) == '{"ranges": [{"BBB": 42, "AAA": 42, "start": 10, "end": 20}]}'


def test_sort_doc_by_schema__local_ref() -> None:
    """A value validated through a local ``$ref`` is reordered to follow the referenced schema's property order."""
    # Arrange: the document lists "end" before "start", the reverse of the order declared under "$defs".
    unsorted_doc = json.loads('{"foo": {"end": 20, "start": 10}}')

    range_schema = {
        "type": "object",
        "properties": {
            "start": {"type": "number"},
            "end": {"type": "number"},
        },
        "required": ["start", "end"],
        "additionalProperties": False,
    }
    schema_with_ref = {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "type": "object",
        "$defs": {"range": range_schema},
        "additionalProperties": {"$ref": "#/$defs/range"},
    }

    # Act
    sorted_doc = sort_doc_by_schema(doc_data=unsorted_doc, schema_data=schema_with_ref)

    # Assert: a new object is returned, with keys in schema order.
    assert sorted_doc is not unsorted_doc
    assert json.dumps(sorted_doc) == '{"foo": {"start": 10, "end": 20}}'


def test_sort_doc_by_schema__oneof() -> None:
    """Each value is reordered according to the ``oneOf`` branch whose properties it uses."""
    # Arrange: both values list their keys in the reverse of the order declared by their branch.
    unsorted_doc = json.loads('{"abc": {"end": 20, "start": 10}, "xyz": {"to": 40, "from": 30}}')

    start_end_branch = {
        "properties": {
            "start": {"type": "number"},
            "end": {"type": "number"},
        },
        "required": ["start", "end"],
        "additionalProperties": False,
    }
    from_to_branch = {
        "properties": {
            "from": {"type": "number"},
            "to": {"type": "number"},
        },
        "required": ["from", "to"],
        "additionalProperties": False,
    }
    schema_with_oneof = {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "type": "object",
        "additionalProperties": {
            "type": "object",
            "oneOf": [start_end_branch, from_to_branch],
        },
    }

    # Act
    sorted_doc = sort_doc_by_schema(doc_data=unsorted_doc, schema_data=schema_with_oneof)

    # Assert: a new object is returned, with each value's keys in its branch's order.
    assert sorted_doc is not unsorted_doc
    assert json.dumps(sorted_doc) == '{"abc": {"start": 10, "end": 20}, "xyz": {"from": 30, "to": 40}}'

0 comments on commit 39a5364

Please sign in to comment.