Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add schema_reference field to uns during add-labels #626

Merged
merged 4 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cellxgene_schema_cli/cellxgene_schema/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@
# Path of the all_ontology.json.gz file inside ONTOLOGY_DIR.
PARSED_ONTOLOGIES_FILE = os.path.join(ONTOLOGY_DIR, "all_ontology.json.gz")
# Directory named "schema_definitions" under PACKAGE_ROOT, and the schema_definition.yaml file inside it.
SCHEMA_DEFINITIONS_DIR = os.path.join(PACKAGE_ROOT, "schema_definitions")
SCHEMA_DEFINITION_FILE = os.path.join(SCHEMA_DEFINITIONS_DIR, "schema_definition.yaml")
# Pieces of the schema reference URL: callers join them as
# "<SCHEMA_REFERENCE_BASE_URL>/<schema_version>/<SCHEMA_REFERENCE_FILE_NAME>",
# so the base URL must not end with a slash.
SCHEMA_REFERENCE_BASE_URL = "https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema"
SCHEMA_REFERENCE_FILE_NAME = "schema.md"
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ components:
required: null # Means it's required
reserved_columns:
- schema_version
- schema_reference
deprecated_columns:
- X_normalization
- default_field
Expand Down
6 changes: 6 additions & 0 deletions cellxgene_schema_cli/cellxgene_schema/write_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd

from cellxgene_schema import ontology
from cellxgene_schema.env import SCHEMA_REFERENCE_BASE_URL, SCHEMA_REFERENCE_FILE_NAME
from cellxgene_schema.validate import ONTOLOGY_CHECKER, Validator

from .utils import enforce_canonical_format, getattr_anndata
Expand Down Expand Up @@ -323,6 +324,9 @@ def _remove_categories_with_zero_values(self):
if col.dtype == "category":
df[column] = col.cat.remove_unused_categories()

def _build_schema_reference_url(self, schema_version: str):
    """
    Build the URL of the schema document that corresponds to a schema version.

    The result has the form
    "<SCHEMA_REFERENCE_BASE_URL>/<schema_version>/<SCHEMA_REFERENCE_FILE_NAME>".

    :param str schema_version: version string used as the path segment between the base URL and the file name

    :return the assembled schema reference URL
    :rtype str
    """
    url_template = "{base}/{version}/{file_name}"
    return url_template.format(
        base=SCHEMA_REFERENCE_BASE_URL,
        version=schema_version,
        file_name=SCHEMA_REFERENCE_FILE_NAME,
    )

def write_labels(self, add_labels_file: str):
"""
From a valid (per cellxgene's schema) h5ad, this function writes a new h5ad file with ontology/gene labels added
Expand All @@ -341,6 +345,8 @@ def write_labels(self, add_labels_file: str):

# Set version
self.adata.uns["schema_version"] = self.validator.schema_version
# Set schema reference URL
self.adata.uns["schema_reference"] = self._build_schema_reference_url(self.validator.schema_version)

enforce_canonical_format(self.adata)

Expand Down
1 change: 1 addition & 0 deletions cellxgene_schema_cli/tests/fixtures/examples_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@

good_uns_with_labels = {
"schema_version": "4.0.0",
"schema_reference": "https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/4.0.0/schema.md",
"title": "A title",
"default_embedding": "X_umap",
"X_approximate_distribution": "normal",
Expand Down
4 changes: 4 additions & 0 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -1023,6 +1023,10 @@ def test_reserved_columns_presence(self):

for reserved_column in self.validator.schema_def["components"]["uns"]["reserved_columns"]:
with self.subTest(column=reserved_column):
# Resetting validator
self.validator.adata = examples.adata.copy()
self.validator.errors = []

self.validator.adata.uns[reserved_column] = "dummy_value"
self.validator.validate_adata()
self.assertEqual(
Expand Down
Loading