Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: {column}_colors validation #625

Merged
merged 13 commits into from
Sep 25, 2023
182 changes: 182 additions & 0 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import math
import os
import re
from datetime import datetime
from typing import Dict, List, Optional, Union

Expand Down Expand Up @@ -677,6 +678,185 @@ def _validate_dataframe(self, df_name: str):
self.warnings.append(column_def["warning_message"])
self._validate_column(column, column_name, df_name, column_def)

def _validate_colors_in_uns_dict(self, uns_dict: dict) -> None:
    """
    Validates the optional ``{column}_colors`` entries of uns against the categorical columns of obs.

    For every column that the schema definition of obs marks as categorical and that is present in the
    dataframe, a ``{column}_colors`` entry -- when one is provided -- must hold at least as many colors as
    the column has unique values, and every color must pass ``self._validate_color``. Any problem found is
    appended to ``self.errors``.

    :param dict uns_dict: the uns dictionary whose ``*_colors`` entries are checked

    :rtype None
    """
    df = getattr_anndata(self.adata, "obs")
    df_definition = self._get_component_def("obs")

    # Mapping from obs column name to number of unique categorical values
    category_mapping = {}

    if "columns" in df_definition:
        for column_name, column_def in df_definition["columns"].items():
            if column_name not in df.columns:
                # Skip this, dataframe validation should already append an error for this
                continue

            if column_def.get("type") == "categorical":
                category_mapping[column_name] = df[column_name].nunique()

    for column_name, num_unique_vals in category_mapping.items():
        colors_key = f"{column_name}_colors"

        # Providing {column}_colors is optional: a categorical column may opt out of it entirely, so the
        # checks below only run when the key actually exists in uns. Defaulting a missing key to an empty
        # list would wrongly report "not enough color options" for every column that opted out.
        if colors_key not in uns_dict:
            continue

        colors_options = uns_dict[colors_key]

        if len(colors_options) < num_unique_vals:
            self.errors.append(
                f"Annotated categorical field {column_name} must have at least {num_unique_vals} color options "
                f"in uns[{column_name}_colors]. Found: {colors_options}"
            )
        for color in colors_options:
            if not self._validate_color(color):
                self.errors.append(
                    f"Color {color} in uns[{column_name}_colors] is not valid. Colors must be a valid hex "
                    f"code (#08c0ff) or a CSS4 named color"
                )

def _validate_color(self, color: str) -> bool:
css4_named_colors = [
"aliceblue",
"antiquewhite",
"aqua",
"aquamarine",
"azure",
"beige",
"bisque",
"black",
"blanchedalmond",
"blue",
"blueviolet",
"brown",
"burlywood",
"cadetblue",
"chartreuse",
"chocolate",
"coral",
"cornflowerblue",
"cornsilk",
"crimson",
"cyan",
"darkblue",
"darkcyan",
"darkgoldenrod",
"darkgray",
"darkgreen",
"darkkhaki",
"darkmagenta",
"darkolivegreen",
"darkorange",
"darkorchid",
"darkred",
"darksalmon",
"darkseagreen",
"darkslateblue",
"darkslategray",
"darkturquoise",
"darkviolet",
"deeppink",
"deepskyblue",
"dimgray",
"dodgerblue",
"firebrick",
"floralwhite",
"forestgreen",
"fuchsia",
"gainsboro",
"ghostwhite",
"gold",
"goldenrod",
"gray",
"green",
"greenyellow",
"grey",
"honeydew",
"hotpink",
"indianred",
"indigo",
"ivory",
"khaki",
"lavender",
"lavenderblush",
"lawngreen",
"lemonchiffon",
"lightblue",
"lightcoral",
"lightcyan",
"lightgoldenrodyellow",
"lightgray",
"lightgreen",
"lightpink",
"lightsalmon",
"lightseagreen",
"lightskyblue",
"lightslategray",
"lightsteelblue",
"lightyellow",
"lime",
"limegreen",
"linen",
"magenta",
"maroon",
"mediumaquamarine",
"mediumblue",
"mediumorchid",
"mediumpurple",
"mediumseagreen",
"mediumslateblue",
"mediumspringgreen",
"mediumturquoise",
"mediumvioletred",
"midnightblue",
"mintcream",
"mistyrose",
"moccasin",
"navajowhite",
"navy",
"oldlace",
"olive",
"olivedrab",
"orange",
"orangered",
"orchid",
"palegoldenrod",
"palegreen",
"paleturquoise",
"palevioletred",
"papayawhip",
"peachpuff",
"peru",
"pink",
"plum",
"powderblue",
"purple",
"rebeccapurple",
"red",
"rosybrown",
"royalblue",
"saddlebrown",
"salmon",
"sandybrown",
"seagreen",
"seashell",
"sienna",
"silver",
"skyblue",
"slateblue",
"slategray",
"snow",
"springgreen",
"steelblue",
"tan",
"teal",
"thistle",
"tomato",
"turquoise",
"violet",
"wheat",
"white",
"whitesmoke",
"yellow",
"yellowgreen",
]
if color in css4_named_colors:
return True
return re.match(r"^#([0-9a-fA-F]{6})$", color)

def _validate_sparsity(self):
"""
calculates sparsity of x and raw.x, if bigger than indicated in the schema and not a scipy sparse matrix, then
Expand Down Expand Up @@ -1105,6 +1285,8 @@ def _deep_check(self):
elif component_def["type"] == "dict":
dictionary = getattr(self.adata, component)
self._validate_dict(dictionary, component, component_def)
if component == "uns":
self._validate_colors_in_uns_dict(dictionary)
elif component_def["type"] == "embedding_dict":
self._validate_embedding_dict()
else:
Expand Down
6 changes: 6 additions & 0 deletions cellxgene_schema_cli/tests/fixtures/examples_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@
"default_embedding": "X_umap",
"X_approximate_distribution": "normal",
"batch_condition": ["is_primary_data"],
"donor_id_colors": ["black", "pink"],
"suspension_type_colors": ["red", "#000000"],
"tissue_type_colors": ["blue", "#ffffff"],
}

good_uns_with_labels = {
Expand All @@ -175,6 +178,9 @@
"default_embedding": "X_umap",
"X_approximate_distribution": "normal",
"batch_condition": ["is_primary_data"],
"donor_id_colors": ["black", "pink"],
"suspension_type_colors": ["red", "#000000"],
"tissue_type_colors": ["blue", "#ffffff"],
}

# ---
Expand Down
Binary file modified cellxgene_schema_cli/tests/fixtures/h5ads/example_valid.h5ad
Binary file not shown.
22 changes: 22 additions & 0 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ def test_tissue_ontology_term_id_cell_culture(self):
suffixes.
"""
self.validator.adata.obs.loc[self.validator.adata.obs.index[0], "tissue_type"] = "cell culture"
self.validator.adata.uns["tissue_type_colors"] = ["red"]

with self.subTest(case="error, suffix in term ID"):
self.validator.adata.obs.loc[
Expand Down Expand Up @@ -1359,6 +1360,27 @@ def test_deprecated_fields(self):
],
)

def test_not_enough_color_options(self):
    """
    Providing a single color for suspension_type must yield exactly one "not enough color options" error.
    """
    expected_errors = [
        "ERROR: Annotated categorical field suspension_type must have at least 2 color options in uns[suspension_type_colors]. Found: ['green']"
    ]

    uns = self.validator.adata.uns
    uns["suspension_type_colors"] = ["green"]
    self.validator.validate_adata()

    self.assertEqual(self.validator.errors, expected_errors)

def test_invalid_color_options(self):
    """
    A three-digit hex code and an unknown color name must each be reported as an invalid color, in the
    order they appear in uns.
    """
    expected_errors = [
        "ERROR: Color #000 in uns[suspension_type_colors] is not valid. Colors must be a valid hex code (#08c0ff) or a CSS4 named color",
        "ERROR: Color pynk in uns[suspension_type_colors] is not valid. Colors must be a valid hex code (#08c0ff) or a CSS4 named color",
    ]

    uns = self.validator.adata.uns
    uns["suspension_type_colors"] = ["#000", "pynk"]
    self.validator.validate_adata()

    self.assertEqual(self.validator.errors, expected_errors)


class TestObsm(BaseValidationTest):
"""
Expand Down
Loading