Skip to content

Commit

Permalink
add test for get_hash_digest_column in TestUtils
Browse files Browse the repository at this point in the history
  • Loading branch information
nayib-jose-gloria committed Sep 18, 2023
1 parent 025bd0d commit 679f4bd
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 13 deletions.
14 changes: 1 addition & 13 deletions cellxgene_schema_cli/cellxgene_schema/write_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@
import traceback
from typing import Dict, List, Optional

import numpy as np
import pandas as pd

from cellxgene_schema import ontology
from cellxgene_schema.env import SCHEMA_REFERENCE_BASE_URL, SCHEMA_REFERENCE_FILE_NAME
from cellxgene_schema.validate import ONTOLOGY_CHECKER, Validator

from .utils import enforce_canonical_format, getattr_anndata, get_hash_digest_column
from .utils import enforce_canonical_format, get_hash_digest_column, getattr_anndata

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -328,17 +327,6 @@ def _remove_categories_with_zero_values(self):
def _build_schema_reference_url(self, schema_version: str):
    """
    Compose the schema reference URL for the given schema version.

    The result has the form ``<base url>/<schema_version>/<file name>``, where the
    base URL and file name come from SCHEMA_REFERENCE_BASE_URL and
    SCHEMA_REFERENCE_FILE_NAME.
    """
    base_url = SCHEMA_REFERENCE_BASE_URL
    file_name = SCHEMA_REFERENCE_FILE_NAME
    return f"{base_url}/{schema_version}/{file_name}"

def _get_observation_joinid_column(self):
    """
    Build a join ID for each row of the obs dataframe and return them as a Series.

    Each obs index value is hashed with xxh3_64_intdigest, cast to uint64, written
    out as 8 big-endian bytes, and encoded with b85encode into an ASCII string.
    Nothing is assigned to the dataframe here; the caller receives the new column.
    """

    def _encode(digest):
        # 8 bytes in big-endian order, then b85-encoded and decoded to text.
        return b85encode(digest.to_bytes(8, "big")).decode("ascii")

    obs_index = self.adata.obs.index.to_series()
    digests = obs_index.map(xxh3_64_intdigest).astype(np.uint64)
    return digests.apply(_encode)

def write_labels(self, add_labels_file: str):
"""
From a valid (per cellxgene's schema) h5ad, this function writes a new h5ad file with ontology/gene labels added
Expand Down
11 changes: 11 additions & 0 deletions cellxgene_schema_cli/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import numpy as np
from pandas import Series
import pytest
from anndata import AnnData
from cellxgene_schema.utils import (
enforce_canonical_format,
get_hash_digest_column,
map_ontology_term,
remove_deprecated_features,
replace_ontology_term,
Expand Down Expand Up @@ -107,3 +109,12 @@ def test_adata_with_canonical_raw_X(self, adata_with_raw):
def test_adata_with_canonical_X(self, adata_without_raw):
    """
    enforce_canonical_format must leave X in canonical format.

    Uses the ``adata_without_raw`` fixture as both the object passed to
    enforce_canonical_format and the object whose X is checked afterwards.
    """
    # Pass the requested fixture: the bare name `adata` is not a parameter of this
    # test, so the original call did not exercise the object being asserted on.
    enforce_canonical_format(adata_without_raw)
    assert adata_without_raw.X.has_canonical_format is True


class TestGetHashDigestColumn:
    """Tests for get_hash_digest_column."""

    def test_get_hash_digest_column(self, adata_with_raw):
        """The digest column is a Series of 10-character strings."""
        # Each ID is 8 bytes, rendered as 5 ASCII chars per 4 bytes -> 10 chars.
        expected_length = 10

        digests = get_hash_digest_column(adata_with_raw.obs)

        assert isinstance(digests, Series)
        for digest in digests:
            assert isinstance(digest, str)
            assert len(digest) == expected_length

0 comments on commit 679f4bd

Please sign in to comment.