From 1aa16ef3ba2379f3a896d43973fa0d88f13868bb Mon Sep 17 00:00:00 2001 From: Jerry Morrison <1fish2@users.noreply.github.com> Date: Thu, 22 Jul 2021 12:35:07 -0700 Subject: [PATCH] independent km caches for distinct cases Put a checksum into the `KmcountsCached` cache filename so different cases get independent cache files, e.g. when switching git branches, Parca options during parameter optimization, or mono/polycistronic operons. This renames the cache file from `fixtures/endo_km/km3.cPickle` to `cache/parca-km-1918837868.cPickle`, for instance. Q. Does anyone prefer the "fixtures" directory name? The cache files `cache/parca-km-*.cPickle` will accumulate until `make clean`. Does this succeed in distinguishing current cases? We could make this more sensitive by checksumming more inputs or less picky by rounding, e.g. `Kmcounts.astype(np.float16)`. See #1123 --- Makefile | 2 +- reconstruction/ecoli/fit_sim_data_1.py | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index c8a3d3b441..6dbf3e86a2 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ compile: # write_ode_file.py in Parca code. # Fireworks writes launcher_20* and block_20*. clean: - rm -fr fixtures + rm -fr fixtures cache (cd reconstruction/ecoli/dataclasses/process && rm -f equilibrium_odes.py two_component_system_odes*.py) find . -name "*.pyc" -exec rm -rf {} \; find . 
-name "*.o" -exec rm -fr {} \; diff --git a/reconstruction/ecoli/fit_sim_data_1.py b/reconstruction/ecoli/fit_sim_data_1.py index 2ad175dfdc..43fb4a4e3f 100644 --- a/reconstruction/ecoli/fit_sim_data_1.py +++ b/reconstruction/ecoli/fit_sim_data_1.py @@ -5,8 +5,7 @@ TODO: functionalize so that values are not both set and returned from some methods """ -from __future__ import absolute_import, division, print_function - +import binascii import functools import itertools import os @@ -26,7 +25,6 @@ from wholecell.containers.bulk_objects_container import BulkObjectsContainer from wholecell.utils import filepath, parallelization, units from wholecell.utils.fitting import normalize, masses_and_counts_for_homeostatic_target -from wholecell.utils import parallelization # Fitting parameters @@ -3164,6 +3162,13 @@ def calculateRnapRecruitment(sim_data, cell_specs): } +def crc32(arr: np.ndarray) -> int: + """Return a CRC32 checksum of an ndarray.""" + shape = str(arr.shape).encode() + values = arr.tobytes() + return binascii.crc32(shape + values) + + def setKmCooperativeEndoRNonLinearRNAdecay(sim_data, bulkContainer): """ Fits the affinities (Michaelis-Menten constants) for RNAs binding to endoRNAses. @@ -3312,13 +3317,12 @@ def setKmCooperativeEndoRNonLinearRNAdecay(sim_data, bulkContainer): alpha ) + # The checksum in the filename picks independent caches for distinct cases + # such as different Parca options or Parca code in different git branches. + # `make clean` will delete the cache files. needToUpdate = False - fixturesDir = filepath.makedirs(filepath.ROOT_PATH, "fixtures", "endo_km") - # Numpy 'U' fields make these files incompatible with older code, so change - # the filename. No need to make files compatible between Python 2 & 3; we'd - # have to set the same protocol version and set Python 3-only args like - # encoding='latin1'. 
- km_filepath = os.path.join(fixturesDir, 'km{}.cPickle'.format(sys.version_info[0])) + cache_dir = filepath.makedirs(filepath.ROOT_PATH, "cache") + km_filepath = os.path.join(cache_dir, f'parca-km-{crc32(Kmcounts)}.cPickle') if os.path.exists(km_filepath): with open(km_filepath, "rb") as f: