From db58648d22d82275fe60bb0f6f21a408f46e774f Mon Sep 17 00:00:00 2001
From: Reid Priedhorsky <1682574+reidpr@users.noreply.github.com>
Date: Fri, 13 Oct 2023 16:19:25 -0600
Subject: [PATCH] PR #1727: convert SIGINT and SIGTERM to Fatal_Error

---
 lib/build.py        | 31 +++++++++++++++++++++++++++----
 lib/build_cache.py  |  7 ++++---
 lib/charliecloud.py | 40 ++++++++++++++++++++++++++++++++++++----
 lib/filesystem.py   | 20 ++++++++++++++++++++
 lib/image.py        |  4 ++--
 lib/pull.py         |  1 +
 6 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/lib/build.py b/lib/build.py
index abb7c2b4a..6bff7f305 100644
--- a/lib/build.py
+++ b/lib/build.py
@@ -242,6 +242,7 @@ def __default__(self, tree):
             return
          except ch.Fatal_Error:
             inst.announce_maybe()
+            inst.prepare_rollback()
             raise
          if (inst.miss):
            if (self.miss_ct == 1):
@@ -466,10 +467,10 @@ def prepare(self, miss_ct):
             time in prepare() should call announce_maybe() as soon as they
             know hit/miss status.

-         2. Errors: Calling ch.FATAL() normally exits immediately, but here
-            this often happens before the instruction has been announced
-            (see issue #1486). Therefore, the caller catches Fatal_Error,
-            announces, and then re-raises.
+         2. Errors: The caller catches Fatal_Error, announces, calls
+            prepare_rollback(), and then re-raises. This is to ensure the
+            instruction is announced (see #1486) and any
+            possibly-inconsistent state is fixed before exiting.

          3. Modifying image metadata: Instructions like ARG, ENV, FROM,
             LABEL, SHELL, and WORKDIR must modify metadata here, not in
@@ -479,6 +480,9 @@ def prepare(self, miss_ct):
       self.git_hash = bu.cache.find_sid(self.sid, self.image.ref.for_path)
       return miss_ct + int(self.miss)

+   def prepare_rollback(self):
+      pass  # typically a no-op
+
    def ready(self):
       bu.cache.ready(self.image)

@@ -1146,6 +1150,25 @@ def prepare(self, miss_ct):
       # Done.
       return int(self.miss)  # will still miss in disabled mode

+   def prepare_rollback(self):
+      # AFAICT the only thing that might be busted is the unpack directories
+      # for either the base image or the image. We could probably be smarter
+      # about this, but for now just delete them.
+      try:
+         base_image = self.base_image
+      except AttributeError:
+         base_image = None
+      try:
+         image = self.image
+      except AttributeError:
+         image = None
+      if (base_image is not None or image is not None):
+         ch.INFO("something went wrong, rolling back ...")
+         if (base_image is not None):
+            bu.cache.unpack_delete(base_image, missing_ok=True)
+         if (image is not None):
+            bu.cache.unpack_delete(image, missing_ok=True)
+

 class Run(Instruction):

diff --git a/lib/build_cache.py b/lib/build_cache.py
index 8a8b837aa..1e0a8b17a 100644
--- a/lib/build_cache.py
+++ b/lib/build_cache.py
@@ -1066,10 +1066,10 @@ def pull_lazy(self, img, src_ref, last_layer=None, pullet=None):
          # a young hen, especially one less than one year old
          pullet = pull.Image_Puller(img, src_ref)
          pullet.download()
-      self.worktree_add(img, "root")
       pullet.unpack(last_layer)
       sid = self.sid_from_parent(self.root_id, pullet.sid_input)
       pullet.done()
+      self.worktree_adopt(img, "root")
       commit = self.commit(img.unpack_path, sid, "PULL %s" % src_ref, [])
       self.ready(img)
       if (img.ref != src_ref):
@@ -1220,12 +1220,14 @@ def tree_print(self):
          self.git(args + ["--format=%s" % fmt], quiet=False)
       print()  # blank line to separate from summary

-   def unpack_delete(self, image):
+   def unpack_delete(self, image, missing_ok=False):
       """Wrapper for Image.unpack_delete() that first detaches the work tree's head.
          If we delete an image's unpack path without first detaching HEAD, the
          corresponding work tree must also be deleted before the bucache branch.
          This involves multiple calls to worktrees_fix(), which is clunky, so we
          use this method instead."""
+      if (not image.unpack_exist_p and missing_ok):
+         return
       (_, commit) = self.find_commit(image.ref.for_path)
       if (commit is not None):
          # Off with her head!
@@ -1381,7 +1383,6 @@ def permissions_fix(self, path):
          for i in itertools.chain(subdirs, files):
             (dir_ // i).chmod_min()

-
    def pull_lazy(self, img, src_ref, last_layer=None, pullet=None):
       if (pullet is None and os.path.exists(img.unpack_path)):
          ch.VERBOSE("base image already exists, skipping pull")
diff --git a/lib/charliecloud.py b/lib/charliecloud.py
index facbb4c37..47ef53ae8 100644
--- a/lib/charliecloud.py
+++ b/lib/charliecloud.py
@@ -397,26 +397,45 @@ def start(self):

 class Progress_Writer:
    """Wrapper around a binary file object to maintain a progress meter while
-      data are written."""
+      data are written. Overwrite the file if it already exists.
+
+      This downloads to a temporary file to ease recovery if the download is
+      interrupted. This uses a predictable name to support restarts in the
+      future, which would probably require verification after download. For
+      now, we just delete any leftover temporary files in Storage.init().
+
+      An interesting alternative is to download to an anonymous temporary file
+      that vanishes if not linked into the filesystem. Recent Linux provides a
+      very cool procedure to do this -- open(2) with O_TMPFILE followed by
+      linkat(2) [1] -- but it’s not always supported and the workaround
+      (create, then immediately unlink(2)) does not support re-linking [2].
+      This would also not support restarting the download.
+
+      [1]: https://man7.org/linux/man-pages/man2/open.2.html
+      [2]: https://stackoverflow.com/questions/4171713"""

    __slots__ = ("fp",
                 "msg",
                 "path",
+                "path_tmp",
                 "progress")

    def __init__(self, path, msg):
       self.msg = msg
       self.path = path
+      self.path_tmp = path.with_name("part_" + path.name)
       self.progress = None

    def close(self):
       if (self.progress is not None):
          self.progress.done()
          close_(self.fp)
+         self.path.unlink_(missing_ok=True)
+         self.path_tmp.rename_(self.path)

    def start(self, length):
       self.progress = Progress(self.msg, "MiB", 2**20, length)
-      self.fp = self.path.open_("wb")
+      self.fp = self.path_tmp.open_("wb")

    def write(self, data):
       self.progress.update(len(data))
@@ -655,8 +674,10 @@ def init(cli):
       log_fp = file_.open_("at")
    atexit.register(color_reset, log_fp)
    VERBOSE("version: %s" % version.VERSION)
-   VERBOSE("verbose level: %d (%s))" % (log_level.value,
-                                        log_level.name))
+   VERBOSE("verbose level: %d (%s))" % (log_level.value, log_level.name))
+   # signal handling
+   signal.signal(signal.SIGINT, sigterm)
+   signal.signal(signal.SIGTERM, sigterm)
    # storage directory
    global storage
    storage = fs.Storage(cli.storage)
@@ -821,6 +842,17 @@ def si_decimal(ct):
       ct /= 1000
    assert False, "unreachable"

+def sigterm(signum, frame):
+   "Handler for SIGTERM and friends."
+   # Ignore further signals because we are already cleaning up.
+   signal.signal(signal.SIGINT, signal.SIG_IGN)
+   signal.signal(signal.SIGTERM, signal.SIG_IGN)
+   # Don’t stomp on progress meter if one is being printed.
+   print()
+   signame = signal.Signals(signum).name
+   ERROR("received %s, exiting" % signame)
+   FATAL("received %s" % signame)
+
 def user():
    "Return the current username; exit with error if it can’t be obtained."
    try:
diff --git a/lib/filesystem.py b/lib/filesystem.py
index 51a42f280..d809dca65 100644
--- a/lib/filesystem.py
+++ b/lib/filesystem.py
@@ -522,6 +522,17 @@ def root_env():
    def build_large_path(self, name):
       return self.build_large // name

+   def cleanup(self):
+      "Called during initialization after we know the storage dir is valid."
+      # Delete partial downloads.
+      part_ct = 0
+      for path in self.download_cache.glob("part_*"):
+         ch.VERBOSE("deleting: %s" % path)
+         path.unlink_()
+         part_ct += 1
+      if (part_ct > 0):
+         ch.WARNING("deleted %d partially downloaded files" % part_ct)
+
    def fatman_for_download(self, image_ref):
       return self.download_cache // ("%s.fat.json" % image_ref.for_path)

@@ -593,6 +604,7 @@ def init(self):
                   % (v_found, self.root),
                   'you can delete and re-initialize with "ch-image reset"')
       self.validate_strict()
+      self.cleanup()

    def lock(self):
       """Lock the storage directory. Charliecloud does not at present support
@@ -655,7 +667,15 @@ def validate_strict(self):
             entries.remove(entry)
          except KeyError:
            ch.FATAL("%s: missing file or directory: %s" % (msg_prefix, entry))
+      # Ignore some files that may or may not exist.
       entries -= { i.name for i in (self.lockfile, self.mount_point) }
+      # Delete some files that exist only if we crashed.
+      for i in (self.image_tmp, ):
+         if (i.name in entries):
+            ch.WARNING("deleting leftover temporary file/dir: %s" % i.name)
+            i.rmtree()
+            entries.remove(i.name)
+      # If anything is left, yell about it.
       if (len(entries) > 0):
          ch.FATAL("%s: extraneous file(s): %s"
                   % (msg_prefix, " ".join(sorted(entries))))
diff --git a/lib/image.py b/lib/image.py
index 7729322f1..e81987b75 100644
--- a/lib/image.py
+++ b/lib/image.py
@@ -451,8 +451,8 @@ def unpack(self, layer_tars, last_layer=None):
       """Unpack config_json (path to JSON config file) and layer_tars
          (sequence of paths to tarballs, with lowest layer first) into the
          unpack directory, validating layer contents and dealing with
-         whiteouts. Empty layers are ignored. The unpack directory must exist,
-         and either be empty or contain only a single entry called “.git”."""
+         whiteouts. Empty layers are ignored. The unpack directory must not
+         exist."""
       if (last_layer is None):
          last_layer = sys.maxsize
       ch.INFO("flattening image")
diff --git a/lib/pull.py b/lib/pull.py
index 7d4ac74e8..1584c8565 100644
--- a/lib/pull.py
+++ b/lib/pull.py
@@ -310,6 +310,7 @@ def bad_key(key):

    def unpack(self, last_layer=None):
       layer_paths = [self.layer_path(h) for h in self.layer_hashes]
+      bu.cache.unpack_delete(self.image, missing_ok=True)
       self.image.unpack(layer_paths, last_layer)
       self.image.metadata_replace(self.config_path)
       # Check architecture we got. This is limited because image metadata does
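
Note (editor's addition, not part of the patch): the core idea above is to turn SIGINT and SIGTERM into an ordinary Fatal_Error so that cleanup such as prepare_rollback() and temp-file removal runs instead of the process dying mid-operation. Below is a minimal standalone sketch of that signal-to-exception pattern; Fatal_Error and FATAL here are illustrative stand-ins, not the real charliecloud.py definitions.

#!/usr/bin/env python3
# Illustrative only: convert SIGINT/SIGTERM into an exception so callers can
# clean up, mirroring the approach in the patch. Fatal_Error and FATAL are
# stand-in names, not Charliecloud's actual API.

import signal
import sys

class Fatal_Error(Exception):
   "Raised instead of exiting immediately, so callers can clean up."

def FATAL(msg):
   raise Fatal_Error(msg)

def sigterm(signum, frame):
   # Ignore further signals; we are already shutting down.
   signal.signal(signal.SIGINT, signal.SIG_IGN)
   signal.signal(signal.SIGTERM, signal.SIG_IGN)
   FATAL("received %s" % signal.Signals(signum).name)

if (__name__ == "__main__"):
   signal.signal(signal.SIGINT, sigterm)
   signal.signal(signal.SIGTERM, sigterm)
   try:
      signal.raise_signal(signal.SIGTERM)   # simulate kill(1) or Ctrl-C
   except Fatal_Error as x:
      print("cleaning up, then exiting: %s" % x)
      sys.exit(1)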
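
Likewise, the Progress_Writer and Storage.cleanup() changes download to a predictable "part_*" temporary file, rename it over the final path only on success, and sweep leftovers at startup. A rough generic sketch of that write-then-rename idiom (hypothetical helper names, not the actual Progress_Writer API):

#!/usr/bin/env python3
# Illustrative only: write to "part_<name>" in the same directory, then rename
# over the final path on success; sweep stale "part_*" files at startup.
# Generic sketch, not Charliecloud's Progress_Writer.

import pathlib

def write_via_temp(path, chunks):
   tmp = path.with_name("part_" + path.name)
   with tmp.open("wb") as fp:
      for chunk in chunks:
         fp.write(chunk)
   path.unlink(missing_ok=True)   # as in the patch: drop any stale target
   tmp.rename(path)               # same directory, so rename is atomic

def sweep_partials(dir_):
   # analogous to Storage.cleanup(): delete leftover partial downloads
   for p in dir_.glob("part_*"):
      p.unlink()

if (__name__ == "__main__"):
   out = pathlib.Path("example.bin")
   write_via_temp(out, [b"hello ", b"world\n"])
   print(out.read_bytes())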