Skip to content

Commit

Permalink
Store statistics in array metadata
Browse files Browse the repository at this point in the history
Under the key _ome2024_ngff_challenge_stats we now store
the following statistics for each array conversion:

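 - input: S3 endpoint URL of the input location; "default" if no explicit endpoint is set; "" if the input is not on S3
 - output: S3 endpoint URL of the output location; "default" if no explicit endpoint is set; "" if the output is not on S3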
 - read: total bytes read
 - written: total bytes written
 - start: Unix epoch time (in seconds) at which the conversion started
 - stop: Unix epoch time (in seconds) at which the conversion stopped
 - elapsed: duration of the conversion in seconds, i.e. "stop - start"

For example:

```
  "attributes": {
    "_ome2024_ngff_challenge_stats": {
      "input": "https://uk1s3.embassy.ebi.ac.uk",
      "output": "",
      "start": 1724228072.7066,
      "stop": 1724228073.515819,
      "read": 2312282,
      "written": 1661288,
      "elapsed": 0.8092191219329834
    }
  }
```
  • Loading branch information
joshmoore committed Aug 21, 2024
1 parent 53cdf2b commit 6c443fd
Showing 1 changed file with 48 additions and 4 deletions.
52 changes: 48 additions & 4 deletions src/ome2024_ngff_challenge/resave.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,25 @@ def s3_string(self):
def fs_string(self):
    """Return the filesystem location as a string.

    The subpath is appended to the base path when one is set; otherwise
    only the base path is returned.
    """
    location = self.path
    if self.subpath:
        location = location / self.subpath
    return str(location)

def is_s3(self):
    """Return True when a bucket is set on this configuration, else False."""
    return bool(self.bucket)

def s3_endpoint(self):
    """
    Return a representation of the S3 endpoint set on this configuration.

    * "" if this is not an S3 configuration
    * "default" if no explicit endpoint is set
    * otherwise the endpoint itself is returned
    """
    # Non-S3 configurations have no endpoint to report.
    if not self.is_s3():
        return ""
    # A missing/empty endpoint is reported as "default".
    return self.endpoint or "default"

def __str__(self):
    """Return the S3 string for S3 configurations, otherwise the filesystem string."""
    # Use the shared is_s3() predicate rather than inspecting the bucket
    # attribute directly, so the S3 check lives in one place.
    return self.s3_string() if self.is_s3() else self.fs_string()

Expand Down Expand Up @@ -444,12 +461,39 @@ def convert_array(

after = TSMetrics(input_config.ts_config, write_config, before)

stats = {
"input": input_config.s3_endpoint(),
"output": output_config.s3_endpoint(),
"start": before.time,
"stop": after.time,
"read": after.read(),
"written": after.written(),
"elapsed": after.elapsed(),
}
LOGGER.info(f"""Re-encode (tensorstore) {input_config} to {output_config}
read: {after.read()}
write: {after.written()}
time: {after.elapsed()}
read: {stats["read"]}
write: {stats["written"]}
time: {stats["elapsed"]}
""")

## TODO: there is likely an easier way of doing this
metadata = write.kvstore["zarr.json"]
metadata = json.loads(metadata)
if "attributes" in metadata:
attributes = metadata["attributes"]
else:
attributes = {}
metadata["attributes"] = attributes
attributes["_ome2024_ngff_challenge_stats"] = stats
metadata = json.dumps(metadata)
write.kvstore["zarr.json"] = metadata

## TODO: This is not working with v3 branch nor with released version
## zr_array = zarr.open_array(store=output_config.zr_store, mode="a", zarr_format=3)
## zr_array.update_attributes({
## "_ome2024_ngff_challenge_stats": stats,
## })

verify = ts.open(verify_config).result()
LOGGER.info(f"Verifying <{output_config}>\t{read.shape}\t")
for x in range(10):
Expand Down

0 comments on commit 6c443fd

Please sign in to comment.