Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GENIE] Add support for genie-icetray. #41

Merged
merged 14 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
python3 -m pip install nuflux
fi
- name: Download Test Data
run: curl -u icecube:${{secrets.ICECUBE_PASSWORD}} https://convey.icecube.wisc.edu/data/user/kmeagher/simweights_testdata.tar.gz | tar xz
run: curl -u icecube:${{secrets.ICECUBE_PASSWORD}} https://convey.icecube.wisc.edu/data/ana/Software/simweights/test-data/simweights_testdata.tar.gz | tar xz
- name: Run Tests
env:
SIMWEIGHTS_TESTDATA: .
Expand Down
16 changes: 13 additions & 3 deletions contrib/book_simweights_testdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,12 @@ def fake_event_header(frame: dict) -> None:
"genie": [
"/data/sim/IceCubeUpgrade/genie/step3/141828/upgrade_genie_step3_141828_000000.i3.zst",
"/data/sim/IceCube/2023/generated/GENIE/22590/0000000-0000999/GENIE_NuMu_IceCubeUpgrade_v58.22590.000000.i3.zst",
"/data/user/mlarson/icetray/src/genie-reader/resources/scripts/genie_numu_volume_scaling.i3.zst",
"/data/ana/Software/simweights/test-data/genie_numu_volume_scaling.i3.zst",
"/data/ana/Software/simweights/test-data/genie-icetray.140000A_000000.i3.zst",
"/data/ana/Software/simweights/test-data/genie-icetray.140000B_000000.i3.zst",
"/data/ana/Software/simweights/test-data/genie-icetray.140000C_000000.i3.zst",
"/data/ana/Software/simweights/test-data/genie-icetray.140000D_000000.i3.zst",
"/data/ana/Software/simweights/test-data/level2_genie-icetray.140000_000000.i3.zst",
],
}
keys = {
Expand All @@ -74,7 +79,12 @@ def fake_event_header(frame: dict) -> None:
"I3CorsikaWeight",
],
"nugen": ["I3MCWeightDict"],
"genie": ["I3GenieInfo", "I3GenieResult", "I3MCWeightDict"],
"genie": [
"I3GenieInfo",
"I3GenieResult",
"I3GENIEResultDict",
"I3MCWeightDict",
],
"icetop": ["I3TopInjectorInfo", "MCPrimary"],
}
streams = {
Expand Down Expand Up @@ -133,7 +143,7 @@ def fake_event_header(frame: dict) -> None:
tray.Execute()
del tray

tarfilename = "/data/user/kmeagher/simweights_testdata_test.tar.gz"
tarfilename = "/data/ana/Software/simweights/test-data/simweights_testdata.tar.gz"
print(f"Writing tarfile {tarfilename}")

with tarfile.open(tarfilename, "w:gz") as tar:
Expand Down
17 changes: 17 additions & 0 deletions examples/basic_genie_icetray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: © 2022 the SimWeights contributors
#
# SPDX-License-Identifier: BSD-2-Clause
import nuflux
import numpy as np
import pandas as pd

import simweights

simfile = pd.HDFStore("level2_genie-icetray.140000_000000.hdf")
flux_model = nuflux.makeFlux("IPhonda2014_spl_solmax")
weight_obj = simweights.GenieWeighter(simfile, 1)
weights = weight_obj.get_weights(flux_model)
print("Rate from simweights", weights.sum(), "Hz")
simfile.close()
134 changes: 115 additions & 19 deletions src/simweights/_genie_weighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,63 @@
#
# SPDX-License-Identifier: BSD-2-Clause

from __future__ import annotations

from typing import Any, Iterable, Mapping

import numpy as np

from ._generation_surface import GenerationSurface, generation_surface
from ._nugen_weighter import nugen_spatial, nugen_spectrum
from ._powerlaw import PowerLaw
from ._spatial import CircleInjector
from ._utils import Column, Const, get_column, get_table, has_column
from ._utils import Column, Const, constcol, get_column, get_table, has_column, has_table
from ._weighter import Weighter


def genie_surface(table: Iterable[Mapping[str, float]]) -> GenerationSurface:
def genie_icetray_surface(
mcweightdict: list[Mapping[str, float]], geniedict: Iterable[Mapping[str, float]], nufraction: float = 0.7
) -> GenerationSurface:
"""Inspect the rows of a GENIE-icetray"s I3MCWeightDict table object to generate a surface object.

This is a bit of a pain: the oscillations group historically produced 4-5 energy bands with varying
generation parameters, then merged them all into one "file". Because of this, we need to check the
neutrino type, volume, and spectrum to get the correct surfaces. The type weight also isn"t stored
in the files: this was fixed to 70/30 for oscillation-produced genie-icetray files.
"""
gen_schemes = np.array(
[
get_column(geniedict, "neu"),
get_column(mcweightdict, "GeneratorVolume"),
get_column(mcweightdict, "PowerLawIndex"),
get_column(mcweightdict, "MinEnergyLog"),
get_column(mcweightdict, "MaxEnergyLog"),
]
).T
unique_schemes = np.unique(gen_schemes, axis=0)

if len(unique_schemes) == 0:
msg = "`I3MCWeightDict` is empty. SimWeights cannot process this file"
raise RuntimeError(msg)

surfaces = []
for row in unique_schemes:
(pid, _, _, _, _) = row
mask = np.all(gen_schemes == row[None, :], axis=1)

spatial = nugen_spatial(mcweightdict[mask])
spectrum = nugen_spectrum(mcweightdict[mask])

type_weight = nufraction if pid > 0 else 1 - nufraction
n_events = type_weight * constcol(mcweightdict, "NEvents", mask)

surfaces.append(n_events * generation_surface(pid, Column("wght"), spectrum, spatial))
ret = sum(surfaces)
assert isinstance(ret, GenerationSurface)
return ret


def genie_reader_surface(table: Iterable[Mapping[str, float]]) -> GenerationSurface:
"""Inspect the rows of a GENIE S-Frame table object to generate a surface object."""
surfaces = []

Expand Down Expand Up @@ -44,27 +88,79 @@ def genie_surface(table: Iterable[Mapping[str, float]]) -> GenerationSurface:
return retval


def GenieWeighter(file_obj: Any) -> Weighter: # noqa: N802
def GenieWeighter(file_obj: Any, nfiles: float | None = None) -> Weighter: # noqa: N802
# pylint: disable=invalid-name
"""Weighter for GENIE simulation.

Reads ``I3GenieInfo`` from S-Frames and ``I3GenieResult`` from Q-Frames.
Reads ``I3GenieInfo`` from S-Frames and ``I3GenieResult`` from Q-Frames for genie-reader files
and ``I3MCWeightDict`` and "I3GENIEResultDict" from Q-Frames for older legacy genie-icetray files.
"""
info_table = get_table(file_obj, "I3GenieInfo")
result_table = get_table(file_obj, "I3GenieResult")
surface = genie_surface(info_table)

weighter = Weighter([file_obj], surface)
weighter.add_weight_column("pdgid", get_column(result_table, "neu").astype(np.int32))
weighter.add_weight_column("energy", get_column(result_table, "Ev"))
weighter.add_weight_column("cos_zen", get_column(result_table, "pzv"))
weighter.add_weight_column("wght", get_column(result_table, "wght"))

# Include the effect of the muon scaling introduced in icecube/icetray#3607, if present.
if has_column(result_table, "volscale"):
volscale = get_column(result_table, "volscale")
if not any(has_table(file_obj, colname) for colname in ["I3GenieInfo", "I3GenieResult", "I3GENIEResultDict"]):
msg = (
f"The file `{getattr(file_obj, 'filename', '<NONE>')}` does not contain at least one of I3GenieInfo, "
"I3GenieResult, or I3GENIEResultDict, so this is unlikely to be a GENIE file."
)
raise TypeError(msg)
if has_table(file_obj, "I3GenieInfo") and has_table(file_obj, "I3GenieResult"):
# Branch for newer genie-reader files
if nfiles is not None:
msg = (
f"GenieWeighter received an nfiles={nfiles}, but `{getattr(file_obj, 'filename', '<NONE>')}` "
"was produced with genie-reader instead of genie-icetray. We expect to read the number of "
"files from the number of observed S-frames in the file, so this is unnecessary. Do not pass "
"in a value for nfiles for genie-reader files."
)
raise RuntimeError(msg)

info_table = get_table(file_obj, "I3GenieInfo")
result_table = get_table(file_obj, "I3GenieResult")
surface = genie_reader_surface(info_table)

weighter = Weighter([file_obj], surface)
weighter.add_weight_column("pdgid", get_column(result_table, "neu").astype(np.int32))
weighter.add_weight_column("energy", get_column(result_table, "Ev"))
weighter.add_weight_column("cos_zen", get_column(result_table, "pzv"))
weighter.add_weight_column("wght", get_column(result_table, "wght"))

# Include the effect of the muon scaling introduced in icecube/icetray#3607, if present.
if has_column(result_table, "volscale"):
volscale = get_column(result_table, "volscale")
else:
volscale = np.ones_like(get_column(result_table, "wght"))
weighter.add_weight_column("volscale", volscale)

elif has_table(file_obj, "I3MCWeightDict") and has_table(file_obj, "I3GENIEResultDict"):
# Branch for older genie-icetray files
if nfiles is None:
msg = (
f"GenieWeighter received an nfiles={nfiles}, but `{getattr(file_obj, 'filename', '<NONE>')}` "
"was produced with genie-icetray instead of genie-reader. We require the number of files to be "
"passed in for genie-icetray files since we can't simply count S-frames."
)
raise RuntimeError(msg)

weight_table = get_table(file_obj, "I3MCWeightDict")
result_table = get_table(file_obj, "I3GENIEResultDict")

surface = nfiles * genie_icetray_surface(weight_table, result_table)

momentum_vec = np.array(
[get_column(result_table, "pxv"), get_column(result_table, "pyv"), get_column(result_table, "pzv")]
)
cos_zen = -1 * get_column(result_table, "pzv") / np.sum(momentum_vec**2, axis=0) ** 0.5

weighter = Weighter([file_obj], surface)
weighter.add_weight_column("pdgid", get_column(result_table, "neu").astype(np.int32))
weighter.add_weight_column("energy", get_column(result_table, "Ev"))
weighter.add_weight_column("cos_zen", cos_zen)
weighter.add_weight_column("wght", get_column(result_table, "wght") * get_column(result_table, "_glbprbscale"))

else:
volscale = np.ones_like(get_column(result_table, "wght"))
weighter.add_weight_column("volscale", volscale)
msg = (
"Missing at least one necessary object for GENIE event weighting. If your file is produced by "
"genie-icetray, be sure to include both the I3MCWeightDict and I3GENIEResultDict in your input "
"file. If the file is produced by genie-reader, include both I3GenieInfo and I3GenieResult."
)
raise KeyError(msg)

return weighter
82 changes: 82 additions & 0 deletions tests/test_genie_icetray_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env python

# SPDX-FileCopyrightText: © 2022 the SimWeights contributors
#
# SPDX-License-Identifier: BSD-2-Clause
import os
import sys
from pathlib import Path

import h5py
import numpy as np
import pandas as pd
import pytest
import tables

from simweights import GenieWeighter

datasets = [
"genie-icetray.140000A_000000.hdf5",
"genie-icetray.140000B_000000.hdf5",
"genie-icetray.140000C_000000.hdf5",
"genie-icetray.140000D_000000.hdf5",
"level2_genie-icetray.140000_000000.hdf5",
]
approx = pytest.approx
datadir = os.environ.get("SIMWEIGHTS_TESTDATA", None)


@pytest.mark.parametrize("fname", datasets)
@pytest.mark.skipif(not datadir, reason="environment variable SIMWEIGHTS_TESTDATA not set")
def test_dataset(fname):
filename = Path(datadir) / fname
reffile = h5py.File(str(filename), "r")

wd = reffile["I3MCWeightDict"]
grd = reffile["I3GENIEResultDict"]
pdgid = grd["neu"]
emin, emax = 10 ** wd["MinEnergyLog"], 10 ** wd["MaxEnergyLog"]

solid_angle = 2 * np.pi * (np.cos(wd["MinZenith"]) - np.cos(wd["MaxZenith"]))
injection_area_cm = 1e4 * np.pi * wd["InjectionSurfaceR"] ** 2
total_weight = wd["TotalInteractionProbabilityWeight"]

type_weight = np.empty_like(wd["OneWeight"])
type_weight[pdgid > 0] = 0.7
type_weight[pdgid < 0] = 0.3
w0 = wd["OneWeight"] / (wd["NEvents"] * type_weight)

fobjs = [
tables.open_file(str(filename), "r"),
pd.HDFStore(str(filename), "r"),
]

for fobj in fobjs:
w = GenieWeighter(fobj, nfiles=1)

event_weight = w.get_weight_column("wght")
assert event_weight == approx(total_weight)

for particle in np.unique(pdgid):
for spectrum in w.surface.spectra[particle]:
power_min, power_max = spectrum.dists[1].a, spectrum.dists[1].b
event_mask = (pdgid == particle) & (emin == power_min) & (emax == power_max)

power_law = spectrum.dists[1]
energy_factor = 1 / power_law.pdf(w.get_weight_column("energy"))

one_weight = (
w.get_weight_column("wght")[event_mask]
* energy_factor[event_mask]
* solid_angle[event_mask]
* injection_area_cm[event_mask]
)

assert one_weight == approx(wd["OneWeight"][event_mask])

assert w0 == approx(w.get_weights(1), rel=1e-5)
fobj.close()


if __name__ == "__main__":
sys.exit(pytest.main(["-v", __file__, *sys.argv[1:]]))
Loading