mc_truth/interaction + true_pos in prompt_hit + backtrack #128

Merged: 25 commits, Jun 7, 2024

Changes from all commits (25 commits)
24f8beb
fill interactions with vertices dataset if mc_hdr is not available
Mar 29, 2024
c4c21a7
use unique vertex/traj id
Apr 23, 2024
d049d90
correct the pixel pitch
May 4, 2024
1b4a947
temporary fix by bumping up the buffer size
May 4, 2024
459a409
add true position and E to hits dataset
May 24, 2024
d13d96b
math.log
May 24, 2024
757f337
store all the true positions
May 30, 2024
9800a96
Update raw_event_generator.py
YifanC May 30, 2024
909fbe6
make sure hit merger can use calib_hits_dtype
May 30, 2024
996e6b6
Merge remote-tracking branch 'origin/feature_interaction' into featur…
May 30, 2024
52245cf
Merge remote-tracking branch 'origin/feature_true_pos' into feature_b…
May 30, 2024
10f5d7b
only use file_traj_id when it's available for mc_stack and trajectories
May 31, 2024
b759735
Merge branch 'feature_interaction' of github.com:DUNE/ndlar_flow into…
May 31, 2024
dc5e8d6
copy the backtrack info instead of refill
Jun 1, 2024
559f9e2
remove the part using segment id as index for reference
Jun 1, 2024
d772fe4
update calib hit merger subsequently
Jun 4, 2024
5342f23
Merge remote-tracking branch 'origin/develop' into feature_interaction
Jun 4, 2024
c8705b4
Update RawEventGenerator.yaml
YifanC Jun 4, 2024
d525b31
manually resolve merging issues in calib_prompt_hits.py
Jun 4, 2024
06e566d
resolve merging issue in calib_prompt_hits.py [2]
YifanC Jun 4, 2024
eacffbf
manually solve merging conflict in calib_prompt_hits.py
Jun 4, 2024
58c952e
Merge remote-tracking branch 'origin/feature_true_pos' into feature_b…
Jun 4, 2024
4e29c68
correct an issue caused by merging
Jun 4, 2024
1f0dc40
Update dtype just once
mjkramer Jun 5, 2024
ad12385
typo
mjkramer Jun 5, 2024
35 changes: 13 additions & 22 deletions src/proto_nd_flow/reco/charge/calib_hit_merger.py
@@ -68,7 +68,7 @@ def init(self, source_name):

self.hit_frac_dtype = np.dtype([
('fraction', f'({self.max_contrib_segments},)f8'),
('segment_id', f'({self.max_contrib_segments},)u8')
('segment_ids', f'({self.max_contrib_segments},)i8')
])

self.data_manager.create_dset(self.merged_name, dtype=self.merged_dtype)
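The switch to a signed `i8` field matters because unused slots are later filled with -1, which an unsigned `u8` field cannot represent. A minimal sketch (`max_contrib_segments` is illustrative; the real value is configured on the module):

```python
import numpy as np

max_contrib_segments = 4  # illustrative; configured on the module in practice

hit_frac_dtype = np.dtype([
    ('fraction', f'({max_contrib_segments},)f8'),
    ('segment_ids', f'({max_contrib_segments},)i8'),  # signed, so -1 can flag an empty slot
])

row = np.zeros(1, dtype=hit_frac_dtype)
row['segment_ids'] = -1          # fine with i8; u8 would wrap or raise
print(row['segment_ids'])        # [[-1 -1 -1 -1]]
```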
@@ -106,31 +106,21 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_

'''

has_mc_truth = seg_fracs[0] is not None
has_mc_truth = seg_fracs is not None
iteration_count = 0
mask = hits.mask['id'].copy()
new_hits = hits.data.copy()
weights = weights.data.copy()
old_ids = hits.data['id'].copy()[...,np.newaxis]
old_id_mask = hits.mask['id'].copy()[...,np.newaxis]
if has_mc_truth:
new_seg_bt = np.array(seg_fracs[0])
new_frac_bt = np.array(seg_fracs[1])
hit_contributions = np.full(shape=weights.shape+(3,self.max_contrib_segments),fill_value=0.)
# initialize hit contribution lists with unmerged data.
# [there is probably a more pythonic way of doing this...]
for it, q in np.ndenumerate(weights):
if len(new_frac_bt[it]) > 1:
print('!!!!!!!!!!!!!!!!!')
break
counter=0
for entry_it, entry in enumerate(new_frac_bt[it][0]):
if abs(entry) < 0.001: continue
hit_contributions[it][0][counter] = q
hit_contributions[it][1][counter] = new_frac_bt[it][0][entry_it]
hit_contributions[it][2][counter] = new_seg_bt[it][0][entry_it]['segment_id']
counter+=1

prompt_hit_max_seg_contrib = seg_fracs['segment_ids'].shape[-1]
hit_contributions[:,:,0,:] = np.pad(np.expand_dims(weights,axis=2),((0,0),(0,0),(0,self.max_contrib_segments-1)),'mean')
hit_contributions[:,:,1,:] = np.pad(np.squeeze(seg_fracs['fraction']),((0,0),(0,0),(0, self.max_contrib_segments-prompt_hit_max_seg_contrib)),'constant',constant_values=0)
hit_contributions[:,:,2,:] = np.pad(np.squeeze(seg_fracs['segment_ids']),((0,0),(0,0),(0, self.max_contrib_segments-prompt_hit_max_seg_contrib)),'constant',constant_values=-1)
while new_hits.size > 0 and iteration_count != max_steps:
iteration_count += 1
# algorithm is iterative, but typically only needs to loop a few (~2-3) times
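The three `np.pad` calls above replace the old per-hit Python loop: padding the length-1 charge axis with `'mean'` simply repeats each hit's charge across every segment slot, while fractions and segment ids are padded out to `self.max_contrib_segments` with 0 and -1 respectively. A sketch under assumed toy shapes (2 events × 3 hits, 2 backtracked segments per hit, buffer sized for 4):

```python
import numpy as np

max_contrib_segments = 4
weights = np.arange(1.0, 7.0).reshape(2, 3)                      # hit charges Q
fraction = np.random.rand(2, 3, 1, 2)                            # per-segment charge fractions
segment_ids = np.arange(12, dtype=np.int64).reshape(2, 3, 1, 2)  # backtracked segment ids
n_seg = segment_ids.shape[-1]

hit_contributions = np.zeros(weights.shape + (3, max_contrib_segments))

# 'mean' padding of a length-1 axis repeats the single charge value
hit_contributions[:, :, 0, :] = np.pad(
    np.expand_dims(weights, axis=2),
    ((0, 0), (0, 0), (0, max_contrib_segments - 1)), 'mean')
hit_contributions[:, :, 1, :] = np.pad(
    np.squeeze(fraction, axis=2),
    ((0, 0), (0, 0), (0, max_contrib_segments - n_seg)), 'constant', constant_values=0)
hit_contributions[:, :, 2, :] = np.pad(
    np.squeeze(segment_ids, axis=2),
    ((0, 0), (0, 0), (0, max_contrib_segments - n_seg)), 'constant', constant_values=-1)
```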
@@ -213,9 +203,9 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_
hit_contributions[...,:-1][hit_it][0][e+comb_it] = hit_contributions[...,:][hit_it[0],hit_it[1]+1][0][comb_it]
hit_contributions[...,:-1][hit_it][2][e+comb_it] = hit_contributions[...,:][hit_it[0],hit_it[1]+1][2][comb_it]
# and remove them from the hit that was merged in
hit_contributions[hit_it[0],hit_it[1]+1][1][comb_it] = 0
hit_contributions[hit_it[0],hit_it[1]+1][0][comb_it] = 0.
hit_contributions[hit_it[0],hit_it[1]+1][2][comb_it] = 0.
hit_contributions[hit_it[0],hit_it[1]+1][1][comb_it] = 0.
hit_contributions[hit_it[0],hit_it[1]+1][2][comb_it] = -1

# now we mask off hits that have already been merged
mask[...,1:] = mask[...,1:] | to_merge
@@ -249,7 +239,9 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_
for hit_it, hit in np.ndenumerate(new_hits):
if mask[hit_it]: continue
hit_contr = hit_contributions[hit_it]

# renormalize the fractional contributions given the charge weighted average
# YC, 2024-06-03: I think we should check this norm is consistent with the sum of Q from all contributed prompt hits
norm = np.sum(np.multiply(hit_contr[0],hit_contr[1]))
if norm == 0.: norm = 1.
tmp_bt_0 = np.multiply(hit_contr[0],hit_contr[1])/norm # fractional contributions
@@ -267,9 +259,9 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_
bt_unique_segs = np.take_along_axis(bt_unique_segs, isort, axis=-1)
bt_unique_frac = np.take_along_axis(bt_unique_frac, isort, axis=-1)
back_track[hit_it]['fraction'] = [0.]*self.max_contrib_segments
back_track[hit_it]['segment_id'] = [0]*self.max_contrib_segments
back_track[hit_it]['segment_ids'] = [-1]*self.max_contrib_segments
back_track[hit_it]['fraction'][:bt_unique_frac.shape[0]] = bt_unique_frac
back_track[hit_it]['segment_id'][:bt_unique_segs.shape[0]] = bt_unique_segs
back_track[hit_it]['segment_ids'][:bt_unique_segs.shape[0]] = bt_unique_segs
else: back_track = None

new_hit_idx = np.broadcast_to(np.cumsum(~mask.ravel(), axis=0).reshape(mask.shape + (1,)), old_ids.shape)-1
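The renormalization above weights each slot's fraction by its charge, divides by the total backtracked charge (the `norm` flagged in YC's comment), and then sums duplicate segment ids into a single entry. A worked toy example with made-up numbers:

```python
import numpy as np

q    = np.array([10.0, 10.0, 5.0])   # charge carried by each contribution slot
frac = np.array([ 0.8,  0.2, 1.0])   # fraction of that charge from each segment
segs = np.array([   7,    9,   7])   # segment ids (7 appears twice)

norm = np.sum(q * frac)              # total backtracked charge, here 15.0
contrib = q * frac / norm            # per-slot fractions, now summing to 1

# collapse repeated segment ids by summing their fractions
uniq, inv = np.unique(segs, return_inverse=True)
merged = np.zeros(uniq.shape)
np.add.at(merged, inv, contrib)
print(dict(zip(uniq.tolist(), merged.round(3).tolist())))   # {7: 0.867, 9: 0.133}
```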
@@ -285,10 +277,9 @@ def run(self, source_name, source_slice, cache):

event_id = np.r_[source_slice]
packet_frac_bt = cache['packet_frac_backtrack']
packet_seg_bt = cache['packet_seg_backtrack']
hits = cache[self.hits_name]

merged, ref, back_track = self.merge_hits(hits, weights=hits['Q'], seg_fracs=[packet_seg_bt,packet_frac_bt],dt_cut=self.merge_cut, sum_fields=self.sum_fields, weighted_mean_fields=self.weighted_mean_fields, max_steps=self.max_merge_steps, mode=self.merge_mode)
merged, ref, back_track = self.merge_hits(hits, weights=hits['Q'], seg_fracs=packet_frac_bt,dt_cut=self.merge_cut, sum_fields=self.sum_fields, weighted_mean_fields=self.weighted_mean_fields, max_steps=self.max_merge_steps, mode=self.merge_mode)

merged_mask = merged.mask['id']

90 changes: 47 additions & 43 deletions src/proto_nd_flow/reco/charge/calib_prompt_hits.py
@@ -102,39 +102,12 @@ def __init__(self, **params):
self.t0_dset_name = params.get('t0_dset_name')
self.pedestal_file = params.get('pedestal_file', '')
self.configuration_file = params.get('configuration_file', '')
self.max_contrib_segments = params.get('max_contrib_segments','')

def init(self, source_name):
super(CalibHitBuilder, self).init(source_name)
self.load_pedestals()
self.load_configurations()

self.hit_frac_dtype = np.dtype([
('fraction', f'({self.max_contrib_segments},)f8'),
('segment_id', f'({self.max_contrib_segments},)u8')
])

# save all config info
self.data_manager.set_attrs(self.calib_hits_dset_name,
classname=self.classname,
class_version=self.class_version,
source_dset=source_name,
packets_dset=self.packets_dset_name,
t0_dset=self.t0_dset_name,
pedestal_file=self.pedestal_file,
configuration_file=self.configuration_file
)

# then set up new datasets
self.data_manager.create_dset(self.calib_hits_dset_name, dtype=self.calib_hits_dtype)
if resources['RunData'].is_mc:
self.data_manager.create_dset(self.mc_hit_frac_dset_name, dtype=self.hit_frac_dtype)
self.data_manager.create_ref(source_name, self.calib_hits_dset_name)
self.data_manager.create_ref(self.calib_hits_dset_name, self.packets_dset_name)
self.data_manager.create_ref(self.events_dset_name, self.calib_hits_dset_name)
if resources['RunData'].is_mc:
self.data_manager.create_ref(self.calib_hits_dset_name, self.mc_hit_frac_dset_name)

def run(self, source_name, source_slice, cache):
super(CalibHitBuilder, self).run(source_name, source_slice, cache)
events_data = cache[self.events_dset_name]
@@ -146,6 +119,8 @@ def run(self, source_name, source_slice, cache):
t0_data = cache[self.t0_dset_name]
raw_hits = cache[self.raw_hits_dset_name]

has_mc_truth = packet_frac_bt is not None

mask = ~rfn.structured_to_unstructured(packets_data.mask).any(axis=-1)
rh_mask = ~rfn.structured_to_unstructured(raw_hits.mask).any(axis=-1)

@@ -157,16 +132,36 @@ def run(self, source_name, source_slice, cache):
packets_arr = packets_data.data[mask]
if resources['RunData'].is_mc:
packet_frac_bt_arr = packet_frac_bt.data[mask]
packet_frac_bt_arr = np.concatenate(packet_frac_bt_arr)
packet_seg_bt_arr = packet_seg_bt.data[mask]
index_arr = packets_index.data[mask]
else:
n = 0
index_arr = np.zeros((0,), dtype=packets_index.dtype)

if resources['RunData'].is_mc:
has_mc_truth = packet_seg_bt is not None
else:
has_mc_truth = False
if has_mc_truth and ('x_true_seg_t' not in self.calib_hits_dtype.fields):
self.calib_hits_dtype = np.dtype(self.calib_hits_dtype.descr + [('x_true_seg_t', f'({packet_seg_bt.shape[-1]},)f8'), ('E_true_recomb_elife', f'({packet_seg_bt.shape[-1]},)f8')])

# save all config info
self.data_manager.set_attrs(self.calib_hits_dset_name,
classname=self.classname,
class_version=self.class_version,
source_dset=source_name,
packets_dset=self.packets_dset_name,
t0_dset=self.t0_dset_name,
pedestal_file=self.pedestal_file,
configuration_file=self.configuration_file
)

# then set up new datasets
self.data_manager.create_dset(self.calib_hits_dset_name, dtype=self.calib_hits_dtype)
if has_mc_truth:
self.data_manager.create_dset(self.mc_hit_frac_dset_name, dtype=packet_frac_bt_arr.dtype)
self.data_manager.create_ref(source_name, self.calib_hits_dset_name)
self.data_manager.create_ref(self.calib_hits_dset_name, self.packets_dset_name)
self.data_manager.create_ref(self.events_dset_name, self.calib_hits_dset_name)
if has_mc_truth:
self.data_manager.create_ref(self.calib_hits_dset_name, self.mc_hit_frac_dset_name)

# reserve new data
calib_hits_slice = self.data_manager.reserve_data(self.calib_hits_dset_name, n)
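Building `calib_hits_dtype` here in `run`, rather than in `init`, lets the new truth fields size themselves to however many segments the input file backtracks per packet. A sketch of the `descr`-based extension (`base_dtype` and `n_seg` are stand-ins for the configured hits dtype and `packet_seg_bt.shape[-1]`):

```python
import numpy as np

base_dtype = np.dtype([('id', 'u4'), ('x', 'f8'), ('Q', 'f8')])  # stand-in
n_seg = 5                                                        # stand-in

extended = np.dtype(base_dtype.descr +
                    [('x_true_seg_t', f'({n_seg},)f8'),
                     ('E_true_recomb_elife', f'({n_seg},)f8')])

hits = np.zeros(3, dtype=extended)
hits['x_true_seg_t'][:] = -1.0   # one true drift position per contributing segment
print(extended.names)
```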
@@ -198,11 +193,16 @@ def run(self, source_name, source_slice, cache):
hit_t0[first_index:last_index] = np.full(n_not_masked,t0)
first_index += n_not_masked

drift_t = raw_hits_arr['ts_pps'] - hit_t0

drift_t = raw_hits_arr['ts_pps'] - hit_t0 #ticks
drift_d = drift_t * (resources['LArData'].v_drift * resources['RunData'].crs_ticks) / units.cm # convert mm -> cm
x = resources['Geometry'].get_drift_coordinate(packets_arr['io_group'],packets_arr['io_channel'],drift_d)

# true drift position pair
if has_mc_truth:
drift_t_true = packet_seg_bt_arr['t'] #us
drift_d_true = drift_t_true * (resources['LArData'].v_drift) / units.cm # convert mm -> cm
x_true_seg_t = resources['Geometry'].get_drift_coordinate(packets_arr['io_group'],packets_arr['io_channel'],drift_d_true)

zy = resources['Geometry'].pixel_coordinates_2D[packets_arr['io_group'],
packets_arr['io_channel'], packets_arr['chip_id'], packets_arr['channel_id']]
tile_id = resources['Geometry'].tile_id[packets_arr['io_group'],packets_arr['io_channel']]
Expand All @@ -219,27 +219,30 @@ def run(self, source_name, source_slice, cache):
for unique_id in hit_uniqueid_str])
calib_hits_arr['id'] = calib_hits_slice.start + np.arange(n, dtype=int)
calib_hits_arr['x'] = x
if has_mc_truth:
calib_hits_arr['x_true_seg_t'] = x_true_seg_t
calib_hits_arr['y'] = zy[:,1]
calib_hits_arr['z'] = zy[:,0]
calib_hits_arr['ts_pps'] = raw_hits_arr['ts_pps']
calib_hits_arr['t_drift'] = drift_t
calib_hits_arr['io_group'] = packets_arr['io_group']
calib_hits_arr['io_channel'] = packets_arr['io_channel']
calib_hits_arr['Q'] = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped)
#!!! hardcoding W_ion, R=0.7, and not accounting for finite electron lifetime
calib_hits_arr['E'] = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped) * 23.6e-3 / 0.7

# create truth-level backtracking dataset
hits_charge = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped) # ke-
calib_hits_arr['Q'] = hits_charge # ke-
#FIXME supply more realistic dEdx in the recombination; also apply measured electron lifetime
calib_hits_arr['E'] = hits_charge * (1000 * units.e) / resources['LArData'].ionization_recombination(mode=2,dEdx=2) * (resources['LArData'].ionization_w / units.MeV) # MeV
if has_mc_truth:
back_track = np.full(shape=packets_arr.shape,fill_value=0.,dtype=self.hit_frac_dtype)
for hit_it, pack in np.ndenumerate(packets_arr):
back_track[hit_it]['fraction'] = packet_frac_bt_arr[hit_it]
back_track[hit_it]['segment_id'] = packet_seg_bt_arr[hit_it]['segment_id']
true_recomb = resources['LArData'].ionization_recombination(mode=1,dEdx=packet_seg_bt_arr['dEdx'])
calib_hits_arr['E_true_recomb_elife'] = np.divide(hits_charge.reshape((hits_charge.shape[0],1)) * (1000 * units.e), true_recomb, out=np.zeros_like(true_recomb), where=true_recomb!=0) / resources['LArData'].charge_reduction_lifetime(t_drift=drift_t_true) * (resources['LArData'].ionization_w / units.MeV) # MeV

# if back tracking information was available, write the merged back tracking
# dataset to file
if has_mc_truth:
self.data_manager.write_data(self.mc_hit_frac_dset_name, hit_bt_slice, back_track)
# make sure packets and packet backtracking match in numbers
if packets_arr.shape[0] == packet_frac_bt_arr.shape[0]:
self.data_manager.write_data(self.mc_hit_frac_dset_name, hit_bt_slice, packet_frac_bt_arr)
else:
raise Exception("The data packet and backtracking info do not match in size.")

# write
self.data_manager.write_data(self.calib_hits_dset_name, calib_hits_slice, calib_hits_arr)
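For orientation, the reconstructed-energy line amounts to multiplying the hit charge by the ionization work function and dividing by a recombination factor; the removed code hard-coded this as `Q * 23.6e-3 / 0.7`. A back-of-the-envelope sketch with those same stand-in constants (the real values come from `resources['LArData']` at run time):

```python
W_ION_MEV_PER_KE = 23.6e-3   # 23.6 eV per ionization electron, expressed per ke-
RECOMB = 0.7                 # flat recombination factor (stand-in for mode=2, dEdx=2)

def hit_energy_mev(q_ke):
    """Convert a hit charge in ke- to deposited energy in MeV."""
    return q_ke * W_ION_MEV_PER_KE / RECOMB

print(hit_energy_mev(50.0))  # 50 ke- -> ~1.69 MeV
```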
@@ -277,3 +280,4 @@ def load_configurations(self):
with open(self.configuration_file, 'r') as infile:
for key, value in json.load(infile).items():
self.configuration[key] = value
