Skip to content

Commit

Permalink
Change names associated with Kriging and GeoDataFrames that hold fina…
Browse files Browse the repository at this point in the history
…l results (#57)

* remove Area (km^2) column from the mesh input data

* change area_calc to cell_area_nmi2

* change krig_biomass_vals to biomass

* change krig_biomass_vp/ep/eps to biomass_density_adult_mean/var/samplevar

* remove areal as a prefix for variables and functions, but still include the specification in docs and comments

* change all vp/ep/eps_val occurances to field_mean/var/samplevar and vp/ep/eps_arr occurances to field_mean/var/samplevar_arr

* change final_biomass_table to transect_results_gdf and krig_results_gdf to kriging_results_gdf
  • Loading branch information
b-reyes authored Dec 16, 2022
1 parent aa0e870 commit e2452b9
Show file tree
Hide file tree
Showing 11 changed files with 128 additions and 129 deletions.
56 changes: 28 additions & 28 deletions EchoPro/computation/biomass_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ def __init__(self, survey=None):
self.strata_df = None
self.specimen_df = None
self.nasc_df = None
self.final_biomass_table = None
self.krig_results_gdf = None
self.transect_results_gdf = None
self.kriging_results_gdf = None
self.bio_param_df = None # biomass parameters for each stratum
self.weight_fraction_adult_df = None
self.strata_sig_b = None
Expand Down Expand Up @@ -597,14 +597,14 @@ def _get_interval(nasc_df: pd.DataFrame) -> np.ndarray:

return interval

def _get_tot_areal_biomass_density(self, areal_numerical_density: pd.Series) -> np.ndarray:
def _get_tot_biomass_density(self, numerical_density: pd.Series) -> np.ndarray:
"""
Calculates the total areal biomass density
for each NASC value.
Parameters
----------
areal_numerical_density : pd.Series
numerical_density : pd.Series
Series representing the areal numerical density
Returns
Expand All @@ -616,17 +616,17 @@ def _get_tot_areal_biomass_density(self, areal_numerical_density: pd.Series) ->
bc_expanded_df = self.bio_param_df.loc[self.nasc_df.stratum_num.values]

# compute the areal numerical density for males and females
areal_numerical_density_male = np.round(areal_numerical_density.values * bc_expanded_df.M_prop.values)
areal_numerical_density_female = np.round(areal_numerical_density.values * bc_expanded_df.F_prop.values)
numerical_density_male = np.round(numerical_density.values * bc_expanded_df.M_prop.values)
numerical_density_female = np.round(numerical_density.values * bc_expanded_df.F_prop.values)

# compute the areal biomass density for males, females, and unsexed
areal_biomass_density_male = areal_numerical_density_male * bc_expanded_df.averaged_weight_M.values
areal_biomass_density_female = areal_numerical_density_female * bc_expanded_df.averaged_weight_F.values
areal_biomass_density_unsexed = (areal_numerical_density.values - areal_numerical_density_male
- areal_numerical_density_female) * bc_expanded_df.averaged_weight.values
biomass_density_male = numerical_density_male * bc_expanded_df.averaged_weight_M.values
biomass_density_female = numerical_density_female * bc_expanded_df.averaged_weight_F.values
biomass_density_unsexed = (numerical_density.values - numerical_density_male
- numerical_density_female) * bc_expanded_df.averaged_weight.values

# compute the total areal biomass density
return areal_biomass_density_male + areal_biomass_density_female + areal_biomass_density_unsexed
return biomass_density_male + biomass_density_female + biomass_density_unsexed

def _get_age_weight_conversion(self, df: Union[pd.DataFrame, pd.Series]) -> float:
"""
Expand Down Expand Up @@ -686,20 +686,20 @@ def _get_weight_fraction_adult(self) -> None:
for i in stratum_ind:
self.weight_fraction_adult_df.loc[i].val = 1.0 - self._get_age_weight_conversion(spec_drop.loc[i])

def _construct_biomass_table(self, areal_biomass_density_adult: np.array) -> None:
def _construct_biomass_table(self, biomass_density_adult: np.array) -> None:
"""
Constructs self.final_biomass_table, which
Constructs self.transect_results_gdf, which
contains the areal biomass density for adults.
Parameters
----------
areal_biomass_density_adult : np.array
biomass_density_adult : np.array
Numpy array of areal biomass density adult
"""

# minimal columns to do Jolly Hampton CV on data that has not been kriged
final_df = self.nasc_df[['latitude', 'longitude', 'stratum_num', 'transect_spacing']].copy()
final_df["areal_biomass_density_adult"] = areal_biomass_density_adult
final_df["biomass_density_adult"] = biomass_density_adult

# TODO: should we include the below values in the final biomass table?
# calculates the interval for the area calculation
Expand All @@ -709,12 +709,12 @@ def _construct_biomass_table(self, areal_biomass_density_adult: np.array) -> Non
# final_df["Area"] = interval * self.nasc_df['transect_spacing']

# calculate the total number of fish in a given area
# final_df["N_A"] = areal_numerical_density * A
# final_df["N_A"] = numerical_density * A

# construct GeoPandas DataFrame to simplify downstream processes
self.final_biomass_table = gpd.GeoDataFrame(final_df,
geometry=gpd.points_from_xy(final_df.longitude,
final_df.latitude))
self.transect_results_gdf = gpd.GeoDataFrame(final_df,
geometry=gpd.points_from_xy(final_df.longitude,
final_df.latitude))

def set_class_variables(self, selected_transects: Optional[List] = None) -> None:
"""
Expand Down Expand Up @@ -758,10 +758,10 @@ def set_class_variables(self, selected_transects: Optional[List] = None) -> None
self.specimen_df = self.survey.specimen_df.copy()
self.nasc_df = self.survey.nasc_df

def get_final_biomass_table(self, selected_transects: Optional[List] = None) -> None:
def get_transect_results_gdf(self, selected_transects: Optional[List] = None) -> None:
"""
Orchestrates the calculation of the areal biomass density
and creation of self.final_biomass_table, which contains
and creation of self.transect_results_gdf, which contains
the areal biomass density of adult hake and associated useful variables.
Parameters
Expand Down Expand Up @@ -793,14 +793,14 @@ def get_final_biomass_table(self, selected_transects: Optional[List] = None) ->
mix_sa_ratio = self.nasc_df.apply(lambda x: wgt_vals[x.haul_num] if x.haul_num in wgt_vals_ind else 0.0, axis=1)

# calculate the areal numerical density
areal_numerical_density = np.round((mix_sa_ratio*self.nasc_df.NASC) /
self.strata_sig_b.loc[self.nasc_df.stratum_num].values)
numerical_density = np.round((mix_sa_ratio*self.nasc_df.NASC) /
self.strata_sig_b.loc[self.nasc_df.stratum_num].values)

# total areal biomass density for each areal_numerical_density value
areal_biomass_density = self._get_tot_areal_biomass_density(areal_numerical_density)
# total areal biomass density for each numerical_density value
biomass_density = self._get_tot_biomass_density(numerical_density)

# obtain the areal biomass density for adults
areal_biomass_density_adult = (areal_biomass_density *
self.weight_fraction_adult_df.loc[self.nasc_df.stratum_num.values].values.flatten())
biomass_density_adult = (biomass_density *
self.weight_fraction_adult_df.loc[self.nasc_df.stratum_num.values].values.flatten())

self._construct_biomass_table(areal_biomass_density_adult)
self._construct_biomass_table(biomass_density_adult)
4 changes: 2 additions & 2 deletions EchoPro/computation/bootstrapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def _get_results_for_no_kriging(self) -> List[float]:
"""

# calculate total biomass density
tot_bio_mass_no_kriging = self.survey.bio_calc.final_biomass_table["areal_biomass_density_adult"].sum()
tot_bio_mass_no_kriging = self.survey.bio_calc.transect_results_gdf["biomass_density_adult"].sum()

# perform CV analysis on data
CV_JH_mean_no_kriging = self.survey.run_cv_analysis(kriged_data=False)
Expand Down Expand Up @@ -136,7 +136,7 @@ def _get_results_for_kriging(self, krig_mesh_obj: KrigingMesh,
krig.run_biomass_kriging(krig_mesh_obj)

# calculate the total Kriged biomass density
tot_bio_mass_kriging = self.survey.bio_calc.krig_results_gdf.krig_biomass_vals.sum()
tot_bio_mass_kriging = self.survey.bio_calc.kriging_results_gdf.biomass.sum()

# perform CV analysis on Kriged data
CV_JH_mean_kriging = self.survey.run_cv_analysis(kriged_data=True)
Expand Down
14 changes: 7 additions & 7 deletions EchoPro/computation/cv.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def get_transect_strata_info_no_kriging(lat_inpfc: Tuple[float],
each region within a survey (established by INPFC)
biomass_table : pd.DataFrame
DataFrame containing Longitude, Latitude, Spacing, and
areal_biomass_density_adult columns
biomass_density_adult columns
Returns
-------
Expand All @@ -43,7 +43,7 @@ def get_transect_strata_info_no_kriging(lat_inpfc: Tuple[float],
transect_info["mean_spacing"] = biomass_table['transect_spacing'].groupby(level=0).mean()

# store the sum of the biomass for each transect
transect_info["biomass"] = biomass_table['areal_biomass_density_adult'].groupby(level=0).sum()
transect_info["biomass"] = biomass_table['biomass_density_adult'].groupby(level=0).sum()

# compute the length of each transect
transect_info["distance"] = transect_info.apply(
Expand Down Expand Up @@ -82,7 +82,7 @@ def get_transect_strata_info_kriged(lat_inpfc: Tuple[float],
each region within a survey (established by INPFC)
biomass_table : pd.DataFrame
DataFrame containing Latitude of centroid,
Longitude of centroid, and krig_biomass_vals columns
Longitude of centroid, and biomass columns
Returns
-------
Expand All @@ -95,7 +95,7 @@ def get_transect_strata_info_kriged(lat_inpfc: Tuple[float],
# reduce biomass table to only essential columns
reduced_table = biomass_table[["centroid_latitude",
"centroid_longitude",
"krig_biomass_vals"]].copy()
"biomass"]].copy()

# number of "virtual transects" within a latitude degree
n_transect_per_lat = 5 # TODO: make this an input
Expand All @@ -113,7 +113,7 @@ def get_transect_strata_info_kriged(lat_inpfc: Tuple[float],
transect_info = pd.DataFrame(index=uniq_lat_eq_inc, dtype=np.float64)

# store the sum of the biomass for each transect
transect_info['biomass'] = reduced_table['krig_biomass_vals'].groupby(level='lat_eq_inc').sum()
transect_info['biomass'] = reduced_table['biomass'].groupby(level='lat_eq_inc').sum()

# store max and min of the longitude
transect_info["max_longitude"] = reduced_table['centroid_longitude'].groupby(level=0).max()
Expand Down Expand Up @@ -190,10 +190,10 @@ def run_jolly_hampton(survey, nr: int, lat_inpfc: Tuple[float],

if kriged_data:
transect_info, strata_info = get_transect_strata_info_kriged(lat_inpfc,
survey.bio_calc.krig_results_gdf)
survey.bio_calc.kriging_results_gdf)
else:
transect_info, strata_info = get_transect_strata_info_no_kriging(lat_inpfc,
survey.bio_calc.final_biomass_table)
survey.bio_calc.transect_results_gdf)

# get numpy form of dataframe values, so we can use Numba
transect_distances = transect_info['distance'].values.flatten()
Expand Down
82 changes: 41 additions & 41 deletions EchoPro/computation/kriging.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,7 @@ def _compute_kriging_vals(field_data: np.ndarray, M2: np.ndarray, lamb: np.ndarr
M2_weight: float, R_ind: np.ndarray, R_ind_not: np.ndarray,
dis_sel_ind: np.ndarray) -> Tuple[float, float, float]:
"""
Computes the Kriged values, Kriging variance, and
Kriging sample variance.
Computes the Kriging mean, variance, and sample variance.
Parameters
----------
Expand All @@ -265,12 +264,12 @@ def _compute_kriging_vals(field_data: np.ndarray, M2: np.ndarray, lamb: np.ndarr
Returns
-------
ep_val : float
field_var : float
Kriging variance
eps_val : float
field_samplevar : float
Kriging sample variance
vp_val : float
Kriged value
field_mean : float
Kriged value mean
"""

# obtain field values for indices within R
Expand All @@ -281,19 +280,20 @@ def _compute_kriging_vals(field_data: np.ndarray, M2: np.ndarray, lamb: np.ndarr
field_vals = field_data[dis_sel_ind]

# calculate Kriging value and variance
vp_val = np.nansum(lamb[:len(R_ind)] * field_vals) * M2_weight
ep_val = np.nansum(lamb * M2)
field_mean = np.nansum(lamb[:len(R_ind)] * field_vals) * M2_weight
field_var = np.nansum(lamb * M2)

# calculate Kriging sample variance
if abs(vp_val) < np.finfo(float).eps:
eps_val = np.nan
if abs(field_mean) < np.finfo(float).eps:
field_samplevar = np.nan
else:
field_var = np.nanvar(field_vals, ddof=1)
eps_val = np.sqrt(ep_val * field_var) / abs(vp_val)
# compute the statistical variance using field values
stat_field_var = np.nanvar(field_vals, ddof=1)
field_samplevar = np.sqrt(field_var * stat_field_var) / abs(field_mean)

# TODO: Do we count the anomalies like Chu does?

return ep_val, eps_val, vp_val
return field_var, field_samplevar, field_mean

def run_kriging(self, x_mesh: np.ndarray, x_data: np.ndarray,
y_mesh: np.ndarray, y_data: np.ndarray,
Expand All @@ -318,12 +318,12 @@ def run_kriging(self, x_mesh: np.ndarray, x_data: np.ndarray,
Returns
-------
ep_arr : np.ndarray
field_var_arr : np.ndarray
1D array representing the Kriging variance for each mesh coordinate
eps_arr : np.ndarray
field_samplevar_arr : np.ndarray
1D array representing the Kriging sample variance for each mesh coordinate
vp_arr : np.ndarray
1D array representing the Kriged value for each mesh coordinate
field_mean_arr : np.ndarray
1D array representing the Kriged mean value for each mesh coordinate
Notes
-----
Expand All @@ -334,9 +334,9 @@ def run_kriging(self, x_mesh: np.ndarray, x_data: np.ndarray,
y_mesh, y_data)

# initialize arrays that store calculated Kriging values
ep_arr = np.empty(dis_kmax_ind.shape[0])
eps_arr = np.empty(dis_kmax_ind.shape[0])
vp_arr = np.empty(dis_kmax_ind.shape[0])
field_var_arr = np.empty(dis_kmax_ind.shape[0])
field_samplevar_arr = np.empty(dis_kmax_ind.shape[0])
field_mean_arr = np.empty(dis_kmax_ind.shape[0])

# TODO: This loop can be parallelized, if necessary
# does Ordinary Kriging, follow Journel and Huijbregts, p. 307
Expand All @@ -351,27 +351,27 @@ def run_kriging(self, x_mesh: np.ndarray, x_data: np.ndarray,

lamb = self._compute_lambda_weights(M2, K)

ep_val, eps_val, vp_val = self._compute_kriging_vals(field_data, M2, lamb,
M2_weight, R_ind,
R_ind_not, dis_sel_ind)
field_var, field_samplevar, field_mean = self._compute_kriging_vals(field_data, M2, lamb,
M2_weight, R_ind,
R_ind_not, dis_sel_ind)

# store important calculated values
ep_arr[row] = ep_val
eps_arr[row] = eps_val
vp_arr[row] = vp_val
field_var_arr[row] = field_var
field_samplevar_arr[row] = field_samplevar
field_mean_arr[row] = field_mean

# zero-out all vp values that are nan or negative # TODO: Is this necessary?
neg_nan_ind = np.argwhere((vp_arr < 0) | np.isnan(vp_arr)).flatten()
vp_arr[neg_nan_ind] = 0.0
# zero-out all field mean values that are nan or negative # TODO: Is this necessary?
neg_nan_ind = np.argwhere((field_mean_arr < 0) | np.isnan(field_mean_arr)).flatten()
field_mean_arr[neg_nan_ind] = 0.0

return ep_arr, eps_arr, vp_arr
return field_var_arr, field_samplevar_arr, field_mean_arr

def run_biomass_kriging(self, krig_mesh: KrigingMesh) -> None:
"""
A high-level interface that sets up and runs
Kriging using the areal biomass density.
The results are then stored in the ``Survey``
object as ``krig_results_gdf``.
object as ``kriging_results_gdf``.
Parameters
Expand All @@ -388,23 +388,23 @@ def run_biomass_kriging(self, krig_mesh: KrigingMesh) -> None:
if not isinstance(krig_mesh, KrigingMesh):
raise ValueError("You must provide a KrigingMesh object!")

if (not isinstance(self.survey.bio_calc.final_biomass_table, gpd.GeoDataFrame)) \
and ('areal_biomass_density_adult' not in self.survey.bio_calc.final_biomass_table):
if (not isinstance(self.survey.bio_calc.transect_results_gdf, gpd.GeoDataFrame)) \
and ('biomass_density_adult' not in self.survey.bio_calc.transect_results_gdf):
raise ValueError("The areal biomass density must be calculated before running this routine!")

ep_arr, eps_arr, vp_arr = self.run_kriging(
field_var_arr, field_samplevar_arr, field_mean_arr = self.run_kriging(
krig_mesh.transformed_mesh_df['x_mesh'].values,
krig_mesh.transformed_transect_df['x_transect'].values,
krig_mesh.transformed_mesh_df['y_mesh'].values,
krig_mesh.transformed_transect_df['y_transect'].values,
self.survey.bio_calc.final_biomass_table['areal_biomass_density_adult'].values.flatten())
self.survey.bio_calc.transect_results_gdf['biomass_density_adult'].values.flatten())

# collect all important Kriging results
results_gdf = krig_mesh.mesh_gdf.copy()
results_gdf['krig_biomass_vp'] = vp_arr
results_gdf['krig_biomass_ep'] = ep_arr
results_gdf['krig_biomass_eps'] = eps_arr
results_gdf["area_calc"] = self.survey.params['kriging_A0'] * results_gdf['fraction_cell_in_polygon']
results_gdf["krig_biomass_vals"] = results_gdf['krig_biomass_vp'] * results_gdf["area_calc"]
results_gdf['biomass_density_adult_mean'] = field_mean_arr
results_gdf['biomass_density_adult_var'] = field_var_arr
results_gdf['biomass_density_adult_samplevar'] = field_samplevar_arr
results_gdf["cell_area_nmi2"] = self.survey.params['kriging_A0'] * results_gdf['fraction_cell_in_polygon']
results_gdf["biomass"] = results_gdf['biomass_density_adult_mean'] * results_gdf["cell_area_nmi2"]

self.survey.bio_calc.krig_results_gdf = results_gdf
self.survey.bio_calc.kriging_results_gdf = results_gdf
Loading

0 comments on commit e2452b9

Please sign in to comment.