From e2452b9bad5b2058dba09f44575d4126826c3024 Mon Sep 17 00:00:00 2001 From: b-reyes <53541061+b-reyes@users.noreply.github.com> Date: Fri, 16 Dec 2022 08:21:00 -0800 Subject: [PATCH] Change names associated with Kriging and GeoDataFrames that hold final results (#57) * remove Area (km^2) column from the mesh input data * change area_calc to cell_area_nmi2 * change krig_biomass_vals to biomass * change krig_biomass_vp/ep/eps to biomass_density_adult_mean/var/samplevar * remove areal as a prefix for variables and functions, but still include the specification in docs and comments * change all vp/ep/eps_val occurrences to field_mean/var/samplevar and vp/ep/eps_arr occurrences to field_mean/var/samplevar_arr * change final_biomass_table to transect_results_gdf and krig_results_gdf to kriging_results_gdf --- EchoPro/computation/biomass_density.py | 56 ++++++------- EchoPro/computation/bootstrapping.py | 4 +- EchoPro/computation/cv.py | 14 ++-- EchoPro/computation/kriging.py | 82 +++++++++---------- EchoPro/data_loader/kriging_mesh.py | 17 ++-- EchoPro/survey.py | 18 ++-- EchoPro/visualization.py | 18 ++-- example_notebooks/echopro_workflow.ipynb | 20 ++--- .../kriging_mesh_walkthrough.ipynb | 8 +- .../semi_variogram_workflow.ipynb | 2 +- .../transect_selection_workflow.ipynb | 18 ++-- 11 files changed, 128 insertions(+), 129 deletions(-) diff --git a/EchoPro/computation/biomass_density.py b/EchoPro/computation/biomass_density.py index 6c287363..860ed156 100644 --- a/EchoPro/computation/biomass_density.py +++ b/EchoPro/computation/biomass_density.py @@ -29,8 +29,8 @@ def __init__(self, survey=None): self.strata_df = None self.specimen_df = None self.nasc_df = None - self.final_biomass_table = None - self.krig_results_gdf = None + self.transect_results_gdf = None + self.kriging_results_gdf = None self.bio_param_df = None # biomass parameters for each stratum self.weight_fraction_adult_df = None self.strata_sig_b = None @@ -597,14 +597,14 @@ def _get_interval(nasc_df: 
pd.DataFrame) -> np.ndarray: return interval - def _get_tot_areal_biomass_density(self, areal_numerical_density: pd.Series) -> np.ndarray: + def _get_tot_biomass_density(self, numerical_density: pd.Series) -> np.ndarray: """ Calculates the total areal biomass density for each NASC value. Parameters ---------- - areal_numerical_density : pd.Series + numerical_density : pd.Series Series representing the areal numerical density Returns @@ -616,17 +616,17 @@ def _get_tot_areal_biomass_density(self, areal_numerical_density: pd.Series) -> bc_expanded_df = self.bio_param_df.loc[self.nasc_df.stratum_num.values] # compute the areal numerical density for males and females - areal_numerical_density_male = np.round(areal_numerical_density.values * bc_expanded_df.M_prop.values) - areal_numerical_density_female = np.round(areal_numerical_density.values * bc_expanded_df.F_prop.values) + numerical_density_male = np.round(numerical_density.values * bc_expanded_df.M_prop.values) + numerical_density_female = np.round(numerical_density.values * bc_expanded_df.F_prop.values) # compute the areal biomass density for males, females, and unsexed - areal_biomass_density_male = areal_numerical_density_male * bc_expanded_df.averaged_weight_M.values - areal_biomass_density_female = areal_numerical_density_female * bc_expanded_df.averaged_weight_F.values - areal_biomass_density_unsexed = (areal_numerical_density.values - areal_numerical_density_male - - areal_numerical_density_female) * bc_expanded_df.averaged_weight.values + biomass_density_male = numerical_density_male * bc_expanded_df.averaged_weight_M.values + biomass_density_female = numerical_density_female * bc_expanded_df.averaged_weight_F.values + biomass_density_unsexed = (numerical_density.values - numerical_density_male + - numerical_density_female) * bc_expanded_df.averaged_weight.values # compute the total areal biomass density - return areal_biomass_density_male + areal_biomass_density_female + areal_biomass_density_unsexed + 
return biomass_density_male + biomass_density_female + biomass_density_unsexed def _get_age_weight_conversion(self, df: Union[pd.DataFrame, pd.Series]) -> float: """ @@ -686,20 +686,20 @@ def _get_weight_fraction_adult(self) -> None: for i in stratum_ind: self.weight_fraction_adult_df.loc[i].val = 1.0 - self._get_age_weight_conversion(spec_drop.loc[i]) - def _construct_biomass_table(self, areal_biomass_density_adult: np.array) -> None: + def _construct_biomass_table(self, biomass_density_adult: np.array) -> None: """ - Constructs self.final_biomass_table, which + Constructs self.transect_results_gdf, which contains the areal biomass density for adults. Parameters ---------- - areal_biomass_density_adult : np.array + biomass_density_adult : np.array Numpy array of areal biomass density adult """ # minimal columns to do Jolly Hampton CV on data that has not been kriged final_df = self.nasc_df[['latitude', 'longitude', 'stratum_num', 'transect_spacing']].copy() - final_df["areal_biomass_density_adult"] = areal_biomass_density_adult + final_df["biomass_density_adult"] = biomass_density_adult # TODO: should we include the below values in the final biomass table? 
# calculates the interval for the area calculation @@ -709,12 +709,12 @@ def _construct_biomass_table(self, areal_biomass_density_adult: np.array) -> Non # final_df["Area"] = interval * self.nasc_df['transect_spacing'] # calculate the total number of fish in a given area - # final_df["N_A"] = areal_numerical_density * A + # final_df["N_A"] = numerical_density * A # construct GeoPandas DataFrame to simplify downstream processes - self.final_biomass_table = gpd.GeoDataFrame(final_df, - geometry=gpd.points_from_xy(final_df.longitude, - final_df.latitude)) + self.transect_results_gdf = gpd.GeoDataFrame(final_df, + geometry=gpd.points_from_xy(final_df.longitude, + final_df.latitude)) def set_class_variables(self, selected_transects: Optional[List] = None) -> None: """ @@ -758,10 +758,10 @@ def set_class_variables(self, selected_transects: Optional[List] = None) -> None self.specimen_df = self.survey.specimen_df.copy() self.nasc_df = self.survey.nasc_df - def get_final_biomass_table(self, selected_transects: Optional[List] = None) -> None: + def get_transect_results_gdf(self, selected_transects: Optional[List] = None) -> None: """ Orchestrates the calculation of the areal biomass density - and creation of self.final_biomass_table, which contains + and creation of self.transect_results_gdf, which contains the areal biomass density of adult hake and associated useful variables. 
Parameters @@ -793,14 +793,14 @@ def get_final_biomass_table(self, selected_transects: Optional[List] = None) -> mix_sa_ratio = self.nasc_df.apply(lambda x: wgt_vals[x.haul_num] if x.haul_num in wgt_vals_ind else 0.0, axis=1) # calculate the areal numerical density - areal_numerical_density = np.round((mix_sa_ratio*self.nasc_df.NASC) / - self.strata_sig_b.loc[self.nasc_df.stratum_num].values) + numerical_density = np.round((mix_sa_ratio*self.nasc_df.NASC) / + self.strata_sig_b.loc[self.nasc_df.stratum_num].values) - # total areal biomass density for each areal_numerical_density value - areal_biomass_density = self._get_tot_areal_biomass_density(areal_numerical_density) + # total areal biomass density for each numerical_density value + biomass_density = self._get_tot_biomass_density(numerical_density) # obtain the areal biomass density for adults - areal_biomass_density_adult = (areal_biomass_density * - self.weight_fraction_adult_df.loc[self.nasc_df.stratum_num.values].values.flatten()) + biomass_density_adult = (biomass_density * + self.weight_fraction_adult_df.loc[self.nasc_df.stratum_num.values].values.flatten()) - self._construct_biomass_table(areal_biomass_density_adult) + self._construct_biomass_table(biomass_density_adult) diff --git a/EchoPro/computation/bootstrapping.py b/EchoPro/computation/bootstrapping.py index 476f361f..29b97aa9 100644 --- a/EchoPro/computation/bootstrapping.py +++ b/EchoPro/computation/bootstrapping.py @@ -91,7 +91,7 @@ def _get_results_for_no_kriging(self) -> List[float]: """ # calculate total biomass density - tot_bio_mass_no_kriging = self.survey.bio_calc.final_biomass_table["areal_biomass_density_adult"].sum() + tot_bio_mass_no_kriging = self.survey.bio_calc.transect_results_gdf["biomass_density_adult"].sum() # perform CV analysis on data CV_JH_mean_no_kriging = self.survey.run_cv_analysis(kriged_data=False) @@ -136,7 +136,7 @@ def _get_results_for_kriging(self, krig_mesh_obj: KrigingMesh, krig.run_biomass_kriging(krig_mesh_obj) # 
calculate the total Kriged biomass density - tot_bio_mass_kriging = self.survey.bio_calc.krig_results_gdf.krig_biomass_vals.sum() + tot_bio_mass_kriging = self.survey.bio_calc.kriging_results_gdf.biomass.sum() # perform CV analysis on Kriged data CV_JH_mean_kriging = self.survey.run_cv_analysis(kriged_data=True) diff --git a/EchoPro/computation/cv.py b/EchoPro/computation/cv.py index c0472ddb..0f044c3d 100644 --- a/EchoPro/computation/cv.py +++ b/EchoPro/computation/cv.py @@ -25,7 +25,7 @@ def get_transect_strata_info_no_kriging(lat_inpfc: Tuple[float], each region within a survey (established by INPFC) biomass_table : pd.DataFrame DataFrame containing Longitude, Latitude, Spacing, and - areal_biomass_density_adult columns + biomass_density_adult columns Returns ------- @@ -43,7 +43,7 @@ def get_transect_strata_info_no_kriging(lat_inpfc: Tuple[float], transect_info["mean_spacing"] = biomass_table['transect_spacing'].groupby(level=0).mean() # store the sum of the biomass for each transect - transect_info["biomass"] = biomass_table['areal_biomass_density_adult'].groupby(level=0).sum() + transect_info["biomass"] = biomass_table['biomass_density_adult'].groupby(level=0).sum() # compute the length of each transect transect_info["distance"] = transect_info.apply( @@ -82,7 +82,7 @@ def get_transect_strata_info_kriged(lat_inpfc: Tuple[float], each region within a survey (established by INPFC) biomass_table : pd.DataFrame DataFrame containing Latitude of centroid, - Longitude of centroid, and krig_biomass_vals columns + Longitude of centroid, and biomass columns Returns ------- @@ -95,7 +95,7 @@ def get_transect_strata_info_kriged(lat_inpfc: Tuple[float], # reduce biomass table to only essential columns reduced_table = biomass_table[["centroid_latitude", "centroid_longitude", - "krig_biomass_vals"]].copy() + "biomass"]].copy() # number of "virtual transects" within a latitude degree n_transect_per_lat = 5 # TODO: make this an input @@ -113,7 +113,7 @@ def 
get_transect_strata_info_kriged(lat_inpfc: Tuple[float], transect_info = pd.DataFrame(index=uniq_lat_eq_inc, dtype=np.float64) # store the sum of the biomass for each transect - transect_info['biomass'] = reduced_table['krig_biomass_vals'].groupby(level='lat_eq_inc').sum() + transect_info['biomass'] = reduced_table['biomass'].groupby(level='lat_eq_inc').sum() # store max and min of the longitude transect_info["max_longitude"] = reduced_table['centroid_longitude'].groupby(level=0).max() @@ -190,10 +190,10 @@ def run_jolly_hampton(survey, nr: int, lat_inpfc: Tuple[float], if kriged_data: transect_info, strata_info = get_transect_strata_info_kriged(lat_inpfc, - survey.bio_calc.krig_results_gdf) + survey.bio_calc.kriging_results_gdf) else: transect_info, strata_info = get_transect_strata_info_no_kriging(lat_inpfc, - survey.bio_calc.final_biomass_table) + survey.bio_calc.transect_results_gdf) # get numpy form of dataframe values, so we can use Numba transect_distances = transect_info['distance'].values.flatten() diff --git a/EchoPro/computation/kriging.py b/EchoPro/computation/kriging.py index fa206d6d..4c80a474 100644 --- a/EchoPro/computation/kriging.py +++ b/EchoPro/computation/kriging.py @@ -242,8 +242,7 @@ def _compute_kriging_vals(field_data: np.ndarray, M2: np.ndarray, lamb: np.ndarr M2_weight: float, R_ind: np.ndarray, R_ind_not: np.ndarray, dis_sel_ind: np.ndarray) -> Tuple[float, float, float]: """ - Computes the Kriged values, Kriging variance, and - Kriging sample variance. + Computes the Kriging mean, variance, and sample variance. 
Parameters ---------- @@ -265,12 +264,12 @@ def _compute_kriging_vals(field_data: np.ndarray, M2: np.ndarray, lamb: np.ndarr Returns ------- - ep_val : float + field_var : float Kriging variance - eps_val : float + field_samplevar : float Kriging sample variance - vp_val : float - Kriged value + field_mean : float + Kriged value mean """ # obtain field values for indices within R @@ -281,19 +280,20 @@ def _compute_kriging_vals(field_data: np.ndarray, M2: np.ndarray, lamb: np.ndarr field_vals = field_data[dis_sel_ind] # calculate Kriging value and variance - vp_val = np.nansum(lamb[:len(R_ind)] * field_vals) * M2_weight - ep_val = np.nansum(lamb * M2) + field_mean = np.nansum(lamb[:len(R_ind)] * field_vals) * M2_weight + field_var = np.nansum(lamb * M2) # calculate Kriging sample variance - if abs(vp_val) < np.finfo(float).eps: - eps_val = np.nan + if abs(field_mean) < np.finfo(float).eps: + field_samplevar = np.nan else: - field_var = np.nanvar(field_vals, ddof=1) - eps_val = np.sqrt(ep_val * field_var) / abs(vp_val) + # compute the statistical variance using field values + stat_field_var = np.nanvar(field_vals, ddof=1) + field_samplevar = np.sqrt(field_var * stat_field_var) / abs(field_mean) # TODO: Do we count the anomalies like Chu does? 
- return ep_val, eps_val, vp_val + return field_var, field_samplevar, field_mean def run_kriging(self, x_mesh: np.ndarray, x_data: np.ndarray, y_mesh: np.ndarray, y_data: np.ndarray, @@ -318,12 +318,12 @@ def run_kriging(self, x_mesh: np.ndarray, x_data: np.ndarray, Returns ------- - ep_arr : np.ndarray + field_var_arr : np.ndarray 1D array representing the Kriging variance for each mesh coordinate - eps_arr : np.ndarray + field_samplevar_arr : np.ndarray 1D array representing the Kriging sample variance for each mesh coordinate - vp_arr : np.ndarray - 1D array representing the Kriged value for each mesh coordinate + field_mean_arr : np.ndarray + 1D array representing the Kriged mean value for each mesh coordinate Notes ----- @@ -334,9 +334,9 @@ def run_kriging(self, x_mesh: np.ndarray, x_data: np.ndarray, y_mesh, y_data) # initialize arrays that store calculated Kriging values - ep_arr = np.empty(dis_kmax_ind.shape[0]) - eps_arr = np.empty(dis_kmax_ind.shape[0]) - vp_arr = np.empty(dis_kmax_ind.shape[0]) + field_var_arr = np.empty(dis_kmax_ind.shape[0]) + field_samplevar_arr = np.empty(dis_kmax_ind.shape[0]) + field_mean_arr = np.empty(dis_kmax_ind.shape[0]) # TODO: This loop can be parallelized, if necessary # does Ordinary Kriging, follow Journel and Huijbregts, p. 307 @@ -351,27 +351,27 @@ def run_kriging(self, x_mesh: np.ndarray, x_data: np.ndarray, lamb = self._compute_lambda_weights(M2, K) - ep_val, eps_val, vp_val = self._compute_kriging_vals(field_data, M2, lamb, - M2_weight, R_ind, - R_ind_not, dis_sel_ind) + field_var, field_samplevar, field_mean = self._compute_kriging_vals(field_data, M2, lamb, + M2_weight, R_ind, + R_ind_not, dis_sel_ind) # store important calculated values - ep_arr[row] = ep_val - eps_arr[row] = eps_val - vp_arr[row] = vp_val + field_var_arr[row] = field_var + field_samplevar_arr[row] = field_samplevar + field_mean_arr[row] = field_mean - # zero-out all vp values that are nan or negative # TODO: Is this necessary? 
- neg_nan_ind = np.argwhere((vp_arr < 0) | np.isnan(vp_arr)).flatten() - vp_arr[neg_nan_ind] = 0.0 + # zero-out all field mean values that are nan or negative # TODO: Is this necessary? + neg_nan_ind = np.argwhere((field_mean_arr < 0) | np.isnan(field_mean_arr)).flatten() + field_mean_arr[neg_nan_ind] = 0.0 - return ep_arr, eps_arr, vp_arr + return field_var_arr, field_samplevar_arr, field_mean_arr def run_biomass_kriging(self, krig_mesh: KrigingMesh) -> None: """ A high-level interface that sets up and runs Kriging using the areal biomass density. The results are then stored in the ``Survey`` - object as ``krig_results_gdf``. + object as ``kriging_results_gdf``. Parameters @@ -388,23 +388,23 @@ def run_biomass_kriging(self, krig_mesh: KrigingMesh) -> None: if not isinstance(krig_mesh, KrigingMesh): raise ValueError("You must provide a KrigingMesh object!") - if (not isinstance(self.survey.bio_calc.final_biomass_table, gpd.GeoDataFrame)) \ - and ('areal_biomass_density_adult' not in self.survey.bio_calc.final_biomass_table): + if (not isinstance(self.survey.bio_calc.transect_results_gdf, gpd.GeoDataFrame)) \ + and ('biomass_density_adult' not in self.survey.bio_calc.transect_results_gdf): raise ValueError("The areal biomass density must be calculated before running this routine!") - ep_arr, eps_arr, vp_arr = self.run_kriging( + field_var_arr, field_samplevar_arr, field_mean_arr = self.run_kriging( krig_mesh.transformed_mesh_df['x_mesh'].values, krig_mesh.transformed_transect_df['x_transect'].values, krig_mesh.transformed_mesh_df['y_mesh'].values, krig_mesh.transformed_transect_df['y_transect'].values, - self.survey.bio_calc.final_biomass_table['areal_biomass_density_adult'].values.flatten()) + self.survey.bio_calc.transect_results_gdf['biomass_density_adult'].values.flatten()) # collect all important Kriging results results_gdf = krig_mesh.mesh_gdf.copy() - results_gdf['krig_biomass_vp'] = vp_arr - results_gdf['krig_biomass_ep'] = ep_arr - 
results_gdf['krig_biomass_eps'] = eps_arr - results_gdf["area_calc"] = self.survey.params['kriging_A0'] * results_gdf['fraction_cell_in_polygon'] - results_gdf["krig_biomass_vals"] = results_gdf['krig_biomass_vp'] * results_gdf["area_calc"] + results_gdf['biomass_density_adult_mean'] = field_mean_arr + results_gdf['biomass_density_adult_var'] = field_var_arr + results_gdf['biomass_density_adult_samplevar'] = field_samplevar_arr + results_gdf["cell_area_nmi2"] = self.survey.params['kriging_A0'] * results_gdf['fraction_cell_in_polygon'] + results_gdf["biomass"] = results_gdf['biomass_density_adult_mean'] * results_gdf["cell_area_nmi2"] - self.survey.bio_calc.krig_results_gdf = results_gdf + self.survey.bio_calc.kriging_results_gdf = results_gdf diff --git a/EchoPro/data_loader/kriging_mesh.py b/EchoPro/data_loader/kriging_mesh.py index f5028a6b..0607af5e 100644 --- a/EchoPro/data_loader/kriging_mesh.py +++ b/EchoPro/data_loader/kriging_mesh.py @@ -31,7 +31,7 @@ def __init__(self, survey=None): self.survey = survey # expected columns for the mesh Dataframe - self.mesh_cols = {'centroid_latitude', 'centroid_longitude', 'Area (km^2)', 'fraction_cell_in_polygon'} + self.mesh_cols = {'centroid_latitude', 'centroid_longitude', 'fraction_cell_in_polygon'} # expected columns for the smoothed contour Dataframe self.contour_cols = {'latitude', 'longitude'} @@ -96,12 +96,11 @@ def _load_mesh(self) -> None: self._check_mesh_df(df, file_path) # obtaining those columns that are required - df = df[['centroid_latitude', 'centroid_longitude', 'Area (km^2)', 'fraction_cell_in_polygon']].copy() + df = df[['centroid_latitude', 'centroid_longitude', 'fraction_cell_in_polygon']].copy() # set data types of dataframe df = df.astype({'centroid_latitude': float, 'centroid_longitude': float, - 'Area (km^2)': float, 'fraction_cell_in_polygon': np.float64}) # construct geopandas DataFrame to simplify downstream processes @@ -363,7 +362,7 @@ def _transform_transect_data(self, lon_ref: float = 
-124.78338, x_offset: float = -124.78338, y_offset: float = 45.0) -> None: """ - Applies a coordinate transformation to ``survey.bio_calc.final_biomass_table`` + Applies a coordinate transformation to ``survey.bio_calc.transect_results_gdf`` by first aligning the longitude along the smoothed contour data specified by the configuration parameter ``'smoothed_contour_filename'`` and then transforming the input coordinates from degrees to distance. @@ -394,9 +393,9 @@ def _transform_transect_data(self, lon_ref: float = -124.78338, value and the minimum latitude value (after aligning the longitude) """ - if isinstance(self.survey.bio_calc.final_biomass_table, gpd.GeoDataFrame): + if isinstance(self.survey.bio_calc.transect_results_gdf, gpd.GeoDataFrame): # apply transformations to transect points - transect_df = self.align_longitude(self.survey.bio_calc.final_biomass_table, lon_ref) + transect_df = self.align_longitude(self.survey.bio_calc.transect_results_gdf, lon_ref) # compute distances for each transect d_x = transect_df.geometry.x.max() - transect_df.geometry.x.min() @@ -415,7 +414,7 @@ def _transform_transect_data(self, lon_ref: float = -124.78338, self.transect_d_x = d_x self.transect_d_y = d_y else: - raise RuntimeError("survey.bio_calc.final_biomass_table has not been constructed yet. One " + raise RuntimeError("survey.bio_calc.transect_results_gdf has not been constructed yet. 
One " "must compute the biomass density before running this function!") def apply_coordinate_transformation(self, coord_type: str = 'transect', @@ -423,7 +422,7 @@ def apply_coordinate_transformation(self, coord_type: str = 'transect', x_offset: float = -124.78338, y_offset: float = 45.0) -> None: """ - Applies a coordinate transformation to either ``survey.bio_calc.final_biomass_table`` + Applies a coordinate transformation to either ``survey.bio_calc.transect_results_gdf`` or ``self.mesh_gdf`` by first aligning the longitude along the smoothed contour data specified by the configuration parameter ``'smoothed_contour_filename'`` and then @@ -435,7 +434,7 @@ def apply_coordinate_transformation(self, coord_type: str = 'transect', The type of coordinate points to transform. Possible options: - ``'transect'`` specifies that one should - copy and transform ``survey.bio_calc.final_biomass_table`` + copy and transform ``survey.bio_calc.transect_results_gdf`` - ``'mesh'`` specifies that one should copy ' and transform `self.mesh_gdf`` lon_ref : float diff --git a/EchoPro/survey.py b/EchoPro/survey.py index ad90093c..405adc46 100644 --- a/EchoPro/survey.py +++ b/EchoPro/survey.py @@ -277,7 +277,7 @@ def load_survey_data(self, file_type: str = 'all') -> None: def compute_biomass_density(self, selected_transects: Optional[List] = None) -> None: """ Computes the areal biomass density and - creates ``self.bio_calc.final_biomass_table``, which + creates ``self.bio_calc.transect_results_gdf``, which is a Pandas DataFrame that contains the areal biomass density for adult hake and associated useful variables. 
@@ -290,7 +290,7 @@ def compute_biomass_density(self, selected_transects: Optional[List] = None) -> self.bio_calc = None self.bio_calc = ComputeBiomassDensity(self) - self.bio_calc.get_final_biomass_table(selected_transects) + self.bio_calc.get_transect_results_gdf(selected_transects) def run_cv_analysis(self, lat_inpfc: Tuple[float] = (np.NINF, 36, 40.5, 43.000, 45.7667, 48.5, 55.0000), @@ -330,10 +330,10 @@ def run_cv_analysis(self, nr = 10000 # number of realizations if kriged_data: - if self.bio_calc.krig_results_gdf is None: + if self.bio_calc.kriging_results_gdf is None: raise RuntimeError("Kriging must be ran before performing CV anlysis on Kriged data!") else: - if self.bio_calc.final_biomass_table is None: + if self.bio_calc.transect_results_gdf is None: raise RuntimeError("The biomass density must be calculated before performing CV anlysis on data!") return run_jolly_hampton(self, nr, lat_inpfc, seed, kriged_data) @@ -432,21 +432,21 @@ def get_semi_variogram(self, krig_mesh: KrigingMesh = None, if not isinstance(val, expected_type): raise TypeError(f"{key} is not of type {expected_type}") - # provide a warning if the final_biomass_table being used was + # provide a warning if the transect_results_gdf being used was # created from a subset of the full data - if (len(self.bio_calc.final_biomass_table) != len(self.nasc_df)) and warning: + if (len(self.bio_calc.transect_results_gdf) != len(self.nasc_df)) and warning: warn("The biomass data being used is a subset of the full dataset. " "It is recommended that you use the biomass data created from the full dataset. 
" "To silence this warning set the warning argument to False.") - if (not isinstance(self.bio_calc.final_biomass_table, gpd.GeoDataFrame)) \ - and ('areal_biomass_density_adult' not in self.bio_calc.final_biomass_table): + if (not isinstance(self.bio_calc.transect_results_gdf, gpd.GeoDataFrame)) \ + and ('biomass_density_adult' not in self.bio_calc.transect_results_gdf): raise ValueError("The areal biomass density must be calculated before running this routine!") semi_vario = SemiVariogram( krig_mesh.transformed_transect_df.x_transect.values, krig_mesh.transformed_transect_df.y_transect.values, - self.bio_calc.final_biomass_table['areal_biomass_density_adult'].values.flatten(), + self.bio_calc.transect_results_gdf['biomass_density_adult'].values.flatten(), params['lag_res'], params['nlag'], ) diff --git a/EchoPro/visualization.py b/EchoPro/visualization.py index 66e5616c..d90f4344 100644 --- a/EchoPro/visualization.py +++ b/EchoPro/visualization.py @@ -119,7 +119,7 @@ def plot_layered_points(krig_mesh_obj: KrigingMesh, """ This function constructs a layered Folium plot. The layers correspond to the full set of mesh - points, the ``final_biomass_table`` points with + points, the ``transect_results_gdf`` points with color corresponding to the transect number, and the smoothed contour points (e.g. 200m isobath). 
@@ -157,7 +157,7 @@ def plot_layered_points(krig_mesh_obj: KrigingMesh, # plot the transect points and add them to fmap folium_layer = folium.FeatureGroup(name='transects') - folium_layer = plot_points(krig_mesh_obj.survey.bio_calc.final_biomass_table, folium_layer, + folium_layer = plot_points(krig_mesh_obj.survey.bio_calc.transect_results_gdf, folium_layer, cmap_column='transect_num', color='hex') folium_layer.add_to(fmap) @@ -173,7 +173,7 @@ def plot_layered_points(krig_mesh_obj: KrigingMesh, # Visualization function for Kriging -def plot_kriging_results(krig_results_gdf: gpd.GeoDataFrame, +def plot_kriging_results(kriging_results_gdf: gpd.GeoDataFrame, krig_field_name: str, greater_than_0: bool = False) -> folium.Map: """ @@ -182,11 +182,11 @@ def plot_kriging_results(krig_results_gdf: gpd.GeoDataFrame, Parameters ---------- - krig_results_gdf: gpd.GeoDataFrame + kriging_results_gdf: gpd.GeoDataFrame Dataframe containing a geometry column that holds the mesh coordinates and the column ``krig_field_name`` krig_field_name: str - The name of the column in ``krig_results_gdf`` containing + The name of the column in ``kriging_results_gdf`` containing the Kriging values to plot at each mesh point greater_than_0: bool, default=False If False, plot kriged values over all points in the mesh. 
@@ -205,12 +205,12 @@ def plot_kriging_results(krig_results_gdf: gpd.GeoDataFrame, if greater_than_0: # filter to only values > 0 - krig_results_gdf = krig_results_gdf[krig_results_gdf[krig_field_name] > 0] + kriging_results_gdf = kriging_results_gdf[kriging_results_gdf[krig_field_name] > 0] # collect the appropriate data from the input Dataframe - x_mesh = krig_results_gdf.geometry.x.values - y_mesh = krig_results_gdf.geometry.y.values - krig_val = krig_results_gdf[krig_field_name].values + x_mesh = kriging_results_gdf.geometry.x.values + y_mesh = kriging_results_gdf.geometry.y.values + krig_val = kriging_results_gdf[krig_field_name].values # create a colormap for the values colormap = bcm.LinearColormap(colors=['#3385ff', '#FF0000'], diff --git a/example_notebooks/echopro_workflow.ipynb b/example_notebooks/echopro_workflow.ipynb index 7a9bf802..1d7a24b6 100644 --- a/example_notebooks/echopro_workflow.ipynb +++ b/example_notebooks/echopro_workflow.ipynb @@ -111,7 +111,7 @@ "metadata": {}, "source": [ "## Compute the areal biomass density\n", - "* The areal biomass density is stored in `survey_2019`" + "* The areal biomass density is stored in `survey_2019.bio_calc.transect_results_gdf` as `biomass_density_adult`" ] }, { @@ -132,7 +132,7 @@ "metadata": {}, "outputs": [], "source": [ - "survey_2019.bio_calc.final_biomass_table.head()" + "survey_2019.bio_calc.transect_results_gdf.head()" ] }, { @@ -441,11 +441,11 @@ { "cell_type": "code", "execution_count": null, - "id": "9cf3549f", + "id": "7a64cd62", "metadata": {}, "outputs": [], "source": [ - "krig_results = survey_2019.bio_calc.krig_results_gdf" + "krig_results = survey_2019.bio_calc.kriging_results_gdf" ] }, { @@ -463,7 +463,7 @@ "metadata": {}, "outputs": [], "source": [ - "krig_results.krig_biomass_vals = 1e-6 * krig_results.krig_biomass_vals" + "krig_results.biomass = 1e-6 * krig_results.biomass" ] }, { @@ -473,7 +473,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Total Kriged Biomass Estimate: 
{krig_results.krig_biomass_vals.sum():.3f} kmt\")" + "print(f\"Total Kriged Biomass Estimate: {krig_results.biomass.sum():.3f} kmt\")" ] }, { @@ -493,7 +493,7 @@ "metadata": {}, "outputs": [], "source": [ - "plot_kriging_results(krig_results, krig_field_name=\"krig_biomass_vals\", greater_than_0=True)" + "plot_kriging_results(krig_results, krig_field_name=\"biomass\", greater_than_0=True)" ] }, { @@ -507,9 +507,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:echopro]", + "display_name": "echopro_conda", "language": "python", - "name": "conda-env-echopro-py" + "name": "echopro_conda" }, "language_info": { "codemirror_mode": { @@ -521,7 +521,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.12" }, "toc": { "base_numbering": 1, diff --git a/example_notebooks/kriging_mesh_walkthrough.ipynb b/example_notebooks/kriging_mesh_walkthrough.ipynb index 9307eabd..e5f556d8 100644 --- a/example_notebooks/kriging_mesh_walkthrough.ipynb +++ b/example_notebooks/kriging_mesh_walkthrough.ipynb @@ -96,7 +96,7 @@ "metadata": {}, "source": [ "## Compute the areal biomass density\n", - "* The areal biomass density is stored in `survey_2019`" + "* The areal biomass density is stored in `survey_2019.bio_calc.transect_results_gdf` as `biomass_density_adult`" ] }, { @@ -179,7 +179,7 @@ "outputs": [], "source": [ "# obtain the polygon of the transects \n", - "transect_polygon = krig_mesh.get_polygon_of_transects(survey_2019.bio_calc.final_biomass_table, \n", + "transect_polygon = krig_mesh.get_polygon_of_transects(survey_2019.bio_calc.transect_results_gdf, \n", " n_close=4, nm_to_buffer=1.25)" ] }, @@ -191,7 +191,7 @@ "outputs": [], "source": [ "# plot transect points \n", - "fmap_polygon = plot_points(survey_2019.bio_calc.final_biomass_table, cmap_column='transect_num', color='hex')\n", + "fmap_polygon = plot_points(survey_2019.bio_calc.transect_results_gdf, cmap_column='transect_num', 
color='hex')\n", "\n", "# Add polygon to folium map and display it\n", "fmap_polygon.add_child(folium.GeoJson(transect_polygon))" @@ -228,7 +228,7 @@ "fmap_reduced = plot_points(reduced_mesh_gdf, color='red')\n", "\n", "# plot the transect points \n", - "fmap_reduced = plot_points(survey_2019.bio_calc.final_biomass_table, fmap_reduced, \n", + "fmap_reduced = plot_points(survey_2019.bio_calc.transect_results_gdf, fmap_reduced, \n", " cmap_column='transect_num', color='hex')\n", "\n", "# display transects and reduced mesh\n", diff --git a/example_notebooks/semi_variogram_workflow.ipynb b/example_notebooks/semi_variogram_workflow.ipynb index 80a37ee5..e61b4ef6 100644 --- a/example_notebooks/semi_variogram_workflow.ipynb +++ b/example_notebooks/semi_variogram_workflow.ipynb @@ -99,7 +99,7 @@ "metadata": {}, "source": [ "## Compute the areal biomass density\n", - "* The areal biomass density is stored in `survey_2019`" + "* The areal biomass density is stored in `survey_2019.bio_calc.transect_results_gdf` as `biomass_density_adult`" ] }, { diff --git a/example_notebooks/transect_selection_workflow.ipynb b/example_notebooks/transect_selection_workflow.ipynb index 03a56775..fd159c8d 100644 --- a/example_notebooks/transect_selection_workflow.ipynb +++ b/example_notebooks/transect_selection_workflow.ipynb @@ -152,7 +152,7 @@ "metadata": {}, "source": [ "## Compute the areal biomass density on subset of transects\n", - "* The areal biomass density is stored in `survey_2019.bio_calc`" + "* The areal biomass density is stored in `survey_2019.bio_calc.transect_results_gdf` as `biomass_density_adult`" ] }, { @@ -173,7 +173,7 @@ "metadata": {}, "outputs": [], "source": [ - "survey_2019.bio_calc.final_biomass_table.head()" + "survey_2019.bio_calc.transect_results_gdf.head()" ] }, { @@ -430,7 +430,7 @@ "metadata": {}, "outputs": [], "source": [ - "krig_results = survey_2019.bio_calc.krig_results_gdf" + "krig_results = survey_2019.bio_calc.kriging_results_gdf" ] }, { @@ -440,7 
+440,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(f\"Total Kriged Biomass Estimate: {1e-6*krig_results.krig_biomass_vals.sum():.3f} kmt\")" + "print(f\"Total Kriged Biomass Estimate: {1e-6*krig_results.biomass.sum():.3f} kmt\")" ] }, { @@ -459,8 +459,8 @@ "outputs": [], "source": [ "# plot mesh points with biomass values > 0\n", - "krig_results.krig_biomass_vals = 1e-6 * krig_results.krig_biomass_vals\n", - "plot_kriging_results(krig_results, krig_field_name=\"krig_biomass_vals\", greater_than_0=True)" + "krig_results.biomass = 1e-6 * krig_results.biomass\n", + "plot_kriging_results(krig_results, krig_field_name=\"biomass\", greater_than_0=True)" ] }, { @@ -474,9 +474,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:echopro]", + "display_name": "echopro_conda", "language": "python", - "name": "conda-env-echopro-py" + "name": "echopro_conda" }, "language_info": { "codemirror_mode": { @@ -488,7 +488,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.12" }, "toc": { "base_numbering": 1,