diff --git a/tests/baseline/test_curve_2d_plot.png b/tests/baseline/test_curve_2d_plot.png
index 59d368a8..95662421 100644
Binary files a/tests/baseline/test_curve_2d_plot.png and b/tests/baseline/test_curve_2d_plot.png differ
diff --git a/tests/test_well.py b/tests/test_well.py
index 10888bbf..57968c6c 100644
--- a/tests/test_well.py
+++ b/tests/test_well.py
@@ -5,6 +5,8 @@
 import os
 from pathlib import Path
 
+import pandas as pd
+
 import welly
 from welly import Well
 
@@ -148,3 +150,11 @@ def test_iter_well(well):
     for curve in well:
         assert curve == well.data['CALI']
         break
+
+def test_df_object_cols(df):
+
+    df["Test object"] = "54"
+    df["test_str"] = '1z'
+    well = Well.from_df(df)
+    assert all(well.df()["Test object"] == 54)
+    assert all(well.df()["test_str"] == "1z")
diff --git a/welly/curve.py b/welly/curve.py
index 4e5af516..1205ffb2 100644
--- a/welly/curve.py
+++ b/welly/curve.py
@@ -10,6 +10,7 @@
 import numpy as np
 import pandas as pd
 from pandas.api.types import is_any_real_numeric_dtype
+from pandas.api.types import is_numeric_dtype
 from scipy.interpolate import interp1d
 
 from welly.plot import plot_2d_curve, plot_curve, plot_kde_curve
@@ -970,7 +971,7 @@ def to_basis(self,
             Curve. The current instance in the new basis.
         """
         # category data type or a string in data defaults to 'nearest'
-        if pd.api.types.is_categorical_dtype(self.df.iloc[:, 0]) or pd.api.types.is_string_dtype(self.df.iloc[:, 0]):
+        if not is_numeric_dtype(self.df.iloc[:, 0]):
             interp_kind = 'nearest'
 
         new_curve = copy.deepcopy(self)
diff --git a/welly/well.py b/welly/well.py
index 7af9482f..707b1163 100644
--- a/welly/well.py
+++ b/welly/well.py
@@ -632,17 +632,44 @@ def df(self,
             # swap MultiIndex levels
             df = df.swaplevel()
 
-        # I think this is the wrong place to do this.
-        # Anyway, use i not name just in case there are duplicate names.
-        for i, (_, column) in enumerate(df.iteritems()):
-            if is_object_dtype(column.dtype):
-                try:
-                    df.iloc[:, i] = column.astype(float)
-                except ValueError:
-                    pass
+        df = self._convert_object_cols_to_numeric(df)
 
         return df
 
+    def _convert_object_cols_to_numeric(self, df):
+        """
+        Convert object columns into numeric columns, if possible.
+
+        Columns are visited by position, not by name, so the original column
+        order is kept and duplicate column names are handled. Columns that
+        cannot be parsed as numbers are returned unchanged.
+
+        Args:
+            df (pd.DataFrame): dataframe to work
+
+        Returns:
+            pd.DataFrame. Whole dataframe with conversions
+        """
+        columns = []
+        for _, column in df.items():
+            if column.dtype == object:
+                try:
+                    column = pd.to_numeric(column)
+                except (ValueError, TypeError):
+                    # Not numeric (e.g. free text): keep the column as it is.
+                    pass
+            columns.append(column)
+
+        if not columns:
+            # pd.concat rejects an empty list; there is nothing to convert.
+            return df.copy()
+
+        result = pd.concat(columns, axis=1)
+        # concat derives the labels from the Series names; put the original
+        # labels back so they come out exactly as they went in.
+        result.columns = df.columns
+        return result
+
     def add_curves_from_las(self, fname, remap=None, funcs=None):
         """
         Given a LAS file, add curves from it to the current well instance.